You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by du...@apache.org on 2016/01/26 02:12:25 UTC

[01/55] [partial] incubator-systemml git commit: [SYSTEMML-482] [SYSTEMML-480] Adding a Git attributes file to enforce Unix-styled line endings, and normalizing all of the line endings.

Repository: incubator-systemml
Updated Branches:
  refs/heads/branch-0.9 cf5144e7d -> ffcdf6ea3


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/unary/scalar/DFTest_T.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/unary/scalar/DFTest_T.R b/src/test/scripts/functions/unary/scalar/DFTest_T.R
index f068f78..00e613b 100644
--- a/src/test/scripts/functions/unary/scalar/DFTest_T.R
+++ b/src/test/scripts/functions/unary/scalar/DFTest_T.R
@@ -19,19 +19,19 @@
 #
 #-------------------------------------------------------------
 
-
-
-args <- commandArgs(TRUE)
-library(Matrix)
-
-qtle = qt(as.numeric(args[1]), df=as.numeric(args[2]));
-p = pt(qtle, df=as.numeric(args[2]));
-pl = pt(qtle, df=as.numeric(args[2]), lower.tail=F);
-
-out = matrix(0,nrow=3, ncol=1);
-out[1,1] = qtle;
-out[2,1] = p;
-out[3,1] = pl;
-
-writeMM(as(out, "CsparseMatrix"), args[3]); 
-
+
+
+args <- commandArgs(TRUE)
+library(Matrix)
+
+qtle = qt(as.numeric(args[1]), df=as.numeric(args[2]));
+p = pt(qtle, df=as.numeric(args[2]));
+pl = pt(qtle, df=as.numeric(args[2]), lower.tail=F);
+
+out = matrix(0,nrow=3, ncol=1);
+out[1,1] = qtle;
+out[2,1] = p;
+out[3,1] = pl;
+
+writeMM(as(out, "CsparseMatrix"), args[3]); 
+


[32/55] [partial] incubator-systemml git commit: [SYSTEMML-482] [SYSTEMML-480] Adding a Git attributes file to enforce Unix-styled line endings, and normalizing all of the line endings.

Posted by du...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/main/java/org/apache/sysml/runtime/transform/TfUtils.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/transform/TfUtils.java b/src/main/java/org/apache/sysml/runtime/transform/TfUtils.java
index 27058de..a4cfaa6 100644
--- a/src/main/java/org/apache/sysml/runtime/transform/TfUtils.java
+++ b/src/main/java/org/apache/sysml/runtime/transform/TfUtils.java
@@ -1,549 +1,549 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.sysml.runtime.transform;
-
-import java.io.BufferedReader;
-import java.io.EOFException;
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.io.Serializable;
-import java.util.Arrays;
-import java.util.regex.Pattern;
-
-import org.apache.hadoop.filecache.DistributedCache;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.ByteWritable;
-import org.apache.hadoop.io.SequenceFile;
-import org.apache.hadoop.io.SequenceFile.Reader;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapred.FileOutputFormat;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.wink.json4j.JSONException;
-import org.apache.wink.json4j.JSONObject;
-
-import org.apache.sysml.conf.ConfigurationManager;
-import org.apache.sysml.parser.DataExpression;
-import org.apache.sysml.runtime.DMLRuntimeException;
-import org.apache.sysml.runtime.controlprogram.parfor.stat.InfrastructureAnalyzer;
-import org.apache.sysml.runtime.io.MatrixReader;
-import org.apache.sysml.runtime.matrix.CSVReblockMR;
-import org.apache.sysml.runtime.matrix.CSVReblockMR.OffsetCount;
-import org.apache.sysml.runtime.matrix.mapred.MRJobConfiguration;
-import org.apache.sysml.runtime.util.MapReduceTool;
-import org.apache.sysml.runtime.util.UtilFunctions;
-import org.apache.sysml.utils.JSONHelper;
-
-
-@SuppressWarnings("deprecation")
-public class TfUtils implements Serializable{
-	
-	private static final long serialVersionUID = 526252850872633125L;
-
-	private OmitAgent _oa = null;
-	private MVImputeAgent _mia = null;
-	private RecodeAgent _ra = null;	
-	private BinAgent _ba = null;
-	private DummycodeAgent _da = null;
-	
-	private long _numRecordsInPartFile;		// Total number of records in the data file
-	private long _numValidRecords;			// (_numRecordsInPartFile - #of omitted records)
-	private long _numTransformedRows; 		// Number of rows after applying transformations
-	private long _numTransformedColumns; 	// Number of columns after applying transformations
-
-	private String _headerLine = null;
-	private boolean _hasHeader;
-	private Pattern _delim = null;
-	private String _delimString = null;
-	private String[] _NAstrings = null;
-	private String[] _outputColumnNames = null;
-	private long _numInputCols = -1;
-	
-	private String _tfMtdDir = null;
-	private String _specFile = null;
-	private String _offsetFile = null;
-	private String _tmpDir = null;
-	private String _outputPath = null;
-	
-	protected static boolean checkValidInputFile(FileSystem fs, Path path, boolean err)
-			throws IOException {
-		// check non-existing file
-		if (!fs.exists(path))
-			if ( err )
-				throw new IOException("File " + path.toString() + " does not exist on HDFS/LFS.");
-			else
-				return false;
-
-		// check for empty file
-		if (MapReduceTool.isFileEmpty(fs, path.toString()))
-			if ( err )
-			throw new EOFException("Empty input file " + path.toString() + ".");
-			else
-				return false;
-		
-		return true;
-	}
-	
-	public static String getPartFileName(JobConf job) throws IOException {
-		FileSystem fs = FileSystem.get(job);
-		Path thisPath=new Path(job.get("map.input.file")).makeQualified(fs);
-		return thisPath.toString();
-	}
-	
-	public static boolean isPartFileWithHeader(JobConf job) throws IOException {
-		FileSystem fs = FileSystem.get(job);
-		
-		String thisfile=getPartFileName(job);
-		Path smallestFilePath=new Path(job.get(MRJobConfiguration.TF_SMALLEST_FILE)).makeQualified(fs);
-		
-		if(thisfile.toString().equals(smallestFilePath.toString()))
-			return true;
-		else
-			return false;
-	}
-	
-	public static JSONObject readSpec(FileSystem fs, String specFile) throws IOException {
-		BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(new Path(specFile))));
-		JSONObject obj = JSONHelper.parse(br);
-		br.close();
-		return obj;
-	}
-	
-	/**
-	 * Prepare NA strings so that they can be sent to workers via JobConf.
-	 * A "dummy" string is added at the end to handle the case of empty strings.
-	 * @param na
-	 * @return
-	 */
-	public static String prepNAStrings(String na) {
-		return na  + DataExpression.DELIM_NA_STRING_SEP + "dummy";
-	}
-	
-	public static String[] parseNAStrings(String na) 
-	{
-		if ( na == null )
-			return null;
-		
-		String[] tmp = Pattern.compile(Pattern.quote(DataExpression.DELIM_NA_STRING_SEP)).split(na, -1);
-		return tmp; //Arrays.copyOf(tmp, tmp.length-1);
-	}
-	
-	public static String[] parseNAStrings(JobConf job) 
-	{
-		return parseNAStrings(job.get(MRJobConfiguration.TF_NA_STRINGS));
-	}
-	
-	private void createAgents(JSONObject spec) throws IOException, JSONException {
-		_oa = new OmitAgent(spec);
-		_mia = new MVImputeAgent(spec);
-		_ra = new RecodeAgent(spec);
-		_ba = new BinAgent(spec);
-		_da = new DummycodeAgent(spec, _numInputCols);
-	}
-	
-	public void setupAgents(OmitAgent oa, MVImputeAgent mia, RecodeAgent ra, BinAgent ba, DummycodeAgent da)  {
-		_oa = oa;
-		_mia = mia;
-		_ra = ra;
-		_ba = ba;
-		_da = da;
-	}
-	
-	private void parseColumnNames() {
-		_outputColumnNames = _delim.split(_headerLine, -1);
-		for(int i=0; i < _outputColumnNames.length; i++)
-			_outputColumnNames[i] = UtilFunctions.unquote(_outputColumnNames[i]);
-	}
-	
-	private void init(String headerLine, boolean hasHeader, String delim, String[] naStrings, JSONObject spec, long numCols, String offsetFile, String tmpPath, String outputPath) throws IOException, JSONException
-	{
-		_numRecordsInPartFile = 0;
-		_numValidRecords = 0;
-		_numTransformedRows = 0;
-		_numTransformedColumns = 0;
-		
-		_headerLine = headerLine;
-		_hasHeader = hasHeader;
-		_delimString = delim;
-		_delim = Pattern.compile(Pattern.quote(delim));
-		_NAstrings = naStrings;
-		_numInputCols = numCols;
-		_offsetFile = offsetFile;
-		_tmpDir = tmpPath;
-		_outputPath = outputPath;
-		
-		parseColumnNames();		
-		createAgents(spec);
-	}
-	
-	public TfUtils(JobConf job, boolean minimal) 
-		throws IOException, JSONException 
-	{
-		if( !InfrastructureAnalyzer.isLocalMode(job) ) {
-			ConfigurationManager.setCachedJobConf(job);
-		}
-		
-		_NAstrings = TfUtils.parseNAStrings(job);
-		_specFile = job.get(MRJobConfiguration.TF_SPEC_FILE);
-		
-		FileSystem fs = FileSystem.get(job);
-		JSONObject spec = TfUtils.readSpec(fs, _specFile);
-		
-		_oa = new OmitAgent(spec);
-	}
-	
-	// called from GenTFMtdMapper, ApplyTf (Hadoop)
-	public TfUtils(JobConf job) 
-		throws IOException, JSONException 
-	{
-		if( !InfrastructureAnalyzer.isLocalMode(job) ) {
-			ConfigurationManager.setCachedJobConf(job);
-		}
-		
-		boolean hasHeader = Boolean.parseBoolean(job.get(MRJobConfiguration.TF_HAS_HEADER));
-		//Pattern delim = Pattern.compile(Pattern.quote(job.get(MRJobConfiguration.TF_DELIM)));
-		String[] naStrings = TfUtils.parseNAStrings(job);
-		
-		long numCols = UtilFunctions.parseToLong( job.get(MRJobConfiguration.TF_NUM_COLS) );		// #of columns in input data
-			
-		String specFile = job.get(MRJobConfiguration.TF_SPEC_FILE);
-		String offsetFile = job.get(MRJobConfiguration.TF_OFFSETS_FILE);
-		String tmpPath = job.get(MRJobConfiguration.TF_TMP_LOC);
-		String outputPath = FileOutputFormat.getOutputPath(job).toString();
-		FileSystem fs = FileSystem.get(job);
-		JSONObject spec = TfUtils.readSpec(fs, specFile);
-		
-		init(job.get(MRJobConfiguration.TF_HEADER), hasHeader, job.get(MRJobConfiguration.TF_DELIM), naStrings, spec, numCols, offsetFile, tmpPath, outputPath);
-	}
-	
-	// called from GenTfMtdReducer 
-	public TfUtils(JobConf job, String tfMtdDir) throws IOException, JSONException 
-	{
-		this(job);
-		_tfMtdDir = tfMtdDir;
-	}
-	
-	// called from GenTFMtdReducer and ApplyTf (Spark)
-	public TfUtils(String headerLine, boolean hasHeader, String delim, String[] naStrings, JSONObject spec, long ncol, String tfMtdDir, String offsetFile, String tmpPath) throws IOException, JSONException {
-		init (headerLine, hasHeader, delim, naStrings, spec, ncol, offsetFile, tmpPath, null);
-		_tfMtdDir = tfMtdDir;
-	}
-	
-	public void incrValid() { _numValidRecords++; }
-	public long getValid()  { return _numValidRecords; }
-	public long getTotal()  { return _numRecordsInPartFile; }
-	public long getNumTransformedRows() 	{ return _numTransformedRows; }
-	public long getNumTransformedColumns() 	{ return _numTransformedColumns; }
-	
-	public String getHeader() 		{ return _headerLine; }
-	public boolean hasHeader() 		{ return _hasHeader; }
-	public String getDelimString() 	{ return _delimString; }
-	public Pattern getDelim() 		{ return _delim; }
-	public String[] getNAStrings() 	{ return _NAstrings; }
-	public long getNumCols() 		{ return _numInputCols; }
-	
-	public String getSpecFile() 	{ return _specFile; }
-	public String getTfMtdDir() 	{ return _tfMtdDir; }
-	public String getOffsetFile() 	{ return _offsetFile; }
-	public String getTmpDir() 		{ return _tmpDir; }
-	public String getOutputPath()	{ return _outputPath; }
-	
-	public String getName(int colID) { return _outputColumnNames[colID-1]; }
-	
-	public void setValid(long n) { _numValidRecords = n;}
-	public void incrTotal() { _numRecordsInPartFile++; }
-	public void setTotal(long n) { _numRecordsInPartFile = n;}
-	
-	public OmitAgent 	  getOmitAgent() 	{ 	return _oa; }
-	public MVImputeAgent  getMVImputeAgent(){ 	return _mia;}
-	public RecodeAgent 	  getRecodeAgent() 	{ 	return _ra; }
-	public BinAgent 	  getBinAgent() 	{ 	return _ba; }
-	public DummycodeAgent getDummycodeAgent() { return _da; }
-	
-	/**
-	 * Function that checks if the given string is one of NA strings.
-	 * 
-	 * @param w
-	 * @return
-	 */
-	public boolean isNA(String w) {
-		if(_NAstrings == null)
-			return false;
-		
-		for(String na : _NAstrings) {
-			if(w.equals(na))
-				return true;
-		}
-		return false;
-	}
-	
-	public String[] getWords(Text line)
-	{
-		return getWords(line.toString());
-	}
-	
-
-	public String[] getWords(String line) 
-	{
-		return getDelim().split(line.trim(), -1);
-	}
-	
-	/**
-	 * Process a given row to construct transformation metadata.
-	 * 
-	 * @param line
-	 * @return
-	 * @throws IOException
-	 */
-	public String[] prepareTfMtd(String line) throws IOException {
-		String[] words = getWords(line);
-		if(!getOmitAgent().omit(words, this))
-		{
-			getMVImputeAgent().prepare(words, this);
-			getRecodeAgent().prepare(words, this);
-			getBinAgent().prepare(words, this);
-			incrValid();;
-		}
-		incrTotal();
-		
-		return words;
-	}
-	
-	public void loadTfMetadata() throws IOException 
-	{
-		JobConf job = ConfigurationManager.getCachedJobConf();
-		loadTfMetadata(job, false);
-	}
-	
-	public void loadTfMetadata(JobConf job, boolean fromLocalFS) throws IOException
-	{
-		Path tfMtdDir = null; 
-		FileSystem fs = null;
-		
-		if(fromLocalFS) {
-			// metadata must be read from local file system (e.g., distributed cache in the case of Hadoop)
-			tfMtdDir = (DistributedCache.getLocalCacheFiles(job))[0];
-			fs = FileSystem.getLocal(job);
-		}
-		else {
-			fs = FileSystem.get(job);
-			tfMtdDir = new Path(getTfMtdDir());
-		}
-		
-		// load transformation metadata 
-		getMVImputeAgent().loadTxMtd(job, fs, tfMtdDir, this);
-		getRecodeAgent().loadTxMtd(job, fs, tfMtdDir, this);
-		getBinAgent().loadTxMtd(job, fs, tfMtdDir, this);
-		
-		// associate recode maps and bin definitions with dummycoding agent,
-		// as recoded and binned columns are typically dummycoded
-		getDummycodeAgent().setRecodeMaps( getRecodeAgent().getRecodeMaps() );
-		getDummycodeAgent().setNumBins(getBinAgent().getBinList(), getBinAgent().getNumBins());
-		getDummycodeAgent().loadTxMtd(job, fs, tfMtdDir, this);
-
-	}
-	
-	/*public void loadTfMetadata () throws IOException
-	{
-		Path tfMtdDir = (DistributedCache.getLocalCacheFiles(_rJob))[0];
-		FileSystem localFS = FileSystem.getLocal(_rJob);
-		
-		loadTfMetadata(_rJob, localFS, tfMtdDir);
-		
-		FileSystem fs;
-		fs = FileSystem.get(_rJob);
-		Path thisPath=new Path(_rJob.get("map.input.file")).makeQualified(fs);
-		String thisfile=thisPath.toString();
-			
-		Path smallestFilePath=new Path(_rJob.get(MRJobConfiguration.TF_SMALLEST_FILE)).makeQualified(fs);
-		if(thisfile.toString().equals(smallestFilePath.toString()))
-			_partFileWithHeader=true;
-		else
-			_partFileWithHeader = false;
-	}*/
-
-
-	public String processHeaderLine() throws IOException 
-	{
-		FileSystem fs = FileSystem.get(ConfigurationManager.getCachedJobConf());
-		String dcdHeader = getDummycodeAgent().constructDummycodedHeader(getHeader(), getDelim());
-		getDummycodeAgent().genDcdMapsAndColTypes(fs, getTmpDir(), (int) getNumCols(), this);
-		
-		// write header information (before and after transformation) to temporary path
-		// these files are copied into txMtdPath, once the ApplyTf job is complete.
-		DataTransform.generateHeaderFiles(fs, getTmpDir(), getHeader(), dcdHeader);
-
-		return dcdHeader;
-		//_numTransformedColumns = getDelim().split(dcdHeader, -1).length; 
-		//return _numTransformedColumns;
-	}
-
-	public boolean omit(String[] words) {
-		if(getOmitAgent() == null)
-			return false;
-		return getOmitAgent().omit(words, this);
-	}
-	
-	
-	public String[] apply(String[] words) {
-		return apply(words, false);
-	}
-	
-	/**
-	 * Function to apply transformation metadata on a given row.
-	 * 
-	 * @param words
-	 * @param optimizeMaps
-	 * @return
-	 */
-	public String[] apply ( String[] words, boolean optimizeMaps ) 
-	{
-		words = getMVImputeAgent().apply(words, this);
-		
-		if(optimizeMaps)
-			// specific case of transform() invoked from CP (to save boxing and unboxing)
-			words = getRecodeAgent().cp_apply(words, this);
-		else
-			words = getRecodeAgent().apply(words, this);
-
-		words = getBinAgent().apply(words, this);
-		words = getDummycodeAgent().apply(words, this);
-		
-		_numTransformedRows++;
-		
-		return words;
-	}
-	
-	public void check(String []words) throws DMLRuntimeException 
-	{
-		boolean checkEmptyString = ( getNAStrings() != null );
-		if ( checkEmptyString ) 
-		{
-			final String msg = "When na.strings are provided, empty string \"\" is considered as a missing value, and it must be imputed appropriately. Encountered an unhandled empty string in column ID: ";
-			for(int i=0; i<words.length; i++) 
-				if ( words[i] != null && words[i].equals(""))
-					throw new DMLRuntimeException(msg + getDummycodeAgent().mapDcdColumnID(i+1));
-		}
-	}
-	
-	public String checkAndPrepOutputString(String []words) throws DMLRuntimeException 
-	{
-		return checkAndPrepOutputString(words, new StringBuilder());
-	}
-	
-	public String checkAndPrepOutputString(String []words, StringBuilder sb) throws DMLRuntimeException 
-	{
-		/*
-		 * Check if empty strings ("") have to be handled.
-		 * 
-		 * Unless na.strings are provided, empty strings are (implicitly) considered as value zero.
-		 * When na.strings are provided, then "" is considered a missing value indicator, and the 
-		 * user is expected to provide an appropriate imputation method. Therefore, when na.strings 
-		 * are provided, "" encountered in any column (after all transformations are applied) 
-		 * denotes an erroneous condition.  
-		 */
-		boolean checkEmptyString = ( getNAStrings() != null ); //&& !MVImputeAgent.isNA("", TransformationAgent.NAstrings) ) {
-		
-		//StringBuilder sb = new StringBuilder();
-		sb.setLength(0);
-		int i =0;
-		
-		if ( checkEmptyString ) 
-		{
-			final String msg = "When na.strings are provided, empty string \"\" is considered as a missing value, and it must be imputed appropriately. Encountered an unhandled empty string in column ID: ";
-			if ( words[0] != null ) 
-				if ( words[0].equals("") )
-					throw new DMLRuntimeException( msg + getDummycodeAgent().mapDcdColumnID(1));
-				else 
-					sb.append(words[0]);
-			else
-				sb.append("0");
-			
-			for(i=1; i<words.length; i++) 
-			{
-				sb.append(_delimString);
-				
-				if ( words[i] != null ) 
-					if ( words[i].equals("") )
-						throw new DMLRuntimeException(msg + getDummycodeAgent().mapDcdColumnID(i+1));
-					else 
-						sb.append(words[i]);
-				else
-					sb.append("0");
-			}
-		}
-		else 
-		{
-			sb.append(words[0] != null ? words[0] : "0");
-			for(i=1; i<words.length; i++) 
-			{
-				sb.append(_delimString);
-				sb.append(words[i] != null ? words[i] : "0");
-			}
-		}
-		
-		return sb.toString();
-	}
-
-	private Reader initOffsetsReader(JobConf job) throws IOException 
-	{
-		Path path=new Path(job.get(CSVReblockMR.ROWID_FILE_NAME));
-		FileSystem fs = FileSystem.get(job);
-		Path[] files = MatrixReader.getSequenceFilePaths(fs, path);
-		if ( files.length != 1 )
-			throw new IOException("Expecting a single file under counters file: " + path.toString());
-		
-		Reader reader = new SequenceFile.Reader(fs, files[0], job);
-		
-		return reader;
-	}
-	
-	/**
-	 * Function to generate custom file names (transform-part-.....) for
-	 * mappers' output for ApplyTfCSV job. The idea is to find the index 
-	 * of (thisfile, fileoffset) in the list of all offsets from the 
-	 * counters/offsets file, which was generated from either GenTfMtdMR
-	 * or AssignRowIDMR job.
-	 * 
-	 */
-	public String getPartFileID(JobConf job, long offset) throws IOException
-	{
-		Reader reader = initOffsetsReader(job);
-		
-		ByteWritable key=new ByteWritable();
-		OffsetCount value=new OffsetCount();
-		String thisFile = TfUtils.getPartFileName(job);
-		
-		int id = 0;
-		while (reader.next(key, value)) {
-			if ( thisFile.equals(value.filename) && value.fileOffset == offset ) 
-				break;
-			id++;
-		}
-		reader.close();
-		
-		String sid = Integer.toString(id);
-		char[] carr = new char[5-sid.length()];
-		Arrays.fill(carr, '0');
-		String ret = (new String(carr)).concat(sid);
-		
-		return ret;
-	}
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.runtime.transform;
+
+import java.io.BufferedReader;
+import java.io.EOFException;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.Serializable;
+import java.util.Arrays;
+import java.util.regex.Pattern;
+
+import org.apache.hadoop.filecache.DistributedCache;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.ByteWritable;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.SequenceFile.Reader;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.FileOutputFormat;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.wink.json4j.JSONException;
+import org.apache.wink.json4j.JSONObject;
+
+import org.apache.sysml.conf.ConfigurationManager;
+import org.apache.sysml.parser.DataExpression;
+import org.apache.sysml.runtime.DMLRuntimeException;
+import org.apache.sysml.runtime.controlprogram.parfor.stat.InfrastructureAnalyzer;
+import org.apache.sysml.runtime.io.MatrixReader;
+import org.apache.sysml.runtime.matrix.CSVReblockMR;
+import org.apache.sysml.runtime.matrix.CSVReblockMR.OffsetCount;
+import org.apache.sysml.runtime.matrix.mapred.MRJobConfiguration;
+import org.apache.sysml.runtime.util.MapReduceTool;
+import org.apache.sysml.runtime.util.UtilFunctions;
+import org.apache.sysml.utils.JSONHelper;
+
+
+@SuppressWarnings("deprecation")
+public class TfUtils implements Serializable{
+	
+	private static final long serialVersionUID = 526252850872633125L;
+
+	private OmitAgent _oa = null;
+	private MVImputeAgent _mia = null;
+	private RecodeAgent _ra = null;	
+	private BinAgent _ba = null;
+	private DummycodeAgent _da = null;
+	
+	private long _numRecordsInPartFile;		// Total number of records in the data file
+	private long _numValidRecords;			// (_numRecordsInPartFile - #of omitted records)
+	private long _numTransformedRows; 		// Number of rows after applying transformations
+	private long _numTransformedColumns; 	// Number of columns after applying transformations
+
+	private String _headerLine = null;
+	private boolean _hasHeader;
+	private Pattern _delim = null;
+	private String _delimString = null;
+	private String[] _NAstrings = null;
+	private String[] _outputColumnNames = null;
+	private long _numInputCols = -1;
+	
+	private String _tfMtdDir = null;
+	private String _specFile = null;
+	private String _offsetFile = null;
+	private String _tmpDir = null;
+	private String _outputPath = null;
+	
+	protected static boolean checkValidInputFile(FileSystem fs, Path path, boolean err)
+			throws IOException {
+		// check non-existing file
+		if (!fs.exists(path))
+			if ( err )
+				throw new IOException("File " + path.toString() + " does not exist on HDFS/LFS.");
+			else
+				return false;
+
+		// check for empty file
+		if (MapReduceTool.isFileEmpty(fs, path.toString()))
+			if ( err )
+			throw new EOFException("Empty input file " + path.toString() + ".");
+			else
+				return false;
+		
+		return true;
+	}
+	
+	public static String getPartFileName(JobConf job) throws IOException {
+		FileSystem fs = FileSystem.get(job);
+		Path thisPath=new Path(job.get("map.input.file")).makeQualified(fs);
+		return thisPath.toString();
+	}
+	
+	public static boolean isPartFileWithHeader(JobConf job) throws IOException {
+		FileSystem fs = FileSystem.get(job);
+		
+		String thisfile=getPartFileName(job);
+		Path smallestFilePath=new Path(job.get(MRJobConfiguration.TF_SMALLEST_FILE)).makeQualified(fs);
+		
+		if(thisfile.toString().equals(smallestFilePath.toString()))
+			return true;
+		else
+			return false;
+	}
+	
+	public static JSONObject readSpec(FileSystem fs, String specFile) throws IOException {
+		BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(new Path(specFile))));
+		JSONObject obj = JSONHelper.parse(br);
+		br.close();
+		return obj;
+	}
+	
+	/**
+	 * Prepare NA strings so that they can be sent to workers via JobConf.
+	 * A "dummy" string is added at the end to handle the case of empty strings.
+	 * @param na
+	 * @return
+	 */
+	public static String prepNAStrings(String na) {
+		return na  + DataExpression.DELIM_NA_STRING_SEP + "dummy";
+	}
+	
+	public static String[] parseNAStrings(String na) 
+	{
+		if ( na == null )
+			return null;
+		
+		String[] tmp = Pattern.compile(Pattern.quote(DataExpression.DELIM_NA_STRING_SEP)).split(na, -1);
+		return tmp; //Arrays.copyOf(tmp, tmp.length-1);
+	}
+	
+	public static String[] parseNAStrings(JobConf job) 
+	{
+		return parseNAStrings(job.get(MRJobConfiguration.TF_NA_STRINGS));
+	}
+	
+	private void createAgents(JSONObject spec) throws IOException, JSONException {
+		_oa = new OmitAgent(spec);
+		_mia = new MVImputeAgent(spec);
+		_ra = new RecodeAgent(spec);
+		_ba = new BinAgent(spec);
+		_da = new DummycodeAgent(spec, _numInputCols);
+	}
+	
+	public void setupAgents(OmitAgent oa, MVImputeAgent mia, RecodeAgent ra, BinAgent ba, DummycodeAgent da)  {
+		_oa = oa;
+		_mia = mia;
+		_ra = ra;
+		_ba = ba;
+		_da = da;
+	}
+	
+	private void parseColumnNames() {
+		_outputColumnNames = _delim.split(_headerLine, -1);
+		for(int i=0; i < _outputColumnNames.length; i++)
+			_outputColumnNames[i] = UtilFunctions.unquote(_outputColumnNames[i]);
+	}
+	
+	private void init(String headerLine, boolean hasHeader, String delim, String[] naStrings, JSONObject spec, long numCols, String offsetFile, String tmpPath, String outputPath) throws IOException, JSONException
+	{
+		_numRecordsInPartFile = 0;
+		_numValidRecords = 0;
+		_numTransformedRows = 0;
+		_numTransformedColumns = 0;
+		
+		_headerLine = headerLine;
+		_hasHeader = hasHeader;
+		_delimString = delim;
+		_delim = Pattern.compile(Pattern.quote(delim));
+		_NAstrings = naStrings;
+		_numInputCols = numCols;
+		_offsetFile = offsetFile;
+		_tmpDir = tmpPath;
+		_outputPath = outputPath;
+		
+		parseColumnNames();		
+		createAgents(spec);
+	}
+	
+	public TfUtils(JobConf job, boolean minimal) 
+		throws IOException, JSONException 
+	{
+		if( !InfrastructureAnalyzer.isLocalMode(job) ) {
+			ConfigurationManager.setCachedJobConf(job);
+		}
+		
+		_NAstrings = TfUtils.parseNAStrings(job);
+		_specFile = job.get(MRJobConfiguration.TF_SPEC_FILE);
+		
+		FileSystem fs = FileSystem.get(job);
+		JSONObject spec = TfUtils.readSpec(fs, _specFile);
+		
+		_oa = new OmitAgent(spec);
+	}
+	
+	// called from GenTFMtdMapper, ApplyTf (Hadoop)
+	public TfUtils(JobConf job) 
+		throws IOException, JSONException 
+	{
+		if( !InfrastructureAnalyzer.isLocalMode(job) ) {
+			ConfigurationManager.setCachedJobConf(job);
+		}
+		
+		boolean hasHeader = Boolean.parseBoolean(job.get(MRJobConfiguration.TF_HAS_HEADER));
+		//Pattern delim = Pattern.compile(Pattern.quote(job.get(MRJobConfiguration.TF_DELIM)));
+		String[] naStrings = TfUtils.parseNAStrings(job);
+		
+		long numCols = UtilFunctions.parseToLong( job.get(MRJobConfiguration.TF_NUM_COLS) );		// #of columns in input data
+			
+		String specFile = job.get(MRJobConfiguration.TF_SPEC_FILE);
+		String offsetFile = job.get(MRJobConfiguration.TF_OFFSETS_FILE);
+		String tmpPath = job.get(MRJobConfiguration.TF_TMP_LOC);
+		String outputPath = FileOutputFormat.getOutputPath(job).toString();
+		FileSystem fs = FileSystem.get(job);
+		JSONObject spec = TfUtils.readSpec(fs, specFile);
+		
+		init(job.get(MRJobConfiguration.TF_HEADER), hasHeader, job.get(MRJobConfiguration.TF_DELIM), naStrings, spec, numCols, offsetFile, tmpPath, outputPath);
+	}
+	
+	// called from GenTfMtdReducer 
+	public TfUtils(JobConf job, String tfMtdDir) throws IOException, JSONException 
+	{
+		this(job);
+		_tfMtdDir = tfMtdDir;
+	}
+	
+	// called from GenTFMtdReducer and ApplyTf (Spark)
+	public TfUtils(String headerLine, boolean hasHeader, String delim, String[] naStrings, JSONObject spec, long ncol, String tfMtdDir, String offsetFile, String tmpPath) throws IOException, JSONException {
+		init (headerLine, hasHeader, delim, naStrings, spec, ncol, offsetFile, tmpPath, null);
+		_tfMtdDir = tfMtdDir;
+	}
+	
+	public void incrValid() { _numValidRecords++; }
+	public long getValid()  { return _numValidRecords; }
+	public long getTotal()  { return _numRecordsInPartFile; }
+	public long getNumTransformedRows() 	{ return _numTransformedRows; }
+	public long getNumTransformedColumns() 	{ return _numTransformedColumns; }
+	
+	public String getHeader() 		{ return _headerLine; }
+	public boolean hasHeader() 		{ return _hasHeader; }
+	public String getDelimString() 	{ return _delimString; }
+	public Pattern getDelim() 		{ return _delim; }
+	public String[] getNAStrings() 	{ return _NAstrings; }
+	public long getNumCols() 		{ return _numInputCols; }
+	
+	public String getSpecFile() 	{ return _specFile; }
+	public String getTfMtdDir() 	{ return _tfMtdDir; }
+	public String getOffsetFile() 	{ return _offsetFile; }
+	public String getTmpDir() 		{ return _tmpDir; }
+	public String getOutputPath()	{ return _outputPath; }
+	
+	public String getName(int colID) { return _outputColumnNames[colID-1]; }
+	
+	public void setValid(long n) { _numValidRecords = n;}
+	public void incrTotal() { _numRecordsInPartFile++; }
+	public void setTotal(long n) { _numRecordsInPartFile = n;}
+	
+	public OmitAgent 	  getOmitAgent() 	{ 	return _oa; }
+	public MVImputeAgent  getMVImputeAgent(){ 	return _mia;}
+	public RecodeAgent 	  getRecodeAgent() 	{ 	return _ra; }
+	public BinAgent 	  getBinAgent() 	{ 	return _ba; }
+	public DummycodeAgent getDummycodeAgent() { return _da; }
+	
+	/**
+	 * Function that checks if the given string is one of NA strings.
+	 * 
+	 * @param w
+	 * @return
+	 */
+	public boolean isNA(String w) {
+		if(_NAstrings == null)
+			return false;
+		
+		for(String na : _NAstrings) {
+			if(w.equals(na))
+				return true;
+		}
+		return false;
+	}
+	
+	public String[] getWords(Text line)
+	{
+		return getWords(line.toString());
+	}
+	
+
+	public String[] getWords(String line) 
+	{
+		return getDelim().split(line.trim(), -1);
+	}
+	
+	/**
+	 * Process a given row to construct transformation metadata.
+	 * 
+	 * @param line
+	 * @return
+	 * @throws IOException
+	 */
+	public String[] prepareTfMtd(String line) throws IOException {
+		String[] words = getWords(line);
+		if(!getOmitAgent().omit(words, this))
+		{
+			getMVImputeAgent().prepare(words, this);
+			getRecodeAgent().prepare(words, this);
+			getBinAgent().prepare(words, this);
+			incrValid();;
+		}
+		incrTotal();
+		
+		return words;
+	}
+	
+	public void loadTfMetadata() throws IOException 
+	{
+		JobConf job = ConfigurationManager.getCachedJobConf();
+		loadTfMetadata(job, false);
+	}
+	
+	public void loadTfMetadata(JobConf job, boolean fromLocalFS) throws IOException
+	{
+		Path tfMtdDir = null; 
+		FileSystem fs = null;
+		
+		if(fromLocalFS) {
+			// metadata must be read from local file system (e.g., distributed cache in the case of Hadoop)
+			tfMtdDir = (DistributedCache.getLocalCacheFiles(job))[0];
+			fs = FileSystem.getLocal(job);
+		}
+		else {
+			fs = FileSystem.get(job);
+			tfMtdDir = new Path(getTfMtdDir());
+		}
+		
+		// load transformation metadata 
+		getMVImputeAgent().loadTxMtd(job, fs, tfMtdDir, this);
+		getRecodeAgent().loadTxMtd(job, fs, tfMtdDir, this);
+		getBinAgent().loadTxMtd(job, fs, tfMtdDir, this);
+		
+		// associate recode maps and bin definitions with dummycoding agent,
+		// as recoded and binned columns are typically dummycoded
+		getDummycodeAgent().setRecodeMaps( getRecodeAgent().getRecodeMaps() );
+		getDummycodeAgent().setNumBins(getBinAgent().getBinList(), getBinAgent().getNumBins());
+		getDummycodeAgent().loadTxMtd(job, fs, tfMtdDir, this);
+
+	}
+	
+	/*public void loadTfMetadata () throws IOException
+	{
+		Path tfMtdDir = (DistributedCache.getLocalCacheFiles(_rJob))[0];
+		FileSystem localFS = FileSystem.getLocal(_rJob);
+		
+		loadTfMetadata(_rJob, localFS, tfMtdDir);
+		
+		FileSystem fs;
+		fs = FileSystem.get(_rJob);
+		Path thisPath=new Path(_rJob.get("map.input.file")).makeQualified(fs);
+		String thisfile=thisPath.toString();
+			
+		Path smallestFilePath=new Path(_rJob.get(MRJobConfiguration.TF_SMALLEST_FILE)).makeQualified(fs);
+		if(thisfile.toString().equals(smallestFilePath.toString()))
+			_partFileWithHeader=true;
+		else
+			_partFileWithHeader = false;
+	}*/
+
+
+	public String processHeaderLine() throws IOException 
+	{
+		FileSystem fs = FileSystem.get(ConfigurationManager.getCachedJobConf());
+		String dcdHeader = getDummycodeAgent().constructDummycodedHeader(getHeader(), getDelim());
+		getDummycodeAgent().genDcdMapsAndColTypes(fs, getTmpDir(), (int) getNumCols(), this);
+		
+		// write header information (before and after transformation) to temporary path
+		// these files are copied into txMtdPath, once the ApplyTf job is complete.
+		DataTransform.generateHeaderFiles(fs, getTmpDir(), getHeader(), dcdHeader);
+
+		return dcdHeader;
+		//_numTransformedColumns = getDelim().split(dcdHeader, -1).length; 
+		//return _numTransformedColumns;
+	}
+
+	public boolean omit(String[] words) {
+		if(getOmitAgent() == null)
+			return false;
+		return getOmitAgent().omit(words, this);
+	}
+	
+	
+	public String[] apply(String[] words) {
+		return apply(words, false);
+	}
+	
+	/**
+	 * Function to apply transformation metadata on a given row.
+	 * 
+	 * @param words
+	 * @param optimizeMaps
+	 * @return
+	 */
+	public String[] apply ( String[] words, boolean optimizeMaps ) 
+	{
+		words = getMVImputeAgent().apply(words, this);
+		
+		if(optimizeMaps)
+			// specific case of transform() invoked from CP (to save boxing and unboxing)
+			words = getRecodeAgent().cp_apply(words, this);
+		else
+			words = getRecodeAgent().apply(words, this);
+
+		words = getBinAgent().apply(words, this);
+		words = getDummycodeAgent().apply(words, this);
+		
+		_numTransformedRows++;
+		
+		return words;
+	}
+	
+	public void check(String []words) throws DMLRuntimeException 
+	{
+		boolean checkEmptyString = ( getNAStrings() != null );
+		if ( checkEmptyString ) 
+		{
+			final String msg = "When na.strings are provided, empty string \"\" is considered as a missing value, and it must be imputed appropriately. Encountered an unhandled empty string in column ID: ";
+			for(int i=0; i<words.length; i++) 
+				if ( words[i] != null && words[i].equals(""))
+					throw new DMLRuntimeException(msg + getDummycodeAgent().mapDcdColumnID(i+1));
+		}
+	}
+	
+	public String checkAndPrepOutputString(String []words) throws DMLRuntimeException 
+	{
+		return checkAndPrepOutputString(words, new StringBuilder());
+	}
+	
+	public String checkAndPrepOutputString(String []words, StringBuilder sb) throws DMLRuntimeException 
+	{
+		/*
+		 * Check if empty strings ("") have to be handled.
+		 * 
+		 * Unless na.strings are provided, empty strings are (implicitly) considered as value zero.
+		 * When na.strings are provided, then "" is considered a missing value indicator, and the 
+		 * user is expected to provide an appropriate imputation method. Therefore, when na.strings 
+		 * are provided, "" encountered in any column (after all transformations are applied) 
+		 * denotes an erroneous condition.  
+		 */
+		boolean checkEmptyString = ( getNAStrings() != null ); //&& !MVImputeAgent.isNA("", TransformationAgent.NAstrings) ) {
+		
+		//StringBuilder sb = new StringBuilder();
+		sb.setLength(0);
+		int i =0;
+		
+		if ( checkEmptyString ) 
+		{
+			final String msg = "When na.strings are provided, empty string \"\" is considered as a missing value, and it must be imputed appropriately. Encountered an unhandled empty string in column ID: ";
+			if ( words[0] != null ) 
+				if ( words[0].equals("") )
+					throw new DMLRuntimeException( msg + getDummycodeAgent().mapDcdColumnID(1));
+				else 
+					sb.append(words[0]);
+			else
+				sb.append("0");
+			
+			for(i=1; i<words.length; i++) 
+			{
+				sb.append(_delimString);
+				
+				if ( words[i] != null ) 
+					if ( words[i].equals("") )
+						throw new DMLRuntimeException(msg + getDummycodeAgent().mapDcdColumnID(i+1));
+					else 
+						sb.append(words[i]);
+				else
+					sb.append("0");
+			}
+		}
+		else 
+		{
+			sb.append(words[0] != null ? words[0] : "0");
+			for(i=1; i<words.length; i++) 
+			{
+				sb.append(_delimString);
+				sb.append(words[i] != null ? words[i] : "0");
+			}
+		}
+		
+		return sb.toString();
+	}
+
+	private Reader initOffsetsReader(JobConf job) throws IOException 
+	{
+		Path path=new Path(job.get(CSVReblockMR.ROWID_FILE_NAME));
+		FileSystem fs = FileSystem.get(job);
+		Path[] files = MatrixReader.getSequenceFilePaths(fs, path);
+		if ( files.length != 1 )
+			throw new IOException("Expecting a single file under counters file: " + path.toString());
+		
+		Reader reader = new SequenceFile.Reader(fs, files[0], job);
+		
+		return reader;
+	}
+	
+	/**
+	 * Function to generate custom file names (transform-part-.....) for
+	 * mappers' output for ApplyTfCSV job. The idea is to find the index 
+	 * of (thisfile, fileoffset) in the list of all offsets from the 
+	 * counters/offsets file, which was generated from either GenTfMtdMR
+	 * or AssignRowIDMR job.
+	 * 
+	 */
+	public String getPartFileID(JobConf job, long offset) throws IOException
+	{
+		Reader reader = initOffsetsReader(job);
+		
+		ByteWritable key=new ByteWritable();
+		OffsetCount value=new OffsetCount();
+		String thisFile = TfUtils.getPartFileName(job);
+		
+		int id = 0;
+		while (reader.next(key, value)) {
+			if ( thisFile.equals(value.filename) && value.fileOffset == offset ) 
+				break;
+			id++;
+		}
+		reader.close();
+		
+		String sid = Integer.toString(id);
+		char[] carr = new char[5-sid.length()];
+		Arrays.fill(carr, '0');
+		String ret = (new String(carr)).concat(sid);
+		
+		return ret;
+	}
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/main/java/org/apache/sysml/runtime/transform/TransformationAgent.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/transform/TransformationAgent.java b/src/main/java/org/apache/sysml/runtime/transform/TransformationAgent.java
index e818089..2c5e37f 100644
--- a/src/main/java/org/apache/sysml/runtime/transform/TransformationAgent.java
+++ b/src/main/java/org/apache/sysml/runtime/transform/TransformationAgent.java
@@ -1,93 +1,93 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.sysml.runtime.transform;
-
-import java.io.IOException;
-import java.io.Serializable;
-import java.util.Iterator;
-
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.IntWritable;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.OutputCollector;
-
-public abstract class TransformationAgent implements Serializable {
-	
-	private static final long serialVersionUID = -2995384194257356337L;
-	
-	public static enum TX_METHOD { 
-		IMPUTE ("impute"), 
-		RECODE ("recode"), 
-		BIN ("bin"), 
-		DUMMYCODE ("dummycode"), 
-		SCALE ("scale"),
-		OMIT ("omit"),
-		MVRCD ("mvrcd");
-		
-		private String _name;
-		
-		TX_METHOD(String name) { _name = name; }
-		
-		public String toString() {
-			return _name;
-		}
-	}
-	
-	protected static String JSON_ATTRS 	= "attributes"; 
-	protected static String JSON_MTHD 	= "methods"; 
-	protected static String JSON_CONSTS = "constants"; 
-	protected static String JSON_NBINS 	= "numbins"; 
-	
-	protected static final String MV_FILE_SUFFIX 		= ".impute";
-	protected static final String RCD_MAP_FILE_SUFFIX 	= ".map";
-	protected static final String NDISTINCT_FILE_SUFFIX = ".ndistinct";
-	protected static final String MODE_FILE_SUFFIX 		= ".mode";
-	protected static final String BIN_FILE_SUFFIX 		= ".bin";
-	protected static final String SCALE_FILE_SUFFIX		= ".scale";
-	protected static final String DCD_FILE_NAME 		= "dummyCodeMaps.csv";
-	protected static final String COLTYPES_FILE_NAME 	= "coltypes.csv";
-	
-	protected static final String TXMTD_SEP 	= ",";
-	protected static final String DCD_NAME_SEP 	= "_";
-	
-	protected static final String OUT_HEADER = "column.names";
-	protected static final String OUT_DCD_HEADER = "dummycoded.column.names";
-	
-	abstract public void print();
-	abstract public void mapOutputTransformationMetadata(OutputCollector<IntWritable, DistinctValue> out, int taskID, TfUtils agents) throws IOException;
-	abstract public void mergeAndOutputTransformationMetadata(Iterator<DistinctValue> values, String outputDir, int colID, FileSystem fs, TfUtils agents) throws IOException;
-	
-	abstract public void loadTxMtd(JobConf job, FileSystem fs, Path txMtdDir, TfUtils agents) throws IOException;
-	abstract public String[] apply(String[] words, TfUtils agents);
-	
-	protected enum ColumnTypes { SCALE, NOMINAL, ORDINAL, DUMMYCODED, INVALID }
-	protected byte columnTypeToID(ColumnTypes type) throws IOException { 
-		switch(type) 
-		{
-		case SCALE: return 1;
-		case NOMINAL: return 2;
-		case ORDINAL: return 3;
-		case DUMMYCODED: return 1; // Ideally, dummycoded columns should be of a different type. Treating them as SCALE is incorrect, semantically.
-		default:
-			throw new IOException("Invalid Column Type: " + type);
-		}
-	}
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.runtime.transform;
+
+import java.io.IOException;
+import java.io.Serializable;
+import java.util.Iterator;
+
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.OutputCollector;
+
+public abstract class TransformationAgent implements Serializable {
+	
+	private static final long serialVersionUID = -2995384194257356337L;
+	
+	public static enum TX_METHOD { 
+		IMPUTE ("impute"), 
+		RECODE ("recode"), 
+		BIN ("bin"), 
+		DUMMYCODE ("dummycode"), 
+		SCALE ("scale"),
+		OMIT ("omit"),
+		MVRCD ("mvrcd");
+		
+		private String _name;
+		
+		TX_METHOD(String name) { _name = name; }
+		
+		public String toString() {
+			return _name;
+		}
+	}
+	
+	protected static String JSON_ATTRS 	= "attributes"; 
+	protected static String JSON_MTHD 	= "methods"; 
+	protected static String JSON_CONSTS = "constants"; 
+	protected static String JSON_NBINS 	= "numbins"; 
+	
+	protected static final String MV_FILE_SUFFIX 		= ".impute";
+	protected static final String RCD_MAP_FILE_SUFFIX 	= ".map";
+	protected static final String NDISTINCT_FILE_SUFFIX = ".ndistinct";
+	protected static final String MODE_FILE_SUFFIX 		= ".mode";
+	protected static final String BIN_FILE_SUFFIX 		= ".bin";
+	protected static final String SCALE_FILE_SUFFIX		= ".scale";
+	protected static final String DCD_FILE_NAME 		= "dummyCodeMaps.csv";
+	protected static final String COLTYPES_FILE_NAME 	= "coltypes.csv";
+	
+	protected static final String TXMTD_SEP 	= ",";
+	protected static final String DCD_NAME_SEP 	= "_";
+	
+	protected static final String OUT_HEADER = "column.names";
+	protected static final String OUT_DCD_HEADER = "dummycoded.column.names";
+	
+	abstract public void print();
+	abstract public void mapOutputTransformationMetadata(OutputCollector<IntWritable, DistinctValue> out, int taskID, TfUtils agents) throws IOException;
+	abstract public void mergeAndOutputTransformationMetadata(Iterator<DistinctValue> values, String outputDir, int colID, FileSystem fs, TfUtils agents) throws IOException;
+	
+	abstract public void loadTxMtd(JobConf job, FileSystem fs, Path txMtdDir, TfUtils agents) throws IOException;
+	abstract public String[] apply(String[] words, TfUtils agents);
+	
+	protected enum ColumnTypes { SCALE, NOMINAL, ORDINAL, DUMMYCODED, INVALID }
+	protected byte columnTypeToID(ColumnTypes type) throws IOException { 
+		switch(type) 
+		{
+		case SCALE: return 1;
+		case NOMINAL: return 2;
+		case ORDINAL: return 3;
+		case DUMMYCODED: return 1; // Ideally, dummycoded columns should be of a different type. Treating them as SCALE is incorrect, semantically.
+		default:
+			throw new IOException("Invalid Column Type: " + type);
+		}
+	}
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/main/standalone/runStandaloneSystemML.bat
----------------------------------------------------------------------
diff --git a/src/main/standalone/runStandaloneSystemML.bat b/src/main/standalone/runStandaloneSystemML.bat
index aba2002..f837970 100644
--- a/src/main/standalone/runStandaloneSystemML.bat
+++ b/src/main/standalone/runStandaloneSystemML.bat
@@ -1,50 +1,50 @@
-::-------------------------------------------------------------
-::
-:: Licensed to the Apache Software Foundation (ASF) under one
-:: or more contributor license agreements.  See the NOTICE file
-:: distributed with this work for additional information
-:: regarding copyright ownership.  The ASF licenses this file
-:: to you under the Apache License, Version 2.0 (the
-:: "License"); you may not use this file except in compliance
-:: with the License.  You may obtain a copy of the License at
-:: 
-::   http://www.apache.org/licenses/LICENSE-2.0
-:: 
-:: Unless required by applicable law or agreed to in writing,
-:: software distributed under the License is distributed on an
-:: "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-:: KIND, either express or implied.  See the License for the
-:: specific language governing permissions and limitations
-:: under the License.
-::
-::-------------------------------------------------------------
-
-@ECHO OFF
-
-IF "%~1" == ""  GOTO Err
-IF "%~1" == "-help" GOTO Msg
-IF "%~1" == "-h" GOTO Msg
-
-setLocal EnableDelayedExpansion
-
-SET HADOOP_HOME=%CD%/lib/hadoop
-
-set CLASSPATH=./lib/*
-echo !CLASSPATH!
-
-set LOG4JPROP=log4j.properties
-
-for /f "tokens=1,* delims= " %%a in ("%*") do set ALLBUTFIRST=%%b
-
-java -Xmx4g -Xms4g -Xmn400m -cp %CLASSPATH% -Dlog4j.configuration=file:%LOG4JPROP% org.apache.sysml.api.DMLScript -f %1 -exec singlenode -config=SystemML-config.xml %ALLBUTFIRST%
-GOTO End
-
-:Err
-ECHO "Wrong Usage. Please provide DML filename to be executed."
-GOTO Msg
-
-:Msg
-ECHO "Usage: runStandaloneSystemML.bat <dml-filename> [arguments] [-help]"
-ECHO "Script internally invokes 'java -Xmx4g -Xms4g -Xmn400m -jar jSystemML.jar -f <dml-filename> -exec singlenode -config=SystemML-config.xml [Optional-Arguments]'"
-
-:End
+::-------------------------------------------------------------
+::
+:: Licensed to the Apache Software Foundation (ASF) under one
+:: or more contributor license agreements.  See the NOTICE file
+:: distributed with this work for additional information
+:: regarding copyright ownership.  The ASF licenses this file
+:: to you under the Apache License, Version 2.0 (the
+:: "License"); you may not use this file except in compliance
+:: with the License.  You may obtain a copy of the License at
+:: 
+::   http://www.apache.org/licenses/LICENSE-2.0
+:: 
+:: Unless required by applicable law or agreed to in writing,
+:: software distributed under the License is distributed on an
+:: "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+:: KIND, either express or implied.  See the License for the
+:: specific language governing permissions and limitations
+:: under the License.
+::
+::-------------------------------------------------------------
+
+@ECHO OFF
+
+IF "%~1" == ""  GOTO Err
+IF "%~1" == "-help" GOTO Msg
+IF "%~1" == "-h" GOTO Msg
+
+setLocal EnableDelayedExpansion
+
+SET HADOOP_HOME=%CD%/lib/hadoop
+
+set CLASSPATH=./lib/*
+echo !CLASSPATH!
+
+set LOG4JPROP=log4j.properties
+
+for /f "tokens=1,* delims= " %%a in ("%*") do set ALLBUTFIRST=%%b
+
+java -Xmx4g -Xms4g -Xmn400m -cp %CLASSPATH% -Dlog4j.configuration=file:%LOG4JPROP% org.apache.sysml.api.DMLScript -f %1 -exec singlenode -config=SystemML-config.xml %ALLBUTFIRST%
+GOTO End
+
+:Err
+ECHO "Wrong Usage. Please provide DML filename to be executed."
+GOTO Msg
+
+:Msg
+ECHO "Usage: runStandaloneSystemML.bat <dml-filename> [arguments] [-help]"
+ECHO "Script internally invokes 'java -Xmx4g -Xms4g -Xmn400m -jar jSystemML.jar -f <dml-filename> -exec singlenode -config=SystemML-config.xml [Optional-Arguments]'"
+
+:End

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/apply-transform/apply-transform.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/apply-transform/apply-transform.dml b/src/test/scripts/applications/apply-transform/apply-transform.dml
index de7fa02..fdd85c7 100644
--- a/src/test/scripts/applications/apply-transform/apply-transform.dml
+++ b/src/test/scripts/applications/apply-transform/apply-transform.dml
@@ -1,156 +1,156 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-cmdLine_missing_value_maps = ifdef($missing_value_maps, " ")
-cmdLine_bin_defns = ifdef($bin_defns, " ")
-cmdLine_dummy_code_maps = ifdef($dummy_code_maps, " ")
-cmdLine_normalization_maps = ifdef($normalization_maps, " ")
-
-original_X = read($X)
-
-if(cmdLine_missing_value_maps != " "){
-	missing_val_maps = read(cmdLine_missing_value_maps)
-
-	last_data_col = ncol(original_X)-nrow(missing_val_maps)
-	X = original_X[,1:last_data_col]
-}else
-	X = original_X
-
-# col 1: col index of missing indicator col
-#		 0 otherwise
-# col 2: global mean if imputation is needed
-# col 3: num_bins if binning is required
-# col 4: bin width if binning is required
-# col 5: min val if binning is required
-# col 6: begin col if dummy coding is required
-# col 7: end col if dummy coding is required
-# col 8: 1 if normalization is required 0 ow
-# col 9: mean for normalization
-# col 10: std for z-scoring for normalization
-#		 -1 indicates mean subtraction  
-attrinfo = matrix(0, rows=ncol(X), cols=10)
-
-if(cmdLine_missing_value_maps != " "){
-	missing_indicator_mat = original_X[,(last_data_col+1):ncol(original_X)]
-	
-	parfor(i in 1:nrow(missing_val_maps), check=0){
-		attr_index_mv = castAsScalar(missing_val_maps[i,1])
-		attrinfo[attr_index_mv,1] = i
-		attrinfo[attr_index_mv,2] = missing_val_maps[i,2]
-	}	
-}
- 	
-if(cmdLine_bin_defns != " "){
-	bin_defns = read(cmdLine_bin_defns)
-	parfor(i in 1:nrow(bin_defns), check=0){
-		attr_index_bin = castAsScalar(bin_defns[i,1])
-		attrinfo[attr_index_bin,3] = bin_defns[i,4]
-		attrinfo[attr_index_bin,4] = bin_defns[i,2]
-		attrinfo[attr_index_bin,5] = bin_defns[i,3]
-	}
-}
-
-if(cmdLine_dummy_code_maps != " "){
-	dummy_code_maps = read(cmdLine_dummy_code_maps)
-	parfor(i in 1:nrow(dummy_code_maps), check=0){
-		attr_index_dc = castAsScalar(dummy_code_maps[i,1])
-		attrinfo[attr_index_dc,6] = dummy_code_maps[i,2]
-		attrinfo[attr_index_dc,7] = dummy_code_maps[i,3]
-	}
-}else{
-	attrinfo[,6] = seq(1, ncol(X), 1)
-	attrinfo[,7] = seq(1, ncol(X), 1)
-}
-
-if(cmdLine_normalization_maps != " "){
-	normalization_map = read(cmdLine_normalization_maps)
-	parfor(i in 1:nrow(normalization_map), check=0){
-		attr_index_normalization = castAsScalar(normalization_map[i,1])
-		attrinfo[attr_index_normalization,8] = 1
-		attrinfo[attr_index_normalization,9] = castAsScalar(normalization_map[i,2])
-		attrinfo[attr_index_normalization,10] = castAsScalar(normalization_map[i,3])
-	}
-}
-
-#write(attrinfo, "binning/attrinfo.mtx", format="csv")
-
-cols_in_transformed_X = castAsScalar(attrinfo[nrow(attrinfo),6])
-new_X = matrix(0, rows=nrow(X), cols=cols_in_transformed_X)
-log = matrix(0, rows=ncol(X), cols=2)
-parfor(i in 1:ncol(X), check=0){
-	col = X[,i]
-	
-	mv_col_id = castAsScalar(attrinfo[i,1])
-	global_mean = castAsScalar(attrinfo[i,2])
-	num_bins = castAsScalar(attrinfo[i,3])
-	bin_width = castAsScalar(attrinfo[i,4])
-	min_val = castAsScalar(attrinfo[i,5])
-	dummy_coding_beg_col = castAsScalar(attrinfo[i,6])
-	dummy_coding_end_col = castAsScalar(attrinfo[i,7])
-	normalization_needed = castAsScalar(attrinfo[i,8])
-	normalization_mean = castAsScalar(attrinfo[i,9])
-	normalization_std = castAsScalar(attrinfo[i,10])
-	
-	if(mv_col_id > 0){ 
-		# fill-in with global mean
-		col = col + missing_indicator_mat[,mv_col_id] * global_mean
-	}
-	
-	if(num_bins > 0){
-		# only for equiwidth bins
-	
-		# note that max_val entries will get assigned num_bins+1
-		col = round((col - min_val)/bin_width - 0.5) + 1
-		less_than_lb = ppred(col, 1, "<")
-		more_than_ub = ppred(col, num_bins, ">")
-		
-		col = (1 - less_than_lb - more_than_ub)*col + more_than_ub*num_bins + less_than_lb
-	}
-
-	if(dummy_coding_beg_col == dummy_coding_end_col){
-		if(normalization_needed == 1){
-			if(normalization_std == -1) col = col - normalization_mean
-			else col = (col - normalization_mean)/normalization_std
-		}
-		
-		new_X[,dummy_coding_beg_col] = col
-	}else{
-		min_val = min(col)
-		max_val = max(col)
-		if(min_val >= 1 & max_val <= dummy_coding_end_col - dummy_coding_beg_col + 1){
-			res = table(seq(1, nrow(X), 1), col, nrow(X), (dummy_coding_end_col-dummy_coding_beg_col+1))
-			new_X[,dummy_coding_beg_col:dummy_coding_end_col] = res
-		}else{
-			log[i,1] = 1
-			if(min_val < 1) log[i,2] = min_val
-			else log[i,2] = max_val
-		}
-	}
-}
-
-write(new_X, $transformed_X, format="text")
-
-s = "Warning Messages"
-for(i in 1:nrow(log)){
-	if(castAsScalar(log[i,1]) == 1)
-		s = append(s, "Unseen value in column " + i + " (" + castAsScalar(log[i,2]) + ")")
-}
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+cmdLine_missing_value_maps = ifdef($missing_value_maps, " ")
+cmdLine_bin_defns = ifdef($bin_defns, " ")
+cmdLine_dummy_code_maps = ifdef($dummy_code_maps, " ")
+cmdLine_normalization_maps = ifdef($normalization_maps, " ")
+
+original_X = read($X)
+
+if(cmdLine_missing_value_maps != " "){
+	missing_val_maps = read(cmdLine_missing_value_maps)
+
+	last_data_col = ncol(original_X)-nrow(missing_val_maps)
+	X = original_X[,1:last_data_col]
+}else
+	X = original_X
+
+# col 1: col index of missing indicator col
+#		 0 otherwise
+# col 2: global mean if imputation is needed
+# col 3: num_bins if binning is required
+# col 4: bin width if binning is required
+# col 5: min val if binning is required
+# col 6: begin col if dummy coding is required
+# col 7: end col if dummy coding is required
+# col 8: 1 if normalization is required 0 ow
+# col 9: mean for normalization
+# col 10: std for z-scoring for normalization
+#		 -1 indicates mean subtraction  
+attrinfo = matrix(0, rows=ncol(X), cols=10)
+
+if(cmdLine_missing_value_maps != " "){
+	missing_indicator_mat = original_X[,(last_data_col+1):ncol(original_X)]
+	
+	parfor(i in 1:nrow(missing_val_maps), check=0){
+		attr_index_mv = castAsScalar(missing_val_maps[i,1])
+		attrinfo[attr_index_mv,1] = i
+		attrinfo[attr_index_mv,2] = missing_val_maps[i,2]
+	}	
+}
+ 	
+if(cmdLine_bin_defns != " "){
+	bin_defns = read(cmdLine_bin_defns)
+	parfor(i in 1:nrow(bin_defns), check=0){
+		attr_index_bin = castAsScalar(bin_defns[i,1])
+		attrinfo[attr_index_bin,3] = bin_defns[i,4]
+		attrinfo[attr_index_bin,4] = bin_defns[i,2]
+		attrinfo[attr_index_bin,5] = bin_defns[i,3]
+	}
+}
+
+if(cmdLine_dummy_code_maps != " "){
+	dummy_code_maps = read(cmdLine_dummy_code_maps)
+	parfor(i in 1:nrow(dummy_code_maps), check=0){
+		attr_index_dc = castAsScalar(dummy_code_maps[i,1])
+		attrinfo[attr_index_dc,6] = dummy_code_maps[i,2]
+		attrinfo[attr_index_dc,7] = dummy_code_maps[i,3]
+	}
+}else{
+	attrinfo[,6] = seq(1, ncol(X), 1)
+	attrinfo[,7] = seq(1, ncol(X), 1)
+}
+
+if(cmdLine_normalization_maps != " "){
+	normalization_map = read(cmdLine_normalization_maps)
+	parfor(i in 1:nrow(normalization_map), check=0){
+		attr_index_normalization = castAsScalar(normalization_map[i,1])
+		attrinfo[attr_index_normalization,8] = 1
+		attrinfo[attr_index_normalization,9] = castAsScalar(normalization_map[i,2])
+		attrinfo[attr_index_normalization,10] = castAsScalar(normalization_map[i,3])
+	}
+}
+
+#write(attrinfo, "binning/attrinfo.mtx", format="csv")
+
+cols_in_transformed_X = castAsScalar(attrinfo[nrow(attrinfo),6])
+new_X = matrix(0, rows=nrow(X), cols=cols_in_transformed_X)
+log = matrix(0, rows=ncol(X), cols=2)
+parfor(i in 1:ncol(X), check=0){
+	col = X[,i]
+	
+	mv_col_id = castAsScalar(attrinfo[i,1])
+	global_mean = castAsScalar(attrinfo[i,2])
+	num_bins = castAsScalar(attrinfo[i,3])
+	bin_width = castAsScalar(attrinfo[i,4])
+	min_val = castAsScalar(attrinfo[i,5])
+	dummy_coding_beg_col = castAsScalar(attrinfo[i,6])
+	dummy_coding_end_col = castAsScalar(attrinfo[i,7])
+	normalization_needed = castAsScalar(attrinfo[i,8])
+	normalization_mean = castAsScalar(attrinfo[i,9])
+	normalization_std = castAsScalar(attrinfo[i,10])
+	
+	if(mv_col_id > 0){ 
+		# fill-in with global mean
+		col = col + missing_indicator_mat[,mv_col_id] * global_mean
+	}
+	
+	if(num_bins > 0){
+		# only for equiwidth bins
+	
+		# note that max_val entries will get assigned num_bins+1
+		col = round((col - min_val)/bin_width - 0.5) + 1
+		less_than_lb = ppred(col, 1, "<")
+		more_than_ub = ppred(col, num_bins, ">")
+		
+		col = (1 - less_than_lb - more_than_ub)*col + more_than_ub*num_bins + less_than_lb
+	}
+
+	if(dummy_coding_beg_col == dummy_coding_end_col){
+		if(normalization_needed == 1){
+			if(normalization_std == -1) col = col - normalization_mean
+			else col = (col - normalization_mean)/normalization_std
+		}
+		
+		new_X[,dummy_coding_beg_col] = col
+	}else{
+		min_val = min(col)
+		max_val = max(col)
+		if(min_val >= 1 & max_val <= dummy_coding_end_col - dummy_coding_beg_col + 1){
+			res = table(seq(1, nrow(X), 1), col, nrow(X), (dummy_coding_end_col-dummy_coding_beg_col+1))
+			new_X[,dummy_coding_beg_col:dummy_coding_end_col] = res
+		}else{
+			log[i,1] = 1
+			if(min_val < 1) log[i,2] = min_val
+			else log[i,2] = max_val
+		}
+	}
+}
+
+write(new_X, $transformed_X, format="text")
+
+s = "Warning Messages"
+for(i in 1:nrow(log)){
+	if(castAsScalar(log[i,1]) == 1)
+		s = append(s, "Unseen value in column " + i + " (" + castAsScalar(log[i,2]) + ")")
+}
 write(s, $Log)
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/apply-transform/apply-transform.pydml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/apply-transform/apply-transform.pydml b/src/test/scripts/applications/apply-transform/apply-transform.pydml
index be04495..f6c40dd 100644
--- a/src/test/scripts/applications/apply-transform/apply-transform.pydml
+++ b/src/test/scripts/applications/apply-transform/apply-transform.pydml
@@ -1,146 +1,146 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-cmdLine_missing_value_maps = ifdef($missing_value_maps, " ")
-cmdLine_bin_defns = ifdef($bin_defns, " ")
-cmdLine_dummy_code_maps = ifdef($dummy_code_maps, " ")
-cmdLine_normalization_maps = ifdef($normalization_maps, " ")
-
-original_X = load($X)
-
-if(cmdLine_missing_value_maps != " "):
-    missing_val_maps = read(cmdLine_missing_value_maps)
-
-    last_data_col = ncol(original_X)-nrow(missing_val_maps)
-    X = original_X[,1:last_data_col]
-else:
-    X = original_X
-
-# col 1: col index of missing indicator col
-#         0 otherwise
-# col 2: global mean if imputation is needed
-# col 3: num_bins if binning is required
-# col 4: bin width if binning is required
-# col 5: min val if binning is required
-# col 6: begin col if dummy coding is required
-# col 7: end col if dummy coding is required
-# col 8: 1 if normalization is required 0 ow
-# col 9: mean for normalization
-# col 10: std for z-scoring for normalization
-#         -1 indicates mean subtraction  
-attrinfo = full(0, rows=ncol(X), cols=10)
-
-if(cmdLine_missing_value_maps != " "):
-    missing_indicator_mat = original_X[,(last_data_col+1):ncol(original_X)]
-    
-    parfor(i in 1:nrow(missing_val_maps), check=0):
-        attr_index_mv = castAsScalar(missing_val_maps[i,1])
-        attrinfo[attr_index_mv,1] = i
-        attrinfo[attr_index_mv,2] = missing_val_maps[i,2]
-    
-if(cmdLine_bin_defns != " "):
-    bin_defns = read(cmdLine_bin_defns)
-    parfor(i in 1:nrow(bin_defns), check=0):
-        attr_index_bin = castAsScalar(bin_defns[i,1])
-        attrinfo[attr_index_bin,3] = bin_defns[i,4]
-        attrinfo[attr_index_bin,4] = bin_defns[i,2]
-        attrinfo[attr_index_bin,5] = bin_defns[i,3]
-
-if(cmdLine_dummy_code_maps != " "):
-    dummy_code_maps = read(cmdLine_dummy_code_maps)
-    parfor(i in 1:nrow(dummy_code_maps), check=0):
-        attr_index_dc = castAsScalar(dummy_code_maps[i,1])
-        attrinfo[attr_index_dc,6] = dummy_code_maps[i,2]
-        attrinfo[attr_index_dc,7] = dummy_code_maps[i,3]
-else:
-    attrinfo[,6] = seq(1, ncol(X), 1)
-    attrinfo[,7] = seq(1, ncol(X), 1)
-
-if(cmdLine_normalization_maps != " "):
-    normalization_map = read(cmdLine_normalization_maps)
-    parfor(i in 1:nrow(normalization_map), check=0):
-        attr_index_normalization = castAsScalar(normalization_map[i,1])
-        attrinfo[attr_index_normalization,8] = 1
-        attrinfo[attr_index_normalization,9] = castAsScalar(normalization_map[i,2])
-        attrinfo[attr_index_normalization,10] = castAsScalar(normalization_map[i,3])
-
-#write(attrinfo, "binning/attrinfo.mtx", format="csv")
-
-cols_in_transformed_X = castAsScalar(attrinfo[nrow(attrinfo),6])
-new_X = full(0, rows=nrow(X), cols=cols_in_transformed_X)
-log = full(0, rows=ncol(X), cols=2)
-parfor(i in 1:ncol(X), check=0):
-    col = X[,i]
-    
-    mv_col_id = castAsScalar(attrinfo[i,1])
-    global_mean = castAsScalar(attrinfo[i,2])
-    num_bins = castAsScalar(attrinfo[i,3])
-    bin_width = castAsScalar(attrinfo[i,4])
-    min_val = castAsScalar(attrinfo[i,5])
-    dummy_coding_beg_col = castAsScalar(attrinfo[i,6])
-    dummy_coding_end_col = castAsScalar(attrinfo[i,7])
-    normalization_needed = castAsScalar(attrinfo[i,8])
-    normalization_mean = castAsScalar(attrinfo[i,9])
-    normalization_std = castAsScalar(attrinfo[i,10])
-    
-    if(mv_col_id > 0):
-        # fill-in with global mean
-        col = col + missing_indicator_mat[,mv_col_id] * global_mean
-    
-    if(num_bins > 0):
-        # only for equiwidth bins
-    
-        # note that max_val entries will get assigned num_bins+1
-        col = round((col - min_val)/bin_width - 0.5) + 1
-        less_than_lb = ppred(col, 1, "<")
-        more_than_ub = ppred(col, num_bins, ">")
-        
-        col = (1 - less_than_lb - more_than_ub)*col + more_than_ub*num_bins + less_than_lb
-
-    if(dummy_coding_beg_col == dummy_coding_end_col):
-        if(normalization_needed == 1):
-            if(normalization_std == -1):
-                col = col - normalization_mean
-            else:
-                col = (col - normalization_mean)/normalization_std
-        
-        new_X[,dummy_coding_beg_col] = col
-    else:
-        min_val = min(col)
-        max_val = max(col)
-        if(min_val >= 1 & max_val <= dummy_coding_end_col - dummy_coding_beg_col + 1):
-            res = table(seq(1, nrow(X), 1), col, nrow(X), (dummy_coding_end_col-dummy_coding_beg_col+1))
-            new_X[,dummy_coding_beg_col:dummy_coding_end_col] = res
-        else:
-            log[i,1] = 1
-            if(min_val < 1):
-                log[i,2] = min_val
-            else:
-                log[i,2] = max_val
-
-save(new_X, $transformed_X, format="text")
-
-s = "Warning Messages"
-for(i in 1:nrow(log)):
-    if(castAsScalar(log[i,1]) == 1):
-        s = append(s, "Unseen value in column " + i + " (" + castAsScalar(log[i,2]) + ")")
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+cmdLine_missing_value_maps = ifdef($missing_value_maps, " ")
+cmdLine_bin_defns = ifdef($bin_defns, " ")
+cmdLine_dummy_code_maps = ifdef($dummy_code_maps, " ")
+cmdLine_normalization_maps = ifdef($normalization_maps, " ")
+
+original_X = load($X)
+
+if(cmdLine_missing_value_maps != " "):
+    missing_val_maps = read(cmdLine_missing_value_maps)
+
+    last_data_col = ncol(original_X)-nrow(missing_val_maps)
+    X = original_X[,1:last_data_col]
+else:
+    X = original_X
+
+# col 1: col index of missing indicator col
+#         0 otherwise
+# col 2: global mean if imputation is needed
+# col 3: num_bins if binning is required
+# col 4: bin width if binning is required
+# col 5: min val if binning is required
+# col 6: begin col if dummy coding is required
+# col 7: end col if dummy coding is required
+# col 8: 1 if normalization is required 0 ow
+# col 9: mean for normalization
+# col 10: std for z-scoring for normalization
+#         -1 indicates mean subtraction  
+attrinfo = full(0, rows=ncol(X), cols=10)
+
+if(cmdLine_missing_value_maps != " "):
+    missing_indicator_mat = original_X[,(last_data_col+1):ncol(original_X)]
+    
+    parfor(i in 1:nrow(missing_val_maps), check=0):
+        attr_index_mv = castAsScalar(missing_val_maps[i,1])
+        attrinfo[attr_index_mv,1] = i
+        attrinfo[attr_index_mv,2] = missing_val_maps[i,2]
+    
+if(cmdLine_bin_defns != " "):
+    bin_defns = read(cmdLine_bin_defns)
+    parfor(i in 1:nrow(bin_defns), check=0):
+        attr_index_bin = castAsScalar(bin_defns[i,1])
+        attrinfo[attr_index_bin,3] = bin_defns[i,4]
+        attrinfo[attr_index_bin,4] = bin_defns[i,2]
+        attrinfo[attr_index_bin,5] = bin_defns[i,3]
+
+if(cmdLine_dummy_code_maps != " "):
+    dummy_code_maps = read(cmdLine_dummy_code_maps)
+    parfor(i in 1:nrow(dummy_code_maps), check=0):
+        attr_index_dc = castAsScalar(dummy_code_maps[i,1])
+        attrinfo[attr_index_dc,6] = dummy_code_maps[i,2]
+        attrinfo[attr_index_dc,7] = dummy_code_maps[i,3]
+else:
+    attrinfo[,6] = seq(1, ncol(X), 1)
+    attrinfo[,7] = seq(1, ncol(X), 1)
+
+if(cmdLine_normalization_maps != " "):
+    normalization_map = read(cmdLine_normalization_maps)
+    parfor(i in 1:nrow(normalization_map), check=0):
+        attr_index_normalization = castAsScalar(normalization_map[i,1])
+        attrinfo[attr_index_normalization,8] = 1
+        attrinfo[attr_index_normalization,9] = castAsScalar(normalization_map[i,2])
+        attrinfo[attr_index_normalization,10] = castAsScalar(normalization_map[i,3])
+
+#write(attrinfo, "binning/attrinfo.mtx", format="csv")
+
+cols_in_transformed_X = castAsScalar(attrinfo[nrow(attrinfo),6])
+new_X = full(0, rows=nrow(X), cols=cols_in_transformed_X)
+log = full(0, rows=ncol(X), cols=2)
+parfor(i in 1:ncol(X), check=0):
+    col = X[,i]
+    
+    mv_col_id = castAsScalar(attrinfo[i,1])
+    global_mean = castAsScalar(attrinfo[i,2])
+    num_bins = castAsScalar(attrinfo[i,3])
+    bin_width = castAsScalar(attrinfo[i,4])
+    min_val = castAsScalar(attrinfo[i,5])
+    dummy_coding_beg_col = castAsScalar(attrinfo[i,6])
+    dummy_coding_end_col = castAsScalar(attrinfo[i,7])
+    normalization_needed = castAsScalar(attrinfo[i,8])
+    normalization_mean = castAsScalar(attrinfo[i,9])
+    normalization_std = castAsScalar(attrinfo[i,10])
+    
+    if(mv_col_id > 0):
+        # fill-in with global mean
+        col = col + missing_indicator_mat[,mv_col_id] * global_mean
+    
+    if(num_bins > 0):
+        # only for equiwidth bins
+    
+        # note that max_val entries will get assigned num_bins+1
+        col = round((col - min_val)/bin_width - 0.5) + 1
+        less_than_lb = ppred(col, 1, "<")
+        more_than_ub = ppred(col, num_bins, ">")
+        
+        col = (1 - less_than_lb - more_than_ub)*col + more_than_ub*num_bins + less_than_lb
+
+    if(dummy_coding_beg_col == dummy_coding_end_col):
+        if(normalization_needed == 1):
+            if(normalization_std == -1):
+                col = col - normalization_mean
+            else:
+                col = (col - normalization_mean)/normalization_std
+        
+        new_X[,dummy_coding_beg_col] = col
+    else:
+        min_val = min(col)
+        max_val = max(col)
+        if(min_val >= 1 & max_val <= dummy_coding_end_col - dummy_coding_beg_col + 1):
+            res = table(seq(1, nrow(X), 1), col, nrow(X), (dummy_coding_end_col-dummy_coding_beg_col+1))
+            new_X[,dummy_coding_beg_col:dummy_coding_end_col] = res
+        else:
+            log[i,1] = 1
+            if(min_val < 1):
+                log[i,2] = min_val
+            else:
+                log[i,2] = max_val
+
+save(new_X, $transformed_X, format="text")
+
+s = "Warning Messages"
+for(i in 1:nrow(log)):
+    if(castAsScalar(log[i,1]) == 1):
+        s = append(s, "Unseen value in column " + i + " (" + castAsScalar(log[i,2]) + ")")
+
 save(s, $Log)
\ No newline at end of file


[53/55] incubator-systemml git commit: [SYSTEMML-134] Update standalone readme.txt

Posted by du...@apache.org.
[SYSTEMML-134] Update standalone readme.txt

Update outdated standalone info.
Rename readme.txt to README.txt.
Update standalone.xml

Closes #51.


Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/048ac6d1
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/048ac6d1
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/048ac6d1

Branch: refs/heads/branch-0.9
Commit: 048ac6d1f9807dee00fcddba1e9ef96bdf7029cd
Parents: caa3c2a
Author: Deron Eriksson <de...@us.ibm.com>
Authored: Fri Jan 22 16:47:47 2016 -0800
Committer: Deron Eriksson <de...@us.ibm.com>
Committed: Fri Jan 22 16:47:47 2016 -0800

----------------------------------------------------------------------
 src/assembly/standalone.xml    |  2 +-
 src/main/standalone/README.txt | 65 +++++++++++++++++++++++++++++++++++++
 src/main/standalone/readme.txt | 11 -------
 3 files changed, 66 insertions(+), 12 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/048ac6d1/src/assembly/standalone.xml
----------------------------------------------------------------------
diff --git a/src/assembly/standalone.xml b/src/assembly/standalone.xml
index 2850760..5979d86 100644
--- a/src/assembly/standalone.xml
+++ b/src/assembly/standalone.xml
@@ -135,7 +135,7 @@
 			<directory>${basedir}/src/main/standalone</directory>
 			<includes>
 				<include>log4j.properties</include>
-				<include>readme.txt</include>
+				<include>README.txt</include>
 				<include>SystemML-config.xml</include>
 			</includes>
 			<outputDirectory>.</outputDirectory>

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/048ac6d1/src/main/standalone/README.txt
----------------------------------------------------------------------
diff --git a/src/main/standalone/README.txt b/src/main/standalone/README.txt
new file mode 100644
index 0000000..af60940
--- /dev/null
+++ b/src/main/standalone/README.txt
@@ -0,0 +1,65 @@
+-------------------------------------------------------------------------------
+Apache SystemML (incubating)
+-------------------------------------------------------------------------------
+
+SystemML is now an Apache Incubator project! Please see the Apache SystemML
+(incubating) website at http://systemml.apache.org/ for more information. The
+latest project documentation can be found at the SystemML Documentation website
+on GitHub at http://apache.github.io/incubator-systemml/.
+
+SystemML is a flexible, scalable machine learning system. SystemML's
+distinguishing characteristics are:
+
+  1. Algorithm customizability via R-like and Python-like languages.
+  2. Multiple execution modes, including Standalone, Spark Batch, Spark
+     MLContext, Hadoop Batch, and JMLC.
+  3. Automatic optimization based on data and cluster characteristics to ensure
+     both efficiency and scalability.
+
+
+-------------------------------------------------------------------------------
+SystemML in Standalone Mode
+-------------------------------------------------------------------------------
+
+Standalone mode can be run on a single machine in a non-Hadoop environment,
+allowing data scientists to develop algorithms locally without need of a
+distributed cluster. The Standalone release packages all required libraries
+into a single distribution. Standalone mode is not appropriate for large
+datasets.
+
+OS X and Linux users can use the runStandaloneSystemML.sh script to run in
+Standalone mode, while Windows users can use the runStandaloneSystemML.bat
+script.
+
+
+-------------------------------------------------------------------------------
+Hello World Example
+-------------------------------------------------------------------------------
+
+The following example will run a "hello world" DML script on SystemML in
+Standalone mode.
+
+$ echo 'print("hello world");' > helloworld.dml
+$ ./runStandaloneSystemML.sh helloworld.dml
+
+
+-------------------------------------------------------------------------------
+Running SystemML Algorithms
+-------------------------------------------------------------------------------
+
+Several existing algorithms can be found in the scripts directory in the
+Standalone distribution. In the following example, we first obtain Haberman's
+Survival Data Set and create a metadata file for it. Next, we create a
+types.csv file that describes the type of each column, along with its own
+corresponding metadata file. We then run the Univariate Statistics algorithm
+on the data in Standalone mode. The results are output to the
+data/univarOut.mtx file.
+
+$ wget -P data/ http://archive.ics.uci.edu/ml/machine-learning-databases/haberman/haberman.data
+$ echo '{"rows": 306, "cols": 4, "format": "csv"}' > data/haberman.data.mtd
+$ echo '1,1,1,2' > data/types.csv
+$ echo '{"rows": 1, "cols": 4, "format": "csv"}' > data/types.csv.mtd
+$ ./runStandaloneSystemML.sh scripts/algorithms/Univar-Stats.dml -nvargs X=data/haberman.data TYPES=data/types.csv STATS=data/univarOut.mtx
+
+For more information, please see the online SystemML documentation.
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/048ac6d1/src/main/standalone/readme.txt
----------------------------------------------------------------------
diff --git a/src/main/standalone/readme.txt b/src/main/standalone/readme.txt
deleted file mode 100644
index 15541f5..0000000
--- a/src/main/standalone/readme.txt
+++ /dev/null
@@ -1,11 +0,0 @@
-SystemML enables declarative, large-scale machine learning (ML) via a high-level language with R-like syntax. Data scientists use this language to express their ML algorithms with full flexibility but without the need to hand-tune distributed runtime execution plans and system configurations. These ML programs are dynamically compiled and optimized based on data and cluster characteristics using rule and cost-based optimization techniques. The compiler automatically generates hybrid runtime execution plans ranging from in-memory, single node execution to distributed MapReduce (MR) computation and data access.
-
-jSystemML.jar is derived out of SystemML.jar, to work in non-Hadoop desktop/laptop environment just like a Java appln. 
-
-We recommend to use "-exec singlenode" option, in order to force in-memory computation.
-
-If you see error "java.lang.OutOfMemoryError", then edit the invocation script to adjust JVM memory "-Xmx20g -Xms20g -Xmn1g".
-
-Please see the help/usage page at :-
-java -jar jSystemML.jar -help
-java -jar jSystemML.jar -?
\ No newline at end of file


[52/55] incubator-systemml git commit: Remove "entry longer than 100 characters" warnings

Posted by du...@apache.org.
Remove "entry longer than 100 characters" warnings

During tar creation, the following warnings were generated:
[WARNING] Entry: ... longer than 100 characters.
Remove them by setting tarLongFileMode to gnu for the maven-assembly-plugin.

Closes #53.


Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/caa3c2aa
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/caa3c2aa
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/caa3c2aa

Branch: refs/heads/branch-0.9
Commit: caa3c2aa859f18e222307d35e9c8b6056f54f4aa
Parents: 816e2db
Author: Deron Eriksson <de...@us.ibm.com>
Authored: Fri Jan 22 09:56:05 2016 -0800
Committer: Deron Eriksson <de...@us.ibm.com>
Committed: Fri Jan 22 09:56:05 2016 -0800

----------------------------------------------------------------------
 pom.xml | 3 +++
 1 file changed, 3 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/caa3c2aa/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index b4dfb52..ba0e893 100644
--- a/pom.xml
+++ b/pom.xml
@@ -512,6 +512,9 @@
 					<plugin>
 						<artifactId>maven-assembly-plugin</artifactId>
 						<version>2.4</version><!--$NO-MVN-MAN-VER$-->
+						<configuration>
+							<tarLongFileMode>gnu</tarLongFileMode>
+						</configuration>
 						<executions>
 							<execution>
 								<id>create-source-distribution-assembly</id>


[26/55] [partial] incubator-systemml git commit: [SYSTEMML-482] [SYSTEMML-480] Adding a Git attributes file to enforce Unix-styled line endings, and normalizing all of the line endings.

Posted by du...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/id3/id3.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/id3/id3.R b/src/test/scripts/applications/id3/id3.R
index 8528b52..b838607 100644
--- a/src/test/scripts/applications/id3/id3.R
+++ b/src/test/scripts/applications/id3/id3.R
@@ -1,242 +1,242 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-args <- commandArgs(TRUE)
-
-library("Matrix")
-library("matrixStats")
-
-options(warn=-1)
-
-
-id3_learn = function(X, y, X_subset, attributes, minsplit)
-{
-	#try the two base cases first
-
-	#of the remaining samples, compute a histogram for labels
-	hist_labels_helper = as.matrix(aggregate(as.vector(X_subset), by=list(as.vector(y)), FUN=sum))
-	hist_labels = as.matrix(hist_labels_helper[,2])
-
-	#go through the histogram to compute the number of labels
-	#with non-zero samples
-	#and to pull out the most popular label
-	
-	num_non_zero_labels = sum(hist_labels > 0);
-	most_popular_label = which.max(t(hist_labels));
-	num_remaining_attrs = sum(attributes)
-	
-	num_samples = sum(X_subset)
-	mpl = as.numeric(most_popular_label)
-	
-	nodes = matrix(0, 1, 1)
-	edges = matrix(0, 1, 1)
-	
-	#if all samples have the same label then return a leaf node
-	#if no attributes remain then return a leaf node with the most popular label	
-	if(num_non_zero_labels == 1 | num_remaining_attrs == 0 | num_samples < minsplit){
-		nodes = matrix(0, 1, 2)
-		nodes[1,1] = -1
-		nodes[1,2] = most_popular_label
-		edges = matrix(-1, 1, 1)
-	}else{
-		#computing gains for all available attributes using parfor
-		hist_labels2_helper = as.matrix(aggregate(as.vector(X_subset), by=list(as.vector(y)), FUN=sum))
-		hist_labels2 = as.matrix(hist_labels2_helper[,2])
-		num_samples2 = sum(X_subset)
-		zero_entries_in_hist1 = (hist_labels2 == 0)
-		pi1 = hist_labels2/num_samples2
-		log_term1 = zero_entries_in_hist1*1 + (1-zero_entries_in_hist1)*pi1
-		entropy_vector1 = -pi1*log(log_term1)
-		ht = sum(entropy_vector1)
-		
-		sz = nrow(attributes)
-		gains = matrix(0, sz, 1)
-		for(i in 1:nrow(attributes)){
-			if(as.numeric(attributes[i,1]) == 1){
-				attr_vals = X[,i]
-				attr_domain_helper = as.matrix(aggregate(as.vector(X_subset), by=list(as.vector(attr_vals)), FUN=sum))
-				attr_domain = as.matrix(attr_domain_helper[,2])
-
-				hxt_vector = matrix(0, nrow(attr_domain), 1)
-				
-				for(j in 1:nrow(attr_domain)){
-					if(as.numeric(attr_domain[j,1]) != 0){
-						val = j
-						Tj = X_subset * (X[,i] == val)
-						
-						#entropy = compute_entropy(Tj, y)
-						hist_labels1_helper = as.matrix(aggregate(as.vector(Tj), by=list(as.vector(y)), FUN=sum))
-						hist_labels1 = as.matrix(hist_labels1_helper[,2])
-						num_samples1 = sum(Tj)
-						zero_entries_in_hist = (hist_labels1 == 0)
-						pi = hist_labels1/num_samples1
-						log_term = zero_entries_in_hist*1 + (1-zero_entries_in_hist)*pi
-						entropy_vector = -pi*log(log_term)
-						entropy = sum(entropy_vector)
-	
-						hxt_vector[j,1] = sum(Tj)/sum(X_subset)*entropy
-					}
-				}
-				hxt = sum(hxt_vector)
-				gains[i,1] = (ht - hxt)
-			}
-		}
-		
-		#pick out attr with highest gain
-		best_attr = -1
-		max_gain = 0
-		for(i4 in 1:nrow(gains)){
-			if(as.numeric(attributes[i4,1]) == 1){
-				g = as.numeric(gains[i4,1])
-				if(best_attr == -1 | max_gain <= g){
-					max_gain = g
-					best_attr = i4
-				}
-			}
-		}		
-		
-		attr_vals = X[,best_attr]
-		attr_domain_helper = as.matrix(aggregate(as.vector(X_subset), by=list(as.vector(attr_vals)), FUN=sum))
-		attr_domain = as.matrix(attr_domain_helper[,2])
-
-		new_attributes = attributes
-		new_attributes[best_attr, 1] = 0
-		
-		max_sz_subtree = 2*sum(X_subset)
-		sz2 = nrow(attr_domain)
-		sz1 = sz2*max_sz_subtree
-		
-		tempNodeStore = matrix(0, 2, sz1)
-		tempEdgeStore = matrix(0, 3, sz1)
-		numSubtreeNodes = matrix(0, sz2, 1)
-		numSubtreeEdges = matrix(0, sz2, 1)
-		
-		for(i1 in 1:nrow(attr_domain)){
-			
-			Ti = X_subset * (X[,best_attr] == i1)
-			num_nodes_Ti = sum(Ti)
-			
-			if(num_nodes_Ti > 0){
-				tmpRet <- id3_learn(X, y, Ti, new_attributes, minsplit)
-			  nodesi = as.matrix(tmpRet$a);
-        edgesi = as.matrix(tmpRet$b);
-      
-				start_pt = 1+(i1-1)*max_sz_subtree
-        tempNodeStore[,start_pt:(start_pt+nrow(nodesi)-1)] = t(nodesi)
-			
-        numSubtreeNodes[i1,1] = nrow(nodesi)
-				if(nrow(edgesi)!=1 | ncol(edgesi)!=1 | as.numeric(edgesi[1,1])!=-1){
-					tempEdgeStore[,start_pt:(start_pt+nrow(edgesi)-1)] = t(edgesi)
-					numSubtreeEdges[i1,1] = nrow(edgesi)
-				}else{
-					numSubtreeEdges[i1,1] = 0
-				}
-			}
-		}
-		
-		num_nodes_in_subtrees = sum(numSubtreeNodes)
-		num_edges_in_subtrees = sum(numSubtreeEdges)
-		
-		#creating root node
-		sz = 1+num_nodes_in_subtrees
-		
-		nodes = matrix(0, sz, 2)
-		nodes[1,1] = best_attr
-		numNodes = 1
-		
-		#edges from root to children
-		sz = sum(numSubtreeNodes > 0) + num_edges_in_subtrees
-		
-		edges = matrix(1, sz, 3)
-		numEdges = 0
-		for(i6 in 1:nrow(attr_domain)){
-			num_nodesi = as.numeric(numSubtreeNodes[i6,1])
-			if(num_nodesi > 0){
-				edges[numEdges+1,2] = i6
-				numEdges = numEdges + 1
-			}
-		}
-		
-		nonEmptyAttri = 0
-		for(i7 in 1:nrow(attr_domain)){
-			numNodesInSubtree = as.numeric(numSubtreeNodes[i7,1])
-		
-			if(numNodesInSubtree > 0){
-				start_pt1 = 1 + (i7-1)*max_sz_subtree
-				nodes[(numNodes+1):(numNodes+numNodesInSubtree),] = t(tempNodeStore[,start_pt1:(start_pt1+numNodesInSubtree-1)])
-			
-				numEdgesInSubtree = as.numeric(numSubtreeEdges[i7,1])
-			
-				if(numEdgesInSubtree!=0){
-					edgesi1 = t(tempEdgeStore[,start_pt1:(start_pt1+numEdgesInSubtree-1)])
-					edgesi1[,1] = edgesi1[,1] + numNodes
-					edgesi1[,3] = edgesi1[,3] + numNodes
-          
-					edges[(numEdges+1):(numEdges+numEdgesInSubtree),] = edgesi1
-					numEdges = numEdges + numEdgesInSubtree
-				}
-			
-				edges[nonEmptyAttri+1,3] = numNodes + 1
-				nonEmptyAttri = nonEmptyAttri + 1
-				
-				numNodes = numNodes + numNodesInSubtree
-			}
-		}
-	}
-  
-  return ( list(a=nodes, b=edges) );
-}
-
-X = readMM(paste(args[1], "X.mtx", sep=""));
-y = readMM(paste(args[1], "y.mtx", sep=""));
-
-n = nrow(X)
-m = ncol(X)
-
-minsplit = 2
-
-
-X_subset = matrix(1, n, 1)
-attributes = matrix(1, m, 1)
-# recoding inputs
-featureCorrections = as.vector(1 - colMins(as.matrix(X)))
-onesMat = matrix(1, n, m)
-
-X = onesMat %*% diag(featureCorrections) + X
-labelCorrection = 1 - min(y)
-y = y + labelCorrection + 0
-
-tmpRet <- id3_learn(X, y, X_subset, attributes, minsplit)
-nodes = as.matrix(tmpRet$a)
-edges = as.matrix(tmpRet$b)
-
-# decoding outputs
-nodes[,2] = nodes[,2] - labelCorrection * (nodes[,1] == -1)
-for(i3 in 1:nrow(edges)){
-	e_parent = as.numeric(edges[i3,1])
-	parent_feature = as.numeric(nodes[e_parent,1])
-	correction = as.numeric(featureCorrections[parent_feature])
-	edges[i3,2] = edges[i3,2] - correction
-}
-
-writeMM(as(nodes,"CsparseMatrix"), paste(args[2],"nodes", sep=""), format = "text")
-writeMM(as(edges,"CsparseMatrix"), paste(args[2],"edges", sep=""), format = "text")
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+args <- commandArgs(TRUE)
+
+library("Matrix")
+library("matrixStats")
+
+options(warn=-1)
+
+
+id3_learn = function(X, y, X_subset, attributes, minsplit)
+{
+	#try the two base cases first
+
+	#of the remaining samples, compute a histogram for labels
+	hist_labels_helper = as.matrix(aggregate(as.vector(X_subset), by=list(as.vector(y)), FUN=sum))
+	hist_labels = as.matrix(hist_labels_helper[,2])
+
+	#go through the histogram to compute the number of labels
+	#with non-zero samples
+	#and to pull out the most popular label
+	
+	num_non_zero_labels = sum(hist_labels > 0);
+	most_popular_label = which.max(t(hist_labels));
+	num_remaining_attrs = sum(attributes)
+	
+	num_samples = sum(X_subset)
+	mpl = as.numeric(most_popular_label)
+	
+	nodes = matrix(0, 1, 1)
+	edges = matrix(0, 1, 1)
+	
+	#if all samples have the same label then return a leaf node
+	#if no attributes remain then return a leaf node with the most popular label	
+	if(num_non_zero_labels == 1 | num_remaining_attrs == 0 | num_samples < minsplit){
+		nodes = matrix(0, 1, 2)
+		nodes[1,1] = -1
+		nodes[1,2] = most_popular_label
+		edges = matrix(-1, 1, 1)
+	}else{
+		#computing gains for all available attributes using parfor
+		hist_labels2_helper = as.matrix(aggregate(as.vector(X_subset), by=list(as.vector(y)), FUN=sum))
+		hist_labels2 = as.matrix(hist_labels2_helper[,2])
+		num_samples2 = sum(X_subset)
+		zero_entries_in_hist1 = (hist_labels2 == 0)
+		pi1 = hist_labels2/num_samples2
+		log_term1 = zero_entries_in_hist1*1 + (1-zero_entries_in_hist1)*pi1
+		entropy_vector1 = -pi1*log(log_term1)
+		ht = sum(entropy_vector1)
+		
+		sz = nrow(attributes)
+		gains = matrix(0, sz, 1)
+		for(i in 1:nrow(attributes)){
+			if(as.numeric(attributes[i,1]) == 1){
+				attr_vals = X[,i]
+				attr_domain_helper = as.matrix(aggregate(as.vector(X_subset), by=list(as.vector(attr_vals)), FUN=sum))
+				attr_domain = as.matrix(attr_domain_helper[,2])
+
+				hxt_vector = matrix(0, nrow(attr_domain), 1)
+				
+				for(j in 1:nrow(attr_domain)){
+					if(as.numeric(attr_domain[j,1]) != 0){
+						val = j
+						Tj = X_subset * (X[,i] == val)
+						
+						#entropy = compute_entropy(Tj, y)
+						hist_labels1_helper = as.matrix(aggregate(as.vector(Tj), by=list(as.vector(y)), FUN=sum))
+						hist_labels1 = as.matrix(hist_labels1_helper[,2])
+						num_samples1 = sum(Tj)
+						zero_entries_in_hist = (hist_labels1 == 0)
+						pi = hist_labels1/num_samples1
+						log_term = zero_entries_in_hist*1 + (1-zero_entries_in_hist)*pi
+						entropy_vector = -pi*log(log_term)
+						entropy = sum(entropy_vector)
+	
+						hxt_vector[j,1] = sum(Tj)/sum(X_subset)*entropy
+					}
+				}
+				hxt = sum(hxt_vector)
+				gains[i,1] = (ht - hxt)
+			}
+		}
+		
+		#pick out attr with highest gain
+		best_attr = -1
+		max_gain = 0
+		for(i4 in 1:nrow(gains)){
+			if(as.numeric(attributes[i4,1]) == 1){
+				g = as.numeric(gains[i4,1])
+				if(best_attr == -1 | max_gain <= g){
+					max_gain = g
+					best_attr = i4
+				}
+			}
+		}		
+		
+		attr_vals = X[,best_attr]
+		attr_domain_helper = as.matrix(aggregate(as.vector(X_subset), by=list(as.vector(attr_vals)), FUN=sum))
+		attr_domain = as.matrix(attr_domain_helper[,2])
+
+		new_attributes = attributes
+		new_attributes[best_attr, 1] = 0
+		
+		max_sz_subtree = 2*sum(X_subset)
+		sz2 = nrow(attr_domain)
+		sz1 = sz2*max_sz_subtree
+		
+		tempNodeStore = matrix(0, 2, sz1)
+		tempEdgeStore = matrix(0, 3, sz1)
+		numSubtreeNodes = matrix(0, sz2, 1)
+		numSubtreeEdges = matrix(0, sz2, 1)
+		
+		for(i1 in 1:nrow(attr_domain)){
+			
+			Ti = X_subset * (X[,best_attr] == i1)
+			num_nodes_Ti = sum(Ti)
+			
+			if(num_nodes_Ti > 0){
+				tmpRet <- id3_learn(X, y, Ti, new_attributes, minsplit)
+			  nodesi = as.matrix(tmpRet$a);
+        edgesi = as.matrix(tmpRet$b);
+      
+				start_pt = 1+(i1-1)*max_sz_subtree
+        tempNodeStore[,start_pt:(start_pt+nrow(nodesi)-1)] = t(nodesi)
+			
+        numSubtreeNodes[i1,1] = nrow(nodesi)
+				if(nrow(edgesi)!=1 | ncol(edgesi)!=1 | as.numeric(edgesi[1,1])!=-1){
+					tempEdgeStore[,start_pt:(start_pt+nrow(edgesi)-1)] = t(edgesi)
+					numSubtreeEdges[i1,1] = nrow(edgesi)
+				}else{
+					numSubtreeEdges[i1,1] = 0
+				}
+			}
+		}
+		
+		num_nodes_in_subtrees = sum(numSubtreeNodes)
+		num_edges_in_subtrees = sum(numSubtreeEdges)
+		
+		#creating root node
+		sz = 1+num_nodes_in_subtrees
+		
+		nodes = matrix(0, sz, 2)
+		nodes[1,1] = best_attr
+		numNodes = 1
+		
+		#edges from root to children
+		sz = sum(numSubtreeNodes > 0) + num_edges_in_subtrees
+		
+		edges = matrix(1, sz, 3)
+		numEdges = 0
+		for(i6 in 1:nrow(attr_domain)){
+			num_nodesi = as.numeric(numSubtreeNodes[i6,1])
+			if(num_nodesi > 0){
+				edges[numEdges+1,2] = i6
+				numEdges = numEdges + 1
+			}
+		}
+		
+		nonEmptyAttri = 0
+		for(i7 in 1:nrow(attr_domain)){
+			numNodesInSubtree = as.numeric(numSubtreeNodes[i7,1])
+		
+			if(numNodesInSubtree > 0){
+				start_pt1 = 1 + (i7-1)*max_sz_subtree
+				nodes[(numNodes+1):(numNodes+numNodesInSubtree),] = t(tempNodeStore[,start_pt1:(start_pt1+numNodesInSubtree-1)])
+			
+				numEdgesInSubtree = as.numeric(numSubtreeEdges[i7,1])
+			
+				if(numEdgesInSubtree!=0){
+					edgesi1 = t(tempEdgeStore[,start_pt1:(start_pt1+numEdgesInSubtree-1)])
+					edgesi1[,1] = edgesi1[,1] + numNodes
+					edgesi1[,3] = edgesi1[,3] + numNodes
+          
+					edges[(numEdges+1):(numEdges+numEdgesInSubtree),] = edgesi1
+					numEdges = numEdges + numEdgesInSubtree
+				}
+			
+				edges[nonEmptyAttri+1,3] = numNodes + 1
+				nonEmptyAttri = nonEmptyAttri + 1
+				
+				numNodes = numNodes + numNodesInSubtree
+			}
+		}
+	}
+  
+  return ( list(a=nodes, b=edges) );
+}
+
+# Driver: read the inputs, shift features/labels so their minimum is 1,
+# learn the ID3 tree, undo the shift on the outputs, and write the tree.
+# NOTE(review): `args` is not defined in this part of the file; presumably
+# it comes from commandArgs() near the top of the script -- confirm.
+# args[1] is used as a path prefix for "X.mtx" / "y.mtx", args[2] as a path
+# prefix for the "nodes" / "edges" outputs.
+X = readMM(paste(args[1], "X.mtx", sep=""));
+y = readMM(paste(args[1], "y.mtx", sep=""));
+
+n = nrow(X)
+m = ncol(X)
+
+# id3_learn returns a leaf when the number of samples in a subset is < minsplit
+minsplit = 2
+
+
+# X_subset: indicator column with one entry per row of X (all rows selected initially)
+# attributes: indicator column with one entry per column of X (all attributes available)
+X_subset = matrix(1, n, 1)
+attributes = matrix(1, m, 1)
+# recoding inputs
+# featureCorrections[j] = 1 - min(X[,j]); adding it to column j makes that
+# column's minimum equal to 1.
+# NOTE(review): colMins is not a base R function; presumably a package providing
+# it (e.g. matrixStats) is loaded earlier in the file -- confirm.
+featureCorrections = as.vector(1 - colMins(as.matrix(X)))
+onesMat = matrix(1, n, m)
+
+# onesMat %*% diag(v) is an n x m matrix whose column j is constant v[j].
+# NOTE(review): when m == 1, diag() of a length-1 vector builds an identity
+# matrix of that size rather than a 1x1 matrix holding the value -- verify
+# that single-feature inputs are never used here.
+X = onesMat %*% diag(featureCorrections) + X
+# shift labels so that the smallest label becomes 1 (the "+ 0" adds nothing numerically)
+labelCorrection = 1 - min(y)
+y = y + labelCorrection + 0
+
+# id3_learn returns list(a=nodes, b=edges)
+tmpRet <- id3_learn(X, y, X_subset, attributes, minsplit)
+nodes = as.matrix(tmpRet$a)
+edges = as.matrix(tmpRet$b)
+
+# decoding outputs
+# Leaf rows are marked by nodes[,1] == -1 and carry the (shifted) label in
+# column 2; only those rows get the label shift removed.
+nodes[,2] = nodes[,2] - labelCorrection * (nodes[,1] == -1)
+# For every edge: column 1 is the parent node index, column 2 the (shifted)
+# attribute value; subtract the correction of the parent's split feature.
+# NOTE(review): when id3_learn returns a single leaf, edges is the 1x1 matrix
+# (-1); this loop would then index edges[i3,2] and nodes[-1,1] -- confirm
+# that the leaf-only case cannot occur for the inputs used with this script.
+for(i3 in 1:nrow(edges)){
+	e_parent = as.numeric(edges[i3,1])
+	parent_feature = as.numeric(nodes[e_parent,1])
+	correction = as.numeric(featureCorrections[parent_feature])
+	edges[i3,2] = edges[i3,2] - correction
+}
+
+# write both result matrices via writeMM as sparse (CsparseMatrix) objects
+writeMM(as(nodes,"CsparseMatrix"), paste(args[2],"nodes", sep=""), format = "text")
+writeMM(as(edges,"CsparseMatrix"), paste(args[2],"edges", sep=""), format = "text")
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/impute/imputeGaussMCMC.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/impute/imputeGaussMCMC.dml b/src/test/scripts/applications/impute/imputeGaussMCMC.dml
index 1ebe5a0..21ecaee 100644
--- a/src/test/scripts/applications/impute/imputeGaussMCMC.dml
+++ b/src/test/scripts/applications/impute/imputeGaussMCMC.dml
@@ -1,687 +1,687 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# Implements the MCMC algorithm for imputation of missing data into a time-series of "reports".
-# Each report is a fixed-size vector of attribute values; reports come out each year/quarter/month ("term").
-# Hard linear equality constraints restrict values in/across the reports, e.g. total cost = sum of all costs.
-# Soft linear regression constraints define dependencies between values in/across the reports.
-# Linear regression parameters are unknown and sampled together with the missing values in the reports.
-#
-# INPUT 1: Initial reports matrix [1 : num_attrs, 1 : num_terms] with missing values usually set to zero,
-#          but it MUST BE CONSISTENT with hard constraints! Set some missing values to nonzero if needed.
-#          There are "num_terms" reports in the matrix, each having "num_attrs" attribute values.
-#
-# INPUT 2: Sparse matrix [1 : (num_terms * num_attrs), 1 : num_frees] that defines a linear map from
-#          "free" variables to the reports matrix. A tensor of size (num_terms * num_attrs * num_frees)
-#          where the reports matrix is stretched into a column-vector [1 : (num_terms * num_attrs)].
-#          Term = t, attribute = i  -->  index = (t-1) * num_attrs + i
-#
-# INPUT 3: Sparse matrix [1 : (num_reg_eqs * num_factors), 1 : (num_terms * num_attrs)] that defines
-#          a linear map from the stretched matrix of reports to the stretched matrix of regression factors.
-#
-# INPUT 4: Sparse vector [1 : (num_reg_eqs * num_factors), 1] that defines "default regression factors"
-#          (if nonzero) to be added to the regression factors before they are multiplied by parameters.
-#
-# INPUT 5: Sparse matrix [1 : (num_reg_eqs * num_factors), 1 : num_params] that defines a linear map
-#          from the vector of parameters to the stretched matrix of regression factors.
-#
-# INPUT 6: Sparse vector [1 : (num_reg_eqs * num_factors), 1] that defines default regression coefficients
-#          (if nonzero) to be added to the parameters (if any) before being multiplied by regression factors.
-#
-# INPUT 7: A vector [1 : num_reg_eqs, 1] of scale multipliers, one per regression
-#
-# INPUT 8 : Number of factors in a regression equation, including the estimated value
-# INPUT 9 : Maximum number of burn-in full iterations (that sample each variable and each parameter once)
-#           BUT the actual number of burn-in iterations may be smaller if "free fall" ends sooner
-# INPUT 10: Maximum number of observed full iterations (that sample each variable and each parameter once)
-#
-# INPUT 11: Output file name and path for the average MCMC reports table
-# INPUT 12: Output file for debugging (currently: the average parameters vector)
-#
-# Example:
-# hadoop jar SystemML.jar -f test/scripts/applications/impute/imputeGaussMCMC.dml -exec singlenode -args
-#    test/scripts/applications/impute/initial_reports
-#    test/scripts/applications/impute/CReps
-#    test/scripts/applications/impute/RegresValueMap
-#    test/scripts/applications/impute/RegresFactorDefault
-#    test/scripts/applications/impute/RegresParamMap
-#    test/scripts/applications/impute/RegresCoeffDefault
-#    test/scripts/applications/impute/RegresScaleMult
-#    4 1000 100
-#    test/scripts/applications/impute/output_reports
-#    test/scripts/applications/impute/debug_info
-
-
-print ("START ImputeGaussMCMC");
-print ("Reading the input files...");
-
-initial_reports = read ($1);
-CReps = read ($2);
-
-num_terms = ncol (initial_reports);   # Number of periods for which reports are produced
-num_attrs = nrow (initial_reports);   # Number of attribute values per each term report
-num_frees = ncol (CReps);   # Number of free variables used to describe all consistent reports
-
-dReps_size = num_terms * num_attrs;
-dReps = matrix (initial_reports, rows = dReps_size, cols = 1, byrow = FALSE);
-
-# We assume that all report-series consistent with hard constraints form an affine set:
-#     reports = CReps %*% freeVars + dReps
-# Here "freeVars" is a vector of "free variables" (degrees of freedom), "CReps" is a linear mapping,
-# and "dReps" are the default values for the reports that correspond to the initial reports matrix.
-
-RegresValueMap = read ($3);
-RegresFactorDefault = read ($4);
-RegresParamMap = read ($5); 
-RegresCoeffDefault = read ($6); 
-RegresScaleMult = read ($7);
-
-num_factors = $8;   # Number of factors in each regression equation, including the estimated value
-num_reg_eqs = nrow (RegresParamMap) / num_factors;   # Number of regression equations
-num_params  = ncol (RegresParamMap);   # Number of parameters used in all regressions
-max_num_burnin_iterations = $9;
-max_num_observed_iterations = $10;
-
-ones_fv = matrix (1.0, rows = 1, cols = num_frees);
-ones_pm = matrix (1.0, rows = 1, cols = num_params);
-twos_re = matrix (2.0, rows = 1, cols = num_reg_eqs);
-
-# Create a summation operator (matrix) that adds the factors in each regression:
-
-ncols_SumFactor = num_reg_eqs * num_factors;
-SumFactor = matrix (0.0, rows = num_reg_eqs, cols = ncols_SumFactor);
-ones_f = matrix (1.0, rows = 1, cols = num_factors);
-SumFactor [1, 1:num_factors] = ones_f;
-nSumFactor = 1;
-while (nSumFactor < num_reg_eqs) {
-    incSumFactor = nSumFactor;
-    if (incSumFactor > num_reg_eqs - nSumFactor) {
-        incSumFactor = num_reg_eqs - nSumFactor;
-    }
-    SumFactor [(nSumFactor + 1) : (nSumFactor + incSumFactor), 
-        (nSumFactor * num_factors + 1) : ((nSumFactor + incSumFactor) * num_factors)] = 
-            SumFactor [1 : incSumFactor, 1 : (incSumFactor * num_factors)];
-    nSumFactor = nSumFactor + incSumFactor;
-}
-
-freeVars = matrix (0.0, rows = num_frees, cols = 1);
-params = matrix (1.0, rows = num_params, cols = 1);
-
-
-
-num_opt_iter = 20;
-print ("Performing initial optimization (" + num_opt_iter + " alternating CG steps)...");
-
-reports = CReps %*% freeVars + dReps;
-regresValues = RegresValueMap %*% reports + RegresFactorDefault;
-regresParams = RegresParamMap %*% params  + RegresCoeffDefault;
-
-bilinear_sums = SumFactor %*% (regresValues * regresParams);
-w_bilinear_sums = RegresScaleMult * bilinear_sums;
-bilinear_form_value = sum (w_bilinear_sums * bilinear_sums);
-
-opt_iter = 1;
-is_step_params = 1;
-is_opt_converged = 0;
-
-print ("Before optimization:    Initial bilinear form value = " + bilinear_form_value);
-
-while (is_opt_converged == 0)
-{
-    deg = is_step_params * num_params + (1 - is_step_params) * num_frees;
-    shift_vector = matrix (0.0, rows = deg, cols = 1);
-
-    # Compute gradient
-
-    if (is_step_params == 1) {
-        gradient = twos_re %*% ((w_bilinear_sums %*% ones_pm) * (SumFactor %*% ((regresValues %*% ones_pm) *  RegresParamMap)));
-    } else {
-        gradient = twos_re %*% ((w_bilinear_sums %*% ones_fv) * (SumFactor %*% ((regresParams %*% ones_fv) * (RegresValueMap %*% CReps))));
-    }
-
-    # Make a few conjugate gradient steps
-    
-    residual = t(gradient);
-    p = - residual;
-    norm_r2 = sum (residual * residual);
-    cg_iter = 1;
-    cg_terminate = 0;
-
-    while (cg_terminate == 0)
-    {
-        # Want: q = A %*% p;
-        # Compute gradient change from 0 to p
-
-        if (is_step_params == 1) {
-            w_bilinear_p = RegresScaleMult * (SumFactor %*% (regresValues * (RegresParamMap %*% p)));
-            gradient_change_p = twos_re %*% ((w_bilinear_p %*% ones_pm) * (SumFactor %*% ((regresValues %*% ones_pm) * RegresParamMap)));
-        } else {
-            w_bilinear_p = RegresScaleMult * (SumFactor %*% ((RegresValueMap %*% CReps %*% p) * regresParams));
-            gradient_change_p = twos_re %*% ((w_bilinear_p %*% ones_fv) * (SumFactor %*% ((regresParams %*% ones_fv) * (RegresValueMap %*% CReps))));
-        }
-        q = t(gradient_change_p);
-        
-        alpha = norm_r2 / castAsScalar (t(p) %*% q);
-        shift_vector_change = alpha * p;
-        shift_vector = shift_vector + shift_vector_change;
-        old_norm_r2 = norm_r2;
-        residual = residual + alpha * q;
-        norm_r2 = sum (residual * residual);
-        p = - residual + (norm_r2 / old_norm_r2) * p;
-        cg_iter = cg_iter + 1;
-        if (cg_iter > min (deg, 2 + opt_iter / 3)) {
-            cg_terminate = 1;
-        }
-    }
-
-    if (is_step_params == 1) {
-        params = params + shift_vector;
-        regresParams = RegresParamMap %*% params + RegresCoeffDefault;
-    } else {
-        freeVars = freeVars + shift_vector;
-        reports = CReps %*% freeVars + dReps;
-        regresValues = RegresValueMap %*% reports + RegresFactorDefault;
-    }
-    
-    # Update the bilinear form and check convergence
-
-    if (is_step_params == 1) {
-        old_bilinear_form_value = bilinear_form_value;
-    }
-
-    bilinear_sums = SumFactor %*% (regresValues * regresParams);
-    w_bilinear_sums = RegresScaleMult * bilinear_sums;
-    bilinear_form_value = sum (w_bilinear_sums * bilinear_sums);
-        
-    if (is_step_params == 1) {
-        print ("Optimization step " + opt_iter + " (params) :  bilinear form value = " + bilinear_form_value);
-    } else {
-        print ("Optimization step " + opt_iter + " (reports):  bilinear form value = " + bilinear_form_value);
-    }
-    
-    is_step_params = 1 - is_step_params;
-    opt_iter = opt_iter + 1;
-
-    if (is_step_params == 1 & opt_iter > num_opt_iter) {
-        is_opt_converged = 1;
-    }
-}
-
-
-
-/*  UNCOMMENT TO TRY CONJUGATE GRADIENT DESCENT
-
-
-
-print ("Starting Gradient Descent...");
-### GRADIENT DESCENT WITH MODIFICATIONS TO ENHANCE CONVERGENCE
-
-# num_past_dirVs  = 3;
-# past_dirVFrees  = matrix (0.0, rows = num_frees,  cols = num_past_dirVs);
-# past_dirVParams = matrix (0.0, rows = num_params, cols = num_past_dirVs);
-
-shift_T = -1000.0;
-is_enough_gradient_descent = 0;
-gd_iter = 0;
-
-while (is_enough_gradient_descent == 0)
-{
-### GD-STEP 1: COMPUTE LOSS & GRADIENT AT CURRENT POINT
-
-    reports = CReps %*% freeVars + dReps;
-    regresValues = RegresValueMap %*% reports + RegresFactorDefault;
-    regresParams = RegresParamMap %*% params  + RegresCoeffDefault;
-
-    bilinear_sums = SumFactor %*% (regresValues * regresParams);
-    w_bilinear_sums = RegresScaleMult * bilinear_sums;
-    gradientInFrees  = twos_re %*% ((w_bilinear_sums %*% ones_fv) * (SumFactor %*% ((regresParams %*% ones_fv) * (RegresValueMap %*% CReps))));
-    gradientInParams = twos_re %*% ((w_bilinear_sums %*% ones_pm) * (SumFactor %*% ((regresValues %*% ones_pm) *  RegresParamMap)));
-    
-### CG-STEP 2: MAKE A FEW APPROXIMATE CONJUGATE GRADIENT STEPS
-
-    shift_frees  = matrix (0.0, rows = num_frees,  cols = 1);
-    shift_params = matrix (0.0, rows = num_params, cols = 1);
-
-    residual_frees  = t(gradientInFrees);
-    residual_params = t(gradientInParams);
-    
-    p_frees  = - residual_frees;
-    p_params = - residual_params;
-    
-    norm_r2 = sum (residual_frees * residual_frees) + sum (residual_params * residual_params);
-    
-    cg_iter = 1;
-    cg_terminate = 0;
-    cg_eps = 0.000001;
-
-    while (cg_terminate == 0)
-    {
-        regresValues_eps_p = regresValues + cg_eps * (RegresValueMap %*% CReps %*% p_frees);
-        regresParams_eps_p = regresParams + cg_eps * (RegresParamMap %*% p_params);
-
-        bilinear_sums_eps_p   = SumFactor %*% (regresValues_eps_p * regresParams_eps_p);
-        w_bilinear_sums_eps_p = RegresScaleMult * bilinear_sums_eps_p;
-        
-        gradientInFrees_eps_p  = twos_re %*% ((w_bilinear_sums_eps_p %*% ones_fv) * (SumFactor %*% ((regresParams_eps_p %*% ones_fv) * (RegresValueMap %*% CReps))));
-        gradientInParams_eps_p = twos_re %*% ((w_bilinear_sums_eps_p %*% ones_pm) * (SumFactor %*% ((regresValues_eps_p %*% ones_pm) *  RegresParamMap)));
-        
-        q_frees  = t(gradientInFrees_eps_p  - gradientInFrees)  / cg_eps;
-        q_params = t(gradientInParams_eps_p - gradientInParams) / cg_eps;
-        
-        alpha = norm_r2 / castAsScalar (t(p_frees) %*% q_frees + t(p_params) %*% q_params);
-
-        shift_frees  = shift_frees  + alpha * p_frees;
-        shift_params = shift_params + alpha * p_params;
-
-        old_norm_r2 = norm_r2;
-        
-        residual_frees  = residual_frees  + alpha * q_frees;
-        residual_params = residual_params + alpha * q_params;
-        
-        norm_r2 = sum (residual_frees * residual_frees) + sum (residual_params * residual_params);
-        
-        p_frees  = - residual_frees  + (norm_r2 / old_norm_r2) * p_frees;
-        p_params = - residual_params + (norm_r2 / old_norm_r2) * p_params;
-        
-        cg_iter = cg_iter + 1;
-        if (cg_iter > 4) {
-            cg_terminate = 1;
-        }
-    }
-
-### GD-STEP 3: COMPUTE THE NEW DIRECTION VECTOR & "TEST" SHIFT
-    
-    dirVFrees_candidate  = shift_frees;
-    dirVParams_candidate = shift_params;
-  
-#        random_frees  = Rand (rows = num_frees,  cols = 1, min = 0.9, max = 1.1, sparsity = 1.0);
-#        random_params = Rand (rows = num_params, cols = 1, min = 0.9, max = 1.1, sparsity = 1.0);
-#        dirVFrees_candidate  = dirVFrees_candidate  * random_frees;
-#        dirVParams_candidate = dirVParams_candidate * random_params;
-
-    dirVFrees  = dirVFrees_candidate;
-    dirVParams = dirVParams_candidate;
-
-#   dirV_proj_factors =  t(past_dirVFrees) %*% dirVFrees_candidate + t(past_dirVParams) %*% dirVParams_candidate;
-#   dirVFrees  = dirVFrees_candidate  - past_dirVFrees  %*% dirV_proj_factors;
-#   dirVParams = dirVParams_candidate - past_dirVParams %*% dirV_proj_factors;
-    
-    dirV_denom = sqrt (sum (dirVFrees * dirVFrees) + sum (dirVParams * dirVParams));
-    dirVFrees  = dirVFrees  / dirV_denom;
-    dirVParams = dirVParams / dirV_denom;
-
-#   past_dirVFrees  [, 2:num_past_dirVs] = past_dirVFrees  [, 1:(num_past_dirVs-1)];
-#   past_dirVParams [, 2:num_past_dirVs] = past_dirVParams [, 1:(num_past_dirVs-1)];
-#   past_dirVFrees  [, 1] = dirVFrees;
-#   past_dirVParams [, 1] = dirVParams;
-
-        
-### GD-STEP 4: COMPUTE THE POLYNOMIAL FOR  d loss(t) / dt
-
-    dirVRegresValues = RegresValueMap %*% CReps %*% dirVFrees;
-    dirVRegresParams = RegresParamMap %*% dirVParams;
-    dirVdirV_bilinear_sums = SumFactor %*% (dirVRegresValues * dirVRegresParams);
-
-    dirV_bilinear_sums = SumFactor %*% (dirVRegresValues * regresParams + regresValues * dirVRegresParams);
-    L_0 = sum (w_bilinear_sums * bilinear_sums);
-    L_prime_0 = 2.0 * sum (w_bilinear_sums * dirV_bilinear_sums);
-
-    freeVars_T = freeVars + shift_T * dirVFrees;
-    params_T   = params   + shift_T * dirVParams;
-
-    reports_T = CReps %*% freeVars_T + dReps;
-    regresValues_T = RegresValueMap %*% reports_T + RegresFactorDefault;
-    regresParams_T = RegresParamMap %*% params_T  + RegresCoeffDefault;
-
-    bilinear_sums_T = SumFactor %*% (regresValues_T * regresParams_T);
-    w_bilinear_sums_T = RegresScaleMult * bilinear_sums_T;
-    dirV_bilinear_sums_T = SumFactor %*% (dirVRegresValues * regresParams_T + regresValues_T * dirVRegresParams);
-    
-    L_T = sum (w_bilinear_sums_T * bilinear_sums_T);
-    L_prime_T = 2.0 * sum (w_bilinear_sums_T * dirV_bilinear_sums_T);
-    
-    coeff_a = 4.0 * sum (RegresScaleMult * dirVdirV_bilinear_sums * dirVdirV_bilinear_sums);
-    coeff_b = -1.5 * coeff_a * shift_T + 3.0 * (L_prime_0 + L_prime_T + 2.0 * (L_0 - L_T) / shift_T) / (shift_T * shift_T);
-    coeff_c = 0.5 * coeff_a * shift_T * shift_T - 2.0 * (2.0 * L_prime_0 + L_prime_T + 3.0 * (L_0 - L_T) / shift_T) / shift_T;
-    coeff_d = L_prime_0;
-
-### GD-STEP 5: SOLVE CUBIC EQUATION & PICK THE BEST SHIFT (ROOT)
-
-    coeff_aa = coeff_b / coeff_a;
-    coeff_bb = coeff_c / coeff_a;
-    coeff_cc = coeff_d / coeff_a;
-
-    coeff_Q = (coeff_aa * coeff_aa - 3.0 * coeff_bb) / 9.0;
-    coeff_R = (2.0 * coeff_aa * coeff_aa * coeff_aa - 9.0 * coeff_aa * coeff_bb + 27.0 * coeff_cc) / 54.0;
-
-    root_choice = 0.0;
-    if (coeff_R * coeff_R < coeff_Q * coeff_Q * coeff_Q)
-    {
-        two_pi_third = 2.0943951023931954923084289221863;
-        acos_argument = coeff_R / sqrt (coeff_Q * coeff_Q * coeff_Q);
-        
-        x = abs (acos_argument);
-        acos_x = sqrt (1.0 - x) * (1.5707963050 + x * (-0.2145988016
-            + x * ( 0.0889789874 + x * (-0.0501743046
-            + x * ( 0.0308918810 + x * (-0.0170881256
-            + x * ( 0.0066700901 + x * (-0.0012624911))))))));
-        if (acos_argument >= 0.0) {
-            coeff_theta = acos_x;
-        } else {
-            coeff_theta = 3.1415926535897932384626433832795 - acos_x;
-        }
-        
-        root_1 = - coeff_aa / 3.0 - 2.0 * sqrt (coeff_Q) * cos (coeff_theta / 3.0);
-        root_2 = - coeff_aa / 3.0 - 2.0 * sqrt (coeff_Q) * cos (coeff_theta / 3.0 + two_pi_third);
-        root_3 = - coeff_aa / 3.0 - 2.0 * sqrt (coeff_Q) * cos (coeff_theta / 3.0 - two_pi_third);
-        root_min = min (min (root_1, root_2), root_3);
-        root_max = max (max (root_1, root_2), root_3);        
-        root_int_diff = (((root_max * coeff_a / 4.0 + coeff_b / 3.0) * root_max + coeff_c / 2.0) * root_max + coeff_d) * root_max
-                      - (((root_min * coeff_a / 4.0 + coeff_b / 3.0) * root_min + coeff_c / 2.0) * root_min + coeff_d) * root_min;
-        if (root_int_diff >= 0.0) {
-            root_choice = root_min;
-        } else {
-            root_choice = root_max;
-        }
-    } else {
-        if (coeff_R >= 0.0) {
-            sgn_coeff_R = 1.0;
-        } else {
-            sgn_coeff_R = -1.0;
-        }
-        coeff_bigA = - sgn_coeff_R * (abs (coeff_R) + sqrt (coeff_R * coeff_R - coeff_Q * coeff_Q * coeff_Q)) ^ (1.0 / 3.0);
-        if (coeff_bigA != 0.0) {
-            root_choice = coeff_bigA + coeff_Q / coeff_bigA - coeff_aa / 3.0;
-        } else {
-            root_choice = - coeff_aa / 3.0;
-        }
-    }
-    
-    root_choice = root_choice - 
-        (((coeff_a * root_choice + coeff_b) * root_choice + coeff_c) * root_choice + coeff_d) 
-            / ((3 * coeff_a * root_choice + 2 * coeff_b) * root_choice + coeff_c);
-    root_choice = root_choice - 
-        (((coeff_a * root_choice + coeff_b) * root_choice + coeff_c) * root_choice + coeff_d) 
-            / ((3 * coeff_a * root_choice + 2 * coeff_b) * root_choice + coeff_c);
-
-
-### GD-STEP 6: FINISH UP THE ITERATION
-
-    freeVars = freeVars + root_choice * dirVFrees;
-    params   = params   + root_choice * dirVParams;
-
-    root_int_diff = (((root_choice * coeff_a / 4.0 + coeff_b / 3.0) * root_choice + coeff_c / 2.0) * root_choice + coeff_d) * root_choice;
-    if (- root_int_diff < 0.00000001 * L_0) {
-        is_enough_gradient_descent = 1;
-    }
-    gd_iter = gd_iter + 1;
-    print ("Grad Descent Iter " + gd_iter + ":  L = " + (L_0 + root_int_diff) + ";  shift = " + root_choice);
-    shift_T = - 100.0 * sqrt (abs(root_choice) * abs(shift_T));
-}
-
-
-print ("Gradient Descent finished.  Starting MCMC...");
-
-
-
-
-END UNCOMMENT TO TRY CONJUGATE GRADIENT DESCENT  */
-
-
-
-
-
-
-
-
-print ("Performing MCMC initialization...");
-
-reports = CReps %*% freeVars + dReps;
-regresValues = RegresValueMap %*% reports + RegresFactorDefault;
-regresParams = RegresParamMap %*% params  + RegresCoeffDefault;
-
-bilinear_vector = regresValues * regresParams;
-bilinear_form = matrix (bilinear_vector, rows = num_reg_eqs, cols = num_factors, byrow = TRUE);
-bilinear_form_value = sum (RegresScaleMult * rowSums (bilinear_form) * rowSums (bilinear_form));
-
-max_num_iter = max_num_burnin_iterations + max_num_observed_iterations;
-dim_sample = num_frees + num_params;
-sample_ones = matrix (1.0, rows = dim_sample, cols = 1);
-
-# Generate a random permutation matrix for the sampling order of freeVars and params
-
-SampleOrder = diag (sample_ones);
-num_swaps = 10 * dim_sample;
-rnd = Rand (rows = num_swaps, cols = 1, min = 0.0, max = 1.0);
-left_swap  = round (0.5 + dim_sample * rnd);
-rnd = Rand (rows = num_swaps, cols = 1, min = 0.0, max = 1.0);
-right_swap = round (0.5 + dim_sample * rnd);
-for (swap_i in 1:num_swaps) {
-    l = castAsScalar (left_swap  [swap_i, 1]);
-    r = castAsScalar (right_swap [swap_i, 1]);
-    if (l != r) {
-        tmp_row = SampleOrder [l, ];
-        SampleOrder [l, ] = SampleOrder [r, ];
-        SampleOrder [r, ] = tmp_row;
-    }
-}
-
-pi = 3.1415926535897932384626433832795;
-zero = matrix (0.0, rows = 1, cols = 1);
-
-isVar = colSums (SampleOrder [1 : num_frees, ]);
-sum_of_observed_reports = matrix (0.0, rows = num_attrs, cols = num_terms);
-sum_of_observed_params = matrix (0.0, rows = num_params, cols = 1);
-num_of_observed_reports = 0;
-sum_of_observed_losses = 0.0;
-is_observed = 0;
-
-is_calculating_loss_change = 0;
-is_monitoring_loss_change = 0;
-avg_prob_of_loss_increase = 0;
-update_factor_for_avg_loss_change = 0.02;
-avg_loss_change = -50.0 * update_factor_for_avg_loss_change;
-old_bilinear_form_value = bilinear_form_value;
-
-# Starting MCMC iterations
-
-iter = 0;
-
-while ((iter < max_num_iter) & (num_of_observed_reports < max_num_observed_iterations))
-{
-    iter = iter + 1;
-
-    # Initialize (bi-)linear forms
-    
-    regresValues = RegresValueMap %*% reports + RegresFactorDefault;
-    regresParams = RegresParamMap %*% params + RegresCoeffDefault;
-    bilinear_form_vector = regresValues * regresParams;
-    
-    bilinear_form = matrix (bilinear_form_vector, rows = num_reg_eqs, cols = num_factors, byrow = TRUE);
-    bilinear_form_value = sum (RegresScaleMult * rowSums (bilinear_form) * rowSums (bilinear_form));
-    
-    if (bilinear_form_value > old_bilinear_form_value) {
-        avg_prob_of_loss_increase = avg_prob_of_loss_increase * (1 - update_factor_for_avg_loss_change) + 1 * update_factor_for_avg_loss_change;
-    } else {
-        avg_prob_of_loss_increase = avg_prob_of_loss_increase * (1 - update_factor_for_avg_loss_change);
-    }
-    if (is_calculating_loss_change == 0 & avg_prob_of_loss_increase > 0.4) {
-        is_calculating_loss_change = 1;
-    }
-    if (is_monitoring_loss_change == 0 & avg_prob_of_loss_increase > 0.5) {
-        is_calculating_loss_change = 1;
-        is_monitoring_loss_change = 1;
-        print ("Monitoring the average loss change is ON.        ");
-    }
-    if (is_calculating_loss_change == 1) {
-        avg_loss_change = avg_loss_change * (1 - update_factor_for_avg_loss_change) 
-            + (bilinear_form_value - old_bilinear_form_value) * update_factor_for_avg_loss_change;
-    }
-    if (is_observed == 0 & ((is_monitoring_loss_change == 1 & avg_loss_change > 0) | iter > max_num_burnin_iterations)) {
-        print ("Burn-in ENDS, observation STARTS.        ");
-        is_observed = 1;
-    }
-    
-    old_bilinear_form_value = bilinear_form_value;
-    
-    bilinear_form_value_to_print = bilinear_form_value;
-    if (bilinear_form_value < 100000) {
-        bilinear_form_value_to_print = round (10000 * bilinear_form_value) / 10000;
-    } else {
-    if (bilinear_form_value < 1000000000) {
-        bilinear_form_value_to_print = round (bilinear_form_value);
-    }}
-
-    if (is_monitoring_loss_change == 0) {
-        print ("MCMC iteration " + iter + ":  Prob [loss_increase] = " + (round (10000 * avg_prob_of_loss_increase) / 10000)
-            + ",  bilinear form value = " + bilinear_form_value_to_print);
-    } else {
-        print ("MCMC iteration " + iter + ":  Prob [loss_increase] = " + (round (10000 * avg_prob_of_loss_increase) / 10000) 
-            + ",  bilinear form value = " + bilinear_form_value_to_print + ",  avg_loss_change = " + (round (10000 * avg_loss_change) / 10000));
-    }
-    
-    # Create a normally distributed random sample
-    
-    dim_half_sample = castAsScalar (round (dim_sample / 2 + 0.1 + zero));
-    rnd1 = Rand (rows = dim_half_sample, cols = 1, min = 0.0, max = 1.0);
-    rnd2 = Rand (rows = dim_half_sample, cols = 1, min = 0.0, max = 1.0);
-    rnd_normal_1 = sqrt (- 2.0 * log (rnd1)) * sin (2 * pi * rnd2);
-    rnd_normal_2 = sqrt (- 2.0 * log (rnd1)) * cos (2 * pi * rnd2);
-    rnd_normal = matrix (0.0, rows = dim_sample, cols = 1);
-    rnd_normal [1 : dim_half_sample, ] = rnd_normal_1;
-    rnd_normal [(dim_sample - dim_half_sample + 1) : dim_sample, ] = rnd_normal_2;
-        
-    # Initialize updaters
-    
-    freeVars_updater = freeVars * 0.0;
-    params_updater = params * 0.0;
-    regresValues_updater = regresValues * 0.0;
-    regresParams_updater = regresParams * 0.0;
-    bilinear_updater_vector = bilinear_form_vector * 0.0;
-    
-    # Perform the sampling
-
-    for (idx in 1:dim_sample)
-    {
-        # Generate the sample unit-vector and updaters
-        
-        if (castAsScalar (isVar [1, idx]) > 0.5) {
-            freeVars_updater = SampleOrder [1 : num_frees, idx];
-            regresValues_updater = RegresValueMap %*% CReps %*% freeVars_updater;
-            bilinear_updater_vector = regresValues_updater * regresParams;
-        } else {
-            params_updater = SampleOrder [(num_frees + 1) : dim_sample, idx];
-            regresParams_updater = RegresParamMap %*% params_updater;
-            bilinear_updater_vector = regresValues * regresParams_updater;
-        }
-        bilinear_updater = matrix (bilinear_updater_vector, rows = num_reg_eqs, cols = num_factors, byrow = TRUE);
-            
-        # Compute the quadratic by three shift-points: -1, 0, +1
-
-        bilinear_form_value = sum (RegresScaleMult * rowSums (bilinear_form) * rowSums (bilinear_form));
-        q_minus_1 = sum (RegresScaleMult * rowSums (bilinear_form - bilinear_updater) * rowSums (bilinear_form - bilinear_updater));
-        q_plus_1  = sum (RegresScaleMult * rowSums (bilinear_form + bilinear_updater) * rowSums (bilinear_form + bilinear_updater));
-        coeff_b = (q_plus_1 - q_minus_1) / 2.0;
-        coeff_a = (q_plus_1 + q_minus_1) / 2.0 - bilinear_form_value;
-
-        # Find the mean and the sigma for f(x) ~ exp (- (ax^2 + bx + c)),
-        # then compute the shift to get the new sample
-            
-        mean_shift  = - coeff_b / (2.0 * coeff_a);
-        sigma_shift = 1.0 / sqrt (2.0 * coeff_a);
-        shift = mean_shift + sigma_shift * castAsScalar (rnd_normal [idx, 1]);
-            
-# BEGIN DEBUG INSERT
-# mmm = 1;
-# if (castAsScalar (isVar [1, idx]) > 0.5 &          # IT IS A FREE VARIABLE, NOT A PARAMETER
-#     castAsScalar (freeVars_updater [mmm, 1]) > 0)  # IT IS mmm-TH FREE VARIABLE
-# {
-# #   print ("freeVars[" + mmm + "]:  q_minus_1 = " + q_minus_1 + ",   q_plus_1 = " + q_plus_1 + ",   coeff_a = " + coeff_a + ",   coeff_b = " + coeff_b);
-#     print ("freeVars[" + mmm + "]:  q_minus_1 = " + q_minus_1 + ",   q_plus_1 = " + q_plus_1 + ",   mean_shift = " + mean_shift + ",   sigma_shift = " + sigma_shift + ",   shift = " + shift);
-# }
-# if (castAsScalar (isVar [1, idx]) <= 0.5 &       # IT IS A PARAMETER, NOT A FREE VARIABLE
-#     castAsScalar (params_updater [mmm, 1]) > 0)  # IT IS mmm-TH PARAMETER
-# {
-# #   print ("  params[" + mmm + "]:  q_minus_1 = " + q_minus_1 + ",   q_plus_1 = " + q_plus_1 + ",   coeff_a = " + coeff_a + ",   coeff_b = " + coeff_b);
-#     print ("  params[" + mmm + "]:  q_minus_1 = " + q_minus_1 + ",   q_plus_1 = " + q_plus_1 + ",   mean_shift = " + mean_shift + ",   sigma_shift = " + sigma_shift + ",   shift = " + shift);
-# }
-# END DEBUG INSERT
-
-        # Perform the updates
-
-        bilinear_form = bilinear_form + shift * bilinear_updater;
-        if (castAsScalar (isVar [1, idx]) > 0.5) {
-            freeVars = freeVars + shift * freeVars_updater;
-            regresValues = regresValues + shift * regresValues_updater;
-        } else {
-            params = params + shift * params_updater;
-            regresParams = regresParams + shift * regresParams_updater;
-        }
-    }
-    
-    # Update / adjust the reports and the parameters
-    
-    reports = CReps %*% freeVars + dReps;
-    reports_matrix = matrix (reports, rows = num_attrs, cols = num_terms, byrow = FALSE);
-        
-    # Make an observation of the reports and/or the parameters
-    
-    if (is_observed > 0)
-    {
-        sum_of_observed_reports = sum_of_observed_reports + reports_matrix;
-        num_of_observed_reports = num_of_observed_reports + 1;
-
-        sum_of_observed_params = sum_of_observed_params + params;
-        sum_of_observed_losses = sum_of_observed_losses + bilinear_form_value;
-    }
-
-# v1 =castAsScalar(round(10000*reports[1 + (num_terms - 1) * num_attrs, 1])/10000);
-# v2 =castAsScalar(round(10000*reports[2 + (num_terms - 1) * num_attrs, 1])/10000);
-# v3 =castAsScalar(round(10000*reports[3 + (num_terms - 1) * num_attrs, 1])/10000);
-# v4 =castAsScalar(round(10000*reports[4 + (num_terms - 1) * num_attrs, 1])/10000);
-# w1 =castAsScalar(round(10000*reports_matrix[ 1,num_terms])/10000);
-# w2 =castAsScalar(round(10000*reports_matrix[ 2,num_terms])/10000);
-# w3 =castAsScalar(round(10000*reports_matrix[ 3,num_terms])/10000);
-# w4 =castAsScalar(round(10000*reports_matrix[ 4,num_terms])/10000);
-
-# v5 =castAsScalar(round(reports_matrix[ 5,num_terms]));
-# v8 =castAsScalar(round(reports_matrix[ 8,num_terms]));
-# v9 =castAsScalar(round(reports_matrix[ 9,num_terms]));
-# v10=castAsScalar(round(reports_matrix[10,num_terms]));
-# v16=castAsScalar(round(reports_matrix[16,num_terms]));
-# v19=castAsScalar(round(reports_matrix[19,num_terms]));
-
-#print (" Sample = 1:" + v1 + ", 2:" + v2 + ", 3:" + v3 + ", 4:" + v4);
-## + ", 5:" + v5 + ", 8:" + v8 + ", 9:" + v9 + ", 10:" + v10 + ", 16:" + v16 + ", 19:" + v19);
-#print (" Sample = 1:" + w1 + ", 2:" + w2 + ", 3:" + w3 + ", 4:" + w4);
-## + ", 5:" + w5 + ", 8:" + w8 + ", 9:" + w9 + ", 10:" + w10 + ", 16:" + w16 + ", 19:" + w19);
-
-}
-
-print ("Average observed loss = " + (sum_of_observed_losses / num_of_observed_reports));
-print ("Writing out the results...");
-
-avg_reports_matrix = sum_of_observed_reports / num_of_observed_reports;
-avg_params = sum_of_observed_params / num_of_observed_reports;
-write (avg_reports_matrix, $11, format="text");
-write (avg_params, $12, format="text");
-
-print ("END ImputeGaussMCMC");
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# Implements the MCMC algorithm for imputation of missing data into a time-series of "reports".
+# Each report is a fixed-size vector of attribute values; reports come out each year/quarter/month ("term").
+# Hard linear equality constraints restrict values in/across the reports, e.g. total cost = sum of all costs.
+# Soft linear regression constraints define dependencies between values in/across the reports.
+# Linear regression parameters are unknown and sampled together with the missing values in the reports.
+#
+# INPUT 1: Initial reports matrix [1 : num_attrs, 1 : num_terms] with missing values usually set to zero,
+#          but it MUST BE CONSISTENT with hard constraints! Set some missing values to nonzero if needed.
+#          There are "num_terms" reports in the matrix, each having "num_attrs" attribute values.
+#
+# INPUT 2: Sparse matrix [1 : (num_terms * num_attrs), 1 : num_frees] that defines a linear map from
+#          "free" variables to the reports matrix. A tensor of size (num_terms * num_attrs * num_frees)
+#          where the reports matrix is stretched into a column-vector [1 : (num_terms * num_attrs)].
+#          Term = t, attribute = i  -->  index = (t-1) * num_attrs + i
+#
+# INPUT 3: Sparse matrix [1 : (num_reg_eqs * num_factors), 1 : (num_terms * num_attrs)] that defines
+#          a linear map from the stretched matrix of reports to the stretched matrix of regression factors.
+#
+# INPUT 4: Sparse vector [1 : (num_reg_eqs * num_factors), 1] that defines "default regression factors"
+#          (if nonzero) to be added to the regression factors before they are multiplied by parameters.
+#
+# INPUT 5: Sparse matrix [1 : (num_reg_eqs * num_factors), 1 : num_params] that defines a linear map
+#          from the vector of parameters to the stretched matrix of regression factors.
+#
+# INPUT 6: Sparse vector [1 : (num_reg_eqs * num_factors), 1] that defines default regression coefficients
+#          (if nonzero) to be added to the parameters (if any) before being multiplied by regression factors.
+#
+# INPUT 7: A vector [1 : num_reg_eqs, 1] of scale multipliers, one per regression
+#
+# INPUT 8 : Number of factors in a regression equation, including the estimated value
+# INPUT 9 : Maximum number of burn-in full iterations (that sample each variable and each parameter once)
+#           BUT the actual number of burn-in iterations may be smaller if "free fall" ends sooner
+# INPUT 10: Maximum number of observed full iterations (that sample each variable and each parameter once)
+#
+# INPUT 11: Output file name and path for the average MCMC reports table
+# INPUT 12: Output file for debugging (currently: the average parameters vector)
+#
+# Example:
+# hadoop jar SystemML.jar -f test/scripts/applications/impute/imputeGaussMCMC.dml -exec singlenode -args
+#    test/scripts/applications/impute/initial_reports
+#    test/scripts/applications/impute/CReps
+#    test/scripts/applications/impute/RegresValueMap
+#    test/scripts/applications/impute/RegresFactorDefault
+#    test/scripts/applications/impute/RegresParamMap
+#    test/scripts/applications/impute/RegresCoeffDefault
+#    test/scripts/applications/impute/RegresScaleMult
+#    4 1000 100
+#    test/scripts/applications/impute/output_reports
+#    test/scripts/applications/impute/debug_info
+
+
+print ("START ImputeGaussMCMC");   # Setup section: read inputs $1-$10, derive the problem sizes, build constant helper vectors
+print ("Reading the input files...");
+# Inputs 1 and 2: the initial reports matrix and the linear map from the free variables to the stretched reports
+initial_reports = read ($1);
+CReps = read ($2);
+
+num_terms = ncol (initial_reports);   # Number of periods for which reports are produced
+num_attrs = nrow (initial_reports);   # Number of attribute values per each term report
+num_frees = ncol (CReps);   # Number of free variables used to describe all consistent reports
+
+dReps_size = num_terms * num_attrs;
+dReps = matrix (initial_reports, rows = dReps_size, cols = 1, byrow = FALSE);   # Initial reports reshaped into a single column (byrow = FALSE)
+
+# We assume that all report-series consistent with hard constraints form an affine set:
+#     reports = CReps %*% freeVars + dReps
+# Here "freeVars" is a vector of "free variables" (degrees of freedom), "CReps" is a linear mapping,
+# and "dReps" are the default values for the reports that correspond to the initial reports matrix.
+
+RegresValueMap = read ($3);   # Input 3: linear map applied to the stretched reports
+RegresFactorDefault = read ($4);   # Input 4: default factors added to the mapped reports
+RegresParamMap = read ($5);    # Input 5: linear map applied to the parameter vector
+RegresCoeffDefault = read ($6);    # Input 6: default coefficients added to the mapped parameters
+RegresScaleMult = read ($7);   # Input 7: per-regression multipliers that weight the squared regression sums in the loss
+
+num_factors = $8;   # Number of factors in each regression equation, including the estimated value
+num_reg_eqs = nrow (RegresParamMap) / num_factors;   # Number of regression equations
+num_params  = ncol (RegresParamMap);   # Number of parameters used in all regressions
+max_num_burnin_iterations = $9;   # Input 9: cap on the burn-in iterations of the MCMC loop
+max_num_observed_iterations = $10;   # Input 10: cap on the observed iterations of the MCMC loop
+# Constant row vectors: "x %*% ones_*" replicates a column vector x across columns; "twos_re %*% M" gives 2 * column sums of M
+ones_fv = matrix (1.0, rows = 1, cols = num_frees);
+ones_pm = matrix (1.0, rows = 1, cols = num_params);
+twos_re = matrix (2.0, rows = 1, cols = num_reg_eqs);
+
+# Create a summation operator (matrix) that adds the factors in each regression:
+# row e of SumFactor gets ones in columns (e-1)*num_factors+1 .. e*num_factors and zeros elsewhere.
+ncols_SumFactor = num_reg_eqs * num_factors;
+SumFactor = matrix (0.0, rows = num_reg_eqs, cols = ncols_SumFactor);
+ones_f = matrix (1.0, rows = 1, cols = num_factors);
+SumFactor [1, 1:num_factors] = ones_f;   # Seed: the first row sums the factors of the first regression
+nSumFactor = 1;   # Number of rows (regressions) of SumFactor filled in so far
+while (nSumFactor < num_reg_eqs) {
+    incSumFactor = nSumFactor;   # Try to double the filled part by copying all of it...
+    if (incSumFactor > num_reg_eqs - nSumFactor) {
+        incSumFactor = num_reg_eqs - nSumFactor;   # ...but never go past the last regression
+    }
+    SumFactor [(nSumFactor + 1) : (nSumFactor + incSumFactor), 
+        (nSumFactor * num_factors + 1) : ((nSumFactor + incSumFactor) * num_factors)] = 
+            SumFactor [1 : incSumFactor, 1 : (incSumFactor * num_factors)];
+    nSumFactor = nSumFactor + incSumFactor;   # The top-left block copied above now occupies the next diagonal position
+}
+
+freeVars = matrix (0.0, rows = num_frees, cols = 1);   # With freeVars = 0 the reports equal dReps, i.e. the initial reports
+params = matrix (1.0, rows = num_params, cols = 1);   # All parameters start at 1.0
+
+# Initial optimization: minimize the weighted sum of squared regression sums by alternating between a
+# params-only step and a freeVars-only step, each made of a few conjugate gradient (CG) iterations.
+num_opt_iter = 20;
+print ("Performing initial optimization (" + num_opt_iter + " alternating CG steps)...");
+
+reports = CReps %*% freeVars + dReps;
+regresValues = RegresValueMap %*% reports + RegresFactorDefault;
+regresParams = RegresParamMap %*% params  + RegresCoeffDefault;
+# bilinear_sums [e] = sum over the factors of regression e of (value * coefficient); the loss weights its square by RegresScaleMult [e]
+bilinear_sums = SumFactor %*% (regresValues * regresParams);
+w_bilinear_sums = RegresScaleMult * bilinear_sums;
+bilinear_form_value = sum (w_bilinear_sums * bilinear_sums);
+
+opt_iter = 1;
+is_step_params = 1;   # 1 = the next step updates params, 0 = it updates freeVars
+is_opt_converged = 0;   # Set to 1 by the stopping test at the end of the loop
+
+print ("Before optimization:    Initial bilinear form value = " + bilinear_form_value);
+
+while (is_opt_converged == 0)
+{
+    deg = is_step_params * num_params + (1 - is_step_params) * num_frees;   # Dimension of the block optimized in this step
+    shift_vector = matrix (0.0, rows = deg, cols = 1);   # Accumulated CG displacement for that block
+
+    # Compute gradient of the loss with respect to the current block, as a 1 x deg row vector:
+    #   2 * sum over regressions e of w_bilinear_sums [e] * d (bilinear_sums [e]) / d (block)
+    if (is_step_params == 1) {
+        gradient = twos_re %*% ((w_bilinear_sums %*% ones_pm) * (SumFactor %*% ((regresValues %*% ones_pm) *  RegresParamMap)));
+    } else {
+        gradient = twos_re %*% ((w_bilinear_sums %*% ones_fv) * (SumFactor %*% ((regresParams %*% ones_fv) * (RegresValueMap %*% CReps))));
+    }
+
+    # Make a few conjugate gradient steps
+    # (residual = gradient at the current CG point, p = current search direction)
+    residual = t(gradient);
+    p = - residual;
+    norm_r2 = sum (residual * residual);
+    cg_iter = 1;
+    cg_terminate = 0;
+
+    while (cg_terminate == 0)
+    {
+        # Want: q = A %*% p;
+        # Compute gradient change from 0 to p
+        # (same gradient formula as above, applied to p alone without the default terms)
+        if (is_step_params == 1) {
+            w_bilinear_p = RegresScaleMult * (SumFactor %*% (regresValues * (RegresParamMap %*% p)));
+            gradient_change_p = twos_re %*% ((w_bilinear_p %*% ones_pm) * (SumFactor %*% ((regresValues %*% ones_pm) * RegresParamMap)));
+        } else {
+            w_bilinear_p = RegresScaleMult * (SumFactor %*% ((RegresValueMap %*% CReps %*% p) * regresParams));
+            gradient_change_p = twos_re %*% ((w_bilinear_p %*% ones_fv) * (SumFactor %*% ((regresParams %*% ones_fv) * (RegresValueMap %*% CReps))));
+        }
+        q = t(gradient_change_p);
+        
+        alpha = norm_r2 / castAsScalar (t(p) %*% q);   # Step length along p; NOTE(review): no guard against t(p) %*% q being 0
+        shift_vector_change = alpha * p;
+        shift_vector = shift_vector + shift_vector_change;
+        old_norm_r2 = norm_r2;
+        residual = residual + alpha * q;   # Gradient after moving by alpha * p
+        norm_r2 = sum (residual * residual);
+        p = - residual + (norm_r2 / old_norm_r2) * p;   # New direction: negative residual plus a multiple of the previous direction
+        cg_iter = cg_iter + 1;
+        if (cg_iter > min (deg, 2 + opt_iter / 3)) {   # The CG iteration budget grows with opt_iter and is capped by deg
+            cg_terminate = 1;
+        }
+    }
+
+    if (is_step_params == 1) {
+        params = params + shift_vector;
+        regresParams = RegresParamMap %*% params + RegresCoeffDefault;
+    } else {
+        freeVars = freeVars + shift_vector;
+        reports = CReps %*% freeVars + dReps;
+        regresValues = RegresValueMap %*% reports + RegresFactorDefault;
+    }
+    
+    # Update the bilinear form and check convergence
+
+    if (is_step_params == 1) {
+        old_bilinear_form_value = bilinear_form_value;   # Loss before this params step; the stopping test below does not read it
+    }
+
+    bilinear_sums = SumFactor %*% (regresValues * regresParams);
+    w_bilinear_sums = RegresScaleMult * bilinear_sums;
+    bilinear_form_value = sum (w_bilinear_sums * bilinear_sums);
+        
+    if (is_step_params == 1) {
+        print ("Optimization step " + opt_iter + " (params) :  bilinear form value = " + bilinear_form_value);
+    } else {
+        print ("Optimization step " + opt_iter + " (reports):  bilinear form value = " + bilinear_form_value);
+    }
+    
+    is_step_params = 1 - is_step_params;   # Alternate between the params block and the freeVars block
+    opt_iter = opt_iter + 1;
+    # Stop right after a freeVars step (the flag has just flipped back to 1) once num_opt_iter steps are done
+    if (is_step_params == 1 & opt_iter > num_opt_iter) {
+        is_opt_converged = 1;   # Purely an iteration cap: no tolerance on the loss is checked here
+    }
+}
+
+
+
+/*  UNCOMMENT TO TRY CONJUGATE GRADIENT DESCENT
+
+
+
+print ("Starting Gradient Descent...");
+### GRADIENT DESCENT WITH MODIFICATIONS TO ENHANCE CONVERGENCE
+
+# num_past_dirVs  = 3;
+# past_dirVFrees  = matrix (0.0, rows = num_frees,  cols = num_past_dirVs);
+# past_dirVParams = matrix (0.0, rows = num_params, cols = num_past_dirVs);
+
+shift_T = -1000.0;
+is_enough_gradient_descent = 0;
+gd_iter = 0;
+
+while (is_enough_gradient_descent == 0)
+{
+### GD-STEP 1: COMPUTE LOSS & GRADIENT AT CURRENT POINT
+
+    reports = CReps %*% freeVars + dReps;
+    regresValues = RegresValueMap %*% reports + RegresFactorDefault;
+    regresParams = RegresParamMap %*% params  + RegresCoeffDefault;
+
+    bilinear_sums = SumFactor %*% (regresValues * regresParams);
+    w_bilinear_sums = RegresScaleMult * bilinear_sums;
+    gradientInFrees  = twos_re %*% ((w_bilinear_sums %*% ones_fv) * (SumFactor %*% ((regresParams %*% ones_fv) * (RegresValueMap %*% CReps))));
+    gradientInParams = twos_re %*% ((w_bilinear_sums %*% ones_pm) * (SumFactor %*% ((regresValues %*% ones_pm) *  RegresParamMap)));
+    
+### CG-STEP 2: MAKE A FEW APPROXIMATE CONJUGATE GRADIENT STEPS
+
+    shift_frees  = matrix (0.0, rows = num_frees,  cols = 1);
+    shift_params = matrix (0.0, rows = num_params, cols = 1);
+
+    residual_frees  = t(gradientInFrees);
+    residual_params = t(gradientInParams);
+    
+    p_frees  = - residual_frees;
+    p_params = - residual_params;
+    
+    norm_r2 = sum (residual_frees * residual_frees) + sum (residual_params * residual_params);
+    
+    cg_iter = 1;
+    cg_terminate = 0;
+    cg_eps = 0.000001;
+
+    while (cg_terminate == 0)
+    {
+        regresValues_eps_p = regresValues + cg_eps * (RegresValueMap %*% CReps %*% p_frees);
+        regresParams_eps_p = regresParams + cg_eps * (RegresParamMap %*% p_params);
+
+        bilinear_sums_eps_p   = SumFactor %*% (regresValues_eps_p * regresParams_eps_p);
+        w_bilinear_sums_eps_p = RegresScaleMult * bilinear_sums_eps_p;
+        
+        gradientInFrees_eps_p  = twos_re %*% ((w_bilinear_sums_eps_p %*% ones_fv) * (SumFactor %*% ((regresParams_eps_p %*% ones_fv) * (RegresValueMap %*% CReps))));
+        gradientInParams_eps_p = twos_re %*% ((w_bilinear_sums_eps_p %*% ones_pm) * (SumFactor %*% ((regresValues_eps_p %*% ones_pm) *  RegresParamMap)));
+        
+        q_frees  = t(gradientInFrees_eps_p  - gradientInFrees)  / cg_eps;
+        q_params = t(gradientInParams_eps_p - gradientInParams) / cg_eps;
+        
+        alpha = norm_r2 / castAsScalar (t(p_frees) %*% q_frees + t(p_params) %*% q_params);
+
+        shift_frees  = shift_frees  + alpha * p_frees;
+        shift_params = shift_params + alpha * p_params;
+
+        old_norm_r2 = norm_r2;
+        
+        residual_frees  = residual_frees  + alpha * q_frees;
+        residual_params = residual_params + alpha * q_params;
+        
+        norm_r2 = sum (residual_frees * residual_frees) + sum (residual_params * residual_params);
+        
+        p_frees  = - residual_frees  + (norm_r2 / old_norm_r2) * p_frees;
+        p_params = - residual_params + (norm_r2 / old_norm_r2) * p_params;
+        
+        cg_iter = cg_iter + 1;
+        if (cg_iter > 4) {
+            cg_terminate = 1;
+        }
+    }
+
+### GD-STEP 3: COMPUTE THE NEW DIRECTION VECTOR & "TEST" SHIFT
+    
+    dirVFrees_candidate  = shift_frees;
+    dirVParams_candidate = shift_params;
+  
+#        random_frees  = Rand (rows = num_frees,  cols = 1, min = 0.9, max = 1.1, sparsity = 1.0);
+#        random_params = Rand (rows = num_params, cols = 1, min = 0.9, max = 1.1, sparsity = 1.0);
+#        dirVFrees_candidate  = dirVFrees_candidate  * random_frees;
+#        dirVParams_candidate = dirVParams_candidate * random_params;
+
+    dirVFrees  = dirVFrees_candidate;
+    dirVParams = dirVParams_candidate;
+
+#   dirV_proj_factors =  t(past_dirVFrees) %*% dirVFrees_candidate + t(past_dirVParams) %*% dirVParams_candidate;
+#   dirVFrees  = dirVFrees_candidate  - past_dirVFrees  %*% dirV_proj_factors;
+#   dirVParams = dirVParams_candidate - past_dirVParams %*% dirV_proj_factors;
+    
+    dirV_denom = sqrt (sum (dirVFrees * dirVFrees) + sum (dirVParams * dirVParams));
+    dirVFrees  = dirVFrees  / dirV_denom;
+    dirVParams = dirVParams / dirV_denom;
+
+#   past_dirVFrees  [, 2:num_past_dirVs] = past_dirVFrees  [, 1:(num_past_dirVs-1)];
+#   past_dirVParams [, 2:num_past_dirVs] = past_dirVParams [, 1:(num_past_dirVs-1)];
+#   past_dirVFrees  [, 1] = dirVFrees;
+#   past_dirVParams [, 1] = dirVParams;
+
+        
+### GD-STEP 4: COMPUTE THE POLYNOMIAL FOR  d loss(t) / dt
+
+    dirVRegresValues = RegresValueMap %*% CReps %*% dirVFrees;
+    dirVRegresParams = RegresParamMap %*% dirVParams;
+    dirVdirV_bilinear_sums = SumFactor %*% (dirVRegresValues * dirVRegresParams);
+
+    dirV_bilinear_sums = SumFactor %*% (dirVRegresValues * regresParams + regresValues * dirVRegresParams);
+    L_0 = sum (w_bilinear_sums * bilinear_sums);
+    L_prime_0 = 2.0 * sum (w_bilinear_sums * dirV_bilinear_sums);
+
+    freeVars_T = freeVars + shift_T * dirVFrees;
+    params_T   = params   + shift_T * dirVParams;
+
+    reports_T = CReps %*% freeVars_T + dReps;
+    regresValues_T = RegresValueMap %*% reports_T + RegresFactorDefault;
+    regresParams_T = RegresParamMap %*% params_T  + RegresCoeffDefault;
+
+    bilinear_sums_T = SumFactor %*% (regresValues_T * regresParams_T);
+    w_bilinear_sums_T = RegresScaleMult * bilinear_sums_T;
+    dirV_bilinear_sums_T = SumFactor %*% (dirVRegresValues * regresParams_T + regresValues_T * dirVRegresParams);
+    
+    L_T = sum (w_bilinear_sums_T * bilinear_sums_T);
+    L_prime_T = 2.0 * sum (w_bilinear_sums_T * dirV_bilinear_sums_T);
+    
+    coeff_a = 4.0 * sum (RegresScaleMult * dirVdirV_bilinear_sums * dirVdirV_bilinear_sums);
+    coeff_b = -1.5 * coeff_a * shift_T + 3.0 * (L_prime_0 + L_prime_T + 2.0 * (L_0 - L_T) / shift_T) / (shift_T * shift_T);
+    coeff_c = 0.5 * coeff_a * shift_T * shift_T - 2.0 * (2.0 * L_prime_0 + L_prime_T + 3.0 * (L_0 - L_T) / shift_T) / shift_T;
+    coeff_d = L_prime_0;
+
+### GD-STEP 5: SOLVE CUBIC EQUATION & PICK THE BEST SHIFT (ROOT)
+
+    coeff_aa = coeff_b / coeff_a;
+    coeff_bb = coeff_c / coeff_a;
+    coeff_cc = coeff_d / coeff_a;
+
+    coeff_Q = (coeff_aa * coeff_aa - 3.0 * coeff_bb) / 9.0;
+    coeff_R = (2.0 * coeff_aa * coeff_aa * coeff_aa - 9.0 * coeff_aa * coeff_bb + 27.0 * coeff_cc) / 54.0;
+
+    root_choice = 0.0;
+    if (coeff_R * coeff_R < coeff_Q * coeff_Q * coeff_Q)
+    {
+        two_pi_third = 2.0943951023931954923084289221863;
+        acos_argument = coeff_R / sqrt (coeff_Q * coeff_Q * coeff_Q);
+        
+        x = abs (acos_argument);
+        acos_x = sqrt (1.0 - x) * (1.5707963050 + x * (-0.2145988016
+            + x * ( 0.0889789874 + x * (-0.0501743046
+            + x * ( 0.0308918810 + x * (-0.0170881256
+            + x * ( 0.0066700901 + x * (-0.0012624911))))))));
+        if (acos_argument >= 0.0) {
+            coeff_theta = acos_x;
+        } else {
+            coeff_theta = 3.1415926535897932384626433832795 - acos_x;
+        }
+        
+        root_1 = - coeff_aa / 3.0 - 2.0 * sqrt (coeff_Q) * cos (coeff_theta / 3.0);
+        root_2 = - coeff_aa / 3.0 - 2.0 * sqrt (coeff_Q) * cos (coeff_theta / 3.0 + two_pi_third);
+        root_3 = - coeff_aa / 3.0 - 2.0 * sqrt (coeff_Q) * cos (coeff_theta / 3.0 - two_pi_third);
+        root_min = min (min (root_1, root_2), root_3);
+        root_max = max (max (root_1, root_2), root_3);        
+        root_int_diff = (((root_max * coeff_a / 4.0 + coeff_b / 3.0) * root_max + coeff_c / 2.0) * root_max + coeff_d) * root_max
+                      - (((root_min * coeff_a / 4.0 + coeff_b / 3.0) * root_min + coeff_c / 2.0) * root_min + coeff_d) * root_min;
+        if (root_int_diff >= 0.0) {
+            root_choice = root_min;
+        } else {
+            root_choice = root_max;
+        }
+    } else {
+        if (coeff_R >= 0.0) {
+            sgn_coeff_R = 1.0;
+        } else {
+            sgn_coeff_R = -1.0;
+        }
+        coeff_bigA = - sgn_coeff_R * (abs (coeff_R) + sqrt (coeff_R * coeff_R - coeff_Q * coeff_Q * coeff_Q)) ^ (1.0 / 3.0);
+        if (coeff_bigA != 0.0) {
+            root_choice = coeff_bigA + coeff_Q / coeff_bigA - coeff_aa / 3.0;
+        } else {
+            root_choice = - coeff_aa / 3.0;
+        }
+    }
+    
+    root_choice = root_choice - 
+        (((coeff_a * root_choice + coeff_b) * root_choice + coeff_c) * root_choice + coeff_d) 
+            / ((3 * coeff_a * root_choice + 2 * coeff_b) * root_choice + coeff_c);
+    root_choice = root_choice - 
+        (((coeff_a * root_choice + coeff_b) * root_choice + coeff_c) * root_choice + coeff_d) 
+            / ((3 * coeff_a * root_choice + 2 * coeff_b) * root_choice + coeff_c);
+
+
+### GD-STEP 6: FINISH UP THE ITERATION
+
+    freeVars = freeVars + root_choice * dirVFrees;
+    params   = params   + root_choice * dirVParams;
+
+    root_int_diff = (((root_choice * coeff_a / 4.0 + coeff_b / 3.0) * root_choice + coeff_c / 2.0) * root_choice + coeff_d) * root_choice;
+    if (- root_int_diff < 0.00000001 * L_0) {
+        is_enough_gradient_descent = 1;
+    }
+    gd_iter = gd_iter + 1;
+    print ("Grad Descent Iter " + gd_iter + ":  L = " + (L_0 + root_int_diff) + ";  shift = " + root_choice);
+    shift_T = - 100.0 * sqrt (abs(root_choice) * abs(shift_T));
+}
+
+
+print ("Gradient Descent finished.  Starting MCMC...");
+
+
+
+
+END UNCOMMENT TO TRY CONJUGATE GRADIENT DESCENT  */
+
+
+
+
+
+
+
+
+print ("Performing MCMC initialization...");
+# Recompute the linear forms from the current freeVars and params
+reports = CReps %*% freeVars + dReps;
+regresValues = RegresValueMap %*% reports + RegresFactorDefault;
+regresParams = RegresParamMap %*% params  + RegresCoeffDefault;
+# bilinear_form has num_reg_eqs rows and num_factors columns (filled with byrow = TRUE); the loss uses its row sums
+bilinear_vector = regresValues * regresParams;
+bilinear_form = matrix (bilinear_vector, rows = num_reg_eqs, cols = num_factors, byrow = TRUE);
+bilinear_form_value = sum (RegresScaleMult * rowSums (bilinear_form) * rowSums (bilinear_form));
+
+max_num_iter = max_num_burnin_iterations + max_num_observed_iterations;   # Overall cap on MCMC iterations
+dim_sample = num_frees + num_params;   # One sampling step per free variable and per parameter
+sample_ones = matrix (1.0, rows = dim_sample, cols = 1);
+
+# Generate a random permutation matrix for the sampling order of freeVars and params
+# by applying num_swaps random row swaps to the identity matrix.
+SampleOrder = diag (sample_ones);
+num_swaps = 10 * dim_sample;
+rnd = Rand (rows = num_swaps, cols = 1, min = 0.0, max = 1.0);
+left_swap  = round (0.5 + dim_sample * rnd);   # Row index for each swap; NOTE(review): a draw of exactly 0.0 or 1.0 could round outside 1 .. dim_sample - confirm Rand / round endpoint behavior
+rnd = Rand (rows = num_swaps, cols = 1, min = 0.0, max = 1.0);
+right_swap = round (0.5 + dim_sample * rnd);
+for (swap_i in 1:num_swaps) {
+    l = castAsScalar (left_swap  [swap_i, 1]);
+    r = castAsScalar (right_swap [swap_i, 1]);
+    if (l != r) {
+        tmp_row = SampleOrder [l, ];   # Swap rows l and r of the permutation matrix
+        SampleOrder [l, ] = SampleOrder [r, ];
+        SampleOrder [r, ] = tmp_row;
+    }
+}
+
+pi = 3.1415926535897932384626433832795;   # Used when generating normal random numbers in the MCMC loop
+zero = matrix (0.0, rows = 1, cols = 1);   # 1 x 1 matrix; the MCMC loop adds it to a scalar expression before round / castAsScalar
+# isVar [1, idx] is 1 when sampling step idx selects a free variable and 0 when it selects a parameter
+isVar = colSums (SampleOrder [1 : num_frees, ]);
+sum_of_observed_reports = matrix (0.0, rows = num_attrs, cols = num_terms);   # Running sums over the observed iterations
+sum_of_observed_params = matrix (0.0, rows = num_params, cols = 1);
+num_of_observed_reports = 0;
+sum_of_observed_losses = 0.0;
+is_observed = 0;   # Switched to 1 by the MCMC loop when burn-in ends
+# State used by the MCMC loop to detect the end of burn-in from exponentially weighted running averages
+is_calculating_loss_change = 0;
+is_monitoring_loss_change = 0;
+avg_prob_of_loss_increase = 0;
+update_factor_for_avg_loss_change = 0.02;   # Weight given to the newest value in both running averages
+avg_loss_change = -50.0 * update_factor_for_avg_loss_change;   # Starts negative (-1.0)
+old_bilinear_form_value = bilinear_form_value;
+
+# Starting MCMC iterations
+
+iter = 0;
+
+while ((iter < max_num_iter) & (num_of_observed_reports < max_num_observed_iterations))
+{
+    iter = iter + 1;
+
+    # Initialize (bi-)linear forms
+    
+    regresValues = RegresValueMap %*% reports + RegresFactorDefault;
+    regresParams = RegresParamMap %*% params + RegresCoeffDefault;
+    bilinear_form_vector = regresValues * regresParams;
+    
+    bilinear_form = matrix (bilinear_form_vector, rows = num_reg_eqs, cols = num_factors, byrow = TRUE);
+    bilinear_form_value = sum (RegresScaleMult * rowSums (bilinear_form) * rowSums (bilinear_form));
+    
+    if (bilinear_form_value > old_bilinear_form_value) {
+        avg_prob_of_loss_increase = avg_prob_of_loss_increase * (1 - update_factor_for_avg_loss_change) + 1 * update_factor_for_avg_loss_change;
+    } else {
+        avg_prob_of_loss_increase = avg_prob_of_loss_increase * (1 - update_factor_for_avg_loss_change);
+    }
+    if (is_calculating_loss_change == 0 & avg_prob_of_loss_increase > 0.4) {
+        is_calculating_loss_change = 1;
+    }
+    if (is_monitoring_loss_change == 0 & avg_prob_of_loss_increase > 0.5) {
+        is_calculating_loss_change = 1;
+        is_monitoring_loss_change = 1;
+        print ("Monitoring the average loss change is ON.        ");
+    }
+    if (is_calculating_loss_change == 1) {
+        avg_loss_change = avg_loss_change * (1 - update_factor_for_avg_loss_change) 
+            + (bilinear_form_value - old_bilinear_form_value) * update_factor_for_avg_loss_change;
+    }
+    if (is_observed == 0 & ((is_monitoring_loss_change == 1 & avg_loss_change > 0) | iter > max_num_burnin_iterations)) {
+        print ("Burn-in ENDS, observation STARTS.        ");
+        is_observed = 1;
+    }
+    
+    old_bilinear_form_value = bilinear_form_value;
+    
+    bilinear_form_value_to_print = bilinear_form_value;
+    if (bilinear_form_value < 100000) {
+        bilinear_form_value_to_print = round (10000 * bilinear_form_value) / 10000;
+    } else {
+    if (bilinear_form_value < 1000000000) {
+        bilinear_form_value_to_print = round (bilinear_form_value);
+    }}
+
+    if (is_monitoring_loss_change == 0) {
+        print ("MCMC iteration " + iter + ":  Prob [loss_increase] = " + (round (10000 * avg_prob_of_loss_increase) / 10000)
+            + ",  bilinear form value = " + bilinear_form_value_to_print);
+    } else {
+        print ("MCMC iteration " + iter + ":  Prob [loss_increase] = " + (round (10000 * avg_prob_of_loss_increase) / 10000) 
+            + ",  bilinear form value = " + bilinear_form_value_to_print + ",  avg_loss_change = " + (round (10000 * avg_loss_change) / 10000));
+    }
+    
+    # Create a normally distributed random sample
+    
+    dim_half_sample = castAsScalar (round (dim_sample / 2 + 0.1 + zero));
+    rnd1 = Rand (rows = dim_half_sample, cols = 1, min = 0.0, max = 1.0);
+    rnd2 = Rand (rows = dim_half_sample, cols = 1, min = 0.0, max = 1.0);
+    rnd_normal_1 = sqrt (- 2.0 * log (rnd1)) * sin (2 * pi * rnd2);
+    rnd_normal_2 = sqrt (- 2.0 * log (rnd1)) * cos (2 * pi * rnd2);
+    rnd_normal = matrix (0.0, rows = dim_sample, cols = 1);
+    rnd_normal [1 : dim_half_sample, ] = rnd_normal_1;
+    rnd_normal [(dim_sample - dim_half_sample + 1) : dim_sample, ] = rnd_normal_2;
+        
+    # Initialize updaters
+    
+    freeVars_updater = freeVars * 0.0;
+    params_updater = params * 0.0;
+    regresValues_updater = regresValues * 0.0;
+    regresParams_updater = regresParams * 0.0;
+    bilinear_updater_vector = bilinear_form_vector * 0.0;
+    
+    # Perform the sampling
+
+    for (idx in 1:dim_sample)
+    {
+        # Generate the sample unit-vector and updaters
+        
+        if (castAsScalar (isVar [1, idx]) > 0.5) {
+            freeVars_updater = SampleOrder [1 : num_frees, idx];
+            regresValues_updater = RegresValueMap %*% CReps %*% freeVars_updater;
+            bilinear_updater_vector = regresValues_updater * regresParams;
+        } else {
+            params_updater = SampleOrder [(num_frees + 1) : dim_sample, idx];
+            regresParams_updater = RegresParamMap %*% params_updater;
+            bilinear_updater_vector = regresValues * regresParams_updater;
+        }
+        bilinear_updater = matrix (bilinear_updater_vector, rows = num_reg_eqs, cols = num_factors, byrow = TRUE);
+            
+        # Compute the quadratic by three shift-points: -1, 0, +1
+
+        bilinear_form_value = sum (RegresScaleMult * rowSums (bilinear_form) * rowSums (bilinear_form));
+        q_minus_1 = sum (RegresScaleMult * rowSums (bilinear_form - bilinear_updater) * rowSums (bilinear_form - bilinear_updater));
+        q_plus_1  = sum (RegresScaleMult * rowSums (bilinear_form + bilinear_updater) * rowSums (bilinear_form + bilinear_updater));
+        coeff_b = (q_plus_1 - q_minus_1) / 2.0;
+        coeff_a = (q_plus_1 + q_minus_1) / 2.0 - bilinear_form_value;
+
+        # Find the mean and the sigma for f(x) ~ exp (- (ax^2 + bx + c)),
+        # then compute the shift to get the new sample
+            
+        mean_shift  = - coeff_b / (2.0 * coeff_a);
+        sigma_shift = 1.0 / sqrt (2.0 * coeff_a);
+        shift = mean_shift + sigma_shift * castAsScalar (rnd_normal [idx, 1]);
+            
+# BEGIN DEBUG INSERT
+# mmm = 1;
+# if (castAsScalar (isVar [1, idx]) > 0.5 &          # IT IS A FREE VARIABLE, NOT A PARAMETER
+#     castAsScalar (freeVars_updater [mmm, 1]) > 0)  # IT IS mmm-TH FREE VARIABLE
+# {
+# #   print ("freeVars[" + mmm + "]:  q_minus_1 = " + q_minus_1 + ",   q_plus_1 = " + q_plus_1 + ",   coeff_a = " + coeff_a + ",   coeff_b = " + coeff_b);
+#     print ("freeVars[" + mmm + "]:  q_minus_1 = " + q_minus_1 + ",   q_plus_1 = " + q_plus_1 + ",   mean_shift = " + mean_shift + ",   sigma_shift = " + sigma_shift + ",   shift = " + shift);
+# }
+# if (castAsScalar (isVar [1, idx]) <= 0.5 &       # IT IS A PARAMETER, NOT A FREE VARIABLE
+#     castAsScalar (params_updater [mmm, 1]) > 0)  # IT IS mmm-TH PARAMETER
+# {
+# #   print ("  params[" + mmm + "]:  q_minus_1 = " + q_minus_1 + ",   q_plus_1 = " + q_plus_1 + ",   coeff_a = " + coeff_a + ",   coeff_b = " + coeff_b);
+#     print ("  params[" + mmm + "]:  q_minus_1 = " + q_minus_1 + ",   q_plus_1 = " + q_plus_1 + ",   mean_shift = " + mean_shift + ",   sigma_shift = " + sigma_shift + ",   shift = " + shift);
+# }
+# END DEBUG INSERT
+
+        # Perform the updates
+
+        bilinear_form = bilinear_form + shift * bilinear_updater;
+        if (castAsScalar (isVar [1, idx]) > 0.5) {
+            freeVars = freeVars + shift * freeVars_updater;
+            regresValues = regresValues + shift * regresValues_updater;
+        } else {
+            params = params + shift * params_updater;
+            regresParams = regresParams + shift * regresParams_updater;
+        }
+    }
+    
+    # Update / adjust the reports and the parameters
+    
+    reports = CReps %*% freeVars + dReps;
+    reports_matrix = matrix (reports, rows = num_attrs, cols = num_terms, byrow = FALSE);
+        
+    # Make an observation of the reports and/or the parameters
+    
+    if (is_observed > 0)
+    {
+        sum_of_observed_reports = sum_of_observed_reports + reports_matrix;
+        num_of_observed_reports = num_of_observed_reports + 1;
+
+        sum_of_observed_params = sum_of_observed_params + params;
+        sum_of_observed_losses = sum_of_observed_losses + bilinear_form_value;
+    }
+
+# v1 =castAsScalar(round(10000*reports[1 + (num_terms - 1) * num_attrs, 1])/10000);
+# v2 =castAsScalar(round(10000*reports[2 + (num_terms - 1) * num_attrs, 1])/10000);
+# v3 =castAsScalar(round(10000*reports[3 + (num_terms - 1) * num_attrs, 1])/10000);
+# v4 =castAsScalar(round(10000*reports[4 + (num_terms - 1) * num_attrs, 1])/10000);
+# w1 =castAsScalar(round(10000*reports_matrix[ 1,num_terms])/10000);
+# w2 =castAsScalar(round(10000*reports_matrix[ 2,num_terms])/10000);
+# w3 =castAsScalar(round(10000*reports_matrix[ 3,num_terms])/10000);
+# w4 =castAsScalar(round(10000*reports_matrix[ 4,num_terms])/10000);
+
+# v5 =castAsScalar(round(reports_matrix[ 5,num_terms]));
+# v8 =castAsScalar(round(reports_matrix[ 8,num_terms]));
+# v9 =castAsScalar(round(reports_matrix[ 9,num_terms]));
+# v10=castAsScalar(round(reports_matrix[10,num_terms]));
+# v16=castAsScalar(round(reports_matrix[16,num_terms]));
+# v19=castAsScalar(round(reports_matrix[19,num_terms]));
+
+#print (" Sample = 1:" + v1 + ", 2:" + v2 + ", 3:" + v3 + ", 4:" + v4);
+## + ", 5:" + v5 + ", 8:" + v8 + ", 9:" + v9 + ", 10:" + v10 + ", 16:" + v16 + ", 19:" + v19);
+#print (" Sample = 1:" + w1 + ", 2:" + w2 + ", 3:" + w3 + ", 4:" + w4);
+## + ", 5:" + w5 + ", 8:" + w8 + ", 9:" + w9 + ", 10:" + w10 + ", 16:" + w16 + ", 19:" + w19);
+
+}
+
+print ("Average observed loss = " + (sum_of_observed_losses / num_of_observed_reports));
+print ("Writing out the results...");
+
+avg_reports_matrix = sum_of_observed_reports / num_of_observed_reports;
+avg_params = sum_of_observed_params / num_of_observed_reports;
+write (avg_reports_matrix, $11, format="text");
+write (avg_params, $12, format="text");
+
+print ("END ImputeGaussMCMC");


[51/55] [partial] incubator-systemml git commit: [SYSTEMML-482] [SYSTEMML-480] Adding a Git attributes file to enforce Unix-styled line endings, and normalizing all of the line endings.

Posted by du...@apache.org.
[SYSTEMML-482] [SYSTEMML-480] Adding a Git attributes file to enforce Unix-styled line endings, and normalizing all of the line endings.

Closes #49.


Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/816e2db8
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/816e2db8
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/816e2db8

Branch: refs/heads/branch-0.9
Commit: 816e2db839191c480c870b3ff6755de5190a203a
Parents: cf5144e
Author: Mike Dusenberry <mw...@us.ibm.com>
Authored: Fri Jan 22 08:31:35 2016 -0800
Committer: Mike Dusenberry <mw...@us.ibm.com>
Committed: Fri Jan 22 08:31:35 2016 -0800

----------------------------------------------------------------------
 .gitattributes                                  |   13 +
 docs/devdocs/MatrixMultiplicationOperators.txt  |  256 +-
 scripts/algorithms/ALS-DS.dml                   |  338 +--
 scripts/algorithms/ALS_topk_predict.dml         |  252 +-
 scripts/algorithms/Cox-predict.dml              |  362 +--
 scripts/algorithms/Cox.dml                      | 1004 +++----
 scripts/algorithms/GLM-predict.dml              |  888 +++---
 scripts/algorithms/GLM.dml                      | 2334 +++++++--------
 scripts/algorithms/KM.dml                       | 1236 ++++----
 scripts/algorithms/Kmeans-predict.dml           |  678 ++---
 scripts/algorithms/Kmeans.dml                   |  564 ++--
 scripts/algorithms/LinearRegCG.dml              |  572 ++--
 scripts/algorithms/LinearRegDS.dml              |  448 +--
 scripts/algorithms/MultiLogReg.dml              |  730 ++---
 scripts/algorithms/PCA.dml                      |  224 +-
 scripts/algorithms/StepGLM.dml                  | 2390 +++++++--------
 scripts/algorithms/StepLinearRegDS.dml          |  776 ++---
 scripts/algorithms/Univar-Stats.dml             |  300 +-
 scripts/algorithms/bivar-stats.dml              |  796 ++---
 scripts/algorithms/decision-tree-predict.dml    |  284 +-
 scripts/algorithms/l2-svm.dml                   |  318 +-
 scripts/algorithms/m-svm-predict.dml            |  168 +-
 scripts/algorithms/m-svm.dml                    |  348 +--
 scripts/algorithms/random-forest-predict.dml    |  386 +--
 scripts/algorithms/random-forest.dml            | 2750 +++++++++---------
 scripts/algorithms/stratstats.dml               |  792 ++---
 scripts/datagen/genCorrelatedData.dml           |   92 +-
 scripts/datagen/genRandData4ChisquaredTest.dml  |  174 +-
 scripts/datagen/genRandData4DecisionTree1.dml   |   78 +-
 scripts/datagen/genRandData4DecisionTree2.dml   |   80 +-
 scripts/datagen/genRandData4FTest.dml           |  190 +-
 scripts/datagen/genRandData4Kmeans.dml          |  240 +-
 .../datagen/genRandData4LinearRegression.dml    |  122 +-
 scripts/datagen/genRandData4LogReg_LTstats.dml  |  466 +--
 scripts/datagen/genRandData4MultiClassSVM.dml   |  136 +-
 scripts/datagen/genRandData4NMF.dml             |  258 +-
 scripts/datagen/genRandData4NMFBlockwise.dml    |  276 +-
 scripts/datagen/genRandData4SurvAnalysis.dml    |  266 +-
 scripts/datagen/genRandData4Transform.dml       |  192 +-
 scripts/datagen/genRandData4Univariate.dml      |  116 +-
 scripts/staging/PPCA.dml                        |  320 +-
 scripts/staging/regression/lasso/lasso.dml      |  226 +-
 scripts/utils/project.dml                       |  160 +-
 scripts/utils/rowIndexMax.dml                   |   76 +-
 scripts/utils/splitXY.dml                       |  124 +-
 scripts/utils/write.dml                         |   78 +-
 .../org/apache/sysml/lops/BinaryScalar.java     |  394 +--
 .../java/org/apache/sysml/parser/antlr4/Dml.g4  |  402 +--
 .../controlprogram/parfor/RemoteDPParForMR.java |  584 ++--
 .../parfor/RemoteDPParWorkerReducer.java        |  744 ++---
 .../parfor/RemoteParForUtils.java               |  532 ++--
 .../parfor/opt/PerfTestToolRegression.dml       |  114 +-
 .../spark/data/BroadcastObject.java             |  128 +-
 .../instructions/spark/data/LineageObject.java  |  166 +-
 .../instructions/spark/data/RDDObject.java      |  248 +-
 .../matrix/mapred/GroupedAggMRCombiner.java     |  334 +--
 .../matrix/sort/IndexSortComparable.java        |  168 +-
 .../matrix/sort/IndexSortComparableDesc.java    |   98 +-
 .../runtime/matrix/sort/IndexSortMapper.java    |  150 +-
 .../runtime/matrix/sort/IndexSortReducer.java   |  206 +-
 .../matrix/sort/IndexSortStitchupMapper.java    |  282 +-
 .../matrix/sort/IndexSortStitchupReducer.java   |  148 +-
 .../runtime/matrix/sort/ValueSortMapper.java    |  196 +-
 .../runtime/matrix/sort/ValueSortReducer.java   |  136 +-
 .../sysml/runtime/transform/ApplyTfBBMR.java    |  312 +-
 .../runtime/transform/ApplyTfBBMapper.java      |  302 +-
 .../sysml/runtime/transform/ApplyTfCSVMR.java   |  256 +-
 .../runtime/transform/ApplyTfCSVMapper.java     |  224 +-
 .../runtime/transform/ApplyTfCSVSPARK.java      |  320 +-
 .../sysml/runtime/transform/BinAgent.java       |  710 ++---
 .../sysml/runtime/transform/DistinctValue.java  |  216 +-
 .../sysml/runtime/transform/DummycodeAgent.java |  852 +++---
 .../sysml/runtime/transform/GTFMTDMapper.java   |  214 +-
 .../sysml/runtime/transform/GTFMTDReducer.java  |  248 +-
 .../sysml/runtime/transform/GenTfMtdMR.java     |  212 +-
 .../sysml/runtime/transform/GenTfMtdSPARK.java  |  470 +--
 .../sysml/runtime/transform/MVImputeAgent.java  | 1954 ++++++-------
 .../sysml/runtime/transform/OmitAgent.java      |  246 +-
 .../sysml/runtime/transform/RecodeAgent.java    |    2 +-
 .../apache/sysml/runtime/transform/TfUtils.java | 1098 +++----
 .../runtime/transform/TransformationAgent.java  |  186 +-
 src/main/standalone/runStandaloneSystemML.bat   |  100 +-
 .../apply-transform/apply-transform.dml         |  310 +-
 .../apply-transform/apply-transform.pydml       |  290 +-
 .../applications/arima_box-jenkins/arima.dml    |  574 ++--
 .../applications/arima_box-jenkins/arima.pydml  |  516 ++--
 .../applications/ctableStats/Binomial.dml       |  342 +--
 .../scripts/applications/ctableStats/ctci.dml   |  290 +-
 .../applications/ctableStats/ctci_odds.dml      |  356 +--
 .../applications/ctableStats/stratstats.dml     |  700 ++---
 .../applications/ctableStats/wilson_score.dml   |  290 +-
 .../applications/ctableStats/zipftest.dml       |  154 +-
 .../applications/descriptivestats/Categorical.R |  114 +-
 .../descriptivestats/Categorical.dml            |  110 +-
 .../descriptivestats/CategoricalCategorical.R   |   98 +-
 .../descriptivestats/CategoricalCategorical.dml |  112 +-
 .../CategoricalCategoricalWithWeightsTest.R     |  134 +-
 .../CategoricalCategoricalWithWeightsTest.dml   |  120 +-
 .../applications/descriptivestats/IQMTest.dml   |   68 +-
 .../applications/descriptivestats/OddsRatio.R   |  150 +-
 .../applications/descriptivestats/OddsRatio.dml |  228 +-
 .../descriptivestats/OrdinalOrdinal.R           |   76 +-
 .../descriptivestats/OrdinalOrdinal.dml         |  148 +-
 .../OrdinalOrdinalWithWeightsTest.R             |   92 +-
 .../OrdinalOrdinalWithWeightsTest.dml           |  156 +-
 .../descriptivestats/QuantileTest.dml           |   66 +-
 .../applications/descriptivestats/Scale.R       |  282 +-
 .../applications/descriptivestats/Scale.dml     |  228 +-
 .../descriptivestats/ScaleCategorical.R         |  138 +-
 .../descriptivestats/ScaleCategorical.dml       |  124 +-
 .../ScaleCategoricalWithWeightsTest.R           |  156 +-
 .../ScaleCategoricalWithWeightsTest.dml         |  130 +-
 .../applications/descriptivestats/ScaleScale.R  |   76 +-
 .../descriptivestats/ScaleScale.dml             |   96 +-
 .../ScaleScalePearsonRWithWeightsTest.R         |   90 +-
 .../ScaleScalePearsonRWithWeightsTest.dml       |  102 +-
 .../descriptivestats/SimpleQuantileTest.dml     |   68 +-
 .../descriptivestats/WeightedCategoricalTest.R  |  126 +-
 .../WeightedCategoricalTest.dml                 |  112 +-
 .../descriptivestats/WeightedScaleTest.R        |  310 +-
 .../descriptivestats/WeightedScaleTest.dml      |  250 +-
 src/test/scripts/applications/glm/GLM.R         |  282 +-
 src/test/scripts/applications/glm/GLM.dml       | 2334 +++++++--------
 src/test/scripts/applications/id3/id3.R         |  484 +--
 .../applications/impute/imputeGaussMCMC.dml     | 1374 ++++-----
 .../impute/imputeGaussMCMC.nogradient.dml       |  906 +++---
 .../applications/impute/old/imputeGaussMCMC.dml |  840 +++---
 .../impute/old/wfundInputGenerator.dml          |  806 ++---
 .../impute/test/testInputGenerator.dml          |  304 +-
 .../testShadowRecurrenceInputGenerator.dml      |  348 +--
 src/test/scripts/applications/impute/tmp.dml    |  254 +-
 .../wfundInputGenerator.The0thReportAttempt.dml | 1002 +++----
 .../wfundInputGenerator.pre2013-08-26.dml       |  884 +++---
 .../impute/wfundInputGenerator1.dml             |  938 +++---
 .../impute/wfundInputGenerator2.dml             |  892 +++---
 src/test/scripts/applications/kmeans/Kmeans.dml |  216 +-
 src/test/scripts/applications/l2svm/L2SVM.R     |  206 +-
 src/test/scripts/applications/l2svm/L2SVM.dml   |  248 +-
 src/test/scripts/applications/l2svm/L2SVM.pydml |  238 +-
 .../scripts/applications/l2svm/L2SVMTest.Rt     |  142 +-
 .../scripts/applications/l2svm/L2SVMTest.dmlt   |  160 +-
 .../applications/linearLogReg/LinearLogReg.R    |  434 +--
 .../applications/linearLogReg/LinearLogReg.dml  |  462 +--
 .../linearLogReg/LinearLogReg.pydml             |  428 +--
 src/test/scripts/applications/m-svm/m-svm.R     |  240 +-
 src/test/scripts/applications/m-svm/m-svm.dml   |  290 +-
 src/test/scripts/applications/m-svm/m-svm.pydml |  272 +-
 .../applications/mdabivar/MDABivariateStats.R   |  588 ++--
 .../applications/mdabivar/MDABivariateStats.dml |  520 ++--
 .../mdabivar/MDABivariateStats.pydml            |  488 ++--
 .../naive-bayes-parfor/naive-bayes.R            |  142 +-
 .../naive-bayes-parfor/naive-bayes.dml          |  156 +-
 .../naive-bayes-parfor/naive-bayes.pydml        |  158 +-
 .../applications/naive-bayes/naive-bayes.R      |  142 +-
 .../applications/naive-bayes/naive-bayes.dml    |  134 +-
 .../applications/naive-bayes/naive-bayes.pydml  |  142 +-
 .../applications/parfor/parfor_bivariate.R      |  310 +-
 .../applications/parfor/parfor_bivariate0.dml   |  528 ++--
 .../applications/parfor/parfor_bivariate1.dml   |  512 ++--
 .../applications/parfor/parfor_bivariate2.dml   |  512 ++--
 .../applications/parfor/parfor_bivariate3.dml   |  512 ++--
 .../applications/parfor/parfor_bivariate4.dml   |  516 ++--
 .../scripts/applications/parfor/parfor_corr.R   |   96 +-
 .../applications/parfor/parfor_corr0.dml        |  100 +-
 .../applications/parfor/parfor_corr1.dml        |   98 +-
 .../applications/parfor/parfor_corr2.dml        |   98 +-
 .../applications/parfor/parfor_corr3.dml        |  100 +-
 .../applications/parfor/parfor_corr4.dml        |  100 +-
 .../applications/parfor/parfor_corr5.dml        |  100 +-
 .../applications/parfor/parfor_corr6.dml        |  100 +-
 .../applications/parfor/parfor_corr_large.R     |   96 +-
 .../applications/parfor/parfor_corr_large1.dml  |  100 +-
 .../applications/parfor/parfor_corr_large2.dml  |  100 +-
 .../applications/parfor/parfor_corr_large3.dml  |  100 +-
 .../applications/parfor/parfor_corr_large4.dml  |  100 +-
 .../parfor/parfor_cv_multiclasssvm.R            |  582 ++--
 .../parfor/parfor_cv_multiclasssvm0.dml         |  548 ++--
 .../parfor/parfor_cv_multiclasssvm1.dml         |  548 ++--
 .../parfor/parfor_cv_multiclasssvm4.dml         |  548 ++--
 .../applications/parfor/parfor_naive-bayes.R    |  122 +-
 .../applications/parfor/parfor_univariate.R     |  310 +-
 .../applications/parfor/parfor_univariate0.dml  |  332 +--
 .../applications/parfor/parfor_univariate1.dml  |  332 +--
 .../applications/parfor/parfor_univariate4.dml  |  332 +--
 .../validation/LinearLogisticRegression.dml     |  492 ++--
 .../genRandData4LogisticRegression.dml          |  244 +-
 .../applications/welchTTest/welchTTest.R        |   98 +-
 .../applications/welchTTest/welchTTest.dml      |   86 +-
 .../applications/welchTTest/welchTTest.pydml    |   86 +-
 src/test/scripts/functions/aggregate/AllMax.R   |   56 +-
 src/test/scripts/functions/aggregate/AllMean.R  |   56 +-
 src/test/scripts/functions/aggregate/AllMin.R   |   56 +-
 src/test/scripts/functions/aggregate/AllProd.R  |   56 +-
 src/test/scripts/functions/aggregate/AllSum.R   |   56 +-
 src/test/scripts/functions/aggregate/ColMaxs.R  |   66 +-
 src/test/scripts/functions/aggregate/ColMeans.R |   56 +-
 src/test/scripts/functions/aggregate/ColMins.R  |   66 +-
 src/test/scripts/functions/aggregate/ColSums.R  |   56 +-
 src/test/scripts/functions/aggregate/DiagSum.R  |   56 +-
 .../functions/aggregate/GroupedAggregate.R      |  120 +-
 .../aggregate/GroupedAggregateMatrix.R          |  138 +-
 .../aggregate/GroupedAggregateWeights.R         |  140 +-
 .../scripts/functions/aggregate/RowIndexMaxs.R  |   54 +-
 .../scripts/functions/aggregate/RowIndexMins.R  |   54 +-
 .../functions/aggregate/RowIndexMins.dml        |   48 +-
 src/test/scripts/functions/aggregate/RowMaxs.R  |   66 +-
 src/test/scripts/functions/aggregate/RowMeans.R |   56 +-
 src/test/scripts/functions/aggregate/RowMins.R  |   66 +-
 src/test/scripts/functions/aggregate/RowSums.R  |   56 +-
 .../scripts/functions/append/AppendChainTest.R  |   68 +-
 .../functions/append/AppendChainTest.dml        |   54 +-
 .../scripts/functions/append/AppendMatrixTest.R |   60 +-
 .../functions/append/AppendMatrixTest.dml       |   50 +-
 .../scripts/functions/append/AppendVectorTest.R |   60 +-
 .../functions/append/AppendVectorTest.dml       |   50 +-
 .../functions/append/RBindCBindMatrixTest.R     |   62 +-
 .../functions/append/RBindCBindMatrixTest.dml   |   54 +-
 .../scripts/functions/append/RBindMatrixTest.R  |   62 +-
 .../functions/append/RBindMatrixTest.dml        |   54 +-
 .../functions/append/basic_string_append.dml    |   54 +-
 .../functions/append/loop_string_append.dml     |   62 +-
 .../functions/binary/matrix/BinUaggChain_Col.R  |   64 +-
 .../functions/binary/matrix/CentralMoment.R     |   72 +-
 .../functions/binary/matrix/Covariance.R        |   72 +-
 .../binary/matrix/DiagMatrixMultiplication.R    |   76 +-
 .../matrix/DiagMatrixMultiplicationTranspose.R  |   68 +-
 .../binary/matrix/ElementwiseModulusTest.dml    |   52 +-
 ...ElementwiseModulusVariableDimensionsTest.dml |   52 +-
 src/test/scripts/functions/binary/matrix/IQM.R  |   86 +-
 .../functions/binary/matrix/MapMultChain.R      |   68 +-
 .../binary/matrix/MapMultChainWeights.R         |   70 +-
 .../binary/matrix/MapMultChainWeights2.R        |   70 +-
 .../binary/matrix/MapMultLimitTest.dml          |   62 +-
 .../binary/matrix/MatrixVectorMultiplication.R  |   72 +-
 .../scripts/functions/binary/matrix/Median.R    |   68 +-
 .../functions/binary/matrix/OuterProduct.R      |   82 +-
 .../scripts/functions/binary/matrix/Quantile.R  |   70 +-
 .../binary/matrix/ScalarModulusTest.dml         |   78 +-
 .../matrix/TransposeMatrixMultiplication.R      |   68 +-
 .../matrix/TransposeMatrixMultiplicationMinus.R |   68 +-
 .../TransposeMatrixMultiplicationMinus.dml      |   54 +-
 .../functions/binary/matrix/UaggOuterChain.R    |   68 +-
 .../binary/matrix/UaggOuterChainColSums.R       |   68 +-
 .../binary/matrix/UaggOuterChainEquals.R        |   68 +-
 .../binary/matrix/UaggOuterChainEqualsColSums.R |   68 +-
 .../binary/matrix/UaggOuterChainEqualsSums.R    |   68 +-
 .../binary/matrix/UaggOuterChainGreater.R       |   28 +-
 .../matrix/UaggOuterChainGreaterColSums.R       |   28 +-
 .../binary/matrix/UaggOuterChainGreaterEquals.R |   28 +-
 .../matrix/UaggOuterChainGreaterEqualsColSums.R |   28 +-
 .../binary/matrix/UaggOuterChainLessEquals.R    |   28 +-
 .../matrix/UaggOuterChainLessEqualsColSums.R    |   28 +-
 .../binary/matrix/UaggOuterChainNotEquals.R     |   28 +-
 .../matrix/UaggOuterChainNotEqualsColSums.R     |   28 +-
 .../binary/matrix/UaggOuterChainSums.R          |   66 +-
 .../matrix/UltraSparseMatrixMultiplication.R    |   32 +-
 .../matrix/UltraSparseMatrixMultiplication2.R   |   32 +-
 .../scripts/functions/binary/matrix/ZipMMTest.R |   68 +-
 .../FullMatrixCellwiseOperation_Addition.R      |   34 +-
 .../FullMatrixCellwiseOperation_Division.R      |   34 +-
 ...FullMatrixCellwiseOperation_Multiplication.R |   34 +-
 .../FullMatrixCellwiseOperation_Substraction.R  |   34 +-
 ...lMatrixVectorColCellwiseOperation_Addition.R |   34 +-
 ...lMatrixVectorColCellwiseOperation_Division.R |   34 +-
 ...xVectorColCellwiseOperation_Multiplication.R |   34 +-
 ...rixVectorColCellwiseOperation_Substraction.R |   34 +-
 ...lMatrixVectorRowCellwiseOperation_Addition.R |   36 +-
 ...lMatrixVectorRowCellwiseOperation_Division.R |   36 +-
 ...xVectorRowCellwiseOperation_Multiplication.R |   36 +-
 ...rixVectorRowCellwiseOperation_Substraction.R |   36 +-
 .../FullVectorVectorCellwiseOperation.R         |   38 +-
 .../matrix_full_cellwise/Minus1MultTest.R       |   78 +-
 .../FullDistributedMatrixMultiplication.R       |   34 +-
 .../FullMatrixMultiplication.R                  |   34 +-
 .../matrix_full_other/FullMinMaxComparison.R    |   74 +-
 .../binary/matrix_full_other/FullPower.R        |   40 +-
 .../matrix_full_other/IntegerDivision_div.R     |   40 +-
 .../matrix_full_other/IntegerDivision_mod.R     |   40 +-
 .../binary/matrix_full_other/PPredMatrixTest.R  |   74 +-
 .../matrix_full_other/PPredScalarLeftTest.R     |   76 +-
 .../matrix_full_other/PPredScalarRightTest.R    |   76 +-
 .../TransposeSelfMatrixMultiplication1.R        |   22 +-
 .../TransposeSelfMatrixMultiplication2.R        |   22 +-
 .../binary/scalar/ModulusSingleTest.dml         |   16 +-
 .../functions/binary/scalar/ModulusTest.dml     |   46 +-
 src/test/scripts/functions/caching/export.dml   |    4 +-
 .../functions/data/RandRuntimePlatformTest.dml  |   52 +-
 .../data/RandRuntimePlatformTestPoisson.dml     |   54 +-
 src/test/scripts/functions/data/RandVarMinMax.R |   34 +-
 src/test/scripts/functions/data/Sample2.dml     |    6 +-
 src/test/scripts/functions/data/Sample3.dml     |    6 +-
 src/test/scripts/functions/data/Sample4.dml     |    6 +-
 src/test/scripts/functions/data/Sequence.R      |   16 +-
 src/test/scripts/functions/data/Sequence.dml    |    8 +-
 .../scripts/functions/data/Sequence2inputs.R    |   16 +-
 .../scripts/functions/data/Sequence2inputs.dml  |    8 +-
 src/test/scripts/functions/data/StrInit.dml     |   46 +-
 .../functions/data/WriteMMComplexTest.dml       |   20 +-
 src/test/scripts/functions/data/WriteMMTest.dml |   12 +-
 .../scripts/functions/external/DynProject.R     |   32 +-
 .../scripts/functions/external/DynProject.dml   |   20 +-
 .../scripts/functions/external/DynReadWrite.dml |   36 +-
 .../functions/external/FunctionExpressions1.dml |   32 +-
 .../functions/external/FunctionExpressions2.dml |   32 +-
 src/test/scripts/functions/external/Order.R     |   22 +-
 src/test/scripts/functions/external/Order1.dml  |   18 +-
 src/test/scripts/functions/external/Order2.dml  |   18 +-
 src/test/scripts/functions/external/Outlier.dml |   38 +-
 src/test/scripts/functions/external/kMeans.dml  |   40 +-
 src/test/scripts/functions/external/kMeans2.dml |   54 +-
 src/test/scripts/functions/gdfo/LinregCG.R      |   72 +-
 src/test/scripts/functions/gdfo/LinregCG.dml    |   70 +-
 src/test/scripts/functions/gdfo/LinregDS.R      |   42 +-
 src/test/scripts/functions/gdfo/LinregDS.dml    |   36 +-
 src/test/scripts/functions/gdfo/LinregDSsimpl.R |   26 +-
 .../scripts/functions/gdfo/LinregDSsimpl.dml    |   20 +-
 src/test/scripts/functions/gdfo/MMChainLoop.R   |   32 +-
 src/test/scripts/functions/gdfo/MMChainLoop.dml |   32 +-
 .../scripts/functions/indexing/Jdk7IssueTest.R  |   26 +-
 .../functions/indexing/Jdk7IssueTest.dml        |   22 +-
 .../functions/indexing/LeftIndexingScalarTest.R |   26 +-
 .../indexing/LeftIndexingScalarTest.dml         |   18 +-
 .../indexing/LeftIndexingSparseDenseTest.R      |   26 +-
 .../indexing/LeftIndexingSparseDenseTest.dml    |   14 +-
 .../indexing/LeftIndexingSparseSparseTest.R     |   26 +-
 .../indexing/LeftIndexingSparseSparseTest.dml   |   14 +-
 .../functions/indexing/LeftIndexingTest.R       |   44 +-
 .../functions/indexing/LeftIndexingTest.dml     |   20 +-
 .../indexing/RightIndexingMatrixTest.R          |   28 +-
 .../indexing/RightIndexingMatrixTest.dml        |   16 +-
 .../indexing/RightIndexingVectorTest.R          |   30 +-
 .../indexing/RightIndexingVectorTest.dml        |   16 +-
 .../scripts/functions/io/ScalarComputeWrite.dml |   64 +-
 src/test/scripts/functions/io/ScalarRead.dml    |   46 +-
 src/test/scripts/functions/io/ScalarWrite.dml   |   16 +-
 .../scripts/functions/io/SeqParReadTest.dml     |   12 +-
 .../scripts/functions/io/csv/ReadCSVTest_1.dml  |   54 +-
 .../scripts/functions/io/csv/ReadCSVTest_2.dml  |   54 +-
 .../scripts/functions/io/csv/ReadCSVTest_3.dml  |   54 +-
 .../scripts/functions/io/csv/WriteCSVTest.dml   |   56 +-
 src/test/scripts/functions/io/csv/csv_test.dml  |   14 +-
 src/test/scripts/functions/io/csv/csv_verify.R  |   28 +-
 .../scripts/functions/io/csv/csv_verify.dml     |   14 +-
 src/test/scripts/functions/io/csv/csv_verify2.R |   22 +-
 .../scripts/functions/io/csv/csvprop_read.dml   |   28 +-
 .../scripts/functions/io/csv/csvprop_write.dml  |   14 +-
 .../functions/io/csv/in/transfusion_1.data      | 1496 +++++-----
 .../io/csv/in/transfusion_1.data.single         | 1496 +++++-----
 .../io/csv/in/transfusion_2.data.single         | 1496 +++++-----
 .../io/csv/in/transfusion_2.data/part-0         | 1182 ++++----
 .../io/csv/in/transfusion_2.data/part-1         |  312 +-
 .../io/csv/in/transfusion_3.data.single         | 1496 +++++-----
 .../io/csv/in/transfusion_3.data/part-0         | 1380 ++++-----
 .../io/csv/in/transfusion_3.data/part-1         |  114 +-
 .../scripts/functions/io/csv/writecsv_verify.R  |   24 +-
 .../functions/io/matrixmarket/ReadMMTest_1.dml  |   20 +-
 .../functions/io/matrixmarket/ReadMMTest_2.dml  |   16 +-
 .../functions/io/matrixmarket/ReadMMTest_3.dml  |   16 +-
 .../functions/io/matrixmarket/in/ReadMMTest.mtx |   60 +-
 .../functions/io/matrixmarket/mm_test1.dml      |   14 +-
 .../functions/io/matrixmarket/mm_verify.R       |   22 +-
 .../functions/io/matrixmarket/mm_verify.dml     |   14 +-
 src/test/scripts/functions/jmlc/m-svm-score.R   |   44 +-
 src/test/scripts/functions/jmlc/m-svm-score.dml |   68 +-
 .../functions/jmlc/reuse-glm-predict.dml        |  766 ++---
 .../functions/jmlc/reuse-msvm-predict.dml       |  148 +-
 .../functions/misc/ForScalarAssignmentTest.dml  |   12 +-
 .../misc/IPALiteralReplacement_ForIf.R          |   52 +-
 .../misc/IPALiteralReplacement_ForIf.dml        |   40 +-
 .../misc/IPALiteralReplacement_While.R          |   50 +-
 .../misc/IPALiteralReplacement_While.dml        |   40 +-
 .../functions/misc/IPAScalarRecursion.dml       |   34 +-
 .../functions/misc/IPAUnknownRecursion.R        |   60 +-
 .../functions/misc/IPAUnknownRecursion.dml      |   46 +-
 .../functions/misc/IfScalarAssignmentTest.dml   |   18 +-
 .../misc/InvalidFunctionSignatureTest1.dml      |   60 +-
 .../misc/InvalidFunctionSignatureTest2.dml      |   60 +-
 .../scripts/functions/misc/LengthStringTest.dml |    8 +-
 .../functions/misc/LengthUnknownCSVTest.dml     |    4 +-
 .../functions/misc/LongOverflowForLoop.dml      |   22 +-
 .../scripts/functions/misc/LongOverflowMult.dml |   14 +-
 .../scripts/functions/misc/LongOverflowPlus.dml |   14 +-
 .../scripts/functions/misc/NcolStringTest.dml   |    8 +-
 .../functions/misc/NcolUnknownCSVTest.dml       |    4 +-
 .../scripts/functions/misc/NrowStringTest.dml   |    8 +-
 .../functions/misc/NrowUnknownCSVTest.dml       |    4 +-
 .../scripts/functions/misc/OuterExpandTest.R    |   76 +-
 .../scripts/functions/misc/OuterExpandTest.dml  |   66 +-
 .../scripts/functions/misc/PackageFunCall1.dml  |   50 +-
 .../scripts/functions/misc/PackageFunCall2.dml  |   52 +-
 .../scripts/functions/misc/PackageFunLib.dml    |   52 +-
 .../misc/ParForScalarAssignmentTest.dml         |   12 +-
 .../functions/misc/PrintExpressionTest1.dml     |   46 +-
 .../functions/misc/PrintExpressionTest2.dml     |   46 +-
 .../scripts/functions/misc/PrintMatrixTest.dml  |   72 +-
 .../functions/misc/ReadAfterWriteMatrix1.dml    |   14 +-
 .../functions/misc/ReadAfterWriteMatrix2.dml    |   18 +-
 .../functions/misc/ReadAfterWriteScalar1.dml    |   14 +-
 .../functions/misc/ReadAfterWriteScalar2.dml    |   18 +-
 .../functions/misc/RewriteColSumsMVMult.R       |   18 +-
 .../functions/misc/RewriteColSumsMVMult.dml     |    8 +-
 .../functions/misc/RewriteRowSumsMVMult.R       |   18 +-
 .../functions/misc/RewriteRowSumsMVMult.dml     |    8 +-
 .../functions/misc/RewriteSlicedMatrixMult.R    |   66 +-
 .../functions/misc/RewriteSlicedMatrixMult.dml  |   56 +-
 .../functions/misc/ScalarFunctionTest1.R        |   70 +-
 .../functions/misc/ScalarFunctionTest1.dml      |   56 +-
 .../functions/misc/ScalarFunctionTest2.R        |   72 +-
 .../functions/misc/ScalarFunctionTest2.dml      |   58 +-
 .../scripts/functions/misc/TableExpandTest.R    |   82 +-
 .../scripts/functions/misc/TableExpandTest.dml  |   66 +-
 .../misc/WhileScalarAssignmentTest.dml          |   16 +-
 src/test/scripts/functions/misc/castBoolean.dml |    8 +-
 src/test/scripts/functions/misc/castDouble.dml  |    8 +-
 src/test/scripts/functions/misc/castInteger.dml |    8 +-
 .../scripts/functions/misc/castMatrixScalar.dml |    8 +-
 .../scripts/functions/misc/castScalarMatrix.dml |    8 +-
 .../functions/misc/conditionalPredicateIf.dml   |   24 +-
 .../misc/conditionalPredicateWhile.dml          |   14 +-
 .../functions/misc/conditionalValidate1.dml     |    8 +-
 .../functions/misc/conditionalValidate2.dml     |   22 +-
 .../functions/misc/conditionalValidate3.dml     |   18 +-
 .../functions/misc/conditionalValidate4.dml     |   18 +-
 .../scripts/functions/misc/dt_change_1a.dml     |   24 +-
 .../scripts/functions/misc/dt_change_1b.dml     |   24 +-
 .../scripts/functions/misc/dt_change_1c.dml     |   24 +-
 .../scripts/functions/misc/dt_change_1d.dml     |   24 +-
 .../scripts/functions/misc/dt_change_1e.dml     |   24 +-
 .../scripts/functions/misc/dt_change_1f.dml     |   24 +-
 .../scripts/functions/misc/dt_change_1g.dml     |   20 +-
 .../scripts/functions/misc/dt_change_1h.dml     |   20 +-
 .../scripts/functions/misc/dt_change_2a.dml     |   20 +-
 .../scripts/functions/misc/dt_change_2b.dml     |   20 +-
 .../scripts/functions/misc/dt_change_2c.dml     |   20 +-
 .../scripts/functions/misc/dt_change_2d.dml     |   20 +-
 .../scripts/functions/misc/dt_change_2e.dml     |   20 +-
 .../scripts/functions/misc/dt_change_2f.dml     |   20 +-
 .../scripts/functions/misc/dt_change_3a.dml     |   24 +-
 .../scripts/functions/misc/dt_change_3b.dml     |   26 +-
 .../scripts/functions/misc/dt_change_3c.dml     |   24 +-
 .../scripts/functions/misc/dt_change_3d.dml     |   24 +-
 .../scripts/functions/misc/dt_change_3e.dml     |   24 +-
 .../scripts/functions/misc/dt_change_3f.dml     |   24 +-
 .../scripts/functions/misc/dt_change_4a.dml     |   14 +-
 .../scripts/functions/misc/dt_change_4b.dml     |   14 +-
 .../scripts/functions/misc/dt_change_4c.dml     |   28 +-
 .../scripts/functions/misc/dt_change_4d.dml     |   30 +-
 .../scripts/functions/misc/dt_change_4e.dml     |   16 +-
 .../scripts/functions/misc/dt_change_4f.dml     |   16 +-
 .../scripts/functions/misc/functionInlining.dml |   44 +-
 .../functions/misc/functionNoInlining.dml       |   52 +-
 .../functions/misc/function_chain_inlining.dml  |   26 +-
 .../misc/function_chain_non_inlining.dml        |   36 +-
 .../misc/function_recursive_inlining.dml        |   52 +-
 .../functions/misc/iterablePredicate.dml        |   34 +-
 .../scripts/functions/parfor/for_pred1a.dml     |   18 +-
 .../scripts/functions/parfor/for_pred1b.dml     |   18 +-
 .../scripts/functions/parfor/for_pred2a.dml     |   24 +-
 .../scripts/functions/parfor/for_pred2b.dml     |   26 +-
 .../scripts/functions/parfor/for_pred3a.dml     |   18 +-
 .../scripts/functions/parfor/for_pred3b.dml     |   18 +-
 src/test/scripts/functions/parfor/parfor1.dml   |   16 +-
 src/test/scripts/functions/parfor/parfor10.dml  |   20 +-
 src/test/scripts/functions/parfor/parfor11.dml  |   18 +-
 src/test/scripts/functions/parfor/parfor12.dml  |   18 +-
 src/test/scripts/functions/parfor/parfor13.dml  |   18 +-
 src/test/scripts/functions/parfor/parfor14.dml  |   18 +-
 src/test/scripts/functions/parfor/parfor15.dml  |   20 +-
 src/test/scripts/functions/parfor/parfor16.dml  |   20 +-
 src/test/scripts/functions/parfor/parfor17.dml  |   22 +-
 src/test/scripts/functions/parfor/parfor18.dml  |   22 +-
 src/test/scripts/functions/parfor/parfor19.dml  |   22 +-
 src/test/scripts/functions/parfor/parfor2.dml   |   24 +-
 src/test/scripts/functions/parfor/parfor20.dml  |   24 +-
 src/test/scripts/functions/parfor/parfor21.dml  |   24 +-
 src/test/scripts/functions/parfor/parfor22.dml  |   28 +-
 src/test/scripts/functions/parfor/parfor23.dml  |   22 +-
 src/test/scripts/functions/parfor/parfor24.dml  |   22 +-
 src/test/scripts/functions/parfor/parfor25.dml  |   20 +-
 src/test/scripts/functions/parfor/parfor26.dml  |   20 +-
 src/test/scripts/functions/parfor/parfor26b.dml |   18 +-
 src/test/scripts/functions/parfor/parfor26c.dml |   18 +-
 .../scripts/functions/parfor/parfor26c2.dml     |   18 +-
 src/test/scripts/functions/parfor/parfor26d.dml |   20 +-
 src/test/scripts/functions/parfor/parfor27.dml  |   24 +-
 src/test/scripts/functions/parfor/parfor28.dml  |   24 +-
 src/test/scripts/functions/parfor/parfor28b.dml |   36 +-
 src/test/scripts/functions/parfor/parfor28c.dml |   24 +-
 src/test/scripts/functions/parfor/parfor28d.dml |   24 +-
 src/test/scripts/functions/parfor/parfor28e.dml |   28 +-
 src/test/scripts/functions/parfor/parfor28f.dml |   34 +-
 src/test/scripts/functions/parfor/parfor28g.dml |   42 +-
 src/test/scripts/functions/parfor/parfor28h.dml |   40 +-
 src/test/scripts/functions/parfor/parfor29.dml  |   20 +-
 src/test/scripts/functions/parfor/parfor3.dml   |   18 +-
 src/test/scripts/functions/parfor/parfor30.dml  |   18 +-
 src/test/scripts/functions/parfor/parfor31.dml  |   32 +-
 src/test/scripts/functions/parfor/parfor31b.dml |   32 +-
 src/test/scripts/functions/parfor/parfor32.dml  |   26 +-
 src/test/scripts/functions/parfor/parfor32b.dml |   24 +-
 src/test/scripts/functions/parfor/parfor32c.dml |   26 +-
 src/test/scripts/functions/parfor/parfor32d.dml |   18 +-
 src/test/scripts/functions/parfor/parfor32e.dml |   20 +-
 src/test/scripts/functions/parfor/parfor33.dml  |   22 +-
 src/test/scripts/functions/parfor/parfor34.dml  |   22 +-
 src/test/scripts/functions/parfor/parfor35.dml  |   24 +-
 src/test/scripts/functions/parfor/parfor35b.dml |   60 +-
 src/test/scripts/functions/parfor/parfor35c.dml |   60 +-
 src/test/scripts/functions/parfor/parfor35d.dml |   60 +-
 src/test/scripts/functions/parfor/parfor36.dml  |   16 +-
 src/test/scripts/functions/parfor/parfor37.dml  |   16 +-
 src/test/scripts/functions/parfor/parfor38.dml  |   18 +-
 src/test/scripts/functions/parfor/parfor39.dml  |   18 +-
 src/test/scripts/functions/parfor/parfor4.dml   |   18 +-
 src/test/scripts/functions/parfor/parfor40.dml  |   32 +-
 src/test/scripts/functions/parfor/parfor41.dml  |   32 +-
 src/test/scripts/functions/parfor/parfor42.dml  |   18 +-
 src/test/scripts/functions/parfor/parfor43.dml  |   18 +-
 src/test/scripts/functions/parfor/parfor44.dml  |   16 +-
 src/test/scripts/functions/parfor/parfor45.dml  |   32 +-
 src/test/scripts/functions/parfor/parfor46.dml  |   50 +-
 src/test/scripts/functions/parfor/parfor47.dml  |   50 +-
 src/test/scripts/functions/parfor/parfor48.dml  |   22 +-
 src/test/scripts/functions/parfor/parfor48b.dml |   22 +-
 src/test/scripts/functions/parfor/parfor48c.dml |   20 +-
 src/test/scripts/functions/parfor/parfor49a.dml |   24 +-
 src/test/scripts/functions/parfor/parfor49b.dml |   46 +-
 src/test/scripts/functions/parfor/parfor5.dml   |   22 +-
 src/test/scripts/functions/parfor/parfor50.dml  |   98 +-
 src/test/scripts/functions/parfor/parfor51.dml  |   22 +-
 src/test/scripts/functions/parfor/parfor52.dml  |   24 +-
 src/test/scripts/functions/parfor/parfor6.dml   |   16 +-
 src/test/scripts/functions/parfor/parfor7.dml   |   18 +-
 src/test/scripts/functions/parfor/parfor8.dml   |   20 +-
 src/test/scripts/functions/parfor/parfor9.dml   |   20 +-
 src/test/scripts/functions/parfor/parfor_NaN1.R |   38 +-
 .../scripts/functions/parfor/parfor_NaN1.dml    |   28 +-
 src/test/scripts/functions/parfor/parfor_NaN2.R |   38 +-
 .../scripts/functions/parfor/parfor_NaN2.dml    |   28 +-
 .../parfor_cdatapartition_leftindexing.dml      |   28 +-
 .../functions/parfor/parfor_cdatapartitioning.R |   38 +-
 .../parfor/parfor_cdatapartitioning1.dml        |   28 +-
 .../parfor/parfor_cdatapartitioning2.dml        |   28 +-
 .../parfor/parfor_cdatapartitioning3.dml        |   28 +-
 .../parfor/parfor_cdatapartitioning4.dml        |   28 +-
 .../parfor/parfor_cdatapartitioning5.dml        |   28 +-
 .../parfor/parfor_cdatapartitioning6.R          |   38 +-
 .../parfor/parfor_cdatapartitioning6.dml        |   28 +-
 .../parfor/parfor_cdatapartitioning7.dml        |   28 +-
 .../parfor/parfor_cdatapartitioning8.dml        |   28 +-
 .../parfor/parfor_cdatapartitioning9.dml        |   28 +-
 .../scripts/functions/parfor/parfor_extfunct.R  |   36 +-
 .../functions/parfor/parfor_extfunct.dml        |   48 +-
 .../scripts/functions/parfor/parfor_funct.R     |   36 +-
 .../scripts/functions/parfor/parfor_funct.dml   |   36 +-
 .../functions/parfor/parfor_literals1a.dml      |   26 +-
 .../functions/parfor/parfor_literals1b.dml      |   26 +-
 .../functions/parfor/parfor_literals1c.dml      |   32 +-
 .../functions/parfor/parfor_literals2.dml       |   26 +-
 .../functions/parfor/parfor_literals3.dml       |   22 +-
 .../functions/parfor/parfor_literals4a.dml      |   22 +-
 .../functions/parfor/parfor_literals4b.dml      |   22 +-
 .../functions/parfor/parfor_mdatapartitioning.R |   72 +-
 .../parfor/parfor_mdatapartitioning1.dml        |   50 +-
 .../parfor/parfor_mdatapartitioning2.dml        |   48 +-
 .../functions/parfor/parfor_optimizer1.R        |   68 +-
 .../functions/parfor/parfor_optimizer1.dml      |   62 +-
 .../functions/parfor/parfor_optimizer2.R        |  270 +-
 .../functions/parfor/parfor_optimizer2.dml      |  512 ++--
 .../functions/parfor/parfor_optimizer3.R        |   42 +-
 .../functions/parfor/parfor_optimizer3.dml      |   60 +-
 .../functions/parfor/parfor_pr_resultmerge1a.R  |   42 +-
 .../parfor/parfor_pr_resultmerge1a.dml          |   26 +-
 .../functions/parfor/parfor_pr_resultmerge1b.R  |   42 +-
 .../parfor/parfor_pr_resultmerge1b.dml          |   26 +-
 .../functions/parfor/parfor_pr_resultmerge1c.R  |   42 +-
 .../parfor/parfor_pr_resultmerge1c.dml          |   26 +-
 .../functions/parfor/parfor_pr_resultmerge1d.R  |   42 +-
 .../parfor/parfor_pr_resultmerge1d.dml          |   26 +-
 .../functions/parfor/parfor_pr_resultmerge2.R   |   46 +-
 .../functions/parfor/parfor_pr_resultmerge2.dml |   30 +-
 .../functions/parfor/parfor_pr_resultmerge32.R  |  166 +-
 .../parfor/parfor_pr_resultmerge32.dml          |  152 +-
 .../parfor_rdatapartition_leftindexing.dml      |   30 +-
 .../functions/parfor/parfor_rdatapartitioning.R |   38 +-
 .../parfor/parfor_rdatapartitioning1.dml        |   28 +-
 .../parfor/parfor_rdatapartitioning2.dml        |   28 +-
 .../parfor/parfor_rdatapartitioning3.dml        |   28 +-
 .../parfor/parfor_rdatapartitioning4.dml        |   28 +-
 .../parfor/parfor_rdatapartitioning5.dml        |   28 +-
 .../parfor/parfor_rdatapartitioning6.R          |   38 +-
 .../parfor/parfor_rdatapartitioning6.dml        |   28 +-
 .../parfor/parfor_rdatapartitioning7.dml        |   28 +-
 .../parfor/parfor_rdatapartitioning8.dml        |   28 +-
 .../parfor/parfor_rdatapartitioning9.dml        |   28 +-
 .../functions/parfor/parfor_repeatedopt1.R      |   44 +-
 .../functions/parfor/parfor_repeatedopt1.dml    |   38 +-
 .../functions/parfor/parfor_repeatedopt2.R      |   54 +-
 .../functions/parfor/parfor_repeatedopt2.dml    |   48 +-
 .../functions/parfor/parfor_repeatedopt3.R      |   60 +-
 .../functions/parfor/parfor_repeatedopt3.dml    |   54 +-
 .../parfor/parfor_threadid_recompile1.dml       |   58 +-
 .../parfor/parfor_threadid_recompile2.dml       |   58 +-
 .../piggybacking/Piggybacking1_append.dml       |   62 +-
 .../piggybacking/Piggybacking1_mvmult.R         |   64 +-
 .../piggybacking/Piggybacking1_mvmult.dml       |   56 +-
 .../piggybacking/Piggybacking_eliminate.dml     |   72 +-
 .../functions/piggybacking/Piggybacking_iqm.dml |   64 +-
 .../quaternary/RewriteWeightedSigmoid.R         |   70 +-
 .../scripts/functions/quaternary/WeightedCeMM.R |   72 +-
 .../functions/quaternary/WeightedDivMMLeft.R    |   72 +-
 .../quaternary/WeightedDivMMMultBasic.R         |   72 +-
 .../quaternary/WeightedDivMMMultLeft.R          |   72 +-
 .../quaternary/WeightedDivMMMultMinusLeft.R     |   72 +-
 .../quaternary/WeightedDivMMMultMinusRight.R    |   72 +-
 .../quaternary/WeightedDivMMMultRight.R         |   72 +-
 .../functions/quaternary/WeightedDivMMRight.R   |   72 +-
 .../functions/quaternary/WeightedSigmoidP1.R    |   32 +-
 .../functions/quaternary/WeightedSigmoidP2.R    |   32 +-
 .../functions/quaternary/WeightedSigmoidP3.R    |   32 +-
 .../functions/quaternary/WeightedSigmoidP4.R    |   32 +-
 .../quaternary/WeightedSquaredLossNo.R          |   32 +-
 .../quaternary/WeightedSquaredLossNo2.R         |   74 +-
 .../quaternary/WeightedSquaredLossPost.R        |   34 +-
 .../quaternary/WeightedSquaredLossPost2.R       |   76 +-
 .../quaternary/WeightedSquaredLossPostNz.R      |   76 +-
 .../quaternary/WeightedSquaredLossPre.R         |   34 +-
 .../quaternary/WeightedSquaredLossPre2.R        |   76 +-
 .../quaternary/WeightedUnaryMMExpDiv.R          |   72 +-
 .../quaternary/WeightedUnaryMMExpMult.R         |   72 +-
 .../functions/quaternary/WeightedUnaryMMMult2.R |   72 +-
 .../functions/quaternary/WeightedUnaryMMPow2.R  |   72 +-
 .../recompile/LiteralReplaceCastScalar.dml      |   42 +-
 .../scripts/functions/recompile/append_nnz.dml  |   44 +-
 .../recompile/constant_propagation_if.R         |   40 +-
 .../recompile/constant_propagation_if.dml       |   26 +-
 .../recompile/constant_propagation_sb.R         |   52 +-
 .../recompile/constant_propagation_sb.dml       |   36 +-
 .../recompile/constant_propagation_while.R      |   44 +-
 .../recompile/constant_propagation_while.dml    |   30 +-
 .../functions/recompile/csv_read_unknown.dml    |   24 +-
 .../functions/recompile/for_recompile.dml       |   14 +-
 .../recompile/for_recompile_func_sparse.dml     |   64 +-
 .../recompile/for_recompile_sparse.dml          |   34 +-
 .../functions/recompile/funct_recompile.R       |   34 +-
 .../functions/recompile/funct_recompile.dml     |   58 +-
 .../functions/recompile/grpagg_rand_recompile.R |   22 +-
 .../recompile/grpagg_rand_recompile.dml         |   12 +-
 .../functions/recompile/if_branch_removal.R     |   52 +-
 .../functions/recompile/if_branch_removal.dml   |   48 +-
 .../functions/recompile/if_recompile.dml        |   14 +-
 .../recompile/if_recompile_func_sparse.dml      |   56 +-
 .../functions/recompile/if_recompile_sparse.dml |   26 +-
 .../recompile/multiple_function_calls1.R        |   36 +-
 .../recompile/multiple_function_calls1.dml      |   56 +-
 .../recompile/multiple_function_calls2.R        |   36 +-
 .../recompile/multiple_function_calls2.dml      |   56 +-
 .../recompile/multiple_function_calls3.R        |   36 +-
 .../recompile/multiple_function_calls3.dml      |   56 +-
 .../recompile/multiple_function_calls4.R        |   36 +-
 .../recompile/multiple_function_calls4.dml      |   56 +-
 .../recompile/multiple_function_calls5.R        |   36 +-
 .../recompile/multiple_function_calls5.dml      |   58 +-
 .../functions/recompile/multiple_reads.R        |   26 +-
 .../functions/recompile/multiple_reads.dml      |   16 +-
 .../functions/recompile/parfor_recompile.dml    |   14 +-
 .../recompile/parfor_recompile_func_sparse.dml  |   56 +-
 .../recompile/parfor_recompile_sparse.dml       |   24 +-
 .../functions/recompile/rand_recompile.dml      |   26 +-
 .../functions/recompile/rand_recompile2.dml     |   32 +-
 .../functions/recompile/rand_recompile3.dml     |   32 +-
 .../functions/recompile/rand_size_expr_eval.dml |   28 +-
 .../functions/recompile/rblk_recompile1.R       |   14 +-
 .../functions/recompile/rblk_recompile1.dml     |    4 +-
 .../functions/recompile/rblk_recompile2.R       |   18 +-
 .../functions/recompile/rblk_recompile2.dml     |    6 +-
 .../functions/recompile/rblk_recompile3.R       |   18 +-
 .../functions/recompile/rblk_recompile3.dml     |    8 +-
 .../recompile/remove_empty_potpourri1.R         |   18 +-
 .../recompile/remove_empty_potpourri1.dml       |    6 +-
 .../recompile/remove_empty_potpourri2.R         |   16 +-
 .../recompile/remove_empty_potpourri2.dml       |   14 +-
 .../recompile/remove_empty_potpourri3.R         |   26 +-
 .../recompile/remove_empty_potpourri3.dml       |   28 +-
 .../recompile/remove_empty_potpourri4.R         |   80 +-
 .../recompile/remove_empty_potpourri4.dml       |   82 +-
 .../recompile/remove_empty_recompile.R          |  106 +-
 .../recompile/remove_empty_recompile.dml        |  100 +-
 .../functions/recompile/rewrite_mapmultchain1.R |   28 +-
 .../recompile/rewrite_mapmultchain1.dml         |   28 +-
 .../functions/recompile/rewrite_mapmultchain2.R |   28 +-
 .../recompile/rewrite_mapmultchain2.dml         |   28 +-
 .../functions/recompile/while_recompile.dml     |   14 +-
 .../recompile/while_recompile_func_sparse.dml   |   62 +-
 .../recompile/while_recompile_sparse.dml        |   32 +-
 src/test/scripts/functions/reorg/DiagV2MTest.R  |   24 +-
 .../scripts/functions/reorg/DiagV2MTest.dml     |   12 +-
 .../scripts/functions/reorg/MatrixReshape1.R    |   24 +-
 .../scripts/functions/reorg/MatrixReshape1.dml  |    8 +-
 .../scripts/functions/reorg/MatrixReshape2.R    |   24 +-
 .../scripts/functions/reorg/MatrixReshape2.dml  |    8 +-
 src/test/scripts/functions/reorg/Order.R        |   46 +-
 src/test/scripts/functions/reorg/Order.dml      |   14 +-
 src/test/scripts/functions/reorg/OrderDyn.R     |   44 +-
 src/test/scripts/functions/reorg/OrderDyn.dml   |   18 +-
 src/test/scripts/functions/reorg/Reverse1.R     |   82 +-
 src/test/scripts/functions/reorg/Reverse1.dml   |   48 +-
 src/test/scripts/functions/reorg/Reverse2.R     |   82 +-
 src/test/scripts/functions/reorg/Reverse2.dml   |   48 +-
 src/test/scripts/functions/reorg/Transpose.R    |   24 +-
 src/test/scripts/functions/reorg/Transpose.dml  |    8 +-
 .../scripts/functions/ternary/CTableRowHist.R   |   46 +-
 .../functions/ternary/CTableSequenceLeft.R      |   26 +-
 .../functions/ternary/CTableSequenceRight.R     |   26 +-
 .../functions/ternary/CentralMomentWeights.R    |   34 +-
 .../functions/ternary/CovarianceWeights.R       |   34 +-
 src/test/scripts/functions/ternary/IQMWeights.R |   48 +-
 .../scripts/functions/ternary/MedianWeights.R   |   30 +-
 .../scripts/functions/ternary/QuantileWeights.R |   32 +-
 .../scripts/functions/ternary/TableOutputTest.R |   28 +-
 src/test/scripts/functions/transform/Apply.dml  |   18 +-
 src/test/scripts/functions/transform/Scaling.R  |   30 +-
 .../scripts/functions/transform/Scaling.dml     |   18 +-
 .../scripts/functions/transform/Transform.dml   |   18 +-
 .../functions/transform/Transform_colnames.dml  |   20 +-
 .../functions/transform/input/homes/homes.csv   |  402 +--
 .../transform/input/homes/homesAllMissing.csv   |  402 +--
 .../input/homes/homesAllMissing.tfidspec.json   |   10 +-
 .../input/homes/homesAllMissing.tfspec.json     |    8 +-
 .../input/homes/homesOmit.tfidspec.json         |   28 +-
 .../transform/input/homes/homesOmit.tfspec.json |   26 +-
 .../transform/input/homes2/homes.csv/homes1.csv |  196 +-
 .../transform/input/homes2/homes.csv/homes2.csv |  204 +-
 .../transform/input/homes2/homes.tfidspec.json  |   44 +-
 .../transform/input/homes2/homes.tfspec.json    |   42 +-
 .../transform/input/homes2/homes.tfspec2.json   |   42 +-
 .../functions/transform/input/iris/iris.csv     |  302 +-
 .../transform/input/iris/iris.transformed.csv   |  302 +-
 .../scripts/functions/unary/matrix/ACosTest.dml |   20 +-
 .../scripts/functions/unary/matrix/ASinTest.dml |   20 +-
 .../scripts/functions/unary/matrix/ATanTest.dml |   20 +-
 src/test/scripts/functions/unary/matrix/Ceil.R  |   24 +-
 .../scripts/functions/unary/matrix/Cummax.R     |   34 +-
 .../scripts/functions/unary/matrix/Cummin.R     |   34 +-
 .../scripts/functions/unary/matrix/Cumprod.R    |   34 +-
 .../scripts/functions/unary/matrix/Cumsum.R     |   34 +-
 src/test/scripts/functions/unary/matrix/Floor.R |   24 +-
 .../scripts/functions/unary/matrix/Inverse.R    |   26 +-
 .../scripts/functions/unary/matrix/Inverse.dml  |   18 +-
 src/test/scripts/functions/unary/matrix/Minus.R |   26 +-
 .../scripts/functions/unary/matrix/Minus.dml    |    8 +-
 .../scripts/functions/unary/matrix/QRsolve.R    |   88 +-
 .../scripts/functions/unary/matrix/QRsolve.dml  |   84 +-
 .../scripts/functions/unary/matrix/RoundTest.R  |   24 +-
 src/test/scripts/functions/unary/matrix/SProp.R |   26 +-
 .../scripts/functions/unary/matrix/SelPos.R     |   68 +-
 .../scripts/functions/unary/matrix/Sigmoid.R    |   26 +-
 src/test/scripts/functions/unary/matrix/Sign1.R |   66 +-
 src/test/scripts/functions/unary/matrix/Sign2.R |   66 +-
 .../scripts/functions/unary/matrix/eigen.dml    |   78 +-
 src/test/scripts/functions/unary/matrix/lu.dml  |   36 +-
 src/test/scripts/functions/unary/matrix/qr.dml  |  100 +-
 .../functions/unary/matrix/removeEmpty2.dml     |    8 +-
 .../functions/unary/matrix/removeEmpty3.dml     |   10 +-
 .../functions/unary/matrix/removeEmpty4.dml     |    6 +-
 .../functions/unary/matrix/replace_Infinity.R   |   26 +-
 .../functions/unary/matrix/replace_Infinity.dml |    8 +-
 .../functions/unary/matrix/replace_NInfinity.R  |   26 +-
 .../unary/matrix/replace_NInfinity.dml          |    8 +-
 .../functions/unary/matrix/replace_NaN.R        |   26 +-
 .../functions/unary/matrix/replace_NaN.dml      |    8 +-
 .../functions/unary/matrix/replace_maxmin.R     |   26 +-
 .../functions/unary/matrix/replace_maxmin.dml   |    6 +-
 .../functions/unary/matrix/replace_value.R      |   26 +-
 .../functions/unary/matrix/replace_value.dml    |    6 +-
 .../scripts/functions/unary/scalar/ACosTest.dml |   24 +-
 .../scripts/functions/unary/scalar/ASinTest.dml |   24 +-
 .../scripts/functions/unary/scalar/ATanTest.dml |   24 +-
 .../functions/unary/scalar/DFTest_CHISQ.R       |   32 +-
 .../functions/unary/scalar/DFTest_CHISQ.dml     |   24 +-
 .../scripts/functions/unary/scalar/DFTest_EXP.R |   32 +-
 .../functions/unary/scalar/DFTest_EXP.dml       |   24 +-
 .../unary/scalar/DFTest_EXP_NOPARAMS.R          |   32 +-
 .../unary/scalar/DFTest_EXP_NOPARAMS.dml        |   24 +-
 .../scripts/functions/unary/scalar/DFTest_F.R   |   32 +-
 .../scripts/functions/unary/scalar/DFTest_F.dml |   24 +-
 .../functions/unary/scalar/DFTest_NORMAL.R      |   32 +-
 .../functions/unary/scalar/DFTest_NORMAL.dml    |   24 +-
 .../functions/unary/scalar/DFTest_NORMAL_MEAN.R |   32 +-
 .../unary/scalar/DFTest_NORMAL_MEAN.dml         |   24 +-
 .../unary/scalar/DFTest_NORMAL_NOPARAMS.R       |   32 +-
 .../unary/scalar/DFTest_NORMAL_NOPARAMS.dml     |   24 +-
 .../functions/unary/scalar/DFTest_NORMAL_SD.R   |   32 +-
 .../functions/unary/scalar/DFTest_NORMAL_SD.dml |   24 +-
 .../scripts/functions/unary/scalar/DFTest_T.R   |   32 +-
 .../scripts/functions/unary/scalar/DFTest_T.dml |   24 +-
 .../functions/unary/scalar/PrintTest3.dml       |   60 +-
 .../functions/unary/scalar/StopTest2.dml        |   18 +-
 .../functions/unary/scalar/StopTestLoops_fn.dml |   44 +-
 .../unary/scalar/StopTestLoops_for.dml          |   30 +-
 .../unary/scalar/StopTestLoops_parfor.dml       |   22 +-
 .../unary/scalar/StopTestLoops_while.dml        |   32 +-
 .../vect/VectorizeForLoopBinaryColNeg.R         |   34 +-
 .../vect/VectorizeForLoopBinaryColPos.R         |   34 +-
 .../vect/VectorizeForLoopBinaryRowNeg.R         |   34 +-
 .../vect/VectorizeForLoopBinaryRowPos.R         |   34 +-
 .../vect/VectorizeForLoopLeftScalarColNeg.R     |   38 +-
 .../vect/VectorizeForLoopLeftScalarColPos.R     |   36 +-
 .../vect/VectorizeForLoopLeftScalarRowNeg.R     |   38 +-
 .../vect/VectorizeForLoopLeftScalarRowPos.R     |   36 +-
 .../vect/VectorizeForLoopRightScalarColNeg.R    |   38 +-
 .../vect/VectorizeForLoopRightScalarColPos.R    |   36 +-
 .../vect/VectorizeForLoopRightScalarRowNeg.R    |   38 +-
 .../vect/VectorizeForLoopRightScalarRowPos.R    |   36 +-
 .../vect/VectorizeForLoopUnaryColNeg.R          |   34 +-
 .../vect/VectorizeForLoopUnaryColPos.R          |   32 +-
 .../vect/VectorizeForLoopUnaryRowNeg.R          |   34 +-
 .../vect/VectorizeForLoopUnaryRowPos.R          |   32 +-
 .../scripts/functions/vect/VectorizeLixColNeg.R |   30 +-
 .../scripts/functions/vect/VectorizeLixColPos.R |   30 +-
 .../scripts/functions/vect/VectorizeLixRowNeg.R |   30 +-
 .../scripts/functions/vect/VectorizeLixRowPos.R |   32 +-
 .../scripts/functions/vect/VectorizeRixColNeg.R |   32 +-
 .../scripts/functions/vect/VectorizeRixColPos.R |   32 +-
 .../scripts/functions/vect/VectorizeRixRowNeg.R |   32 +-
 .../scripts/functions/vect/VectorizeRixRowPos.R |   32 +-
 826 files changed, 52535 insertions(+), 52522 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/.gitattributes
----------------------------------------------------------------------
diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000..a8edefd
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,13 @@
+# Set the default behavior to have all files normalized to Unix-style
+# line endings upon check-in.
+* text=auto
+
+# Declare files that will always have CRLF line endings on checkout.
+*.bat text eol=crlf
+
+# Denote all files that are truly binary and should not be modified.
+*.dll binary
+*.exp binary
+*.lib binary
+*.pdb binary
+*.exe binary

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/docs/devdocs/MatrixMultiplicationOperators.txt
----------------------------------------------------------------------
diff --git a/docs/devdocs/MatrixMultiplicationOperators.txt b/docs/devdocs/MatrixMultiplicationOperators.txt
index 7bc8a9c..962951c 100644
--- a/docs/devdocs/MatrixMultiplicationOperators.txt
+++ b/docs/devdocs/MatrixMultiplicationOperators.txt
@@ -1,128 +1,128 @@
-#####################################################################
-# TITLE: An Overview of Matrix Multiplication Operators in SystemML #
-# DATE MODIFIED: 11/21/2015                                         #
-#####################################################################
-
-In the following, we give an overview of backend-specific physical matrix multiplication operators in SystemML as well as their internally used matrix multiplication block operations.
-
-A) BASIC MATRIX MULT OPERATORS 
--------------------------------
-
-An AggBinaryOp hop can be compiled into the following physical operators.
-
-* 1) Physical Operators in CP (single node, control program)
-  - MM (basic matrix multiplication)                     --> mm
-  - MMChain (matrix multiplication chain)                --> mmchain
-  - TSMM (transpose-self matrix multiplication)          --> tsmm
-  - PMM (permutation matrix multiplication)              --> pmm
-
-* 2) Physical Operators in MR (distributed, mapreduce)
-  - MapMM (map-side matrix multiplication, w/|w/o agg)   --> mm
-  - MapMMChain (map-side matrix chain multiplication)    --> mmchain
-  - TSMM (map-side transpose-self matrix multiplication) --> tsmm
-  - PMM (map-side permutation matrix multiplication)     --> pmm
-  - CPMM (cross-product matrix multiplication, 2 jobs)   --> mm
-  - RMM (replication-based matrix multiplication, 1 job) --> mm
-
-* 3) Physical Operators in SPARK (distributed, spark)
-  - MapMM (see MR, flatmap/mappartitions/maptopair +     --> mm
-    reduce/reducebykey/no_aggregation)                   
-  - MapMMChain (see MR, mapvalues/maptopair + reduce)    --> mmchain
-  - TSMM (see MR, mapvalues + reduce)                    --> tsmm
-  - PMM (see MR, flatmaptopair + reducebykey)            --> pmm
-  - CPMM (see MR, 2 x maptopair + join + maptopair +     --> mm
-    reduce/reducebykey) 
-  - RMM (see MR, 2 x flatmap + join + maptopair +        --> mm
-    reducebykey) 
-  - ZIPMM (partitioning-preserving 1-1 zipping mm,       --> mm
-    join + mapvalues + reduce) 
-
-
-B) COMPLEX MATRIX MULT OPERATORS
--------------------------------  
-
-A QuaternaryOp hop can be compiled into the following physical operators. Note that wsloss, wsigmoid, wdivmm have different semantics though. The main goal of these operators is to prevent the creation of dense "outer" products via selective computation over a sparse driver (sparse matrix and sparse-safe operation).
- 
-* 1) Physical Operators in CP (single node, control program)
-  - WSLoss (weighted squared loss)                       --> wsloss
-  - WSigmoid (weighted sigmoid)                          --> wsigmoid
-  - WDivMM (weighted divide matrix multiplication)       --> wdivmm
-  - WCeMM (weighted cross entropy matrix multiplication) --> wcemm
-  - WuMM (weighted unary op matrix multiplication)       --> wumm
-
-* 2) Physical Operators in MR (distributed, mapreduce)
-  - MapWSLoss (map-side weighted squared loss)           --> wsloss
-  - RedWSLoss (reduce-side weighted squared loss)        --> wsloss
-  - MapWSigmoid (map-side weighted sigmoid)              --> wsigmoid
-  - RedWSigmoid (reduce-side weighted sigmoid)           --> wsigmoid
-  - MapWDivMM (map-side weighted divide matrix mult)     --> wdivmm
-  - RedWDivMM (reduce-side weighted divide matrix mult)  --> wdivmm
-  - MapWCeMM (map-side weighted cross entr. matrix mult) --> wcemm
-  - RedWCeMM (reduce-side w. cross entr. matrix mult)    --> wcemm
-  - MapWuMM (map-side weighted unary op matrix mult)     --> wumm
-  - RedWuMM (reduce-side weighted unary op matrix mult)  --> wumm
-
-* 3) Physical Operators in SPARK (distributed, spark)
-  - MapWSLoss (see MR, mappartitions + reduce)           --> wsloss           
-  - RedWSLoss (see MR, 1/2x flatmaptopair + 1-3x join +  --> wsloss
-    maptopair + reduce)
-  - MapWSigmoid (see MR, mappartitions)                  --> wsigmoid
-  - RedWSigmoid (see MR, 1/2x flatmaptopair +            --> wsigmoid
-    1/2x join + maptopair)          
-  - MapWDivMM (see MR, mappartitions + reducebykey )     --> wdivmm
-  - RedWDivMM (see MR, 1/2x flatmaptopair + 1/2x join +  --> wdivmm 
-    maptopair + reducebykey)  
-  - MapWCeMM (see MR, mappartitions + reduce)            --> wcemm           
-  - RedWCeMM (see MR, 1/2x flatmaptopair + 1/2x join +   --> wcemm 
-    maptopair + reduce)  
-  - MapWuMM (see MR, mappartitions)                      --> wumm
-  - RedWuMM (see MR, 1/2x flatmaptopair +                --> wumm
-    1/2x join + maptopair)          
-  
-  
-C) CORE MATRIX MULT PRIMITIVES LibMatrixMult (incl related script patterns)
--------------------------------  
-* 1) mm       (general A %*% B)
-  - sequential / multi-threaded (same block ops, par over rows in A)
-  - dense-dense, dense-sparse, sparse-dense, sparse-sparse, ultra-sparse*
-  - ~20 special cases for matrix-vector, vector-vector, etc
-  
-* 2) mmchain  ((a) t(X) %*% (X %*% v), (b) t(X) %*% (w * (X %*% v)))
-  - sequential / multi-threaded (same block ops, par over rows in X)
-  - dense / sparse x 2 patterns
-
-* 3) tsmm     ((a) t(X) %*% X, (b) X %*% t(X))
-  - sequential / multi-threaded (same block ops, par over rows in R, 2x tasks)
-  - dense / sparse x 2 patterns; special cases for dot products
-
-* 4) pmm      (removeEmpty(diag(v), "rows") %*% X)
-  - sequential / multi-threaded (same block ops, par over rows in X)
-  - sparse-sparse, dense-dense, sparse-dense
-
-* 5) wsloss   ((a) sum(W*(X-U%*%t(V))^2), (b) sum((X-W*(U%*%t(V)))^2), 
-               (c) sum((X-(U%*%t(V)))^2), (d) sum(W*(U%*%t(V)-X)^2),
-               (e) sum((W*(U%*%t(V))-X)^2), (f) sum(((U%*%t(V))-X)^2))
-  - sequential / multi-threaded (same block ops, par over rows in W/X)                 
-  - all dense, sparse-dense factors, sparse/dense-* x 3 patterns      
-  - special patterns for (a) and (d) if W is X!=0      
-
-* 6) wsigmoid ((a) W*sigmoid(Y%*%t(X)), (b) W*sigmoid(-(Y%*%t(X))), 
-               (c) W*log(sigmoid(Y%*%t(X))), (d) W*log(sigmoid(-(Y%*%t(X))))) 
-  - sequential / multi-threaded (same block ops, par over rows in W)                 
-  - all dense, sparse-dense factors, sparse/dense-* x 4 patterns                   
-
-* 7) wdivmm   ((a) t(t(U)%*%(W/(U%*%t(V)))), (b) (W/(U%*%t(V)))%*%V,
-               (c) t(t(U)%*%(W*(U%*%t(V)))), (d) (W*(U%*%t(V)))%*%V, 
-               (e) W*(U%*%t(V)), (f) t(t(U)%*%((X!=0)*(U%*%t(V)-X))),
-               (g) ((X!=0)*(U%*%t(V)-X))%*%V)
-  - sequential / multi-threaded (same block ops, par over rows in X)                 
-  - all dense, sparse-dense factors, sparse/dense-* x 7 patterns
-
-* 8) wcemm    (sum(X*log(U%*%t(V))))  
-  - sequential / multi-threaded (same block ops, par over rows in X)                 
-  - all dense, sparse-dense factors, sparse/dense-*, 1 pattern
-
-* 9) wumm     ((a) X*uop(U%*%t(V)), (b) X/uop(U%*%t(V)))
-  - any unary operator, e.g., X*exp(U%*%t(V)) or X*(U%*%t(V))^2  
-  - sequential / multi-threaded (same block ops, par over rows in X)                 
-  - all dense, sparse-dense factors, sparse/dense-*, 2 patterns
+#####################################################################
+# TITLE: An Overview of Matrix Multiplication Operators in SystemML #
+# DATE MODIFIED: 11/21/2015                                         #
+#####################################################################
+
+In the following, we give an overview of backend-specific physical matrix multiplication operators in SystemML as well as their internally used matrix multiplication block operations.
+
+A) BASIC MATRIX MULT OPERATORS 
+-------------------------------
+
+An AggBinaryOp hop can be compiled into the following physical operators.
+
+* 1) Physical Operators in CP (single node, control program)
+  - MM (basic matrix multiplication)                     --> mm
+  - MMChain (matrix multiplication chain)                --> mmchain
+  - TSMM (transpose-self matrix multiplication)          --> tsmm
+  - PMM (permutation matrix multiplication)              --> pmm
+
+* 2) Physical Operators in MR (distributed, mapreduce)
+  - MapMM (map-side matrix multiplication, w/|w/o agg)   --> mm
+  - MapMMChain (map-side matrix chain multiplication)    --> mmchain
+  - TSMM (map-side transpose-self matrix multiplication) --> tsmm
+  - PMM (map-side permutation matrix multiplication)     --> pmm
+  - CPMM (cross-product matrix multiplication, 2 jobs)   --> mm
+  - RMM (replication-based matrix multiplication, 1 job) --> mm
+
+* 3) Physical Operators in SPARK (distributed, spark)
+  - MapMM (see MR, flatmap/mappartitions/maptopair +     --> mm
+    reduce/reducebykey/no_aggregation)                   
+  - MapMMChain (see MR, mapvalues/maptopair + reduce)    --> mmchain
+  - TSMM (see MR, mapvalues + reduce)                    --> tsmm
+  - PMM (see MR, flatmaptopair + reducebykey)            --> pmm
+  - CPMM (see MR, 2 x maptopair + join + maptopair +     --> mm
+    reduce/reducebykey) 
+  - RMM (see MR, 2 x flatmap + join + maptopair +        --> mm
+    reducebykey) 
+  - ZIPMM (partitioning-preserving 1-1 zipping mm,       --> mm
+    join + mapvalues + reduce) 
+
+
+B) COMPLEX MATRIX MULT OPERATORS
+-------------------------------  
+
+A QuaternaryOp hop can be compiled into the following physical operators. Note that wsloss, wsigmoid, wdivmm have different semantics though. The main goal of these operators is to prevent the creation of dense "outer" products via selective computation over a sparse driver (sparse matrix and sparse-safe operation).
+ 
+* 1) Physical Operators in CP (single node, control program)
+  - WSLoss (weighted squared loss)                       --> wsloss
+  - WSigmoid (weighted sigmoid)                          --> wsigmoid
+  - WDivMM (weighted divide matrix multiplication)       --> wdivmm
+  - WCeMM (weighted cross entropy matrix multiplication) --> wcemm
+  - WuMM (weighted unary op matrix multiplication)       --> wumm
+
+* 2) Physical Operators in MR (distributed, mapreduce)
+  - MapWSLoss (map-side weighted squared loss)           --> wsloss
+  - RedWSLoss (reduce-side weighted squared loss)        --> wsloss
+  - MapWSigmoid (map-side weighted sigmoid)              --> wsigmoid
+  - RedWSigmoid (reduce-side weighted sigmoid)           --> wsigmoid
+  - MapWDivMM (map-side weighted divide matrix mult)     --> wdivmm
+  - RedWDivMM (reduce-side weighted divide matrix mult)  --> wdivmm
+  - MapWCeMM (map-side weighted cross entr. matrix mult) --> wcemm
+  - RedWCeMM (reduce-side w. cross entr. matrix mult)    --> wcemm
+  - MapWuMM (map-side weighted unary op matrix mult)     --> wumm
+  - RedWuMM (reduce-side weighted unary op matrix mult)  --> wumm
+
+* 3) Physical Operators in SPARK (distributed, spark)
+  - MapWSLoss (see MR, mappartitions + reduce)           --> wsloss           
+  - RedWSLoss (see MR, 1/2x flatmaptopair + 1-3x join +  --> wsloss
+    maptopair + reduce)
+  - MapWSigmoid (see MR, mappartitions)                  --> wsigmoid
+  - RedWSigmoid (see MR, 1/2x flatmaptopair +            --> wsigmoid
+    1/2x join + maptopair)          
+  - MapWDivMM (see MR, mappartitions + reducebykey )     --> wdivmm
+  - RedWDivMM (see MR, 1/2x flatmaptopair + 1/2x join +  --> wdivmm 
+    maptopair + reducebykey)  
+  - MapWCeMM (see MR, mappartitions + reduce)            --> wcemm           
+  - RedWCeMM (see MR, 1/2x flatmaptopair + 1/2x join +   --> wcemm 
+    maptopair + reduce)  
+  - MapWuMM (see MR, mappartitions)                      --> wumm
+  - RedWuMM (see MR, 1/2x flatmaptopair +                --> wumm
+    1/2x join + maptopair)          
+  
+  
+C) CORE MATRIX MULT PRIMITIVES LibMatrixMult (incl related script patterns)
+-------------------------------  
+* 1) mm       (general A %*% B)
+  - sequential / multi-threaded (same block ops, par over rows in A)
+  - dense-dense, dense-sparse, sparse-dense, sparse-sparse, ultra-sparse*
+  - ~20 special cases for matrix-vector, vector-vector, etc
+  
+* 2) mmchain  ((a) t(X) %*% (X %*% v), (b) t(X) %*% (w * (X %*% v)))
+  - sequential / multi-threaded (same block ops, par over rows in X)
+  - dense / sparse x 2 patterns
+
+* 3) tsmm     ((a) t(X) %*% X, (b) X %*% t(X))
+  - sequential / multi-threaded (same block ops, par over rows in R, 2x tasks)
+  - dense / sparse x 2 patterns; special cases for dot products
+
+* 4) pmm      (removeEmpty(diag(v), "rows") %*% X)
+  - sequential / multi-threaded (same block ops, par over rows in X)
+  - sparse-sparse, dense-dense, sparse-dense
+
+* 5) wsloss   ((a) sum(W*(X-U%*%t(V))^2), (b) sum((X-W*(U%*%t(V)))^2), 
+               (c) sum((X-(U%*%t(V)))^2), (d) sum(W*(U%*%t(V)-X)^2),
+               (e) sum((W*(U%*%t(V))-X)^2), (f) sum(((U%*%t(V))-X)^2))
+  - sequential / multi-threaded (same block ops, par over rows in W/X)                 
+  - all dense, sparse-dense factors, sparse/dense-* x 3 patterns      
+  - special patterns for (a) and (d) if W is X!=0      
+
+* 6) wsigmoid ((a) W*sigmoid(Y%*%t(X)), (b) W*sigmoid(-(Y%*%t(X))), 
+               (c) W*log(sigmoid(Y%*%t(X))), (d) W*log(sigmoid(-(Y%*%t(X))))) 
+  - sequential / multi-threaded (same block ops, par over rows in W)                 
+  - all dense, sparse-dense factors, sparse/dense-* x 4 patterns                   
+
+* 7) wdivmm   ((a) t(t(U)%*%(W/(U%*%t(V)))), (b) (W/(U%*%t(V)))%*%V,
+               (c) t(t(U)%*%(W*(U%*%t(V)))), (d) (W*(U%*%t(V)))%*%V, 
+               (e) W*(U%*%t(V)), (f) t(t(U)%*%((X!=0)*(U%*%t(V)-X))),
+               (g) ((X!=0)*(U%*%t(V)-X))%*%V)
+  - sequential / multi-threaded (same block ops, par over rows in X)                 
+  - all dense, sparse-dense factors, sparse/dense-* x 7 patterns
+
+* 8) wcemm    (sum(X*log(U%*%t(V))))  
+  - sequential / multi-threaded (same block ops, par over rows in X)                 
+  - all dense, sparse-dense factors, sparse/dense-*, 1 pattern
+
+* 9) wumm     ((a) X*uop(U%*%t(V)), (b) X/uop(U%*%t(V)))
+  - any unary operator, e.g., X*exp(U%*%t(V)) or X*(U%*%t(V))^2  
+  - sequential / multi-threaded (same block ops, par over rows in X)                 
+  - all dense, sparse-dense factors, sparse/dense-*, 2 patterns

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/scripts/algorithms/ALS-DS.dml
----------------------------------------------------------------------
diff --git a/scripts/algorithms/ALS-DS.dml b/scripts/algorithms/ALS-DS.dml
index 1d0fce4..537c8ae 100644
--- a/scripts/algorithms/ALS-DS.dml
+++ b/scripts/algorithms/ALS-DS.dml
@@ -1,170 +1,170 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-#  
-# THIS SCRIPT COMPUTES AN APPROXIMATE FACTORIZATIONOF A LOW-RANK MATRIX V INTO TWO MATRICES L AND R 
-# USING ALTERNATING-LEAST-SQUARES (ALS) ALGORITHM 
-# MATRICES L AND R ARE COMPUTED BY MINIMIZING A LOSS FUNCTION (WITH REGULARIZATION)
-#
-# INPUT   PARAMETERS:
-# ---------------------------------------------------------------------------------------------
-# NAME    TYPE     DEFAULT  MEANING
-# ---------------------------------------------------------------------------------------------
-# V       String   ---      Location to read the input matrix V to be factorized
-# L       String   ---      Location to write the factor matrix L
-# R       String   ---      Location to write the factor matrix R
-# rank    Int      10       Rank of the factorization
-# reg     String   "L2"	    Regularization: 
-#						    "L2" = L2 regularization;
-#                           "wL2" = weighted L2 regularization
-# lambda  Double   0.0      Regularization parameter, no regularization if 0.0
-# maxi    Int      50       Maximum number of iterations
-# check   Boolean  FALSE    Check for convergence after every iteration, i.e., updating L and R once
-# thr     Double   0.0001   Assuming check is set to TRUE, the algorithm stops and convergence is declared 
-# 							if the decrease in loss in any two consecutive iterations falls below this threshold; 
-#							if check is FALSE thr is ignored
-# fmt     String   "text"   The output format of the factor matrices L and R, such as "text" or "csv"
-# ---------------------------------------------------------------------------------------------
-# OUTPUT: 
-# 1- An m x r matrix L, where r is the factorization rank 
-# 2- An r x n matrix R
-#
-# HOW TO INVOKE THIS SCRIPT - EXAMPLE:
-# hadoop jar SystemML.jar -f ALS.dml -nvargs V=INPUT_DIR/V L=OUTPUT_DIR/L R=OUTPUT_DIR/R rank=10 reg="L2" lambda=0.0001 fmt=csv
-
-fileV      = $V;
-fileL	   = $L;
-fileR      = $R;
-
-# Default values of some parameters
-r          = ifdef ($rank, 10);	        # $rank=10;
-reg	   	   = ifdef ($reg, "L2")         # $reg="L2";
-lambda	   = ifdef ($lambda, 0.000001); # $lambda=0.000001;
-max_iter   = ifdef ($maxi, 50);         # $maxi=50;
-check      = ifdef ($check, FALSE);	    # $check=FALSE;
-thr        = ifdef ($thr, 0.0001);      # $thr=0.0001;
-fmtO       = ifdef ($fmt, "text");      # $fmt="text";
-
-V = read (fileV);
-
-
-# check the input matrix V, if some rows or columns contain only zeros remove them from V  
-V_nonzero_ind = ppred (V, 0, "!=");
-row_nonzeros = rowSums (V_nonzero_ind);
-col_nonzeros = t (colSums (V_nonzero_ind));
-orig_nonzero_rows_ind = ppred (row_nonzeros, 0, "!=");
-orig_nonzero_cols_ind = ppred (col_nonzeros, 0, "!=");
-num_zero_rows = nrow (V) - sum (orig_nonzero_rows_ind);
-num_zero_cols = ncol (V) - sum (orig_nonzero_cols_ind);
-if (num_zero_rows > 0) {
-	print ("Matrix V contains empty rows! These rows will be removed.");
-	V = removeEmpty (target = V, margin = "rows");
-}
-if (num_zero_cols > 0) {
-	print ("Matrix V contains empty columns! These columns will be removed.");
-	V = removeEmpty (target = V, margin = "cols");
-}
-if (num_zero_rows > 0 | num_zero_cols > 0) {
-	print ("Recomputing nonzero rows and columns!");
-	V_nonzero_ind = ppred (V, 0, "!=");
-	row_nonzeros = rowSums (V_nonzero_ind);
-	col_nonzeros = t (colSums (V_nonzero_ind));	
-}
-
-###### MAIN PART ######
-m = nrow (V);
-n = ncol (V);
-
-# initializing factor matrices
-L = rand (rows = m, cols = r, min = -0.5, max = 0.5);
-R = rand (rows = n, cols = r, min = -0.5, max = 0.5);
-
-# initializing transformed matrices
-Vt = t(V);
-  
-# check for regularization
-if (reg == "L2") {
-	print ("BEGIN ALS SCRIPT WITH NONZERO SQUARED LOSS + L2 WITH LAMBDA - " + lambda);
-} else if (reg == "wL2") {
-	print ("BEGIN ALS SCRIPT WITH NONZERO SQUARED LOSS + WEIGHTED L2 WITH LAMBDA - " + lambda);
-} else {
-	stop ("wrong regularization! " + reg);
-}
-
-if (check) {
-	loss_init = sum (V_nonzero_ind * (V - (L %*% t(R)))^2) + lambda * (sum ((L^2) * row_nonzeros) + sum ((R^2) * col_nonzeros));
-	print ("-----   Initial train loss: " + loss_init + " -----");
-}
-
-lambda_I = diag (matrix (lambda, rows = r, cols = 1));
-it = 0;
-converged = FALSE;
-while ((it < max_iter) & (!converged)) {
-	it = it + 1;
-	# keep R fixed and update L
-	parfor (i in 1:m) {
-    	R_nonzero_ind = t(ppred(V[i,],0,"!="));
-		R_nonzero = removeEmpty (target=R * R_nonzero_ind, margin="rows");			
-		A1 = (t(R_nonzero) %*% R_nonzero) + (as.scalar(row_nonzeros[i,1]) * lambda_I); # coefficient matrix
-		L[i,] = t(solve (A1, t(V[i,] %*% R)));		
-	}
-  
-	# keep L fixed and update R
-	parfor (j in 1:n) {
-		L_nonzero_ind = t(ppred(Vt[j,],0,"!="))
-		L_nonzero = removeEmpty (target=L * L_nonzero_ind, margin="rows");
-		A2 = (t(L_nonzero) %*% L_nonzero) + (as.scalar(col_nonzeros[j,1]) * lambda_I); # coefficient matrix
-		R[j,] = t(solve (A2, t(Vt[j,] %*% L)));    
-	}
-	
-	# check for convergence
-	if (check) {
-		loss_cur = sum (V_nonzero_ind * (V - (L %*% t(R)))^2) + lambda * (sum ((L^2) * row_nonzeros) + sum ((R^2) * col_nonzeros));
-		loss_dec = (loss_init - loss_cur) / loss_init;
-		print ("Train loss at iteration (R) " + it + ": " + loss_cur + " loss-dec " + loss_dec);
-		if (loss_dec >= 0 & loss_dec < thr | loss_init == 0) {
-			print ("----- ALS converged after " + it + " iterations!");
-			converged = TRUE;
-		}
-		loss_init = loss_cur;
-	}
-} # end of while loop
-
-if (check) {
-	print ("-----	Final train loss: " + loss_init + " -----");
-}
-
-if (!converged) {
-   print ("Max iteration achieved but not converged!");
-} 
-
-# inject 0s in L if original V had empty rows
-if (num_zero_rows > 0) {
-	L = removeEmpty (target = diag (orig_nonzero_rows_ind), margin = "cols") %*% L;
-}
-# inject 0s in R if original V had empty rows
-if (num_zero_cols > 0) {
-	R = removeEmpty (target = diag (orig_nonzero_cols_ind), margin = "cols") %*% R; 
-}
-Rt = t (R);
-write (L, fileL, format=fmtO);
-write (Rt, fileR, format=fmtO);
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+#  
+# THIS SCRIPT COMPUTES AN APPROXIMATE FACTORIZATION OF A LOW-RANK MATRIX V INTO TWO MATRICES L AND R 
+# USING ALTERNATING-LEAST-SQUARES (ALS) ALGORITHM 
+# MATRICES L AND R ARE COMPUTED BY MINIMIZING A LOSS FUNCTION (WITH REGULARIZATION)
+#
+# INPUT   PARAMETERS:
+# ---------------------------------------------------------------------------------------------
+# NAME    TYPE     DEFAULT  MEANING
+# ---------------------------------------------------------------------------------------------
+# V       String   ---      Location to read the input matrix V to be factorized
+# L       String   ---      Location to write the factor matrix L
+# R       String   ---      Location to write the factor matrix R
+# rank    Int      10       Rank of the factorization
+# reg     String   "L2"	    Regularization: 
+#						    "L2" = L2 regularization;
+#                           "wL2" = weighted L2 regularization
+# lambda  Double   0.000001 Regularization parameter, no regularization if 0.0
+# maxi    Int      50       Maximum number of iterations
+# check   Boolean  FALSE    Check for convergence after every iteration, i.e., updating L and R once
+# thr     Double   0.0001   Assuming check is set to TRUE, the algorithm stops and convergence is declared 
+# 							if the decrease in loss in any two consecutive iterations falls below this threshold; 
+#							if check is FALSE thr is ignored
+# fmt     String   "text"   The output format of the factor matrices L and R, such as "text" or "csv"
+# ---------------------------------------------------------------------------------------------
+# OUTPUT: 
+# 1- An m x r matrix L, where r is the factorization rank 
+# 2- An r x n matrix R
+#
+# HOW TO INVOKE THIS SCRIPT - EXAMPLE:
+# hadoop jar SystemML.jar -f ALS-DS.dml -nvargs V=INPUT_DIR/V L=OUTPUT_DIR/L R=OUTPUT_DIR/R rank=10 reg="L2" lambda=0.0001 fmt=csv
+
+fileV      = $V;
+fileL	   = $L;
+fileR      = $R;
+
+# Default values of some parameters
+r          = ifdef ($rank, 10);	        # $rank=10;
+reg	   	   = ifdef ($reg, "L2")         # $reg="L2";
+lambda	   = ifdef ($lambda, 0.000001); # $lambda=0.000001;
+max_iter   = ifdef ($maxi, 50);         # $maxi=50;
+check      = ifdef ($check, FALSE);	    # $check=FALSE;
+thr        = ifdef ($thr, 0.0001);      # $thr=0.0001;
+fmtO       = ifdef ($fmt, "text");      # $fmt="text";
+
+V = read (fileV);
+
+
+# check the input matrix V, if some rows or columns contain only zeros remove them from V  
+V_nonzero_ind = ppred (V, 0, "!=");
+row_nonzeros = rowSums (V_nonzero_ind);
+col_nonzeros = t (colSums (V_nonzero_ind));
+orig_nonzero_rows_ind = ppred (row_nonzeros, 0, "!=");
+orig_nonzero_cols_ind = ppred (col_nonzeros, 0, "!=");
+num_zero_rows = nrow (V) - sum (orig_nonzero_rows_ind);
+num_zero_cols = ncol (V) - sum (orig_nonzero_cols_ind);
+if (num_zero_rows > 0) {
+	print ("Matrix V contains empty rows! These rows will be removed.");
+	V = removeEmpty (target = V, margin = "rows");
+}
+if (num_zero_cols > 0) {
+	print ("Matrix V contains empty columns! These columns will be removed.");
+	V = removeEmpty (target = V, margin = "cols");
+}
+if (num_zero_rows > 0 | num_zero_cols > 0) {
+	print ("Recomputing nonzero rows and columns!");
+	V_nonzero_ind = ppred (V, 0, "!=");
+	row_nonzeros = rowSums (V_nonzero_ind);
+	col_nonzeros = t (colSums (V_nonzero_ind));	
+}
+
+###### MAIN PART ######
+m = nrow (V);
+n = ncol (V);
+
+# initializing factor matrices
+L = rand (rows = m, cols = r, min = -0.5, max = 0.5);
+R = rand (rows = n, cols = r, min = -0.5, max = 0.5);
+
+# initializing transformed matrices
+Vt = t(V);
+  
+# check for regularization
+if (reg == "L2") {
+	print ("BEGIN ALS SCRIPT WITH NONZERO SQUARED LOSS + L2 WITH LAMBDA - " + lambda);
+} else if (reg == "wL2") {
+	print ("BEGIN ALS SCRIPT WITH NONZERO SQUARED LOSS + WEIGHTED L2 WITH LAMBDA - " + lambda);
+} else {
+	stop ("wrong regularization! " + reg);
+}
+
+if (check) {
+	loss_init = sum (V_nonzero_ind * (V - (L %*% t(R)))^2) + lambda * (sum ((L^2) * row_nonzeros) + sum ((R^2) * col_nonzeros));
+	print ("-----   Initial train loss: " + loss_init + " -----");
+}
+
+lambda_I = diag (matrix (lambda, rows = r, cols = 1));
+it = 0;
+converged = FALSE;
+while ((it < max_iter) & (!converged)) {
+	it = it + 1;
+	# keep R fixed and update L
+	parfor (i in 1:m) {
+    	R_nonzero_ind = t(ppred(V[i,],0,"!="));
+		R_nonzero = removeEmpty (target=R * R_nonzero_ind, margin="rows");			
+		A1 = (t(R_nonzero) %*% R_nonzero) + (as.scalar(row_nonzeros[i,1]) * lambda_I); # coefficient matrix
+		L[i,] = t(solve (A1, t(V[i,] %*% R)));		
+	}
+  
+	# keep L fixed and update R
+	parfor (j in 1:n) {
+		L_nonzero_ind = t(ppred(Vt[j,],0,"!="))
+		L_nonzero = removeEmpty (target=L * L_nonzero_ind, margin="rows");
+		A2 = (t(L_nonzero) %*% L_nonzero) + (as.scalar(col_nonzeros[j,1]) * lambda_I); # coefficient matrix
+		R[j,] = t(solve (A2, t(Vt[j,] %*% L)));    
+	}
+	
+	# check for convergence
+	if (check) {
+		loss_cur = sum (V_nonzero_ind * (V - (L %*% t(R)))^2) + lambda * (sum ((L^2) * row_nonzeros) + sum ((R^2) * col_nonzeros));
+		loss_dec = (loss_init - loss_cur) / loss_init;
+		print ("Train loss at iteration (R) " + it + ": " + loss_cur + " loss-dec " + loss_dec);
+		if (loss_dec >= 0 & loss_dec < thr | loss_init == 0) {
+			print ("----- ALS converged after " + it + " iterations!");
+			converged = TRUE;
+		}
+		loss_init = loss_cur;
+	}
+} # end of while loop
+
+if (check) {
+	print ("-----	Final train loss: " + loss_init + " -----");
+}
+
+if (!converged) {
+   print ("Max iteration achieved but not converged!");
+} 
+
+# inject 0s in L if original V had empty rows
+if (num_zero_rows > 0) {
+	L = removeEmpty (target = diag (orig_nonzero_rows_ind), margin = "cols") %*% L;
+}
+# inject 0s in R if original V had empty columns
+if (num_zero_cols > 0) {
+	R = removeEmpty (target = diag (orig_nonzero_cols_ind), margin = "cols") %*% R; 
+}
+Rt = t (R);
+write (L, fileL, format=fmtO);
+write (Rt, fileR, format=fmtO);
  
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/scripts/algorithms/ALS_topk_predict.dml
----------------------------------------------------------------------
diff --git a/scripts/algorithms/ALS_topk_predict.dml b/scripts/algorithms/ALS_topk_predict.dml
index 9d3d2ae..9b7e476 100644
--- a/scripts/algorithms/ALS_topk_predict.dml
+++ b/scripts/algorithms/ALS_topk_predict.dml
@@ -1,126 +1,126 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-#  
-# THIS SCRIPT COMPUTES THE RATING/SCORE FOR A GIVEN LIST OF PAIRS: (USER-ID, ITEM-ID) USING 2 FACTOR MATRICES L AND R
-# WE ASSUME THAT ALL USERS HAVE RATED AT LEAST ONCE AND ALL ITEMS HAVE BEEN RATED AT LEAST ONCE.
-# INPUT   PARAMETERS:
-# ---------------------------------------------------------------------------------------------
-# NAME    TYPE     DEFAULT  MEANING
-# ---------------------------------------------------------------------------------------------
-# X       String   ---      Location to read the input user-ids list
-# Y	 	  String   ---	    Location to write the output of top-K prediction: 
-#							 - top-K item-ids will be stored at Y
-#							 - the corresponding top-K ratings will be stored at Y+".ratings" 
-# L       String   ---      Location of factor matrix L: user-id x feature-id 
-# R       String   ---      Location of factor matrix R: feature-id x item-id
-# V	  	  String   ---      Location of original matrix V: user-id x item-id
-# K	  	  Int      5	    The number of top-K items	
-# fmt     String   "text"   The output format of the factor matrix user-id/item-id/score
-# ---------------------------------------------------------------------------------------------
-# OUTPUT: 
-# 1- A matrix containing the top-K item-ids with highest predicted ratings for the users specified in the input matrix X  
-# 2- A matrix containing the top-K predicted ratings for the users specified in the input matrix X  
-#
-# HOW TO INVOKE THIS SCRIPT - EXAMPLE:
-# hadoop jar system-ml.jar -f ALS-topk-predict.dml -nvargs X=INPUT_DIR/X L=INPUT_DIR/L R=INPUT_DIR/R V=INTPUT_DIR/V.mtx 
-#													Y=OUTPUT_DIR/Y K=5 fmt=csv
-
-fileX      = $X;
-fileY 	   = $Y;
-fileL	   = $L;
-fileR      = $R;
-fileV	   = $V;
-K	  	   = ifdef ($K, 5);
-fmtO       = ifdef ($fmt, "text");    # $fmt="text";
-
-X = read (fileX);
-L = read (fileL);
-R = read (fileR);
-V = read (fileV);
-
-Vrows = nrow(V);
-Vcols = ncol(V);
-
-zero_cols_ind = ppred (colSums (ppred (R, 0, "!=")), 0, "==");
-K = min (Vcols - sum (zero_cols_ind), K);
-
-n = nrow(X);
-
-Lrows = nrow(L);
-Rcols = ncol(R);
-
-X_user_max = max(X[,1]);
-
-if (X_user_max > Vrows) {
-	stop ("Predictions cannot be provided. Maximum user-id exceeds the number of rows of V.");
-}
-if (Lrows != Vrows | Rcols !=  Vcols) {
-	stop ("Predictions cannot be provided. Number of rows of L (columns of R) does not match the number of rows (column) of V.");
-}
-
-
-# creats projection matrix to select users
-s = seq(1, n);
-ones = matrix (1, rows = n, cols = 1);
-projection_matrix = table(s, X[,1], ones, n, Lrows);
-
-# selects users from factor L
-U_prime = projection_matrix %*% L;
-
-# calculates V_filter for selected users
-V_filter = U_prime %*% R;
-
-# selects users from original V
-V_prime = projection_matrix %*% V;
-
-# filter for already recommended items
-V_prime = ppred(V_prime, 0, '==');
-
-# removes already recommended items and creating user2item matrix
-V_filter = V_prime * V_filter; 
-
-
-# stores sorted movies for selected users 
-V_top_indices = matrix(0, rows = nrow (V_filter), cols = K);
-V_top_values = matrix(0, rows = nrow (V_filter), cols = K);
-
-# a large number to mask the max ratings
-range = max (V_filter) - min (V_filter) + 1;
-
-# uses rowIndexMax/rowMaxs to update kth ratings
-for (i in 1:K){
-	rowIndexMax = rowIndexMax (V_filter);
-	rowMaxs = rowMaxs (V_filter);
-	V_top_indices[,i] = rowIndexMax;
-	V_top_values[,i] = rowMaxs;
-	V_filter = V_filter - range * table (seq (1, nrow (V_filter), 1), rowIndexMax, nrow(V_filter), ncol(V_filter));
-}
-
-V_top_indices = V_top_indices * ppred (V_top_values, 0, ">");
-
-# append users as a first column
-V_top_indices = append (X[,1], V_top_indices);
-V_top_values = append (X[,1], V_top_values);
-
-# writing top K elements
-write (V_top_indices, fileY, format = fmtO);
-write(V_top_values, fileY+".ratings", format = fmtO);
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+#  
+# THIS SCRIPT COMPUTES THE RATING/SCORE FOR A GIVEN LIST OF PAIRS: (USER-ID, ITEM-ID) USING 2 FACTOR MATRICES L AND R
+# WE ASSUME THAT ALL USERS HAVE RATED AT LEAST ONCE AND ALL ITEMS HAVE BEEN RATED AT LEAST ONCE.
+# INPUT   PARAMETERS:
+# ---------------------------------------------------------------------------------------------
+# NAME    TYPE     DEFAULT  MEANING
+# ---------------------------------------------------------------------------------------------
+# X       String   ---      Location to read the input user-ids list
+# Y	 	  String   ---	    Location to write the output of top-K prediction: 
+#							 - top-K item-ids will be stored at Y
+#							 - the corresponding top-K ratings will be stored at Y+".ratings" 
+# L       String   ---      Location of factor matrix L: user-id x feature-id 
+# R       String   ---      Location of factor matrix R: feature-id x item-id
+# V	  	  String   ---      Location of original matrix V: user-id x item-id
+# K	  	  Int      5	    The number of top-K items	
+# fmt     String   "text"   The output format of the factor matrix user-id/item-id/score
+# ---------------------------------------------------------------------------------------------
+# OUTPUT: 
+# 1- A matrix containing the top-K item-ids with highest predicted ratings for the users specified in the input matrix X  
+# 2- A matrix containing the top-K predicted ratings for the users specified in the input matrix X  
+#
+# HOW TO INVOKE THIS SCRIPT - EXAMPLE:
+# hadoop jar system-ml.jar -f ALS_topk_predict.dml -nvargs X=INPUT_DIR/X L=INPUT_DIR/L R=INPUT_DIR/R V=INPUT_DIR/V.mtx 
+#													Y=OUTPUT_DIR/Y K=5 fmt=csv
+
+fileX      = $X;
+fileY 	   = $Y;
+fileL	   = $L;
+fileR      = $R;
+fileV	   = $V;
+K	  	   = ifdef ($K, 5);
+fmtO       = ifdef ($fmt, "text");    # $fmt="text";
+
+X = read (fileX);
+L = read (fileL);
+R = read (fileR);
+V = read (fileV);
+
+Vrows = nrow(V);
+Vcols = ncol(V);
+
+zero_cols_ind = ppred (colSums (ppred (R, 0, "!=")), 0, "==");
+K = min (Vcols - sum (zero_cols_ind), K);
+
+n = nrow(X);
+
+Lrows = nrow(L);
+Rcols = ncol(R);
+
+X_user_max = max(X[,1]);
+
+if (X_user_max > Vrows) {
+	stop ("Predictions cannot be provided. Maximum user-id exceeds the number of rows of V.");
+}
+if (Lrows != Vrows | Rcols !=  Vcols) {
+	stop ("Predictions cannot be provided. Number of rows of L (columns of R) does not match the number of rows (column) of V.");
+}
+
+
+# creates projection matrix to select users
+s = seq(1, n);
+ones = matrix (1, rows = n, cols = 1);
+projection_matrix = table(s, X[,1], ones, n, Lrows);
+
+# selects users from factor L
+U_prime = projection_matrix %*% L;
+
+# calculates V_filter for selected users
+V_filter = U_prime %*% R;
+
+# selects users from original V
+V_prime = projection_matrix %*% V;
+
+# filter for already recommended items
+V_prime = ppred(V_prime, 0, '==');
+
+# removes already recommended items and creating user2item matrix
+V_filter = V_prime * V_filter; 
+
+
+# stores sorted movies for selected users 
+V_top_indices = matrix(0, rows = nrow (V_filter), cols = K);
+V_top_values = matrix(0, rows = nrow (V_filter), cols = K);
+
+# a large number to mask the max ratings
+range = max (V_filter) - min (V_filter) + 1;
+
+# uses rowIndexMax/rowMaxs to update kth ratings
+for (i in 1:K){
+	rowIndexMax = rowIndexMax (V_filter);
+	rowMaxs = rowMaxs (V_filter);
+	V_top_indices[,i] = rowIndexMax;
+	V_top_values[,i] = rowMaxs;
+	V_filter = V_filter - range * table (seq (1, nrow (V_filter), 1), rowIndexMax, nrow(V_filter), ncol(V_filter));
+}
+
+V_top_indices = V_top_indices * ppred (V_top_values, 0, ">");
+
+# append users as a first column
+V_top_indices = append (X[,1], V_top_indices);
+V_top_values = append (X[,1], V_top_values);
+
+# writing top K elements
+write (V_top_indices, fileY, format = fmtO);
+write(V_top_values, fileY+".ratings", format = fmtO);



[02/55] [partial] incubator-systemml git commit: [SYSTEMML-482] [SYSTEMML-480] Adding a Git attributes file to enfore Unix-styled line endings, and normalizing all of the line endings.

Posted by du...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/transform/input/iris/iris.csv
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/transform/input/iris/iris.csv b/src/test/scripts/functions/transform/input/iris/iris.csv
index 9bfbda9..11b46be 100644
--- a/src/test/scripts/functions/transform/input/iris/iris.csv
+++ b/src/test/scripts/functions/transform/input/iris/iris.csv
@@ -1,151 +1,151 @@
-ID,Sepal.Length,Sepal.Width,Petal.Length,Petal.Width,Species
-1,5.1,3.5,1.4,0.2,setosa
-2,4.9,3,1.4,0.2,setosa
-3,4.7,3.2,1.3,0.2,setosa
-4,4.6,3.1,1.5,0.2,setosa
-5,5,3.6,1.4,0.2,setosa
-6,5.4,3.9,1.7,0.4,setosa
-7,4.6,3.4,1.4,0.3,setosa
-8,5,3.4,1.5,0.2,setosa
-9,4.4,2.9,1.4,0.2,setosa
-10,4.9,3.1,1.5,0.1,setosa
-11,5.4,3.7,1.5,0.2,setosa
-12,4.8,3.4,1.6,0.2,setosa
-13,4.8,3,1.4,0.1,setosa
-14,4.3,3,1.1,0.1,setosa
-15,5.8,4,1.2,0.2,setosa
-16,5.7,4.4,1.5,0.4,setosa
-17,5.4,3.9,1.3,0.4,setosa
-18,5.1,3.5,1.4,0.3,setosa
-19,5.7,3.8,1.7,0.3,setosa
-20,5.1,3.8,1.5,0.3,setosa
-21,5.4,3.4,1.7,0.2,setosa
-22,5.1,3.7,1.5,0.4,setosa
-23,4.6,3.6,1,0.2,setosa
-24,5.1,3.3,1.7,0.5,setosa
-25,4.8,3.4,1.9,0.2,setosa
-26,5,3,1.6,0.2,setosa
-27,5,3.4,1.6,0.4,setosa
-28,5.2,3.5,1.5,0.2,setosa
-29,5.2,3.4,1.4,0.2,setosa
-30,4.7,3.2,1.6,0.2,setosa
-31,4.8,3.1,1.6,0.2,setosa
-32,5.4,3.4,1.5,0.4,setosa
-33,5.2,4.1,1.5,0.1,setosa
-34,5.5,4.2,1.4,0.2,setosa
-35,4.9,3.1,1.5,0.2,setosa
-36,5,3.2,1.2,0.2,setosa
-37,5.5,3.5,1.3,0.2,setosa
-38,4.9,3.6,1.4,0.1,setosa
-39,4.4,3,1.3,0.2,setosa
-40,5.1,3.4,1.5,0.2,setosa
-41,5,3.5,1.3,0.3,setosa
-42,4.5,2.3,1.3,0.3,setosa
-43,4.4,3.2,1.3,0.2,setosa
-44,5,3.5,1.6,0.6,setosa
-45,5.1,3.8,1.9,0.4,setosa
-46,4.8,3,1.4,0.3,setosa
-47,5.1,3.8,1.6,0.2,setosa
-48,4.6,3.2,1.4,0.2,setosa
-49,5.3,3.7,1.5,0.2,setosa
-50,5,3.3,1.4,0.2,setosa
-51,7,3.2,4.7,1.4,versicolor
-52,6.4,3.2,4.5,1.5,versicolor
-53,6.9,3.1,4.9,1.5,versicolor
-54,5.5,2.3,4,1.3,versicolor
-55,6.5,2.8,4.6,1.5,versicolor
-56,5.7,2.8,4.5,1.3,versicolor
-57,6.3,3.3,4.7,1.6,versicolor
-58,4.9,2.4,3.3,1,versicolor
-59,6.6,2.9,4.6,1.3,versicolor
-60,5.2,2.7,3.9,1.4,versicolor
-61,5,2,3.5,1,versicolor
-62,5.9,3,4.2,1.5,versicolor
-63,6,2.2,4,1,versicolor
-64,6.1,2.9,4.7,1.4,versicolor
-65,5.6,2.9,3.6,1.3,versicolor
-66,6.7,3.1,4.4,1.4,versicolor
-67,5.6,3,4.5,1.5,versicolor
-68,5.8,2.7,4.1,1,versicolor
-69,6.2,2.2,4.5,1.5,versicolor
-70,5.6,2.5,3.9,1.1,versicolor
-71,5.9,3.2,4.8,1.8,versicolor
-72,6.1,2.8,4,1.3,versicolor
-73,6.3,2.5,4.9,1.5,versicolor
-74,6.1,2.8,4.7,1.2,versicolor
-75,6.4,2.9,4.3,1.3,versicolor
-76,6.6,3,4.4,1.4,versicolor
-77,6.8,2.8,4.8,1.4,versicolor
-78,6.7,3,5,1.7,versicolor
-79,6,2.9,4.5,1.5,versicolor
-80,5.7,2.6,3.5,1,versicolor
-81,5.5,2.4,3.8,1.1,versicolor
-82,5.5,2.4,3.7,1,versicolor
-83,5.8,2.7,3.9,1.2,versicolor
-84,6,2.7,5.1,1.6,versicolor
-85,5.4,3,4.5,1.5,versicolor
-86,6,3.4,4.5,1.6,versicolor
-87,6.7,3.1,4.7,1.5,versicolor
-88,6.3,2.3,4.4,1.3,versicolor
-89,5.6,3,4.1,1.3,versicolor
-90,5.5,2.5,4,1.3,versicolor
-91,5.5,2.6,4.4,1.2,versicolor
-92,6.1,3,4.6,1.4,versicolor
-93,5.8,2.6,4,1.2,versicolor
-94,5,2.3,3.3,1,versicolor
-95,5.6,2.7,4.2,1.3,versicolor
-96,5.7,3,4.2,1.2,versicolor
-97,5.7,2.9,4.2,1.3,versicolor
-98,6.2,2.9,4.3,1.3,versicolor
-99,5.1,2.5,3,1.1,versicolor
-100,5.7,2.8,4.1,1.3,versicolor
-101,6.3,3.3,6,2.5,virginica
-102,5.8,2.7,5.1,1.9,virginica
-103,7.1,3,5.9,2.1,virginica
-104,6.3,2.9,5.6,1.8,virginica
-105,6.5,3,5.8,2.2,virginica
-106,7.6,3,6.6,2.1,virginica
-107,4.9,2.5,4.5,1.7,virginica
-108,7.3,2.9,6.3,1.8,virginica
-109,6.7,2.5,5.8,1.8,virginica
-110,7.2,3.6,6.1,2.5,virginica
-111,6.5,3.2,5.1,2,virginica
-112,6.4,2.7,5.3,1.9,virginica
-113,6.8,3,5.5,2.1,virginica
-114,5.7,2.5,5,2,virginica
-115,5.8,2.8,5.1,2.4,virginica
-116,6.4,3.2,5.3,2.3,virginica
-117,6.5,3,5.5,1.8,virginica
-118,7.7,3.8,6.7,2.2,virginica
-119,7.7,2.6,6.9,2.3,virginica
-120,6,2.2,5,1.5,virginica
-121,6.9,3.2,5.7,2.3,virginica
-122,5.6,2.8,4.9,2,virginica
-123,7.7,2.8,6.7,2,virginica
-124,6.3,2.7,4.9,1.8,virginica
-125,6.7,3.3,5.7,2.1,virginica
-126,7.2,3.2,6,1.8,virginica
-127,6.2,2.8,4.8,1.8,virginica
-128,6.1,3,4.9,1.8,virginica
-129,6.4,2.8,5.6,2.1,virginica
-130,7.2,3,5.8,1.6,virginica
-131,7.4,2.8,6.1,1.9,virginica
-132,7.9,3.8,6.4,2,virginica
-133,6.4,2.8,5.6,2.2,virginica
-134,6.3,2.8,5.1,1.5,virginica
-135,6.1,2.6,5.6,1.4,virginica
-136,7.7,3,6.1,2.3,virginica
-137,6.3,3.4,5.6,2.4,virginica
-138,6.4,3.1,5.5,1.8,virginica
-139,6,3,4.8,1.8,virginica
-140,6.9,3.1,5.4,2.1,virginica
-141,6.7,3.1,5.6,2.4,virginica
-142,6.9,3.1,5.1,2.3,virginica
-143,5.8,2.7,5.1,1.9,virginica
-144,6.8,3.2,5.9,2.3,virginica
-145,6.7,3.3,5.7,2.5,virginica
-146,6.7,3,5.2,2.3,virginica
-147,6.3,2.5,5,1.9,virginica
-148,6.5,3,5.2,2,virginica
-149,6.2,3.4,5.4,2.3,virginica
-150,5.9,3,5.1,1.8,virginica
+ID,Sepal.Length,Sepal.Width,Petal.Length,Petal.Width,Species
+1,5.1,3.5,1.4,0.2,setosa
+2,4.9,3,1.4,0.2,setosa
+3,4.7,3.2,1.3,0.2,setosa
+4,4.6,3.1,1.5,0.2,setosa
+5,5,3.6,1.4,0.2,setosa
+6,5.4,3.9,1.7,0.4,setosa
+7,4.6,3.4,1.4,0.3,setosa
+8,5,3.4,1.5,0.2,setosa
+9,4.4,2.9,1.4,0.2,setosa
+10,4.9,3.1,1.5,0.1,setosa
+11,5.4,3.7,1.5,0.2,setosa
+12,4.8,3.4,1.6,0.2,setosa
+13,4.8,3,1.4,0.1,setosa
+14,4.3,3,1.1,0.1,setosa
+15,5.8,4,1.2,0.2,setosa
+16,5.7,4.4,1.5,0.4,setosa
+17,5.4,3.9,1.3,0.4,setosa
+18,5.1,3.5,1.4,0.3,setosa
+19,5.7,3.8,1.7,0.3,setosa
+20,5.1,3.8,1.5,0.3,setosa
+21,5.4,3.4,1.7,0.2,setosa
+22,5.1,3.7,1.5,0.4,setosa
+23,4.6,3.6,1,0.2,setosa
+24,5.1,3.3,1.7,0.5,setosa
+25,4.8,3.4,1.9,0.2,setosa
+26,5,3,1.6,0.2,setosa
+27,5,3.4,1.6,0.4,setosa
+28,5.2,3.5,1.5,0.2,setosa
+29,5.2,3.4,1.4,0.2,setosa
+30,4.7,3.2,1.6,0.2,setosa
+31,4.8,3.1,1.6,0.2,setosa
+32,5.4,3.4,1.5,0.4,setosa
+33,5.2,4.1,1.5,0.1,setosa
+34,5.5,4.2,1.4,0.2,setosa
+35,4.9,3.1,1.5,0.2,setosa
+36,5,3.2,1.2,0.2,setosa
+37,5.5,3.5,1.3,0.2,setosa
+38,4.9,3.6,1.4,0.1,setosa
+39,4.4,3,1.3,0.2,setosa
+40,5.1,3.4,1.5,0.2,setosa
+41,5,3.5,1.3,0.3,setosa
+42,4.5,2.3,1.3,0.3,setosa
+43,4.4,3.2,1.3,0.2,setosa
+44,5,3.5,1.6,0.6,setosa
+45,5.1,3.8,1.9,0.4,setosa
+46,4.8,3,1.4,0.3,setosa
+47,5.1,3.8,1.6,0.2,setosa
+48,4.6,3.2,1.4,0.2,setosa
+49,5.3,3.7,1.5,0.2,setosa
+50,5,3.3,1.4,0.2,setosa
+51,7,3.2,4.7,1.4,versicolor
+52,6.4,3.2,4.5,1.5,versicolor
+53,6.9,3.1,4.9,1.5,versicolor
+54,5.5,2.3,4,1.3,versicolor
+55,6.5,2.8,4.6,1.5,versicolor
+56,5.7,2.8,4.5,1.3,versicolor
+57,6.3,3.3,4.7,1.6,versicolor
+58,4.9,2.4,3.3,1,versicolor
+59,6.6,2.9,4.6,1.3,versicolor
+60,5.2,2.7,3.9,1.4,versicolor
+61,5,2,3.5,1,versicolor
+62,5.9,3,4.2,1.5,versicolor
+63,6,2.2,4,1,versicolor
+64,6.1,2.9,4.7,1.4,versicolor
+65,5.6,2.9,3.6,1.3,versicolor
+66,6.7,3.1,4.4,1.4,versicolor
+67,5.6,3,4.5,1.5,versicolor
+68,5.8,2.7,4.1,1,versicolor
+69,6.2,2.2,4.5,1.5,versicolor
+70,5.6,2.5,3.9,1.1,versicolor
+71,5.9,3.2,4.8,1.8,versicolor
+72,6.1,2.8,4,1.3,versicolor
+73,6.3,2.5,4.9,1.5,versicolor
+74,6.1,2.8,4.7,1.2,versicolor
+75,6.4,2.9,4.3,1.3,versicolor
+76,6.6,3,4.4,1.4,versicolor
+77,6.8,2.8,4.8,1.4,versicolor
+78,6.7,3,5,1.7,versicolor
+79,6,2.9,4.5,1.5,versicolor
+80,5.7,2.6,3.5,1,versicolor
+81,5.5,2.4,3.8,1.1,versicolor
+82,5.5,2.4,3.7,1,versicolor
+83,5.8,2.7,3.9,1.2,versicolor
+84,6,2.7,5.1,1.6,versicolor
+85,5.4,3,4.5,1.5,versicolor
+86,6,3.4,4.5,1.6,versicolor
+87,6.7,3.1,4.7,1.5,versicolor
+88,6.3,2.3,4.4,1.3,versicolor
+89,5.6,3,4.1,1.3,versicolor
+90,5.5,2.5,4,1.3,versicolor
+91,5.5,2.6,4.4,1.2,versicolor
+92,6.1,3,4.6,1.4,versicolor
+93,5.8,2.6,4,1.2,versicolor
+94,5,2.3,3.3,1,versicolor
+95,5.6,2.7,4.2,1.3,versicolor
+96,5.7,3,4.2,1.2,versicolor
+97,5.7,2.9,4.2,1.3,versicolor
+98,6.2,2.9,4.3,1.3,versicolor
+99,5.1,2.5,3,1.1,versicolor
+100,5.7,2.8,4.1,1.3,versicolor
+101,6.3,3.3,6,2.5,virginica
+102,5.8,2.7,5.1,1.9,virginica
+103,7.1,3,5.9,2.1,virginica
+104,6.3,2.9,5.6,1.8,virginica
+105,6.5,3,5.8,2.2,virginica
+106,7.6,3,6.6,2.1,virginica
+107,4.9,2.5,4.5,1.7,virginica
+108,7.3,2.9,6.3,1.8,virginica
+109,6.7,2.5,5.8,1.8,virginica
+110,7.2,3.6,6.1,2.5,virginica
+111,6.5,3.2,5.1,2,virginica
+112,6.4,2.7,5.3,1.9,virginica
+113,6.8,3,5.5,2.1,virginica
+114,5.7,2.5,5,2,virginica
+115,5.8,2.8,5.1,2.4,virginica
+116,6.4,3.2,5.3,2.3,virginica
+117,6.5,3,5.5,1.8,virginica
+118,7.7,3.8,6.7,2.2,virginica
+119,7.7,2.6,6.9,2.3,virginica
+120,6,2.2,5,1.5,virginica
+121,6.9,3.2,5.7,2.3,virginica
+122,5.6,2.8,4.9,2,virginica
+123,7.7,2.8,6.7,2,virginica
+124,6.3,2.7,4.9,1.8,virginica
+125,6.7,3.3,5.7,2.1,virginica
+126,7.2,3.2,6,1.8,virginica
+127,6.2,2.8,4.8,1.8,virginica
+128,6.1,3,4.9,1.8,virginica
+129,6.4,2.8,5.6,2.1,virginica
+130,7.2,3,5.8,1.6,virginica
+131,7.4,2.8,6.1,1.9,virginica
+132,7.9,3.8,6.4,2,virginica
+133,6.4,2.8,5.6,2.2,virginica
+134,6.3,2.8,5.1,1.5,virginica
+135,6.1,2.6,5.6,1.4,virginica
+136,7.7,3,6.1,2.3,virginica
+137,6.3,3.4,5.6,2.4,virginica
+138,6.4,3.1,5.5,1.8,virginica
+139,6,3,4.8,1.8,virginica
+140,6.9,3.1,5.4,2.1,virginica
+141,6.7,3.1,5.6,2.4,virginica
+142,6.9,3.1,5.1,2.3,virginica
+143,5.8,2.7,5.1,1.9,virginica
+144,6.8,3.2,5.9,2.3,virginica
+145,6.7,3.3,5.7,2.5,virginica
+146,6.7,3,5.2,2.3,virginica
+147,6.3,2.5,5,1.9,virginica
+148,6.5,3,5.2,2,virginica
+149,6.2,3.4,5.4,2.3,virginica
+150,5.9,3,5.1,1.8,virginica

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/transform/input/iris/iris.transformed.csv
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/transform/input/iris/iris.transformed.csv b/src/test/scripts/functions/transform/input/iris/iris.transformed.csv
index ed7a19a..0ca65bf 100644
--- a/src/test/scripts/functions/transform/input/iris/iris.transformed.csv
+++ b/src/test/scripts/functions/transform/input/iris/iris.transformed.csv
@@ -1,151 +1,151 @@
-ID,Sepal.Length,Sepal.Width,Petal.Length,Petal.Width,Species_setosa,Species_versicolor,Species_virginica
-1,1,3.5,1,0.2,1,0,0
-2,1,3,1,0.2,1,0,0
-3,1,3.2,1,0.2,1,0,0
-4,1,3.1,1,0.2,1,0,0
-5,1,3.6,1,0.2,1,0,0
-6,1,3.9,1,0.4,1,0,0
-7,1,3.4,1,0.3,1,0,0
-8,1,3.4,1,0.2,1,0,0
-9,1,2.9,1,0.2,1,0,0
-10,1,3.1,1,0.1,1,0,0
-11,1,3.7,1,0.2,1,0,0
-12,1,3.4,1,0.2,1,0,0
-13,1,3,1,0.1,1,0,0
-14,1,3,1,0.1,1,0,0
-15,1,4,1,0.2,1,0,0
-16,1,4.4,1,0.4,1,0,0
-17,1,3.9,1,0.4,1,0,0
-18,1,3.5,1,0.3,1,0,0
-19,1,3.8,1,0.3,1,0,0
-20,1,3.8,1,0.3,1,0,0
-21,1,3.4,1,0.2,1,0,0
-22,1,3.7,1,0.4,1,0,0
-23,1,3.6,1,0.2,1,0,0
-24,1,3.3,1,0.5,1,0,0
-25,1,3.4,1,0.2,1,0,0
-26,1,3,1,0.2,1,0,0
-27,1,3.4,1,0.4,1,0,0
-28,1,3.5,1,0.2,1,0,0
-29,1,3.4,1,0.2,1,0,0
-30,1,3.2,1,0.2,1,0,0
-31,1,3.1,1,0.2,1,0,0
-32,1,3.4,1,0.4,1,0,0
-33,1,4.1,1,0.1,1,0,0
-34,1,4.2,1,0.2,1,0,0
-35,1,3.1,1,0.2,1,0,0
-36,1,3.2,1,0.2,1,0,0
-37,1,3.5,1,0.2,1,0,0
-38,1,3.6,1,0.1,1,0,0
-39,1,3,1,0.2,1,0,0
-40,1,3.4,1,0.2,1,0,0
-41,1,3.5,1,0.3,1,0,0
-42,1,2.3,1,0.3,1,0,0
-43,1,3.2,1,0.2,1,0,0
-44,1,3.5,1,0.6,1,0,0
-45,1,3.8,1,0.4,1,0,0
-46,1,3,1,0.3,1,0,0
-47,1,3.8,1,0.2,1,0,0
-48,1,3.2,1,0.2,1,0,0
-49,1,3.7,1,0.2,1,0,0
-50,1,3.3,1,0.2,1,0,0
-51,2,3.2,2,1.4,0,1,0
-52,2,3.2,2,1.5,0,1,0
-53,2,3.1,2,1.5,0,1,0
-54,1,2.3,2,1.3,0,1,0
-55,2,2.8,2,1.5,0,1,0
-56,1,2.8,2,1.3,0,1,0
-57,2,3.3,2,1.6,0,1,0
-58,1,2.4,1,1,0,1,0
-59,2,2.9,2,1.3,0,1,0
-60,1,2.7,1,1.4,0,1,0
-61,1,2,1,1,0,1,0
-62,1,3,2,1.5,0,1,0
-63,1,2.2,2,1,0,1,0
-64,1,2.9,2,1.4,0,1,0
-65,1,2.9,1,1.3,0,1,0
-66,2,3.1,2,1.4,0,1,0
-67,1,3,2,1.5,0,1,0
-68,1,2.7,2,1,0,1,0
-69,2,2.2,2,1.5,0,1,0
-70,1,2.5,1,1.1,0,1,0
-71,1,3.2,2,1.8,0,1,0
-72,1,2.8,2,1.3,0,1,0
-73,2,2.5,2,1.5,0,1,0
-74,1,2.8,2,1.2,0,1,0
-75,2,2.9,2,1.3,0,1,0
-76,2,3,2,1.4,0,1,0
-77,2,2.8,2,1.4,0,1,0
-78,2,3,2,1.7,0,1,0
-79,1,2.9,2,1.5,0,1,0
-80,1,2.6,1,1,0,1,0
-81,1,2.4,1,1.1,0,1,0
-82,1,2.4,1,1,0,1,0
-83,1,2.7,1,1.2,0,1,0
-84,1,2.7,2,1.6,0,1,0
-85,1,3,2,1.5,0,1,0
-86,1,3.4,2,1.6,0,1,0
-87,2,3.1,2,1.5,0,1,0
-88,2,2.3,2,1.3,0,1,0
-89,1,3,2,1.3,0,1,0
-90,1,2.5,2,1.3,0,1,0
-91,1,2.6,2,1.2,0,1,0
-92,1,3,2,1.4,0,1,0
-93,1,2.6,2,1.2,0,1,0
-94,1,2.3,1,1,0,1,0
-95,1,2.7,2,1.3,0,1,0
-96,1,3,2,1.2,0,1,0
-97,1,2.9,2,1.3,0,1,0
-98,2,2.9,2,1.3,0,1,0
-99,1,2.5,1,1.1,0,1,0
-100,1,2.8,2,1.3,0,1,0
-101,2,3.3,2,2.5,0,0,1
-102,1,2.7,2,1.9,0,0,1
-103,2,3,2,2.1,0,0,1
-104,2,2.9,2,1.8,0,0,1
-105,2,3,2,2.2,0,0,1
-106,2,3,2,2.1,0,0,1
-107,1,2.5,2,1.7,0,0,1
-108,2,2.9,2,1.8,0,0,1
-109,2,2.5,2,1.8,0,0,1
-110,2,3.6,2,2.5,0,0,1
-111,2,3.2,2,2,0,0,1
-112,2,2.7,2,1.9,0,0,1
-113,2,3,2,2.1,0,0,1
-114,1,2.5,2,2,0,0,1
-115,1,2.8,2,2.4,0,0,1
-116,2,3.2,2,2.3,0,0,1
-117,2,3,2,1.8,0,0,1
-118,2,3.8,2,2.2,0,0,1
-119,2,2.6,2,2.3,0,0,1
-120,1,2.2,2,1.5,0,0,1
-121,2,3.2,2,2.3,0,0,1
-122,1,2.8,2,2,0,0,1
-123,2,2.8,2,2,0,0,1
-124,2,2.7,2,1.8,0,0,1
-125,2,3.3,2,2.1,0,0,1
-126,2,3.2,2,1.8,0,0,1
-127,2,2.8,2,1.8,0,0,1
-128,1,3,2,1.8,0,0,1
-129,2,2.8,2,2.1,0,0,1
-130,2,3,2,1.6,0,0,1
-131,2,2.8,2,1.9,0,0,1
-132,2,3.8,2,2,0,0,1
-133,2,2.8,2,2.2,0,0,1
-134,2,2.8,2,1.5,0,0,1
-135,1,2.6,2,1.4,0,0,1
-136,2,3,2,2.3,0,0,1
-137,2,3.4,2,2.4,0,0,1
-138,2,3.1,2,1.8,0,0,1
-139,1,3,2,1.8,0,0,1
-140,2,3.1,2,2.1,0,0,1
-141,2,3.1,2,2.4,0,0,1
-142,2,3.1,2,2.3,0,0,1
-143,1,2.7,2,1.9,0,0,1
-144,2,3.2,2,2.3,0,0,1
-145,2,3.3,2,2.5,0,0,1
-146,2,3,2,2.3,0,0,1
-147,2,2.5,2,1.9,0,0,1
-148,2,3,2,2,0,0,1
-149,2,3.4,2,2.3,0,0,1
-150,1,3,2,1.8,0,0,1
+ID,Sepal.Length,Sepal.Width,Petal.Length,Petal.Width,Species_setosa,Species_versicolor,Species_virginica
+1,1,3.5,1,0.2,1,0,0
+2,1,3,1,0.2,1,0,0
+3,1,3.2,1,0.2,1,0,0
+4,1,3.1,1,0.2,1,0,0
+5,1,3.6,1,0.2,1,0,0
+6,1,3.9,1,0.4,1,0,0
+7,1,3.4,1,0.3,1,0,0
+8,1,3.4,1,0.2,1,0,0
+9,1,2.9,1,0.2,1,0,0
+10,1,3.1,1,0.1,1,0,0
+11,1,3.7,1,0.2,1,0,0
+12,1,3.4,1,0.2,1,0,0
+13,1,3,1,0.1,1,0,0
+14,1,3,1,0.1,1,0,0
+15,1,4,1,0.2,1,0,0
+16,1,4.4,1,0.4,1,0,0
+17,1,3.9,1,0.4,1,0,0
+18,1,3.5,1,0.3,1,0,0
+19,1,3.8,1,0.3,1,0,0
+20,1,3.8,1,0.3,1,0,0
+21,1,3.4,1,0.2,1,0,0
+22,1,3.7,1,0.4,1,0,0
+23,1,3.6,1,0.2,1,0,0
+24,1,3.3,1,0.5,1,0,0
+25,1,3.4,1,0.2,1,0,0
+26,1,3,1,0.2,1,0,0
+27,1,3.4,1,0.4,1,0,0
+28,1,3.5,1,0.2,1,0,0
+29,1,3.4,1,0.2,1,0,0
+30,1,3.2,1,0.2,1,0,0
+31,1,3.1,1,0.2,1,0,0
+32,1,3.4,1,0.4,1,0,0
+33,1,4.1,1,0.1,1,0,0
+34,1,4.2,1,0.2,1,0,0
+35,1,3.1,1,0.2,1,0,0
+36,1,3.2,1,0.2,1,0,0
+37,1,3.5,1,0.2,1,0,0
+38,1,3.6,1,0.1,1,0,0
+39,1,3,1,0.2,1,0,0
+40,1,3.4,1,0.2,1,0,0
+41,1,3.5,1,0.3,1,0,0
+42,1,2.3,1,0.3,1,0,0
+43,1,3.2,1,0.2,1,0,0
+44,1,3.5,1,0.6,1,0,0
+45,1,3.8,1,0.4,1,0,0
+46,1,3,1,0.3,1,0,0
+47,1,3.8,1,0.2,1,0,0
+48,1,3.2,1,0.2,1,0,0
+49,1,3.7,1,0.2,1,0,0
+50,1,3.3,1,0.2,1,0,0
+51,2,3.2,2,1.4,0,1,0
+52,2,3.2,2,1.5,0,1,0
+53,2,3.1,2,1.5,0,1,0
+54,1,2.3,2,1.3,0,1,0
+55,2,2.8,2,1.5,0,1,0
+56,1,2.8,2,1.3,0,1,0
+57,2,3.3,2,1.6,0,1,0
+58,1,2.4,1,1,0,1,0
+59,2,2.9,2,1.3,0,1,0
+60,1,2.7,1,1.4,0,1,0
+61,1,2,1,1,0,1,0
+62,1,3,2,1.5,0,1,0
+63,1,2.2,2,1,0,1,0
+64,1,2.9,2,1.4,0,1,0
+65,1,2.9,1,1.3,0,1,0
+66,2,3.1,2,1.4,0,1,0
+67,1,3,2,1.5,0,1,0
+68,1,2.7,2,1,0,1,0
+69,2,2.2,2,1.5,0,1,0
+70,1,2.5,1,1.1,0,1,0
+71,1,3.2,2,1.8,0,1,0
+72,1,2.8,2,1.3,0,1,0
+73,2,2.5,2,1.5,0,1,0
+74,1,2.8,2,1.2,0,1,0
+75,2,2.9,2,1.3,0,1,0
+76,2,3,2,1.4,0,1,0
+77,2,2.8,2,1.4,0,1,0
+78,2,3,2,1.7,0,1,0
+79,1,2.9,2,1.5,0,1,0
+80,1,2.6,1,1,0,1,0
+81,1,2.4,1,1.1,0,1,0
+82,1,2.4,1,1,0,1,0
+83,1,2.7,1,1.2,0,1,0
+84,1,2.7,2,1.6,0,1,0
+85,1,3,2,1.5,0,1,0
+86,1,3.4,2,1.6,0,1,0
+87,2,3.1,2,1.5,0,1,0
+88,2,2.3,2,1.3,0,1,0
+89,1,3,2,1.3,0,1,0
+90,1,2.5,2,1.3,0,1,0
+91,1,2.6,2,1.2,0,1,0
+92,1,3,2,1.4,0,1,0
+93,1,2.6,2,1.2,0,1,0
+94,1,2.3,1,1,0,1,0
+95,1,2.7,2,1.3,0,1,0
+96,1,3,2,1.2,0,1,0
+97,1,2.9,2,1.3,0,1,0
+98,2,2.9,2,1.3,0,1,0
+99,1,2.5,1,1.1,0,1,0
+100,1,2.8,2,1.3,0,1,0
+101,2,3.3,2,2.5,0,0,1
+102,1,2.7,2,1.9,0,0,1
+103,2,3,2,2.1,0,0,1
+104,2,2.9,2,1.8,0,0,1
+105,2,3,2,2.2,0,0,1
+106,2,3,2,2.1,0,0,1
+107,1,2.5,2,1.7,0,0,1
+108,2,2.9,2,1.8,0,0,1
+109,2,2.5,2,1.8,0,0,1
+110,2,3.6,2,2.5,0,0,1
+111,2,3.2,2,2,0,0,1
+112,2,2.7,2,1.9,0,0,1
+113,2,3,2,2.1,0,0,1
+114,1,2.5,2,2,0,0,1
+115,1,2.8,2,2.4,0,0,1
+116,2,3.2,2,2.3,0,0,1
+117,2,3,2,1.8,0,0,1
+118,2,3.8,2,2.2,0,0,1
+119,2,2.6,2,2.3,0,0,1
+120,1,2.2,2,1.5,0,0,1
+121,2,3.2,2,2.3,0,0,1
+122,1,2.8,2,2,0,0,1
+123,2,2.8,2,2,0,0,1
+124,2,2.7,2,1.8,0,0,1
+125,2,3.3,2,2.1,0,0,1
+126,2,3.2,2,1.8,0,0,1
+127,2,2.8,2,1.8,0,0,1
+128,1,3,2,1.8,0,0,1
+129,2,2.8,2,2.1,0,0,1
+130,2,3,2,1.6,0,0,1
+131,2,2.8,2,1.9,0,0,1
+132,2,3.8,2,2,0,0,1
+133,2,2.8,2,2.2,0,0,1
+134,2,2.8,2,1.5,0,0,1
+135,1,2.6,2,1.4,0,0,1
+136,2,3,2,2.3,0,0,1
+137,2,3.4,2,2.4,0,0,1
+138,2,3.1,2,1.8,0,0,1
+139,1,3,2,1.8,0,0,1
+140,2,3.1,2,2.1,0,0,1
+141,2,3.1,2,2.4,0,0,1
+142,2,3.1,2,2.3,0,0,1
+143,1,2.7,2,1.9,0,0,1
+144,2,3.2,2,2.3,0,0,1
+145,2,3.3,2,2.5,0,0,1
+146,2,3,2,2.3,0,0,1
+147,2,2.5,2,1.9,0,0,1
+148,2,3,2,2,0,0,1
+149,2,3.4,2,2.3,0,0,1
+150,1,3,2,1.8,0,0,1

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/unary/matrix/ACosTest.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/unary/matrix/ACosTest.dml b/src/test/scripts/functions/unary/matrix/ACosTest.dml
index 0e36efd..3259f57 100644
--- a/src/test/scripts/functions/unary/matrix/ACosTest.dml
+++ b/src/test/scripts/functions/unary/matrix/ACosTest.dml
@@ -19,13 +19,13 @@
 #
 #-------------------------------------------------------------
 
-
-# junit test class: org.apache.sysml.test.integration.functions.unary.matrix.CosTest.java
-
-Vector = read("$$indir$$vector", rows=$$rows$$, cols=1, format="text");
-CosVector = acos(Vector);
-write(CosVector, "$$outdir$$vector", format="text");
-
-Matrix = read("$$indir$$matrix", rows=$$rows$$, cols=$$cols$$, format="text");
-CosMatrix = acos(Matrix);
-write(CosMatrix, "$$outdir$$matrix", format="text");
+
+# junit test class: org.apache.sysml.test.integration.functions.unary.matrix.CosTest.java
+
+Vector = read("$$indir$$vector", rows=$$rows$$, cols=1, format="text");
+CosVector = acos(Vector);
+write(CosVector, "$$outdir$$vector", format="text");
+
+Matrix = read("$$indir$$matrix", rows=$$rows$$, cols=$$cols$$, format="text");
+CosMatrix = acos(Matrix);
+write(CosMatrix, "$$outdir$$matrix", format="text");

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/unary/matrix/ASinTest.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/unary/matrix/ASinTest.dml b/src/test/scripts/functions/unary/matrix/ASinTest.dml
index fce7ac6..07dbf61 100644
--- a/src/test/scripts/functions/unary/matrix/ASinTest.dml
+++ b/src/test/scripts/functions/unary/matrix/ASinTest.dml
@@ -19,13 +19,13 @@
 #
 #-------------------------------------------------------------
 
-
-# junit test class: org.apache.sysml.test.integration.functions.unary.matrix.SinTest.java
-
-Vector = read("$$indir$$vector", rows=$$rows$$, cols=1, format="text");
-SinVector = asin(Vector);
-write(SinVector, "$$outdir$$vector", format="text");
-
-Matrix = read("$$indir$$matrix", rows=$$rows$$, cols=$$cols$$, format="text");
-SinMatrix = asin(Matrix);
-write(SinMatrix, "$$outdir$$matrix", format="text");
+
+# junit test class: org.apache.sysml.test.integration.functions.unary.matrix.SinTest.java
+
+Vector = read("$$indir$$vector", rows=$$rows$$, cols=1, format="text");
+SinVector = asin(Vector);
+write(SinVector, "$$outdir$$vector", format="text");
+
+Matrix = read("$$indir$$matrix", rows=$$rows$$, cols=$$cols$$, format="text");
+SinMatrix = asin(Matrix);
+write(SinMatrix, "$$outdir$$matrix", format="text");

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/unary/matrix/ATanTest.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/unary/matrix/ATanTest.dml b/src/test/scripts/functions/unary/matrix/ATanTest.dml
index 3b9e6a6..e7b1b54 100644
--- a/src/test/scripts/functions/unary/matrix/ATanTest.dml
+++ b/src/test/scripts/functions/unary/matrix/ATanTest.dml
@@ -19,13 +19,13 @@
 #
 #-------------------------------------------------------------
 
-
-# junit test class: org.apache.sysml.test.integration.functions.unary.matrix.TanTest.java
-
-Vector = read("$$indir$$vector", rows=$$rows$$, cols=1, format="text");
-TanVector = atan(Vector);
-write(TanVector, "$$outdir$$vector", format="text");
-
-Matrix = read("$$indir$$matrix", rows=$$rows$$, cols=$$cols$$, format="text");
-TanMatrix = atan(Matrix);
-write(TanMatrix, "$$outdir$$matrix", format="text");
+
+# junit test class: org.apache.sysml.test.integration.functions.unary.matrix.TanTest.java
+
+Vector = read("$$indir$$vector", rows=$$rows$$, cols=1, format="text");
+TanVector = atan(Vector);
+write(TanVector, "$$outdir$$vector", format="text");
+
+Matrix = read("$$indir$$matrix", rows=$$rows$$, cols=$$cols$$, format="text");
+TanMatrix = atan(Matrix);
+write(TanMatrix, "$$outdir$$matrix", format="text");

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/unary/matrix/Ceil.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/unary/matrix/Ceil.R b/src/test/scripts/functions/unary/matrix/Ceil.R
index 8f2c36a..3dcef39 100644
--- a/src/test/scripts/functions/unary/matrix/Ceil.R
+++ b/src/test/scripts/functions/unary/matrix/Ceil.R
@@ -19,15 +19,15 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-A <- as.matrix(readMM(paste(args[1], "math.mtx", sep="")))
-
-R = ceiling(A);
-
-writeMM(as(R, "CsparseMatrix"), paste(args[2], "R", sep="")); 
-
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+A <- as.matrix(readMM(paste(args[1], "math.mtx", sep="")))
+
+R = ceiling(A);
+
+writeMM(as(R, "CsparseMatrix"), paste(args[2], "R", sep="")); 
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/unary/matrix/Cummax.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/unary/matrix/Cummax.R b/src/test/scripts/functions/unary/matrix/Cummax.R
index 4679014..ad2d0ed 100644
--- a/src/test/scripts/functions/unary/matrix/Cummax.R
+++ b/src/test/scripts/functions/unary/matrix/Cummax.R
@@ -19,20 +19,20 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-A = as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
-
-if( nrow(A)>1 ){
-   B = apply(A, 2, cummax);
-} else {
-   B = A;
-}
-
-writeMM(as(B, "CsparseMatrix"), paste(args[2], "B", sep="")); 
-
-
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+A = as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
+
+if( nrow(A)>1 ){
+   B = apply(A, 2, cummax);
+} else {
+   B = A;
+}
+
+writeMM(as(B, "CsparseMatrix"), paste(args[2], "B", sep="")); 
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/unary/matrix/Cummin.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/unary/matrix/Cummin.R b/src/test/scripts/functions/unary/matrix/Cummin.R
index c10a4de..dc3a6b4 100644
--- a/src/test/scripts/functions/unary/matrix/Cummin.R
+++ b/src/test/scripts/functions/unary/matrix/Cummin.R
@@ -19,20 +19,20 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-A = as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
-
-if( nrow(A)>1 ){
-   B = apply(A, 2, cummin);
-} else {
-   B = A;
-}
-
-writeMM(as(B, "CsparseMatrix"), paste(args[2], "B", sep="")); 
-
-
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+A = as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
+
+if( nrow(A)>1 ){
+   B = apply(A, 2, cummin);
+} else {
+   B = A;
+}
+
+writeMM(as(B, "CsparseMatrix"), paste(args[2], "B", sep="")); 
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/unary/matrix/Cumprod.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/unary/matrix/Cumprod.R b/src/test/scripts/functions/unary/matrix/Cumprod.R
index 7f757a4..851c277 100644
--- a/src/test/scripts/functions/unary/matrix/Cumprod.R
+++ b/src/test/scripts/functions/unary/matrix/Cumprod.R
@@ -19,20 +19,20 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-A = as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
-
-if( nrow(A)>1 ){
-   B = apply(A, 2, cumprod);
-} else {
-   B = A;
-}
-
-writeMM(as(B, "CsparseMatrix"), paste(args[2], "B", sep="")); 
-
-
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+A = as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
+
+if( nrow(A)>1 ){
+   B = apply(A, 2, cumprod);
+} else {
+   B = A;
+}
+
+writeMM(as(B, "CsparseMatrix"), paste(args[2], "B", sep="")); 
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/unary/matrix/Cumsum.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/unary/matrix/Cumsum.R b/src/test/scripts/functions/unary/matrix/Cumsum.R
index f59c66e..71e78ff 100644
--- a/src/test/scripts/functions/unary/matrix/Cumsum.R
+++ b/src/test/scripts/functions/unary/matrix/Cumsum.R
@@ -19,20 +19,20 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-A = as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
-
-if( nrow(A)>1 ){
-   B = apply(A, 2, cumsum);
-} else {
-   B = A;
-}
-
-writeMM(as(B, "CsparseMatrix"), paste(args[2], "B", sep="")); 
-
-
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+A = as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
+
+if( nrow(A)>1 ){
+   B = apply(A, 2, cumsum);
+} else {
+   B = A;
+}
+
+writeMM(as(B, "CsparseMatrix"), paste(args[2], "B", sep="")); 
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/unary/matrix/Floor.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/unary/matrix/Floor.R b/src/test/scripts/functions/unary/matrix/Floor.R
index 86daba7..5fa09aa 100644
--- a/src/test/scripts/functions/unary/matrix/Floor.R
+++ b/src/test/scripts/functions/unary/matrix/Floor.R
@@ -19,15 +19,15 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-A <- as.matrix(readMM(paste(args[1], "math.mtx", sep="")))
-
-R = floor(A);
-
-writeMM(as(R, "CsparseMatrix"), paste(args[2], "R", sep="")); 
-
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+A <- as.matrix(readMM(paste(args[1], "math.mtx", sep="")))
+
+R = floor(A);
+
+writeMM(as(R, "CsparseMatrix"), paste(args[2], "R", sep="")); 
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/unary/matrix/Inverse.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/unary/matrix/Inverse.R b/src/test/scripts/functions/unary/matrix/Inverse.R
index acbe0b9..ca3cd98 100644
--- a/src/test/scripts/functions/unary/matrix/Inverse.R
+++ b/src/test/scripts/functions/unary/matrix/Inverse.R
@@ -19,16 +19,16 @@
 #
 #-------------------------------------------------------------
 
-
-
-args <- commandArgs(TRUE)
-library(Matrix)
-
-# Rscript ./test/scripts/functions/unary/matrix/Inverse.R ./test/scripts/functions/unary/matrix/in/A.mtx ./test/scripts/functions/unary/matrix/expected/AI
-
-A = readMM(args[1]); 
-
-AI = solve(A);
-
-writeMM(as(AI, "CsparseMatrix"), args[2]); 
-
+
+
+args <- commandArgs(TRUE)
+library(Matrix)
+
+# Rscript ./test/scripts/functions/unary/matrix/Inverse.R ./test/scripts/functions/unary/matrix/in/A.mtx ./test/scripts/functions/unary/matrix/expected/AI
+
+A = readMM(args[1]); 
+
+AI = solve(A);
+
+writeMM(as(AI, "CsparseMatrix"), args[2]); 
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/unary/matrix/Inverse.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/unary/matrix/Inverse.dml b/src/test/scripts/functions/unary/matrix/Inverse.dml
index f420658..100a72e 100644
--- a/src/test/scripts/functions/unary/matrix/Inverse.dml
+++ b/src/test/scripts/functions/unary/matrix/Inverse.dml
@@ -19,12 +19,12 @@
 #
 #-------------------------------------------------------------
 
-
-
-# Compute matrix inverse
-
-A = read($1);
-
-AI = inv(A);
-
-write(AI, $2);
+
+
+# Compute matrix inverse
+
+A = read($1);
+
+AI = inv(A);
+
+write(AI, $2);

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/unary/matrix/Minus.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/unary/matrix/Minus.R b/src/test/scripts/functions/unary/matrix/Minus.R
index 4d1c04c..04801a8 100644
--- a/src/test/scripts/functions/unary/matrix/Minus.R
+++ b/src/test/scripts/functions/unary/matrix/Minus.R
@@ -19,16 +19,16 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-X = as.matrix(readMM(paste(args[1], "X.mtx", sep="")))
-
-Y = -X;
-
-writeMM(as(Y, "CsparseMatrix"), paste(args[2], "Y", sep="")); 
-
-
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+X = as.matrix(readMM(paste(args[1], "X.mtx", sep="")))
+
+Y = -X;
+
+writeMM(as(Y, "CsparseMatrix"), paste(args[2], "Y", sep="")); 
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/unary/matrix/Minus.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/unary/matrix/Minus.dml b/src/test/scripts/functions/unary/matrix/Minus.dml
index a893fe4..6f09fe5 100644
--- a/src/test/scripts/functions/unary/matrix/Minus.dml
+++ b/src/test/scripts/functions/unary/matrix/Minus.dml
@@ -19,7 +19,7 @@
 #
 #-------------------------------------------------------------
 
-
-X = read($1, rows=$2, cols=$3, format="text");
-Y = -X;
-write(Y, $4, format="text");  
+
+X = read($1, rows=$2, cols=$3, format="text");
+Y = -X;
+write(Y, $4, format="text");  

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/unary/matrix/QRsolve.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/unary/matrix/QRsolve.R b/src/test/scripts/functions/unary/matrix/QRsolve.R
index 10ac5dd..861877f 100644
--- a/src/test/scripts/functions/unary/matrix/QRsolve.R
+++ b/src/test/scripts/functions/unary/matrix/QRsolve.R
@@ -1,44 +1,44 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-args <- commandArgs(TRUE)
-library(Matrix)
-
-# Rscript ./test/scripts/functions/unary/matrix/QRsolve.R ./test/scripts/functions/unary/matrix/in/A.mtx ./test/scripts/functions/unary/matrix/in/y.mtx ./test/scripts/functions/unary/matrix/expected/x
-
-A = readMM(args[1]); #paste(args[1], "A.mtx", sep=""));
-b = readMM(args[2]); #paste(args[1], "b.mtx", sep=""));
-
-m = nrow(A);
-n = ncol(A);
-
-Ab = cbind(as.matrix(A), as.matrix(b));
-
-Ab_qr = qr(Ab);
-Rb = qr.R(Ab_qr); 
-
-R = Rb[1:n, 1:n];
-c = Rb[1:n, (n+1)];
-
-x = solve(R,c);
-
-writeMM(as(x, "CsparseMatrix"), args[3]); #paste(args[2], "x.mtx", sep=""));
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+args <- commandArgs(TRUE)
+library(Matrix)
+
+# Rscript ./test/scripts/functions/unary/matrix/QRsolve.R ./test/scripts/functions/unary/matrix/in/A.mtx ./test/scripts/functions/unary/matrix/in/y.mtx ./test/scripts/functions/unary/matrix/expected/x
+
+A = readMM(args[1]); #paste(args[1], "A.mtx", sep=""));
+b = readMM(args[2]); #paste(args[1], "b.mtx", sep=""));
+
+m = nrow(A);
+n = ncol(A);
+
+Ab = cbind(as.matrix(A), as.matrix(b));
+
+Ab_qr = qr(Ab);
+Rb = qr.R(Ab_qr); 
+
+R = Rb[1:n, 1:n];
+c = Rb[1:n, (n+1)];
+
+x = solve(R,c);
+
+writeMM(as(x, "CsparseMatrix"), args[3]); #paste(args[2], "x.mtx", sep=""));
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/unary/matrix/QRsolve.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/unary/matrix/QRsolve.dml b/src/test/scripts/functions/unary/matrix/QRsolve.dml
index 1852aaa..c418901 100644
--- a/src/test/scripts/functions/unary/matrix/QRsolve.dml
+++ b/src/test/scripts/functions/unary/matrix/QRsolve.dml
@@ -1,42 +1,42 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# Ax = b: solve for x via QR decomposition
-
-#solve = externalFunction(Matrix[Double] a, Matrix[Double] b)  return(Matrix[Double] c)
-#  implemented in (classname="org.apache.sysml.udf.lib.LinearSolverWrapperCP",exectype="mem")   
-
-A = read($1);
-b = read($2);
-
-m = nrow(A);
-n = ncol(A);
-
-Ab = append(A,b);
-
-[Hb,Rb] = qr(Ab);
-
-R = Rb[1:n, 1:n];
-c = Rb[1:n, (n+1)]
-
-x = solve(R,c);
-
-write(x, $3);
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# Ax = b: solve for x via QR decomposition
+
+#solve = externalFunction(Matrix[Double] a, Matrix[Double] b)  return(Matrix[Double] c)
+#  implemented in (classname="org.apache.sysml.udf.lib.LinearSolverWrapperCP",exectype="mem")   
+
+A = read($1);
+b = read($2);
+
+m = nrow(A);
+n = ncol(A);
+
+Ab = append(A,b);
+
+[Hb,Rb] = qr(Ab);
+
+R = Rb[1:n, 1:n];
+c = Rb[1:n, (n+1)]
+
+x = solve(R,c);
+
+write(x, $3);

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/unary/matrix/RoundTest.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/unary/matrix/RoundTest.R b/src/test/scripts/functions/unary/matrix/RoundTest.R
index 3090949..8237d5c 100644
--- a/src/test/scripts/functions/unary/matrix/RoundTest.R
+++ b/src/test/scripts/functions/unary/matrix/RoundTest.R
@@ -19,15 +19,15 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-A <- as.matrix(readMM(paste(args[1], "math.mtx", sep="")))
-
-R = round(A);
-
-writeMM(as(R, "CsparseMatrix"), paste(args[2], "R", sep="")); 
-
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+A <- as.matrix(readMM(paste(args[1], "math.mtx", sep="")))
+
+R = round(A);
+
+writeMM(as(R, "CsparseMatrix"), paste(args[2], "R", sep="")); 
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/unary/matrix/SProp.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/unary/matrix/SProp.R b/src/test/scripts/functions/unary/matrix/SProp.R
index 7d54148..bdfa9d4 100644
--- a/src/test/scripts/functions/unary/matrix/SProp.R
+++ b/src/test/scripts/functions/unary/matrix/SProp.R
@@ -19,16 +19,16 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-A = as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
-
-B = A * (1-A);
-
-writeMM(as(B, "CsparseMatrix"), paste(args[2], "B", sep="")); 
-
-
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+A = as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
+
+B = A * (1-A);
+
+writeMM(as(B, "CsparseMatrix"), paste(args[2], "B", sep="")); 
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/unary/matrix/SelPos.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/unary/matrix/SelPos.R b/src/test/scripts/functions/unary/matrix/SelPos.R
index cbe9941..1dbbcb7 100644
--- a/src/test/scripts/functions/unary/matrix/SelPos.R
+++ b/src/test/scripts/functions/unary/matrix/SelPos.R
@@ -1,34 +1,34 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-A = as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
-
-B = A*(A>0);
-
-writeMM(as(B, "CsparseMatrix"), paste(args[2], "B", sep="")); 
-
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+A = as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
+
+B = A*(A>0);
+
+writeMM(as(B, "CsparseMatrix"), paste(args[2], "B", sep="")); 
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/unary/matrix/Sigmoid.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/unary/matrix/Sigmoid.R b/src/test/scripts/functions/unary/matrix/Sigmoid.R
index d1c3be2..78cf8da 100644
--- a/src/test/scripts/functions/unary/matrix/Sigmoid.R
+++ b/src/test/scripts/functions/unary/matrix/Sigmoid.R
@@ -19,16 +19,16 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-A = as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
-
-B = 1 / (1+exp(-A));
-
-writeMM(as(B, "CsparseMatrix"), paste(args[2], "B", sep="")); 
-
-
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+A = as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
+
+B = 1 / (1+exp(-A));
+
+writeMM(as(B, "CsparseMatrix"), paste(args[2], "B", sep="")); 
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/unary/matrix/Sign1.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/unary/matrix/Sign1.R b/src/test/scripts/functions/unary/matrix/Sign1.R
index 837ba80..f4bfb09 100644
--- a/src/test/scripts/functions/unary/matrix/Sign1.R
+++ b/src/test/scripts/functions/unary/matrix/Sign1.R
@@ -1,33 +1,33 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-A = as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
-B = sign(A);
-
-writeMM(as(B, "CsparseMatrix"), paste(args[2], "B", sep="")); 
-
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+A = as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
+B = sign(A);
+
+writeMM(as(B, "CsparseMatrix"), paste(args[2], "B", sep="")); 
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/unary/matrix/Sign2.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/unary/matrix/Sign2.R b/src/test/scripts/functions/unary/matrix/Sign2.R
index 837ba80..f4bfb09 100644
--- a/src/test/scripts/functions/unary/matrix/Sign2.R
+++ b/src/test/scripts/functions/unary/matrix/Sign2.R
@@ -1,33 +1,33 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-A = as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
-B = sign(A);
-
-writeMM(as(B, "CsparseMatrix"), paste(args[2], "B", sep="")); 
-
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+A = as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
+B = sign(A);
+
+writeMM(as(B, "CsparseMatrix"), paste(args[2], "B", sep="")); 
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/unary/matrix/eigen.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/unary/matrix/eigen.dml b/src/test/scripts/functions/unary/matrix/eigen.dml
index 5ec2a3e..e4c7c46 100644
--- a/src/test/scripts/functions/unary/matrix/eigen.dml
+++ b/src/test/scripts/functions/unary/matrix/eigen.dml
@@ -19,42 +19,42 @@
 #
 #-------------------------------------------------------------
 
-
-/*
- * DML script to test Eigen Factorization
- */
-
-A = read($1);
-A = t(A) %*% A; # make the input matrix symmetric
-
-[eval, evec] = eigen(A);
-
-/*
-B = evec %*% diag(eval) %*% t(evec);
-diff = sum(A - B);
-D = matrix(1,1,1);
-D = diff*D;
-*/
-
-numEval = $2;
-D = matrix(1, numEval, 1);
-for ( i in 1:numEval ) {
-    Av = A %*% evec[,i];
-    rhs = castAsScalar(eval[i,1]) * evec[,i];
-    diff = sum(Av-rhs);
-    D[i,1] = diff;
-}
-
-/*
-# TODO: dummy if() must be removed
-v = evec[,1];
-Av = A %*% v;
-rhs = castAsScalar(eval[1,1]) * evec[,1];
-diff = sum(Av-rhs);
-
-D = matrix(1,1,1);
-D = diff*D;
-*/
-
-write(D, $3);
-
+
+/*
+ * DML script to test Eigen Factorization
+ */
+
+A = read($1);
+A = t(A) %*% A; # make the input matrix symmetric
+
+[eval, evec] = eigen(A);
+
+/*
+B = evec %*% diag(eval) %*% t(evec);
+diff = sum(A - B);
+D = matrix(1,1,1);
+D = diff*D;
+*/
+
+numEval = $2;
+D = matrix(1, numEval, 1);
+for ( i in 1:numEval ) {
+    Av = A %*% evec[,i];
+    rhs = castAsScalar(eval[i,1]) * evec[,i];
+    diff = sum(Av-rhs);
+    D[i,1] = diff;
+}
+
+/*
+# TODO: dummy if() must be removed
+v = evec[,1];
+Av = A %*% v;
+rhs = castAsScalar(eval[1,1]) * evec[,1];
+diff = sum(Av-rhs);
+
+D = matrix(1,1,1);
+D = diff*D;
+*/
+
+write(D, $3);
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/unary/matrix/lu.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/unary/matrix/lu.dml b/src/test/scripts/functions/unary/matrix/lu.dml
index 24c9a68..8ad83a0 100644
--- a/src/test/scripts/functions/unary/matrix/lu.dml
+++ b/src/test/scripts/functions/unary/matrix/lu.dml
@@ -19,21 +19,21 @@
 #
 #-------------------------------------------------------------
 
-
-/*
- * DML script to test LU Factorization
- */
-
-A = read($1);
-
-[P, L, U] = lu(A);
-
-PA = P %*% A;
-LU = L %*% U;
-
-diff = sum(PA - LU);
-D = matrix(1,1,1);
-D = diff*D;
-
-write(D, $2);
-
+
+/*
+ * DML script to test LU Factorization
+ */
+
+A = read($1);
+
+[P, L, U] = lu(A);
+
+PA = P %*% A;
+LU = L %*% U;
+
+diff = sum(PA - LU);
+D = matrix(1,1,1);
+D = diff*D;
+
+write(D, $2);
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/unary/matrix/qr.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/unary/matrix/qr.dml b/src/test/scripts/functions/unary/matrix/qr.dml
index 47b87e2..184ff80 100644
--- a/src/test/scripts/functions/unary/matrix/qr.dml
+++ b/src/test/scripts/functions/unary/matrix/qr.dml
@@ -19,53 +19,53 @@
 #
 #-------------------------------------------------------------
 
-
-/*
- * DML script to test QR factorization
- */
-
-A = read($1);
-
-[H,R] = qr(A);
-
-# Compute Q from Householder vectors
-m = nrow(A);
-n = ncol(A);
-
-/*
-ones = matrix(1, m, 1);
-eye = diag(ones);
-Q = eye;
-for( j in n:1 ) {
-    v = H[,j];
-    Qj = eye - 2 * (v %*% t(v))/castAsScalar((t(v)%*%v));
-    Q = Qj %*% Q;
-}
-
-# Q must be orthogonal i.e., Q = Q^{-1}
-I = Q %*% t(Q);
-
-# Multiplying Q and R, we must get back original A
-B = Q %*% R;
-
-d = sum(A-B);
-print("d = " + d);
-diff = 0; #sum(A - B);
-#   write the difference between original A and computed B as a dummy 1x1 matrix
-D = matrix(1,1,1);
-D = diff*D;
-
-write(I, $2);
-write(D, $3);
-*/
-
-# Compute t(Q)%*%b using computed householder vectors
-b = read($2);
-tQb = matrix(0, m, 1);
-min_mn = min(m,n);
-for(j in min_mn:1) {
-  v = H[,j];
-  b = b - 2 * (v %*% (t(v) %*% b)); 
-}
-
-write(b, $3);
+
+/*
+ * DML script to test QR factorization
+ */
+
+A = read($1);
+
+[H,R] = qr(A);
+
+# Compute Q from Householder vectors
+m = nrow(A);
+n = ncol(A);
+
+/*
+ones = matrix(1, m, 1);
+eye = diag(ones);
+Q = eye;
+for( j in n:1 ) {
+    v = H[,j];
+    Qj = eye - 2 * (v %*% t(v))/castAsScalar((t(v)%*%v));
+    Q = Qj %*% Q;
+}
+
+# Q must be orthogonal i.e., Q = Q^{-1}
+I = Q %*% t(Q);
+
+# Multiplying Q and R, we must get back original A
+B = Q %*% R;
+
+d = sum(A-B);
+print("d = " + d);
+diff = 0; #sum(A - B);
+#   write the difference between original A and computed B as a dummy 1x1 matrix
+D = matrix(1,1,1);
+D = diff*D;
+
+write(I, $2);
+write(D, $3);
+*/
+
+# Compute t(Q)%*%b using computed householder vectors
+b = read($2);
+tQb = matrix(0, m, 1);
+min_mn = min(m,n);
+for(j in min_mn:1) {
+  v = H[,j];
+  b = b - 2 * (v %*% (t(v) %*% b)); 
+}
+
+write(b, $3);

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/unary/matrix/removeEmpty2.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/unary/matrix/removeEmpty2.dml b/src/test/scripts/functions/unary/matrix/removeEmpty2.dml
index c59641b..a8df80d 100644
--- a/src/test/scripts/functions/unary/matrix/removeEmpty2.dml
+++ b/src/test/scripts/functions/unary/matrix/removeEmpty2.dml
@@ -19,8 +19,8 @@
 #
 #-------------------------------------------------------------
 
-
-V = read($1);
-V1 = removeEmpty(target=V, margin="rows");
-Vp = removeEmpty(target=V1, margin="cols");
+
+V = read($1);
+V1 = removeEmpty(target=V, margin="rows");
+Vp = removeEmpty(target=V1, margin="cols");
 write(Vp, $3);  
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/unary/matrix/removeEmpty3.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/unary/matrix/removeEmpty3.dml b/src/test/scripts/functions/unary/matrix/removeEmpty3.dml
index afdcea3..15fd1a8 100644
--- a/src/test/scripts/functions/unary/matrix/removeEmpty3.dml
+++ b/src/test/scripts/functions/unary/matrix/removeEmpty3.dml
@@ -19,9 +19,9 @@
 #
 #-------------------------------------------------------------
 
-
-V = read($1, rows=$2, cols=$3, format="text");
-V1 = diag(V); #diagV2M
-V2 = removeEmpty(target=V1, margin=$4);
-Vp = rowSums(V2);
+
+V = read($1, rows=$2, cols=$3, format="text");
+V1 = diag(V); #diagV2M
+V2 = removeEmpty(target=V1, margin=$4);
+Vp = rowSums(V2);
 write(Vp, $5, format="text");  
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/unary/matrix/removeEmpty4.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/unary/matrix/removeEmpty4.dml b/src/test/scripts/functions/unary/matrix/removeEmpty4.dml
index 4137b52..99f95a2 100644
--- a/src/test/scripts/functions/unary/matrix/removeEmpty4.dml
+++ b/src/test/scripts/functions/unary/matrix/removeEmpty4.dml
@@ -19,8 +19,8 @@
 #
 #-------------------------------------------------------------
 
-
+
 V = read($1);
-I = read($2);
-Vp = removeEmpty(target=V, margin=$3, select=I);
+I = read($2);
+Vp = removeEmpty(target=V, margin=$3, select=I);
 write(Vp, $4);  
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/unary/matrix/replace_Infinity.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/unary/matrix/replace_Infinity.R b/src/test/scripts/functions/unary/matrix/replace_Infinity.R
index c3cbcf5..2f42e42 100644
--- a/src/test/scripts/functions/unary/matrix/replace_Infinity.R
+++ b/src/test/scripts/functions/unary/matrix/replace_Infinity.R
@@ -19,16 +19,16 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
-
-C <- replace(A, is.infinite(A), 4711);
-
-writeMM(as(C, "CsparseMatrix"), paste(args[3], "C", sep="")); 
-
-
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
+
+C <- replace(A, is.infinite(A), 4711);
+
+writeMM(as(C, "CsparseMatrix"), paste(args[3], "C", sep="")); 
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/unary/matrix/replace_Infinity.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/unary/matrix/replace_Infinity.dml b/src/test/scripts/functions/unary/matrix/replace_Infinity.dml
index d262b43..233d771 100644
--- a/src/test/scripts/functions/unary/matrix/replace_Infinity.dml
+++ b/src/test/scripts/functions/unary/matrix/replace_Infinity.dml
@@ -19,8 +19,8 @@
 #
 #-------------------------------------------------------------
 
-
-A = read($1, rows=$2, cols=$3, format="text");
-if(1==1){} #for replace test in mappers
-C = replace(target=A, pattern=1/0, replacement=4711);
+
+A = read($1, rows=$2, cols=$3, format="text");
+if(1==1){} #for replace test in mappers
+C = replace(target=A, pattern=1/0, replacement=4711);
 write(C, $4, format="text");  
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/unary/matrix/replace_NInfinity.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/unary/matrix/replace_NInfinity.R b/src/test/scripts/functions/unary/matrix/replace_NInfinity.R
index c3cbcf5..2f42e42 100644
--- a/src/test/scripts/functions/unary/matrix/replace_NInfinity.R
+++ b/src/test/scripts/functions/unary/matrix/replace_NInfinity.R
@@ -19,16 +19,16 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
-
-C <- replace(A, is.infinite(A), 4711);
-
-writeMM(as(C, "CsparseMatrix"), paste(args[3], "C", sep="")); 
-
-
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
+
+C <- replace(A, is.infinite(A), 4711);
+
+writeMM(as(C, "CsparseMatrix"), paste(args[3], "C", sep="")); 
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/unary/matrix/replace_NInfinity.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/unary/matrix/replace_NInfinity.dml b/src/test/scripts/functions/unary/matrix/replace_NInfinity.dml
index 97ad0c9..42a592d 100644
--- a/src/test/scripts/functions/unary/matrix/replace_NInfinity.dml
+++ b/src/test/scripts/functions/unary/matrix/replace_NInfinity.dml
@@ -19,8 +19,8 @@
 #
 #-------------------------------------------------------------
 
-
-A = read($1, rows=$2, cols=$3, format="text");
-if(1==1){} #for replace test in mappers
-C = replace(target=A, pattern=-1/0, replacement=4711);
+
+A = read($1, rows=$2, cols=$3, format="text");
+if(1==1){} #for replace test in mappers
+C = replace(target=A, pattern=-1/0, replacement=4711);
 write(C, $4, format="text");  
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/unary/matrix/replace_NaN.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/unary/matrix/replace_NaN.R b/src/test/scripts/functions/unary/matrix/replace_NaN.R
index 3fa5576..86e34ab 100644
--- a/src/test/scripts/functions/unary/matrix/replace_NaN.R
+++ b/src/test/scripts/functions/unary/matrix/replace_NaN.R
@@ -19,16 +19,16 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
-
-C <- replace(A, is.nan(A), 4711);
-
-writeMM(as(C, "CsparseMatrix"), paste(args[3], "C", sep="")); 
-
-
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
+
+C <- replace(A, is.nan(A), 4711);
+
+writeMM(as(C, "CsparseMatrix"), paste(args[3], "C", sep="")); 
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/unary/matrix/replace_NaN.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/unary/matrix/replace_NaN.dml b/src/test/scripts/functions/unary/matrix/replace_NaN.dml
index d61ca6c..edc815c 100644
--- a/src/test/scripts/functions/unary/matrix/replace_NaN.dml
+++ b/src/test/scripts/functions/unary/matrix/replace_NaN.dml
@@ -19,8 +19,8 @@
 #
 #-------------------------------------------------------------
 
-
-A = read($1, rows=$2, cols=$3, format="text");
-if(1==1){} #for replace test in mappers
-C = replace(target=A, pattern=0/0, replacement=4711);
+
+A = read($1, rows=$2, cols=$3, format="text");
+if(1==1){} #for replace test in mappers
+C = replace(target=A, pattern=0/0, replacement=4711);
 write(C, $4, format="text");  
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/unary/matrix/replace_maxmin.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/unary/matrix/replace_maxmin.R b/src/test/scripts/functions/unary/matrix/replace_maxmin.R
index 4352979..f1dea15 100644
--- a/src/test/scripts/functions/unary/matrix/replace_maxmin.R
+++ b/src/test/scripts/functions/unary/matrix/replace_maxmin.R
@@ -19,16 +19,16 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
-
-C <- replace(A, A==as.numeric(min(A)), max(A));
-
-writeMM(as(C, "CsparseMatrix"), paste(args[3], "C", sep="")); 
-
-
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
+
+C <- replace(A, A==as.numeric(min(A)), max(A));
+
+writeMM(as(C, "CsparseMatrix"), paste(args[3], "C", sep="")); 
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/unary/matrix/replace_maxmin.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/unary/matrix/replace_maxmin.dml b/src/test/scripts/functions/unary/matrix/replace_maxmin.dml
index d36bf1f..d612ff3 100644
--- a/src/test/scripts/functions/unary/matrix/replace_maxmin.dml
+++ b/src/test/scripts/functions/unary/matrix/replace_maxmin.dml
@@ -19,7 +19,7 @@
 #
 #-------------------------------------------------------------
 
-
-A = read($1, rows=$2, cols=$3, format="text");
-C = replace(target=A, pattern=min(A), replacement=max(A));
+
+A = read($1, rows=$2, cols=$3, format="text");
+C = replace(target=A, pattern=min(A), replacement=max(A));
 write(C, $4, format="text");  
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/unary/matrix/replace_value.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/unary/matrix/replace_value.R b/src/test/scripts/functions/unary/matrix/replace_value.R
index d615885..bfce85b 100644
--- a/src/test/scripts/functions/unary/matrix/replace_value.R
+++ b/src/test/scripts/functions/unary/matrix/replace_value.R
@@ -19,16 +19,16 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
-
-C <- replace(A, A==as.numeric(args[2]), 4711);
-
-writeMM(as(C, "CsparseMatrix"), paste(args[3], "C", sep="")); 
-
-
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
+
+C <- replace(A, A==as.numeric(args[2]), 4711);
+
+writeMM(as(C, "CsparseMatrix"), paste(args[3], "C", sep="")); 
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/unary/matrix/replace_value.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/unary/matrix/replace_value.dml b/src/test/scripts/functions/unary/matrix/replace_value.dml
index 4e81999..8bc7dc0 100644
--- a/src/test/scripts/functions/unary/matrix/replace_value.dml
+++ b/src/test/scripts/functions/unary/matrix/replace_value.dml
@@ -19,7 +19,7 @@
 #
 #-------------------------------------------------------------
 
-
-A = read($1, rows=$2, cols=$3, format="text");
-C = replace(target=A, pattern=$5, replacement=4711);
+
+A = read($1, rows=$2, cols=$3, format="text");
+C = replace(target=A, pattern=$5, replacement=4711);
 write(C, $4, format="text");  
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/unary/scalar/ACosTest.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/unary/scalar/ACosTest.dml b/src/test/scripts/functions/unary/scalar/ACosTest.dml
index aed2f41..8ef88f1 100644
--- a/src/test/scripts/functions/unary/scalar/ACosTest.dml
+++ b/src/test/scripts/functions/unary/scalar/ACosTest.dml
@@ -19,15 +19,15 @@
 #
 #-------------------------------------------------------------
 
-
-# junit test class: org.apache.sysml.test.integration.functions.unary.scalar.CosTest.java
-
-$$readhelper$$
-
-IntComputation = acos($$int$$);
-IntComputationHelper = IntComputation * Helper;
-write(IntComputationHelper, "$$outdir$$int", format="text");
-
-DoubleComputation = acos($$double$$);
-DoubleComputationHelper = DoubleComputation * Helper;
-write(DoubleComputationHelper, "$$outdir$$double", format="text");
+
+# junit test class: org.apache.sysml.test.integration.functions.unary.scalar.CosTest.java
+
+$$readhelper$$
+
+IntComputation = acos($$int$$);
+IntComputationHelper = IntComputation * Helper;
+write(IntComputationHelper, "$$outdir$$int", format="text");
+
+DoubleComputation = acos($$double$$);
+DoubleComputationHelper = DoubleComputation * Helper;
+write(DoubleComputationHelper, "$$outdir$$double", format="text");

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/unary/scalar/ASinTest.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/unary/scalar/ASinTest.dml b/src/test/scripts/functions/unary/scalar/ASinTest.dml
index 7add846..033e151 100644
--- a/src/test/scripts/functions/unary/scalar/ASinTest.dml
+++ b/src/test/scripts/functions/unary/scalar/ASinTest.dml
@@ -19,15 +19,15 @@
 #
 #-------------------------------------------------------------
 
-
-# junit test class: org.apache.sysml.test.integration.functions.unary.scalar.SinTest.java
-
-$$readhelper$$
-
-IntComputation = asin($$int$$);
-IntComputationHelper = IntComputation * Helper;
-write(IntComputationHelper, "$$outdir$$int", format="text");
-
-DoubleComputation = asin($$double$$);
-DoubleComputationHelper = DoubleComputation * Helper;
-write(DoubleComputationHelper, "$$outdir$$double", format="text");
+
+# junit test class: org.apache.sysml.test.integration.functions.unary.scalar.SinTest.java
+
+$$readhelper$$
+
+IntComputation = asin($$int$$);
+IntComputationHelper = IntComputation * Helper;
+write(IntComputationHelper, "$$outdir$$int", format="text");
+
+DoubleComputation = asin($$double$$);
+DoubleComputationHelper = DoubleComputation * Helper;
+write(DoubleComputationHelper, "$$outdir$$double", format="text");

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/unary/scalar/ATanTest.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/unary/scalar/ATanTest.dml b/src/test/scripts/functions/unary/scalar/ATanTest.dml
index cd4a9a3..8910889 100644
--- a/src/test/scripts/functions/unary/scalar/ATanTest.dml
+++ b/src/test/scripts/functions/unary/scalar/ATanTest.dml
@@ -19,15 +19,15 @@
 #
 #-------------------------------------------------------------
 
-
-# junit test class: org.apache.sysml.test.integration.functions.unary.scalar.TanTest.java
-
-$$readhelper$$
-
-IntComputation = atan($$int$$);
-IntComputationHelper = IntComputation * Helper;
-write(IntComputationHelper, "$$outdir$$int", format="text");
-
-DoubleComputation = atan($$double$$);
-DoubleComputationHelper = DoubleComputation * Helper;
-write(DoubleComputationHelper, "$$outdir$$double", format="text");
+
+# junit test class: org.apache.sysml.test.integration.functions.unary.scalar.TanTest.java
+
+$$readhelper$$
+
+IntComputation = atan($$int$$);
+IntComputationHelper = IntComputation * Helper;
+write(IntComputationHelper, "$$outdir$$int", format="text");
+
+DoubleComputation = atan($$double$$);
+DoubleComputationHelper = DoubleComputation * Helper;
+write(DoubleComputationHelper, "$$outdir$$double", format="text");

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/unary/scalar/DFTest_CHISQ.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/unary/scalar/DFTest_CHISQ.R b/src/test/scripts/functions/unary/scalar/DFTest_CHISQ.R
index 1ae7d73..5efe975 100644
--- a/src/test/scripts/functions/unary/scalar/DFTest_CHISQ.R
+++ b/src/test/scripts/functions/unary/scalar/DFTest_CHISQ.R
@@ -19,19 +19,19 @@
 #
 #-------------------------------------------------------------
 
-
-
-args <- commandArgs(TRUE)
-library(Matrix)
-
-qtle = qchisq(as.numeric(args[1]), df=as.numeric(args[2]));
-p = pchisq(qtle, df=as.numeric(args[2]));
-pl = pchisq(qtle, df=as.numeric(args[2]), lower.tail=FALSE);
-
-out = matrix(0,nrow=3, ncol=1);
-out[1,1] = qtle;
-out[2,1] = p;
-out[3,1] = pl;
-
-writeMM(as(out, "CsparseMatrix"), args[3]); 
-
+
+
+args <- commandArgs(TRUE)
+library(Matrix)
+
+qtle = qchisq(as.numeric(args[1]), df=as.numeric(args[2]));
+p = pchisq(qtle, df=as.numeric(args[2]));
+pl = pchisq(qtle, df=as.numeric(args[2]), lower.tail=FALSE);
+
+out = matrix(0,nrow=3, ncol=1);
+out[1,1] = qtle;
+out[2,1] = p;
+out[3,1] = pl;
+
+writeMM(as(out, "CsparseMatrix"), args[3]); 
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/unary/scalar/DFTest_CHISQ.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/unary/scalar/DFTest_CHISQ.dml b/src/test/scripts/functions/unary/scalar/DFTest_CHISQ.dml
index 75f40a9..ee1acd3 100644
--- a/src/test/scripts/functions/unary/scalar/DFTest_CHISQ.dml
+++ b/src/test/scripts/functions/unary/scalar/DFTest_CHISQ.dml
@@ -19,15 +19,15 @@
 #
 #-------------------------------------------------------------
 
-
-
-q = qchisq(target=$1, df=$2);
-p = pchisq(target=q, df=$2);
-pl = pchisq(target=q, df=$2, lower.tail=FALSE);
-
-out = matrix(0, rows=3, cols=1);
-out[1,1] = q;
-out[2,1] = p;
-out[3,1] = pl;
-
-write(out, $3);
+
+
+q = qchisq(target=$1, df=$2);
+p = pchisq(target=q, df=$2);
+pl = pchisq(target=q, df=$2, lower.tail=FALSE);
+
+out = matrix(0, rows=3, cols=1);
+out[1,1] = q;
+out[2,1] = p;
+out[3,1] = pl;
+
+write(out, $3);

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/unary/scalar/DFTest_EXP.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/unary/scalar/DFTest_EXP.R b/src/test/scripts/functions/unary/scalar/DFTest_EXP.R
index ab85886..d99295c 100644
--- a/src/test/scripts/functions/unary/scalar/DFTest_EXP.R
+++ b/src/test/scripts/functions/unary/scalar/DFTest_EXP.R
@@ -19,19 +19,19 @@
 #
 #-------------------------------------------------------------
 
-
-
-args <- commandArgs(TRUE)
-library(Matrix)
-
-qtle = qexp(as.numeric(args[1]), rate=as.numeric(args[2]));
-p = pexp(qtle, rate=as.numeric(args[2]));
-pl = pexp(qtle, rate=as.numeric(args[2]), lower.tail=F);
-
-out = matrix(0,nrow=3, ncol=1);
-out[1,1] = qtle;
-out[2,1] = p;
-out[3,1] = pl;
-
-writeMM(as(out, "CsparseMatrix"), args[3]); 
-
+
+
+args <- commandArgs(TRUE)
+library(Matrix)
+
+qtle = qexp(as.numeric(args[1]), rate=as.numeric(args[2]));
+p = pexp(qtle, rate=as.numeric(args[2]));
+pl = pexp(qtle, rate=as.numeric(args[2]), lower.tail=F);
+
+out = matrix(0,nrow=3, ncol=1);
+out[1,1] = qtle;
+out[2,1] = p;
+out[3,1] = pl;
+
+writeMM(as(out, "CsparseMatrix"), args[3]); 
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/unary/scalar/DFTest_EXP.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/unary/scalar/DFTest_EXP.dml b/src/test/scripts/functions/unary/scalar/DFTest_EXP.dml
index 260c429..54082e0 100644
--- a/src/test/scripts/functions/unary/scalar/DFTest_EXP.dml
+++ b/src/test/scripts/functions/unary/scalar/DFTest_EXP.dml
@@ -19,15 +19,15 @@
 #
 #-------------------------------------------------------------
 
-
-
-q = qexp(target=$1, rate=$2);
-p = pexp(target=q, rate=$2);
-pl = pexp(target=q, rate=$2, lower.tail=FALSE);
-
-out = matrix(0, rows=3, cols=1);
-out[1,1] = q;
-out[2,1] = p;
-out[3,1] = pl;
-
-write(out, $3);
+
+
+q = qexp(target=$1, rate=$2);
+p = pexp(target=q, rate=$2);
+pl = pexp(target=q, rate=$2, lower.tail=FALSE);
+
+out = matrix(0, rows=3, cols=1);
+out[1,1] = q;
+out[2,1] = p;
+out[3,1] = pl;
+
+write(out, $3);

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/unary/scalar/DFTest_EXP_NOPARAMS.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/unary/scalar/DFTest_EXP_NOPARAMS.R b/src/test/scripts/functions/unary/scalar/DFTest_EXP_NOPARAMS.R
index faa75e3..5d15190 100644
--- a/src/test/scripts/functions/unary/scalar/DFTest_EXP_NOPARAMS.R
+++ b/src/test/scripts/functions/unary/scalar/DFTest_EXP_NOPARAMS.R
@@ -19,19 +19,19 @@
 #
 #-------------------------------------------------------------
 
-
-
-args <- commandArgs(TRUE)
-library(Matrix)
-
-qtle = qexp(as.numeric(args[1])); # default rate = 1.0
-p = pexp(qtle);
-pl = pexp(qtle, lower.tail=F);
-
-out = matrix(0,nrow=3, ncol=1);
-out[1,1] = qtle;
-out[2,1] = p;
-out[3,1] = pl;
-
-writeMM(as(out, "CsparseMatrix"), args[2]); 
-
+
+
+args <- commandArgs(TRUE)
+library(Matrix)
+
+qtle = qexp(as.numeric(args[1])); # default rate = 1.0
+p = pexp(qtle);
+pl = pexp(qtle, lower.tail=F);
+
+out = matrix(0,nrow=3, ncol=1);
+out[1,1] = qtle;
+out[2,1] = p;
+out[3,1] = pl;
+
+writeMM(as(out, "CsparseMatrix"), args[2]); 
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/unary/scalar/DFTest_EXP_NOPARAMS.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/unary/scalar/DFTest_EXP_NOPARAMS.dml b/src/test/scripts/functions/unary/scalar/DFTest_EXP_NOPARAMS.dml
index 057a30b..3e64fe8 100644
--- a/src/test/scripts/functions/unary/scalar/DFTest_EXP_NOPARAMS.dml
+++ b/src/test/scripts/functions/unary/scalar/DFTest_EXP_NOPARAMS.dml
@@ -19,15 +19,15 @@
 #
 #-------------------------------------------------------------
 
-
-
-q = qexp(target=$1); # default rate=1.0
-p = pexp(target=q);
-pl = pexp(target=q, lower.tail=FALSE);
-
-out = matrix(0, rows=3, cols=1);
-out[1,1] = q;
-out[2,1] = p;
-out[3,1] = pl;
-
-write(out, $2);
+
+
+q = qexp(target=$1); # default rate=1.0
+p = pexp(target=q);
+pl = pexp(target=q, lower.tail=FALSE);
+
+out = matrix(0, rows=3, cols=1);
+out[1,1] = q;
+out[2,1] = p;
+out[3,1] = pl;
+
+write(out, $2);

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/unary/scalar/DFTest_F.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/unary/scalar/DFTest_F.R b/src/test/scripts/functions/unary/scalar/DFTest_F.R
index b6d96a2..49524a8 100644
--- a/src/test/scripts/functions/unary/scalar/DFTest_F.R
+++ b/src/test/scripts/functions/unary/scalar/DFTest_F.R
@@ -19,19 +19,19 @@
 #
 #-------------------------------------------------------------
 
-
-
-args <- commandArgs(TRUE)
-library(Matrix)
-
-qtle = qf(as.numeric(args[1]), df1=as.numeric(args[2]), df2=as.numeric(args[3]));
-p = pf(qtle, df1=as.numeric(args[2]), df2=as.numeric(args[3]));
-pl = pf(qtle, df1=as.numeric(args[2]), df2=as.numeric(args[3]), lower.tail=F);
-
-out = matrix(0,nrow=3, ncol=1);
-out[1,1] = qtle;
-out[2,1] = p;
-out[3,1] = pl;
-
-writeMM(as(out, "CsparseMatrix"), args[4]); 
-
+
+
+args <- commandArgs(TRUE)
+library(Matrix)
+
+qtle = qf(as.numeric(args[1]), df1=as.numeric(args[2]), df2=as.numeric(args[3]));
+p = pf(qtle, df1=as.numeric(args[2]), df2=as.numeric(args[3]));
+pl = pf(qtle, df1=as.numeric(args[2]), df2=as.numeric(args[3]), lower.tail=F);
+
+out = matrix(0,nrow=3, ncol=1);
+out[1,1] = qtle;
+out[2,1] = p;
+out[3,1] = pl;
+
+writeMM(as(out, "CsparseMatrix"), args[4]); 
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/unary/scalar/DFTest_F.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/unary/scalar/DFTest_F.dml b/src/test/scripts/functions/unary/scalar/DFTest_F.dml
index 332b729..64162b2 100644
--- a/src/test/scripts/functions/unary/scalar/DFTest_F.dml
+++ b/src/test/scripts/functions/unary/scalar/DFTest_F.dml
@@ -19,15 +19,15 @@
 #
 #-------------------------------------------------------------
 
-
-
-q = qf(target=$1, df1=$2, df2=$3);
-p = pf(target=q, df1=$2, df2=$3);
-pl = pf(target=q, df1=$2, df2=$3, lower.tail=FALSE);
-
-out = matrix(0, rows=3, cols=1);
-out[1,1] = q;
-out[2,1] = p;
-out[3,1] = pl;
-
-write(out, $4);
+
+
+q = qf(target=$1, df1=$2, df2=$3);
+p = pf(target=q, df1=$2, df2=$3);
+pl = pf(target=q, df1=$2, df2=$3, lower.tail=FALSE);
+
+out = matrix(0, rows=3, cols=1);
+out[1,1] = q;
+out[2,1] = p;
+out[3,1] = pl;
+
+write(out, $4);

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/unary/scalar/DFTest_NORMAL.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/unary/scalar/DFTest_NORMAL.R b/src/test/scripts/functions/unary/scalar/DFTest_NORMAL.R
index 0556b73..9fc46bc 100644
--- a/src/test/scripts/functions/unary/scalar/DFTest_NORMAL.R
+++ b/src/test/scripts/functions/unary/scalar/DFTest_NORMAL.R
@@ -19,19 +19,19 @@
 #
 #-------------------------------------------------------------
 
-
-
-args <- commandArgs(TRUE)
-library(Matrix)
-
-qtle = qnorm(as.numeric(args[1]), mean=as.numeric(args[2]), sd=as.numeric(args[3]));
-p = pnorm(qtle, mean=as.numeric(args[2]), sd=as.numeric(args[3]));
-pl = pnorm(qtle, mean=as.numeric(args[2]), sd=as.numeric(args[3]), lower.tail=F);
-
-out = matrix(0,nrow=3, ncol=1);
-out[1,1] = qtle;
-out[2,1] = p;
-out[3,1] = pl;
-
-writeMM(as(out, "CsparseMatrix"), args[4]); 
-
+
+
+args <- commandArgs(TRUE)
+library(Matrix)
+
+qtle = qnorm(as.numeric(args[1]), mean=as.numeric(args[2]), sd=as.numeric(args[3]));
+p = pnorm(qtle, mean=as.numeric(args[2]), sd=as.numeric(args[3]));
+pl = pnorm(qtle, mean=as.numeric(args[2]), sd=as.numeric(args[3]), lower.tail=F);
+
+out = matrix(0,nrow=3, ncol=1);
+out[1,1] = qtle;
+out[2,1] = p;
+out[3,1] = pl;
+
+writeMM(as(out, "CsparseMatrix"), args[4]); 
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/unary/scalar/DFTest_NORMAL.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/unary/scalar/DFTest_NORMAL.dml b/src/test/scripts/functions/unary/scalar/DFTest_NORMAL.dml
index f7c43b8..2b7a396 100644
--- a/src/test/scripts/functions/unary/scalar/DFTest_NORMAL.dml
+++ b/src/test/scripts/functions/unary/scalar/DFTest_NORMAL.dml
@@ -19,15 +19,15 @@
 #
 #-------------------------------------------------------------
 
-
-
-q = qnorm(target=$1, mean=$2, sd=$3);
-p = pnorm(target=q, mean=$2, sd=$3);
-pl = pnorm(target=q, mean=$2, sd=$3, lower.tail=FALSE);
-
-out = matrix(0, rows=3, cols=1);
-out[1,1] = q;
-out[2,1] = p;
-out[3,1] = pl;
-
-write(out, $4);
+
+
+q = qnorm(target=$1, mean=$2, sd=$3);
+p = pnorm(target=q, mean=$2, sd=$3);
+pl = pnorm(target=q, mean=$2, sd=$3, lower.tail=FALSE);
+
+out = matrix(0, rows=3, cols=1);
+out[1,1] = q;
+out[2,1] = p;
+out[3,1] = pl;
+
+write(out, $4);

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/unary/scalar/DFTest_NORMAL_MEAN.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/unary/scalar/DFTest_NORMAL_MEAN.R b/src/test/scripts/functions/unary/scalar/DFTest_NORMAL_MEAN.R
index 9ab19b8..e5bdcf5 100644
--- a/src/test/scripts/functions/unary/scalar/DFTest_NORMAL_MEAN.R
+++ b/src/test/scripts/functions/unary/scalar/DFTest_NORMAL_MEAN.R
@@ -19,19 +19,19 @@
 #
 #-------------------------------------------------------------
 
-
-
-args <- commandArgs(TRUE)
-library(Matrix)
-
-qtle = qnorm(as.numeric(args[1]), mean=as.numeric(args[2]));
-p = pnorm(qtle, mean=as.numeric(args[2]));
-pl = pnorm(qtle, mean=as.numeric(args[2]), lower.tail=FALSE);
-
-out = matrix(0,nrow=3, ncol=1);
-out[1,1] = qtle;
-out[2,1] = p;
-out[3,1] = pl;
-
-writeMM(as(out, "CsparseMatrix"), args[3]); 
-
+
+
+args <- commandArgs(TRUE)
+library(Matrix)
+
+qtle = qnorm(as.numeric(args[1]), mean=as.numeric(args[2]));
+p = pnorm(qtle, mean=as.numeric(args[2]));
+pl = pnorm(qtle, mean=as.numeric(args[2]), lower.tail=FALSE);
+
+out = matrix(0,nrow=3, ncol=1);
+out[1,1] = qtle;
+out[2,1] = p;
+out[3,1] = pl;
+
+writeMM(as(out, "CsparseMatrix"), args[3]); 
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/unary/scalar/DFTest_NORMAL_MEAN.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/unary/scalar/DFTest_NORMAL_MEAN.dml b/src/test/scripts/functions/unary/scalar/DFTest_NORMAL_MEAN.dml
index ac7b017..a8429c3 100644
--- a/src/test/scripts/functions/unary/scalar/DFTest_NORMAL_MEAN.dml
+++ b/src/test/scripts/functions/unary/scalar/DFTest_NORMAL_MEAN.dml
@@ -19,15 +19,15 @@
 #
 #-------------------------------------------------------------
 
-
-
-q = qnorm(target=$1, mean=$2);
-p = pnorm(target=q, mean=$2);
-pl = pnorm(target=q, mean=$2, lower.tail=FALSE);
-
-out = matrix(0, rows=3, cols=1);
-out[1,1] = q;
-out[2,1] = p;
-out[3,1] = pl;
-
-write(out, $3);
+
+
+q = qnorm(target=$1, mean=$2);
+p = pnorm(target=q, mean=$2);
+pl = pnorm(target=q, mean=$2, lower.tail=FALSE);
+
+out = matrix(0, rows=3, cols=1);
+out[1,1] = q;
+out[2,1] = p;
+out[3,1] = pl;
+
+write(out, $3);

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/unary/scalar/DFTest_NORMAL_NOPARAMS.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/unary/scalar/DFTest_NORMAL_NOPARAMS.R b/src/test/scripts/functions/unary/scalar/DFTest_NORMAL_NOPARAMS.R
index bc3a0b2..efe4f60 100644
--- a/src/test/scripts/functions/unary/scalar/DFTest_NORMAL_NOPARAMS.R
+++ b/src/test/scripts/functions/unary/scalar/DFTest_NORMAL_NOPARAMS.R
@@ -19,19 +19,19 @@
 #
 #-------------------------------------------------------------
 
-
-
-args <- commandArgs(TRUE)
-library(Matrix)
-
-qtle = qnorm(as.numeric(args[1]));
-p = pnorm(qtle);
-pl = pnorm(qtle, lower.tail=F);
-
-out = matrix(0,nrow=3, ncol=1);
-out[1,1] = qtle;
-out[2,1] = p;
-out[3,1] = pl;
-
-writeMM(as(out, "CsparseMatrix"), args[2]); 
-
+
+
+args <- commandArgs(TRUE)
+library(Matrix)
+
+qtle = qnorm(as.numeric(args[1]));
+p = pnorm(qtle);
+pl = pnorm(qtle, lower.tail=F);
+
+out = matrix(0,nrow=3, ncol=1);
+out[1,1] = qtle;
+out[2,1] = p;
+out[3,1] = pl;
+
+writeMM(as(out, "CsparseMatrix"), args[2]); 
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/unary/scalar/DFTest_NORMAL_NOPARAMS.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/unary/scalar/DFTest_NORMAL_NOPARAMS.dml b/src/test/scripts/functions/unary/scalar/DFTest_NORMAL_NOPARAMS.dml
index b2140ee..995e741 100644
--- a/src/test/scripts/functions/unary/scalar/DFTest_NORMAL_NOPARAMS.dml
+++ b/src/test/scripts/functions/unary/scalar/DFTest_NORMAL_NOPARAMS.dml
@@ -19,15 +19,15 @@
 #
 #-------------------------------------------------------------
 
-
-
-q = qnorm(target=$1);
-p = pnorm(target=q);
-pl = pnorm(target=q, lower.tail=FALSE);
-
-out = matrix(0, rows=3, cols=1);
-out[1,1] = q;
-out[2,1] = p;
-out[3,1] = pl;
-
-write(out, $2);
+
+
+q = qnorm(target=$1);
+p = pnorm(target=q);
+pl = pnorm(target=q, lower.tail=FALSE);
+
+out = matrix(0, rows=3, cols=1);
+out[1,1] = q;
+out[2,1] = p;
+out[3,1] = pl;
+
+write(out, $2);

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/unary/scalar/DFTest_NORMAL_SD.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/unary/scalar/DFTest_NORMAL_SD.R b/src/test/scripts/functions/unary/scalar/DFTest_NORMAL_SD.R
index a5134b3..a758584 100644
--- a/src/test/scripts/functions/unary/scalar/DFTest_NORMAL_SD.R
+++ b/src/test/scripts/functions/unary/scalar/DFTest_NORMAL_SD.R
@@ -19,19 +19,19 @@
 #
 #-------------------------------------------------------------
 
-
-
-args <- commandArgs(TRUE)
-library(Matrix)
-
-qtle = qnorm(as.numeric(args[1]), sd=as.numeric(args[2]));
-p = pnorm(qtle, sd=as.numeric(args[2]));
-pl = pnorm(qtle, sd=as.numeric(args[2]), lower.tail=F);
-
-out = matrix(0,nrow=3, ncol=1);
-out[1,1] = qtle;
-out[2,1] = p;
-out[3,1] = pl;
-
-writeMM(as(out, "CsparseMatrix"), args[3]); 
-
+
+
+args <- commandArgs(TRUE)
+library(Matrix)
+
+qtle = qnorm(as.numeric(args[1]), sd=as.numeric(args[2]));
+p = pnorm(qtle, sd=as.numeric(args[2]));
+pl = pnorm(qtle, sd=as.numeric(args[2]), lower.tail=F);
+
+out = matrix(0,nrow=3, ncol=1);
+out[1,1] = qtle;
+out[2,1] = p;
+out[3,1] = pl;
+
+writeMM(as(out, "CsparseMatrix"), args[3]); 
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/unary/scalar/DFTest_NORMAL_SD.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/unary/scalar/DFTest_NORMAL_SD.dml b/src/test/scripts/functions/unary/scalar/DFTest_NORMAL_SD.dml
index c2d941f..3ee0288 100644
--- a/src/test/scripts/functions/unary/scalar/DFTest_NORMAL_SD.dml
+++ b/src/test/scripts/functions/unary/scalar/DFTest_NORMAL_SD.dml
@@ -19,15 +19,15 @@
 #
 #-------------------------------------------------------------
 
-
-
-q = qnorm(target=$1, sd=$2);
-p = pnorm(target=q, sd=$2);
-pl = pnorm(target=q, sd=$2, lower.tail=FALSE);
-
-out = matrix(0, rows=3, cols=1);
-out[1,1] = q;
-out[2,1] = p;
-out[3,1] = pl;
-
-write(out, $3);
+
+
+q = qnorm(target=$1, sd=$2);
+p = pnorm(target=q, sd=$2);
+pl = pnorm(target=q, sd=$2, lower.tail=FALSE);
+
+out = matrix(0, rows=3, cols=1);
+out[1,1] = q;
+out[2,1] = p;
+out[3,1] = pl;
+
+write(out, $3);



[03/55] [partial] incubator-systemml git commit: [SYSTEMML-482] [SYSTEMML-480] Adding a Git attributes file to enforce Unix-styled line endings, and normalizing all of the line endings.

Posted by du...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/reorg/Reverse2.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/reorg/Reverse2.R b/src/test/scripts/functions/reorg/Reverse2.R
index 7537fe9..1599d60 100644
--- a/src/test/scripts/functions/reorg/Reverse2.R
+++ b/src/test/scripts/functions/reorg/Reverse2.R
@@ -1,41 +1,41 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-
-args <- commandArgs(TRUE)
-options(digits=22)
-library("Matrix")
-
-A = as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
-
-B = matrix(0, nrow(A), ncol(A));
-for( i in 1:ncol(A) ) 
-{
-   col = as.vector(A[,i])
-   col = rev(col);
-   B[,i] = col;
-}
-
-writeMM(as(B,"CsparseMatrix"), paste(args[2], "B", sep=""))
-
-
-
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+args <- commandArgs(TRUE)
+options(digits=22)
+library("Matrix")
+
+A = as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
+
+B = matrix(0, nrow(A), ncol(A));
+for( i in 1:ncol(A) ) 
+{
+   col = as.vector(A[,i])
+   col = rev(col);
+   B[,i] = col;
+}
+
+writeMM(as(B,"CsparseMatrix"), paste(args[2], "B", sep=""))
+
+
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/reorg/Reverse2.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/reorg/Reverse2.dml b/src/test/scripts/functions/reorg/Reverse2.dml
index b1d796c..e8e68ed 100644
--- a/src/test/scripts/functions/reorg/Reverse2.dml
+++ b/src/test/scripts/functions/reorg/Reverse2.dml
@@ -1,25 +1,25 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-
-A = read($1);
-B = table(seq(1,nrow(A),1),seq(nrow(A),1,-1)) %*% A;
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+A = read($1);
+B = table(seq(1,nrow(A),1),seq(nrow(A),1,-1)) %*% A;
 write(B, $2);  
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/reorg/Transpose.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/reorg/Transpose.R b/src/test/scripts/functions/reorg/Transpose.R
index f1e3467..d4f1b10 100644
--- a/src/test/scripts/functions/reorg/Transpose.R
+++ b/src/test/scripts/functions/reorg/Transpose.R
@@ -19,15 +19,15 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-library("Matrix")
-
-X=readMM(paste(args[1], "X.mtx", sep=""))
-Y=t(X);
-writeMM(as(Y,"CsparseMatrix"), paste(args[2], "Y", sep=""), format="text")
-
-
-
-
+
+args <- commandArgs(TRUE)
+options(digits=22)
+library("Matrix")
+
+X=readMM(paste(args[1], "X.mtx", sep=""))
+Y=t(X);
+writeMM(as(Y,"CsparseMatrix"), paste(args[2], "Y", sep=""), format="text")
+
+
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/reorg/Transpose.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/reorg/Transpose.dml b/src/test/scripts/functions/reorg/Transpose.dml
index e01ec87..1825e5f 100644
--- a/src/test/scripts/functions/reorg/Transpose.dml
+++ b/src/test/scripts/functions/reorg/Transpose.dml
@@ -19,8 +19,8 @@
 #
 #-------------------------------------------------------------
 
-
-X = read($1, rows=$2, cols=$3, format="text");
-
-Y = t(X);
+
+X = read($1, rows=$2, cols=$3, format="text");
+
+Y = t(X);
 write(Y, $4, format="text");  
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/ternary/CTableRowHist.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/ternary/CTableRowHist.R b/src/test/scripts/functions/ternary/CTableRowHist.R
index 0a8453f..8788d3d 100644
--- a/src/test/scripts/functions/ternary/CTableRowHist.R
+++ b/src/test/scripts/functions/ternary/CTableRowHist.R
@@ -19,26 +19,26 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
-A <- floor(A);
-
-IA = (A != 0) * seq (1, nrow (A), 1);
-IA = matrix (IA, (nrow (A) * ncol(A)), 1, byrow = FALSE);
-VA = matrix ( A, (nrow (A) * ncol(A)), 1, byrow = FALSE);
-#IA = removeEmpty (target = IA, margin = "rows");
-#VA = removeEmpty (target = VA, margin = "rows");
-Btmp1 = table (IA, VA);
-Btmp2 = as.matrix(as.data.frame.matrix(Btmp1));
-
-#remove first row and column (0 values, see missing removeEmpty)
-B = Btmp1[2:nrow(Btmp2),2:ncol(Btmp2)];
-
-writeMM(as(B, "CsparseMatrix"), paste(args[2], "B", sep="")); 
-
-
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
+A <- floor(A);
+
+IA = (A != 0) * seq (1, nrow (A), 1);
+IA = matrix (IA, (nrow (A) * ncol(A)), 1, byrow = FALSE);
+VA = matrix ( A, (nrow (A) * ncol(A)), 1, byrow = FALSE);
+#IA = removeEmpty (target = IA, margin = "rows");
+#VA = removeEmpty (target = VA, margin = "rows");
+Btmp1 = table (IA, VA);
+Btmp2 = as.matrix(as.data.frame.matrix(Btmp1));
+
+#remove first row and column (0 values, see missing removeEmpty)
+B = Btmp1[2:nrow(Btmp2),2:ncol(Btmp2)];
+
+writeMM(as(B, "CsparseMatrix"), paste(args[2], "B", sep="")); 
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/ternary/CTableSequenceLeft.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/ternary/CTableSequenceLeft.R b/src/test/scripts/functions/ternary/CTableSequenceLeft.R
index bb98630..9260aa2 100644
--- a/src/test/scripts/functions/ternary/CTableSequenceLeft.R
+++ b/src/test/scripts/functions/ternary/CTableSequenceLeft.R
@@ -19,16 +19,16 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
-
-B = as.matrix(as.data.frame.matrix(table(seq(1,nrow(A)), A)));
-
-writeMM(as(B, "CsparseMatrix"), paste(args[2], "B", sep="")); 
-
-
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
+
+B = as.matrix(as.data.frame.matrix(table(seq(1,nrow(A)), A)));
+
+writeMM(as(B, "CsparseMatrix"), paste(args[2], "B", sep="")); 
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/ternary/CTableSequenceRight.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/ternary/CTableSequenceRight.R b/src/test/scripts/functions/ternary/CTableSequenceRight.R
index 7be0dae..08e8e7b 100644
--- a/src/test/scripts/functions/ternary/CTableSequenceRight.R
+++ b/src/test/scripts/functions/ternary/CTableSequenceRight.R
@@ -19,16 +19,16 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
-
-B = as.matrix(as.data.frame.matrix(table(A,seq(1,nrow(A)))));
-
-writeMM(as(B, "CsparseMatrix"), paste(args[2], "B", sep="")); 
-
-
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
+
+B = as.matrix(as.data.frame.matrix(table(A,seq(1,nrow(A)))));
+
+writeMM(as(B, "CsparseMatrix"), paste(args[2], "B", sep="")); 
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/ternary/CentralMomentWeights.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/ternary/CentralMomentWeights.R b/src/test/scripts/functions/ternary/CentralMomentWeights.R
index a952b8d..dab002c 100644
--- a/src/test/scripts/functions/ternary/CentralMomentWeights.R
+++ b/src/test/scripts/functions/ternary/CentralMomentWeights.R
@@ -19,20 +19,20 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-library("moments")
-
-A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
-B <- as.matrix(readMM(paste(args[1], "B.mtx", sep="")))
-order = as.integer(args[2]);
-
-s = moment(A*B, order, central=TRUE);
-m = as.matrix(s);
-
-writeMM(as(m, "CsparseMatrix"), paste(args[3], "R", sep="")); 
-
-
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+library("moments")
+
+A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
+B <- as.matrix(readMM(paste(args[1], "B.mtx", sep="")))
+order = as.integer(args[2]);
+
+s = moment(A*B, order, central=TRUE);
+m = as.matrix(s);
+
+writeMM(as(m, "CsparseMatrix"), paste(args[3], "R", sep="")); 
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/ternary/CovarianceWeights.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/ternary/CovarianceWeights.R b/src/test/scripts/functions/ternary/CovarianceWeights.R
index 7cc1576..5d19076 100644
--- a/src/test/scripts/functions/ternary/CovarianceWeights.R
+++ b/src/test/scripts/functions/ternary/CovarianceWeights.R
@@ -19,20 +19,20 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-library("moments")
-
-A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
-B <- as.matrix(readMM(paste(args[1], "B.mtx", sep="")))
-#note this script assumes weights of 1
-
-s = cov(A,B);
-m = as.matrix(s);
-
-writeMM(as(m, "CsparseMatrix"), paste(args[2], "R", sep="")); 
-
-
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+library("moments")
+
+A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
+B <- as.matrix(readMM(paste(args[1], "B.mtx", sep="")))
+#note this script assumes weights of 1
+
+s = cov(A,B);
+m = as.matrix(s);
+
+writeMM(as(m, "CsparseMatrix"), paste(args[2], "R", sep="")); 
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/ternary/IQMWeights.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/ternary/IQMWeights.R b/src/test/scripts/functions/ternary/IQMWeights.R
index 0ad792c..56c43ad 100644
--- a/src/test/scripts/functions/ternary/IQMWeights.R
+++ b/src/test/scripts/functions/ternary/IQMWeights.R
@@ -19,27 +19,27 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
-
-#without weights (assumes weights of 1)
-m = nrow(A);
-S = sort(A)
-q25d=m*0.25
-q75d=m*0.75
-q25i=ceiling(q25d)
-q75i=ceiling(q75d)
-iqm = sum(S[(q25i+1):q75i])
-iqm = iqm + (q25i-q25d)*S[q25i] - (q75i-q75d)*S[q75i]
-iqm = iqm/(m*0.5)
-
-miqm = as.matrix(iqm);
-
-writeMM(as(miqm, "CsparseMatrix"), paste(args[3], "R", sep="")); 
-
-
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
+
+#without weights (assumes weights of 1)
+m = nrow(A);
+S = sort(A)
+q25d=m*0.25
+q75d=m*0.75
+q25i=ceiling(q25d)
+q75i=ceiling(q75d)
+iqm = sum(S[(q25i+1):q75i])
+iqm = iqm + (q25i-q25d)*S[q25i] - (q75i-q75d)*S[q75i]
+iqm = iqm/(m*0.5)
+
+miqm = as.matrix(iqm);
+
+writeMM(as(miqm, "CsparseMatrix"), paste(args[3], "R", sep="")); 
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/ternary/MedianWeights.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/ternary/MedianWeights.R b/src/test/scripts/functions/ternary/MedianWeights.R
index 0399470..1ebf612 100644
--- a/src/test/scripts/functions/ternary/MedianWeights.R
+++ b/src/test/scripts/functions/ternary/MedianWeights.R
@@ -19,18 +19,18 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
-
-#without weights (assumes weights of 1)
-s = median(A);
-m = as.matrix(s);
-
-writeMM(as(m, "CsparseMatrix"), paste(args[3], "R", sep="")); 
-
-
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
+
+#without weights (assumes weights of 1)
+s = median(A);
+m = as.matrix(s);
+
+writeMM(as(m, "CsparseMatrix"), paste(args[3], "R", sep="")); 
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/ternary/QuantileWeights.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/ternary/QuantileWeights.R b/src/test/scripts/functions/ternary/QuantileWeights.R
index be1c0b1..0ce3c99 100644
--- a/src/test/scripts/functions/ternary/QuantileWeights.R
+++ b/src/test/scripts/functions/ternary/QuantileWeights.R
@@ -19,19 +19,19 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
-p = as.double(args[2]);
-
-#without weights (assumes weights of 1)
-s = quantile(A, p);
-m = as.matrix(s);
-
-writeMM(as(m, "CsparseMatrix"), paste(args[3], "R", sep="")); 
-
-
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
+p = as.double(args[2]);
+
+#without weights (assumes weights of 1)
+s = quantile(A, p);
+m = as.matrix(s);
+
+writeMM(as(m, "CsparseMatrix"), paste(args[3], "R", sep="")); 
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/ternary/TableOutputTest.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/ternary/TableOutputTest.R b/src/test/scripts/functions/ternary/TableOutputTest.R
index e0edc5e..2a4f767 100644
--- a/src/test/scripts/functions/ternary/TableOutputTest.R
+++ b/src/test/scripts/functions/ternary/TableOutputTest.R
@@ -19,17 +19,17 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
-B <- as.matrix(readMM(paste(args[1], "B.mtx", sep="")))
-
-F = as.matrix(as.data.frame.matrix(table(A,B)));
-
-writeMM(as(F, "CsparseMatrix"), paste(args[2], "F", sep="")); 
-
-
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
+B <- as.matrix(readMM(paste(args[1], "B.mtx", sep="")))
+
+F = as.matrix(as.data.frame.matrix(table(A,B)));
+
+writeMM(as(F, "CsparseMatrix"), paste(args[2], "F", sep="")); 
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/transform/Apply.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/transform/Apply.dml b/src/test/scripts/functions/transform/Apply.dml
index 11c5d48..8cbec31 100644
--- a/src/test/scripts/functions/transform/Apply.dml
+++ b/src/test/scripts/functions/transform/Apply.dml
@@ -19,12 +19,12 @@
 #
 #-------------------------------------------------------------
 
-
-raw = read($DATA);
-
-A = transform(target = raw, 
-              transformPath = $TFMTD, 
-              applyTransformPath = $APPLYMTD);
-
-write(A, $TFDATA, format=$OFMT);
-
+
+raw = read($DATA);
+
+A = transform(target = raw, 
+              transformPath = $TFMTD, 
+              applyTransformPath = $APPLYMTD);
+
+write(A, $TFDATA, format=$OFMT);
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/transform/Scaling.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/transform/Scaling.R b/src/test/scripts/functions/transform/Scaling.R
index ebd9016..a3bfe59 100644
--- a/src/test/scripts/functions/transform/Scaling.R
+++ b/src/test/scripts/functions/transform/Scaling.R
@@ -19,18 +19,18 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-library("Matrix")
-
-A = read.table(args[1], sep=",");
-B = matrix(0, nrow=nrow(A), ncol=ncol(A));
-
-cols = ncol(A);
-A1 = A[, 1:cols/2];
-A2 = A[,(cols/2+1):cols]
-B[, 1:cols/2] = scale(A1, center=T, scale=F)
-B[, (cols/2+1):cols] = scale(A2)
-
-write.table(B, args[2], sep=",", row.names = F, col.names=F)
+
+args <- commandArgs(TRUE)
+options(digits=22)
+library("Matrix")
+
+A = read.table(args[1], sep=",");
+B = matrix(0, nrow=nrow(A), ncol=ncol(A));
+
+cols = ncol(A);
+A1 = A[, 1:cols/2];
+A2 = A[,(cols/2+1):cols]
+B[, 1:cols/2] = scale(A1, center=T, scale=F)
+B[, (cols/2+1):cols] = scale(A2)
+
+write.table(B, args[2], sep=",", row.names = F, col.names=F)

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/transform/Scaling.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/transform/Scaling.dml b/src/test/scripts/functions/transform/Scaling.dml
index 6b575ec..a5bd9b2 100644
--- a/src/test/scripts/functions/transform/Scaling.dml
+++ b/src/test/scripts/functions/transform/Scaling.dml
@@ -19,12 +19,12 @@
 #
 #-------------------------------------------------------------
 
-
-raw = read($DATA);
-
-A = transform(target = raw, 
-              transformPath = $TFMTD, 
-              transformSpec = $TFSPEC);
-
-write(A, $TFDATA, format=$OFMT);
-
+
+raw = read($DATA);
+
+A = transform(target = raw, 
+              transformPath = $TFMTD, 
+              transformSpec = $TFSPEC);
+
+write(A, $TFDATA, format=$OFMT);
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/transform/Transform.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/transform/Transform.dml b/src/test/scripts/functions/transform/Transform.dml
index 6b575ec..a5bd9b2 100644
--- a/src/test/scripts/functions/transform/Transform.dml
+++ b/src/test/scripts/functions/transform/Transform.dml
@@ -19,12 +19,12 @@
 #
 #-------------------------------------------------------------
 
-
-raw = read($DATA);
-
-A = transform(target = raw, 
-              transformPath = $TFMTD, 
-              transformSpec = $TFSPEC);
-
-write(A, $TFDATA, format=$OFMT);
-
+
+raw = read($DATA);
+
+A = transform(target = raw, 
+              transformPath = $TFMTD, 
+              transformSpec = $TFSPEC);
+
+write(A, $TFDATA, format=$OFMT);
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/transform/Transform_colnames.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/transform/Transform_colnames.dml b/src/test/scripts/functions/transform/Transform_colnames.dml
index b02b612..64a5712 100644
--- a/src/test/scripts/functions/transform/Transform_colnames.dml
+++ b/src/test/scripts/functions/transform/Transform_colnames.dml
@@ -19,13 +19,13 @@
 #
 #-------------------------------------------------------------
 
-
-raw = read($DATA);
-
-A = transform(target = raw, 
-              transformPath = $TFMTD, 
-              transformSpec = $TFSPEC,
-	      outputNames = $COLNAMES);
-
-write(A, $TFDATA, format=$OFMT);
-
+
+raw = read($DATA);
+
+A = transform(target = raw, 
+              transformPath = $TFMTD, 
+              transformSpec = $TFSPEC,
+	      outputNames = $COLNAMES);
+
+write(A, $TFDATA, format=$OFMT);
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/transform/input/homes/homes.csv
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/transform/input/homes/homes.csv b/src/test/scripts/functions/transform/input/homes/homes.csv
index c892bf6..487a9e7 100644
--- a/src/test/scripts/functions/transform/input/homes/homes.csv
+++ b/src/test/scripts/functions/transform/input/homes/homes.csv
@@ -1,201 +1,201 @@
-zipcode,district,sqft,numbedrooms,numbathrooms,floors,view,saleprice,askingprice
-95141,south,3002,6,3,2,FALSE,929,
-95141,west,1373,7,1,3,FALSE,695,698
-91312,south,3261,6,2,2,FALSE,902,906
-94555,north,1835,3,3,3,TRUE,888,892
-95141,west,2770,5,2.5,,TRUE,812,816
-95141,east,2833,6,2.5,2,TRUE,927,932
-96334,north,1339,6,3,1,,672,675
-96334,south,2742,6,2.5,2,FALSE,872,876
-96334,north,2195,5,2.5,2,FALSE,799,803
-98755,north,3469,7,2.5,2,FALSE,958,963
-95141,south,2777,3,,3,TRUE,837,841
-96334,west,1685,7,1.5,2,TRUE,757,760
-95141,west,2238,4,3,3,FALSE,894,899
-98755,south,2699,4,,3,FALSE,779,783
-91312,west,1245,4,1,1,FALSE,547,549
-95141,west,2233,2,2.5,3,,857,861
-98755,south,3702,7,3,1,FALSE,959,964
-95141,south,2644,4,1.5,3,,854,858
-94555,north,3576,2,,3,TRUE,921,925
-98755,west,1953,1,,1,TRUE,621,624
-98755,north,1865,7,1,2,TRUE,742,745
-94555,north,3837,3,1,1,FALSE,839,842
-91312,west,2139,3,1,3,TRUE,820,824
-95141,north,3824,4,3,1,FALSE,954,958
-98755,east,2858,5,1.5,1,FALSE,759,762
-91312,south,1827,7,3,1,FALSE,735,738
-98755,east,2811,4,1,,FALSE,663,666
-91312,south,3557,2,2.5,1,FALSE,888,892
-91312,,3343,7,1.5,2,TRUE,929,933
-95141,north,1612,6,2,,TRUE,644,647
-91312,south,2553,2,2.5,2,TRUE,884,889
-98755,east,1601,,1,3,TRUE,735,738
-96334,west,1682,3,1.5,1,FALSE,625,628
-98755,south,3926,6,2,2,TRUE,1040,1044
-91312,north,2583,1,2.5,,FALSE,706,710
-98755,south,2056,7,3,1,,766,770
-96334,east,2790,5,2.5,3,FALSE,923,927
-98755,east,2162,5,2.5,2,,792,795
-95141,west,3928,4,2,3,FALSE,1037,1042
-94555,south,2000,1,3,2,TRUE,844,848
-96334,east,2688,5,2,3,TRUE,938,943
-94555,south,3533,2,1.5,2,FALSE,890,894
-95141,north,1080,1,1,2,,566,569
-94555,south,2639,3,2,3,FALSE,876,880
-91312,west,2462,4,2.5,1,TRUE,828,832
-94555,north,2420,1,1.5,3,FALSE,809,813
-91312,north,2575,4,3,,FALSE,746,750
-95141,west,1269,2,2,2,,658,661
-96334,west,3911,6,1.5,3,TRUE,1072,1077
-96334,west,2166,6,1.5,3,TRUE,867,872
-94555,south,3855,6,2,1,FALSE,914,918
-91312,west,1971,2,3,1,TRUE,790,794
-95141,east,1769,4,1,1,TRUE,652,655
-98755,north,3774,6,1,2,TRUE,961,966
-96334,west,1044,2,3,1,TRUE,685,688
-95141,north,2561,7,1.5,1,TRUE,790,794
-94555,north,1714,4,1.5,,FALSE,563,565
-95141,south,2255,2,2,3,TRUE,883,887
-94555,north,3085,6,2,1,FALSE,819,823
-98755,south,1273,2,1.5,2,FALSE,628,631
-91312,west,3785,5,3,3,TRUE,1133,1138
-98755,east,2651,2,2,1,,750,753
-91312,west,3270,7,1.5,3,FALSE,943,947
-98755,south,1749,2,2,2,FALSE,712,715
-98755,south,1625,7,1.5,2,FALSE,691,694
-96334,north,3010,7,1.5,2,FALSE,848,851
-91312,south,3919,5,1,3,TRUE,1033,1038
-95141,north,1745,2,2,,FALSE,587,590
-91312,south,1976,1,2,1,TRUE,726,729
-91312,west,3953,3,1.5,3,FALSE,1006,1010
-95141,west,3439,4,2,2,FALSE,921,925
-94555,east,3570,7,1,2,TRUE,934,938
-98755,west,2484,5,3,2,TRUE,926,931
-91312,west,2628,3,1.5,3,,850,854
-94555,south,1349,3,2,2,TRUE,721,724
-94555,,3858,7,2.5,2,TRUE,1046,1051
-94555,south,1202,4,3,3,,767,771
-98755,west,3967,3,2,3,TRUE,1095,1100
-94555,west,2090,7,3,2,TRUE,885,889
-91312,north,3236,1,,,TRUE,699,702
-94555,west,3931,3,1,,,800,804
-96334,east,1856,5,2,3,TRUE,841,845
-91312,north,1922,2,3,1,TRUE,775,778
-98755,north,1199,3,3,3,FALSE,761,765
-96334,east,2108,3,1,3,TRUE,806,810
-95141,north,1259,3,1,,FALSE,478,481
-96334,west,3901,4,2,2,FALSE,976,981
-94555,south,2654,6,1.5,2,TRUE,859,863
-94555,west,3805,6,2,3,TRUE,1085,1090
-95141,south,3199,4,2,3,FALSE,947,951
-98755,west,3786,5,1,1,TRUE,909,913
-94555,east,2160,1,1,1,FALSE,629,631
-,west,2331,3,2,3,FALSE,842,846
-95141,east,3152,7,2,1,TRUE,883,887
-96334,south,1277,3,2,2,,659,663
-94555,east,1592,2,3,2,TRUE,791,795
-95141,east,3903,1,2.5,2,FALSE,976,981
-91312,south,1076,2,2.5,1,FALSE,597,600
-96334,west,1719,1,1.5,3,FALSE,738,742
-94555,north,1439,4,1.5,1,FALSE,589,592
-91312,east,1961,2,3,1,TRUE,775,778
-95141,south,3534,3,2,,TRUE,861,865
-94555,north,2471,1,1.5,1,TRUE,753,756
-91312,west,3930,4,2.5,2,FALSE,1004,1009
-95141,north,2529,4,1.5,,,660,663
-95141,south,2833,1,1,1,FALSE,718,721
-96334,west,1356,7,1.5,,FALSE,545,548
-96334,south,2580,4,1,2,TRUE,816,820
-94555,south,2169,3,2.5,3,TRUE,904,908
-95141,east,3329,4,3,3,TRUE,1064,1069
-95141,east,3660,1,2.5,2,,948,952
-96334,south,3392,4,2,3,TRUE,1026,1031
-96334,east,3688,6,2.5,3,FALSE,1032,1037
-98755,west,3347,3,2.5,2,TRUE,991,996
-95141,east,1810,5,1,1,FALSE,606,609
-95141,east,3753,1,2.5,2,FALSE,959,963
-94555,east,3906,2,1.5,1,FALSE,866,870
-96334,east,1732,3,2,1,TRUE,700,703
-96334,south,2188,4,2,1,TRUE,767,771
-96334,south,3750,6,2,2,FALSE,963,967
-96334,south,2292,6,,1,TRUE,677,680
-98755,west,1526,6,2.5,,TRUE,673,676
-98755,north,2331,1,1.5,1,TRUE,740,743
-94555,north,1512,4,3,3,TRUE,854,858
-98755,north,3352,3,3,3,FALSE,1014,1018
-96334,north,2378,2,,2,FALSE,669,672
-91312,,1159,7,2.5,1,TRUE,670,673
-94555,south,3426,3,2.5,2,FALSE,937,941
-98755,south,3211,5,3,1,TRUE,948,953
-98755,west,2747,2,2.5,1,FALSE,803,806
-96334,east,3952,6,1.5,1,TRUE,946,950
-91312,north,3814,6,1.5,2,FALSE,934,938
-95141,south,3700,7,2.5,1,FALSE,929,933
-98755,,2448,4,1,2,FALSE,733,736
-95141,west,2629,1,2,,FALSE,696,699
-95141,east,3154,4,2.5,1,TRUE,898,902
-91312,south,2648,4,1.5,2,FALSE,793,797
-98755,,3857,3,1,2,TRUE,949,953
-98755,north,1394,4,1.5,1,FALSE,587,590
-91312,west,2709,5,2,2,FALSE,837,841
-94555,east,3946,6,1,2,TRUE,974,978
-91312,north,3905,6,2,2,FALSE,973,977
-98755,east,3248,5,1.5,1,TRUE,860,864
-96334,north,1774,7,1.5,1,FALSE,644,647
-96334,,1576,4,1,2,TRUE,685,688
-95141,north,2853,,1.5,3,TRUE,912,916
-94555,east,1995,2,3,3,TRUE,897,902
-96334,south,3803,,1,3,TRUE,1001,1006
-94555,east,2876,2,3,1,FALSE,828,832
-98755,east,3553,4,,3,TRUE,925,930
-94555,east,3229,4,2,3,TRUE,995,1000
-94555,north,1079,5,2,2,FALSE,638,641
-95141,south,3695,7,2.5,3,FALSE,1046,1051
-96334,west,3694,5,1,1,TRUE,897,901
-98755,west,1918,5,1,2,FALSE,693,697
-94555,south,1647,6,1,2,TRUE,713,716
-96334,west,2691,3,2.5,2,FALSE,858,862
-95141,south,1333,2,2,2,TRUE,716,719
-95141,west,2609,4,2,1,FALSE,765,768
-98755,west,1725,2,2,3,,772,776
-91312,west,2125,3,1,2,TRUE,760,763
-91312,west,2417,5,1,1,FALSE,689,692
-98755,west,3623,2,1,3,TRUE,995,999
-98755,north,3343,6,3,1,FALSE,908,912
-96334,south,1074,7,2.5,3,FALSE,739,743
-96334,south,2972,3,1,2,TRUE,858,862
-91312,east,1637,2,2,1,FALSE,626,629
-91312,north,1807,2,3,2,FALSE,765,768
-95141,north,1457,2,3,1,FALSE,667,670
-91312,west,3043,6,1,1,FALSE,766,770
-91312,west,3045,6,1.5,3,TRUE,967,972
-91312,north,1444,2,,1,TRUE,552,555
-98755,north,1980,5,1,1,TRUE,688,691
-98755,west,1112,3,1.5,3,TRUE,732,735
-98755,south,1533,6,1.5,3,FALSE,734,738
-91312,east,1442,5,2,2,FALSE,675,678
-91312,north,3171,6,1,3,TRUE,945,949
-96334,east,3072,5,1.5,2,FALSE,842,846
-94555,east,3506,4,1.5,3,TRUE,1000,1005
-94555,south,1574,2,1,3,FALSE,691,694
-95141,south,3521,6,,,FALSE,706,709
-94555,east,3567,6,3,1,FALSE,926,931
-91312,south,1194,1,1,2,TRUE,637,640
-94555,east,1031,3,1.5,1,FALSE,532,535
-,south,3141,2,1.5,3,TRUE,955,960
-94555,south,2776,3,2.5,2,TRUE,916,920
-91312,south,2009,5,1.5,1,TRUE,719,723
-96334,north,3784,2,1,2,FALSE,889,893
-94555,west,1975,6,1.5,2,FALSE,729,732
-98755,west,2444,2,3,2,FALSE,854,857
-95141,south,1684,3,1.5,3,FALSE,737,740
-98755,north,1729,6,1,1,TRUE,663,666
-95141,west,2817,1,1,1,,721,724
-95141,north,2236,1,1,2,FALSE,702,705
-95141,south,2061,7,3,1,FALSE,764,768
-98755,south,3561,3,2.5,3,TRUE,1070,1075
-94555,east,2143,3,1,2,FALSE,694,697
-96334,north,3840,7,1,1,FALSE,858,862
-96334,,1086,2,2.5,2,,647,650
-98755,west,3686,,1,1,TRUE,876,880
+zipcode,district,sqft,numbedrooms,numbathrooms,floors,view,saleprice,askingprice
+95141,south,3002,6,3,2,FALSE,929,
+95141,west,1373,7,1,3,FALSE,695,698
+91312,south,3261,6,2,2,FALSE,902,906
+94555,north,1835,3,3,3,TRUE,888,892
+95141,west,2770,5,2.5,,TRUE,812,816
+95141,east,2833,6,2.5,2,TRUE,927,932
+96334,north,1339,6,3,1,,672,675
+96334,south,2742,6,2.5,2,FALSE,872,876
+96334,north,2195,5,2.5,2,FALSE,799,803
+98755,north,3469,7,2.5,2,FALSE,958,963
+95141,south,2777,3,,3,TRUE,837,841
+96334,west,1685,7,1.5,2,TRUE,757,760
+95141,west,2238,4,3,3,FALSE,894,899
+98755,south,2699,4,,3,FALSE,779,783
+91312,west,1245,4,1,1,FALSE,547,549
+95141,west,2233,2,2.5,3,,857,861
+98755,south,3702,7,3,1,FALSE,959,964
+95141,south,2644,4,1.5,3,,854,858
+94555,north,3576,2,,3,TRUE,921,925
+98755,west,1953,1,,1,TRUE,621,624
+98755,north,1865,7,1,2,TRUE,742,745
+94555,north,3837,3,1,1,FALSE,839,842
+91312,west,2139,3,1,3,TRUE,820,824
+95141,north,3824,4,3,1,FALSE,954,958
+98755,east,2858,5,1.5,1,FALSE,759,762
+91312,south,1827,7,3,1,FALSE,735,738
+98755,east,2811,4,1,,FALSE,663,666
+91312,south,3557,2,2.5,1,FALSE,888,892
+91312,,3343,7,1.5,2,TRUE,929,933
+95141,north,1612,6,2,,TRUE,644,647
+91312,south,2553,2,2.5,2,TRUE,884,889
+98755,east,1601,,1,3,TRUE,735,738
+96334,west,1682,3,1.5,1,FALSE,625,628
+98755,south,3926,6,2,2,TRUE,1040,1044
+91312,north,2583,1,2.5,,FALSE,706,710
+98755,south,2056,7,3,1,,766,770
+96334,east,2790,5,2.5,3,FALSE,923,927
+98755,east,2162,5,2.5,2,,792,795
+95141,west,3928,4,2,3,FALSE,1037,1042
+94555,south,2000,1,3,2,TRUE,844,848
+96334,east,2688,5,2,3,TRUE,938,943
+94555,south,3533,2,1.5,2,FALSE,890,894
+95141,north,1080,1,1,2,,566,569
+94555,south,2639,3,2,3,FALSE,876,880
+91312,west,2462,4,2.5,1,TRUE,828,832
+94555,north,2420,1,1.5,3,FALSE,809,813
+91312,north,2575,4,3,,FALSE,746,750
+95141,west,1269,2,2,2,,658,661
+96334,west,3911,6,1.5,3,TRUE,1072,1077
+96334,west,2166,6,1.5,3,TRUE,867,872
+94555,south,3855,6,2,1,FALSE,914,918
+91312,west,1971,2,3,1,TRUE,790,794
+95141,east,1769,4,1,1,TRUE,652,655
+98755,north,3774,6,1,2,TRUE,961,966
+96334,west,1044,2,3,1,TRUE,685,688
+95141,north,2561,7,1.5,1,TRUE,790,794
+94555,north,1714,4,1.5,,FALSE,563,565
+95141,south,2255,2,2,3,TRUE,883,887
+94555,north,3085,6,2,1,FALSE,819,823
+98755,south,1273,2,1.5,2,FALSE,628,631
+91312,west,3785,5,3,3,TRUE,1133,1138
+98755,east,2651,2,2,1,,750,753
+91312,west,3270,7,1.5,3,FALSE,943,947
+98755,south,1749,2,2,2,FALSE,712,715
+98755,south,1625,7,1.5,2,FALSE,691,694
+96334,north,3010,7,1.5,2,FALSE,848,851
+91312,south,3919,5,1,3,TRUE,1033,1038
+95141,north,1745,2,2,,FALSE,587,590
+91312,south,1976,1,2,1,TRUE,726,729
+91312,west,3953,3,1.5,3,FALSE,1006,1010
+95141,west,3439,4,2,2,FALSE,921,925
+94555,east,3570,7,1,2,TRUE,934,938
+98755,west,2484,5,3,2,TRUE,926,931
+91312,west,2628,3,1.5,3,,850,854
+94555,south,1349,3,2,2,TRUE,721,724
+94555,,3858,7,2.5,2,TRUE,1046,1051
+94555,south,1202,4,3,3,,767,771
+98755,west,3967,3,2,3,TRUE,1095,1100
+94555,west,2090,7,3,2,TRUE,885,889
+91312,north,3236,1,,,TRUE,699,702
+94555,west,3931,3,1,,,800,804
+96334,east,1856,5,2,3,TRUE,841,845
+91312,north,1922,2,3,1,TRUE,775,778
+98755,north,1199,3,3,3,FALSE,761,765
+96334,east,2108,3,1,3,TRUE,806,810
+95141,north,1259,3,1,,FALSE,478,481
+96334,west,3901,4,2,2,FALSE,976,981
+94555,south,2654,6,1.5,2,TRUE,859,863
+94555,west,3805,6,2,3,TRUE,1085,1090
+95141,south,3199,4,2,3,FALSE,947,951
+98755,west,3786,5,1,1,TRUE,909,913
+94555,east,2160,1,1,1,FALSE,629,631
+,west,2331,3,2,3,FALSE,842,846
+95141,east,3152,7,2,1,TRUE,883,887
+96334,south,1277,3,2,2,,659,663
+94555,east,1592,2,3,2,TRUE,791,795
+95141,east,3903,1,2.5,2,FALSE,976,981
+91312,south,1076,2,2.5,1,FALSE,597,600
+96334,west,1719,1,1.5,3,FALSE,738,742
+94555,north,1439,4,1.5,1,FALSE,589,592
+91312,east,1961,2,3,1,TRUE,775,778
+95141,south,3534,3,2,,TRUE,861,865
+94555,north,2471,1,1.5,1,TRUE,753,756
+91312,west,3930,4,2.5,2,FALSE,1004,1009
+95141,north,2529,4,1.5,,,660,663
+95141,south,2833,1,1,1,FALSE,718,721
+96334,west,1356,7,1.5,,FALSE,545,548
+96334,south,2580,4,1,2,TRUE,816,820
+94555,south,2169,3,2.5,3,TRUE,904,908
+95141,east,3329,4,3,3,TRUE,1064,1069
+95141,east,3660,1,2.5,2,,948,952
+96334,south,3392,4,2,3,TRUE,1026,1031
+96334,east,3688,6,2.5,3,FALSE,1032,1037
+98755,west,3347,3,2.5,2,TRUE,991,996
+95141,east,1810,5,1,1,FALSE,606,609
+95141,east,3753,1,2.5,2,FALSE,959,963
+94555,east,3906,2,1.5,1,FALSE,866,870
+96334,east,1732,3,2,1,TRUE,700,703
+96334,south,2188,4,2,1,TRUE,767,771
+96334,south,3750,6,2,2,FALSE,963,967
+96334,south,2292,6,,1,TRUE,677,680
+98755,west,1526,6,2.5,,TRUE,673,676
+98755,north,2331,1,1.5,1,TRUE,740,743
+94555,north,1512,4,3,3,TRUE,854,858
+98755,north,3352,3,3,3,FALSE,1014,1018
+96334,north,2378,2,,2,FALSE,669,672
+91312,,1159,7,2.5,1,TRUE,670,673
+94555,south,3426,3,2.5,2,FALSE,937,941
+98755,south,3211,5,3,1,TRUE,948,953
+98755,west,2747,2,2.5,1,FALSE,803,806
+96334,east,3952,6,1.5,1,TRUE,946,950
+91312,north,3814,6,1.5,2,FALSE,934,938
+95141,south,3700,7,2.5,1,FALSE,929,933
+98755,,2448,4,1,2,FALSE,733,736
+95141,west,2629,1,2,,FALSE,696,699
+95141,east,3154,4,2.5,1,TRUE,898,902
+91312,south,2648,4,1.5,2,FALSE,793,797
+98755,,3857,3,1,2,TRUE,949,953
+98755,north,1394,4,1.5,1,FALSE,587,590
+91312,west,2709,5,2,2,FALSE,837,841
+94555,east,3946,6,1,2,TRUE,974,978
+91312,north,3905,6,2,2,FALSE,973,977
+98755,east,3248,5,1.5,1,TRUE,860,864
+96334,north,1774,7,1.5,1,FALSE,644,647
+96334,,1576,4,1,2,TRUE,685,688
+95141,north,2853,,1.5,3,TRUE,912,916
+94555,east,1995,2,3,3,TRUE,897,902
+96334,south,3803,,1,3,TRUE,1001,1006
+94555,east,2876,2,3,1,FALSE,828,832
+98755,east,3553,4,,3,TRUE,925,930
+94555,east,3229,4,2,3,TRUE,995,1000
+94555,north,1079,5,2,2,FALSE,638,641
+95141,south,3695,7,2.5,3,FALSE,1046,1051
+96334,west,3694,5,1,1,TRUE,897,901
+98755,west,1918,5,1,2,FALSE,693,697
+94555,south,1647,6,1,2,TRUE,713,716
+96334,west,2691,3,2.5,2,FALSE,858,862
+95141,south,1333,2,2,2,TRUE,716,719
+95141,west,2609,4,2,1,FALSE,765,768
+98755,west,1725,2,2,3,,772,776
+91312,west,2125,3,1,2,TRUE,760,763
+91312,west,2417,5,1,1,FALSE,689,692
+98755,west,3623,2,1,3,TRUE,995,999
+98755,north,3343,6,3,1,FALSE,908,912
+96334,south,1074,7,2.5,3,FALSE,739,743
+96334,south,2972,3,1,2,TRUE,858,862
+91312,east,1637,2,2,1,FALSE,626,629
+91312,north,1807,2,3,2,FALSE,765,768
+95141,north,1457,2,3,1,FALSE,667,670
+91312,west,3043,6,1,1,FALSE,766,770
+91312,west,3045,6,1.5,3,TRUE,967,972
+91312,north,1444,2,,1,TRUE,552,555
+98755,north,1980,5,1,1,TRUE,688,691
+98755,west,1112,3,1.5,3,TRUE,732,735
+98755,south,1533,6,1.5,3,FALSE,734,738
+91312,east,1442,5,2,2,FALSE,675,678
+91312,north,3171,6,1,3,TRUE,945,949
+96334,east,3072,5,1.5,2,FALSE,842,846
+94555,east,3506,4,1.5,3,TRUE,1000,1005
+94555,south,1574,2,1,3,FALSE,691,694
+95141,south,3521,6,,,FALSE,706,709
+94555,east,3567,6,3,1,FALSE,926,931
+91312,south,1194,1,1,2,TRUE,637,640
+94555,east,1031,3,1.5,1,FALSE,532,535
+,south,3141,2,1.5,3,TRUE,955,960
+94555,south,2776,3,2.5,2,TRUE,916,920
+91312,south,2009,5,1.5,1,TRUE,719,723
+96334,north,3784,2,1,2,FALSE,889,893
+94555,west,1975,6,1.5,2,FALSE,729,732
+98755,west,2444,2,3,2,FALSE,854,857
+95141,south,1684,3,1.5,3,FALSE,737,740
+98755,north,1729,6,1,1,TRUE,663,666
+95141,west,2817,1,1,1,,721,724
+95141,north,2236,1,1,2,FALSE,702,705
+95141,south,2061,7,3,1,FALSE,764,768
+98755,south,3561,3,2.5,3,TRUE,1070,1075
+94555,east,2143,3,1,2,FALSE,694,697
+96334,north,3840,7,1,1,FALSE,858,862
+96334,,1086,2,2.5,2,,647,650
+98755,west,3686,,1,1,TRUE,876,880

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/transform/input/homes/homesAllMissing.csv
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/transform/input/homes/homesAllMissing.csv b/src/test/scripts/functions/transform/input/homes/homesAllMissing.csv
index a5a2e2b..0e1cf0c 100644
--- a/src/test/scripts/functions/transform/input/homes/homesAllMissing.csv
+++ b/src/test/scripts/functions/transform/input/homes/homesAllMissing.csv
@@ -1,201 +1,201 @@
-zipcode,district,sqft,numbedrooms,numbathrooms,floors,view,saleprice,askingprice
-95141,south,3002,6,3,2,FALSE,,
-95141,west,1373,7,1,3,FALSE,,698
-91312,south,3261,6,2,2,FALSE,,906
-94555,north,1835,3,3,3,,888,892
-95141,west,2770,5,2.5,,TRUE,812,816
-95141,east,2833,6,2.5,2,,927,932
-96334,north,1339,6,3,1,,672,675
-96334,south,2742,6,2.5,2,FALSE,,876
-96334,north,2195,5,2.5,2,FALSE,,803
-98755,north,3469,7,2.5,2,FALSE,,963
-95141,south,2777,3,,3,,837,841
-96334,west,1685,7,1.5,2,,757,760
-95141,west,2238,4,3,3,FALSE,,899
-98755,south,2699,4,,3,FALSE,,783
-91312,west,1245,4,1,1,FALSE,,549
-95141,west,2233,2,2.5,3,,857,861
-98755,south,3702,7,3,1,FALSE,,964
-95141,south,2644,4,1.5,3,,854,858
-94555,north,3576,2,,3,,921,925
-98755,west,1953,1,,1,,621,624
-98755,north,1865,7,1,2,,742,745
-94555,north,3837,3,1,1,FALSE,,842
-91312,west,2139,3,1,3,,820,824
-95141,north,3824,4,3,1,FALSE,,958
-98755,east,2858,5,1.5,1,FALSE,,762
-91312,south,1827,7,3,1,FALSE,,738
-98755,east,2811,4,1,,FALSE,,666
-91312,south,3557,2,2.5,1,FALSE,,892
-91312,,3343,7,1.5,2,,929,933
-95141,north,1612,6,2,,,644,647
-91312,south,2553,2,2.5,2,,884,889
-98755,east,1601,,1,3,,735,738
-96334,west,1682,3,1.5,1,FALSE,,628
-98755,south,3926,6,2,2,,1040,1044
-91312,north,2583,1,2.5,,FALSE,,710
-98755,south,2056,7,3,1,,766,770
-96334,east,2790,5,2.5,3,FALSE,,927
-98755,east,2162,5,2.5,2,,792,795
-95141,west,3928,4,2,3,FALSE,,1042
-94555,south,2000,1,3,2,,844,848
-96334,east,2688,5,2,3,,938,943
-94555,south,3533,2,1.5,2,FALSE,,894
-95141,north,1080,1,1,2,,566,569
-94555,south,2639,3,2,3,FALSE,,880
-91312,west,2462,4,2.5,1,,828,832
-94555,north,2420,1,1.5,3,FALSE,,813
-91312,north,2575,4,3,,FALSE,,750
-95141,west,1269,2,2,2,,658,661
-96334,west,3911,6,1.5,3,,1072,1077
-96334,west,2166,6,1.5,3,,867,872
-94555,south,3855,6,2,1,FALSE,,918
-91312,west,1971,2,3,1,,790,794
-95141,east,1769,4,1,1,,652,655
-98755,north,3774,6,1,2,,961,966
-96334,west,1044,2,3,1,,685,688
-95141,north,2561,7,1.5,1,,790,794
-94555,north,1714,4,1.5,,FALSE,,565
-95141,south,2255,2,2,3,,883,887
-94555,north,3085,6,2,1,FALSE,,823
-98755,south,1273,2,1.5,2,FALSE,,631
-91312,west,3785,5,3,3,,1133,1138
-98755,east,2651,2,2,1,,750,753
-91312,west,3270,7,1.5,3,FALSE,,947
-98755,south,1749,2,2,2,FALSE,,715
-98755,south,1625,7,1.5,2,FALSE,,694
-96334,north,3010,7,1.5,2,FALSE,,851
-91312,south,3919,5,1,3,,1033,1038
-95141,north,1745,2,2,,FALSE,,590
-91312,south,1976,1,2,1,,726,729
-91312,west,3953,3,1.5,3,FALSE,,1010
-95141,west,3439,4,2,2,FALSE,,925
-94555,east,3570,7,1,2,,934,938
-98755,west,2484,5,3,2,,926,931
-91312,west,2628,3,1.5,3,,850,854
-94555,south,1349,3,2,2,,721,724
-94555,,3858,7,2.5,2,,1046,1051
-94555,south,1202,4,3,3,,767,771
-98755,west,3967,3,2,3,,1095,1100
-94555,west,2090,7,3,2,,885,889
-91312,north,3236,1,,,,699,702
-94555,west,3931,3,1,,,800,804
-96334,east,1856,5,2,3,,841,845
-91312,north,1922,2,3,1,,775,778
-98755,north,1199,3,3,3,FALSE,,765
-96334,east,2108,3,1,3,,806,810
-95141,north,1259,3,1,,FALSE,,481
-96334,west,3901,4,2,2,FALSE,,981
-94555,south,2654,6,1.5,2,,859,863
-94555,west,3805,6,2,3,,1085,1090
-95141,south,3199,4,2,3,FALSE,,951
-98755,west,3786,5,1,1,,909,913
-94555,east,2160,1,1,1,FALSE,,631
-,west,2331,3,2,3,FALSE,,846
-95141,east,3152,7,2,1,,883,887
-96334,south,1277,3,2,2,,659,663
-94555,east,1592,2,3,2,,791,795
-95141,east,3903,1,2.5,2,FALSE,,981
-91312,south,1076,2,2.5,1,FALSE,,600
-96334,west,1719,1,1.5,3,FALSE,,742
-94555,north,1439,4,1.5,1,FALSE,,592
-91312,east,1961,2,3,1,,775,778
-95141,south,3534,3,2,,,861,865
-94555,north,2471,1,1.5,1,,753,756
-91312,west,3930,4,2.5,2,FALSE,,1009
-95141,north,2529,4,1.5,,,660,663
-95141,south,2833,1,1,1,FALSE,,721
-96334,west,1356,7,1.5,,FALSE,,548
-96334,south,2580,4,1,2,,816,820
-94555,south,2169,3,2.5,3,,904,908
-95141,east,3329,4,3,3,,1064,1069
-95141,east,3660,1,2.5,2,,948,952
-96334,south,3392,4,2,3,,1026,1031
-96334,east,3688,6,2.5,3,FALSE,,1037
-98755,west,3347,3,2.5,2,,991,996
-95141,east,1810,5,1,1,FALSE,,609
-95141,east,3753,1,2.5,2,FALSE,,963
-94555,east,3906,2,1.5,1,FALSE,,870
-96334,east,1732,3,2,1,,700,703
-96334,south,2188,4,2,1,,767,771
-96334,south,3750,6,2,2,FALSE,,967
-96334,south,2292,6,,1,,677,680
-98755,west,1526,6,2.5,,,673,676
-98755,north,2331,1,1.5,1,,740,743
-94555,north,1512,4,3,3,,854,858
-98755,north,3352,3,3,3,FALSE,,1018
-96334,north,2378,2,,2,FALSE,,672
-91312,,1159,7,2.5,1,,670,673
-94555,south,3426,3,2.5,2,FALSE,,941
-98755,south,3211,5,3,1,,948,953
-98755,west,2747,2,2.5,1,FALSE,,806
-96334,east,3952,6,1.5,1,,946,950
-91312,north,3814,6,1.5,2,FALSE,,938
-95141,south,3700,7,2.5,1,FALSE,,933
-98755,,2448,4,1,2,FALSE,,736
-95141,west,2629,1,2,,FALSE,,699
-95141,east,3154,4,2.5,1,,898,902
-91312,south,2648,4,1.5,2,FALSE,,797
-98755,,3857,3,1,2,,949,953
-98755,north,1394,4,1.5,1,FALSE,,590
-91312,west,2709,5,2,2,FALSE,,841
-94555,east,3946,6,1,2,,974,978
-91312,north,3905,6,2,2,FALSE,,977
-98755,east,3248,5,1.5,1,,860,864
-96334,north,1774,7,1.5,1,FALSE,,647
-96334,,1576,4,1,2,,685,688
-95141,north,2853,,1.5,3,,912,916
-94555,east,1995,2,3,3,,897,902
-96334,south,3803,,1,3,,1001,1006
-94555,east,2876,2,3,1,FALSE,,832
-98755,east,3553,4,,3,,925,930
-94555,east,3229,4,2,3,,995,1000
-94555,north,1079,5,2,2,FALSE,,641
-95141,south,3695,7,2.5,3,FALSE,,1051
-96334,west,3694,5,1,1,,897,901
-98755,west,1918,5,1,2,FALSE,,697
-94555,south,1647,6,1,2,,713,716
-96334,west,2691,3,2.5,2,FALSE,,862
-95141,south,1333,2,2,2,,716,719
-95141,west,2609,4,2,1,FALSE,,768
-98755,west,1725,2,2,3,,772,776
-91312,west,2125,3,1,2,,760,763
-91312,west,2417,5,1,1,FALSE,,692
-98755,west,3623,2,1,3,,995,999
-98755,north,3343,6,3,1,FALSE,,912
-96334,south,1074,7,2.5,3,FALSE,,743
-96334,south,2972,3,1,2,,858,862
-91312,east,1637,2,2,1,FALSE,,629
-91312,north,1807,2,3,2,FALSE,,768
-95141,north,1457,2,3,1,FALSE,,670
-91312,west,3043,6,1,1,FALSE,,770
-91312,west,3045,6,1.5,3,,967,972
-91312,north,1444,2,,1,,552,555
-98755,north,1980,5,1,1,,688,691
-98755,west,1112,3,1.5,3,,732,735
-98755,south,1533,6,1.5,3,FALSE,,738
-91312,east,1442,5,2,2,FALSE,,678
-91312,north,3171,6,1,3,,945,949
-96334,east,3072,5,1.5,2,FALSE,,846
-94555,east,3506,4,1.5,3,,1000,1005
-94555,south,1574,2,1,3,FALSE,,694
-95141,south,3521,6,,,FALSE,,709
-94555,east,3567,6,3,1,FALSE,,931
-91312,south,1194,1,1,2,,637,640
-94555,east,1031,3,1.5,1,FALSE,,535
-,south,3141,2,1.5,3,,955,960
-94555,south,2776,3,2.5,2,,916,920
-91312,south,2009,5,1.5,1,,719,723
-96334,north,3784,2,1,2,FALSE,,893
-94555,west,1975,6,1.5,2,FALSE,,732
-98755,west,2444,2,3,2,FALSE,,857
-95141,south,1684,3,1.5,3,FALSE,,740
-98755,north,1729,6,1,1,,663,666
-95141,west,2817,1,1,1,,721,724
-95141,north,2236,1,1,2,FALSE,,705
-95141,south,2061,7,3,1,FALSE,,768
-98755,south,3561,3,2.5,3,,1070,1075
-94555,east,2143,3,1,2,FALSE,,697
-96334,north,3840,7,1,1,FALSE,,862
-96334,,1086,2,2.5,2,,647,650
-98755,west,3686,,1,1,,876,880
+zipcode,district,sqft,numbedrooms,numbathrooms,floors,view,saleprice,askingprice
+95141,south,3002,6,3,2,FALSE,,
+95141,west,1373,7,1,3,FALSE,,698
+91312,south,3261,6,2,2,FALSE,,906
+94555,north,1835,3,3,3,,888,892
+95141,west,2770,5,2.5,,TRUE,812,816
+95141,east,2833,6,2.5,2,,927,932
+96334,north,1339,6,3,1,,672,675
+96334,south,2742,6,2.5,2,FALSE,,876
+96334,north,2195,5,2.5,2,FALSE,,803
+98755,north,3469,7,2.5,2,FALSE,,963
+95141,south,2777,3,,3,,837,841
+96334,west,1685,7,1.5,2,,757,760
+95141,west,2238,4,3,3,FALSE,,899
+98755,south,2699,4,,3,FALSE,,783
+91312,west,1245,4,1,1,FALSE,,549
+95141,west,2233,2,2.5,3,,857,861
+98755,south,3702,7,3,1,FALSE,,964
+95141,south,2644,4,1.5,3,,854,858
+94555,north,3576,2,,3,,921,925
+98755,west,1953,1,,1,,621,624
+98755,north,1865,7,1,2,,742,745
+94555,north,3837,3,1,1,FALSE,,842
+91312,west,2139,3,1,3,,820,824
+95141,north,3824,4,3,1,FALSE,,958
+98755,east,2858,5,1.5,1,FALSE,,762
+91312,south,1827,7,3,1,FALSE,,738
+98755,east,2811,4,1,,FALSE,,666
+91312,south,3557,2,2.5,1,FALSE,,892
+91312,,3343,7,1.5,2,,929,933
+95141,north,1612,6,2,,,644,647
+91312,south,2553,2,2.5,2,,884,889
+98755,east,1601,,1,3,,735,738
+96334,west,1682,3,1.5,1,FALSE,,628
+98755,south,3926,6,2,2,,1040,1044
+91312,north,2583,1,2.5,,FALSE,,710
+98755,south,2056,7,3,1,,766,770
+96334,east,2790,5,2.5,3,FALSE,,927
+98755,east,2162,5,2.5,2,,792,795
+95141,west,3928,4,2,3,FALSE,,1042
+94555,south,2000,1,3,2,,844,848
+96334,east,2688,5,2,3,,938,943
+94555,south,3533,2,1.5,2,FALSE,,894
+95141,north,1080,1,1,2,,566,569
+94555,south,2639,3,2,3,FALSE,,880
+91312,west,2462,4,2.5,1,,828,832
+94555,north,2420,1,1.5,3,FALSE,,813
+91312,north,2575,4,3,,FALSE,,750
+95141,west,1269,2,2,2,,658,661
+96334,west,3911,6,1.5,3,,1072,1077
+96334,west,2166,6,1.5,3,,867,872
+94555,south,3855,6,2,1,FALSE,,918
+91312,west,1971,2,3,1,,790,794
+95141,east,1769,4,1,1,,652,655
+98755,north,3774,6,1,2,,961,966
+96334,west,1044,2,3,1,,685,688
+95141,north,2561,7,1.5,1,,790,794
+94555,north,1714,4,1.5,,FALSE,,565
+95141,south,2255,2,2,3,,883,887
+94555,north,3085,6,2,1,FALSE,,823
+98755,south,1273,2,1.5,2,FALSE,,631
+91312,west,3785,5,3,3,,1133,1138
+98755,east,2651,2,2,1,,750,753
+91312,west,3270,7,1.5,3,FALSE,,947
+98755,south,1749,2,2,2,FALSE,,715
+98755,south,1625,7,1.5,2,FALSE,,694
+96334,north,3010,7,1.5,2,FALSE,,851
+91312,south,3919,5,1,3,,1033,1038
+95141,north,1745,2,2,,FALSE,,590
+91312,south,1976,1,2,1,,726,729
+91312,west,3953,3,1.5,3,FALSE,,1010
+95141,west,3439,4,2,2,FALSE,,925
+94555,east,3570,7,1,2,,934,938
+98755,west,2484,5,3,2,,926,931
+91312,west,2628,3,1.5,3,,850,854
+94555,south,1349,3,2,2,,721,724
+94555,,3858,7,2.5,2,,1046,1051
+94555,south,1202,4,3,3,,767,771
+98755,west,3967,3,2,3,,1095,1100
+94555,west,2090,7,3,2,,885,889
+91312,north,3236,1,,,,699,702
+94555,west,3931,3,1,,,800,804
+96334,east,1856,5,2,3,,841,845
+91312,north,1922,2,3,1,,775,778
+98755,north,1199,3,3,3,FALSE,,765
+96334,east,2108,3,1,3,,806,810
+95141,north,1259,3,1,,FALSE,,481
+96334,west,3901,4,2,2,FALSE,,981
+94555,south,2654,6,1.5,2,,859,863
+94555,west,3805,6,2,3,,1085,1090
+95141,south,3199,4,2,3,FALSE,,951
+98755,west,3786,5,1,1,,909,913
+94555,east,2160,1,1,1,FALSE,,631
+,west,2331,3,2,3,FALSE,,846
+95141,east,3152,7,2,1,,883,887
+96334,south,1277,3,2,2,,659,663
+94555,east,1592,2,3,2,,791,795
+95141,east,3903,1,2.5,2,FALSE,,981
+91312,south,1076,2,2.5,1,FALSE,,600
+96334,west,1719,1,1.5,3,FALSE,,742
+94555,north,1439,4,1.5,1,FALSE,,592
+91312,east,1961,2,3,1,,775,778
+95141,south,3534,3,2,,,861,865
+94555,north,2471,1,1.5,1,,753,756
+91312,west,3930,4,2.5,2,FALSE,,1009
+95141,north,2529,4,1.5,,,660,663
+95141,south,2833,1,1,1,FALSE,,721
+96334,west,1356,7,1.5,,FALSE,,548
+96334,south,2580,4,1,2,,816,820
+94555,south,2169,3,2.5,3,,904,908
+95141,east,3329,4,3,3,,1064,1069
+95141,east,3660,1,2.5,2,,948,952
+96334,south,3392,4,2,3,,1026,1031
+96334,east,3688,6,2.5,3,FALSE,,1037
+98755,west,3347,3,2.5,2,,991,996
+95141,east,1810,5,1,1,FALSE,,609
+95141,east,3753,1,2.5,2,FALSE,,963
+94555,east,3906,2,1.5,1,FALSE,,870
+96334,east,1732,3,2,1,,700,703
+96334,south,2188,4,2,1,,767,771
+96334,south,3750,6,2,2,FALSE,,967
+96334,south,2292,6,,1,,677,680
+98755,west,1526,6,2.5,,,673,676
+98755,north,2331,1,1.5,1,,740,743
+94555,north,1512,4,3,3,,854,858
+98755,north,3352,3,3,3,FALSE,,1018
+96334,north,2378,2,,2,FALSE,,672
+91312,,1159,7,2.5,1,,670,673
+94555,south,3426,3,2.5,2,FALSE,,941
+98755,south,3211,5,3,1,,948,953
+98755,west,2747,2,2.5,1,FALSE,,806
+96334,east,3952,6,1.5,1,,946,950
+91312,north,3814,6,1.5,2,FALSE,,938
+95141,south,3700,7,2.5,1,FALSE,,933
+98755,,2448,4,1,2,FALSE,,736
+95141,west,2629,1,2,,FALSE,,699
+95141,east,3154,4,2.5,1,,898,902
+91312,south,2648,4,1.5,2,FALSE,,797
+98755,,3857,3,1,2,,949,953
+98755,north,1394,4,1.5,1,FALSE,,590
+91312,west,2709,5,2,2,FALSE,,841
+94555,east,3946,6,1,2,,974,978
+91312,north,3905,6,2,2,FALSE,,977
+98755,east,3248,5,1.5,1,,860,864
+96334,north,1774,7,1.5,1,FALSE,,647
+96334,,1576,4,1,2,,685,688
+95141,north,2853,,1.5,3,,912,916
+94555,east,1995,2,3,3,,897,902
+96334,south,3803,,1,3,,1001,1006
+94555,east,2876,2,3,1,FALSE,,832
+98755,east,3553,4,,3,,925,930
+94555,east,3229,4,2,3,,995,1000
+94555,north,1079,5,2,2,FALSE,,641
+95141,south,3695,7,2.5,3,FALSE,,1051
+96334,west,3694,5,1,1,,897,901
+98755,west,1918,5,1,2,FALSE,,697
+94555,south,1647,6,1,2,,713,716
+96334,west,2691,3,2.5,2,FALSE,,862
+95141,south,1333,2,2,2,,716,719
+95141,west,2609,4,2,1,FALSE,,768
+98755,west,1725,2,2,3,,772,776
+91312,west,2125,3,1,2,,760,763
+91312,west,2417,5,1,1,FALSE,,692
+98755,west,3623,2,1,3,,995,999
+98755,north,3343,6,3,1,FALSE,,912
+96334,south,1074,7,2.5,3,FALSE,,743
+96334,south,2972,3,1,2,,858,862
+91312,east,1637,2,2,1,FALSE,,629
+91312,north,1807,2,3,2,FALSE,,768
+95141,north,1457,2,3,1,FALSE,,670
+91312,west,3043,6,1,1,FALSE,,770
+91312,west,3045,6,1.5,3,,967,972
+91312,north,1444,2,,1,,552,555
+98755,north,1980,5,1,1,,688,691
+98755,west,1112,3,1.5,3,,732,735
+98755,south,1533,6,1.5,3,FALSE,,738
+91312,east,1442,5,2,2,FALSE,,678
+91312,north,3171,6,1,3,,945,949
+96334,east,3072,5,1.5,2,FALSE,,846
+94555,east,3506,4,1.5,3,,1000,1005
+94555,south,1574,2,1,3,FALSE,,694
+95141,south,3521,6,,,FALSE,,709
+94555,east,3567,6,3,1,FALSE,,931
+91312,south,1194,1,1,2,,637,640
+94555,east,1031,3,1.5,1,FALSE,,535
+,south,3141,2,1.5,3,,955,960
+94555,south,2776,3,2.5,2,,916,920
+91312,south,2009,5,1.5,1,,719,723
+96334,north,3784,2,1,2,FALSE,,893
+94555,west,1975,6,1.5,2,FALSE,,732
+98755,west,2444,2,3,2,FALSE,,857
+95141,south,1684,3,1.5,3,FALSE,,740
+98755,north,1729,6,1,1,,663,666
+95141,west,2817,1,1,1,,721,724
+95141,north,2236,1,1,2,FALSE,,705
+95141,south,2061,7,3,1,FALSE,,768
+98755,south,3561,3,2.5,3,,1070,1075
+94555,east,2143,3,1,2,FALSE,,697
+96334,north,3840,7,1,1,FALSE,,862
+96334,,1086,2,2.5,2,,647,650
+98755,west,3686,,1,1,,876,880

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/transform/input/homes/homesAllMissing.tfidspec.json
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/transform/input/homes/homesAllMissing.tfidspec.json b/src/test/scripts/functions/transform/input/homes/homesAllMissing.tfidspec.json
index c3912e7..e97149f 100644
--- a/src/test/scripts/functions/transform/input/homes/homesAllMissing.tfidspec.json
+++ b/src/test/scripts/functions/transform/input/homes/homesAllMissing.tfidspec.json
@@ -1,5 +1,5 @@
-{
-    "ids": true
-    ,"omit": [ 1,2,3,4,5,6,7,8,9 ]
-    ,"recode": [ 1, 2, 4, 5, 6, 7 ]
-}
+{
+    "ids": true
+    ,"omit": [ 1,2,3,4,5,6,7,8,9 ]
+    ,"recode": [ 1, 2, 4, 5, 6, 7 ]
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/transform/input/homes/homesAllMissing.tfspec.json
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/transform/input/homes/homesAllMissing.tfspec.json b/src/test/scripts/functions/transform/input/homes/homesAllMissing.tfspec.json
index b14d1f1..c69faf1 100644
--- a/src/test/scripts/functions/transform/input/homes/homesAllMissing.tfspec.json
+++ b/src/test/scripts/functions/transform/input/homes/homesAllMissing.tfspec.json
@@ -1,4 +1,4 @@
-{
-    "omit": [ "zipcode","district","sqft","numbedrooms","numbathrooms","floors","view","saleprice","askingprice" ]
-    ,"recode": [ "zipcode", "district", "numbedrooms", "numbathrooms", "floors", "view" ]
-}
+{
+    "omit": [ "zipcode","district","sqft","numbedrooms","numbathrooms","floors","view","saleprice","askingprice" ]
+    ,"recode": [ "zipcode", "district", "numbedrooms", "numbathrooms", "floors", "view" ]
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/transform/input/homes/homesOmit.tfidspec.json
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/transform/input/homes/homesOmit.tfidspec.json b/src/test/scripts/functions/transform/input/homes/homesOmit.tfidspec.json
index a287e9f..c81ccd0 100644
--- a/src/test/scripts/functions/transform/input/homes/homesOmit.tfidspec.json
+++ b/src/test/scripts/functions/transform/input/homes/homesOmit.tfidspec.json
@@ -1,14 +1,14 @@
-{
-    "ids": true
-    ,"omit": [ 1,2,3,4,5,6,7,8,9 ]
-    
-    ,"recode": [ 1, 2, 4, 5, 6, 7 ]
-    
-    ,"bin": [
-                 { "id": 8, "method": "equi-width", "numbins": 3 }
-                ,{ "id": 3, "method": "equi-width", "numbins": 4 }
-            ]
-
-    ,"dummycode": [ 2, 5, 6, 7, 8, 3 ]
-    
-}
+{
+    "ids": true
+    ,"omit": [ 1,2,3,4,5,6,7,8,9 ]
+    
+    ,"recode": [ 1, 2, 4, 5, 6, 7 ]
+    
+    ,"bin": [
+                 { "id": 8, "method": "equi-width", "numbins": 3 }
+                ,{ "id": 3, "method": "equi-width", "numbins": 4 }
+            ]
+
+    ,"dummycode": [ 2, 5, 6, 7, 8, 3 ]
+    
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/transform/input/homes/homesOmit.tfspec.json
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/transform/input/homes/homesOmit.tfspec.json b/src/test/scripts/functions/transform/input/homes/homesOmit.tfspec.json
index 81b1f66..f33560e 100644
--- a/src/test/scripts/functions/transform/input/homes/homesOmit.tfspec.json
+++ b/src/test/scripts/functions/transform/input/homes/homesOmit.tfspec.json
@@ -1,13 +1,13 @@
-{
-    "omit": ["zipcode","district","sqft","numbedrooms","numbathrooms","floors","view","saleprice","askingprice"]
-    
-    ,"recode": [ "zipcode", "district", "numbedrooms", "numbathrooms", "floors", "view" ]
-    
-    ,"bin": [
-                 { "name": "saleprice"  , "method": "equi-width", "numbins": 3 }
-                ,{ "name": "sqft"       , "method": "equi-width", "numbins": 4 }
-            ]
-
-    ,"dummycode": [ "district", "numbathrooms", "floors", "view", "saleprice", "sqft" ]
-    
-}
+{
+    "omit": ["zipcode","district","sqft","numbedrooms","numbathrooms","floors","view","saleprice","askingprice"]
+    
+    ,"recode": [ "zipcode", "district", "numbedrooms", "numbathrooms", "floors", "view" ]
+    
+    ,"bin": [
+                 { "name": "saleprice"  , "method": "equi-width", "numbins": 3 }
+                ,{ "name": "sqft"       , "method": "equi-width", "numbins": 4 }
+            ]
+
+    ,"dummycode": [ "district", "numbathrooms", "floors", "view", "saleprice", "sqft" ]
+    
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/transform/input/homes2/homes.csv/homes1.csv
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/transform/input/homes2/homes.csv/homes1.csv b/src/test/scripts/functions/transform/input/homes2/homes.csv/homes1.csv
index 9366170..c2d5fe5 100644
--- a/src/test/scripts/functions/transform/input/homes2/homes.csv/homes1.csv
+++ b/src/test/scripts/functions/transform/input/homes2/homes.csv/homes1.csv
@@ -1,99 +1,99 @@
-zipcode,district,sqft,numbedrooms,numbathrooms,floors,view,saleprice,askingprice
-95141,south,3002,6,3,2,FALSE,929,
-95141,west,1373,7,1,3,FALSE,695,698
-91312,south,3261,6,2,2,FALSE,902,906
-94555,north,1835,3,3,3,TRUE,888,892
-95141,west,2770,5,2.5,,TRUE,812,816
-95141,east,2833,6,2.5,2,TRUE,927,932
-96334,north,1339,6,3,1,,672,675
-96334,south,2742,6,2.5,2,FALSE,872,876
-96334,north,2195,5,2.5,2,FALSE,799,803
-98755,north,3469,7,2.5,2,FALSE,958,963
-95141,south,2777,3,,3,TRUE,837,841
-96334,west,1685,7,1.5,2,TRUE,757,760
-95141,west,2238,4,3,3,FALSE,894,899
-98755,south,2699,4,,3,FALSE,779,783
-91312,west,1245,4,1,1,FALSE,547,549
-95141,west,2233,2,2.5,3,,857,861
-98755,south,3702,7,3,1,FALSE,959,964
-95141,south,2644,4,1.5,3,,854,858
-94555,north,3576,2,,3,TRUE,921,925
-98755,west,1953,1,,1,TRUE,621,624
-98755,north,1865,7,1,2,TRUE,742,745
-94555,north,3837,3,1,1,FALSE,839,842
-91312,west,2139,3,1,3,TRUE,820,824
-95141,north,3824,4,3,1,FALSE,954,958
-98755,east,2858,5,1.5,1,FALSE,759,762
-91312,south,1827,7,3,1,FALSE,735,738
-98755,east,2811,4,1,,FALSE,663,666
-91312,south,3557,2,2.5,1,FALSE,888,892
-91312,,3343,7,1.5,2,TRUE,929,933
-95141,north,1612,6,2,,TRUE,644,647
-91312,south,2553,2,2.5,2,TRUE,884,889
-98755,east,1601,,1,3,TRUE,735,738
-96334,west,1682,3,1.5,1,FALSE,625,628
-98755,south,3926,6,2,2,TRUE,1040,1044
-91312,north,2583,1,2.5,,FALSE,706,710
-98755,south,2056,7,3,1,,766,770
-96334,east,2790,5,2.5,3,FALSE,923,927
-98755,east,2162,5,2.5,2,,792,795
-95141,west,3928,4,2,3,FALSE,1037,1042
-94555,south,2000,1,3,2,TRUE,844,848
-96334,east,2688,5,2,3,TRUE,938,943
-94555,south,3533,2,1.5,2,FALSE,890,894
-95141,north,1080,1,1,2,,566,569
-94555,south,2639,3,2,3,FALSE,876,880
-91312,west,2462,4,2.5,1,TRUE,828,832
-94555,north,2420,1,1.5,3,FALSE,809,813
-91312,north,2575,4,3,,FALSE,746,750
-95141,west,1269,2,2,2,,658,661
-96334,west,3911,6,1.5,3,TRUE,1072,1077
-96334,west,2166,6,1.5,3,TRUE,867,872
-94555,south,3855,6,2,1,FALSE,914,918
-91312,west,1971,2,3,1,TRUE,790,794
-95141,east,1769,4,1,1,TRUE,652,655
-98755,north,3774,6,1,2,TRUE,961,966
-96334,west,1044,2,3,1,TRUE,685,688
-95141,north,2561,7,1.5,1,TRUE,790,794
-94555,north,1714,4,1.5,,FALSE,563,565
-95141,south,2255,2,2,3,TRUE,883,887
-94555,north,3085,6,2,1,FALSE,819,823
-98755,south,1273,2,1.5,2,FALSE,628,631
-91312,west,3785,5,3,3,TRUE,1133,1138
-98755,east,2651,2,2,1,,750,753
-91312,west,3270,7,1.5,3,FALSE,943,947
-98755,south,1749,2,2,2,FALSE,712,715
-98755,south,1625,7,1.5,2,FALSE,691,694
-96334,north,3010,7,1.5,2,FALSE,848,851
-91312,south,3919,5,1,3,TRUE,1033,1038
-95141,north,1745,2,2,,FALSE,587,590
-91312,south,1976,1,2,1,TRUE,726,729
-91312,west,3953,3,1.5,3,FALSE,1006,1010
-95141,west,3439,4,2,2,FALSE,921,925
-94555,east,3570,7,1,2,TRUE,934,938
-98755,west,2484,5,3,2,TRUE,926,931
-91312,west,2628,3,1.5,3,,850,854
-94555,south,1349,3,2,2,TRUE,721,724
-94555,,3858,7,2.5,2,TRUE,1046,1051
-94555,south,1202,4,3,3,,767,771
-98755,west,3967,3,2,3,TRUE,1095,1100
-94555,west,2090,7,3,2,TRUE,885,889
-91312,north,3236,1,,,TRUE,699,702
-94555,west,3931,3,1,,,800,804
-96334,east,1856,5,2,3,TRUE,841,845
-91312,north,1922,2,3,1,TRUE,775,778
-98755,north,1199,3,3,3,FALSE,761,765
-96334,east,2108,3,1,3,TRUE,806,810
-95141,north,1259,3,1,,FALSE,478,481
-96334,west,3901,4,2,2,FALSE,976,981
-94555,south,2654,6,1.5,2,TRUE,859,863
-94555,west,3805,6,2,3,TRUE,1085,1090
-95141,south,3199,4,2,3,FALSE,947,951
-98755,west,3786,5,1,1,TRUE,909,913
-94555,east,2160,1,1,1,FALSE,629,631
-,west,2331,3,2,3,FALSE,842,846
-95141,east,3152,7,2,1,TRUE,883,887
-96334,south,1277,3,2,2,,659,663
-94555,east,1592,2,3,2,TRUE,791,795
-95141,east,3903,1,2.5,2,FALSE,976,981
+zipcode,district,sqft,numbedrooms,numbathrooms,floors,view,saleprice,askingprice
+95141,south,3002,6,3,2,FALSE,929,
+95141,west,1373,7,1,3,FALSE,695,698
+91312,south,3261,6,2,2,FALSE,902,906
+94555,north,1835,3,3,3,TRUE,888,892
+95141,west,2770,5,2.5,,TRUE,812,816
+95141,east,2833,6,2.5,2,TRUE,927,932
+96334,north,1339,6,3,1,,672,675
+96334,south,2742,6,2.5,2,FALSE,872,876
+96334,north,2195,5,2.5,2,FALSE,799,803
+98755,north,3469,7,2.5,2,FALSE,958,963
+95141,south,2777,3,,3,TRUE,837,841
+96334,west,1685,7,1.5,2,TRUE,757,760
+95141,west,2238,4,3,3,FALSE,894,899
+98755,south,2699,4,,3,FALSE,779,783
+91312,west,1245,4,1,1,FALSE,547,549
+95141,west,2233,2,2.5,3,,857,861
+98755,south,3702,7,3,1,FALSE,959,964
+95141,south,2644,4,1.5,3,,854,858
+94555,north,3576,2,,3,TRUE,921,925
+98755,west,1953,1,,1,TRUE,621,624
+98755,north,1865,7,1,2,TRUE,742,745
+94555,north,3837,3,1,1,FALSE,839,842
+91312,west,2139,3,1,3,TRUE,820,824
+95141,north,3824,4,3,1,FALSE,954,958
+98755,east,2858,5,1.5,1,FALSE,759,762
+91312,south,1827,7,3,1,FALSE,735,738
+98755,east,2811,4,1,,FALSE,663,666
+91312,south,3557,2,2.5,1,FALSE,888,892
+91312,,3343,7,1.5,2,TRUE,929,933
+95141,north,1612,6,2,,TRUE,644,647
+91312,south,2553,2,2.5,2,TRUE,884,889
+98755,east,1601,,1,3,TRUE,735,738
+96334,west,1682,3,1.5,1,FALSE,625,628
+98755,south,3926,6,2,2,TRUE,1040,1044
+91312,north,2583,1,2.5,,FALSE,706,710
+98755,south,2056,7,3,1,,766,770
+96334,east,2790,5,2.5,3,FALSE,923,927
+98755,east,2162,5,2.5,2,,792,795
+95141,west,3928,4,2,3,FALSE,1037,1042
+94555,south,2000,1,3,2,TRUE,844,848
+96334,east,2688,5,2,3,TRUE,938,943
+94555,south,3533,2,1.5,2,FALSE,890,894
+95141,north,1080,1,1,2,,566,569
+94555,south,2639,3,2,3,FALSE,876,880
+91312,west,2462,4,2.5,1,TRUE,828,832
+94555,north,2420,1,1.5,3,FALSE,809,813
+91312,north,2575,4,3,,FALSE,746,750
+95141,west,1269,2,2,2,,658,661
+96334,west,3911,6,1.5,3,TRUE,1072,1077
+96334,west,2166,6,1.5,3,TRUE,867,872
+94555,south,3855,6,2,1,FALSE,914,918
+91312,west,1971,2,3,1,TRUE,790,794
+95141,east,1769,4,1,1,TRUE,652,655
+98755,north,3774,6,1,2,TRUE,961,966
+96334,west,1044,2,3,1,TRUE,685,688
+95141,north,2561,7,1.5,1,TRUE,790,794
+94555,north,1714,4,1.5,,FALSE,563,565
+95141,south,2255,2,2,3,TRUE,883,887
+94555,north,3085,6,2,1,FALSE,819,823
+98755,south,1273,2,1.5,2,FALSE,628,631
+91312,west,3785,5,3,3,TRUE,1133,1138
+98755,east,2651,2,2,1,,750,753
+91312,west,3270,7,1.5,3,FALSE,943,947
+98755,south,1749,2,2,2,FALSE,712,715
+98755,south,1625,7,1.5,2,FALSE,691,694
+96334,north,3010,7,1.5,2,FALSE,848,851
+91312,south,3919,5,1,3,TRUE,1033,1038
+95141,north,1745,2,2,,FALSE,587,590
+91312,south,1976,1,2,1,TRUE,726,729
+91312,west,3953,3,1.5,3,FALSE,1006,1010
+95141,west,3439,4,2,2,FALSE,921,925
+94555,east,3570,7,1,2,TRUE,934,938
+98755,west,2484,5,3,2,TRUE,926,931
+91312,west,2628,3,1.5,3,,850,854
+94555,south,1349,3,2,2,TRUE,721,724
+94555,,3858,7,2.5,2,TRUE,1046,1051
+94555,south,1202,4,3,3,,767,771
+98755,west,3967,3,2,3,TRUE,1095,1100
+94555,west,2090,7,3,2,TRUE,885,889
+91312,north,3236,1,,,TRUE,699,702
+94555,west,3931,3,1,,,800,804
+96334,east,1856,5,2,3,TRUE,841,845
+91312,north,1922,2,3,1,TRUE,775,778
+98755,north,1199,3,3,3,FALSE,761,765
+96334,east,2108,3,1,3,TRUE,806,810
+95141,north,1259,3,1,,FALSE,478,481
+96334,west,3901,4,2,2,FALSE,976,981
+94555,south,2654,6,1.5,2,TRUE,859,863
+94555,west,3805,6,2,3,TRUE,1085,1090
+95141,south,3199,4,2,3,FALSE,947,951
+98755,west,3786,5,1,1,TRUE,909,913
+94555,east,2160,1,1,1,FALSE,629,631
+,west,2331,3,2,3,FALSE,842,846
+95141,east,3152,7,2,1,TRUE,883,887
+96334,south,1277,3,2,2,,659,663
+94555,east,1592,2,3,2,TRUE,791,795
+95141,east,3903,1,2.5,2,FALSE,976,981
 91312,south,1076,2,2.5,1,FALSE,597,600
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/transform/input/homes2/homes.csv/homes2.csv
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/transform/input/homes2/homes.csv/homes2.csv b/src/test/scripts/functions/transform/input/homes2/homes.csv/homes2.csv
index 31474f5..4c4bd0e 100644
--- a/src/test/scripts/functions/transform/input/homes2/homes.csv/homes2.csv
+++ b/src/test/scripts/functions/transform/input/homes2/homes.csv/homes2.csv
@@ -1,102 +1,102 @@
-96334,west,1719,1,1.5,3,FALSE,738,742
-94555,north,1439,4,1.5,1,FALSE,589,592
-91312,east,1961,2,3,1,TRUE,775,778
-95141,south,3534,3,2,,TRUE,861,865
-94555,north,2471,1,1.5,1,TRUE,753,756
-91312,west,3930,4,2.5,2,FALSE,1004,1009
-95141,north,2529,4,1.5,,,660,663
-95141,south,2833,1,1,1,FALSE,718,721
-96334,west,1356,7,1.5,,FALSE,545,548
-96334,south,2580,4,1,2,TRUE,816,820
-94555,south,2169,3,2.5,3,TRUE,904,908
-95141,east,3329,4,3,3,TRUE,1064,1069
-95141,east,3660,1,2.5,2,,948,952
-96334,south,3392,4,2,3,TRUE,1026,1031
-96334,east,3688,6,2.5,3,FALSE,1032,1037
-98755,west,3347,3,2.5,2,TRUE,991,996
-95141,east,1810,5,1,1,FALSE,606,609
-95141,east,3753,1,2.5,2,FALSE,959,963
-94555,east,3906,2,1.5,1,FALSE,866,870
-96334,east,1732,3,2,1,TRUE,700,703
-96334,south,2188,4,2,1,TRUE,767,771
-96334,south,3750,6,2,2,FALSE,963,967
-96334,south,2292,6,,1,TRUE,677,680
-98755,west,1526,6,2.5,,TRUE,673,676
-98755,north,2331,1,1.5,1,TRUE,740,743
-94555,north,1512,4,3,3,TRUE,854,858
-98755,north,3352,3,3,3,FALSE,1014,1018
-96334,north,2378,2,,2,FALSE,669,672
-91312,,1159,7,2.5,1,TRUE,670,673
-94555,south,3426,3,2.5,2,FALSE,937,941
-98755,south,3211,5,3,1,TRUE,948,953
-98755,west,2747,2,2.5,1,FALSE,803,806
-96334,east,3952,6,1.5,1,TRUE,946,950
-91312,north,3814,6,1.5,2,FALSE,934,938
-95141,south,3700,7,2.5,1,FALSE,929,933
-98755,,2448,4,1,2,FALSE,733,736
-95141,west,2629,1,2,,FALSE,696,699
-95141,east,3154,4,2.5,1,TRUE,898,902
-91312,south,2648,4,1.5,2,FALSE,793,797
-98755,,3857,3,1,2,TRUE,949,953
-98755,north,1394,4,1.5,1,FALSE,587,590
-91312,west,2709,5,2,2,FALSE,837,841
-94555,east,3946,6,1,2,TRUE,974,978
-91312,north,3905,6,2,2,FALSE,973,977
-98755,east,3248,5,1.5,1,TRUE,860,864
-96334,north,1774,7,1.5,1,FALSE,644,647
-96334,,1576,4,1,2,TRUE,685,688
-95141,north,2853,,1.5,3,TRUE,912,916
-94555,east,1995,2,3,3,TRUE,897,902
-96334,south,3803,,1,3,TRUE,1001,1006
-94555,east,2876,2,3,1,FALSE,828,832
-98755,east,3553,4,,3,TRUE,925,930
-94555,east,3229,4,2,3,TRUE,995,1000
-94555,north,1079,5,2,2,FALSE,638,641
-95141,south,3695,7,2.5,3,FALSE,1046,1051
-96334,west,3694,5,1,1,TRUE,897,901
-98755,west,1918,5,1,2,FALSE,693,697
-94555,south,1647,6,1,2,TRUE,713,716
-96334,west,2691,3,2.5,2,FALSE,858,862
-95141,south,1333,2,2,2,TRUE,716,719
-95141,west,2609,4,2,1,FALSE,765,768
-98755,west,1725,2,2,3,,772,776
-91312,west,2125,3,1,2,TRUE,760,763
-91312,west,2417,5,1,1,FALSE,689,692
-98755,west,3623,2,1,3,TRUE,995,999
-98755,north,3343,6,3,1,FALSE,908,912
-96334,south,1074,7,2.5,3,FALSE,739,743
-96334,south,2972,3,1,2,TRUE,858,862
-91312,east,1637,2,2,1,FALSE,626,629
-91312,north,1807,2,3,2,FALSE,765,768
-95141,north,1457,2,3,1,FALSE,667,670
-91312,west,3043,6,1,1,FALSE,766,770
-91312,west,3045,6,1.5,3,TRUE,967,972
-91312,north,1444,2,,1,TRUE,552,555
-98755,north,1980,5,1,1,TRUE,688,691
-98755,west,1112,3,1.5,3,TRUE,732,735
-98755,south,1533,6,1.5,3,FALSE,734,738
-91312,east,1442,5,2,2,FALSE,675,678
-91312,north,3171,6,1,3,TRUE,945,949
-96334,east,3072,5,1.5,2,FALSE,842,846
-94555,east,3506,4,1.5,3,TRUE,1000,1005
-94555,south,1574,2,1,3,FALSE,691,694
-95141,south,3521,6,,,FALSE,706,709
-94555,east,3567,6,3,1,FALSE,926,931
-91312,south,1194,1,1,2,TRUE,637,640
-94555,east,1031,3,1.5,1,FALSE,532,535
-,south,3141,2,1.5,3,TRUE,955,960
-94555,south,2776,3,2.5,2,TRUE,916,920
-91312,south,2009,5,1.5,1,TRUE,719,723
-96334,north,3784,2,1,2,FALSE,889,893
-94555,west,1975,6,1.5,2,FALSE,729,732
-98755,west,2444,2,3,2,FALSE,854,857
-95141,south,1684,3,1.5,3,FALSE,737,740
-98755,north,1729,6,1,1,TRUE,663,666
-95141,west,2817,1,1,1,,721,724
-95141,north,2236,1,1,2,FALSE,702,705
-95141,south,2061,7,3,1,FALSE,764,768
-98755,south,3561,3,2.5,3,TRUE,1070,1075
-94555,east,2143,3,1,2,FALSE,694,697
-96334,north,3840,7,1,1,FALSE,858,862
-96334,,1086,2,2.5,2,,647,650
-98755,west,3686,,1,1,TRUE,876,880
+96334,west,1719,1,1.5,3,FALSE,738,742
+94555,north,1439,4,1.5,1,FALSE,589,592
+91312,east,1961,2,3,1,TRUE,775,778
+95141,south,3534,3,2,,TRUE,861,865
+94555,north,2471,1,1.5,1,TRUE,753,756
+91312,west,3930,4,2.5,2,FALSE,1004,1009
+95141,north,2529,4,1.5,,,660,663
+95141,south,2833,1,1,1,FALSE,718,721
+96334,west,1356,7,1.5,,FALSE,545,548
+96334,south,2580,4,1,2,TRUE,816,820
+94555,south,2169,3,2.5,3,TRUE,904,908
+95141,east,3329,4,3,3,TRUE,1064,1069
+95141,east,3660,1,2.5,2,,948,952
+96334,south,3392,4,2,3,TRUE,1026,1031
+96334,east,3688,6,2.5,3,FALSE,1032,1037
+98755,west,3347,3,2.5,2,TRUE,991,996
+95141,east,1810,5,1,1,FALSE,606,609
+95141,east,3753,1,2.5,2,FALSE,959,963
+94555,east,3906,2,1.5,1,FALSE,866,870
+96334,east,1732,3,2,1,TRUE,700,703
+96334,south,2188,4,2,1,TRUE,767,771
+96334,south,3750,6,2,2,FALSE,963,967
+96334,south,2292,6,,1,TRUE,677,680
+98755,west,1526,6,2.5,,TRUE,673,676
+98755,north,2331,1,1.5,1,TRUE,740,743
+94555,north,1512,4,3,3,TRUE,854,858
+98755,north,3352,3,3,3,FALSE,1014,1018
+96334,north,2378,2,,2,FALSE,669,672
+91312,,1159,7,2.5,1,TRUE,670,673
+94555,south,3426,3,2.5,2,FALSE,937,941
+98755,south,3211,5,3,1,TRUE,948,953
+98755,west,2747,2,2.5,1,FALSE,803,806
+96334,east,3952,6,1.5,1,TRUE,946,950
+91312,north,3814,6,1.5,2,FALSE,934,938
+95141,south,3700,7,2.5,1,FALSE,929,933
+98755,,2448,4,1,2,FALSE,733,736
+95141,west,2629,1,2,,FALSE,696,699
+95141,east,3154,4,2.5,1,TRUE,898,902
+91312,south,2648,4,1.5,2,FALSE,793,797
+98755,,3857,3,1,2,TRUE,949,953
+98755,north,1394,4,1.5,1,FALSE,587,590
+91312,west,2709,5,2,2,FALSE,837,841
+94555,east,3946,6,1,2,TRUE,974,978
+91312,north,3905,6,2,2,FALSE,973,977
+98755,east,3248,5,1.5,1,TRUE,860,864
+96334,north,1774,7,1.5,1,FALSE,644,647
+96334,,1576,4,1,2,TRUE,685,688
+95141,north,2853,,1.5,3,TRUE,912,916
+94555,east,1995,2,3,3,TRUE,897,902
+96334,south,3803,,1,3,TRUE,1001,1006
+94555,east,2876,2,3,1,FALSE,828,832
+98755,east,3553,4,,3,TRUE,925,930
+94555,east,3229,4,2,3,TRUE,995,1000
+94555,north,1079,5,2,2,FALSE,638,641
+95141,south,3695,7,2.5,3,FALSE,1046,1051
+96334,west,3694,5,1,1,TRUE,897,901
+98755,west,1918,5,1,2,FALSE,693,697
+94555,south,1647,6,1,2,TRUE,713,716
+96334,west,2691,3,2.5,2,FALSE,858,862
+95141,south,1333,2,2,2,TRUE,716,719
+95141,west,2609,4,2,1,FALSE,765,768
+98755,west,1725,2,2,3,,772,776
+91312,west,2125,3,1,2,TRUE,760,763
+91312,west,2417,5,1,1,FALSE,689,692
+98755,west,3623,2,1,3,TRUE,995,999
+98755,north,3343,6,3,1,FALSE,908,912
+96334,south,1074,7,2.5,3,FALSE,739,743
+96334,south,2972,3,1,2,TRUE,858,862
+91312,east,1637,2,2,1,FALSE,626,629
+91312,north,1807,2,3,2,FALSE,765,768
+95141,north,1457,2,3,1,FALSE,667,670
+91312,west,3043,6,1,1,FALSE,766,770
+91312,west,3045,6,1.5,3,TRUE,967,972
+91312,north,1444,2,,1,TRUE,552,555
+98755,north,1980,5,1,1,TRUE,688,691
+98755,west,1112,3,1.5,3,TRUE,732,735
+98755,south,1533,6,1.5,3,FALSE,734,738
+91312,east,1442,5,2,2,FALSE,675,678
+91312,north,3171,6,1,3,TRUE,945,949
+96334,east,3072,5,1.5,2,FALSE,842,846
+94555,east,3506,4,1.5,3,TRUE,1000,1005
+94555,south,1574,2,1,3,FALSE,691,694
+95141,south,3521,6,,,FALSE,706,709
+94555,east,3567,6,3,1,FALSE,926,931
+91312,south,1194,1,1,2,TRUE,637,640
+94555,east,1031,3,1.5,1,FALSE,532,535
+,south,3141,2,1.5,3,TRUE,955,960
+94555,south,2776,3,2.5,2,TRUE,916,920
+91312,south,2009,5,1.5,1,TRUE,719,723
+96334,north,3784,2,1,2,FALSE,889,893
+94555,west,1975,6,1.5,2,FALSE,729,732
+98755,west,2444,2,3,2,FALSE,854,857
+95141,south,1684,3,1.5,3,FALSE,737,740
+98755,north,1729,6,1,1,TRUE,663,666
+95141,west,2817,1,1,1,,721,724
+95141,north,2236,1,1,2,FALSE,702,705
+95141,south,2061,7,3,1,FALSE,764,768
+98755,south,3561,3,2.5,3,TRUE,1070,1075
+94555,east,2143,3,1,2,FALSE,694,697
+96334,north,3840,7,1,1,FALSE,858,862
+96334,,1086,2,2.5,2,,647,650
+98755,west,3686,,1,1,TRUE,876,880

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/transform/input/homes2/homes.tfidspec.json
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/transform/input/homes2/homes.tfidspec.json b/src/test/scripts/functions/transform/input/homes2/homes.tfidspec.json
index 45a86a3..62731a2 100644
--- a/src/test/scripts/functions/transform/input/homes2/homes.tfidspec.json
+++ b/src/test/scripts/functions/transform/input/homes2/homes.tfidspec.json
@@ -1,22 +1,22 @@
-{
-    "ids": true
-    ,"impute": [
-                 { "id": 1, "method": "global_mode" }
-                ,{ "id": 2, "method": "constant", "value": "south" }
-                ,{ "id": 4, "method": "constant", "value": "2" }
-                ,{ "id": 5, "method": "constant", "value": "1" }
-                ,{ "id": 6, "method": "constant", "value": "1" }
-                ,{ "id": 7, "method": "global_mode" }
-                ,{ "id": 9, "method": "global_mean" }
-              ]
-    
-    ,"recode": [ 1, 2, 4, 5, 6, 7 ]
-    
-    ,"bin": [
-                 { "id": 8, "method": "equi-width", "numbins": 3 }
-                ,{ "id": 3, "method": "equi-width", "numbins": 4 }
-            ]
-
-    ,"dummycode": [ 2, 5, 6, 7, 8, 3 ]
-    
-}
+{
+    "ids": true
+    ,"impute": [
+                 { "id": 1, "method": "global_mode" }
+                ,{ "id": 2, "method": "constant", "value": "south" }
+                ,{ "id": 4, "method": "constant", "value": "2" }
+                ,{ "id": 5, "method": "constant", "value": "1" }
+                ,{ "id": 6, "method": "constant", "value": "1" }
+                ,{ "id": 7, "method": "global_mode" }
+                ,{ "id": 9, "method": "global_mean" }
+              ]
+    
+    ,"recode": [ 1, 2, 4, 5, 6, 7 ]
+    
+    ,"bin": [
+                 { "id": 8, "method": "equi-width", "numbins": 3 }
+                ,{ "id": 3, "method": "equi-width", "numbins": 4 }
+            ]
+
+    ,"dummycode": [ 2, 5, 6, 7, 8, 3 ]
+    
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/transform/input/homes2/homes.tfspec.json
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/transform/input/homes2/homes.tfspec.json b/src/test/scripts/functions/transform/input/homes2/homes.tfspec.json
index 50dff46..3a86ec3 100644
--- a/src/test/scripts/functions/transform/input/homes2/homes.tfspec.json
+++ b/src/test/scripts/functions/transform/input/homes2/homes.tfspec.json
@@ -1,21 +1,21 @@
-{
-    "impute": [
-                 { "name": "zipcode"     , "method": "global_mode" }
-                ,{ "name": "district"    , "method": "constant", "value": "south" }
-                ,{ "name": "numbedrooms" , "method": "constant", "value": "2" }
-                ,{ "name": "numbathrooms", "method": "constant", "value": "1" }
-                ,{ "name": "floors"	     , "method": "constant", "value": "1" }
-                ,{ "name": "view"	     , "method": "global_mode" }
-                ,{ "name": "askingprice"     , "method": "global_mean" }
-              ]
-    
-    ,"recode": [ "zipcode", "district", "numbedrooms", "numbathrooms", "floors", "view" ]
-    
-    ,"bin": [
-                 { "name": "saleprice"  , "method": "equi-width", "numbins": 3 }
-                ,{ "name": "sqft"       , "method": "equi-width", "numbins": 4 }
-            ]
-
-    ,"dummycode": [ "district", "numbathrooms", "floors", "view", "saleprice", "sqft" ]
-    
-}
+{
+    "impute": [
+                 { "name": "zipcode"     , "method": "global_mode" }
+                ,{ "name": "district"    , "method": "constant", "value": "south" }
+                ,{ "name": "numbedrooms" , "method": "constant", "value": "2" }
+                ,{ "name": "numbathrooms", "method": "constant", "value": "1" }
+                ,{ "name": "floors"	     , "method": "constant", "value": "1" }
+                ,{ "name": "view"	     , "method": "global_mode" }
+                ,{ "name": "askingprice"     , "method": "global_mean" }
+              ]
+    
+    ,"recode": [ "zipcode", "district", "numbedrooms", "numbathrooms", "floors", "view" ]
+    
+    ,"bin": [
+                 { "name": "saleprice"  , "method": "equi-width", "numbins": 3 }
+                ,{ "name": "sqft"       , "method": "equi-width", "numbins": 4 }
+            ]
+
+    ,"dummycode": [ "district", "numbathrooms", "floors", "view", "saleprice", "sqft" ]
+    
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/transform/input/homes2/homes.tfspec2.json
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/transform/input/homes2/homes.tfspec2.json b/src/test/scripts/functions/transform/input/homes2/homes.tfspec2.json
index 430e7bc..db879cf 100644
--- a/src/test/scripts/functions/transform/input/homes2/homes.tfspec2.json
+++ b/src/test/scripts/functions/transform/input/homes2/homes.tfspec2.json
@@ -1,21 +1,21 @@
-{
-    "impute": [
-                 { "name": "zipcode"     , "method": "global_mode" }
-                ,{ "name": "district"    , "method": "global_mode" }
-                ,{ "name": "numbedrooms" , "method": "constant", "value": "2" }
-                ,{ "name": "numbathrooms", "method": "constant", "value": "1" }
-                ,{ "name": "floors"	     , "method": "global_mode" }
-                ,{ "name": "view"	     , "method": "global_mode" }
-                ,{ "name": "askingprice"     , "method": "global_mean" }
-              ]
-    
-    ,"recode": [ "zipcode", "district", "view", "numbedrooms", "numbathrooms"]
-    
-    ,"bin": [
-                 { "name": "saleprice"  , "method": "equi-width", "numbins": 3 }
-                ,{ "name": "sqft"       , "method": "equi-width", "numbins": 4 }
-            ]
-
-    ,"dummycode": [ "numbathrooms", "floors", "saleprice", "sqft" ]
-    
-}
+{
+    "impute": [
+                 { "name": "zipcode"     , "method": "global_mode" }
+                ,{ "name": "district"    , "method": "global_mode" }
+                ,{ "name": "numbedrooms" , "method": "constant", "value": "2" }
+                ,{ "name": "numbathrooms", "method": "constant", "value": "1" }
+                ,{ "name": "floors"	     , "method": "global_mode" }
+                ,{ "name": "view"	     , "method": "global_mode" }
+                ,{ "name": "askingprice"     , "method": "global_mean" }
+              ]
+    
+    ,"recode": [ "zipcode", "district", "view", "numbedrooms", "numbathrooms"]
+    
+    ,"bin": [
+                 { "name": "saleprice"  , "method": "equi-width", "numbins": 3 }
+                ,{ "name": "sqft"       , "method": "equi-width", "numbins": 4 }
+            ]
+
+    ,"dummycode": [ "numbathrooms", "floors", "saleprice", "sqft" ]
+    
+}


[18/55] [partial] incubator-systemml git commit: [SYSTEMML-482] [SYSTEMML-480] Adding a Git attributes file to enfore Unix-styled line endings, and normalizing all of the line endings.

Posted by du...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/parfor/parfor_bivariate2.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/parfor/parfor_bivariate2.dml b/src/test/scripts/applications/parfor/parfor_bivariate2.dml
index f6dd4d5..d0734a9 100644
--- a/src/test/scripts/applications/parfor/parfor_bivariate2.dml
+++ b/src/test/scripts/applications/parfor/parfor_bivariate2.dml
@@ -1,256 +1,256 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-
-/*
- *
- * For a given pair of attribute sets, compute bivariate statistics between all attribute pairs 
- *   Given, S_1 = {A_11, A_12, ... A_1m} and S_2 = {A_21, A_22, ... A_2n} 
- *          compute bivariate stats for m*n pairs (A_1i, A_2j), (1<= i <=m) and (1<= j <=n)
- *
- * Seven inputs:  
- *    $1) D  - input data
- *    $2) S1 - First attribute set {A_11, A_12, ... A_1m}
- *    $3) S2 - Second attribute set {A_21, A_22, ... A_2n}
- *    $4) K1 - kind for attributes in S1 
- *    $5) K2 - kind for attributes in S2
- *             kind=1 for scale, kind=2 for nominal, kind=3 for ordinal
- *    $6) numPairs - total number of pairs (m*n)
- *    $7) maxC - maximum number of categories in any categorical attribute
- * 
- * One output:    
- *    $6) output directory in which following four statistics files are created
- *        + bivar.stats - matrix with all 8 bivariate statistics computed for different attribute pairs
- *                        (R, (chi-sq, df, pval, cramersv), spearman, Eta, F)
- *        + categorical.counts - 
- *        + categorical.means - 
- *        + categorical.variances - 
- *          -> Values in these three matrices are applicable only for scale-categorical attribute pairs. 
- *          k^th column in these matrices denote the attribute pair (A_1i,A_2j) where i*j = k.
- */
-
-D = read($1, rows=$7, cols=$8);  # input data set
-S1 = read($2, rows=1, cols=$9); # attribute set 1
-S2 = read($3, rows=1, cols=$9); # attribute set 2
-K1 = read($4, rows=1, cols=$9); # kind for attributes in S1
-K2 = read($5, rows=1, cols=$9); # kind for attributes in S2
-numPairs = $10; # number of attribute pairs (|S1|*|S2|)
-maxC = $11;     # max number of categories in any categorical attribute
-
-s1size = ncol(S1);
-s2size = ncol(S2);
-
-# R, chisq, cramers, spearman, eta, anovaf
-numstats = 8;
-basestats = matrix(0, rows=numstats, cols=numPairs);
-cat_counts = matrix(0, rows=maxC, cols=numPairs);
-cat_means = matrix(0, rows=maxC, cols=numPairs);
-cat_vars = matrix(0, rows=maxC, cols=numPairs);
-
-
-parfor( i in 1:s1size, par=4, mode=LOCAL, check=0, opt=NONE) {
-    a1 = castAsScalar(S1[,i]);
-    k1 = castAsScalar(K1[1,i]);
-    A1 = D[,a1];
-
-    parfor( j in 1:s2size, par=4, mode=REMOTE_MR, check=0, opt=NONE) {
-        pairID = (i-1)*s2size+j; 
-        a2 = castAsScalar(S2[,j]);
-        k2 = castAsScalar(K2[1,j]);
-        A2 = D[,a2];
-    
-        if (k1 == k2) {
-            if (k1 == 1) {
-                # scale-scale
-                print("[" + i + "," + j + "] scale-scale");
-                r = bivar_ss(A1,A2);   
-                basestats[1,pairID] = r;
-            } else {
-                # nominal-nominal or ordinal-ordinal
-                print("[" + i + "," + j + "] categorical-categorical");
-                [chisq, df, pval, cramersv]  = bivar_cc(A1,A2);
-                basestats[2,pairID] = chisq;
-                basestats[3,pairID] = df;
-                basestats[4,pairID] = pval;
-                basestats[5,pairID] = cramersv;
-
-                if ( k1 == 3 ) {
-                    # ordinal-ordinal
-                    print("[" + i + "," + j + "] ordinal-ordinal");
-                    sp = bivar_oo(A1, A2);
-                    basestats[6,pairID] = sp;
-                }
-            }
-        } 
-        else {
-            if (k1 == 1 | k2 == 1) {
-                # Scale-nominal/ordinal      TODO MB correctness errors
-                print("[" + i + "," + j + "] scale-categorical");
-                
-               if ( k1 == 1 ) {
-                    [eta,f, counts, means, vars] = bivar_sc(A1,A2);
-                }
-                else {
-                    [eta,f, counts, means, vars] = bivar_sc(A2,A1);
-                }
-                basestats[7,pairID] = eta;
-                basestats[8,pairID] = f;
-                cat_counts[,pairID] = counts;
-                cat_means[,pairID] = means;
-                cat_vars[,pairID] = vars; 
-            }
-            else {
-                # nominal-ordinal or ordinal-nominal
-                print("[" + i + "," + j + "] categorical-categorical");
-                [chisq, df, pval, cramersv]  = bivar_cc(A1,A2);
-                basestats[2,pairID] = chisq;
-                basestats[3,pairID] = df;
-                basestats[4,pairID] = pval;
-                basestats[5,pairID] = cramersv;
-            }
-        }
-    }
-}
-
-write(basestats, $6 + "/bivar.stats");
-write(cat_counts, $6 + "/category.counts");
-write(cat_means, $6 + "/category.means");
-write(cat_vars, $6 + "/category.variances");
-
-
-# -----------------------------------------------------------------------------------------------------------
-
-bivar_cc = function(Matrix[Double] A, Matrix[Double] B) return (Double chisq, Double df, Double pval, Double cramersv) {
-
-    # Contingency Table
-    F = table(A,B);
-
-    # Chi-Squared
-    W = sum(F);
-    r = rowSums(F);
-    c = colSums(F);
-    E = (r %*% c)/W;
-    T = (F-E)^2/E;
-    chi_squared = sum(T);
-
-    # compute p-value
-    degFreedom = (nrow(F)-1)*(ncol(F)-1);
-    pValue = pchisq(target=chi_squared, df=degFreedom, lower.tail=FALSE);
-
-    # Cramer's V
-    R = nrow(F);
-    C = ncol(F);
-    q = min(R,C);
-    cramers_v = sqrt(chi_squared/(W*(q-1)));
-
-    # Assign return values
-    chisq = chi_squared;
-    df = as.double(degFreedom);
-    pval = pValue;
-    cramersv = cramers_v;
-}
-
-# -----------------------------------------------------------------------------------------------------------
-
-bivar_ss = function(Matrix[Double] X, Matrix[Double] Y) return (Double R) {
-
-    # Unweighted co-variance
-    covXY = cov(X,Y);
-
-    # compute standard deviations for both X and Y by computing 2^nd central moment
-    W = nrow(X);
-    m2X = moment(X,2);
-    m2Y = moment(Y,2);
-    sigmaX = sqrt(m2X * (W/(W-1.0)) );
-    sigmaY = sqrt(m2Y * (W/(W-1.0)) );
-
-    # Pearson's R
-    R = covXY / (sigmaX*sigmaY);
-}
-
-# -----------------------------------------------------------------------------------------------------------
-
-# Y points to SCALE variable
-# A points to CATEGORICAL variable
-bivar_sc = function(Matrix[Double] Y, Matrix[Double] A) return (Double Eta, Double AnovaF, Matrix[Double] CFreqs, Matrix[Double] CMeans, Matrix[Double] CVars ) {
-
-    # mean and variance in target variable
-    W = nrow(A);
-    my = mean(Y);
-    varY = moment(Y,2) * W/(W-1.0)
-
-    # category-wise (frequencies, means, variances)
-    CFreqs = aggregate(target=Y, groups=A, fn="count"); 
-    CMeans = aggregate(target=Y, groups=A, fn="mean");
-    CVars =  aggregate(target=Y, groups=A, fn="variance");
-
-    # number of categories
-    R = nrow(CFreqs);
-
-    Eta = sqrt(1 - ( sum((CFreqs-1)*CVars) / ((W-1)*varY) ));
-
-    anova_num = sum( (CFreqs*(CMeans-my)^2) )/(R-1);
-    anova_den = sum( (CFreqs-1)*CVars )/(W-R);
-    AnovaF = anova_num/anova_den;
-}
-
-# -----------------------------------------------------------------------------------------------------------
-# Function to compute ranks
-# takes a column vector as input, and produces a vector of same size in which each cell denotes to the computed score for that category
-computeRanks = function(Matrix[Double] X) return (Matrix[Double] Ranks) {
-    Ranks = cumsum(X) - X/2 + 1/2;
-}
-
-#-------------------------------------------------------------------------
-
-bivar_oo = function(Matrix[Double] A, Matrix[Double] B) return (Double sp) {
-
-    # compute contingency table
-    F = table(A,B);
-
-    catA = nrow(F);  # number of categories in A
-    catB = ncol(F);  # number of categories in B
-
-    # compute category-wise counts for both the attributes
-    R = rowSums(F);
-    S = colSums(F);
-
-    # compute scores, both are column vectors
-    [C] = computeRanks(R);
-    meanX = mean(C,R); 
-
-    columnS = t(S);
-    [D] = computeRanks(columnS);
-
-    # scores (C,D) are individual values, and counts (R,S) act as weights
-    meanY = mean(D,columnS);
-
-    W = sum(F); # total weight, or total #cases
-    varX = moment(C,R,2)*(W/(W-1.0));
-    varY = moment(D,columnS,2)*(W/(W-1.0));
-    covXY = sum( t(F/(W-1) * (C-meanX)) * (D-meanY) );
-
-    sp = covXY/(sqrt(varX)*sqrt(varY));
-}
-
-# -----------------------------------------------------------------------------------------------------------
-
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+/*
+ *
+ * For a given pair of attribute sets, compute bivariate statistics between all attribute pairs 
+ *   Given, S_1 = {A_11, A_12, ... A_1m} and S_2 = {A_21, A_22, ... A_2n} 
+ *          compute bivariate stats for m*n pairs (A_1i, A_2j), (1<= i <=m) and (1<= j <=n)
+ *
+ * Seven inputs:  
+ *    $1) D  - input data
+ *    $2) S1 - First attribute set {A_11, A_12, ... A_1m}
+ *    $3) S2 - Second attribute set {A_21, A_22, ... A_2n}
+ *    $4) K1 - kind for attributes in S1 
+ *    $5) K2 - kind for attributes in S2
+ *             kind=1 for scale, kind=2 for nominal, kind=3 for ordinal
+ *    $6) numPairs - total number of pairs (m*n)
+ *    $7) maxC - maximum number of categories in any categorical attribute
+ * 
+ * One output:    
+ *    $6) output directory in which following four statistics files are created
+ *        + bivar.stats - matrix with all 8 bivariate statistics computed for different attribute pairs
+ *                        (R, (chi-sq, df, pval, cramersv), spearman, Eta, F)
+ *        + categorical.counts - 
+ *        + categorical.means - 
+ *        + categorical.variances - 
+ *          -> Values in these three matrices are applicable only for scale-categorical attribute pairs. 
+ *          k^th column in these matrices denote the attribute pair (A_1i,A_2j) where i*j = k.
+ */
+
+D = read($1, rows=$7, cols=$8);  # input data set
+S1 = read($2, rows=1, cols=$9); # attribute set 1
+S2 = read($3, rows=1, cols=$9); # attribute set 2
+K1 = read($4, rows=1, cols=$9); # kind for attributes in S1
+K2 = read($5, rows=1, cols=$9); # kind for attributes in S2
+numPairs = $10; # number of attribute pairs (|S1|*|S2|)
+maxC = $11;     # max number of categories in any categorical attribute
+
+s1size = ncol(S1);
+s2size = ncol(S2);
+
+# R, chisq, cramers, spearman, eta, anovaf
+numstats = 8;
+basestats = matrix(0, rows=numstats, cols=numPairs);
+cat_counts = matrix(0, rows=maxC, cols=numPairs);
+cat_means = matrix(0, rows=maxC, cols=numPairs);
+cat_vars = matrix(0, rows=maxC, cols=numPairs);
+
+
+parfor( i in 1:s1size, par=4, mode=LOCAL, check=0, opt=NONE) {
+    a1 = castAsScalar(S1[,i]);
+    k1 = castAsScalar(K1[1,i]);
+    A1 = D[,a1];
+
+    parfor( j in 1:s2size, par=4, mode=REMOTE_MR, check=0, opt=NONE) {
+        pairID = (i-1)*s2size+j; 
+        a2 = castAsScalar(S2[,j]);
+        k2 = castAsScalar(K2[1,j]);
+        A2 = D[,a2];
+    
+        if (k1 == k2) {
+            if (k1 == 1) {
+                # scale-scale
+                print("[" + i + "," + j + "] scale-scale");
+                r = bivar_ss(A1,A2);   
+                basestats[1,pairID] = r;
+            } else {
+                # nominal-nominal or ordinal-ordinal
+                print("[" + i + "," + j + "] categorical-categorical");
+                [chisq, df, pval, cramersv]  = bivar_cc(A1,A2);
+                basestats[2,pairID] = chisq;
+                basestats[3,pairID] = df;
+                basestats[4,pairID] = pval;
+                basestats[5,pairID] = cramersv;
+
+                if ( k1 == 3 ) {
+                    # ordinal-ordinal
+                    print("[" + i + "," + j + "] ordinal-ordinal");
+                    sp = bivar_oo(A1, A2);
+                    basestats[6,pairID] = sp;
+                }
+            }
+        } 
+        else {
+            if (k1 == 1 | k2 == 1) {
+                # Scale-nominal/ordinal      TODO MB correctness errors
+                print("[" + i + "," + j + "] scale-categorical");
+                
+               if ( k1 == 1 ) {
+                    [eta,f, counts, means, vars] = bivar_sc(A1,A2);
+                }
+                else {
+                    [eta,f, counts, means, vars] = bivar_sc(A2,A1);
+                }
+                basestats[7,pairID] = eta;
+                basestats[8,pairID] = f;
+                cat_counts[,pairID] = counts;
+                cat_means[,pairID] = means;
+                cat_vars[,pairID] = vars; 
+            }
+            else {
+                # nominal-ordinal or ordinal-nominal
+                print("[" + i + "," + j + "] categorical-categorical");
+                [chisq, df, pval, cramersv]  = bivar_cc(A1,A2);
+                basestats[2,pairID] = chisq;
+                basestats[3,pairID] = df;
+                basestats[4,pairID] = pval;
+                basestats[5,pairID] = cramersv;
+            }
+        }
+    }
+}
+
+write(basestats, $6 + "/bivar.stats");
+write(cat_counts, $6 + "/category.counts");
+write(cat_means, $6 + "/category.means");
+write(cat_vars, $6 + "/category.variances");
+
+
+# -----------------------------------------------------------------------------------------------------------
+
+bivar_cc = function(Matrix[Double] A, Matrix[Double] B) return (Double chisq, Double df, Double pval, Double cramersv) {
+
+    # Contingency Table
+    F = table(A,B);
+
+    # Chi-Squared
+    W = sum(F);
+    r = rowSums(F);
+    c = colSums(F);
+    E = (r %*% c)/W;
+    T = (F-E)^2/E;
+    chi_squared = sum(T);
+
+    # compute p-value
+    degFreedom = (nrow(F)-1)*(ncol(F)-1);
+    pValue = pchisq(target=chi_squared, df=degFreedom, lower.tail=FALSE);
+
+    # Cramer's V
+    R = nrow(F);
+    C = ncol(F);
+    q = min(R,C);
+    cramers_v = sqrt(chi_squared/(W*(q-1)));
+
+    # Assign return values
+    chisq = chi_squared;
+    df = as.double(degFreedom);
+    pval = pValue;
+    cramersv = cramers_v;
+}
+
+# -----------------------------------------------------------------------------------------------------------
+
+bivar_ss = function(Matrix[Double] X, Matrix[Double] Y) return (Double R) {
+
+    # Unweighted co-variance
+    covXY = cov(X,Y);
+
+    # compute standard deviations for both X and Y by computing 2^nd central moment
+    W = nrow(X);
+    m2X = moment(X,2);
+    m2Y = moment(Y,2);
+    sigmaX = sqrt(m2X * (W/(W-1.0)) );
+    sigmaY = sqrt(m2Y * (W/(W-1.0)) );
+
+    # Pearson's R
+    R = covXY / (sigmaX*sigmaY);
+}
+
+# -----------------------------------------------------------------------------------------------------------
+
+# Y points to SCALE variable
+# A points to CATEGORICAL variable
+bivar_sc = function(Matrix[Double] Y, Matrix[Double] A) return (Double Eta, Double AnovaF, Matrix[Double] CFreqs, Matrix[Double] CMeans, Matrix[Double] CVars ) {
+
+    # mean and variance in target variable
+    W = nrow(A);
+    my = mean(Y);
+    varY = moment(Y,2) * W/(W-1.0)
+
+    # category-wise (frequencies, means, variances)
+    CFreqs = aggregate(target=Y, groups=A, fn="count"); 
+    CMeans = aggregate(target=Y, groups=A, fn="mean");
+    CVars =  aggregate(target=Y, groups=A, fn="variance");
+
+    # number of categories
+    R = nrow(CFreqs);
+
+    Eta = sqrt(1 - ( sum((CFreqs-1)*CVars) / ((W-1)*varY) ));
+
+    anova_num = sum( (CFreqs*(CMeans-my)^2) )/(R-1);
+    anova_den = sum( (CFreqs-1)*CVars )/(W-R);
+    AnovaF = anova_num/anova_den;
+}
+
+# -----------------------------------------------------------------------------------------------------------
+# Function to compute ranks
+# takes a column vector as input, and produces a vector of same size in which each cell denotes to the computed score for that category
+computeRanks = function(Matrix[Double] X) return (Matrix[Double] Ranks) {
+    Ranks = cumsum(X) - X/2 + 1/2;
+}
+
+#-------------------------------------------------------------------------
+
+bivar_oo = function(Matrix[Double] A, Matrix[Double] B) return (Double sp) {
+
+    # compute contingency table
+    F = table(A,B);
+
+    catA = nrow(F);  # number of categories in A
+    catB = ncol(F);  # number of categories in B
+
+    # compute category-wise counts for both the attributes
+    R = rowSums(F);
+    S = colSums(F);
+
+    # compute scores, both are column vectors
+    [C] = computeRanks(R);
+    meanX = mean(C,R); 
+
+    columnS = t(S);
+    [D] = computeRanks(columnS);
+
+    # scores (C,D) are individual values, and counts (R,S) act as weights
+    meanY = mean(D,columnS);
+
+    W = sum(F); # total weight, or total #cases
+    varX = moment(C,R,2)*(W/(W-1.0));
+    varY = moment(D,columnS,2)*(W/(W-1.0));
+    covXY = sum( t(F/(W-1) * (C-meanX)) * (D-meanY) );
+
+    sp = covXY/(sqrt(varX)*sqrt(varY));
+}
+
+# -----------------------------------------------------------------------------------------------------------
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/parfor/parfor_bivariate3.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/parfor/parfor_bivariate3.dml b/src/test/scripts/applications/parfor/parfor_bivariate3.dml
index 1443c6e..990a6fe 100644
--- a/src/test/scripts/applications/parfor/parfor_bivariate3.dml
+++ b/src/test/scripts/applications/parfor/parfor_bivariate3.dml
@@ -1,256 +1,256 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-
-/*
- *
- * For a given pair of attribute sets, compute bivariate statistics between all attribute pairs 
- *   Given, S_1 = {A_11, A_12, ... A_1m} and S_2 = {A_21, A_22, ... A_2n} 
- *          compute bivariate stats for m*n pairs (A_1i, A_2j), (1<= i <=m) and (1<= j <=n)
- *
- * Seven inputs:  
- *    $1) D  - input data
- *    $2) S1 - First attribute set {A_11, A_12, ... A_1m}
- *    $3) S2 - Second attribute set {A_21, A_22, ... A_2n}
- *    $4) K1 - kind for attributes in S1 
- *    $5) K2 - kind for attributes in S2
- *             kind=1 for scale, kind=2 for nominal, kind=3 for ordinal
- *    $6) numPairs - total number of pairs (m*n)
- *    $7) maxC - maximum number of categories in any categorical attribute
- * 
- * One output:    
- *    $6) output directory in which following four statistics files are created
- *        + bivar.stats - matrix with all 8 bivariate statistics computed for different attribute pairs
- *                        (R, (chi-sq, df, pval, cramersv), spearman, Eta, F)
- *        + categorical.counts - 
- *        + categorical.means - 
- *        + categorical.variances - 
- *          -> Values in these three matrices are applicable only for scale-categorical attribute pairs. 
- *          k^th column in these matrices denote the attribute pair (A_1i,A_2j) where i*j = k.
- */
-
-D = read($1, rows=$7, cols=$8);  # input data set
-S1 = read($2, rows=1, cols=$9); # attribute set 1
-S2 = read($3, rows=1, cols=$9); # attribute set 2
-K1 = read($4, rows=1, cols=$9); # kind for attributes in S1
-K2 = read($5, rows=1, cols=$9); # kind for attributes in S2
-numPairs = $10; # number of attribute pairs (|S1|*|S2|)
-maxC = $11;     # max number of categories in any categorical attribute
-
-s1size = ncol(S1);
-s2size = ncol(S2);
-
-# R, chisq, cramers, spearman, eta, anovaf
-numstats = 8;
-basestats = matrix(0, rows=numstats, cols=numPairs);
-cat_counts = matrix(0, rows=maxC, cols=numPairs);
-cat_means = matrix(0, rows=maxC, cols=numPairs);
-cat_vars = matrix(0, rows=maxC, cols=numPairs);
-
-
-parfor( i in 1:s1size, par=4, mode=REMOTE_MR, check=0, opt=NONE) {
-    a1 = castAsScalar(S1[,i]);
-    k1 = castAsScalar(K1[1,i]);
-    A1 = D[,a1];
-
-    parfor( j in 1:s2size, par=4, mode=LOCAL, check=0, opt=NONE) {
-        pairID = (i-1)*s2size+j; 
-        a2 = castAsScalar(S2[,j]);
-        k2 = castAsScalar(K2[1,j]);
-        A2 = D[,a2];
-    
-        if (k1 == k2) {
-            if (k1 == 1) {
-                # scale-scale
-                print("[" + i + "," + j + "] scale-scale");
-                r = bivar_ss(A1,A2);   
-                basestats[1,pairID] = r;
-            } else {
-                # nominal-nominal or ordinal-ordinal
-                print("[" + i + "," + j + "] categorical-categorical");
-                [chisq, df, pval, cramersv]  = bivar_cc(A1,A2);
-                basestats[2,pairID] = chisq;
-                basestats[3,pairID] = df;
-                basestats[4,pairID] = pval;
-                basestats[5,pairID] = cramersv;
-
-                if ( k1 == 3 ) {
-                    # ordinal-ordinal
-                    print("[" + i + "," + j + "] ordinal-ordinal");
-                    sp = bivar_oo(A1, A2);
-                    basestats[6,pairID] = sp;
-                }
-            }
-        } 
-        else {
-            if (k1 == 1 | k2 == 1) {
-                # Scale-nominal/ordinal      TODO MB correctness errors
-                print("[" + i + "," + j + "] scale-categorical");
-                
-               if ( k1 == 1 ) {
-                    [eta,f, counts, means, vars] = bivar_sc(A1,A2);
-                }
-                else {
-                    [eta,f, counts, means, vars] = bivar_sc(A2,A1);
-                }
-                basestats[7,pairID] = eta;
-                basestats[8,pairID] = f;
-                cat_counts[,pairID] = counts;
-                cat_means[,pairID] = means;
-                cat_vars[,pairID] = vars; 
-            }
-            else {
-                # nominal-ordinal or ordinal-nominal
-                print("[" + i + "," + j + "] categorical-categorical");
-                [chisq, df, pval, cramersv]  = bivar_cc(A1,A2);
-                basestats[2,pairID] = chisq;
-                basestats[3,pairID] = df;
-                basestats[4,pairID] = pval;
-                basestats[5,pairID] = cramersv;
-            }
-        }
-    }
-}
-
-write(basestats, $6 + "/bivar.stats");
-write(cat_counts, $6 + "/category.counts");
-write(cat_means, $6 + "/category.means");
-write(cat_vars, $6 + "/category.variances");
-
-
-# -----------------------------------------------------------------------------------------------------------
-
-bivar_cc = function(Matrix[Double] A, Matrix[Double] B) return (Double chisq, Double df, Double pval, Double cramersv) {
-
-    # Contingency Table
-    F = table(A,B);
-
-    # Chi-Squared
-    W = sum(F);
-    r = rowSums(F);
-    c = colSums(F);
-    E = (r %*% c)/W;
-    T = (F-E)^2/E;
-    chi_squared = sum(T);
-
-    # compute p-value
-    degFreedom = (nrow(F)-1)*(ncol(F)-1);
-    pValue = pchisq(target=chi_squared, df=degFreedom, lower.tail=FALSE);
-
-    # Cramer's V
-    R = nrow(F);
-    C = ncol(F);
-    q = min(R,C);
-    cramers_v = sqrt(chi_squared/(W*(q-1)));
-
-    # Assign return values
-    chisq = chi_squared;
-    df = as.double(degFreedom);
-    pval = pValue;
-    cramersv = cramers_v;
-}
-
-# -----------------------------------------------------------------------------------------------------------
-
-bivar_ss = function(Matrix[Double] X, Matrix[Double] Y) return (Double R) {
-
-    # Unweighted co-variance
-    covXY = cov(X,Y);
-
-    # compute standard deviations for both X and Y by computing 2^nd central moment
-    W = nrow(X);
-    m2X = moment(X,2);
-    m2Y = moment(Y,2);
-    sigmaX = sqrt(m2X * (W/(W-1.0)) );
-    sigmaY = sqrt(m2Y * (W/(W-1.0)) );
-
-    # Pearson's R
-    R = covXY / (sigmaX*sigmaY);
-}
-
-# -----------------------------------------------------------------------------------------------------------
-
-# Y points to SCALE variable
-# A points to CATEGORICAL variable
-bivar_sc = function(Matrix[Double] Y, Matrix[Double] A) return (Double Eta, Double AnovaF, Matrix[Double] CFreqs, Matrix[Double] CMeans, Matrix[Double] CVars ) {
-
-    # mean and variance in target variable
-    W = nrow(A);
-    my = mean(Y);
-    varY = moment(Y,2) * W/(W-1.0)
-
-    # category-wise (frequencies, means, variances)
-    CFreqs = aggregate(target=Y, groups=A, fn="count"); 
-    CMeans = aggregate(target=Y, groups=A, fn="mean");
-    CVars =  aggregate(target=Y, groups=A, fn="variance");
-
-    # number of categories
-    R = nrow(CFreqs);
-
-    Eta = sqrt(1 - ( sum((CFreqs-1)*CVars) / ((W-1)*varY) ));
-
-    anova_num = sum( (CFreqs*(CMeans-my)^2) )/(R-1);
-    anova_den = sum( (CFreqs-1)*CVars )/(W-R);
-    AnovaF = anova_num/anova_den;
-}
-
-# -----------------------------------------------------------------------------------------------------------
-# Function to compute ranks
-# takes a column vector as input, and produces a vector of same size in which each cell denotes to the computed score for that category
-computeRanks = function(Matrix[Double] X) return (Matrix[Double] Ranks) {
-    Ranks = cumsum(X) - X/2 + 1/2;
-}
-
-#-------------------------------------------------------------------------
-
-bivar_oo = function(Matrix[Double] A, Matrix[Double] B) return (Double sp) {
-
-    # compute contingency table
-    F = table(A,B);
-
-    catA = nrow(F);  # number of categories in A
-    catB = ncol(F);  # number of categories in B
-
-    # compute category-wise counts for both the attributes
-    R = rowSums(F);
-    S = colSums(F);
-
-    # compute scores, both are column vectors
-    [C] = computeRanks(R);
-    meanX = mean(C,R); 
-
-    columnS = t(S);
-    [D] = computeRanks(columnS);
-
-    # scores (C,D) are individual values, and counts (R,S) act as weights
-    meanY = mean(D,columnS);
-
-    W = sum(F); # total weight, or total #cases
-    varX = moment(C,R,2)*(W/(W-1.0));
-    varY = moment(D,columnS,2)*(W/(W-1.0));
-    covXY = sum( t(F/(W-1) * (C-meanX)) * (D-meanY) );
-
-    sp = covXY/(sqrt(varX)*sqrt(varY));
-}
-
-# -----------------------------------------------------------------------------------------------------------
-
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+/*
+ *
+ * For a given pair of attribute sets, compute bivariate statistics between all attribute pairs 
+ *   Given, S_1 = {A_11, A_12, ... A_1m} and S_2 = {A_21, A_22, ... A_2n} 
+ *          compute bivariate stats for m*n pairs (A_1i, A_2j), (1<= i <=m) and (1<= j <=n)
+ *
+ * Inputs (command-line arguments):
+ *    $1) D  - input data, read as a $7 x $8 matrix
+ *    $2) S1 - First attribute set {A_11, A_12, ... A_1m}, read as a 1 x $9 row vector
+ *    $3) S2 - Second attribute set {A_21, A_22, ... A_2n}, read as a 1 x $9 row vector
+ *    $4) K1 - kind for attributes in S1 (1 x $9)
+ *    $5) K2 - kind for attributes in S2 (1 x $9)
+ *             kind=1 for scale, kind=2 for nominal, kind=3 for ordinal
+ *    $10) numPairs - total number of pairs (m*n)
+ *    $11) maxC - maximum number of categories in any categorical attribute
+ * 
+ * One output:
+ *    $6) output directory in which the following four statistics files are created
+ *        + bivar.stats - matrix with all 8 bivariate statistics computed for different attribute pairs
+ *                        (R, (chi-sq, df, pval, cramersv), spearman, Eta, F)
+ *        + category.counts - per-category counts of the scale attribute
+ *        + category.means - per-category means of the scale attribute
+ *        + category.variances - per-category variances of the scale attribute
+ *          -> Values in these three matrices are applicable only for scale-categorical attribute pairs.
+ *          The k^th column in these matrices denotes the attribute pair (A_1i,A_2j) where k = (i-1)*n + j.
+ */
+
+D = read($1, rows=$7, cols=$8);  # input data set ($7 rows x $8 columns)
+S1 = read($2, rows=1, cols=$9); # attribute set 1 (column indices into D)
+S2 = read($3, rows=1, cols=$9); # attribute set 2 (column indices into D)
+K1 = read($4, rows=1, cols=$9); # kind for attributes in S1
+K2 = read($5, rows=1, cols=$9); # kind for attributes in S2
+numPairs = $10; # number of attribute pairs (|S1|*|S2|)
+maxC = $11;     # max number of categories in any categorical attribute
+
+s1size = ncol(S1);
+s2size = ncol(S2);
+
+# rows of basestats: 1=R, 2=chisq, 3=df, 4=pval, 5=cramersv, 6=spearman, 7=eta, 8=anovaf
+numstats = 8;
+basestats = matrix(0, rows=numstats, cols=numPairs);
+cat_counts = matrix(0, rows=maxC, cols=numPairs);
+cat_means = matrix(0, rows=maxC, cols=numPairs);
+cat_vars = matrix(0, rows=maxC, cols=numPairs);
+# every (i,j) iteration below writes only column pairID of these result matrices
+
+parfor( i in 1:s1size, par=4, mode=REMOTE_MR, check=0, opt=NONE) {
+    a1 = castAsScalar(S1[,i]);
+    k1 = castAsScalar(K1[1,i]);
+    A1 = D[,a1];
+
+    parfor( j in 1:s2size, par=4, mode=LOCAL, check=0, opt=NONE) {
+        pairID = (i-1)*s2size+j;  # position of pair (i,j): all j for i=1 first, then i=2, ...
+        a2 = castAsScalar(S2[,j]);
+        k2 = castAsScalar(K2[1,j]);
+        A2 = D[,a2];
+    
+        if (k1 == k2) {
+            if (k1 == 1) {
+                # scale-scale
+                print("[" + i + "," + j + "] scale-scale");
+                r = bivar_ss(A1,A2);   
+                basestats[1,pairID] = r;
+            } else {
+                # nominal-nominal or ordinal-ordinal
+                print("[" + i + "," + j + "] categorical-categorical");
+                [chisq, df, pval, cramersv]  = bivar_cc(A1,A2);
+                basestats[2,pairID] = chisq;
+                basestats[3,pairID] = df;
+                basestats[4,pairID] = pval;
+                basestats[5,pairID] = cramersv;
+
+                if ( k1 == 3 ) {
+                    # ordinal-ordinal: additionally compute the rank-based statistic (row 6)
+                    print("[" + i + "," + j + "] ordinal-ordinal");
+                    sp = bivar_oo(A1, A2);
+                    basestats[6,pairID] = sp;
+                }
+            }
+        } 
+        else {
+            if (k1 == 1 | k2 == 1) {
+                # Scale-nominal/ordinal      TODO MB correctness errors
+                print("[" + i + "," + j + "] scale-categorical");
+                # bivar_sc takes the scale attribute first and the categorical attribute second
+               if ( k1 == 1 ) {
+                    [eta,f, counts, means, vars] = bivar_sc(A1,A2);
+                }
+                else {
+                    [eta,f, counts, means, vars] = bivar_sc(A2,A1);
+                }
+                basestats[7,pairID] = eta;
+                basestats[8,pairID] = f;
+                cat_counts[,pairID] = counts;
+                cat_means[,pairID] = means;
+                cat_vars[,pairID] = vars; 
+            }
+            else {
+                # nominal-ordinal or ordinal-nominal
+                print("[" + i + "," + j + "] categorical-categorical");
+                [chisq, df, pval, cramersv]  = bivar_cc(A1,A2);
+                basestats[2,pairID] = chisq;
+                basestats[3,pairID] = df;
+                basestats[4,pairID] = pval;
+                basestats[5,pairID] = cramersv;
+            }
+        }
+    }
+}
+# write the four result matrices into the output directory $6
+write(basestats, $6 + "/bivar.stats");
+write(cat_counts, $6 + "/category.counts");
+write(cat_means, $6 + "/category.means");
+write(cat_vars, $6 + "/category.variances");
+
+
+# -----------------------------------------------------------------------------------------------------------
+
+bivar_cc = function(Matrix[Double] A, Matrix[Double] B) return (Double chisq, Double df, Double pval, Double cramersv) {
+    # returns the chi-squared statistic, its degrees of freedom, the p-value and Cramer's V for two categorical columns A and B
+    # Contingency Table
+    F = table(A,B);
+
+    # Chi-Squared
+    W = sum(F);
+    r = rowSums(F);
+    c = colSums(F);
+    E = (r %*% c)/W;  # expected counts: product of row total and column total over the grand total W
+    T = (F-E)^2/E;  # per-cell contribution (observed-expected)^2/expected
+    chi_squared = sum(T);
+
+    # compute p-value (lower.tail=FALSE requests the upper tail)
+    degFreedom = (nrow(F)-1)*(ncol(F)-1);
+    pValue = pchisq(target=chi_squared, df=degFreedom, lower.tail=FALSE);
+
+    # Cramer's V
+    R = nrow(F);
+    C = ncol(F);
+    q = min(R,C);
+    cramers_v = sqrt(chi_squared/(W*(q-1)));
+
+    # Assign return values
+    chisq = chi_squared;
+    df = as.double(degFreedom);  # cast so the value matches the declared Double return
+    pval = pValue;
+    cramersv = cramers_v;
+}
+
+# -----------------------------------------------------------------------------------------------------------
+
+bivar_ss = function(Matrix[Double] X, Matrix[Double] Y) return (Double R) {
+    # returns Pearson's correlation coefficient R between the two scale columns X and Y
+    # Unweighted co-variance
+    covXY = cov(X,Y);
+
+    # compute standard deviations for both X and Y by computing 2^nd central moment, rescaled by W/(W-1) with W = number of rows
+    W = nrow(X);
+    m2X = moment(X,2);
+    m2Y = moment(Y,2);
+    sigmaX = sqrt(m2X * (W/(W-1.0)) );
+    sigmaY = sqrt(m2Y * (W/(W-1.0)) );
+
+    # Pearson's R
+    R = covXY / (sigmaX*sigmaY);
+}
+
+# -----------------------------------------------------------------------------------------------------------
+
+# Y points to SCALE variable
+# A points to CATEGORICAL variable
+bivar_sc = function(Matrix[Double] Y, Matrix[Double] A) return (Double Eta, Double AnovaF, Matrix[Double] CFreqs, Matrix[Double] CMeans, Matrix[Double] CVars ) {
+    # returns Eta and the ANOVA F statistic, plus the per-category counts, means and variances of Y grouped by A
+    # mean and variance in target variable
+    W = nrow(A);
+    my = mean(Y);
+    varY = moment(Y,2) * W/(W-1.0)
+
+    # category-wise (frequencies, means, variances)
+    CFreqs = aggregate(target=Y, groups=A, fn="count"); 
+    CMeans = aggregate(target=Y, groups=A, fn="mean");
+    CVars =  aggregate(target=Y, groups=A, fn="variance");
+
+    # number of categories
+    R = nrow(CFreqs);
+    # Eta = sqrt(1 - sum((count-1)*variance per category) / ((W-1)*varY))
+    Eta = sqrt(1 - ( sum((CFreqs-1)*CVars) / ((W-1)*varY) ));
+    # F = (sum(count*(category mean - overall mean)^2)/(R-1)) / (sum((count-1)*variance)/(W-R))
+    anova_num = sum( (CFreqs*(CMeans-my)^2) )/(R-1);
+    anova_den = sum( (CFreqs-1)*CVars )/(W-R);
+    AnovaF = anova_num/anova_den;
+}
+
+# -----------------------------------------------------------------------------------------------------------
+# Function to compute rank scores: Ranks = cumsum(X) - X/2 + 1/2, element-wise on a column vector X
+# returns a vector of the same size as X; when X holds category counts (as passed by bivar_oo), cell i is the average of the ranks cumsum(X)[i]-X[i]+1 .. cumsum(X)[i] covered by category i
+computeRanks = function(Matrix[Double] X) return (Matrix[Double] Ranks) {
+    Ranks = cumsum(X) - X/2 + 1/2;
+}
+
+#-------------------------------------------------------------------------
+
+bivar_oo = function(Matrix[Double] A, Matrix[Double] B) return (Double sp) {
+    # returns sp = covXY/(sqrt(varX)*sqrt(varY)): correlation between the rank scores of A and B, weighted by the contingency counts (presumably Spearman's rho - confirm)
+    # compute contingency table (rows follow the values of A, columns the values of B)
+    F = table(A,B);
+
+    catA = nrow(F);  # number of categories in A (not used below)
+    catB = ncol(F);  # number of categories in B (not used below)
+
+    # compute category-wise counts for both the attributes (row totals R, column totals S)
+    R = rowSums(F);
+    S = colSums(F);
+
+    # compute scores, both are column vectors
+    [C] = computeRanks(R);
+    meanX = mean(C,R); 
+    # transpose the column totals so that B's counts and scores are column vectors too
+    columnS = t(S);
+    [D] = computeRanks(columnS);
+
+    # scores (C,D) are individual values, and counts (R,S) act as weights
+    meanY = mean(D,columnS);
+
+    W = sum(F); # total weight, or total #cases
+    varX = moment(C,R,2)*(W/(W-1.0));  # weighted 2nd central moment of the scores, rescaled by W/(W-1)
+    varY = moment(D,columnS,2)*(W/(W-1.0));
+    covXY = sum( t(F/(W-1) * (C-meanX)) * (D-meanY) );  # sum over all cells of count/(W-1) * (C-meanX) * (D-meanY)
+    # normalize the covariance by both standard deviations
+    sp = covXY/(sqrt(varX)*sqrt(varY));
+}
+
+# -----------------------------------------------------------------------------------------------------------
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/parfor/parfor_bivariate4.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/parfor/parfor_bivariate4.dml b/src/test/scripts/applications/parfor/parfor_bivariate4.dml
index 9b85bad..a19957f 100644
--- a/src/test/scripts/applications/parfor/parfor_bivariate4.dml
+++ b/src/test/scripts/applications/parfor/parfor_bivariate4.dml
@@ -1,258 +1,258 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-
-/*
- *
- * For a given pair of attribute sets, compute bivariate statistics between all attribute pairs 
- *   Given, S_1 = {A_11, A_12, ... A_1m} and S_2 = {A_21, A_22, ... A_2n} 
- *          compute bivariate stats for m*n pairs (A_1i, A_2j), (1<= i <=m) and (1<= j <=n)
- *
- * Seven inputs:  
- *    $1) D  - input data
- *    $2) S1 - First attribute set {A_11, A_12, ... A_1m}
- *    $3) S2 - Second attribute set {A_21, A_22, ... A_2n}
- *    $4) K1 - kind for attributes in S1 
- *    $5) K2 - kind for attributes in S2
- *             kind=1 for scale, kind=2 for nominal, kind=3 for ordinal
- *    $6) numPairs - total number of pairs (m*n)
- *    $7) maxC - maximum number of categories in any categorical attribute
- * 
- * One output:    
- *    $6) output directory in which following four statistics files are created
- *        + bivar.stats - matrix with all 8 bivariate statistics computed for different attribute pairs
- *                        (R, (chi-sq, df, pval, cramersv), spearman, Eta, F)
- *        + categorical.counts - 
- *        + categorical.means - 
- *        + categorical.variances - 
- *          -> Values in these three matrices are applicable only for scale-categorical attribute pairs. 
- *          k^th column in these matrices denote the attribute pair (A_1i,A_2j) where i*j = k.
- */
-
-D = read($1, rows=$7, cols=$8);  # input data set
-S1 = read($2, rows=1, cols=$9); # attribute set 1
-S2 = read($3, rows=1, cols=$9); # attribute set 2
-K1 = read($4, rows=1, cols=$9); # kind for attributes in S1
-K2 = read($5, rows=1, cols=$9); # kind for attributes in S2
-numPairs = $10; # number of attribute pairs (|S1|*|S2|)
-maxC = $11;     # max number of categories in any categorical attribute
-
-s1size = ncol(S1);
-s2size = ncol(S2);
-
-#numpairs = s1size * s2size;
-#print(s1size + ", " + s2size + ", " + numpairs);
-
-# R, chisq, cramers, spearman, eta, anovaf
-numstats = 8;
-basestats = matrix(0, rows=numstats, cols=numPairs);
-cat_counts = matrix(0, rows=maxC, cols=numPairs);
-cat_means = matrix(0, rows=maxC, cols=numPairs);
-cat_vars = matrix(0, rows=maxC, cols=numPairs);
-
-
-parfor( i in 1:s1size, check=0) {
-    a1 = castAsScalar(S1[,i]);
-    k1 = castAsScalar(K1[1,i]);
-    A1 = D[,a1];
-
-    parfor( j in 1:s2size, check=0) {
-        pairID = (i-1)*s2size+j; 
-        a2 = castAsScalar(S2[,j]);
-        k2 = castAsScalar(K2[1,j]);
-        A2 = D[,a2];
-    
-        if (k1 == k2) {
-            if (k1 == 1) {
-                # scale-scale
-                print("[" + i + "," + j + "] scale-scale");
-                r = bivar_ss(A1,A2);   
-                basestats[1,pairID] = r;
-            } else {
-                # nominal-nominal or ordinal-ordinal
-                print("[" + i + "," + j + "] categorical-categorical");
-                [chisq, df, pval, cramersv]  = bivar_cc(A1,A2);
-                basestats[2,pairID] = chisq;
-                basestats[3,pairID] = df;
-                basestats[4,pairID] = pval;
-                basestats[5,pairID] = cramersv;
-
-                if ( k1 == 3 ) {
-                    # ordinal-ordinal
-                    print("[" + i + "," + j + "] ordinal-ordinal");
-                    sp = bivar_oo(A1, A2);
-                    basestats[6,pairID] = sp;
-                }
-            }
-        } 
-        else {
-            if (k1 == 1 | k2 == 1) {
-                # Scale-nominal/ordinal     
-                print("[" + i + "," + j + "] scale-categorical");
-                
-               if ( k1 == 1 ) {
-                    [eta,f, counts, means, vars] = bivar_sc(A1,A2);
-                }
-                else {
-                    [eta,f, counts, means, vars] = bivar_sc(A2,A1);
-                }
-                basestats[7,pairID] = eta;
-                basestats[8,pairID] = f;
-                cat_counts[,pairID] = counts;
-                cat_means[,pairID] = means;
-                cat_vars[,pairID] = vars; 
-            }
-            else {
-                # nominal-ordinal or ordinal-nominal
-                print("[" + i + "," + j + "] categorical-categorical");
-                [chisq, df, pval, cramersv]  = bivar_cc(A1,A2);
-                basestats[2,pairID] = chisq;
-                basestats[3,pairID] = df;
-                basestats[4,pairID] = pval;
-                basestats[5,pairID] = cramersv;
-            }
-        }
-    }
-}
-
-write(basestats, $6 + "/bivar.stats");
-write(cat_counts, $6 + "/category.counts");
-write(cat_means, $6 + "/category.means");
-write(cat_vars, $6 + "/category.variances");
-
-
-# -----------------------------------------------------------------------------------------------------------
-
-bivar_cc = function(Matrix[Double] A, Matrix[Double] B) return (Double chisq, Double df, Double pval, Double cramersv) {
-
-    # Contingency Table
-    F = table(A,B);
-
-    # Chi-Squared
-    W = sum(F);
-    r = rowSums(F);
-    c = colSums(F);
-    E = (r %*% c)/W;
-    T = (F-E)^2/E;
-    chi_squared = sum(T);
-
-    # compute p-value
-    degFreedom = (nrow(F)-1)*(ncol(F)-1);
-    pValue = pchisq(target=chi_squared, df=degFreedom, lower.tail=FALSE);
-
-    # Cramer's V
-    R = nrow(F);
-    C = ncol(F);
-    q = min(R,C);
-    cramers_v = sqrt(chi_squared/(W*(q-1)));
-
-    # Assign return values
-    chisq = chi_squared;
-    df = as.double(degFreedom);
-    pval = pValue;
-    cramersv = cramers_v;
-}
-
-# -----------------------------------------------------------------------------------------------------------
-
-bivar_ss = function(Matrix[Double] X, Matrix[Double] Y) return (Double R) {
-
-    # Unweighted co-variance
-    covXY = cov(X,Y);
-
-    # compute standard deviations for both X and Y by computing 2^nd central moment
-    W = nrow(X);
-    m2X = moment(X,2);
-    m2Y = moment(Y,2);
-    sigmaX = sqrt(m2X * (W/(W-1.0)) );
-    sigmaY = sqrt(m2Y * (W/(W-1.0)) );
-
-    # Pearson's R
-    R = covXY / (sigmaX*sigmaY);
-}
-
-# -----------------------------------------------------------------------------------------------------------
-
-# Y points to SCALE variable
-# A points to CATEGORICAL variable
-bivar_sc = function(Matrix[Double] Y, Matrix[Double] A) return (Double Eta, Double AnovaF, Matrix[Double] CFreqs, Matrix[Double] CMeans, Matrix[Double] CVars ) {
-
-    # mean and variance in target variable
-    W = nrow(A);
-    my = mean(Y);
-    varY = moment(Y,2) * W/(W-1.0)
-
-    # category-wise (frequencies, means, variances)
-    CFreqs = aggregate(target=Y, groups=A, fn="count"); 
-    CMeans = aggregate(target=Y, groups=A, fn="mean");
-    CVars =  aggregate(target=Y, groups=A, fn="variance");
-    
-    # number of categories
-    R = nrow(CFreqs);
-
-    Eta = sqrt(1 - ( sum((CFreqs-1)*CVars) / ((W-1)*varY) ));
-
-    anova_num = sum( (CFreqs*(CMeans-my)^2) )/(R-1);
-    anova_den = sum( (CFreqs-1)*CVars )/(W-R);
-    AnovaF = anova_num/anova_den;
-}
-
-
-# -----------------------------------------------------------------------------------------------------------
-# Function to compute ranks
-# takes a column vector as input, and produces a vector of same size in which each cell denotes to the computed score for that category
-computeRanks = function(Matrix[Double] X) return (Matrix[Double] Ranks) {
-    Ranks = cumsum(X) - X/2 + 1/2;
-}
-
-#-------------------------------------------------------------------------
-
-bivar_oo = function(Matrix[Double] A, Matrix[Double] B) return (Double sp) {
-
-    # compute contingency table
-    F = table(A,B);
-    
-    catA = nrow(F);  # number of categories in A
-    catB = ncol(F);  # number of categories in B
-
-    # compute category-wise counts for both the attributes
-    R = rowSums(F);
-    S = colSums(F);
-
-    # compute scores, both are column vectors
-    [C] = computeRanks(R);
-    meanX = mean(C,R); 
-
-    columnS = t(S);
-    [D] = computeRanks(columnS);
-
-    # scores (C,D) are individual values, and counts (R,S) act as weights
-    meanY = mean(D,columnS);
-
-    W = sum(F); # total weight, or total #cases
-    varX = moment(C,R,2)*(W/(W-1.0));
-    varY = moment(D,columnS,2)*(W/(W-1.0));
-    covXY = sum( t(F/(W-1) * (C-meanX)) * (D-meanY) );
-
-    sp = covXY/(sqrt(varX)*sqrt(varY));
-}
-
-# -----------------------------------------------------------------------------------------------------------
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+/*
+ *
+ * For a given pair of attribute sets, compute bivariate statistics between all attribute pairs 
+ *   Given, S_1 = {A_11, A_12, ... A_1m} and S_2 = {A_21, A_22, ... A_2n} 
+ *          compute bivariate stats for m*n pairs (A_1i, A_2j), (1<= i <=m) and (1<= j <=n)
+ *
+ * Inputs (command-line arguments):
+ *    $1) D  - input data, read as a $7 x $8 matrix
+ *    $2) S1 - First attribute set {A_11, A_12, ... A_1m}, read as a 1 x $9 row vector
+ *    $3) S2 - Second attribute set {A_21, A_22, ... A_2n}, read as a 1 x $9 row vector
+ *    $4) K1 - kind for attributes in S1 (1 x $9)
+ *    $5) K2 - kind for attributes in S2 (1 x $9)
+ *             kind=1 for scale, kind=2 for nominal, kind=3 for ordinal
+ *    $10) numPairs - total number of pairs (m*n)
+ *    $11) maxC - maximum number of categories in any categorical attribute
+ * 
+ * One output:
+ *    $6) output directory in which the following four statistics files are created
+ *        + bivar.stats - matrix with all 8 bivariate statistics computed for different attribute pairs
+ *                        (R, (chi-sq, df, pval, cramersv), spearman, Eta, F)
+ *        + category.counts - per-category counts of the scale attribute
+ *        + category.means - per-category means of the scale attribute
+ *        + category.variances - per-category variances of the scale attribute
+ *          -> Values in these three matrices are applicable only for scale-categorical attribute pairs.
+ *          The k^th column in these matrices denotes the attribute pair (A_1i,A_2j) where k = (i-1)*n + j.
+ */
+
+D = read($1, rows=$7, cols=$8);  # input data set ($7 rows x $8 columns)
+S1 = read($2, rows=1, cols=$9); # attribute set 1 (column indices into D)
+S2 = read($3, rows=1, cols=$9); # attribute set 2 (column indices into D)
+K1 = read($4, rows=1, cols=$9); # kind for attributes in S1
+K2 = read($5, rows=1, cols=$9); # kind for attributes in S2
+numPairs = $10; # number of attribute pairs (|S1|*|S2|)
+maxC = $11;     # max number of categories in any categorical attribute
+
+s1size = ncol(S1);
+s2size = ncol(S2);
+
+#numpairs = s1size * s2size;
+#print(s1size + ", " + s2size + ", " + numpairs);
+
+# rows of basestats: 1=R, 2=chisq, 3=df, 4=pval, 5=cramersv, 6=spearman, 7=eta, 8=anovaf
+numstats = 8;
+basestats = matrix(0, rows=numstats, cols=numPairs);
+cat_counts = matrix(0, rows=maxC, cols=numPairs);
+cat_means = matrix(0, rows=maxC, cols=numPairs);
+cat_vars = matrix(0, rows=maxC, cols=numPairs);
+# every (i,j) iteration below writes only column pairID of these result matrices
+
+parfor( i in 1:s1size, check=0) {
+    a1 = castAsScalar(S1[,i]);
+    k1 = castAsScalar(K1[1,i]);
+    A1 = D[,a1];
+
+    parfor( j in 1:s2size, check=0) {
+        pairID = (i-1)*s2size+j;  # position of pair (i,j): all j for i=1 first, then i=2, ...
+        a2 = castAsScalar(S2[,j]);
+        k2 = castAsScalar(K2[1,j]);
+        A2 = D[,a2];
+    
+        if (k1 == k2) {
+            if (k1 == 1) {
+                # scale-scale
+                print("[" + i + "," + j + "] scale-scale");
+                r = bivar_ss(A1,A2);   
+                basestats[1,pairID] = r;
+            } else {
+                # nominal-nominal or ordinal-ordinal
+                print("[" + i + "," + j + "] categorical-categorical");
+                [chisq, df, pval, cramersv]  = bivar_cc(A1,A2);
+                basestats[2,pairID] = chisq;
+                basestats[3,pairID] = df;
+                basestats[4,pairID] = pval;
+                basestats[5,pairID] = cramersv;
+
+                if ( k1 == 3 ) {
+                    # ordinal-ordinal: additionally compute the rank-based statistic (row 6)
+                    print("[" + i + "," + j + "] ordinal-ordinal");
+                    sp = bivar_oo(A1, A2);
+                    basestats[6,pairID] = sp;
+                }
+            }
+        } 
+        else {
+            if (k1 == 1 | k2 == 1) {
+                # Scale-nominal/ordinal
+                print("[" + i + "," + j + "] scale-categorical");
+                # bivar_sc takes the scale attribute first and the categorical attribute second
+               if ( k1 == 1 ) {
+                    [eta,f, counts, means, vars] = bivar_sc(A1,A2);
+                }
+                else {
+                    [eta,f, counts, means, vars] = bivar_sc(A2,A1);
+                }
+                basestats[7,pairID] = eta;
+                basestats[8,pairID] = f;
+                cat_counts[,pairID] = counts;
+                cat_means[,pairID] = means;
+                cat_vars[,pairID] = vars; 
+            }
+            else {
+                # nominal-ordinal or ordinal-nominal
+                print("[" + i + "," + j + "] categorical-categorical");
+                [chisq, df, pval, cramersv]  = bivar_cc(A1,A2);
+                basestats[2,pairID] = chisq;
+                basestats[3,pairID] = df;
+                basestats[4,pairID] = pval;
+                basestats[5,pairID] = cramersv;
+            }
+        }
+    }
+}
+# write the four result matrices into the output directory $6
+write(basestats, $6 + "/bivar.stats");
+write(cat_counts, $6 + "/category.counts");
+write(cat_means, $6 + "/category.means");
+write(cat_vars, $6 + "/category.variances");
+
+
+# -----------------------------------------------------------------------------------------------------------
+
+bivar_cc = function(Matrix[Double] A, Matrix[Double] B) return (Double chisq, Double df, Double pval, Double cramersv) {
+
+    # Contingency Table
+    F = table(A,B);
+
+    # Chi-Squared
+    W = sum(F);
+    r = rowSums(F);
+    c = colSums(F);
+    E = (r %*% c)/W;
+    T = (F-E)^2/E;
+    chi_squared = sum(T);
+
+    # compute p-value
+    degFreedom = (nrow(F)-1)*(ncol(F)-1);
+    pValue = pchisq(target=chi_squared, df=degFreedom, lower.tail=FALSE);
+
+    # Cramer's V
+    R = nrow(F);
+    C = ncol(F);
+    q = min(R,C);
+    cramers_v = sqrt(chi_squared/(W*(q-1)));
+
+    # Assign return values
+    chisq = chi_squared;
+    df = as.double(degFreedom);
+    pval = pValue;
+    cramersv = cramers_v;
+}
+
+# -----------------------------------------------------------------------------------------------------------
+
+bivar_ss = function(Matrix[Double] X, Matrix[Double] Y) return (Double R) {
+
+    # Unweighted co-variance
+    covXY = cov(X,Y);
+
+    # compute standard deviations for both X and Y by computing 2^nd central moment
+    W = nrow(X);
+    m2X = moment(X,2);
+    m2Y = moment(Y,2);
+    sigmaX = sqrt(m2X * (W/(W-1.0)) );
+    sigmaY = sqrt(m2Y * (W/(W-1.0)) );
+
+    # Pearson's R
+    R = covXY / (sigmaX*sigmaY);
+}
+
+# -----------------------------------------------------------------------------------------------------------
+
+# Y points to SCALE variable
+# A points to CATEGORICAL variable
+bivar_sc = function(Matrix[Double] Y, Matrix[Double] A) return (Double Eta, Double AnovaF, Matrix[Double] CFreqs, Matrix[Double] CMeans, Matrix[Double] CVars ) {
+
+    # mean and variance in target variable
+    W = nrow(A);
+    my = mean(Y);
+    varY = moment(Y,2) * W/(W-1.0)
+
+    # category-wise (frequencies, means, variances)
+    CFreqs = aggregate(target=Y, groups=A, fn="count"); 
+    CMeans = aggregate(target=Y, groups=A, fn="mean");
+    CVars =  aggregate(target=Y, groups=A, fn="variance");
+    
+    # number of categories
+    R = nrow(CFreqs);
+
+    Eta = sqrt(1 - ( sum((CFreqs-1)*CVars) / ((W-1)*varY) ));
+
+    anova_num = sum( (CFreqs*(CMeans-my)^2) )/(R-1);
+    anova_den = sum( (CFreqs-1)*CVars )/(W-R);
+    AnovaF = anova_num/anova_den;
+}
+
+
+# -----------------------------------------------------------------------------------------------------------
+# Function to compute ranks
+# takes a column vector as input, and produces a vector of the same size in which each cell denotes the computed score for that category
+computeRanks = function(Matrix[Double] X) return (Matrix[Double] Ranks) {
+    Ranks = cumsum(X) - X/2 + 1/2;
+}
+
+#-------------------------------------------------------------------------
+
+bivar_oo = function(Matrix[Double] A, Matrix[Double] B) return (Double sp) {
+
+    # compute contingency table
+    F = table(A,B);
+    
+    catA = nrow(F);  # number of categories in A
+    catB = ncol(F);  # number of categories in B
+
+    # compute category-wise counts for both the attributes
+    R = rowSums(F);
+    S = colSums(F);
+
+    # compute scores, both are column vectors
+    [C] = computeRanks(R);
+    meanX = mean(C,R); 
+
+    columnS = t(S);
+    [D] = computeRanks(columnS);
+
+    # scores (C,D) are individual values, and counts (R,S) act as weights
+    meanY = mean(D,columnS);
+
+    W = sum(F); # total weight, or total #cases
+    varX = moment(C,R,2)*(W/(W-1.0));
+    varY = moment(D,columnS,2)*(W/(W-1.0));
+    covXY = sum( t(F/(W-1) * (C-meanX)) * (D-meanY) );
+
+    sp = covXY/(sqrt(varX)*sqrt(varY));
+}
+
+# -----------------------------------------------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/parfor/parfor_corr.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/parfor/parfor_corr.R b/src/test/scripts/applications/parfor/parfor_corr.R
index 5f8c315..854e593 100644
--- a/src/test/scripts/applications/parfor/parfor_corr.R
+++ b/src/test/scripts/applications/parfor/parfor_corr.R
@@ -1,48 +1,48 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-V1 <- readMM(paste(args[1], "V.mtx", sep=""))
-V <- as.matrix(V1);
-
-m <- nrow(V);
-n <- ncol(V); 
-W <- m;
-
-R <- array(0,dim=c(n,n))
-
-for( i in 1:(n-1) )
-{
-   X <- V[ ,i];                 
-      
-   for( j in (i+1):n )  
-   {
-      Y <- V[ ,j];  
-      R[i,j] <- cor(X, Y)  
-      #print(R[i,j]);
-   }
-}   
-
-writeMM(as(R, "CsparseMatrix"), paste(args[2], "Rout", sep=""));
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+V1 <- readMM(paste(args[1], "V.mtx", sep=""))
+V <- as.matrix(V1);
+
+m <- nrow(V);
+n <- ncol(V); 
+W <- m;
+
+R <- array(0,dim=c(n,n))
+
+for( i in 1:(n-1) )
+{
+   X <- V[ ,i];                 
+      
+   for( j in (i+1):n )  
+   {
+      Y <- V[ ,j];  
+      R[i,j] <- cor(X, Y)  
+      #print(R[i,j]);
+   }
+}   
+
+writeMM(as(R, "CsparseMatrix"), paste(args[2], "Rout", sep=""));

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/parfor/parfor_corr0.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/parfor/parfor_corr0.dml b/src/test/scripts/applications/parfor/parfor_corr0.dml
index 7a340bd..a711d0a 100644
--- a/src/test/scripts/applications/parfor/parfor_corr0.dml
+++ b/src/test/scripts/applications/parfor/parfor_corr0.dml
@@ -1,51 +1,51 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-V = read($1,rows=$2,cols=$3);
-m = $2;
-n = $3;
-W = m;
-
-R = matrix(0,rows=n,cols=n); 
-
-for( i in 1:(n-1) )
-{
-   X = V[,i];                 
-   m2X = moment(X,2);
-   sigmaX = sqrt(m2X * (W/(W-1.0)) );
-      
-   for( j in (i+1):n )  
-   {  
-      Y = V[,j];
-
-      #corr computation    
-      m2Y = moment(Y,2);
-      sigmaY = sqrt(m2Y * (W/(W-1.0)) );      
-      covXY = cov(X,Y);      
-      rXY = covXY / (sigmaX*sigmaY); 
-      
-      #print("R[("+i+","+j+")]="+rXY);
-      R[i,j] = rXY; 
-      
-   }
-}   
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+V = read($1,rows=$2,cols=$3);
+m = $2;
+n = $3;
+W = m;
+
+R = matrix(0,rows=n,cols=n); 
+
+for( i in 1:(n-1) )
+{
+   X = V[,i];                 
+   m2X = moment(X,2);
+   sigmaX = sqrt(m2X * (W/(W-1.0)) );
+      
+   for( j in (i+1):n )  
+   {  
+      Y = V[,j];
+
+      #corr computation    
+      m2Y = moment(Y,2);
+      sigmaY = sqrt(m2Y * (W/(W-1.0)) );      
+      covXY = cov(X,Y);      
+      rXY = covXY / (sigmaX*sigmaY); 
+      
+      #print("R[("+i+","+j+")]="+rXY);
+      R[i,j] = rXY; 
+      
+   }
+}   
+
 write(R, $4);       
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/parfor/parfor_corr1.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/parfor/parfor_corr1.dml b/src/test/scripts/applications/parfor/parfor_corr1.dml
index 8bb0368..b2d5a14 100644
--- a/src/test/scripts/applications/parfor/parfor_corr1.dml
+++ b/src/test/scripts/applications/parfor/parfor_corr1.dml
@@ -1,50 +1,50 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-V = read($1,rows=$2,cols=$3);
-m = $2;
-n = $3;
-W = m;
-
-R = matrix(0, rows=n,cols=n); 
-
-parfor( i in 1:(n-1), par=4, mode=LOCAL, opt=NONE )  
-{
-   X = V[,i];                 
-   m2X = moment(X,2);
-   sigmaX = sqrt(m2X * (W/(W-1.0)) );
-      
-   parfor( j in (i+1):n, par=4, mode=LOCAL, opt=NONE )
-   {  
-      Y = V[,j];
-
-      #corr computation    
-      m2Y = moment(Y,2);
-      sigmaY = sqrt(m2Y * (W/(W-1.0)) );      
-      covXY = cov(X,Y);      
-      rXY = covXY / (sigmaX*sigmaY); 
-      
-      #print("R[("+i+","+j+")]="+rXY); 
-      R[i,j] = rXY;       
-   }
-}   
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+V = read($1,rows=$2,cols=$3);
+m = $2;
+n = $3;
+W = m;
+
+R = matrix(0, rows=n,cols=n); 
+
+parfor( i in 1:(n-1), par=4, mode=LOCAL, opt=NONE )  
+{
+   X = V[,i];                 
+   m2X = moment(X,2);
+   sigmaX = sqrt(m2X * (W/(W-1.0)) );
+      
+   parfor( j in (i+1):n, par=4, mode=LOCAL, opt=NONE )
+   {  
+      Y = V[,j];
+
+      #corr computation    
+      m2Y = moment(Y,2);
+      sigmaY = sqrt(m2Y * (W/(W-1.0)) );      
+      covXY = cov(X,Y);      
+      rXY = covXY / (sigmaX*sigmaY); 
+      
+      #print("R[("+i+","+j+")]="+rXY); 
+      R[i,j] = rXY;       
+   }
+}   
+
 write(R, $4);       
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/parfor/parfor_corr2.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/parfor/parfor_corr2.dml b/src/test/scripts/applications/parfor/parfor_corr2.dml
index c5f5c31..9e10534 100644
--- a/src/test/scripts/applications/parfor/parfor_corr2.dml
+++ b/src/test/scripts/applications/parfor/parfor_corr2.dml
@@ -1,50 +1,50 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-V = read($1,rows=$2,cols=$3);
-m = $2;
-n = $3;
-W = m;
-
-R = matrix(0, rows=n,cols=n); 
-
-parfor( i in 1:(n-1), par=4, mode=LOCAL, opt=NONE )
-{
-   X = V[,i];                 
-   m2X = moment(X,2);
-   sigmaX = sqrt(m2X * (W/(W-1.0)) );
-      
-   parfor( j in (i+1):n, par=4, mode=REMOTE_MR, opt=NONE )  
-   {  
-      Y = V[,j];
-
-      #corr computation    
-      m2Y = moment(Y,2);
-      sigmaY = sqrt(m2Y * (W/(W-1.0)) );      
-      covXY = cov(X,Y);      
-      rXY = covXY / (sigmaX*sigmaY); 
-      
-      print("R[{"+i+","+j+"}]="+rXY); #test robustness of ProgramConverter
-      R[i,j] = rXY;       
-   }
-}   
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+V = read($1,rows=$2,cols=$3);
+m = $2;
+n = $3;
+W = m;
+
+R = matrix(0, rows=n,cols=n); 
+
+parfor( i in 1:(n-1), par=4, mode=LOCAL, opt=NONE )
+{
+   X = V[,i];                 
+   m2X = moment(X,2);
+   sigmaX = sqrt(m2X * (W/(W-1.0)) );
+      
+   parfor( j in (i+1):n, par=4, mode=REMOTE_MR, opt=NONE )  
+   {  
+      Y = V[,j];
+
+      #corr computation    
+      m2Y = moment(Y,2);
+      sigmaY = sqrt(m2Y * (W/(W-1.0)) );      
+      covXY = cov(X,Y);      
+      rXY = covXY / (sigmaX*sigmaY); 
+      
+      print("R[{"+i+","+j+"}]="+rXY); #test robustness of ProgramConverter
+      R[i,j] = rXY;       
+   }
+}   
+
 write(R, $4);     
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/parfor/parfor_corr3.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/parfor/parfor_corr3.dml b/src/test/scripts/applications/parfor/parfor_corr3.dml
index 46295a3..95fc38f 100644
--- a/src/test/scripts/applications/parfor/parfor_corr3.dml
+++ b/src/test/scripts/applications/parfor/parfor_corr3.dml
@@ -1,51 +1,51 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-V = read($1,rows=$2,cols=$3);
-m = $2;
-n = $3;
-W = m;
-
-R = matrix(0, rows=n,cols=n); 
-
-parfor( i in 1:(n-1), par=4, mode=REMOTE_MR, opt=NONE )
-{
-   X = V[,i];                 
-   m2X = moment(X,2);
-   sigmaX = sqrt(m2X * (W/(W-1.0)) );
-      
-   parfor( j in (i+1):n, par=4, mode=LOCAL, opt=NONE )  
-   {  
-      Y = V[,j];
-
-      #corr computation    
-      m2Y = moment(Y,2);
-      sigmaY = sqrt(m2Y * (W/(W-1.0)) );      
-      covXY = cov(X,Y);      
-      rXY = covXY / (sigmaX*sigmaY); 
-      
-      print("R[{"+i+","+j+"}]="+rXY); #test robustness of ProgramConverter
-      R[i,j] = rXY; 
-      
-   }
-}   
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+V = read($1,rows=$2,cols=$3);
+m = $2;
+n = $3;
+W = m;
+
+R = matrix(0, rows=n,cols=n); 
+
+parfor( i in 1:(n-1), par=4, mode=REMOTE_MR, opt=NONE )
+{
+   X = V[,i];                 
+   m2X = moment(X,2);
+   sigmaX = sqrt(m2X * (W/(W-1.0)) );
+      
+   parfor( j in (i+1):n, par=4, mode=LOCAL, opt=NONE )  
+   {  
+      Y = V[,j];
+
+      #corr computation    
+      m2Y = moment(Y,2);
+      sigmaY = sqrt(m2Y * (W/(W-1.0)) );      
+      covXY = cov(X,Y);      
+      rXY = covXY / (sigmaX*sigmaY); 
+      
+      print("R[{"+i+","+j+"}]="+rXY); #test robustness of ProgramConverter
+      R[i,j] = rXY; 
+      
+   }
+}   
+
 write(R, $4); 
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/parfor/parfor_corr4.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/parfor/parfor_corr4.dml b/src/test/scripts/applications/parfor/parfor_corr4.dml
index 62e643d..16b4767 100644
--- a/src/test/scripts/applications/parfor/parfor_corr4.dml
+++ b/src/test/scripts/applications/parfor/parfor_corr4.dml
@@ -1,51 +1,51 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-V = read($1,rows=$2,cols=$3);
-m = $2;
-n = $3;
-W = m;
-
-R = matrix(0, rows=n,cols=n); 
-
-parfor( i in 1:(n-1) )
-{
-   X = V[,i];                 
-   m2X = moment(X,2);
-   sigmaX = sqrt(m2X * (W/(W-1.0)) );
-      
-   parfor( j in (i+1):n )  
-   {  
-      Y = V[,j];
-
-      #corr computation    
-      m2Y = moment(Y,2);
-      sigmaY = sqrt(m2Y * (W/(W-1.0)) );      
-      covXY = cov(X,Y);      
-      rXY = covXY / (sigmaX*sigmaY); 
-      
-      print("R[{"+i+","+j+"}]="+rXY); 
-      R[i,j] = rXY; 
-      
-   }
-}   
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+V = read($1,rows=$2,cols=$3);
+m = $2;
+n = $3;
+W = m;
+
+R = matrix(0, rows=n,cols=n); 
+
+parfor( i in 1:(n-1) )
+{
+   X = V[,i];                 
+   m2X = moment(X,2);
+   sigmaX = sqrt(m2X * (W/(W-1.0)) );
+      
+   parfor( j in (i+1):n )  
+   {  
+      Y = V[,j];
+
+      #corr computation    
+      m2Y = moment(Y,2);
+      sigmaY = sqrt(m2Y * (W/(W-1.0)) );      
+      covXY = cov(X,Y);      
+      rXY = covXY / (sigmaX*sigmaY); 
+      
+      print("R[{"+i+","+j+"}]="+rXY); 
+      R[i,j] = rXY; 
+      
+   }
+}   
+
 write(R, $4); 
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/parfor/parfor_corr5.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/parfor/parfor_corr5.dml b/src/test/scripts/applications/parfor/parfor_corr5.dml
index 0da9539..1663b8f 100644
--- a/src/test/scripts/applications/parfor/parfor_corr5.dml
+++ b/src/test/scripts/applications/parfor/parfor_corr5.dml
@@ -1,51 +1,51 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-V = read($1,rows=$2,cols=$3);
-m = $2;
-n = $3;
-W = m;
-
-R = matrix(0, rows=n,cols=n); 
-
-parfor( i in 1:(n-1), profile=1 )
-{
-   X = V[,i];                 
-   m2X = moment(X,2);
-   sigmaX = sqrt(m2X * (W/(W-1.0)) );
-      
-   parfor( j in (i+1):n, profile=1 )  
-   {  
-      Y = V[,j];
-
-      #corr computation    
-      m2Y = moment(Y,2);
-      sigmaY = sqrt(m2Y * (W/(W-1.0)) );      
-      covXY = cov(X,Y);      
-      rXY = covXY / (sigmaX*sigmaY); 
-      
-      print("R[{"+i+","+j+"}]="+rXY); 
-      R[i,j] = rXY; 
-      
-   }
-}   
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+V = read($1,rows=$2,cols=$3);
+m = $2;
+n = $3;
+W = m;
+
+R = matrix(0, rows=n,cols=n); 
+
+parfor( i in 1:(n-1), profile=1 )
+{
+   X = V[,i];                 
+   m2X = moment(X,2);
+   sigmaX = sqrt(m2X * (W/(W-1.0)) );
+      
+   parfor( j in (i+1):n, profile=1 )  
+   {  
+      Y = V[,j];
+
+      #corr computation    
+      m2Y = moment(Y,2);
+      sigmaY = sqrt(m2Y * (W/(W-1.0)) );      
+      covXY = cov(X,Y);      
+      rXY = covXY / (sigmaX*sigmaY); 
+      
+      print("R[{"+i+","+j+"}]="+rXY); 
+      R[i,j] = rXY; 
+      
+   }
+}   
+
 write(R, $4); 
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/parfor/parfor_corr6.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/parfor/parfor_corr6.dml b/src/test/scripts/applications/parfor/parfor_corr6.dml
index 166b3d7..2ee26cc 100644
--- a/src/test/scripts/applications/parfor/parfor_corr6.dml
+++ b/src/test/scripts/applications/parfor/parfor_corr6.dml
@@ -1,51 +1,51 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-V = read($1,rows=$2,cols=$3);
-m = $2;
-n = $3;
-W = m;
-
-R = matrix(0, rows=n,cols=n); 
-
-parfor( i in 1:(n-1), log=debug )
-{
-   X = V[,i];                 
-   m2X = moment(X,2);
-   sigmaX = sqrt(m2X * (W/(W-1.0)) );
-      
-   parfor( j in (i+1):n )  
-   {  
-      Y = V[,j];
-
-      #corr computation    
-      m2Y = moment(Y,2);
-      sigmaY = sqrt(m2Y * (W/(W-1.0)) );      
-      covXY = cov(X,Y);      
-      rXY = covXY / (sigmaX*sigmaY); 
-      
-      print("R[{"+i+","+j+"}]="+rXY); 
-      R[i,j] = rXY; 
-      
-   }
-}   
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+V = read($1,rows=$2,cols=$3);
+m = $2;
+n = $3;
+W = m;
+
+R = matrix(0, rows=n,cols=n); 
+
+parfor( i in 1:(n-1), log=debug )
+{
+   X = V[,i];                 
+   m2X = moment(X,2);
+   sigmaX = sqrt(m2X * (W/(W-1.0)) );
+      
+   parfor( j in (i+1):n )  
+   {  
+      Y = V[,j];
+
+      #corr computation    
+      m2Y = moment(Y,2);
+      sigmaY = sqrt(m2Y * (W/(W-1.0)) );      
+      covXY = cov(X,Y);      
+      rXY = covXY / (sigmaX*sigmaY); 
+      
+      print("R[{"+i+","+j+"}]="+rXY); 
+      R[i,j] = rXY; 
+      
+   }
+}   
+
 write(R, $4); 
\ No newline at end of file


[20/55] [partial] incubator-systemml git commit: [SYSTEMML-482] [SYSTEMML-480] Adding a Git attributes file to enfore Unix-styled line endings, and normalizing all of the line endings.

Posted by du...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/m-svm/m-svm.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/m-svm/m-svm.dml b/src/test/scripts/applications/m-svm/m-svm.dml
index 1307707..bbf5acc 100644
--- a/src/test/scripts/applications/m-svm/m-svm.dml
+++ b/src/test/scripts/applications/m-svm/m-svm.dml
@@ -1,145 +1,145 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# Implements multiclass SVM with squared slack variables, 
-# learns one-against-the-rest binary-class classifiers
-# 
-# Example Usage:
-# Assume SVM_HOME is set to the home of the dml script
-# Assume input and output directories are on hdfs as INPUT_DIR and OUTPUT_DIR
-# Assume number of classes is 10, epsilon = 0.001, lambda=1.0, max_iterations = 100
-# 
-# hadoop jar SystemML.jar -f $SVM_HOME/m-svm.dml -nvargs X=$INPUT_DIR/X Y=$INPUT_DIR/y icpt=intercept classes=10 tol=.001 reg=1.0 maxiter=100 model=$OUTPUT_DIR/w Log=$OUTPUT_DIR/Log fmt="text"
-#
-
-cmdLine_fmt=ifdef($fmt, "text")
-cmdLine_icpt = ifdef($icpt, 0)
-cmdLine_tol=ifdef($tol, 0.001)
-cmdLine_reg=ifdef($reg, 1.0)
-cmdLine_maxiter=ifdef($maxiter, 100)
-
-print("icpt=" + cmdLine_icpt + " tol=" + cmdLine_tol + " reg=" + cmdLine_reg + " maxiter=" + cmdLine_maxiter)
-
-X = read($X)
-
-check_X = sum(X)
-if(check_X == 0){
-	print("X has no non-zeros")
-}else{
-	Y = read($Y)
-	intercept = cmdLine_icpt
-	num_classes = $classes
-	epsilon = cmdLine_tol
-	lambda = cmdLine_reg
-	max_iterations = cmdLine_maxiter
- 
-	num_samples = nrow(X)
-	num_features = ncol(X)
-
-	if (intercept == 1) {
- 		ones  = matrix(1, rows=num_samples, cols=1);
- 		X = append(X, ones);
-	}
-
-	num_rows_in_w = num_features
-	if(intercept == 1){
-		num_rows_in_w = num_rows_in_w + 1
-	}
-	w = matrix(0, rows=num_rows_in_w, cols=num_classes)
-
-	debug_mat = matrix(-1, rows=max_iterations, cols=num_classes)
-	parfor(iter_class in 1:num_classes){		  
-		Y_local = 2 * ppred(Y, iter_class, "==") - 1
-		w_class = matrix(0, rows=num_features, cols=1)
-		if (intercept == 1) {
-			zero_matrix = matrix(0, rows=1, cols=1);
- 			w_class = t(append(t(w_class), zero_matrix));
- 		}
- 
-		g_old = t(X) %*% Y_local
- 		s = g_old
-
-		Xw = matrix(0, rows=nrow(X), cols=1)
-		iter = 0
- 		continue = 1
- 		while(continue == 1)  {
-  			# minimizing primal obj along direction s
-  			step_sz = 0
-  			Xd = X %*% s
-  			wd = lambda * sum(w_class * s)
-  			dd = lambda * sum(s * s)
-  			continue1 = 1
-  			while(continue1 == 1){
-   				tmp_Xw = Xw + step_sz*Xd
-   				out = 1 - Y_local * (tmp_Xw)
-   				sv = ppred(out, 0, ">")
-   				out = out * sv
-   				g = wd + step_sz*dd - sum(out * Y_local * Xd)
-   				h = dd + sum(Xd * sv * Xd)
-   				step_sz = step_sz - g/h
-   				if (g*g/h < 0.0000000001){
-    				continue1 = 0
-   				}
-  			}
- 
-  			#update weights
-  			w_class = w_class + step_sz*s
- 			Xw = Xw + step_sz*Xd
- 
-  			out = 1 - Y_local * Xw
-  			sv = ppred(out, 0, ">")
-  			out = sv * out
-  			obj = 0.5 * sum(out * out) + lambda/2 * sum(w_class * w_class)
-  			g_new = t(X) %*% (out * Y_local) - lambda * w_class
-
-  			tmp = sum(s * g_old)
-  
-  			train_acc = sum(ppred(Y_local*(X%*%w_class), 0, ">="))/num_samples*100
-  			print("For class " + iter_class + " iteration " + iter + " training accuracy: " + train_acc)
-  			debug_mat[iter+1,iter_class] = obj	   
-   
-  			if((step_sz*tmp < epsilon*obj) | (iter >= max_iterations-1)){
-   				continue = 0
-  			}
- 
-  			#non-linear CG step
-  			be = sum(g_new * g_new)/sum(g_old * g_old)
-  			s = be * s + g_new
-  			g_old = g_new
-
-  			iter = iter + 1
- 		}
-
-		w[,iter_class] = w_class
-	}
-
-	write(w, $model, format=cmdLine_fmt)
-
-	debug_str = "# Class, Iter, Obj"
-	for(iter_class in 1:ncol(debug_mat)){
-		for(iter in 1:nrow(debug_mat)){
-			obj = castAsScalar(debug_mat[iter, iter_class])
-			if(obj != -1) 
-				debug_str = append(debug_str, iter_class + "," + iter + "," + obj)
-		}
-	}
-	write(debug_str, $Log)
-}
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# Implements multiclass SVM with squared slack variables, 
+# learns one-against-the-rest binary-class classifiers
+# 
+# Example Usage:
+# Assume SVM_HOME is set to the home of the dml script
+# Assume input and output directories are on hdfs as INPUT_DIR and OUTPUT_DIR
+# Assume number of classes is 10, epsilon = 0.001, lambda=1.0, max_iterations = 100
+# 
+# hadoop jar SystemML.jar -f $SVM_HOME/m-svm.dml -nvargs X=$INPUT_DIR/X Y=$INPUT_DIR/y icpt=intercept classes=10 tol=.001 reg=1.0 maxiter=100 model=$OUTPUT_DIR/w Log=$OUTPUT_DIR/Log fmt="text"
+#
+
+cmdLine_fmt=ifdef($fmt, "text")
+cmdLine_icpt = ifdef($icpt, 0)
+cmdLine_tol=ifdef($tol, 0.001)
+cmdLine_reg=ifdef($reg, 1.0)
+cmdLine_maxiter=ifdef($maxiter, 100)
+
+print("icpt=" + cmdLine_icpt + " tol=" + cmdLine_tol + " reg=" + cmdLine_reg + " maxiter=" + cmdLine_maxiter)
+
+X = read($X)
+
+check_X = sum(X)
+if(check_X == 0){
+	print("X has no non-zeros")
+}else{
+	Y = read($Y)
+	intercept = cmdLine_icpt
+	num_classes = $classes
+	epsilon = cmdLine_tol
+	lambda = cmdLine_reg
+	max_iterations = cmdLine_maxiter
+ 
+	num_samples = nrow(X)
+	num_features = ncol(X)
+
+	if (intercept == 1) {
+ 		ones  = matrix(1, rows=num_samples, cols=1);
+ 		X = append(X, ones);
+	}
+
+	num_rows_in_w = num_features
+	if(intercept == 1){
+		num_rows_in_w = num_rows_in_w + 1
+	}
+	w = matrix(0, rows=num_rows_in_w, cols=num_classes)
+
+	debug_mat = matrix(-1, rows=max_iterations, cols=num_classes)
+	parfor(iter_class in 1:num_classes){		  
+		Y_local = 2 * ppred(Y, iter_class, "==") - 1
+		w_class = matrix(0, rows=num_features, cols=1)
+		if (intercept == 1) {
+			zero_matrix = matrix(0, rows=1, cols=1);
+ 			w_class = t(append(t(w_class), zero_matrix));
+ 		}
+ 
+		g_old = t(X) %*% Y_local
+ 		s = g_old
+
+		Xw = matrix(0, rows=nrow(X), cols=1)
+		iter = 0
+ 		continue = 1
+ 		while(continue == 1)  {
+  			# minimizing primal obj along direction s
+  			step_sz = 0
+  			Xd = X %*% s
+  			wd = lambda * sum(w_class * s)
+  			dd = lambda * sum(s * s)
+  			continue1 = 1
+  			while(continue1 == 1){
+   				tmp_Xw = Xw + step_sz*Xd
+   				out = 1 - Y_local * (tmp_Xw)
+   				sv = ppred(out, 0, ">")
+   				out = out * sv
+   				g = wd + step_sz*dd - sum(out * Y_local * Xd)
+   				h = dd + sum(Xd * sv * Xd)
+   				step_sz = step_sz - g/h
+   				if (g*g/h < 0.0000000001){
+    				continue1 = 0
+   				}
+  			}
+ 
+  			#update weights
+  			w_class = w_class + step_sz*s
+ 			Xw = Xw + step_sz*Xd
+ 
+  			out = 1 - Y_local * Xw
+  			sv = ppred(out, 0, ">")
+  			out = sv * out
+  			obj = 0.5 * sum(out * out) + lambda/2 * sum(w_class * w_class)
+  			g_new = t(X) %*% (out * Y_local) - lambda * w_class
+
+  			tmp = sum(s * g_old)
+  
+  			train_acc = sum(ppred(Y_local*(X%*%w_class), 0, ">="))/num_samples*100
+  			print("For class " + iter_class + " iteration " + iter + " training accuracy: " + train_acc)
+  			debug_mat[iter+1,iter_class] = obj	   
+   
+  			if((step_sz*tmp < epsilon*obj) | (iter >= max_iterations-1)){
+   				continue = 0
+  			}
+ 
+  			#non-linear CG step
+  			be = sum(g_new * g_new)/sum(g_old * g_old)
+  			s = be * s + g_new
+  			g_old = g_new
+
+  			iter = iter + 1
+ 		}
+
+		w[,iter_class] = w_class
+	}
+
+	write(w, $model, format=cmdLine_fmt)
+
+	debug_str = "# Class, Iter, Obj"
+	for(iter_class in 1:ncol(debug_mat)){
+		for(iter in 1:nrow(debug_mat)){
+			obj = castAsScalar(debug_mat[iter, iter_class])
+			if(obj != -1) 
+				debug_str = append(debug_str, iter_class + "," + iter + "," + obj)
+		}
+	}
+	write(debug_str, $Log)
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/m-svm/m-svm.pydml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/m-svm/m-svm.pydml b/src/test/scripts/applications/m-svm/m-svm.pydml
index 83f17cf..348f599 100644
--- a/src/test/scripts/applications/m-svm/m-svm.pydml
+++ b/src/test/scripts/applications/m-svm/m-svm.pydml
@@ -1,136 +1,136 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# Implements multiclass SVM with squared slack variables, 
-# learns one-against-the-rest binary-class classifiers
-# 
-# Example Usage:
-# Assume SVM_HOME is set to the home of the pydml script
-# Assume input and output directories are on hdfs as INPUT_DIR and OUTPUT_DIR
-# Assume number of classes is 10, epsilon = 0.001, lambda=1.0, max_iterations = 100
-# 
-# hadoop jar SystemML.jar -f $SVM_HOME/m-svm.pydml -python -nvargs X=$INPUT_DIR/X Y=$INPUT_DIR/y icpt=intercept classes=10 tol=.001 reg=1.0 maxiter=100 model=$OUTPUT_DIR/w Log=$OUTPUT_DIR/Log fmt="text"
-#
-
-cmdLine_fmt=ifdef($fmt, "text")
-cmdLine_icpt = ifdef($icpt, 0)
-cmdLine_tol=ifdef($tol, 0.001)
-cmdLine_reg=ifdef($reg, 1.0)
-cmdLine_maxiter=ifdef($maxiter, 100)
-
-print("icpt=" + cmdLine_icpt + " tol=" + cmdLine_tol + " reg=" + cmdLine_reg + " maxiter=" + cmdLine_maxiter)
-
-X = load($X)
-
-check_X = sum(X)
-if(check_X == 0):
-    print("X has no non-zeros")
-else:
-    Y = load($Y)
-    intercept = cmdLine_icpt
-    num_classes = $classes
-    epsilon = cmdLine_tol
-    lambda = cmdLine_reg
-    max_iterations = cmdLine_maxiter
-    
-    num_samples = nrow(X)
-    num_features = ncol(X)
-    
-    if (intercept == 1):
-        ones  = full(1, rows=num_samples, cols=1)
-        X = append(X, ones)
-    
-    num_rows_in_w = num_features
-    if(intercept == 1):
-        num_rows_in_w = num_rows_in_w + 1
-    w = full(0, rows=num_rows_in_w, cols=num_classes)
-    
-    debug_mat = full(-1, rows=max_iterations, cols=num_classes)
-    parfor(iter_class in 1:num_classes):
-        Y_local = 2 * ppred(Y, iter_class, "==") - 1
-        w_class = full(0, rows=num_features, cols=1)
-        if (intercept == 1):
-            zero_matrix = full(0, rows=1, cols=1)
-            w_class = transpose(append(transpose(w_class), zero_matrix))
-        g_old = dot(transpose(X), Y_local)
-        s = g_old
-        
-        Xw = full(0, rows=nrow(X), cols=1)
-        iter = 0
-        continue = 1
-        while(continue == 1):
-            # minimizing primal obj along direction s
-            step_sz = 0
-            Xd = dot(X, s)
-            wd = lambda * sum(w_class * s)
-            dd = lambda * sum(s * s)
-            continue1 = 1
-            while(continue1 == 1):
-                tmp_Xw = Xw + step_sz*Xd
-                out = 1 - Y_local * (tmp_Xw)
-                sv = ppred(out, 0, ">")
-                out = out * sv
-                g = wd + step_sz*dd - sum(out * Y_local * Xd)
-                h = dd + sum(Xd * sv * Xd)
-                step_sz = step_sz - g/h
-                if (g*g/h < 0.0000000001):
-                    continue1 = 0
-            
-            #update weights
-            w_class = w_class + step_sz*s
-            Xw = Xw + step_sz*Xd
-            
-            out = 1 - Y_local * Xw
-            sv = ppred(out, 0, ">")
-            out = sv * out
-            obj = 0.5 * sum(out * out) + lambda/2 * sum(w_class * w_class)
-            g_new = dot(transpose(X), (out * Y_local)) - lambda * w_class
-            
-            tmp = sum(s * g_old)
-            
-            train_acc = sum(ppred(Y_local*(dot(X, w_class)), 0, ">="))/num_samples*100
-            print("For class " + iter_class + " iteration " + iter + " training accuracy: " + train_acc)
-            debug_mat[iter+1,iter_class] = obj
-            
-            if((step_sz*tmp < epsilon*obj) | (iter >= max_iterations-1)):
-                continue = 0
-            
-            #non-linear CG step
-            be = sum(g_new * g_new)/sum(g_old * g_old)
-            s = be * s + g_new
-            g_old = g_new
-            
-            iter = iter + 1
-        # end while(continue == 1)
-        
-        w[,iter_class] = w_class
-    # end parfor(iter_class in 1:num_classes)
-    
-    save(w, $model, format=cmdLine_fmt)
-    
-    debug_str = "# Class, Iter, Obj"
-    for(iter_class in 1:ncol(debug_mat)):
-        for(iter in 1:nrow(debug_mat)):
-            obj = castAsScalar(debug_mat[iter, iter_class])
-            if(obj != -1):
-                debug_str = append(debug_str, iter_class + "," + iter + "," + obj)
-    save(debug_str, $Log)
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# Implements multiclass SVM with squared slack variables, 
+# learns one-against-the-rest binary-class classifiers
+# 
+# Example Usage:
+# Assume SVM_HOME is set to the home of the pydml script
+# Assume input and output directories are on hdfs as INPUT_DIR and OUTPUT_DIR
+# Assume number of classes is 10, epsilon = 0.001, lambda=1.0, max_iterations = 100
+# 
+# hadoop jar SystemML.jar -f $SVM_HOME/m-svm.pydml -python -nvargs X=$INPUT_DIR/X Y=$INPUT_DIR/y icpt=intercept classes=10 tol=.001 reg=1.0 maxiter=100 model=$OUTPUT_DIR/w Log=$OUTPUT_DIR/Log fmt="text"
+#
+
+cmdLine_fmt=ifdef($fmt, "text")
+cmdLine_icpt = ifdef($icpt, 0)
+cmdLine_tol=ifdef($tol, 0.001)
+cmdLine_reg=ifdef($reg, 1.0)
+cmdLine_maxiter=ifdef($maxiter, 100)
+
+print("icpt=" + cmdLine_icpt + " tol=" + cmdLine_tol + " reg=" + cmdLine_reg + " maxiter=" + cmdLine_maxiter)
+
+X = load($X)
+
+check_X = sum(X)
+if(check_X == 0):
+    print("X has no non-zeros")
+else:
+    Y = load($Y)
+    intercept = cmdLine_icpt
+    num_classes = $classes
+    epsilon = cmdLine_tol
+    lambda = cmdLine_reg
+    max_iterations = cmdLine_maxiter
+    
+    num_samples = nrow(X)
+    num_features = ncol(X)
+    
+    if (intercept == 1):
+        ones  = full(1, rows=num_samples, cols=1)
+        X = append(X, ones)
+    
+    num_rows_in_w = num_features
+    if(intercept == 1):
+        num_rows_in_w = num_rows_in_w + 1
+    w = full(0, rows=num_rows_in_w, cols=num_classes)
+    
+    debug_mat = full(-1, rows=max_iterations, cols=num_classes)
+    parfor(iter_class in 1:num_classes):
+        Y_local = 2 * ppred(Y, iter_class, "==") - 1
+        w_class = full(0, rows=num_features, cols=1)
+        if (intercept == 1):
+            zero_matrix = full(0, rows=1, cols=1)
+            w_class = transpose(append(transpose(w_class), zero_matrix))
+        g_old = dot(transpose(X), Y_local)
+        s = g_old
+        
+        Xw = full(0, rows=nrow(X), cols=1)
+        iter = 0
+        continue = 1
+        while(continue == 1):
+            # minimizing primal obj along direction s
+            step_sz = 0
+            Xd = dot(X, s)
+            wd = lambda * sum(w_class * s)
+            dd = lambda * sum(s * s)
+            continue1 = 1
+            while(continue1 == 1):
+                tmp_Xw = Xw + step_sz*Xd
+                out = 1 - Y_local * (tmp_Xw)
+                sv = ppred(out, 0, ">")
+                out = out * sv
+                g = wd + step_sz*dd - sum(out * Y_local * Xd)
+                h = dd + sum(Xd * sv * Xd)
+                step_sz = step_sz - g/h
+                if (g*g/h < 0.0000000001):
+                    continue1 = 0
+            
+            #update weights
+            w_class = w_class + step_sz*s
+            Xw = Xw + step_sz*Xd
+            
+            out = 1 - Y_local * Xw
+            sv = ppred(out, 0, ">")
+            out = sv * out
+            obj = 0.5 * sum(out * out) + lambda/2 * sum(w_class * w_class)
+            g_new = dot(transpose(X), (out * Y_local)) - lambda * w_class
+            
+            tmp = sum(s * g_old)
+            
+            train_acc = sum(ppred(Y_local*(dot(X, w_class)), 0, ">="))/num_samples*100
+            print("For class " + iter_class + " iteration " + iter + " training accuracy: " + train_acc)
+            debug_mat[iter+1,iter_class] = obj
+            
+            if((step_sz*tmp < epsilon*obj) | (iter >= max_iterations-1)):
+                continue = 0
+            
+            #non-linear CG step
+            be = sum(g_new * g_new)/sum(g_old * g_old)
+            s = be * s + g_new
+            g_old = g_new
+            
+            iter = iter + 1
+        # end while(continue == 1)
+        
+        w[,iter_class] = w_class
+    # end parfor(iter_class in 1:num_classes)
+    
+    save(w, $model, format=cmdLine_fmt)
+    
+    debug_str = "# Class, Iter, Obj"
+    for(iter_class in 1:ncol(debug_mat)):
+        for(iter in 1:nrow(debug_mat)):
+            obj = castAsScalar(debug_mat[iter, iter_class])
+            if(obj != -1):
+                debug_str = append(debug_str, iter_class + "," + iter + "," + obj)
+    save(debug_str, $Log)
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/mdabivar/MDABivariateStats.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/mdabivar/MDABivariateStats.R b/src/test/scripts/applications/mdabivar/MDABivariateStats.R
index 7715c5e..1844bbb 100644
--- a/src/test/scripts/applications/mdabivar/MDABivariateStats.R
+++ b/src/test/scripts/applications/mdabivar/MDABivariateStats.R
@@ -1,294 +1,294 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-bivar_ss = function(X, Y) {
-
-    # Unweighted co-variance
-    covXY = cov(X,Y)
-
-    # compute standard deviations for both X and Y by computing 2^nd central moment
-    m2X = var(X)
-    m2Y = var(Y)
-    sigmaX = sqrt(m2X)
-    sigmaY = sqrt(m2Y)
-
-    # Pearson's R
-    R = covXY / (sigmaX*sigmaY)
-
-    return(list("R" = R, "covXY" = covXY, "sigmaX" = sigmaX, "sigmaY" = sigmaY))
-}
-
-# -----------------------------------------------------------------------------------------------------------
-
-bivar_cc = function(A, B) {
-
-    # Contingency Table
-    F = table(A,B)
-    
-    # Chi-Squared
-    cst = chisq.test(F)
-
-    r = rowSums(F)
-    c = colSums(F)
-    
-    chi_squared = as.numeric(cst[1])
-
-    # compute p-value
-    pValue = as.numeric(cst[3])
-
-    # Assign return values
-    pval = pValue
-    contingencyTable = F
-    rowMarginals = r
-    colMarginals = c
-
-    return(list("pval" = pval, "contingencyTable" = contingencyTable, "rowMarginals" = rowMarginals, "colMarginals" = colMarginals))
-}
-
-# -----------------------------------------------------------------------------------------------------------
-
-# Y points to SCALE variable
-# A points to CATEGORICAL variable
-bivar_sc = function(Y, A) {
-    # mean and variance in target variable
-    W = length(A)
-    my = mean(Y)
-    varY = var(Y)
-
-    # category-wise (frequencies, means, variances)
-    CFreqs = as.matrix(table(A)) 
-
-    CMeans = as.matrix(aggregate(Y, by=list(A), "mean")$x)
-
-    CVars = as.matrix(aggregate(Y, by=list(A), "var")$x)
-    CVars[is.na(CVars)] <- 0
-
-    # number of categories
-    R = nrow(CFreqs)
-    df1 = R-1
-    df2 = W-R
-
-    anova_num = sum( (CFreqs*(CMeans-my)^2) )/(R-1)
-    anova_den = sum( (CFreqs-1)*CVars )/(W-R)
-    AnovaF = anova_num/anova_den
-    pVal = 1-pf(AnovaF, df1, df2)
-
-    return(list("pVal" = pVal, "CFreqs" = CFreqs, "CMeans" = CMeans, "CVars" = CVars))
-}
-
-# Main starts here -----------------------------------------------------------------------------------------------------------
-
-args <- commandArgs(TRUE)
-
-library(Matrix)
-
-# input data set
-D = readMM(paste(args[1], "X.mtx", sep=""));
-
-# label attr id (must be a valid index > 0)  
-label_index = as.integer(args[2])
-
-# feature attributes, column vector of indices
-feature_indices = readMM(paste(args[1], "feature_indices.mtx", sep="")) 
-
-# can be either 1 (scale) or 0 (categorical)
-label_measurement_level = as.integer(args[3]) 
-
-# measurement levels for features, 0/1 column vector
-feature_measurement_levels = readMM(paste(args[1], "feature_measurement_levels.mtx", sep="")) 
-
-sz = ncol(D)
-
-# store for pvalues and pearson's r
-stats = matrix(0, sz, 1)
-# store for type of test performed: 1 is chi-sq, 2 is ftest, 3 is pearson's
-tests = matrix(0, sz, 1)
-# store for covariances used to compute pearson's r
-covariances = matrix(0, sz, 1)
-# store for standard deviations used to compute pearson's r
-standard_deviations = matrix(0, sz, 1)
-
-labels = D[,label_index]
-
-labelCorrection = 0
-if(label_measurement_level == 1){
-	numLabels = length(labels)
-        cmLabels = var(labels)
-    	stdLabels = sqrt(cmLabels)
-	standard_deviations[label_index,1] = stdLabels
-}else{
-	labelCorrection = 1 - min(labels)
-	labels = labels + labelCorrection
-}
-
-mx = apply(D, 2, max)
-mn = apply(D, 2, min)	
-num_distinct_values = mx-mn+1
-max_num_distinct_values = 0
-for(i1 in 1:nrow(feature_indices)){
-	feature_index1 = feature_indices[i1,1]
-	num = num_distinct_values[feature_index1]
-	if(feature_measurement_levels[i1,1] == 0 & num >= max_num_distinct_values){
-		max_num_distinct_values = num
-	}
-}
-distinct_label_values = matrix(0, 1, 1)	
-contingencyTableSz = 1
-maxNumberOfGroups = 1
-if(max_num_distinct_values != 0){
-	maxNumberOfGroups = max_num_distinct_values
-}
-if(label_measurement_level==0){
-	distinct_label_values = as.data.frame(table(labels))$Freq
-	if(max_num_distinct_values != 0){
-		contingencyTableSz = max_num_distinct_values*length(distinct_label_values)
-	}
-	maxNumberOfGroups = max(maxNumberOfGroups, length(distinct_label_values))
-}
-# store for contingency table cell values
-contingencyTablesCounts = matrix(0, sz, contingencyTableSz)
-# store for contingency table label(row) assignments
-contingencyTablesLabelValues = matrix(0, sz, contingencyTableSz)
-# store for contingency table feature(col) assignments
-contingencyTablesFeatureValues = matrix(0, sz, contingencyTableSz)
-# store for distinct values
-featureValues = matrix(0, sz, maxNumberOfGroups)
-# store for counts of distinct values
-featureCounts = matrix(0, sz, maxNumberOfGroups)
-# store for group means
-featureMeans = matrix(0, sz, maxNumberOfGroups)
-# store for group standard deviations
-featureSTDs = matrix(0, sz, maxNumberOfGroups)
-
-if(label_measurement_level == 0){
-	featureCounts[label_index,1:length(distinct_label_values)] = distinct_label_values
-	for(i2 in 1:length(distinct_label_values)){
-		featureValues[label_index,i2] = i2-labelCorrection
-	}
-}
-
-for(i3 in 1:nrow(feature_indices)){
-	feature_index2 = feature_indices[i3,1]
-	feature_measurement_level = feature_measurement_levels[i3,1]
-	
-	feature = D[,feature_index2]
-	
-	if(feature_measurement_level == 0){
-		featureCorrection = 1 - min(feature)
-		feature = feature + featureCorrection
-			
-		if(label_measurement_level == feature_measurement_level){
-		  # categorical-categorical
-		  tests[feature_index2,1] = 1
-
-		  ret = bivar_cc(labels, feature)
-                  pVal = ret$pval
-                  contingencyTable = ret$contingencyTable
-                  rowMarginals = ret$rowMarginals
-                  colMarginals = ret$colMarginals
-
-		  stats[feature_index2,1] = pVal
-			
-		  sz3 = nrow(contingencyTable)*ncol(contingencyTable)
-			
-		  contingencyTableCounts = matrix(0, 1, sz3)
-		  contingencyTableLabelValues = matrix(0, 1, sz3)
-		  contingencyTableFeatureValues = matrix(0, 1, sz3)
-			
-            	  for(i4 in 1:nrow(contingencyTable)){
-		  	 for(j in 1:ncol(contingencyTable)){
-					#get rid of this, see *1 below
-					contingencyTableCounts[1, ncol(contingencyTable)*(i4-1)+j] = contingencyTable[i4,j]
-					
-					contingencyTableLabelValues[1, ncol(contingencyTable)*(i4-1)+j] = i4-labelCorrection
-					contingencyTableFeatureValues[1, ncol(contingencyTable)*(i4-1)+j] = j-featureCorrection 
-				}
-			}
-			contingencyTablesCounts[feature_index2,1:sz3] = contingencyTableCounts
-            
-			contingencyTablesLabelValues[feature_index2,1:sz3] = contingencyTableLabelValues
-			contingencyTablesFeatureValues[feature_index2,1:sz3] = contingencyTableFeatureValues
-			
-			featureCounts[feature_index2,1:length(colMarginals)] = colMarginals
-			for(i5 in 1:length(colMarginals)){
-				featureValues[feature_index2,i5] = i5-featureCorrection
-			}
-		}else{
-			# label is scale, feature is categorical
-			tests[feature_index2,1] = 2
-			
-			ret = bivar_sc(labels, feature)
-                  pVal = ret$pVal
-                  frequencies = ret$CFreqs
-                  means = ret$CMeans
-                  variances = ret$CVars
-
-			stats[feature_index2,1] = pVal
-			featureCounts[feature_index2,1:nrow(frequencies)] = t(frequencies)
-			for(i6 in 1:nrow(frequencies)){
-				featureValues[feature_index2,i6] = i6 - featureCorrection
-			}
-			featureMeans[feature_index2,1:nrow(means)] = t(means)
-			featureSTDs[feature_index2,1:nrow(variances)] = t(sqrt(variances))
-		}
-	}else{
-		if(label_measurement_level == feature_measurement_level){
-		  # scale-scale
-		  tests[feature_index2,1] = 3
-
-		  ret = bivar_ss(labels, feature)
-                  r = ret$R
-                  covariance = ret$covXY
-                  stdX = ret$sigmaX
-                  stdY = ret$sigmaY
- 
-		  stats[feature_index2,1] = r
-		  covariances[feature_index2,1] = covariance
-		  standard_deviations[feature_index2,1] = stdY
-		}else{
-		  # label is categorical, feature is scale
-		  tests[feature_index2,1] = 2
-			
-		  ret = bivar_sc(feature, labels)
-		  pVal = ret$pVal
-		  frequencies = ret$CFreqs
-                  means = ret$CMeans
-		  variances = ret$CVars
-			
-		  stats[feature_index2,1] = pVal
-		  featureMeans[feature_index2,1:nrow(means)] = t(means)
-		  featureSTDs[feature_index2,1:nrow(variances)] = t(sqrt(variances))
-		}
-	}
-}
-
-writeMM(as(stats, "CsparseMatrix"), paste(args[4], "stats", sep=""))
-writeMM(as(tests, "CsparseMatrix"), paste(args[4], "tests", sep=""))
-writeMM(as(covariances, "CsparseMatrix"), paste(args[4], "covariances", sep=""))
-writeMM(as(standard_deviations, "CsparseMatrix"), paste(args[4], "standard_deviations", sep=""))
-writeMM(as(contingencyTablesCounts, "CsparseMatrix"), paste(args[4], "contingencyTablesCounts", sep=""))
-writeMM(as(contingencyTablesLabelValues, "CsparseMatrix"), paste(args[4], "contingencyTablesLabelValues", sep=""))
-writeMM(as(contingencyTablesFeatureValues, "CsparseMatrix"), paste(args[4], "contingencyTablesFeatureValues", sep=""))
-writeMM(as(featureValues, "CsparseMatrix"), paste(args[4], "featureValues", sep=""))
-writeMM(as(featureCounts, "CsparseMatrix"), paste(args[4], "featureCounts", sep=""))
-writeMM(as(featureMeans, "CsparseMatrix"), paste(args[4], "featureMeans", sep=""))
-writeMM(as(featureSTDs, "CsparseMatrix"), paste(args[4], "featureSTDs", sep=""))
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+bivar_ss = function(X, Y) {
+
+    # Unweighted co-variance
+    covXY = cov(X,Y)
+
+    # compute standard deviations for both X and Y by computing 2^nd central moment
+    m2X = var(X)
+    m2Y = var(Y)
+    sigmaX = sqrt(m2X)
+    sigmaY = sqrt(m2Y)
+
+    # Pearson's R
+    R = covXY / (sigmaX*sigmaY)
+
+    return(list("R" = R, "covXY" = covXY, "sigmaX" = sigmaX, "sigmaY" = sigmaY))
+}
+
+# -----------------------------------------------------------------------------------------------------------
+
+bivar_cc = function(A, B) {
+
+    # Contingency Table
+    F = table(A,B)
+    
+    # Chi-Squared
+    cst = chisq.test(F)
+
+    r = rowSums(F)
+    c = colSums(F)
+    
+    chi_squared = as.numeric(cst[1])
+
+    # compute p-value
+    pValue = as.numeric(cst[3])
+
+    # Assign return values
+    pval = pValue
+    contingencyTable = F
+    rowMarginals = r
+    colMarginals = c
+
+    return(list("pval" = pval, "contingencyTable" = contingencyTable, "rowMarginals" = rowMarginals, "colMarginals" = colMarginals))
+}
+
+# -----------------------------------------------------------------------------------------------------------
+
+# Y points to SCALE variable
+# A points to CATEGORICAL variable
+bivar_sc = function(Y, A) {
+    # mean and variance in target variable
+    W = length(A)
+    my = mean(Y)
+    varY = var(Y)
+
+    # category-wise (frequencies, means, variances)
+    CFreqs = as.matrix(table(A)) 
+
+    CMeans = as.matrix(aggregate(Y, by=list(A), "mean")$x)
+
+    CVars = as.matrix(aggregate(Y, by=list(A), "var")$x)
+    CVars[is.na(CVars)] <- 0
+
+    # number of categories
+    R = nrow(CFreqs)
+    df1 = R-1
+    df2 = W-R
+
+    anova_num = sum( (CFreqs*(CMeans-my)^2) )/(R-1)
+    anova_den = sum( (CFreqs-1)*CVars )/(W-R)
+    AnovaF = anova_num/anova_den
+    pVal = 1-pf(AnovaF, df1, df2)
+
+    return(list("pVal" = pVal, "CFreqs" = CFreqs, "CMeans" = CMeans, "CVars" = CVars))
+}
+
+# Main starts here -----------------------------------------------------------------------------------------------------------
+
+args <- commandArgs(TRUE)
+
+library(Matrix)
+
+# input data set
+D = readMM(paste(args[1], "X.mtx", sep=""));
+
+# label attr id (must be a valid index > 0)  
+label_index = as.integer(args[2])
+
+# feature attributes, column vector of indices
+feature_indices = readMM(paste(args[1], "feature_indices.mtx", sep="")) 
+
+# can be either 1 (scale) or 0 (categorical)
+label_measurement_level = as.integer(args[3]) 
+
+# measurement levels for features, 0/1 column vector
+feature_measurement_levels = readMM(paste(args[1], "feature_measurement_levels.mtx", sep="")) 
+
+sz = ncol(D)
+
+# store for pvalues and pearson's r
+stats = matrix(0, sz, 1)
+# store for type of test performed: 1 is chi-sq, 2 is ftest, 3 is pearson's
+tests = matrix(0, sz, 1)
+# store for covariances used to compute pearson's r
+covariances = matrix(0, sz, 1)
+# store for standard deviations used to compute pearson's r
+standard_deviations = matrix(0, sz, 1)
+
+labels = D[,label_index]
+
+labelCorrection = 0
+if(label_measurement_level == 1){
+	numLabels = length(labels)
+        cmLabels = var(labels)
+    	stdLabels = sqrt(cmLabels)
+	standard_deviations[label_index,1] = stdLabels
+}else{
+	labelCorrection = 1 - min(labels)
+	labels = labels + labelCorrection
+}
+
+mx = apply(D, 2, max)
+mn = apply(D, 2, min)	
+num_distinct_values = mx-mn+1
+max_num_distinct_values = 0
+for(i1 in 1:nrow(feature_indices)){
+	feature_index1 = feature_indices[i1,1]
+	num = num_distinct_values[feature_index1]
+	if(feature_measurement_levels[i1,1] == 0 & num >= max_num_distinct_values){
+		max_num_distinct_values = num
+	}
+}
+distinct_label_values = matrix(0, 1, 1)	
+contingencyTableSz = 1
+maxNumberOfGroups = 1
+if(max_num_distinct_values != 0){
+	maxNumberOfGroups = max_num_distinct_values
+}
+if(label_measurement_level==0){
+	distinct_label_values = as.data.frame(table(labels))$Freq
+	if(max_num_distinct_values != 0){
+		contingencyTableSz = max_num_distinct_values*length(distinct_label_values)
+	}
+	maxNumberOfGroups = max(maxNumberOfGroups, length(distinct_label_values))
+}
+# store for contingency table cell values
+contingencyTablesCounts = matrix(0, sz, contingencyTableSz)
+# store for contingency table label(row) assignments
+contingencyTablesLabelValues = matrix(0, sz, contingencyTableSz)
+# store for contingency table feature(col) assignments
+contingencyTablesFeatureValues = matrix(0, sz, contingencyTableSz)
+# store for distinct values
+featureValues = matrix(0, sz, maxNumberOfGroups)
+# store for counts of distinct values
+featureCounts = matrix(0, sz, maxNumberOfGroups)
+# store for group means
+featureMeans = matrix(0, sz, maxNumberOfGroups)
+# store for group standard deviations
+featureSTDs = matrix(0, sz, maxNumberOfGroups)
+
+if(label_measurement_level == 0){
+	featureCounts[label_index,1:length(distinct_label_values)] = distinct_label_values
+	for(i2 in 1:length(distinct_label_values)){
+		featureValues[label_index,i2] = i2-labelCorrection
+	}
+}
+
+for(i3 in 1:nrow(feature_indices)){
+	feature_index2 = feature_indices[i3,1]
+	feature_measurement_level = feature_measurement_levels[i3,1]
+	
+	feature = D[,feature_index2]
+	
+	if(feature_measurement_level == 0){
+		featureCorrection = 1 - min(feature)
+		feature = feature + featureCorrection
+			
+		if(label_measurement_level == feature_measurement_level){
+		  # categorical-categorical
+		  tests[feature_index2,1] = 1
+
+		  ret = bivar_cc(labels, feature)
+                  pVal = ret$pval
+                  contingencyTable = ret$contingencyTable
+                  rowMarginals = ret$rowMarginals
+                  colMarginals = ret$colMarginals
+
+		  stats[feature_index2,1] = pVal
+			
+		  sz3 = nrow(contingencyTable)*ncol(contingencyTable)
+			
+		  contingencyTableCounts = matrix(0, 1, sz3)
+		  contingencyTableLabelValues = matrix(0, 1, sz3)
+		  contingencyTableFeatureValues = matrix(0, 1, sz3)
+			
+            	  for(i4 in 1:nrow(contingencyTable)){
+		  	 for(j in 1:ncol(contingencyTable)){
+					#get rid of this, see *1 below
+					contingencyTableCounts[1, ncol(contingencyTable)*(i4-1)+j] = contingencyTable[i4,j]
+					
+					contingencyTableLabelValues[1, ncol(contingencyTable)*(i4-1)+j] = i4-labelCorrection
+					contingencyTableFeatureValues[1, ncol(contingencyTable)*(i4-1)+j] = j-featureCorrection 
+				}
+			}
+			contingencyTablesCounts[feature_index2,1:sz3] = contingencyTableCounts
+            
+			contingencyTablesLabelValues[feature_index2,1:sz3] = contingencyTableLabelValues
+			contingencyTablesFeatureValues[feature_index2,1:sz3] = contingencyTableFeatureValues
+			
+			featureCounts[feature_index2,1:length(colMarginals)] = colMarginals
+			for(i5 in 1:length(colMarginals)){
+				featureValues[feature_index2,i5] = i5-featureCorrection
+			}
+		}else{
+			# label is scale, feature is categorical
+			tests[feature_index2,1] = 2
+			
+			ret = bivar_sc(labels, feature)
+                  pVal = ret$pVal
+                  frequencies = ret$CFreqs
+                  means = ret$CMeans
+                  variances = ret$CVars
+
+			stats[feature_index2,1] = pVal
+			featureCounts[feature_index2,1:nrow(frequencies)] = t(frequencies)
+			for(i6 in 1:nrow(frequencies)){
+				featureValues[feature_index2,i6] = i6 - featureCorrection
+			}
+			featureMeans[feature_index2,1:nrow(means)] = t(means)
+			featureSTDs[feature_index2,1:nrow(variances)] = t(sqrt(variances))
+		}
+	}else{
+		if(label_measurement_level == feature_measurement_level){
+		  # scale-scale
+		  tests[feature_index2,1] = 3
+
+		  ret = bivar_ss(labels, feature)
+                  r = ret$R
+                  covariance = ret$covXY
+                  stdX = ret$sigmaX
+                  stdY = ret$sigmaY
+ 
+		  stats[feature_index2,1] = r
+		  covariances[feature_index2,1] = covariance
+		  standard_deviations[feature_index2,1] = stdY
+		}else{
+		  # label is categorical, feature is scale
+		  tests[feature_index2,1] = 2
+			
+		  ret = bivar_sc(feature, labels)
+		  pVal = ret$pVal
+		  frequencies = ret$CFreqs
+                  means = ret$CMeans
+		  variances = ret$CVars
+			
+		  stats[feature_index2,1] = pVal
+		  featureMeans[feature_index2,1:nrow(means)] = t(means)
+		  featureSTDs[feature_index2,1:nrow(variances)] = t(sqrt(variances))
+		}
+	}
+}
+
+writeMM(as(stats, "CsparseMatrix"), paste(args[4], "stats", sep=""))
+writeMM(as(tests, "CsparseMatrix"), paste(args[4], "tests", sep=""))
+writeMM(as(covariances, "CsparseMatrix"), paste(args[4], "covariances", sep=""))
+writeMM(as(standard_deviations, "CsparseMatrix"), paste(args[4], "standard_deviations", sep=""))
+writeMM(as(contingencyTablesCounts, "CsparseMatrix"), paste(args[4], "contingencyTablesCounts", sep=""))
+writeMM(as(contingencyTablesLabelValues, "CsparseMatrix"), paste(args[4], "contingencyTablesLabelValues", sep=""))
+writeMM(as(contingencyTablesFeatureValues, "CsparseMatrix"), paste(args[4], "contingencyTablesFeatureValues", sep=""))
+writeMM(as(featureValues, "CsparseMatrix"), paste(args[4], "featureValues", sep=""))
+writeMM(as(featureCounts, "CsparseMatrix"), paste(args[4], "featureCounts", sep=""))
+writeMM(as(featureMeans, "CsparseMatrix"), paste(args[4], "featureMeans", sep=""))
+writeMM(as(featureSTDs, "CsparseMatrix"), paste(args[4], "featureSTDs", sep=""))
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/mdabivar/MDABivariateStats.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/mdabivar/MDABivariateStats.dml b/src/test/scripts/applications/mdabivar/MDABivariateStats.dml
index 1e04154..56163ad 100644
--- a/src/test/scripts/applications/mdabivar/MDABivariateStats.dml
+++ b/src/test/scripts/applications/mdabivar/MDABivariateStats.dml
@@ -1,268 +1,268 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# Main starts here -----------------------------------------------------------------------------------------------------------
-
-# input data set
-D = read($1)
-
-# label attr id (must be a valid index > 0)  
-label_index = $2
-
-# feature attributes, column vector of indices
-feature_indices = read($3) 
-
-# can be either 1 (scale) or 0 (categorical)
-label_measurement_level = $4 
-
-# measurement levels for features, 0/1 column vector
-feature_measurement_levels = read($5) 
-
-sz = ncol(D)
-
-# store for pvalues and pearson's r
-stats = matrix(0, rows=sz, cols=1)
-# store for type of test performed: 1 is chi-sq, 2 is ftest, 3 is pearson's
-tests = matrix(0, rows=sz, cols=1)
-# store for covariances used to compute pearson's r
-covariances = matrix(0, rows=sz, cols=1)
-# store for standard deviations used to compute pearson's r
-standard_deviations = matrix(0, rows=sz, cols=1)
-
-labels = D[,label_index]
-
-labelCorrection = 0
-if(label_measurement_level == 1){
-	numLabels = nrow(labels)
-    cmLabels = moment(labels,2)
-    stdLabels = sqrt(cmLabels * (numLabels/(numLabels-1.0)) )
-	standard_deviations[label_index,1] = stdLabels
-}else{
-	labelCorrection = 1 - min(labels)
-	labels = labels + labelCorrection
-}
-
-mx = colMaxs(D)
-mn = colMins(D)	
-num_distinct_values = mx-mn+1
-max_num_distinct_values = 0
-for(i1 in 1:nrow(feature_indices)){
-	feature_index1 = castAsScalar(feature_indices[i1,1])
-	num = castAsScalar(num_distinct_values[1,feature_index1])
-	if(castAsScalar(feature_measurement_levels[i1,1]) == 0 & num >= max_num_distinct_values){
-		max_num_distinct_values = num
-	}
-}
-distinct_label_values = matrix(0, rows=1, cols=1)	
-contingencyTableSz = 1
-maxNumberOfGroups = 1
-if(max_num_distinct_values != 0){
-	maxNumberOfGroups = max_num_distinct_values
-}
-if(label_measurement_level==0){
-	distinct_label_values = aggregate(target=labels, groups=labels, fn="count")
-	if(max_num_distinct_values != 0){
-		contingencyTableSz = max_num_distinct_values*nrow(distinct_label_values)
-	}
-	maxNumberOfGroups = max(maxNumberOfGroups, nrow(distinct_label_values))
-}
-# store for contingency table cell values
-contingencyTablesCounts = matrix(0, rows=sz, cols=contingencyTableSz)
-# store for contingency table label(row) assignments
-contingencyTablesLabelValues = matrix(0, rows=sz, cols=contingencyTableSz)
-# store for contingency table feature(col) assignments
-contingencyTablesFeatureValues = matrix(0, rows=sz, cols=contingencyTableSz)
-# store for distinct values
-featureValues = matrix(0, rows=sz, cols=maxNumberOfGroups)
-# store for counts of distinct values
-featureCounts = matrix(0, rows=sz, cols=maxNumberOfGroups)
-# store for group means
-featureMeans = matrix(0, rows=sz, cols=maxNumberOfGroups)
-# store for group standard deviations
-featureSTDs = matrix(0, rows=sz, cols=maxNumberOfGroups)
-
-if(label_measurement_level == 0){
-	featureCounts[label_index,1:nrow(distinct_label_values)] = t(distinct_label_values)
-	parfor(i2 in 1:nrow(distinct_label_values)){
-		featureValues[label_index,i2] = i2-labelCorrection
-	}
-}
-
-parfor(i3 in 1:nrow(feature_indices), check=0){
-	feature_index2 = castAsScalar(feature_indices[i3,1])
-	feature_measurement_level = castAsScalar(feature_measurement_levels[i3,1])
-	
-	feature = D[,feature_index2]
-	
-	if(feature_measurement_level == 0){
-		featureCorrection = 1 - min(feature)
-		feature = feature + featureCorrection
-			
-		if(label_measurement_level == feature_measurement_level){
-			# categorical-categorical
-			tests[feature_index2,1] = 1
-			[pVal, contingencyTable, rowMarginals, colMarginals] = bivar_cc(labels, feature)
-			stats[feature_index2,1] = pVal
-			
-			sz3=1
-			if(1==1){
-				sz3 = nrow(contingencyTable)*ncol(contingencyTable)
-			}
-			contingencyTableLabelValues = matrix(0, rows=1, cols=sz3)
-			contingencyTableFeatureValues = matrix(0, rows=1, cols=sz3)
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# Main starts here -----------------------------------------------------------------------------------------------------------
+
+# input data set
+D = read($1)
+
+# label attr id (must be a valid index > 0)  
+label_index = $2
+
+# feature attributes, column vector of indices
+feature_indices = read($3) 
+
+# can be either 1 (scale) or 0 (categorical)
+label_measurement_level = $4 
+
+# measurement levels for features, 0/1 column vector
+feature_measurement_levels = read($5) 
+
+sz = ncol(D)
+
+# store for pvalues and pearson's r
+stats = matrix(0, rows=sz, cols=1)
+# store for type of test performed: 1 is chi-sq, 2 is ftest, 3 is pearson's
+tests = matrix(0, rows=sz, cols=1)
+# store for covariances used to compute pearson's r
+covariances = matrix(0, rows=sz, cols=1)
+# store for standard deviations used to compute pearson's r
+standard_deviations = matrix(0, rows=sz, cols=1)
+
+labels = D[,label_index]
+
+labelCorrection = 0
+if(label_measurement_level == 1){
+	numLabels = nrow(labels)
+    cmLabels = moment(labels,2)
+    stdLabels = sqrt(cmLabels * (numLabels/(numLabels-1.0)) )
+	standard_deviations[label_index,1] = stdLabels
+}else{
+	labelCorrection = 1 - min(labels)
+	labels = labels + labelCorrection
+}
+
+mx = colMaxs(D)
+mn = colMins(D)	
+num_distinct_values = mx-mn+1
+max_num_distinct_values = 0
+for(i1 in 1:nrow(feature_indices)){
+	feature_index1 = castAsScalar(feature_indices[i1,1])
+	num = castAsScalar(num_distinct_values[1,feature_index1])
+	if(castAsScalar(feature_measurement_levels[i1,1]) == 0 & num >= max_num_distinct_values){
+		max_num_distinct_values = num
+	}
+}
+distinct_label_values = matrix(0, rows=1, cols=1)	
+contingencyTableSz = 1
+maxNumberOfGroups = 1
+if(max_num_distinct_values != 0){
+	maxNumberOfGroups = max_num_distinct_values
+}
+if(label_measurement_level==0){
+	distinct_label_values = aggregate(target=labels, groups=labels, fn="count")
+	if(max_num_distinct_values != 0){
+		contingencyTableSz = max_num_distinct_values*nrow(distinct_label_values)
+	}
+	maxNumberOfGroups = max(maxNumberOfGroups, nrow(distinct_label_values))
+}
+# store for contingency table cell values
+contingencyTablesCounts = matrix(0, rows=sz, cols=contingencyTableSz)
+# store for contingency table label(row) assignments
+contingencyTablesLabelValues = matrix(0, rows=sz, cols=contingencyTableSz)
+# store for contingency table feature(col) assignments
+contingencyTablesFeatureValues = matrix(0, rows=sz, cols=contingencyTableSz)
+# store for distinct values
+featureValues = matrix(0, rows=sz, cols=maxNumberOfGroups)
+# store for counts of distinct values
+featureCounts = matrix(0, rows=sz, cols=maxNumberOfGroups)
+# store for group means
+featureMeans = matrix(0, rows=sz, cols=maxNumberOfGroups)
+# store for group standard deviations
+featureSTDs = matrix(0, rows=sz, cols=maxNumberOfGroups)
+
+if(label_measurement_level == 0){
+	featureCounts[label_index,1:nrow(distinct_label_values)] = t(distinct_label_values)
+	parfor(i2 in 1:nrow(distinct_label_values)){
+		featureValues[label_index,i2] = i2-labelCorrection
+	}
+}
+
+parfor(i3 in 1:nrow(feature_indices), check=0){
+	feature_index2 = castAsScalar(feature_indices[i3,1])
+	feature_measurement_level = castAsScalar(feature_measurement_levels[i3,1])
+	
+	feature = D[,feature_index2]
+	
+	if(feature_measurement_level == 0){
+		featureCorrection = 1 - min(feature)
+		feature = feature + featureCorrection
 			
-            parfor(i4 in 1:nrow(contingencyTable), check=0){
-				parfor(j in 1:ncol(contingencyTable), check=0){
-					contingencyTableLabelValues[1, ncol(contingencyTable)*(i4-1)+j] = i4-labelCorrection
-					contingencyTableFeatureValues[1, ncol(contingencyTable)*(i4-1)+j] = j-featureCorrection 
-				}
-			}
+		if(label_measurement_level == feature_measurement_level){
+			# categorical-categorical
+			tests[feature_index2,1] = 1
+			[pVal, contingencyTable, rowMarginals, colMarginals] = bivar_cc(labels, feature)
+			stats[feature_index2,1] = pVal
+			
+			sz3=1
+			if(1==1){
+				sz3 = nrow(contingencyTable)*ncol(contingencyTable)
+			}
+			contingencyTableLabelValues = matrix(0, rows=1, cols=sz3)
+			contingencyTableFeatureValues = matrix(0, rows=1, cols=sz3)
+			
+            parfor(i4 in 1:nrow(contingencyTable), check=0){
+				parfor(j in 1:ncol(contingencyTable), check=0){
+					contingencyTableLabelValues[1, ncol(contingencyTable)*(i4-1)+j] = i4-labelCorrection
+					contingencyTableFeatureValues[1, ncol(contingencyTable)*(i4-1)+j] = j-featureCorrection 
+				}
+			}
 			contingencyTableCounts = matrix(contingencyTable, rows=1, cols=sz3, byrow=TRUE)
-            contingencyTablesCounts[feature_index2,1:sz3] = contingencyTableCounts
-            
-			contingencyTablesLabelValues[feature_index2,1:sz3] = contingencyTableLabelValues
-			contingencyTablesFeatureValues[feature_index2,1:sz3] = contingencyTableFeatureValues
-			
-			featureCounts[feature_index2,1:ncol(colMarginals)] = colMarginals
-			parfor(i5 in 1:ncol(colMarginals), check=0){
-				featureValues[feature_index2,i5] = i5-featureCorrection
-			}
-		}else{
-			# label is scale, feature is categorical
-			tests[feature_index2,1] = 2
-			[pVal, frequencies, means, variances] = bivar_sc(labels, feature)
-			stats[feature_index2,1] = pVal
-			featureCounts[feature_index2,1:nrow(frequencies)] = t(frequencies)
-			parfor(i6 in 1:nrow(frequencies), check=0){
-				featureValues[feature_index2,i6] = i6 - featureCorrection
-			}
-			featureMeans[feature_index2,1:nrow(means)] = t(means)
-			featureSTDs[feature_index2,1:nrow(variances)] = t(sqrt(variances))
-		}
-	}else{
-		if(label_measurement_level == feature_measurement_level){
-			# scale-scale
-			tests[feature_index2,1] = 3
-			[r, covariance, stdX, stdY] = bivar_ss(labels, feature)
-			stats[feature_index2,1] = r
-			covariances[feature_index2,1] = covariance
-			standard_deviations[feature_index2,1] = stdY
-		}else{
-			# label is categorical, feature is scale
-			tests[feature_index2,1] = 2
-			[pVal, frequencies, means, variances] = bivar_sc(feature, labels)
-			stats[feature_index2,1] = pVal
-			featureMeans[feature_index2,1:nrow(means)] = t(means)
-			featureSTDs[feature_index2,1:nrow(variances)] = t(sqrt(variances))
-		}
-	}
-}
-
-write(stats, $6, format="text")
-write(tests, $7, format="text")
-write(covariances, $8, format="text")
-write(standard_deviations, $9, format="text")
-write(contingencyTablesCounts, $10, format="text")
-write(contingencyTablesLabelValues, $11, format="text")
-write(contingencyTablesFeatureValues, $12, format="text")
-write(featureValues, $13, format="text")
-write(featureCounts, $14, format="text")
-write(featureMeans, $15, format="text")
-write(featureSTDs, $16, format="text")
-
-# -----------------------------------------------------------------------------------------------------------
-
-bivar_ss = function(Matrix[Double] X, Matrix[Double] Y) return (Double R, Double covXY, Double sigmaX, Double sigmaY) {
-
-    # Unweighted co-variance
-    covXY = cov(X,Y)
-
-    # compute standard deviations for both X and Y by computing 2^nd central moment
-    W = nrow(X)
-    m2X = moment(X,2)
-    m2Y = moment(Y,2)
-    sigmaX = sqrt(m2X * (W/(W-1.0)) )
-    sigmaY = sqrt(m2Y * (W/(W-1.0)) )
-
-
-    # Pearson's R
-    R = covXY / (sigmaX*sigmaY)
-}
-
-# -----------------------------------------------------------------------------------------------------------
-
-bivar_cc = function(Matrix[Double] A, Matrix[Double] B) return (Double pval, Matrix[Double] contingencyTable, Matrix[Double] rowMarginals, Matrix[Double] colMarginals) {
-
-    # Contingency Table
-    FF = table(A,B)
-
+            contingencyTablesCounts[feature_index2,1:sz3] = contingencyTableCounts
+            
+			contingencyTablesLabelValues[feature_index2,1:sz3] = contingencyTableLabelValues
+			contingencyTablesFeatureValues[feature_index2,1:sz3] = contingencyTableFeatureValues
+			
+			featureCounts[feature_index2,1:ncol(colMarginals)] = colMarginals
+			parfor(i5 in 1:ncol(colMarginals), check=0){
+				featureValues[feature_index2,i5] = i5-featureCorrection
+			}
+		}else{
+			# label is scale, feature is categorical
+			tests[feature_index2,1] = 2
+			[pVal, frequencies, means, variances] = bivar_sc(labels, feature)
+			stats[feature_index2,1] = pVal
+			featureCounts[feature_index2,1:nrow(frequencies)] = t(frequencies)
+			parfor(i6 in 1:nrow(frequencies), check=0){
+				featureValues[feature_index2,i6] = i6 - featureCorrection
+			}
+			featureMeans[feature_index2,1:nrow(means)] = t(means)
+			featureSTDs[feature_index2,1:nrow(variances)] = t(sqrt(variances))
+		}
+	}else{
+		if(label_measurement_level == feature_measurement_level){
+			# scale-scale
+			tests[feature_index2,1] = 3
+			[r, covariance, stdX, stdY] = bivar_ss(labels, feature)
+			stats[feature_index2,1] = r
+			covariances[feature_index2,1] = covariance
+			standard_deviations[feature_index2,1] = stdY
+		}else{
+			# label is categorical, feature is scale
+			tests[feature_index2,1] = 2
+			[pVal, frequencies, means, variances] = bivar_sc(feature, labels)
+			stats[feature_index2,1] = pVal
+			featureMeans[feature_index2,1:nrow(means)] = t(means)
+			featureSTDs[feature_index2,1:nrow(variances)] = t(sqrt(variances))
+		}
+	}
+}
+
+write(stats, $6, format="text")
+write(tests, $7, format="text")
+write(covariances, $8, format="text")
+write(standard_deviations, $9, format="text")
+write(contingencyTablesCounts, $10, format="text")
+write(contingencyTablesLabelValues, $11, format="text")
+write(contingencyTablesFeatureValues, $12, format="text")
+write(featureValues, $13, format="text")
+write(featureCounts, $14, format="text")
+write(featureMeans, $15, format="text")
+write(featureSTDs, $16, format="text")
+
+# -----------------------------------------------------------------------------------------------------------
+
+bivar_ss = function(Matrix[Double] X, Matrix[Double] Y) return (Double R, Double covXY, Double sigmaX, Double sigmaY) {
+
+    # Unweighted co-variance
+    covXY = cov(X,Y)
+
+    # compute standard deviations for both X and Y by computing 2^nd central moment
+    W = nrow(X)
+    m2X = moment(X,2)
+    m2Y = moment(Y,2)
+    sigmaX = sqrt(m2X * (W/(W-1.0)) )
+    sigmaY = sqrt(m2Y * (W/(W-1.0)) )
+
+
+    # Pearson's R
+    R = covXY / (sigmaX*sigmaY)
+}
+
+# -----------------------------------------------------------------------------------------------------------
+
+bivar_cc = function(Matrix[Double] A, Matrix[Double] B) return (Double pval, Matrix[Double] contingencyTable, Matrix[Double] rowMarginals, Matrix[Double] colMarginals) {
+
+    # Contingency Table
+    FF = table(A,B)
+
     tmp = removeEmpty(target=FF, margin="rows"); 
     F = removeEmpty(target=tmp, margin="cols");
 
-    # Chi-Squared
-    W = sum(F)
-    r = rowSums(F)
-    c = colSums(F)
-    E = (r %*% c)/W
-    E = ppred(E, 0, "==")*0.0001 + E
-    T = (F-E)^2/E
-    chi_squared = sum(T)
-
-    # compute p-value
-    degFreedom = (nrow(F)-1)*(ncol(F)-1)
-    pValue = pchisq(target=chi_squared, df=degFreedom, lower.tail=FALSE)
-
-
-    # Assign return values
-    pval = pValue
-    contingencyTable = F
-    rowMarginals = r
-    colMarginals = c
-}
-
-# -----------------------------------------------------------------------------------------------------------
-
-# Y points to SCALE variable
-# A points to CATEGORICAL variable
-bivar_sc = function(Matrix[Double] Y, Matrix[Double] A) return (Double pVal, Matrix[Double] CFreqs, Matrix[Double] CMeans, Matrix[Double] CVars ) {
-	# mean and variance in target variable
-    W = nrow(A)
-    my = mean(Y)
-    varY = moment(Y,2) * W/(W-1.0)
-
-    # category-wise (frequencies, means, variances)
-    CFreqs1 = aggregate(target=Y, groups=A, fn="count")
+    # Chi-Squared
+    W = sum(F)
+    r = rowSums(F)
+    c = colSums(F)
+    E = (r %*% c)/W
+    E = ppred(E, 0, "==")*0.0001 + E
+    T = (F-E)^2/E
+    chi_squared = sum(T)
+
+    # compute p-value
+    degFreedom = (nrow(F)-1)*(ncol(F)-1)
+    pValue = pchisq(target=chi_squared, df=degFreedom, lower.tail=FALSE)
+
+
+    # Assign return values
+    pval = pValue
+    contingencyTable = F
+    rowMarginals = r
+    colMarginals = c
+}
+
+# -----------------------------------------------------------------------------------------------------------
+
+# Y points to SCALE variable
+# A points to CATEGORICAL variable
+bivar_sc = function(Matrix[Double] Y, Matrix[Double] A) return (Double pVal, Matrix[Double] CFreqs, Matrix[Double] CMeans, Matrix[Double] CVars ) {
+	# mean and variance in target variable
+    W = nrow(A)
+    my = mean(Y)
+    varY = moment(Y,2) * W/(W-1.0)
+
+    # category-wise (frequencies, means, variances)
+    CFreqs1 = aggregate(target=Y, groups=A, fn="count")
     present_domain_vals_mat = removeEmpty(target=diag(1-ppred(CFreqs1, 0, "==")), margin="rows")
     CFreqs = present_domain_vals_mat %*% CFreqs1
 
-    CMeans = present_domain_vals_mat %*% aggregate(target=Y, groups=A, fn="mean")
-    CVars = present_domain_vals_mat %*% aggregate(target=Y, groups=A, fn="variance")
-    
-    # number of categories
-    R = nrow(CFreqs)
-    df1 = R-1
-    df2 = W-R
-
-	anova_num = sum( (CFreqs*(CMeans-my)^2) )/(R-1)
-    anova_den = sum( (CFreqs-1)*CVars )/(W-R)
-    AnovaF = anova_num/anova_den
-    pVal = pf(target=AnovaF, df1=df1, df2=df2, lower.tail=FALSE)
-}
+    CMeans = present_domain_vals_mat %*% aggregate(target=Y, groups=A, fn="mean")
+    CVars = present_domain_vals_mat %*% aggregate(target=Y, groups=A, fn="variance")
+    
+    # number of categories
+    R = nrow(CFreqs)
+    df1 = R-1
+    df2 = W-R
+
+	anova_num = sum( (CFreqs*(CMeans-my)^2) )/(R-1)
+    anova_den = sum( (CFreqs-1)*CVars )/(W-R)
+    AnovaF = anova_num/anova_den
+    pVal = pf(target=AnovaF, df1=df1, df2=df2, lower.tail=FALSE)
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/mdabivar/MDABivariateStats.pydml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/mdabivar/MDABivariateStats.pydml b/src/test/scripts/applications/mdabivar/MDABivariateStats.pydml
index c899065..7fbc101 100644
--- a/src/test/scripts/applications/mdabivar/MDABivariateStats.pydml
+++ b/src/test/scripts/applications/mdabivar/MDABivariateStats.pydml
@@ -1,246 +1,246 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# Main starts here -----------------------------------------------------------------------------------------------------------
-
-# input data set
-D = load($1)
-
-# label attr id (must be a valid index > 0)  
-label_index = $2
-
-# feature attributes, column vector of indices
-feature_indices = load($3) 
-
-# can be either 1 (scale) or 0 (categorical)
-label_measurement_level = $4 
-
-# measurement levels for features, 0/1 column vector
-feature_measurement_levels = read($5) 
-
-sz = ncol(D)
-
-# store for pvalues and pearson's r
-stats = full(0, rows=sz, cols=1)
-# store for type of test performed: 1 is chi-sq, 2 is ftest, 3 is pearson's
-tests = full(0, rows=sz, cols=1)
-# store for covariances used to compute pearson's r
-covariances = full(0, rows=sz, cols=1)
-# store for standard deviations used to compute pearson's r
-standard_deviations = full(0, rows=sz, cols=1)
-
-labels = D[,label_index]
-
-labelCorrection = 0
-if(label_measurement_level == 1):
-    numLabels = nrow(labels)
-    cmLabels = moment(labels,2)
-    stdLabels = sqrt(cmLabels * (numLabels/(numLabels-1.0)) )
-    standard_deviations[label_index,1] = stdLabels
-else:
-    labelCorrection = 1 - min(labels)
-    labels = labels + labelCorrection
-
-mx = colMaxs(D)
-mn = colMins(D)
-num_distinct_values = mx-mn+1
-max_num_distinct_values = 0
-for(i1 in 1:nrow(feature_indices)):
-    feature_index1 = castAsScalar(feature_indices[i1,1])
-    num = castAsScalar(num_distinct_values[1,feature_index1])
-    if(castAsScalar(feature_measurement_levels[i1,1]) == 0 & num >= max_num_distinct_values):
-        max_num_distinct_values = num
-distinct_label_values = full(0, rows=1, cols=1)
-contingencyTableSz = 1
-maxNumberOfGroups = 1
-if(max_num_distinct_values != 0):
-    maxNumberOfGroups = max_num_distinct_values
-if(label_measurement_level==0):
-    distinct_label_values = aggregate(target=labels, groups=labels, fn="count")
-    if(max_num_distinct_values != 0):
-        contingencyTableSz = max_num_distinct_values*nrow(distinct_label_values)
-    maxNumberOfGroups = max(maxNumberOfGroups, nrow(distinct_label_values))
-# store for contingency table cell values
-contingencyTablesCounts = full(0, rows=sz, cols=contingencyTableSz)
-# store for contingency table label(row) assignments
-contingencyTablesLabelValues = full(0, rows=sz, cols=contingencyTableSz)
-# store for contingency table feature(col) assignments
-contingencyTablesFeatureValues = full(0, rows=sz, cols=contingencyTableSz)
-# store for distinct values
-featureValues = full(0, rows=sz, cols=maxNumberOfGroups)
-# store for counts of distinct values
-featureCounts = full(0, rows=sz, cols=maxNumberOfGroups)
-# store for group means
-featureMeans = full(0, rows=sz, cols=maxNumberOfGroups)
-# store for group standard deviations
-featureSTDs = full(0, rows=sz, cols=maxNumberOfGroups)
-
-if(label_measurement_level == 0):
-    featureCounts[label_index,1:nrow(distinct_label_values)] = transpose(distinct_label_values)
-    parfor(i2 in 1:nrow(distinct_label_values)):
-        featureValues[label_index,i2] = i2-labelCorrection
-
-parfor(i3 in 1:nrow(feature_indices), check=0):
-    feature_index2 = castAsScalar(feature_indices[i3,1])
-    feature_measurement_level = castAsScalar(feature_measurement_levels[i3,1])
-    
-    feature = D[,feature_index2]
-    
-    if(feature_measurement_level == 0):
-        featureCorrection = 1 - min(feature)
-        feature = feature + featureCorrection
-        
-        if(label_measurement_level == feature_measurement_level):
-            # categorical-categorical
-            tests[feature_index2,1] = 1
-            [pVal, contingencyTable, rowMarginals, colMarginals] = bivar_cc(labels, feature)
-            stats[feature_index2,1] = pVal
-            
-            sz3=1
-            if(1==1):
-                sz3 = nrow(contingencyTable)*ncol(contingencyTable)
-            contingencyTableLabelValues = full(0, rows=1, cols=sz3)
-            contingencyTableFeatureValues = full(0, rows=1, cols=sz3)
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# Main starts here -----------------------------------------------------------------------------------------------------------
+
+# input data set
+D = load($1)
+
+# label attr id (must be a valid index > 0)  
+label_index = $2
+
+# feature attributes, column vector of indices
+feature_indices = load($3) 
+
+# can be either 1 (scale) or 0 (categorical)
+label_measurement_level = $4 
+
+# measurement levels for features, 0/1 column vector
+feature_measurement_levels = read($5) 
+
+sz = ncol(D)
+
+# store for pvalues and pearson's r
+stats = full(0, rows=sz, cols=1)
+# store for type of test performed: 1 is chi-sq, 2 is ftest, 3 is pearson's
+tests = full(0, rows=sz, cols=1)
+# store for covariances used to compute pearson's r
+covariances = full(0, rows=sz, cols=1)
+# store for standard deviations used to compute pearson's r
+standard_deviations = full(0, rows=sz, cols=1)
+
+labels = D[,label_index]
+
+labelCorrection = 0
+if(label_measurement_level == 1):
+    numLabels = nrow(labels)
+    cmLabels = moment(labels,2)
+    stdLabels = sqrt(cmLabels * (numLabels/(numLabels-1.0)) )
+    standard_deviations[label_index,1] = stdLabels
+else:
+    labelCorrection = 1 - min(labels)
+    labels = labels + labelCorrection
+
+mx = colMaxs(D)
+mn = colMins(D)
+num_distinct_values = mx-mn+1
+max_num_distinct_values = 0
+for(i1 in 1:nrow(feature_indices)):
+    feature_index1 = castAsScalar(feature_indices[i1,1])
+    num = castAsScalar(num_distinct_values[1,feature_index1])
+    if(castAsScalar(feature_measurement_levels[i1,1]) == 0 & num >= max_num_distinct_values):
+        max_num_distinct_values = num
+distinct_label_values = full(0, rows=1, cols=1)
+contingencyTableSz = 1
+maxNumberOfGroups = 1
+if(max_num_distinct_values != 0):
+    maxNumberOfGroups = max_num_distinct_values
+if(label_measurement_level==0):
+    distinct_label_values = aggregate(target=labels, groups=labels, fn="count")
+    if(max_num_distinct_values != 0):
+        contingencyTableSz = max_num_distinct_values*nrow(distinct_label_values)
+    maxNumberOfGroups = max(maxNumberOfGroups, nrow(distinct_label_values))
+# store for contingency table cell values
+contingencyTablesCounts = full(0, rows=sz, cols=contingencyTableSz)
+# store for contingency table label(row) assignments
+contingencyTablesLabelValues = full(0, rows=sz, cols=contingencyTableSz)
+# store for contingency table feature(col) assignments
+contingencyTablesFeatureValues = full(0, rows=sz, cols=contingencyTableSz)
+# store for distinct values
+featureValues = full(0, rows=sz, cols=maxNumberOfGroups)
+# store for counts of distinct values
+featureCounts = full(0, rows=sz, cols=maxNumberOfGroups)
+# store for group means
+featureMeans = full(0, rows=sz, cols=maxNumberOfGroups)
+# store for group standard deviations
+featureSTDs = full(0, rows=sz, cols=maxNumberOfGroups)
+
+if(label_measurement_level == 0):
+    featureCounts[label_index,1:nrow(distinct_label_values)] = transpose(distinct_label_values)
+    parfor(i2 in 1:nrow(distinct_label_values)):
+        featureValues[label_index,i2] = i2-labelCorrection
+
+parfor(i3 in 1:nrow(feature_indices), check=0):
+    feature_index2 = castAsScalar(feature_indices[i3,1])
+    feature_measurement_level = castAsScalar(feature_measurement_levels[i3,1])
+    
+    feature = D[,feature_index2]
+    
+    if(feature_measurement_level == 0):
+        featureCorrection = 1 - min(feature)
+        feature = feature + featureCorrection
+        
+        if(label_measurement_level == feature_measurement_level):
+            # categorical-categorical
+            tests[feature_index2,1] = 1
+            [pVal, contingencyTable, rowMarginals, colMarginals] = bivar_cc(labels, feature)
+            stats[feature_index2,1] = pVal
+            
+            sz3=1
+            if(1==1):
+                sz3 = nrow(contingencyTable)*ncol(contingencyTable)
+            contingencyTableLabelValues = full(0, rows=1, cols=sz3)
+            contingencyTableFeatureValues = full(0, rows=1, cols=sz3)
             
-            parfor(i4 in 1:nrow(contingencyTable), check=0):
-                parfor(j in 1:ncol(contingencyTable), check=0):
-                    contingencyTableLabelValues[1, ncol(contingencyTable)*(i4-1)+j] = i4-labelCorrection
-                    contingencyTableFeatureValues[1, ncol(contingencyTable)*(i4-1)+j] = j-featureCorrection 
+            parfor(i4 in 1:nrow(contingencyTable), check=0):
+                parfor(j in 1:ncol(contingencyTable), check=0):
+                    contingencyTableLabelValues[1, ncol(contingencyTable)*(i4-1)+j] = i4-labelCorrection
+                    contingencyTableFeatureValues[1, ncol(contingencyTable)*(i4-1)+j] = j-featureCorrection 
             contingencyTableCounts = contingencyTable.reshape(rows=1, cols=sz3)
-            contingencyTablesCounts[feature_index2,1:sz3] = contingencyTableCounts
-            
-            contingencyTablesLabelValues[feature_index2,1:sz3] = contingencyTableLabelValues
-            contingencyTablesFeatureValues[feature_index2,1:sz3] = contingencyTableFeatureValues
-            
-            featureCounts[feature_index2,1:ncol(colMarginals)] = colMarginals
-            parfor(i5 in 1:ncol(colMarginals), check=0):
-                featureValues[feature_index2,i5] = i5-featureCorrection
-        else:
-            # label is scale, feature is categorical
-            tests[feature_index2,1] = 2
-            [pVal, frequencies, means, variances] = bivar_sc(labels, feature)
-            stats[feature_index2,1] = pVal
-            featureCounts[feature_index2,1:nrow(frequencies)] = transpose(frequencies)
-            parfor(i6 in 1:nrow(frequencies), check=0):
-                featureValues[feature_index2,i6] = i6 - featureCorrection
-            featureMeans[feature_index2,1:nrow(means)] = transpose(means)
-            featureSTDs[feature_index2,1:nrow(variances)] = transpose(sqrt(variances))
-    else:
-        if(label_measurement_level == feature_measurement_level):
-            # scale-scale
-            tests[feature_index2,1] = 3
-            [r, covariance, stdX, stdY] = bivar_ss(labels, feature)
-            stats[feature_index2,1] = r
-            covariances[feature_index2,1] = covariance
-            standard_deviations[feature_index2,1] = stdY
-        else:
-            # label is categorical, feature is scale
-            tests[feature_index2,1] = 2
-            [pVal, frequencies, means, variances] = bivar_sc(feature, labels)
-            stats[feature_index2,1] = pVal
-            featureMeans[feature_index2,1:nrow(means)] = transpose(means)
-            featureSTDs[feature_index2,1:nrow(variances)] = transpose(sqrt(variances))
-    # end if(feature_measurement_level == 0)
-# end parfor(i3 in 1:nrow(feature_indices), check=0)
-
-save(stats, $6, format="text")
-save(tests, $7, format="text")
-save(covariances, $8, format="text")
-save(standard_deviations, $9, format="text")
-save(contingencyTablesCounts, $10, format="text")
-save(contingencyTablesLabelValues, $11, format="text")
-save(contingencyTablesFeatureValues, $12, format="text")
-save(featureValues, $13, format="text")
-save(featureCounts, $14, format="text")
-save(featureMeans, $15, format="text")
-save(featureSTDs, $16, format="text")
-
-# -----------------------------------------------------------------------------------------------------------
-
-def bivar_ss(X:matrix[float], Y:matrix[float]) -> (R:float, covXY:float, sigmaX:float, sigmaY:float):
-    # Unweighted co-variance
-    covXY = cov(X,Y)
-    
-    # compute standard deviations for both X and Y by computing 2^nd central moment
-    W = nrow(X)
-    m2X = moment(X,2)
-    m2Y = moment(Y,2)
-    sigmaX = sqrt(m2X * (W/(W-1.0)) )
-    sigmaY = sqrt(m2Y * (W/(W-1.0)) )
-    
-    # Pearson's R
-    R = covXY / (sigmaX*sigmaY)
-
-# -----------------------------------------------------------------------------------------------------------
-
-def bivar_cc(A:matrix[float], B:matrix[float]) -> (pval:float, contingencyTable:matrix[float], rowMarginals:matrix[float], colMarginals:matrix[float]):
-    # Contingency Table
-    FF = table(A,B)
-    
-    tmp = removeEmpty(target=FF, axis=0)
-    F = removeEmpty(target=tmp, axis=1)
-    
-    # Chi-Squared
-    W = sum(F)
-    r = rowSums(F)
-    c = colSums(F)
-    E = (dot(r, c))/W
-    E = ppred(E, 0, "==")*0.0001 + E
-    T = (F-E)**2/E
-    chi_squared = sum(T)
-    # compute p-value
-    degFreedom = (nrow(F)-1)*(ncol(F)-1)
-    pValue = pchisq(target=chi_squared, df=degFreedom, lower.tail=False)
-    
-    # Assign return values
-    pval = pValue
-    contingencyTable = F
-    rowMarginals = r
-    colMarginals = c
-
-# -----------------------------------------------------------------------------------------------------------
-
-# Y points to SCALE variable
-# A points to CATEGORICAL variable
-def bivar_sc(Y:matrix[float], A:matrix[float]) -> (pVal:float, CFreqs:matrix[float], CMeans:matrix[float], CVars:matrix[float]):
-    # mean and variance in target variable
-    W = nrow(A)
-    my = mean(Y)
-    varY = moment(Y,2) * W/(W-1.0)
-    
-    # category-wise (frequencies, means, variances)
-    CFreqs1 = aggregate(target=Y, groups=A, fn="count")
-    present_domain_vals_mat = removeEmpty(target=diag(1-ppred(CFreqs1, 0, "==")), axis=0)
-    CFreqs = dot(present_domain_vals_mat, CFreqs1)
-    
-    CMeans = dot(present_domain_vals_mat, aggregate(target=Y, groups=A, fn="mean"))
-    CVars = dot(present_domain_vals_mat, aggregate(target=Y, groups=A, fn="variance"))
-    
-    # number of categories
-    R = nrow(CFreqs)
-    df1 = R-1
-    df2 = W-R
-    
-    anova_num = sum( (CFreqs*(CMeans-my)**2) )/(R-1)
-    anova_den = sum( (CFreqs-1)*CVars )/(W-R)
-    AnovaF = anova_num/anova_den
-    pVal = pf(target=AnovaF, df1=df1, df2=df2, lower.tail=False)
-
+            contingencyTablesCounts[feature_index2,1:sz3] = contingencyTableCounts
+            
+            contingencyTablesLabelValues[feature_index2,1:sz3] = contingencyTableLabelValues
+            contingencyTablesFeatureValues[feature_index2,1:sz3] = contingencyTableFeatureValues
+            
+            featureCounts[feature_index2,1:ncol(colMarginals)] = colMarginals
+            parfor(i5 in 1:ncol(colMarginals), check=0):
+                featureValues[feature_index2,i5] = i5-featureCorrection
+        else:
+            # label is scale, feature is categorical
+            tests[feature_index2,1] = 2
+            [pVal, frequencies, means, variances] = bivar_sc(labels, feature)
+            stats[feature_index2,1] = pVal
+            featureCounts[feature_index2,1:nrow(frequencies)] = transpose(frequencies)
+            parfor(i6 in 1:nrow(frequencies), check=0):
+                featureValues[feature_index2,i6] = i6 - featureCorrection
+            featureMeans[feature_index2,1:nrow(means)] = transpose(means)
+            featureSTDs[feature_index2,1:nrow(variances)] = transpose(sqrt(variances))
+    else:
+        if(label_measurement_level == feature_measurement_level):
+            # scale-scale
+            tests[feature_index2,1] = 3
+            [r, covariance, stdX, stdY] = bivar_ss(labels, feature)
+            stats[feature_index2,1] = r
+            covariances[feature_index2,1] = covariance
+            standard_deviations[feature_index2,1] = stdY
+        else:
+            # label is categorical, feature is scale
+            tests[feature_index2,1] = 2
+            [pVal, frequencies, means, variances] = bivar_sc(feature, labels)
+            stats[feature_index2,1] = pVal
+            featureMeans[feature_index2,1:nrow(means)] = transpose(means)
+            featureSTDs[feature_index2,1:nrow(variances)] = transpose(sqrt(variances))
+    # end if(feature_measurement_level == 0)
+# end parfor(i3 in 1:nrow(feature_indices), check=0)
+
+save(stats, $6, format="text")
+save(tests, $7, format="text")
+save(covariances, $8, format="text")
+save(standard_deviations, $9, format="text")
+save(contingencyTablesCounts, $10, format="text")
+save(contingencyTablesLabelValues, $11, format="text")
+save(contingencyTablesFeatureValues, $12, format="text")
+save(featureValues, $13, format="text")
+save(featureCounts, $14, format="text")
+save(featureMeans, $15, format="text")
+save(featureSTDs, $16, format="text")
+
+# -----------------------------------------------------------------------------------------------------------
+
+def bivar_ss(X:matrix[float], Y:matrix[float]) -> (R:float, covXY:float, sigmaX:float, sigmaY:float):
+    # Unweighted co-variance
+    covXY = cov(X,Y)
+    
+    # compute standard deviations for both X and Y by computing 2^nd central moment
+    W = nrow(X)
+    m2X = moment(X,2)
+    m2Y = moment(Y,2)
+    sigmaX = sqrt(m2X * (W/(W-1.0)) )
+    sigmaY = sqrt(m2Y * (W/(W-1.0)) )
+    
+    # Pearson's R
+    R = covXY / (sigmaX*sigmaY)
+
+# -----------------------------------------------------------------------------------------------------------
+
+def bivar_cc(A:matrix[float], B:matrix[float]) -> (pval:float, contingencyTable:matrix[float], rowMarginals:matrix[float], colMarginals:matrix[float]):
+    # Contingency Table
+    FF = table(A,B)
+    
+    tmp = removeEmpty(target=FF, axis=0)
+    F = removeEmpty(target=tmp, axis=1)
+    
+    # Chi-Squared
+    W = sum(F)
+    r = rowSums(F)
+    c = colSums(F)
+    E = (dot(r, c))/W
+    E = ppred(E, 0, "==")*0.0001 + E
+    T = (F-E)**2/E
+    chi_squared = sum(T)
+    # compute p-value
+    degFreedom = (nrow(F)-1)*(ncol(F)-1)
+    pValue = pchisq(target=chi_squared, df=degFreedom, lower.tail=False)
+    
+    # Assign return values
+    pval = pValue
+    contingencyTable = F
+    rowMarginals = r
+    colMarginals = c
+
+# -----------------------------------------------------------------------------------------------------------
+
+# Y points to SCALE variable
+# A points to CATEGORICAL variable
+def bivar_sc(Y:matrix[float], A:matrix[float]) -> (pVal:float, CFreqs:matrix[float], CMeans:matrix[float], CVars:matrix[float]):
+    # mean and variance in target variable
+    W = nrow(A)
+    my = mean(Y)
+    varY = moment(Y,2) * W/(W-1.0)
+    
+    # category-wise (frequencies, means, variances)
+    CFreqs1 = aggregate(target=Y, groups=A, fn="count")
+    present_domain_vals_mat = removeEmpty(target=diag(1-ppred(CFreqs1, 0, "==")), axis=0)
+    CFreqs = dot(present_domain_vals_mat, CFreqs1)
+    
+    CMeans = dot(present_domain_vals_mat, aggregate(target=Y, groups=A, fn="mean"))
+    CVars = dot(present_domain_vals_mat, aggregate(target=Y, groups=A, fn="variance"))
+    
+    # number of categories
+    R = nrow(CFreqs)
+    df1 = R-1
+    df2 = W-R
+    
+    anova_num = sum( (CFreqs*(CMeans-my)**2) )/(R-1)
+    anova_den = sum( (CFreqs-1)*CVars )/(W-R)
+    AnovaF = anova_num/anova_den
+    pVal = pf(target=AnovaF, df1=df1, df2=df2, lower.tail=False)
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/naive-bayes-parfor/naive-bayes.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/naive-bayes-parfor/naive-bayes.R b/src/test/scripts/applications/naive-bayes-parfor/naive-bayes.R
index dc65b8a..a3ca47a 100644
--- a/src/test/scripts/applications/naive-bayes-parfor/naive-bayes.R
+++ b/src/test/scripts/applications/naive-bayes-parfor/naive-bayes.R
@@ -1,71 +1,71 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-args <- commandArgs(TRUE)
-
-library("Matrix")
-
-D = as.matrix(readMM(paste(args[1], "X.mtx", sep="")))
-C = as.matrix(readMM(paste(args[1], "Y.mtx", sep="")))
-
-# reading input args
-numClasses = as.integer(args[2]);
-laplace_correction = as.double(args[3]);
-
-numRows = nrow(D)
-numFeatures = ncol(D)
-
-# Compute conditionals
-
-# Compute the feature counts for each class
-classFeatureCounts = matrix(0, numClasses, numFeatures)
-for (i in 1:numFeatures) {
-  Col = D[,i]
-  classFeatureCounts[,i] = aggregate(as.vector(Col), by=list(as.vector(C)), FUN=sum)[,2];
-}
-
-# Compute the total feature count for each class 
-# and add the number of features to this sum
-# for subsequent regularization (Laplace's rule)
-classSums = rowSums(classFeatureCounts) + numFeatures*laplace_correction
-
-# Compute class conditional probabilities
-ones = matrix(1, 1, numFeatures)
-repClassSums = classSums %*% ones;
-class_conditionals = (classFeatureCounts + laplace_correction) / repClassSums;
-
-# Compute class priors
-class_counts = aggregate(as.vector(C), by=list(as.vector(C)), FUN=length)[,2]
-class_prior = class_counts / numRows;
-
-# Compute accuracy on training set
-ones = matrix(1, numRows, 1)
-D_w_ones = cbind(D, ones)
-model = cbind(class_conditionals, class_prior)
-log_probs = D_w_ones %*% t(log(model))
-pred = max.col(log_probs,ties.method="last");
-acc = sum(pred == C) / numRows * 100
-
-print(paste("Training Accuracy (%): ", acc, sep=""))
-
-# write out the model
-writeMM(as(class_prior, "CsparseMatrix"), paste(args[4], "prior", sep=""));
-writeMM(as(class_conditionals, "CsparseMatrix"), paste(args[4], "conditionals", sep=""));
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+args <- commandArgs(TRUE)
+
+library("Matrix")
+
+D = as.matrix(readMM(paste(args[1], "X.mtx", sep="")))
+C = as.matrix(readMM(paste(args[1], "Y.mtx", sep="")))
+
+# reading input args
+numClasses = as.integer(args[2]);
+laplace_correction = as.double(args[3]);
+
+numRows = nrow(D)
+numFeatures = ncol(D)
+
+# Compute conditionals
+
+# Compute the feature counts for each class
+classFeatureCounts = matrix(0, numClasses, numFeatures)
+for (i in 1:numFeatures) {
+  Col = D[,i]
+  classFeatureCounts[,i] = aggregate(as.vector(Col), by=list(as.vector(C)), FUN=sum)[,2];
+}
+
+# Compute the total feature count for each class 
+# and add the number of features to this sum
+# for subsequent regularization (Laplace's rule)
+classSums = rowSums(classFeatureCounts) + numFeatures*laplace_correction
+
+# Compute class conditional probabilities
+ones = matrix(1, 1, numFeatures)
+repClassSums = classSums %*% ones;
+class_conditionals = (classFeatureCounts + laplace_correction) / repClassSums;
+
+# Compute class priors
+class_counts = aggregate(as.vector(C), by=list(as.vector(C)), FUN=length)[,2]
+class_prior = class_counts / numRows;
+
+# Compute accuracy on training set
+ones = matrix(1, numRows, 1)
+D_w_ones = cbind(D, ones)
+model = cbind(class_conditionals, class_prior)
+log_probs = D_w_ones %*% t(log(model))
+pred = max.col(log_probs,ties.method="last");
+acc = sum(pred == C) / numRows * 100
+
+print(paste("Training Accuracy (%): ", acc, sep=""))
+
+# write out the model
+writeMM(as(class_prior, "CsparseMatrix"), paste(args[4], "prior", sep=""));
+writeMM(as(class_conditionals, "CsparseMatrix"), paste(args[4], "conditionals", sep=""));



[22/55] [partial] incubator-systemml git commit: [SYSTEMML-482] [SYSTEMML-480] Adding a Git attributes file to enfore Unix-styled line endings, and normalizing all of the line endings.

Posted by du...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/impute/wfundInputGenerator1.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/impute/wfundInputGenerator1.dml b/src/test/scripts/applications/impute/wfundInputGenerator1.dml
index 7507958..8457fbd 100644
--- a/src/test/scripts/applications/impute/wfundInputGenerator1.dml
+++ b/src/test/scripts/applications/impute/wfundInputGenerator1.dml
@@ -1,469 +1,469 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# 2013-10-08: THIS IS THE ATTEMPT TO IMPLEMENT HIDDEN STATE AS "HIDDEN REPORTS"
-# THE FIRST TERMS IN THE REPORTS MATRIX ARE THE HIDDEN REPORTS, THE LAST ARE THE KNOWN REPORTS
-#
-# THIS VERSION IS WITH "TRADITIONAL" REGRESSIONS
-#
-
-# hadoop jar SystemML.jar -f test/scripts/applications/impute/wfundInputGenerator.dml -exec singlenode
-#    -args
-#        test/scripts/applications/impute/initial_reports
-#        test/scripts/applications/impute/initial_reports_preprocessed
-#        test/scripts/applications/impute/CReps 
-#        test/scripts/applications/impute/RegresValueMap
-#        test/scripts/applications/impute/RegresFactorDefault
-#        test/scripts/applications/impute/RegresParamMap
-#        test/scripts/applications/impute/RegresCoeffDefault
-#        test/scripts/applications/impute/RegresScaleMult
-
-is_GROUP_4_ENABLED = 1;    #   = 1 or 0
-num_known_terms = 6; # 20; # The number of   known   term reports, feel free to change
-num_predicted_terms = 1;   # The number of predicted term reports, feel free to change
-
-num_terms = 2 * num_known_terms + num_predicted_terms;
-num_attrs = 19;
-
-# Indicator matrix for the "known" values that should not be matched to hidden reports:
-disabled_known_values = matrix (0.0, rows = num_attrs, cols = num_known_terms);
-disabled_known_values [4, 3] = 1.0;
-disabled_known_values [5, 3] = 1.0;
-disabled_known_values [6, 3] = 1.0;
-disabled_known_values [7, 3] = 1.0;
-
-initial_reports_unprocessed = read ($1);
-initial_reports = matrix (0.0, rows = num_attrs, cols = num_terms);
-initial_reports [, 1:num_known_terms] = 
-    initial_reports_unprocessed [, 1:num_known_terms];
-initial_reports [, (num_known_terms + num_predicted_terms + 1):num_terms] = 
-    initial_reports_unprocessed [, 1:num_known_terms];
-
-num_frees_per_term = 13;
-if (is_GROUP_4_ENABLED == 1) {
-    num_frees_per_term = 15;
-}
-
-num_frees = (num_known_terms + num_predicted_terms) * num_frees_per_term;
-
-zero = matrix (0.0, rows = 1, cols = 1);
-
-# ---------------------------------------------------------
-# GENERATE AN AFFINE MAP FROM FREE VARIABLES TO THE REPORTS
-# AFFINE MAP = LINEAR MAP + INITIAL (DEFAULT) REPORTS
-# All free variables are mapped to the "HIDDEN" reports
-# ---------------------------------------------------------
-
-CReps = matrix (0.0, rows = (num_terms * num_attrs), cols = num_frees);
-
-for (t in 1:(num_known_terms + num_predicted_terms))
-{
-    dt = (t-1) * num_attrs;
-    df = (t-1) * num_frees_per_term;
-# constraint that          row1 =  row2 +  row3 +  row4 +  row5 +  row6 + row7
-# translated to free vars: row1 = free1 + free2 + free3 + free4 + free5 + free6
-    CReps [dt +  1, df +  1] = 1.0 + zero;
-    CReps [dt +  1, df +  2] = 1.0 + zero;
-    CReps [dt +  1, df +  3] = 1.0 + zero;
-    CReps [dt +  1, df +  4] = 1.0 + zero;
-    CReps [dt +  1, df +  5] = 1.0 + zero;
-    CReps [dt +  1, df +  6] = 1.0 + zero;
-    CReps [dt +  2, df +  1] = 1.0 + zero;
-    CReps [dt +  3, df +  2] = 1.0 + zero;
-    CReps [dt +  4, df +  3] = 1.0 + zero;
-    CReps [dt +  5, df +  4] = 1.0 + zero;
-    CReps [dt +  6, df +  5] = 1.0 + zero;
-    CReps [dt +  7, df +  6] = 1.0 + zero;
-
-# row 8 is free variable not appearing in any non-free variable
-    CReps [dt +  8, df +  7] = 1.0 + zero;
-
-# constraint that          row9 = row10 + row11 +  row12 +  row13 +  row14 +  row15
-# translated to free vars: row9 = free8 + free9 + free10 + free11 + free12 + free13
-    CReps [dt +  9, df +  8] = 1.0 + zero;
-    CReps [dt +  9, df +  9] = 1.0 + zero;
-    CReps [dt +  9, df + 10] = 1.0 + zero;
-    CReps [dt +  9, df + 11] = 1.0 + zero;
-    CReps [dt +  9, df + 12] = 1.0 + zero;
-    CReps [dt +  9, df + 13] = 1.0 + zero;
-    CReps [dt + 10, df +  8] = 1.0 + zero;
-    CReps [dt + 11, df +  9] = 1.0 + zero;
-    CReps [dt + 12, df + 10] = 1.0 + zero;
-    CReps [dt + 13, df + 11] = 1.0 + zero;
-    CReps [dt + 14, df + 12] = 1.0 + zero;
-    CReps [dt + 15, df + 13] = 1.0 + zero;
-
-# constraint that          row16 =  row14 +  row15
-# translated to free vars: row16 = free14 + free15
-    if (is_GROUP_4_ENABLED == 1) {
-        CReps [dt + 16, df + 14] = 1.0 + zero;
-        CReps [dt + 16, df + 15] = 1.0 + zero;
-        CReps [dt + 17, df + 14] = 1.0 + zero;
-        CReps [dt + 18, df + 15] = 1.0 + zero;
-    }
-
-# constraint that           row19 = total cost (all free variables)
-# translated to free vars:  row19 = all free variables
-    CReps [dt + 19, df +  1] = 1.0 + zero;
-    CReps [dt + 19, df +  2] = 1.0 + zero;
-    CReps [dt + 19, df +  3] = 1.0 + zero;
-    CReps [dt + 19, df +  4] = 1.0 + zero;
-    CReps [dt + 19, df +  5] = 1.0 + zero;
-    CReps [dt + 19, df +  6] = 1.0 + zero;
-    CReps [dt + 19, df +  7] = 1.0 + zero;
-    CReps [dt + 19, df +  8] = 1.0 + zero;
-    CReps [dt + 19, df +  9] = 1.0 + zero;
-    CReps [dt + 19, df + 10] = 1.0 + zero;
-    CReps [dt + 19, df + 11] = 1.0 + zero;
-    CReps [dt + 19, df + 12] = 1.0 + zero;
-    CReps [dt + 19, df + 13] = 1.0 + zero;
-    if (is_GROUP_4_ENABLED == 1) {
-        CReps [dt + 19, df + 14] = 1.0 + zero;
-        CReps [dt + 19, df + 15] = 1.0 + zero;
-    }
-}
-
-
-# ---------------------------------------------------------
-# GENERATE AN AFFINE MAP FROM REPORTS TO REGRESSION FACTORS
-# AFFINE MAP = LINEAR MAP + A VECTOR OF DEFAULTS
-# ---------------------------------------------------------
-
-# We have three types of regressions:
-# 1. For "hidden" reports:
-#    x[t]  ~  aggregate[t], x[t-1],  (x[t-1] - x[t-2])
-# 2. For "observed" reports:
-#    y[t]  ~  x[t] (with coefficient 1)
-# 3. For some parameters: the regularization equations.
-# All regressions follow the 4-factor pattern.
-num_factors = 4; 
-
-# We have one regression equation per time-term for each attribute, 
-# plus a few "special" regularization regression equations:
-num_regularization_regs = 12;
-if (is_GROUP_4_ENABLED == 1) {
-    num_regularization_regs = 16;
-}
-
-num_reg_eqs = num_terms * num_attrs + num_regularization_regs;
-
-RegresValueMap = matrix (0.0, rows = (num_reg_eqs * num_factors), cols = (num_terms * num_attrs));
-RegresFactorDefault = matrix (0.0, rows = (num_reg_eqs * num_factors), cols = 1);
-
-# All regression equations for the same attribute share the same parameters, regardless
-# of the term; some parameters are shared across multiple attributes, (those attributes
-# whose behavior is believed to be similar) as specified in the table below:
-
-num_params = 28;
-if (is_GROUP_4_ENABLED == 1) {
-    num_params = 35;
-}
-
-# Factors: -self[t]  total[t]  self[t-1]  self[t-1]-
-#                                          self[t-2]
-# PARAMS:
-# Group 1:   1.0     prm#01     prm#08     prm#09    Row #01 = free#01 + ... + free#06
-# Group 1:    "      prm#02     prm#10     prm#11    Row #02 = free#01
-# Group 1:    "      prm#03       "          "       Row #03 = free#02
-# Group 1:    "      prm#04       "          "       Row #04 = free#03
-# Group 1:    "      prm#05       "          "       Row #05 = free#04
-# Group 1:    "      prm#06       "          "       Row #06 = free#05
-# Group 1:    "      prm#07       "          "       Row #07 = free#06
-# --------------------------------------------------------------------
-# Group 2:   1.0     prm#12     prm#13     prm#14    Row #08 = free#07
-# --------------------------------------------------------------------
-# Group 3:   1.0     prm#15     prm#22     prm#23    Row #09 = free#08 + ... + free#13
-# Group 3:    "      prm#16     prm#24     prm#25    Row #10 = free#08
-# Group 3:    "      prm#17       "          "       Row #11 = free#09
-# Group 3:    "      prm#18       "          "       Row #12 = free#10
-# Group 3:    "      prm#19       "          "       Row #13 = free#11
-# Group 3:    "      prm#20       "          "       Row #14 = free#12
-# Group 3:    "      prm#21       "          "       Row #15 = free#13
-# --------------------------------------------------------------------
-# GROUP-4 ZEROS: FIVE PARAMETERS REVOKED
-# Group 4:   1.0     prm#29     prm#32     prm#33    Row #16 = free#14 + free#15
-# Group 4:    "      prm#30     prm#34     prm#35    Row #17 = free#14
-# Group 4:    "      prm#31       "          "       Row #18 = free#15
-# --------------------------------------------------------------------
-# Group 5:   1.0     prm#26     prm#27     prm#28    Row #19 = free#01 + ... + free#15
-# 
-# (The aggregates in Groups 1..4 regress on the total cost in Group 5;
-#  the total cost in Group 5 regresses on the intercept.)
-
-# THE LAST REGULARIZATION "REGRESSION" EQUATIONS:
-# Factors:   1.0      -1.0       0.0        0.0
-# PARAMS:
-#          prm#27      1.0       0.0        0.0
-#          prm#28      0.0       0.0        0.0
-#          prm#08      0.0       0.0        0.0
-#          prm#09      0.0       0.0        0.0
-#          prm#10      0.0       0.0        0.0
-#          prm#11      0.0       0.0        0.0
-#          prm#13      0.0       0.0        0.0
-#          prm#14      0.0       0.0        0.0
-#          prm#22      0.0       0.0        0.0
-#          prm#23      0.0       0.0        0.0
-#          prm#24      0.0       0.0        0.0
-#          prm#25      0.0       0.0        0.0
-#          prm#32      0.0       0.0        0.0  # GROUP-4 ZEROS:
-#          prm#33      0.0       0.0        0.0  #   THESE EQUATIONS
-#          prm#34      0.0       0.0        0.0  #   USE REVOKED PARAMETERS
-#          prm#35      0.0       0.0        0.0  #   AND DO NOT APPEAR
-
-
-# --------------------------------------------------------------
-# FIRST, AN AFFINE MAP FROM HIDDEN REPORTS TO REGRESSION FACTORS
-# --------------------------------------------------------------
-
-for (t in 1 : (num_known_terms + num_predicted_terms))
-{
-    for (i in 1 : num_attrs)
-    {
-      reg_index = ((t-1) * num_attrs - 1 + i) * num_factors;
-    
-      if (i < 19)
-      {
-          agg_i = 19;
-          if (i >=  2 & 1 <=  7) {agg_i =  1;}
-          if (i >= 10 & 1 <= 15) {agg_i =  9;}
-          if (i >= 17 & 1 <= 18) {agg_i = 16;}
-
-          RegresValueMap [reg_index + 1, (t-1) * num_attrs +     i] = -1.0 + zero;  # 1st factor: -x[t]
-          RegresValueMap [reg_index + 2, (t-1) * num_attrs + agg_i] =  1.0 + zero;  # 2nd factor: aggregate[t]
-          if (t == 1) {
-              RegresValueMap [reg_index + 3, i] = 1.0 + zero; # For t = 1 the 3rd factor is x[t] = x[1]
-          } else {
-              RegresValueMap [reg_index + 3, (t-2) * num_attrs + i] =  1.0 + zero; # 3rd factor: x[t-1]
-          }
-          if (t >= 3) {
-              RegresValueMap [reg_index + 4, (t-2) * num_attrs + i] =  1.0 + zero; # 4th factor is
-              RegresValueMap [reg_index + 4, (t-3) * num_attrs + i] = -1.0 + zero; #   x[t-1] - x[t-2]
-          }
-      }
-
-# Regression for the TOTAL:
-
-      if (i == 19)
-      {
-          if (t >= 2) {
-              RegresValueMap [reg_index + 1, (t-1) * num_attrs + 19] = -1.0 + zero; # 1st factor: -x[t]
-              RegresFactorDefault [reg_index + 2, 1]                 =  1.0 + zero; # 2nd factor: Intercept
-              RegresValueMap [reg_index + 3, (t-2) * num_attrs + 19] =  1.0 + zero; # 3rd factor: x[t-1]
-          }
-          if (t >= 3) {
-              RegresValueMap [reg_index + 4, (t-2) * num_attrs + 19] =  1.0 + zero; # 4th factor is
-              RegresValueMap [reg_index + 4, (t-3) * num_attrs + 19] = -1.0 + zero; #   x[t-1] - x[t-2]
-          }
-      }
-  }
-}
-
-# -----------------------------------------------------------------
-# SECOND, AN AFFINE MAP FROM OBSERVED REPORTS TO REGRESSION FACTORS
-# -----------------------------------------------------------------
-
-for (t in (num_known_terms + num_predicted_terms + 1) : num_terms)
-{
-    t2 = t - (num_known_terms + num_predicted_terms);
-    for (i in 1 : num_attrs) {
-        reg_index = ((t-1) * num_attrs - 1 + i) * num_factors;
-        RegresValueMap [reg_index + 1, (t  - 1) * num_attrs + i] = -1.0 + zero; # 1st factor: -y[t]
-        RegresValueMap [reg_index + 2, (t2 - 1) * num_attrs + i] =  1.0 + zero; # 2nd factor:  x[t]
-    }
-}
-
-# -----------------------------------------------------
-# THIRD, AN AFFINE MAP FOR REGULARIZATION "REGRESSIONS"
-# -----------------------------------------------------
-
-reg_index = num_terms * num_attrs * num_factors;
-for (i in 1:num_regularization_regs)
-{
-    RegresFactorDefault [reg_index + 1, 1] =  1.0 + zero;
-    RegresFactorDefault [reg_index + 2, 1] = -1.0 + zero;
-    reg_index = reg_index + num_factors;
-}
-
-
-# ----------------------------------------------------------
-# GENERATE AN AFFINE MAP FROM PARAMETERS TO THE COEFFICIENTS
-# AT REGRESSION FACTORS: A LINEAR MAP + A VECTOR OF DEFAULTS
-# ----------------------------------------------------------
-
-RegresParamMap = matrix (0.0, rows = (num_reg_eqs * num_factors), cols = num_params);
-RegresCoeffDefault = matrix (0.0, rows = (num_reg_eqs * num_factors), cols = 1);
-
-# -----------------------------------------------------------
-# FIRST, AN AFFINE MAP THAT COVERS HIDDEN REPORTS REGRESSIONS
-# -----------------------------------------------------------
-
-for (t in 1 : (num_known_terms + num_predicted_terms)) {
-# Group 1 attributes:
-    reg_index = ((t-1) * num_attrs - 1 + 1) * num_factors;
-    RegresCoeffDefault [reg_index + 1, 1] = 1.0 + zero;  # Default coefficient = 1.0
-    RegresParamMap [reg_index + 2,  1]    = 1.0 + zero;  # Param #01
-    RegresParamMap [reg_index + 3,  8]    = 1.0 + zero;  # Param #08
-    RegresParamMap [reg_index + 4,  9]    = 1.0 + zero;  # Param #09
-    for (i in 2 : 7) {
-        reg_index = ((t-1) * num_attrs - 1 + i) * num_factors;
-        RegresCoeffDefault [reg_index + 1, 1]  = 1.0 + zero;  # Default coefficient = 1.0
-        RegresParamMap [reg_index + 2,  i]     = 1.0 + zero;  # Param #02-#07
-        RegresParamMap [reg_index + 3, 10]     = 1.0 + zero;  # Param #10
-        RegresParamMap [reg_index + 4, 11]     = 1.0 + zero;  # Param #11
-    }
-# Group 2 attribute:
-    reg_index = ((t-1) * num_attrs - 1 + 8) * num_factors;
-    RegresCoeffDefault [reg_index + 1, 1] = 1.0 + zero;  # Default coefficient = 1.0
-    RegresParamMap [reg_index + 2, 12] = 1.0 + zero;  # Param #12
-    RegresParamMap [reg_index + 3, 13] = 1.0 + zero;  # Param #13
-    RegresParamMap [reg_index + 4, 14] = 1.0 + zero;  # Param #14
-# Group 3 attributes:
-    reg_index = ((t-1) * num_attrs - 1 + 9) * num_factors;
-    RegresCoeffDefault [reg_index + 1, 1]  = 1.0 + zero;  # Default coefficient = 1.0
-    RegresParamMap [reg_index + 2, 15]     = 1.0 + zero;  # Param #17
-    RegresParamMap [reg_index + 3, 22]     = 1.0 + zero;  # Param #22
-    RegresParamMap [reg_index + 4, 23]     = 1.0 + zero;  # Param #23
-    for (i in 10 : 15) {
-        reg_index = ((t-1) * num_attrs - 1 + i) * num_factors;
-        RegresCoeffDefault [reg_index + 1, 1]  = 1.0 + zero;  # Default coefficient = 1.0
-        RegresParamMap [reg_index + 2,  6 + i] = 1.0 + zero;  # Param #16-#21
-        RegresParamMap [reg_index + 3, 24]     = 1.0 + zero;  # Param #24
-        RegresParamMap [reg_index + 4, 25]     = 1.0 + zero;  # Param #25
-    }
-    
-# Group 4 attributes:
-if (is_GROUP_4_ENABLED == 1) {
-    reg_index = ((t-1) * num_attrs - 1 + 16) * num_factors;
-    RegresCoeffDefault [reg_index + 1, 1]  = 1.0 + zero;  # Default coefficient = 1.0
-    RegresParamMap [reg_index + 2, 29]     = 1.0 + zero;  # Param #29
-    RegresParamMap [reg_index + 3, 32]     = 1.0 + zero;  # Param #32
-    RegresParamMap [reg_index + 4, 33]     = 1.0 + zero;  # Param #33
-    for (i in 17 : 18) {
-        reg_index = ((t-1) * num_attrs - 1 + i) * num_factors;
-        RegresCoeffDefault [reg_index + 1, 1]  = 1.0 + zero;  # Default coefficient = 1.0
-        RegresParamMap [reg_index + 2, 13 + i] = 1.0 + zero;  # Param #30-#31
-        RegresParamMap [reg_index + 3, 34]     = 1.0 + zero;  # Param #34
-        RegresParamMap [reg_index + 4, 35]     = 1.0 + zero;  # Param #35
-    }
-}
-
-# Group 5 attribute:
-    reg_index = ((t-1) * num_attrs - 1 + 19) * num_factors;
-    RegresCoeffDefault [reg_index + 1, 1] = 1.0 + zero;  # Default coefficient = 1.0
-    RegresParamMap [reg_index + 2, 26] = 1.0 + zero;  # Param #26
-    RegresParamMap [reg_index + 3, 27] = 1.0 + zero;  # Param #27
-    RegresParamMap [reg_index + 4, 28] = 1.0 + zero;  # Param #28
-}
-
-# --------------------------------------------------------------
-# SECOND, AN AFFINE MAP THAT COVERS OBSERVED REPORTS REGRESSIONS
-# --------------------------------------------------------------
-
-for (t in (num_known_terms + num_predicted_terms + 1) : num_terms)
-{
-    for (i in 1 : num_attrs) {
-        if (castAsScalar (disabled_known_values [i, t - (num_known_terms + num_predicted_terms)]) == 0.0)
-        {
-            reg_index = ((t-1) * num_attrs - 1 + i) * num_factors;
-            RegresCoeffDefault [reg_index + 1, 1] = 1.0 + zero; # Default coefficient = 1.0
-            RegresCoeffDefault [reg_index + 2, 1] = 1.0 + zero; # Default coefficient = 1.0
-        }
-    }
-}
-
-# -------------------------------------------------------------
-# THIRD, AN AFFINE MAP THAT COVERS REGULARIZATION "REGRESSIONS"
-# -------------------------------------------------------------
-
-reg_index = num_terms * num_attrs * num_factors;
-    RegresParamMap [reg_index + 1, 27] = 1.0 + zero;  # Param #27
-    RegresCoeffDefault [reg_index + 2, 1] = 1.0 + zero;
-reg_index = reg_index + num_factors;
-    RegresParamMap [reg_index + 1, 28] = 1.0 + zero;  # Param #28
-reg_index = reg_index + num_factors;
-    RegresParamMap [reg_index + 1, 08] = 1.0 + zero;  # Param #08
-reg_index = reg_index + num_factors;
-    RegresParamMap [reg_index + 1, 09] = 1.0 + zero;  # Param #09
-reg_index = reg_index + num_factors;
-    RegresParamMap [reg_index + 1, 10] = 1.0 + zero;  # Param #10
-reg_index = reg_index + num_factors;
-    RegresParamMap [reg_index + 1, 11] = 1.0 + zero;  # Param #11
-reg_index = reg_index + num_factors;
-    RegresParamMap [reg_index + 1, 13] = 1.0 + zero;  # Param #13
-reg_index = reg_index + num_factors;
-    RegresParamMap [reg_index + 1, 14] = 1.0 + zero;  # Param #14
-reg_index = reg_index + num_factors;
-    RegresParamMap [reg_index + 1, 22] = 1.0 + zero;  # Param #22
-reg_index = reg_index + num_factors;
-    RegresParamMap [reg_index + 1, 23] = 1.0 + zero;  # Param #23
-reg_index = reg_index + num_factors;
-    RegresParamMap [reg_index + 1, 24] = 1.0 + zero;  # Param #24
-reg_index = reg_index + num_factors;
-    RegresParamMap [reg_index + 1, 25] = 1.0 + zero;  # Param #25
-
-if (is_GROUP_4_ENABLED == 1) {
-    reg_index = reg_index + num_factors;
-        RegresParamMap [reg_index + 1, 32] = 1.0 + zero;  # Param #32
-    reg_index = reg_index + num_factors;
-        RegresParamMap [reg_index + 1, 33] = 1.0 + zero;  # Param #33
-    reg_index = reg_index + num_factors;
-        RegresParamMap [reg_index + 1, 34] = 1.0 + zero;  # Param #34
-    reg_index = reg_index + num_factors;
-        RegresParamMap [reg_index + 1, 35] = 1.0 + zero;  # Param #35
-}
-
-# ----------------------------------------------------------
-# GENERATE A VECTOR OF SCALE MULTIPLIERS, ONE PER REGRESSION
-# ----------------------------------------------------------
-
-RegresScaleMult = matrix (1.0, rows = num_reg_eqs, cols = 1);
-
-global_weight = 0.5 + zero;
-
-attribute_size = rowMeans (abs (initial_reports [, 1:num_known_terms]));
-max_attr_size = max (attribute_size);
-
-for (t in 1 : num_terms) {
-    for (i in 1 : num_attrs) {
-        regeqn = (t-1) * num_attrs + i;
-        scale_down = sqrt (attribute_size [i, 1] / max_attr_size) * 0.999 + 0.001;
-        acceptable_drift = scale_down * max_attr_size * 0.001;
-        RegresScaleMult [regeqn, 1] = global_weight / (acceptable_drift^2);
-    }
-}
-
-for (i in 1 : num_regularization_regs) {
-    regeqn = num_terms * num_attrs + i;
-    acceptable_drift = 0.01;
-    RegresScaleMult [regeqn, 1] = global_weight / (acceptable_drift^2);
-}
-
-# --------------------------------
-# WRITE OUT ALL GENERATED MATRICES
-# --------------------------------
-
-write (initial_reports,    $2, format="text");
-write (CReps,              $3, format="text");
-write (RegresValueMap,     $4, format="text");
-write (RegresFactorDefault,$5, format="text");
-write (RegresParamMap,     $6, format="text");
-write (RegresCoeffDefault, $7, format="text");
-write (RegresScaleMult,    $8, format="text");
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# 2013-10-08: THIS IS THE ATTEMPT TO IMPLEMENT HIDDEN STATE AS "HIDDEN REPORTS"
+# THE FIRST TERMS IN THE REPORTS MATRIX ARE THE HIDDEN REPORTS, THE LAST ARE THE KNOWN REPORTS
+#
+# THIS VERSION IS WITH "TRADITIONAL" REGRESSIONS
+#
+
+# hadoop jar SystemML.jar -f test/scripts/applications/impute/wfundInputGenerator.dml -exec singlenode
+#    -args
+#        test/scripts/applications/impute/initial_reports
+#        test/scripts/applications/impute/initial_reports_preprocessed
+#        test/scripts/applications/impute/CReps 
+#        test/scripts/applications/impute/RegresValueMap
+#        test/scripts/applications/impute/RegresFactorDefault
+#        test/scripts/applications/impute/RegresParamMap
+#        test/scripts/applications/impute/RegresCoeffDefault
+#        test/scripts/applications/impute/RegresScaleMult
+
+is_GROUP_4_ENABLED = 1;    #   = 1 or 0
+num_known_terms = 6; # 20; # The number of   known   term reports, feel free to change
+num_predicted_terms = 1;   # The number of predicted term reports, feel free to change
+
+num_terms = 2 * num_known_terms + num_predicted_terms;
+num_attrs = 19;
+
+# Indicator matrix for the "known" values that should not be matched to hidden reports:
+disabled_known_values = matrix (0.0, rows = num_attrs, cols = num_known_terms);
+disabled_known_values [4, 3] = 1.0;
+disabled_known_values [5, 3] = 1.0;
+disabled_known_values [6, 3] = 1.0;
+disabled_known_values [7, 3] = 1.0;
+
+initial_reports_unprocessed = read ($1);
+initial_reports = matrix (0.0, rows = num_attrs, cols = num_terms);
+initial_reports [, 1:num_known_terms] = 
+    initial_reports_unprocessed [, 1:num_known_terms];
+initial_reports [, (num_known_terms + num_predicted_terms + 1):num_terms] = 
+    initial_reports_unprocessed [, 1:num_known_terms];
+
+num_frees_per_term = 13;
+if (is_GROUP_4_ENABLED == 1) {
+    num_frees_per_term = 15;
+}
+
+num_frees = (num_known_terms + num_predicted_terms) * num_frees_per_term;
+
+zero = matrix (0.0, rows = 1, cols = 1);
+
+# ---------------------------------------------------------
+# GENERATE AN AFFINE MAP FROM FREE VARIABLES TO THE REPORTS
+# AFFINE MAP = LINEAR MAP + INITIAL (DEFAULT) REPORTS
+# All free variables are mapped to the "HIDDEN" reports
+# ---------------------------------------------------------
+
+CReps = matrix (0.0, rows = (num_terms * num_attrs), cols = num_frees);
+
+for (t in 1:(num_known_terms + num_predicted_terms))
+{
+    dt = (t-1) * num_attrs;
+    df = (t-1) * num_frees_per_term;
+# constraint that          row1 =  row2 +  row3 +  row4 +  row5 +  row6 + row7
+# translated to free vars: row1 = free1 + free2 + free3 + free4 + free5 + free6
+    CReps [dt +  1, df +  1] = 1.0 + zero;
+    CReps [dt +  1, df +  2] = 1.0 + zero;
+    CReps [dt +  1, df +  3] = 1.0 + zero;
+    CReps [dt +  1, df +  4] = 1.0 + zero;
+    CReps [dt +  1, df +  5] = 1.0 + zero;
+    CReps [dt +  1, df +  6] = 1.0 + zero;
+    CReps [dt +  2, df +  1] = 1.0 + zero;
+    CReps [dt +  3, df +  2] = 1.0 + zero;
+    CReps [dt +  4, df +  3] = 1.0 + zero;
+    CReps [dt +  5, df +  4] = 1.0 + zero;
+    CReps [dt +  6, df +  5] = 1.0 + zero;
+    CReps [dt +  7, df +  6] = 1.0 + zero;
+
+# row 8 is free variable not appearing in any non-free variable
+    CReps [dt +  8, df +  7] = 1.0 + zero;
+
+# constraint that          row9 = row10 + row11 +  row12 +  row13 +  row14 +  row15
+# translated to free vars: row9 = free8 + free9 + free10 + free11 + free12 + free13
+    CReps [dt +  9, df +  8] = 1.0 + zero;
+    CReps [dt +  9, df +  9] = 1.0 + zero;
+    CReps [dt +  9, df + 10] = 1.0 + zero;
+    CReps [dt +  9, df + 11] = 1.0 + zero;
+    CReps [dt +  9, df + 12] = 1.0 + zero;
+    CReps [dt +  9, df + 13] = 1.0 + zero;
+    CReps [dt + 10, df +  8] = 1.0 + zero;
+    CReps [dt + 11, df +  9] = 1.0 + zero;
+    CReps [dt + 12, df + 10] = 1.0 + zero;
+    CReps [dt + 13, df + 11] = 1.0 + zero;
+    CReps [dt + 14, df + 12] = 1.0 + zero;
+    CReps [dt + 15, df + 13] = 1.0 + zero;
+
+# constraint that          row16 =  row17 +  row18
+# translated to free vars: row16 = free14 + free15
+    if (is_GROUP_4_ENABLED == 1) {
+        CReps [dt + 16, df + 14] = 1.0 + zero;
+        CReps [dt + 16, df + 15] = 1.0 + zero;
+        CReps [dt + 17, df + 14] = 1.0 + zero;
+        CReps [dt + 18, df + 15] = 1.0 + zero;
+    }
+
+# constraint that           row19 = total cost (all free variables)
+# translated to free vars:  row19 = all free variables
+    CReps [dt + 19, df +  1] = 1.0 + zero;
+    CReps [dt + 19, df +  2] = 1.0 + zero;
+    CReps [dt + 19, df +  3] = 1.0 + zero;
+    CReps [dt + 19, df +  4] = 1.0 + zero;
+    CReps [dt + 19, df +  5] = 1.0 + zero;
+    CReps [dt + 19, df +  6] = 1.0 + zero;
+    CReps [dt + 19, df +  7] = 1.0 + zero;
+    CReps [dt + 19, df +  8] = 1.0 + zero;
+    CReps [dt + 19, df +  9] = 1.0 + zero;
+    CReps [dt + 19, df + 10] = 1.0 + zero;
+    CReps [dt + 19, df + 11] = 1.0 + zero;
+    CReps [dt + 19, df + 12] = 1.0 + zero;
+    CReps [dt + 19, df + 13] = 1.0 + zero;
+    if (is_GROUP_4_ENABLED == 1) {
+        CReps [dt + 19, df + 14] = 1.0 + zero;
+        CReps [dt + 19, df + 15] = 1.0 + zero;
+    }
+}
+
+
+# ---------------------------------------------------------
+# GENERATE AN AFFINE MAP FROM REPORTS TO REGRESSION FACTORS
+# AFFINE MAP = LINEAR MAP + A VECTOR OF DEFAULTS
+# ---------------------------------------------------------
+
+# We have three types of regressions:
+# 1. For "hidden" reports:
+#    x[t]  ~  aggregate[t], x[t-1],  (x[t-1] - x[t-2])
+# 2. For "observed" reports:
+#    y[t]  ~  x[t] (with coefficient 1)
+# 3. For some parameters: the regularization equations.
+# All regressions follow the 4-factor pattern.
+num_factors = 4; 
+
+# We have one regression equation per time-term for each attribute, 
+# plus a few "special" regularization regression equations:
+num_regularization_regs = 12;
+if (is_GROUP_4_ENABLED == 1) {
+    num_regularization_regs = 16;
+}
+
+num_reg_eqs = num_terms * num_attrs + num_regularization_regs;
+
+RegresValueMap = matrix (0.0, rows = (num_reg_eqs * num_factors), cols = (num_terms * num_attrs));
+RegresFactorDefault = matrix (0.0, rows = (num_reg_eqs * num_factors), cols = 1);
+
+# All regression equations for the same attribute share the same parameters, regardless
+# of the term; some parameters are shared across multiple attributes, (those attributes
+# whose behavior is believed to be similar) as specified in the table below:
+
+num_params = 28;
+if (is_GROUP_4_ENABLED == 1) {
+    num_params = 35;
+}
+
+# Factors: -self[t]  total[t]  self[t-1]  self[t-1]-
+#                                          self[t-2]
+# PARAMS:
+# Group 1:   1.0     prm#01     prm#08     prm#09    Row #01 = free#01 + ... + free#06
+# Group 1:    "      prm#02     prm#10     prm#11    Row #02 = free#01
+# Group 1:    "      prm#03       "          "       Row #03 = free#02
+# Group 1:    "      prm#04       "          "       Row #04 = free#03
+# Group 1:    "      prm#05       "          "       Row #05 = free#04
+# Group 1:    "      prm#06       "          "       Row #06 = free#05
+# Group 1:    "      prm#07       "          "       Row #07 = free#06
+# --------------------------------------------------------------------
+# Group 2:   1.0     prm#12     prm#13     prm#14    Row #08 = free#07
+# --------------------------------------------------------------------
+# Group 3:   1.0     prm#15     prm#22     prm#23    Row #09 = free#08 + ... + free#13
+# Group 3:    "      prm#16     prm#24     prm#25    Row #10 = free#08
+# Group 3:    "      prm#17       "          "       Row #11 = free#09
+# Group 3:    "      prm#18       "          "       Row #12 = free#10
+# Group 3:    "      prm#19       "          "       Row #13 = free#11
+# Group 3:    "      prm#20       "          "       Row #14 = free#12
+# Group 3:    "      prm#21       "          "       Row #15 = free#13
+# --------------------------------------------------------------------
+# GROUP-4 ZEROS: FIVE PARAMETERS REVOKED
+# Group 4:   1.0     prm#29     prm#32     prm#33    Row #16 = free#14 + free#15
+# Group 4:    "      prm#30     prm#34     prm#35    Row #17 = free#14
+# Group 4:    "      prm#31       "          "       Row #18 = free#15
+# --------------------------------------------------------------------
+# Group 5:   1.0     prm#26     prm#27     prm#28    Row #19 = free#01 + ... + free#15
+# 
+# (The aggregates in Groups 1..4 regress on the total cost in Group 5;
+#  the total cost in Group 5 regresses on the intercept.)
+
+# THE LAST REGULARIZATION "REGRESSION" EQUATIONS:
+# Factors:   1.0      -1.0       0.0        0.0
+# PARAMS:
+#          prm#27      1.0       0.0        0.0
+#          prm#28      0.0       0.0        0.0
+#          prm#08      0.0       0.0        0.0
+#          prm#09      0.0       0.0        0.0
+#          prm#10      0.0       0.0        0.0
+#          prm#11      0.0       0.0        0.0
+#          prm#13      0.0       0.0        0.0
+#          prm#14      0.0       0.0        0.0
+#          prm#22      0.0       0.0        0.0
+#          prm#23      0.0       0.0        0.0
+#          prm#24      0.0       0.0        0.0
+#          prm#25      0.0       0.0        0.0
+#          prm#32      0.0       0.0        0.0  # GROUP-4 ZEROS:
+#          prm#33      0.0       0.0        0.0  #   THESE EQUATIONS
+#          prm#34      0.0       0.0        0.0  #   USE REVOKED PARAMETERS
+#          prm#35      0.0       0.0        0.0  #   AND DO NOT APPEAR
+
+
+# --------------------------------------------------------------
+# FIRST, AN AFFINE MAP FROM HIDDEN REPORTS TO REGRESSION FACTORS
+# --------------------------------------------------------------
+
+# For every hidden-report term t and attribute i, fill the rows of RegresValueMap
+# (and RegresFactorDefault) that hold the factors of regression equation (t, i).
+for (t in 1 : (num_known_terms + num_predicted_terms))
+{
+    for (i in 1 : num_attrs)
+    {
+      # Zero-based row offset of equation (t, i); factor k lives in row reg_index + k
+      reg_index = ((t-1) * num_attrs - 1 + i) * num_factors;
+    
+      if (i < 19)
+      {
+          # Choose the aggregate row this attribute regresses on.  Members of a group
+          # (rows 2-7, 10-15, 17-18) use the first row of their group (1, 9, 16);
+          # every other row below 19 (1, 8, 9, 16) uses the total in row 19.
+          # Both bounds must test i: the earlier "1 <= 7" style upper bounds were
+          # always true, which sent rows 8 and 9 to aggregate 1 and row 16 to 9.
+          agg_i = 19;
+          if (i >=  2 & i <=  7) {agg_i =  1;}
+          if (i >= 10 & i <= 15) {agg_i =  9;}
+          if (i >= 17 & i <= 18) {agg_i = 16;}
+
+          RegresValueMap [reg_index + 1, (t-1) * num_attrs +     i] = -1.0 + zero;  # 1st factor: -x[t]
+          RegresValueMap [reg_index + 2, (t-1) * num_attrs + agg_i] =  1.0 + zero;  # 2nd factor: aggregate[t]
+          if (t == 1) {
+              # There is no term before the first one, so x[1] stands in for x[t-1]
+              RegresValueMap [reg_index + 3, i] = 1.0 + zero; # For t = 1 the 3rd factor is x[t] = x[1]
+          } else {
+              RegresValueMap [reg_index + 3, (t-2) * num_attrs + i] =  1.0 + zero; # 3rd factor: x[t-1]
+          }
+          # The difference factor needs two earlier terms; for t < 3 its row stays all-zero
+          if (t >= 3) {
+              RegresValueMap [reg_index + 4, (t-2) * num_attrs + i] =  1.0 + zero; # 4th factor is
+              RegresValueMap [reg_index + 4, (t-3) * num_attrs + i] = -1.0 + zero; #   x[t-1] - x[t-2]
+          }
+      }
+
+# Regression for the TOTAL:
+
+      if (i == 19)
+      {
+          # The total has no aggregate above it: its 2nd factor is a constant intercept,
+          # supplied through RegresFactorDefault.  Nothing is set for the total at t = 1.
+          if (t >= 2) {
+              RegresValueMap [reg_index + 1, (t-1) * num_attrs + 19] = -1.0 + zero; # 1st factor: -x[t]
+              RegresFactorDefault [reg_index + 2, 1]                 =  1.0 + zero; # 2nd factor: Intercept
+              RegresValueMap [reg_index + 3, (t-2) * num_attrs + 19] =  1.0 + zero; # 3rd factor: x[t-1]
+          }
+          if (t >= 3) {
+              RegresValueMap [reg_index + 4, (t-2) * num_attrs + 19] =  1.0 + zero; # 4th factor is
+              RegresValueMap [reg_index + 4, (t-3) * num_attrs + 19] = -1.0 + zero; #   x[t-1] - x[t-2]
+          }
+      }
+  }
+}
+
+# -----------------------------------------------------------------
+# SECOND, AN AFFINE MAP FROM OBSERVED REPORTS TO REGRESSION FACTORS
+# -----------------------------------------------------------------
+
+# Each observed term t is paired with the hidden term (t - hidden_span):
+# the equation's two factors are -y[t] and x[t - hidden_span].
+hidden_span = num_known_terms + num_predicted_terms;
+for (t in (hidden_span + 1) : num_terms) {
+    obs_col_base = (t - 1) * num_attrs;                  # columns of observed term t
+    hid_col_base = (t - hidden_span - 1) * num_attrs;    # columns of the paired hidden term
+    for (i in 1 : num_attrs) {
+        row_base = (obs_col_base - 1 + i) * num_factors; # zero-based row offset of equation (t, i)
+        RegresValueMap [row_base + 1, obs_col_base + i] = -1.0 + zero;  # 1st factor: -y[t]
+        RegresValueMap [row_base + 2, hid_col_base + i] =  1.0 + zero;  # 2nd factor:  x[t]
+    }
+}
+
+# -----------------------------------------------------
+# THIRD, AN AFFINE MAP FOR REGULARIZATION "REGRESSIONS"
+# -----------------------------------------------------
+
+# Regularization equations follow all report equations; each one owns num_factors
+# rows and has constant factors +1.0 (1st) and -1.0 (2nd).
+regul_base = num_terms * num_attrs * num_factors;
+for (k in 1 : num_regularization_regs) {
+    regul_row = regul_base + (k - 1) * num_factors;
+    RegresFactorDefault [regul_row + 1, 1] =  1.0 + zero;
+    RegresFactorDefault [regul_row + 2, 1] = -1.0 + zero;
+}
+
+
+# ----------------------------------------------------------
+# GENERATE AN AFFINE MAP FROM PARAMETERS TO THE COEFFICIENTS
+# AT REGRESSION FACTORS: A LINEAR MAP + A VECTOR OF DEFAULTS
+# ----------------------------------------------------------
+
+# Linear part: one row per (regression equation, factor) pair, one column per parameter; starts all-zero
+RegresParamMap = matrix (0.0, rows = (num_reg_eqs * num_factors), cols = num_params);
+# Constant part of each coefficient, same row layout as RegresParamMap; starts all-zero
+RegresCoeffDefault = matrix (0.0, rows = (num_reg_eqs * num_factors), cols = 1);
+
+# -----------------------------------------------------------
+# FIRST, AN AFFINE MAP THAT COVERS HIDDEN REPORTS REGRESSIONS
+# -----------------------------------------------------------
+
+# For every hidden-report term t, wire the coefficients of each attribute's equation:
+# the 1st factor always gets the constant coefficient 1.0 (via RegresCoeffDefault),
+# the 2nd-4th factors each get one parameter (a 1.0 in that parameter's column).
+# The same parameter columns are reused for every t, so all terms share parameters.
+for (t in 1 : (num_known_terms + num_predicted_terms)) {
+# Group 1 attributes:
+    # Row 1 (the group's first attribute) has its own parameters for all three factors
+    reg_index = ((t-1) * num_attrs - 1 + 1) * num_factors;
+    RegresCoeffDefault [reg_index + 1, 1] = 1.0 + zero;  # Default coefficient = 1.0
+    RegresParamMap [reg_index + 2,  1]    = 1.0 + zero;  # Param #01
+    RegresParamMap [reg_index + 3,  8]    = 1.0 + zero;  # Param #08
+    RegresParamMap [reg_index + 4,  9]    = 1.0 + zero;  # Param #09
+    # Rows 2-7: individual 2nd-factor parameter, shared parameters for the 3rd and 4th factors
+    for (i in 2 : 7) {
+        reg_index = ((t-1) * num_attrs - 1 + i) * num_factors;
+        RegresCoeffDefault [reg_index + 1, 1]  = 1.0 + zero;  # Default coefficient = 1.0
+        RegresParamMap [reg_index + 2,  i]     = 1.0 + zero;  # Param #02-#07
+        RegresParamMap [reg_index + 3, 10]     = 1.0 + zero;  # Param #10
+        RegresParamMap [reg_index + 4, 11]     = 1.0 + zero;  # Param #11
+    }
+# Group 2 attribute:
+    reg_index = ((t-1) * num_attrs - 1 + 8) * num_factors;
+    RegresCoeffDefault [reg_index + 1, 1] = 1.0 + zero;  # Default coefficient = 1.0
+    RegresParamMap [reg_index + 2, 12] = 1.0 + zero;  # Param #12
+    RegresParamMap [reg_index + 3, 13] = 1.0 + zero;  # Param #13
+    RegresParamMap [reg_index + 4, 14] = 1.0 + zero;  # Param #14
+# Group 3 attributes:
+    reg_index = ((t-1) * num_attrs - 1 + 9) * num_factors;
+    RegresCoeffDefault [reg_index + 1, 1]  = 1.0 + zero;  # Default coefficient = 1.0
+    RegresParamMap [reg_index + 2, 15]     = 1.0 + zero;  # Param #15
+    RegresParamMap [reg_index + 3, 22]     = 1.0 + zero;  # Param #22
+    RegresParamMap [reg_index + 4, 23]     = 1.0 + zero;  # Param #23
+    # Rows 10-15: column 6 + i gives parameters 16..21
+    for (i in 10 : 15) {
+        reg_index = ((t-1) * num_attrs - 1 + i) * num_factors;
+        RegresCoeffDefault [reg_index + 1, 1]  = 1.0 + zero;  # Default coefficient = 1.0
+        RegresParamMap [reg_index + 2,  6 + i] = 1.0 + zero;  # Param #16-#21
+        RegresParamMap [reg_index + 3, 24]     = 1.0 + zero;  # Param #24
+        RegresParamMap [reg_index + 4, 25]     = 1.0 + zero;  # Param #25
+    }
+    
+# Group 4 attributes:
+# When the flag is not 1, rows 16-18 keep their all-zero coefficients from the
+# matrix initialization, so these equations have no 1st-factor default and no parameters.
+if (is_GROUP_4_ENABLED == 1) {
+    reg_index = ((t-1) * num_attrs - 1 + 16) * num_factors;
+    RegresCoeffDefault [reg_index + 1, 1]  = 1.0 + zero;  # Default coefficient = 1.0
+    RegresParamMap [reg_index + 2, 29]     = 1.0 + zero;  # Param #29
+    RegresParamMap [reg_index + 3, 32]     = 1.0 + zero;  # Param #32
+    RegresParamMap [reg_index + 4, 33]     = 1.0 + zero;  # Param #33
+    # Rows 17-18: column 13 + i gives parameters 30..31
+    for (i in 17 : 18) {
+        reg_index = ((t-1) * num_attrs - 1 + i) * num_factors;
+        RegresCoeffDefault [reg_index + 1, 1]  = 1.0 + zero;  # Default coefficient = 1.0
+        RegresParamMap [reg_index + 2, 13 + i] = 1.0 + zero;  # Param #30-#31
+        RegresParamMap [reg_index + 3, 34]     = 1.0 + zero;  # Param #34
+        RegresParamMap [reg_index + 4, 35]     = 1.0 + zero;  # Param #35
+    }
+}
+
+# Group 5 attribute:
+    # Row 19 is the total
+    reg_index = ((t-1) * num_attrs - 1 + 19) * num_factors;
+    RegresCoeffDefault [reg_index + 1, 1] = 1.0 + zero;  # Default coefficient = 1.0
+    RegresParamMap [reg_index + 2, 26] = 1.0 + zero;  # Param #26
+    RegresParamMap [reg_index + 3, 27] = 1.0 + zero;  # Param #27
+    RegresParamMap [reg_index + 4, 28] = 1.0 + zero;  # Param #28
+}
+
+# --------------------------------------------------------------
+# SECOND, AN AFFINE MAP THAT COVERS OBSERVED REPORTS REGRESSIONS
+# --------------------------------------------------------------
+
+# Give both factors of an observed-report equation the fixed coefficient 1.0,
+# unless disabled_known_values marks that (attribute, term) pair with a non-zero entry.
+first_observed_term = num_known_terms + num_predicted_terms + 1;
+for (t in first_observed_term : num_terms) {
+    flag_col = t - first_observed_term + 1;   # column of this term in disabled_known_values
+    for (i in 1 : num_attrs) {
+        disabled_flag = castAsScalar (disabled_known_values [i, flag_col]);
+        if (disabled_flag == 0.0) {
+            coef_row = ((t-1) * num_attrs - 1 + i) * num_factors;
+            RegresCoeffDefault [coef_row + 1, 1] = 1.0 + zero;  # coefficient of the 1st factor
+            RegresCoeffDefault [coef_row + 2, 1] = 1.0 + zero;  # coefficient of the 2nd factor
+        }
+    }
+}
+
+# -------------------------------------------------------------
+# THIRD, AN AFFINE MAP THAT COVERS REGULARIZATION "REGRESSIONS"
+# -------------------------------------------------------------
+
+# One regularization equation per listed parameter.  reg_index starts at the first
+# row after the num_terms * num_attrs report equations and is advanced by num_factors
+# before each further equation, so the statement order below fixes which equation
+# belongs to which parameter.  In every equation the 1st factor's coefficient is the
+# parameter itself (a 1.0 in that parameter's column).
+reg_index = num_terms * num_attrs * num_factors;
+    RegresParamMap [reg_index + 1, 27] = 1.0 + zero;  # Param #27
+    # Only this equation also gets a constant 1.0 on its 2nd factor; all the others
+    # leave the 2nd-factor coefficient at its initial 0.0
+    RegresCoeffDefault [reg_index + 2, 1] = 1.0 + zero;
+reg_index = reg_index + num_factors;
+    RegresParamMap [reg_index + 1, 28] = 1.0 + zero;  # Param #28
+reg_index = reg_index + num_factors;
+    RegresParamMap [reg_index + 1, 08] = 1.0 + zero;  # Param #08
+reg_index = reg_index + num_factors;
+    RegresParamMap [reg_index + 1, 09] = 1.0 + zero;  # Param #09
+reg_index = reg_index + num_factors;
+    RegresParamMap [reg_index + 1, 10] = 1.0 + zero;  # Param #10
+reg_index = reg_index + num_factors;
+    RegresParamMap [reg_index + 1, 11] = 1.0 + zero;  # Param #11
+reg_index = reg_index + num_factors;
+    RegresParamMap [reg_index + 1, 13] = 1.0 + zero;  # Param #13
+reg_index = reg_index + num_factors;
+    RegresParamMap [reg_index + 1, 14] = 1.0 + zero;  # Param #14
+reg_index = reg_index + num_factors;
+    RegresParamMap [reg_index + 1, 22] = 1.0 + zero;  # Param #22
+reg_index = reg_index + num_factors;
+    RegresParamMap [reg_index + 1, 23] = 1.0 + zero;  # Param #23
+reg_index = reg_index + num_factors;
+    RegresParamMap [reg_index + 1, 24] = 1.0 + zero;  # Param #24
+reg_index = reg_index + num_factors;
+    RegresParamMap [reg_index + 1, 25] = 1.0 + zero;  # Param #25
+
+# The four Group-4 parameters get regularization equations only when Group 4 is enabled.
+# NOTE(review): this makes 12 equations without Group 4 and 16 with it; presumably
+# num_regularization_regs is set to match - confirm where it is defined.
+if (is_GROUP_4_ENABLED == 1) {
+    reg_index = reg_index + num_factors;
+        RegresParamMap [reg_index + 1, 32] = 1.0 + zero;  # Param #32
+    reg_index = reg_index + num_factors;
+        RegresParamMap [reg_index + 1, 33] = 1.0 + zero;  # Param #33
+    reg_index = reg_index + num_factors;
+        RegresParamMap [reg_index + 1, 34] = 1.0 + zero;  # Param #34
+    reg_index = reg_index + num_factors;
+        RegresParamMap [reg_index + 1, 35] = 1.0 + zero;  # Param #35
+}
+
+# ----------------------------------------------------------
+# GENERATE A VECTOR OF SCALE MULTIPLIERS, ONE PER REGRESSION
+# ----------------------------------------------------------
+
+RegresScaleMult = matrix (1.0, rows = num_reg_eqs, cols = 1);
+global_weight = 0.5 + zero;
+
+# Mean absolute value of each attribute over the known terms, and the largest of them
+attribute_size = rowMeans (abs (initial_reports [, 1:num_known_terms]));
+max_attr_size = max (attribute_size);
+
+# Report equations: the weight depends on the attribute only, so it is derived once
+# per attribute and stored for that attribute's equation in every term.
+for (i in 1 : num_attrs) {
+    rel_size = sqrt (attribute_size [i, 1] / max_attr_size) * 0.999 + 0.001;
+    drift_i = rel_size * max_attr_size * 0.001;
+    weight_i = global_weight / (drift_i^2);
+    for (t in 1 : num_terms) {
+        RegresScaleMult [(t-1) * num_attrs + i, 1] = weight_i;
+    }
+}
+
+# Regularization equations: the same fixed drift of 0.01 for each of them
+regul_drift = 0.01;
+regul_weight = global_weight / (regul_drift^2);
+regul_offset = num_terms * num_attrs;
+for (k in 1 : num_regularization_regs) {
+    RegresScaleMult [regul_offset + k, 1] = regul_weight;
+}
+
+# --------------------------------
+# WRITE OUT ALL GENERATED MATRICES
+# --------------------------------
+
+# Each matrix goes to the path given by one positional script argument ($2 ... $8),
+# in text format.  $1 is not used in this section.
+write (initial_reports,    $2, format="text");
+write (CReps,              $3, format="text");
+write (RegresValueMap,     $4, format="text");  # reports -> regression factors (linear part)
+write (RegresFactorDefault,$5, format="text");  # reports -> regression factors (constant part)
+write (RegresParamMap,     $6, format="text");  # parameters -> coefficients (linear part)
+write (RegresCoeffDefault, $7, format="text");  # parameters -> coefficients (constant part)
+write (RegresScaleMult,    $8, format="text");  # one weight per regression equation

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/impute/wfundInputGenerator2.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/impute/wfundInputGenerator2.dml b/src/test/scripts/applications/impute/wfundInputGenerator2.dml
index 52845ea..e6d302d 100644
--- a/src/test/scripts/applications/impute/wfundInputGenerator2.dml
+++ b/src/test/scripts/applications/impute/wfundInputGenerator2.dml
@@ -1,446 +1,446 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# 2013-10-08: THIS IS THE ATTEMPT TO IMPLEMENT HIDDEN STATE AS "HIDDEN REPORTS"
-# THE FIRST TERMS IN THE REPORTS MATRIX ARE THE HIDDEN REPORTS, THE LAST ARE THE KNOWN REPORTS
-#
-# THIS VERSION IS WITH "DIFFERENTIAL" REGRESSIONS & AUXILIARY ATTRIBUTES
-#
-# hadoop jar SystemML.jar -f test/scripts/applications/impute/wfundInputGenerator2.dml -exec singlenode
-#    -args
-#        test/scripts/applications/impute/initial_reports_unprocessed
-#        test/scripts/applications/impute/initial_reports_preprocessed
-#        test/scripts/applications/impute/CReps 
-#        test/scripts/applications/impute/RegresValueMap
-#        test/scripts/applications/impute/RegresFactorDefault
-#        test/scripts/applications/impute/RegresParamMap
-#        test/scripts/applications/impute/RegresCoeffDefault
-#        test/scripts/applications/impute/RegresScaleMult
-
-num_observed_attrs = 19;       #  The number of attributes in the report
-num_auxiliary_attrs = 5;       #  The number of extra attributes used to decompose the observed ones
-num_attrs = num_observed_attrs + num_auxiliary_attrs;
-zero = matrix (0.0, rows = 1, cols = 1);
-
-# -------------------------------------------
-#  FEEL FREE / DON'T FORGET TO CHANGE THESE:
-# -------------------------------------------
-
-is_GROUP_4_ENABLED   = 0;      #  = 1 or 0
-is_FLIPPING_ENABLED  = 0;      #  = 1 or 0  DISABLE THIS!
-is_QUARTERLY_ENABLED = 1;      #  = 1 or 0  (enabled for sabesp)
-is_OCTALLY_ENABLED   = 0;      #  = 1 or 0  DISABLE THIS!
-
-num_known_terms = 20;          #  The number of   known   term reports
-num_predicted_terms = 1;       #  The number of predicted term reports
-num_state_terms = num_known_terms + num_predicted_terms;
-
-# Indicator matrix to show which report values should NOT be penalized
-# because of their difference between "observed" and "hidden" reports:
-
-disabled_known_values = matrix (0.0, rows = num_observed_attrs, cols = num_known_terms);
-# disabled_known_values [4, 3] = 1.0 + zero;
-# disabled_known_values [5, 3] = 1.0 + zero;
-# disabled_known_values [6, 3] = 1.0 + zero;
-# disabled_known_values [7, 3] = 1.0 + zero;
-
-
-# --------------------------------------------------------
-#  subtotals_tree [i, 1] = the closest subtotal attribute
-#  " 0" means that this attribute's values are constants
-#  "-1" means that this attribute is a root total
-# --------------------------------------------------------
-
-subtotals_tree = matrix (0.0, rows = num_attrs, cols = 1);
-
-subtotals_tree [ 1, 1] = 19 + zero;  subtotals_tree [ 9, 1] = 19 + zero;
-subtotals_tree [ 2, 1] =  1 + zero;  subtotals_tree [10, 1] =  9 + zero;
-subtotals_tree [ 3, 1] =  1 + zero;  subtotals_tree [11, 1] =  9 + zero;
-subtotals_tree [ 4, 1] =  1 + zero;  subtotals_tree [12, 1] =  9 + zero;
-subtotals_tree [ 5, 1] =  1 + zero;  subtotals_tree [13, 1] =  9 + zero;
-subtotals_tree [ 6, 1] =  1 + zero;  subtotals_tree [14, 1] =  9 + zero;
-subtotals_tree [ 7, 1] =  1 + zero;  subtotals_tree [15, 1] =  9 + zero;
-subtotals_tree [ 8, 1] = 19 + zero;  subtotals_tree [19, 1] = -1 + zero; # TOTAL
-
-if (is_GROUP_4_ENABLED == 1) {
-    subtotals_tree [16, 1] = 19 + zero;
-    subtotals_tree [17, 1] = 16 + zero;
-    subtotals_tree [18, 1] = 16 + zero;
-}
-
-subtotals_tree [20, 1] = -1 + zero;  # Auxiliary TOTAL
-subtotals_tree [21, 1] = 20 + zero;
-if (is_FLIPPING_ENABLED  == 1) {subtotals_tree [22, 1] = 20 + zero;}
-if (is_QUARTERLY_ENABLED == 1) {subtotals_tree [23, 1] = 20 + zero;}
-if (is_OCTALLY_ENABLED   == 1) {subtotals_tree [24, 1] = 20 + zero;}
-
-# -------------------------------------------------------------------
-#  We have two full column-slots for every report: one slot for the 
-#  "hidden" report (# i) and one slot for the "observed" report 
-#  (# i + num_state_terms).  Only the "hidden" part has degrees of
-#  freedom associated with it; the "observed" part is kept constant.
-#  We penalize most "hidden" values if they deviate too far from the
-#  "observed" values.  We also use this penalty to regularize
-#  auxiliary attributes and/or predicted reports, in which case their
-#  "observed" counterparts are set to zero.
-# -------------------------------------------------------------------
-
-num_terms = 2 * num_state_terms;
-
-initial_reports_unprocessed = read ($1);
-initial_reports = matrix (0.0, rows = num_attrs, cols = num_terms);
-initial_reports [1:num_observed_attrs, 1:num_known_terms] = 
-    initial_reports_unprocessed [1:num_observed_attrs, 1:num_known_terms];
-initial_reports [1:num_observed_attrs, (num_state_terms + 1) : (num_state_terms + num_known_terms)] = 
-    initial_reports_unprocessed [1:num_observed_attrs, 1:num_known_terms];
-
-disabled_known_values_extended = matrix (0.0, rows = num_attrs, cols = num_state_terms);
-disabled_known_values_extended [1:num_observed_attrs, 1:num_known_terms] = disabled_known_values;
-disabled_known_values = disabled_known_values_extended;
-
-# ---------------------------------------------------------
-# GENERATE AN AFFINE MAP FROM FREE VARIABLES TO THE REPORTS
-# AFFINE MAP = LINEAR MAP + INITIAL (DEFAULT) REPORTS
-# All free variables are mapped to the "HIDDEN" reports
-# ---------------------------------------------------------
-
-is_free = matrix (1.0, rows = num_attrs, cols = 1);
-for (i in 1:num_attrs) {
-    j = castAsScalar (subtotals_tree [i, 1]);
-    if (j > 0.0) {
-        is_free [j, 1] = 0.0 + zero;
-    } else {
-        if (j == 0.0) {
-            is_free [i, 1] = 0.0 + zero;
-}   }   }
-num_frees_per_term = sum (is_free);
-num_frees = num_state_terms * num_frees_per_term;
-
-CReps_block = matrix (0.0, rows = num_attrs, cols = num_frees_per_term);
-index_free = 0;
-for (i in 1:num_attrs) {
-    if (castAsScalar (is_free [i, 1]) == 1.0) {
-        index_free = index_free + 1;
-        j = i;
-        while (j > 0.0) {
-            CReps_block [j, index_free] = 1.0 + zero;
-            j = castAsScalar (subtotals_tree [j, 1]);
-}   }   }
-
-CReps = matrix (0.0, rows = (num_terms * num_attrs), cols = num_frees);
-for (t in 1:num_state_terms)
-{
-    dt = (t-1) * num_attrs;
-    df = (t-1) * num_frees_per_term;
-    CReps [(dt + 1) : (dt + num_attrs), (df + 1) : (df + num_frees_per_term)] = CReps_block;
-}
-
-
-# ---------------------------------------------------------
-# GENERATE AN AFFINE MAP FROM REPORTS TO REGRESSION FACTORS
-# AFFINE MAP = LINEAR MAP + A VECTOR OF DEFAULTS
-# ---------------------------------------------------------
-
-# We have one regression equation per time-term for each attribute, plus a few special
-# regularization "regression" equations.  There are three types of regressions:
-# 1. For "hidden" reports:
-#   x[t] ~ subtotal[t], x[t-1], (x[t-1] - x[t-2])
-#   (TOTAL[t] - TOTAL[t-1]) ~ (TOTAL[t-1] - TOTAL[t-2]), aux_1[t] (with coeff. 1)
-#     where aux_1[t] = aux_2[t] + ... + aux_5[t] (implemented as hard constraint)
-# 2. For "observed" reports:
-#   y[t]  ~  x[t] (with coefficient 1)
-# 3. For all parameters: regularization equations.
-# All regressions follow the 4-factor pattern.
-
-num_factors = 4; 
-num_params  = 18 * 3 + 1;
-num_reg_eqs = num_terms * num_attrs + num_params;
-
-# All regression equations for the same attribute share the same parameters, regardless
-# of the term; some parameters may be shared across multiple attributes, (those attributes
-# whose behavior is believed to be similar) as specified in the table below:
-
-# NON-TOTAL OBSERVED ATTRIBUTE REGRESSION EQUATIONS:
-#
-# Factors:                                                (x[t-1] -          
-#                     -x[t]       agg[t]       x[t-1]       x[t-2])        
-# -----------------------------------------------------------------------------
-# Row #i = 1...18:     1.0      prm[3*i-1]    prm[3*i]    prm[3*i+1]
-# (Must have: agg = subtotals_tree [i, 1] > 0.0)
-# -----------------------------------------------------------------------------
-
-# TOTAL AND AUXILIARY ATTRIBUTE REGRESSION EQUATIONS:
-#
-# Factors:          -(x[t] -    (x[t-1] -      
-#                     x[t-1])     x[t-2])      x[t-1]      aux_1[t]
-# -----------------------------------------------------------------------------
-# TOTAL (Row #19):     1.0        prm[1]        0.0          1.0      
-# aux_1 (Row #20):     0.0         0.0          0.0          0.0 
-# aux_2 (Row #21):     1.0         1.0          0.0          0.0     "steady"
-# aux_3 (Row #22):     1.0         1.0         -4.0          0.0    "flipping"
-# aux_4 (Row #23):     1.0         1.0         -2.0          0.0    "quarterly"
-# aux_5 (Row #24):     1.0         1.0       sqrt(2)-2       0.0     "octally"
-# -----------------------------------------------------------------------------
-
-# THE LAST REGULARIZATION "REGRESSION" EQUATIONS:
-#
-# Factors:            -1.0         1.0          0.0          0.0
-# -----------------------------------------------------------------------------
-# For prm[1]:         prm[1]       0.0 ?        0.0          0.0  ???
-# For i = 1...18:   prm[3*i-1]     0.0          0.0          0.0  if subtotals_tree [i, 1] == 0.0
-#                    prm[3*i]      1.0          0.0          0.0
-#                   prm[3*i+1]     0.0          0.0          0.0
-# For all others:      0.0         0.0          0.0          0.0
-# -----------------------------------------------------------------------------
-
-RegresValueMap = matrix (0.0, rows = (num_reg_eqs * num_factors), cols = (num_terms * num_attrs));
-RegresFactorDefault = matrix (0.0, rows = (num_reg_eqs * num_factors), cols = 1);
-
-# --------------------------------------------------------------
-# FIRST, AN AFFINE MAP FROM HIDDEN REPORTS TO REGRESSION FACTORS
-# --------------------------------------------------------------
-
-for (t in 2 : num_state_terms) {
-    for (i in 1 : num_attrs) {
-        reg_index = ((t-1) * num_attrs - 1 + i) * num_factors;
-        agg = castAsScalar (subtotals_tree [i, 1]);
-        if (i <= 18 & agg > 0)
-        {
-            RegresValueMap [reg_index + 1, (t-1) * num_attrs +  i ]   = -1.0 + zero;  # 1st factor: -x[t]
-            RegresValueMap [reg_index + 2, (t-1) * num_attrs + agg]   =  1.0 + zero;  # 2nd factor: agg[t]
-            RegresValueMap [reg_index + 3, (t-2) * num_attrs +  i ]   =  1.0 + zero;  # 3rd factor: x[t-1]
-            if (t == 2) {
-                RegresValueMap [reg_index + 4, (t-1) * num_attrs + i] =  1.0 + zero;  # 4th factor:
-                RegresValueMap [reg_index + 4, (t-2) * num_attrs + i] = -1.0 + zero;  #   x[t] - x[t-1]
-            } else {
-                RegresValueMap [reg_index + 4, (t-2) * num_attrs + i] =  1.0 + zero;  # 4th factor:
-                RegresValueMap [reg_index + 4, (t-3) * num_attrs + i] = -1.0 + zero;  # x[t-1] - x[t-2]
-            }
-### RegresFactorDefault [reg_index + 4, 1] = 1.0 + zero;  # 4th factor: Intercept
-        }
-        if ((i == 19 | i >= 21) & t >= 3 & agg != 0)
-        {
-            reg_index = ((t-1) * num_attrs - 1 + i) * num_factors;
-            RegresValueMap [reg_index + 1, (t-1) * num_attrs +  i]    = -1.0 + zero;  # 1st factor:
-            RegresValueMap [reg_index + 1, (t-2) * num_attrs +  i]    =  1.0 + zero;  #   - x[t] + x[t-1]
-            RegresValueMap [reg_index + 2, (t-2) * num_attrs +  i]    =  1.0 + zero;  # 2nd factor:
-            RegresValueMap [reg_index + 2, (t-3) * num_attrs +  i]    = -1.0 + zero;  #   x[t-1] - x[t-2]
-            RegresValueMap [reg_index + 3, (t-2) * num_attrs +  i]    =  1.0 + zero;  # 3rd factor: x[t-1]
-            RegresValueMap [reg_index + 4, (t-1) * num_attrs + 20]    =  1.0 + zero;  # 4th factor: aux_1[t]
-}   }   }
-
-# ----------------------------------------------------------------------------------------
-# SECOND, AN AFFINE MAP FROM OBSERVED REPORTS TO REGRESSION FACTORS FOR HIDDEN-TO-OBSERVED
-#   REPORTS MATCHING AND/OR REPORT VALUE REGULARIZATION
-# NOTE THAT WE REGULARIZE AUXILIARY ATTRIBUTES BY MATCHING THEM TO ZEROS!
-# ----------------------------------------------------------------------------------------
-
-for (t1 in (num_state_terms + 1) : num_terms) {
-    t2 = t1 - num_state_terms;
-    for (i in 1 : num_attrs) {
-        if ((i <= num_observed_attrs & t2 <= num_known_terms & castAsScalar (disabled_known_values [i, t2]) == 0.0) |
-            (i > num_observed_attrs & castAsScalar (subtotals_tree [i, 1]) > 0.0))
-        {
-            reg_index = ((t1 - 1) * num_attrs - 1 + i) * num_factors;
-            RegresValueMap [reg_index + 1, (t1 - 1) * num_attrs + i] = -1.0 + zero; # 1st factor: -y[t]
-            RegresValueMap [reg_index + 2, (t2 - 1) * num_attrs + i] =  1.0 + zero; # 2nd factor:  x[t]
-}   }   }
-
-# -----------------------------------------------------------------------
-# THIRD, AN AFFINE MAP THAT COVERS PARAMETER REGULARIZATION "REGRESSIONS"
-# -----------------------------------------------------------------------
-
-reg_index_base = num_terms * num_attrs * num_factors;
-for (param in 1:num_params)
-{
-    reg_index = reg_index_base + (param - 1) * num_factors;
-    RegresFactorDefault [reg_index + 1, 1] = -1.0 + zero;
-    RegresFactorDefault [reg_index + 2, 1] =  1.0 + zero;
-}
-
-
-# ----------------------------------------------------------
-# GENERATE AN AFFINE MAP FROM PARAMETERS TO THE COEFFICIENTS
-# AT REGRESSION FACTORS: A LINEAR MAP + A VECTOR OF DEFAULTS
-# ----------------------------------------------------------
-
-RegresParamMap = matrix (0.0, rows = (num_reg_eqs * num_factors), cols = num_params);
-RegresCoeffDefault = matrix (0.0, rows = (num_reg_eqs * num_factors), cols = 1);
-
-# -----------------------------------------------------------
-# FIRST, AN AFFINE MAP THAT COVERS HIDDEN REPORTS REGRESSIONS
-# -----------------------------------------------------------
-
-for (t in 2 : num_state_terms) {
-    for (i in 1 : num_observed_attrs) {
-        if (castAsScalar (subtotals_tree [i, 1]) > 0.0) {
-            param_1 = 3 * i - 1;
-            param_2 = 3 * i;
-            param_3 = 3 * i + 1;
-            reg_index = ((t-1) * num_attrs - 1 + i) * num_factors;
-            RegresCoeffDefault [reg_index + 1, 1]    = 1.0 + zero;
-            RegresParamMap [reg_index + 2,  param_1] = 1.0 + zero;
-            RegresParamMap [reg_index + 3,  param_2] = 1.0 + zero;
-            RegresParamMap [reg_index + 4,  param_3] = 1.0 + zero;
-    }   }
-
-    reg_index = ((t-1) * num_attrs - 1 + 19) * num_factors;
-    RegresCoeffDefault [reg_index + 1, 1] = 1.0 + zero;
-    RegresParamMap     [reg_index + 2, 1] = 1.0 + zero; # prm[1]
-    RegresCoeffDefault [reg_index + 4, 1] = 1.0 + zero;
-    
-    for (i in (num_observed_attrs + 1) : num_attrs) {    
-        if (castAsScalar (subtotals_tree [i, 1]) > 0.0) {
-            reg_index = ((t-1) * num_attrs - 1 + i) * num_factors;
-            RegresCoeffDefault [reg_index + 1, 1] = 1.0 + zero;
-            RegresCoeffDefault [reg_index + 2, 1] = 1.0 + zero;
-            if (i == 22) {
-                RegresCoeffDefault [reg_index + 3, 1] = -4.0 + zero;
-            }
-            if (i == 23) {
-                RegresCoeffDefault [reg_index + 3, 1] = -2.0 + zero;
-            }
-            if (i == 24) {
-                RegresCoeffDefault [reg_index + 3, 1] = sqrt (2.0) - 2.0 + zero;
-}   }   }   }
-
-# -----------------------------------------------------------------------
-# SECOND, AN AFFINE MAP THAT COVERS HIDDEN-TO-OBSERVED REPORTS MATCHING
-#   AND/OR REPORT VALUE REGULARIZATION
-# NOTE THAT WE REGULARIZE AUXILIARY ATTRIBUTES BY MATCHING THEM TO ZEROS!
-# -----------------------------------------------------------------------
-
-for (t1 in (num_state_terms + 1) : num_terms) {
-    t2 = t1 - num_state_terms;
-    for (i in 1 : num_attrs) {
-        if ((i <= num_observed_attrs & t2 <= num_known_terms & castAsScalar (disabled_known_values [i, t2]) == 0.0) |
-            (i > num_observed_attrs & castAsScalar (subtotals_tree [i, 1]) > 0.0))
-        {
-            reg_index = ((t1 - 1) * num_attrs - 1 + i) * num_factors;
-            RegresCoeffDefault [reg_index + 1, 1] = 1.0 + zero;
-            RegresCoeffDefault [reg_index + 2, 1] = 1.0 + zero;
-}   }   }
-
-# -----------------------------------------------------------------------
-# THIRD, AN AFFINE MAP THAT COVERS PARAMETER REGULARIZATION "REGRESSIONS"
-# -----------------------------------------------------------------------
-
-reg_index_base = num_terms * num_attrs * num_factors;
-
-param = 1;
-
-reg_index = reg_index_base + (param - 1) * num_factors;
-RegresParamMap     [reg_index + 1, param] = 1.0 + zero;
-RegresCoeffDefault [reg_index + 2,   1  ] = 0.0 + zero;
-
-for (i in 1 : num_observed_attrs) {
-    agg = castAsScalar (subtotals_tree [i, 1]);
-    if (agg >= 0.0)
-    {
-        param = 3 * i - 1;
-        
-        if (agg == 0.0) {
-            reg_index = reg_index_base + (param - 1) * num_factors;
-            RegresParamMap     [reg_index + 1, param] = 1.0 + zero;
-            RegresCoeffDefault [reg_index + 2,   1  ] = 0.0 + zero;
-        }
-        
-        param = 3 * i;
-        
-        reg_index = reg_index_base + (param - 1) * num_factors;
-        RegresParamMap     [reg_index + 1, param] = 1.0 + zero;
-        RegresCoeffDefault [reg_index + 2,   1  ] = 1.0 + zero;
-
-        param = 3 * i + 1;
-        
-        reg_index = reg_index_base + (param - 1) * num_factors;
-        RegresParamMap     [reg_index + 1, param] = 1.0 + zero;
-        RegresCoeffDefault [reg_index + 2,   1  ] = 0.0 + zero;
-    }
-}
-
-
-# ----------------------------------------------------------
-# GENERATE A VECTOR OF SCALE MULTIPLIERS, ONE PER REGRESSION
-# ----------------------------------------------------------
-
-RegresScaleMult = matrix (1.0, rows = num_reg_eqs, cols = 1);
-
-global_weight = 0.5 + zero;
-
-attribute_size = rowMeans (abs (initial_reports [1:num_observed_attrs, 1:num_known_terms]));
-max_attr_size = max (attribute_size);
-difference_size = rowMeans (abs (initial_reports [1:num_observed_attrs, 2:num_known_terms] 
-    - initial_reports [1:num_observed_attrs, 1:(num_known_terms-1)]));
-max_diff_size = max (difference_size);
-
-for (i in 1 : num_attrs)
-{
-    scale_factor = 1.0;
-    if (i <= num_observed_attrs) {
-        ### CORRECTION FOR OBSERVED ATTRIBUTES:
-        attribute_size_i = castAsScalar (attribute_size [i, 1]);
-        scale_factor = sqrt (attribute_size_i / max_attr_size) * 0.999 + 0.001;
-    }
-    for (t in 1 : num_terms) {
-        if (t <= num_state_terms) {
-        ### HIDDEN-STATE RECURRENCE REGRESSIONS
-            if (i <= num_observed_attrs) {
-            ### RECURRENCES FOR OBSERVED ATTRIBUTES:
-                acceptable_drift = scale_factor * max_attr_size * 0.0005;
-            } else {
-            ### RECURRENCES FOR AUXILIARY ATTRIBUTES:
-                acceptable_drift = scale_factor * max_diff_size * 0.0005;
-            }
-        } else {
-        ### MATCHING AND REGULARIZATION
-            if (i <= num_observed_attrs) {
-            ### MATCHING OF HIDDEN WITH OBSERVED ATTRIBUTES:
-                acceptable_drift = scale_factor * max_attr_size * 0.001;
-            } else {
-            ### REGULARIZATION OF AUXILIARY ATTRIBUTES:
-                acceptable_drift = scale_factor * max_diff_size * 0.1;
-        }   }
-        regeqn = (t-1) * num_attrs + i;
-        RegresScaleMult [regeqn, 1] = global_weight / (acceptable_drift^2);
-    }
-}
-
-for (i in 1 : num_params) {
-    regeqn = num_terms * num_attrs + i;
-    acceptable_drift = 0.05;
-    if (i == 1) {
-        acceptable_drift = 0.01; # 0.005;
-    }
-    RegresScaleMult [regeqn, 1] = global_weight / (acceptable_drift^2);
-}
-
-# --------------------------------
-# WRITE OUT ALL GENERATED MATRICES
-# --------------------------------
-
-write (initial_reports,    $2, format="text");
-write (CReps,              $3, format="text");
-write (RegresValueMap,     $4, format="text");
-write (RegresFactorDefault,$5, format="text");
-write (RegresParamMap,     $6, format="text");
-write (RegresCoeffDefault, $7, format="text");
-write (RegresScaleMult,    $8, format="text");
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# 2013-10-08: THIS IS THE ATTEMPT TO IMPLEMENT HIDDEN STATE AS "HIDDEN REPORTS"
+# THE FIRST TERMS IN THE REPORTS MATRIX ARE THE HIDDEN REPORTS, THE LAST ARE THE KNOWN REPORTS
+#
+# THIS VERSION IS WITH "DIFFERENTIAL" REGRESSIONS & AUXILIARY ATTRIBUTES
+#
+# hadoop jar SystemML.jar -f test/scripts/applications/impute/wfundInputGenerator2.dml -exec singlenode
+#    -args
+#        test/scripts/applications/impute/initial_reports_unprocessed
+#        test/scripts/applications/impute/initial_reports_preprocessed
+#        test/scripts/applications/impute/CReps 
+#        test/scripts/applications/impute/RegresValueMap
+#        test/scripts/applications/impute/RegresFactorDefault
+#        test/scripts/applications/impute/RegresParamMap
+#        test/scripts/applications/impute/RegresCoeffDefault
+#        test/scripts/applications/impute/RegresScaleMult
+
+num_observed_attrs = 19;       #  The number of attributes in the report
+num_auxiliary_attrs = 5;       #  The number of extra attributes used to decompose the observed ones
+num_attrs = num_observed_attrs + num_auxiliary_attrs;
+zero = matrix (0.0, rows = 1, cols = 1);
+
+# -------------------------------------------
+#  FEEL FREE / DON'T FORGET TO CHANGE THESE:
+# -------------------------------------------
+
+is_GROUP_4_ENABLED   = 0;      #  = 1 or 0
+is_FLIPPING_ENABLED  = 0;      #  = 1 or 0  DISABLE THIS!
+is_QUARTERLY_ENABLED = 1;      #  = 1 or 0  (enabled for sabesp)
+is_OCTALLY_ENABLED   = 0;      #  = 1 or 0  DISABLE THIS!
+
+num_known_terms = 20;          #  The number of   known   term reports
+num_predicted_terms = 1;       #  The number of predicted term reports
+num_state_terms = num_known_terms + num_predicted_terms;
+
+# Indicator matrix to show which report values should NOT be penalized
+# because of their difference between "observed" and "hidden" reports:
+
+disabled_known_values = matrix (0.0, rows = num_observed_attrs, cols = num_known_terms);
+# disabled_known_values [4, 3] = 1.0 + zero;
+# disabled_known_values [5, 3] = 1.0 + zero;
+# disabled_known_values [6, 3] = 1.0 + zero;
+# disabled_known_values [7, 3] = 1.0 + zero;
+
+
+# --------------------------------------------------------
+#  subtotals_tree [i, 1] = the closest subtotal attribute
+#  " 0" means that this attribute's values are constants
+#  "-1" means that this attribute is a root total
+# --------------------------------------------------------
+
+subtotals_tree = matrix (0.0, rows = num_attrs, cols = 1);
+
+subtotals_tree [ 1, 1] = 19 + zero;  subtotals_tree [ 9, 1] = 19 + zero;
+subtotals_tree [ 2, 1] =  1 + zero;  subtotals_tree [10, 1] =  9 + zero;
+subtotals_tree [ 3, 1] =  1 + zero;  subtotals_tree [11, 1] =  9 + zero;
+subtotals_tree [ 4, 1] =  1 + zero;  subtotals_tree [12, 1] =  9 + zero;
+subtotals_tree [ 5, 1] =  1 + zero;  subtotals_tree [13, 1] =  9 + zero;
+subtotals_tree [ 6, 1] =  1 + zero;  subtotals_tree [14, 1] =  9 + zero;
+subtotals_tree [ 7, 1] =  1 + zero;  subtotals_tree [15, 1] =  9 + zero;
+subtotals_tree [ 8, 1] = 19 + zero;  subtotals_tree [19, 1] = -1 + zero; # TOTAL
+
+if (is_GROUP_4_ENABLED == 1) {
+    subtotals_tree [16, 1] = 19 + zero;
+    subtotals_tree [17, 1] = 16 + zero;
+    subtotals_tree [18, 1] = 16 + zero;
+}
+
+subtotals_tree [20, 1] = -1 + zero;  # Auxiliary TOTAL
+subtotals_tree [21, 1] = 20 + zero;
+if (is_FLIPPING_ENABLED  == 1) {subtotals_tree [22, 1] = 20 + zero;}
+if (is_QUARTERLY_ENABLED == 1) {subtotals_tree [23, 1] = 20 + zero;}
+if (is_OCTALLY_ENABLED   == 1) {subtotals_tree [24, 1] = 20 + zero;}
+
+# -------------------------------------------------------------------
+#  We have two full column-slots for every report: one slot for the 
+#  "hidden" report (# i) and one slot for the "observed" report 
+#  (# i + num_state_terms).  Only the "hidden" part has degrees of
+#  freedom associated with it; the "observed" part is kept constant.
+#  We penalize most "hidden" values if they deviate too far from the
+#  "observed" values.  We also use this penalty to regularize
+#  auxiliary attributes and/or predicted reports, in which case their
+#  "observed" counterparts are set to zero.
+# -------------------------------------------------------------------
+
+num_terms = 2 * num_state_terms;
+
+initial_reports_unprocessed = read ($1);
+initial_reports = matrix (0.0, rows = num_attrs, cols = num_terms);
+initial_reports [1:num_observed_attrs, 1:num_known_terms] = 
+    initial_reports_unprocessed [1:num_observed_attrs, 1:num_known_terms];
+initial_reports [1:num_observed_attrs, (num_state_terms + 1) : (num_state_terms + num_known_terms)] = 
+    initial_reports_unprocessed [1:num_observed_attrs, 1:num_known_terms];
+
+disabled_known_values_extended = matrix (0.0, rows = num_attrs, cols = num_state_terms);
+disabled_known_values_extended [1:num_observed_attrs, 1:num_known_terms] = disabled_known_values;
+disabled_known_values = disabled_known_values_extended;
+
+# ---------------------------------------------------------
+# GENERATE AN AFFINE MAP FROM FREE VARIABLES TO THE REPORTS
+# AFFINE MAP = LINEAR MAP + INITIAL (DEFAULT) REPORTS
+# All free variables are mapped to the "HIDDEN" reports
+# ---------------------------------------------------------
+
+is_free = matrix (1.0, rows = num_attrs, cols = 1);
+for (i in 1:num_attrs) {
+    j = castAsScalar (subtotals_tree [i, 1]);
+    if (j > 0.0) {
+        is_free [j, 1] = 0.0 + zero;
+    } else {
+        if (j == 0.0) {
+            is_free [i, 1] = 0.0 + zero;
+}   }   }
+num_frees_per_term = sum (is_free);
+num_frees = num_state_terms * num_frees_per_term;
+
+CReps_block = matrix (0.0, rows = num_attrs, cols = num_frees_per_term);
+index_free = 0;
+for (i in 1:num_attrs) {
+    if (castAsScalar (is_free [i, 1]) == 1.0) {
+        index_free = index_free + 1;
+        j = i;
+        while (j > 0.0) {
+            CReps_block [j, index_free] = 1.0 + zero;
+            j = castAsScalar (subtotals_tree [j, 1]);
+}   }   }
+
+CReps = matrix (0.0, rows = (num_terms * num_attrs), cols = num_frees);
+for (t in 1:num_state_terms)
+{
+    dt = (t-1) * num_attrs;
+    df = (t-1) * num_frees_per_term;
+    CReps [(dt + 1) : (dt + num_attrs), (df + 1) : (df + num_frees_per_term)] = CReps_block;
+}
+
+
+# ---------------------------------------------------------
+# GENERATE AN AFFINE MAP FROM REPORTS TO REGRESSION FACTORS
+# AFFINE MAP = LINEAR MAP + A VECTOR OF DEFAULTS
+# ---------------------------------------------------------
+
+# We have one regression equation per time-term for each attribute, plus a few special
+# regularization "regression" equations.  There are three types of regressions:
+# 1. For "hidden" reports:
+#   x[t] ~ subtotal[t], x[t-1], (x[t-1] - x[t-2])
+#   (TOTAL[t] - TOTAL[t-1]) ~ (TOTAL[t-1] - TOTAL[t-2]), aux_1[t] (with coeff. 1)
+#     where aux_1[t] = aux_2[t] + ... + aux_5[t] (implemented as hard constraint)
+# 2. For "observed" reports:
+#   y[t]  ~  x[t] (with coefficient 1)
+# 3. For all parameters: regularization equations.
+# All regressions follow the 4-factor pattern.
+
+num_factors = 4; 
+num_params  = 18 * 3 + 1;
+num_reg_eqs = num_terms * num_attrs + num_params;
+
+# All regression equations for the same attribute share the same parameters, regardless
+# of the term; some parameters may be shared across multiple attributes, (those attributes
+# whose behavior is believed to be similar) as specified in the table below:
+
+# NON-TOTAL OBSERVED ATTRIBUTE REGRESSION EQUATIONS:
+#
+# Factors:                                                (x[t-1] -          
+#                     -x[t]       agg[t]       x[t-1]       x[t-2])        
+# -----------------------------------------------------------------------------
+# Row #i = 1...18:     1.0      prm[3*i-1]    prm[3*i]    prm[3*i+1]
+# (Must have: agg = subtotals_tree [i, 1] > 0.0)
+# -----------------------------------------------------------------------------
+
+# TOTAL AND AUXILIARY ATTRIBUTE REGRESSION EQUATIONS:
+#
+# Factors:          -(x[t] -    (x[t-1] -      
+#                     x[t-1])     x[t-2])      x[t-1]      aux_1[t]
+# -----------------------------------------------------------------------------
+# TOTAL (Row #19):     1.0        prm[1]        0.0          1.0      
+# aux_1 (Row #20):     0.0         0.0          0.0          0.0 
+# aux_2 (Row #21):     1.0         1.0          0.0          0.0     "steady"
+# aux_3 (Row #22):     1.0         1.0         -4.0          0.0    "flipping"
+# aux_4 (Row #23):     1.0         1.0         -2.0          0.0    "quarterly"
+# aux_5 (Row #24):     1.0         1.0       sqrt(2)-2       0.0     "octally"
+# -----------------------------------------------------------------------------
+
+# THE LAST REGULARIZATION "REGRESSION" EQUATIONS:
+#
+# Factors:            -1.0         1.0          0.0          0.0
+# -----------------------------------------------------------------------------
+# For prm[1]:         prm[1]       0.0 ?        0.0          0.0  ???
+# For i = 1...18:   prm[3*i-1]     0.0          0.0          0.0  if subtotals_tree [i, 1] == 0.0
+#                    prm[3*i]      1.0          0.0          0.0
+#                   prm[3*i+1]     0.0          0.0          0.0
+# For all others:      0.0         0.0          0.0          0.0
+# -----------------------------------------------------------------------------
+
+RegresValueMap = matrix (0.0, rows = (num_reg_eqs * num_factors), cols = (num_terms * num_attrs));
+RegresFactorDefault = matrix (0.0, rows = (num_reg_eqs * num_factors), cols = 1);
+
+# --------------------------------------------------------------
+# FIRST, AN AFFINE MAP FROM HIDDEN REPORTS TO REGRESSION FACTORS
+# --------------------------------------------------------------
+
+for (t in 2 : num_state_terms) {
+    for (i in 1 : num_attrs) {
+        reg_index = ((t-1) * num_attrs - 1 + i) * num_factors;
+        agg = castAsScalar (subtotals_tree [i, 1]);
+        if (i <= 18 & agg > 0)
+        {
+            RegresValueMap [reg_index + 1, (t-1) * num_attrs +  i ]   = -1.0 + zero;  # 1st factor: -x[t]
+            RegresValueMap [reg_index + 2, (t-1) * num_attrs + agg]   =  1.0 + zero;  # 2nd factor: agg[t]
+            RegresValueMap [reg_index + 3, (t-2) * num_attrs +  i ]   =  1.0 + zero;  # 3rd factor: x[t-1]
+            if (t == 2) {
+                RegresValueMap [reg_index + 4, (t-1) * num_attrs + i] =  1.0 + zero;  # 4th factor:
+                RegresValueMap [reg_index + 4, (t-2) * num_attrs + i] = -1.0 + zero;  #   x[t] - x[t-1]
+            } else {
+                RegresValueMap [reg_index + 4, (t-2) * num_attrs + i] =  1.0 + zero;  # 4th factor:
+                RegresValueMap [reg_index + 4, (t-3) * num_attrs + i] = -1.0 + zero;  # x[t-1] - x[t-2]
+            }
+### RegresFactorDefault [reg_index + 4, 1] = 1.0 + zero;  # 4th factor: Intercept
+        }
+        if ((i == 19 | i >= 21) & t >= 3 & agg != 0)
+        {
+            reg_index = ((t-1) * num_attrs - 1 + i) * num_factors;
+            RegresValueMap [reg_index + 1, (t-1) * num_attrs +  i]    = -1.0 + zero;  # 1st factor:
+            RegresValueMap [reg_index + 1, (t-2) * num_attrs +  i]    =  1.0 + zero;  #   - x[t] + x[t-1]
+            RegresValueMap [reg_index + 2, (t-2) * num_attrs +  i]    =  1.0 + zero;  # 2nd factor:
+            RegresValueMap [reg_index + 2, (t-3) * num_attrs +  i]    = -1.0 + zero;  #   x[t-1] - x[t-2]
+            RegresValueMap [reg_index + 3, (t-2) * num_attrs +  i]    =  1.0 + zero;  # 3rd factor: x[t-1]
+            RegresValueMap [reg_index + 4, (t-1) * num_attrs + 20]    =  1.0 + zero;  # 4th factor: aux_1[t]
+}   }   }
+
+# ----------------------------------------------------------------------------------------
+# SECOND, AN AFFINE MAP FROM OBSERVED REPORTS TO REGRESSION FACTORS FOR HIDDEN-TO-OBSERVED
+#   REPORTS MATCHING AND/OR REPORT VALUE REGULARIZATION
+# NOTE THAT WE REGULARIZE AUXILIARY ATTRIBUTES BY MATCHING THEM TO ZEROS!
+# ----------------------------------------------------------------------------------------
+
+for (t1 in (num_state_terms + 1) : num_terms) {
+    t2 = t1 - num_state_terms;
+    for (i in 1 : num_attrs) {
+        if ((i <= num_observed_attrs & t2 <= num_known_terms & castAsScalar (disabled_known_values [i, t2]) == 0.0) |
+            (i > num_observed_attrs & castAsScalar (subtotals_tree [i, 1]) > 0.0))
+        {
+            reg_index = ((t1 - 1) * num_attrs - 1 + i) * num_factors;
+            RegresValueMap [reg_index + 1, (t1 - 1) * num_attrs + i] = -1.0 + zero; # 1st factor: -y[t]
+            RegresValueMap [reg_index + 2, (t2 - 1) * num_attrs + i] =  1.0 + zero; # 2nd factor:  x[t]
+}   }   }
+
+# -----------------------------------------------------------------------
+# THIRD, AN AFFINE MAP THAT COVERS PARAMETER REGULARIZATION "REGRESSIONS"
+# -----------------------------------------------------------------------
+
+reg_index_base = num_terms * num_attrs * num_factors;
+for (param in 1:num_params)
+{
+    reg_index = reg_index_base + (param - 1) * num_factors;
+    RegresFactorDefault [reg_index + 1, 1] = -1.0 + zero;
+    RegresFactorDefault [reg_index + 2, 1] =  1.0 + zero;
+}
+
+
+# ----------------------------------------------------------
+# GENERATE AN AFFINE MAP FROM PARAMETERS TO THE COEFFICIENTS
+# AT REGRESSION FACTORS: A LINEAR MAP + A VECTOR OF DEFAULTS
+# ----------------------------------------------------------
+
+RegresParamMap = matrix (0.0, rows = (num_reg_eqs * num_factors), cols = num_params);
+RegresCoeffDefault = matrix (0.0, rows = (num_reg_eqs * num_factors), cols = 1);
+
+# -----------------------------------------------------------
+# FIRST, AN AFFINE MAP THAT COVERS HIDDEN REPORTS REGRESSIONS
+# -----------------------------------------------------------
+
+for (t in 2 : num_state_terms) {
+    for (i in 1 : num_observed_attrs) {
+        if (castAsScalar (subtotals_tree [i, 1]) > 0.0) {
+            param_1 = 3 * i - 1;
+            param_2 = 3 * i;
+            param_3 = 3 * i + 1;
+            reg_index = ((t-1) * num_attrs - 1 + i) * num_factors;
+            RegresCoeffDefault [reg_index + 1, 1]    = 1.0 + zero;
+            RegresParamMap [reg_index + 2,  param_1] = 1.0 + zero;
+            RegresParamMap [reg_index + 3,  param_2] = 1.0 + zero;
+            RegresParamMap [reg_index + 4,  param_3] = 1.0 + zero;
+    }   }
+
+    reg_index = ((t-1) * num_attrs - 1 + 19) * num_factors;
+    RegresCoeffDefault [reg_index + 1, 1] = 1.0 + zero;
+    RegresParamMap     [reg_index + 2, 1] = 1.0 + zero; # prm[1]
+    RegresCoeffDefault [reg_index + 4, 1] = 1.0 + zero;
+    
+    for (i in (num_observed_attrs + 1) : num_attrs) {    
+        if (castAsScalar (subtotals_tree [i, 1]) > 0.0) {
+            reg_index = ((t-1) * num_attrs - 1 + i) * num_factors;
+            RegresCoeffDefault [reg_index + 1, 1] = 1.0 + zero;
+            RegresCoeffDefault [reg_index + 2, 1] = 1.0 + zero;
+            if (i == 22) {
+                RegresCoeffDefault [reg_index + 3, 1] = -4.0 + zero;
+            }
+            if (i == 23) {
+                RegresCoeffDefault [reg_index + 3, 1] = -2.0 + zero;
+            }
+            if (i == 24) {
+                RegresCoeffDefault [reg_index + 3, 1] = sqrt (2.0) - 2.0 + zero;
+}   }   }   }
+
+# -----------------------------------------------------------------------
+# SECOND, AN AFFINE MAP THAT COVERS HIDDEN-TO-OBSERVED REPORTS MATCHING
+#   AND/OR REPORT VALUE REGULARIZATION
+# NOTE THAT WE REGULARIZE AUXILIARY ATTRIBUTES BY MATCHING THEM TO ZEROS!
+# -----------------------------------------------------------------------
+
+for (t1 in (num_state_terms + 1) : num_terms) {
+    t2 = t1 - num_state_terms;
+    for (i in 1 : num_attrs) {
+        if ((i <= num_observed_attrs & t2 <= num_known_terms & castAsScalar (disabled_known_values [i, t2]) == 0.0) |
+            (i > num_observed_attrs & castAsScalar (subtotals_tree [i, 1]) > 0.0))
+        {
+            reg_index = ((t1 - 1) * num_attrs - 1 + i) * num_factors;
+            RegresCoeffDefault [reg_index + 1, 1] = 1.0 + zero;
+            RegresCoeffDefault [reg_index + 2, 1] = 1.0 + zero;
+}   }   }
+
+# -----------------------------------------------------------------------
+# THIRD, AN AFFINE MAP THAT COVERS PARAMETER REGULARIZATION "REGRESSIONS"
+# -----------------------------------------------------------------------
+
+reg_index_base = num_terms * num_attrs * num_factors;
+
+param = 1;
+
+reg_index = reg_index_base + (param - 1) * num_factors;
+RegresParamMap     [reg_index + 1, param] = 1.0 + zero;
+RegresCoeffDefault [reg_index + 2,   1  ] = 0.0 + zero;
+
+for (i in 1 : num_observed_attrs) {
+    agg = castAsScalar (subtotals_tree [i, 1]);
+    if (agg >= 0.0)
+    {
+        param = 3 * i - 1;
+        
+        if (agg == 0.0) {
+            reg_index = reg_index_base + (param - 1) * num_factors;
+            RegresParamMap     [reg_index + 1, param] = 1.0 + zero;
+            RegresCoeffDefault [reg_index + 2,   1  ] = 0.0 + zero;
+        }
+        
+        param = 3 * i;
+        
+        reg_index = reg_index_base + (param - 1) * num_factors;
+        RegresParamMap     [reg_index + 1, param] = 1.0 + zero;
+        RegresCoeffDefault [reg_index + 2,   1  ] = 1.0 + zero;
+
+        param = 3 * i + 1;
+        
+        reg_index = reg_index_base + (param - 1) * num_factors;
+        RegresParamMap     [reg_index + 1, param] = 1.0 + zero;
+        RegresCoeffDefault [reg_index + 2,   1  ] = 0.0 + zero;
+    }
+}
+
+
+# ----------------------------------------------------------
+# GENERATE A VECTOR OF SCALE MULTIPLIERS, ONE PER REGRESSION
+# ----------------------------------------------------------
+
+RegresScaleMult = matrix (1.0, rows = num_reg_eqs, cols = 1);
+
+global_weight = 0.5 + zero;
+
+attribute_size = rowMeans (abs (initial_reports [1:num_observed_attrs, 1:num_known_terms]));
+max_attr_size = max (attribute_size);
+difference_size = rowMeans (abs (initial_reports [1:num_observed_attrs, 2:num_known_terms] 
+    - initial_reports [1:num_observed_attrs, 1:(num_known_terms-1)]));
+max_diff_size = max (difference_size);
+
+for (i in 1 : num_attrs)
+{
+    scale_factor = 1.0;
+    if (i <= num_observed_attrs) {
+        ### CORRECTION FOR OBSERVED ATTRIBUTES:
+        attribute_size_i = castAsScalar (attribute_size [i, 1]);
+        scale_factor = sqrt (attribute_size_i / max_attr_size) * 0.999 + 0.001;
+    }
+    for (t in 1 : num_terms) {
+        if (t <= num_state_terms) {
+        ### HIDDEN-STATE RECURRENCE REGRESSIONS
+            if (i <= num_observed_attrs) {
+            ### RECURRENCES FOR OBSERVED ATTRIBUTES:
+                acceptable_drift = scale_factor * max_attr_size * 0.0005;
+            } else {
+            ### RECURRENCES FOR AUXILIARY ATTRIBUTES:
+                acceptable_drift = scale_factor * max_diff_size * 0.0005;
+            }
+        } else {
+        ### MATCHING AND REGULARIZATION
+            if (i <= num_observed_attrs) {
+            ### MATCHING OF HIDDEN WITH OBSERVED ATTRIBUTES:
+                acceptable_drift = scale_factor * max_attr_size * 0.001;
+            } else {
+            ### REGULARIZATION OF AUXILIARY ATTRIBUTES:
+                acceptable_drift = scale_factor * max_diff_size * 0.1;
+        }   }
+        regeqn = (t-1) * num_attrs + i;
+        RegresScaleMult [regeqn, 1] = global_weight / (acceptable_drift^2);
+    }
+}
+
+for (i in 1 : num_params) {
+    regeqn = num_terms * num_attrs + i;
+    acceptable_drift = 0.05;
+    if (i == 1) {
+        acceptable_drift = 0.01; # 0.005;
+    }
+    RegresScaleMult [regeqn, 1] = global_weight / (acceptable_drift^2);
+}
+
+# --------------------------------
+# WRITE OUT ALL GENERATED MATRICES
+# --------------------------------
+
+write (initial_reports,    $2, format="text");
+write (CReps,              $3, format="text");
+write (RegresValueMap,     $4, format="text");
+write (RegresFactorDefault,$5, format="text");
+write (RegresParamMap,     $6, format="text");
+write (RegresCoeffDefault, $7, format="text");
+write (RegresScaleMult,    $8, format="text");


[34/55] [partial] incubator-systemml git commit: [SYSTEMML-482] [SYSTEMML-480] Adding a Git attributes file to enforce Unix-styled line endings, and normalizing all of the line endings.

Posted by du...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/main/java/org/apache/sysml/runtime/transform/GTFMTDReducer.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/transform/GTFMTDReducer.java b/src/main/java/org/apache/sysml/runtime/transform/GTFMTDReducer.java
index 1a646cf..2e3fd75 100644
--- a/src/main/java/org/apache/sysml/runtime/transform/GTFMTDReducer.java
+++ b/src/main/java/org/apache/sysml/runtime/transform/GTFMTDReducer.java
@@ -1,124 +1,124 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.sysml.runtime.transform;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.Iterator;
-
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.ByteWritable;
-import org.apache.hadoop.io.IntWritable;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.SequenceFile;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reducer;
-import org.apache.hadoop.mapred.Reporter;
-import org.apache.wink.json4j.JSONException;
-
-import org.apache.sysml.runtime.matrix.CSVReblockMR.OffsetCount;
-import org.apache.sysml.runtime.matrix.mapred.MRJobConfiguration;
-
-
-public class GTFMTDReducer implements Reducer<IntWritable, DistinctValue, Text, LongWritable> {
-	
-	private JobConf _rJob = null;
-	TfUtils _agents = null;
-	
-	@Override
-	public void configure(JobConf job) {
-		_rJob = job;
-		
-		try {
-			String outputDir = MRJobConfiguration.getOutputs(job)[0];
-			_agents = new TfUtils(job, outputDir);
-		} 
-		catch(IOException e)  { throw new RuntimeException(e); }
-		catch(JSONException e)  { throw new RuntimeException(e); }
-	}
-
-	@Override
-	public void close() throws IOException {
-	}
-	
-	@Override
-	public void reduce(IntWritable key, Iterator<DistinctValue> values,
-			OutputCollector<Text, LongWritable> output, Reporter reporter)
-			throws IOException {
-		
-		FileSystem fs = FileSystem.get(_rJob);
-		
-		int colID = key.get();
-		
-		if(colID < 0) 
-		{
-			// process mapper output for MV and Bin agents
-			colID = colID*-1;
-			_agents.getMVImputeAgent().mergeAndOutputTransformationMetadata(values, _agents.getTfMtdDir(), colID, fs, _agents);
-		}
-		else if ( colID == _agents.getNumCols() + 1)
-		{
-			// process mapper output for OFFSET_FILE
-			ArrayList<OffsetCount> list = new ArrayList<OffsetCount>();
-			while(values.hasNext())
-				list.add(new OffsetCount(values.next().getOffsetCount()));
-			
-			long numTfRows = generateOffsetsFile(list);
-			reporter.incrCounter(MRJobConfiguration.DataTransformCounters.TRANSFORMED_NUM_ROWS, numTfRows);
-
-		}
-		else 
-		{
-			// process mapper output for Recode agent
-			_agents.getRecodeAgent().mergeAndOutputTransformationMetadata(values, _agents.getTfMtdDir(), colID, fs, _agents);
-		}
-		
-	}
-	
-	@SuppressWarnings("unchecked")
-	private long generateOffsetsFile(ArrayList<OffsetCount> list) throws IllegalArgumentException, IOException 
-	{
-		Collections.sort(list);
-		
-		@SuppressWarnings("deprecation")
-		SequenceFile.Writer writer = new SequenceFile.Writer(
-				FileSystem.get(_rJob), _rJob, 
-				new Path(_agents.getOffsetFile()+"/part-00000"), 
-				ByteWritable.class, OffsetCount.class);
-		
-		long lineOffset=0;
-		for(OffsetCount oc: list)
-		{
-			long count=oc.count;
-			oc.count=lineOffset;
-			writer.append(new ByteWritable((byte)0), oc);
-			lineOffset+=count;
-		}
-		writer.close();
-		list.clear();
-		
-		return lineOffset;
-	}
-	
-}
-
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.runtime.transform;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Iterator;
+
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.ByteWritable;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reducer;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.wink.json4j.JSONException;
+
+import org.apache.sysml.runtime.matrix.CSVReblockMR.OffsetCount;
+import org.apache.sysml.runtime.matrix.mapred.MRJobConfiguration;
+
+
+public class GTFMTDReducer implements Reducer<IntWritable, DistinctValue, Text, LongWritable> {
+	
+	private JobConf _rJob = null;
+	TfUtils _agents = null;
+	
+	@Override
+	public void configure(JobConf job) {
+		_rJob = job;
+		
+		try {
+			String outputDir = MRJobConfiguration.getOutputs(job)[0];
+			_agents = new TfUtils(job, outputDir);
+		} 
+		catch(IOException e)  { throw new RuntimeException(e); }
+		catch(JSONException e)  { throw new RuntimeException(e); }
+	}
+
+	@Override
+	public void close() throws IOException {
+	}
+	
+	@Override
+	public void reduce(IntWritable key, Iterator<DistinctValue> values,
+			OutputCollector<Text, LongWritable> output, Reporter reporter)
+			throws IOException {
+		
+		FileSystem fs = FileSystem.get(_rJob);
+		
+		int colID = key.get();
+		
+		if(colID < 0) 
+		{
+			// process mapper output for MV and Bin agents
+			colID = colID*-1;
+			_agents.getMVImputeAgent().mergeAndOutputTransformationMetadata(values, _agents.getTfMtdDir(), colID, fs, _agents);
+		}
+		else if ( colID == _agents.getNumCols() + 1)
+		{
+			// process mapper output for OFFSET_FILE
+			ArrayList<OffsetCount> list = new ArrayList<OffsetCount>();
+			while(values.hasNext())
+				list.add(new OffsetCount(values.next().getOffsetCount()));
+			
+			long numTfRows = generateOffsetsFile(list);
+			reporter.incrCounter(MRJobConfiguration.DataTransformCounters.TRANSFORMED_NUM_ROWS, numTfRows);
+
+		}
+		else 
+		{
+			// process mapper output for Recode agent
+			_agents.getRecodeAgent().mergeAndOutputTransformationMetadata(values, _agents.getTfMtdDir(), colID, fs, _agents);
+		}
+		
+	}
+	
+	@SuppressWarnings("unchecked")
+	private long generateOffsetsFile(ArrayList<OffsetCount> list) throws IllegalArgumentException, IOException 
+	{
+		Collections.sort(list);
+		
+		@SuppressWarnings("deprecation")
+		SequenceFile.Writer writer = new SequenceFile.Writer(
+				FileSystem.get(_rJob), _rJob, 
+				new Path(_agents.getOffsetFile()+"/part-00000"), 
+				ByteWritable.class, OffsetCount.class);
+		
+		long lineOffset=0;
+		for(OffsetCount oc: list)
+		{
+			long count=oc.count;
+			oc.count=lineOffset;
+			writer.append(new ByteWritable((byte)0), oc);
+			lineOffset+=count;
+		}
+		writer.close();
+		list.clear();
+		
+		return lineOffset;
+	}
+	
+}
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/main/java/org/apache/sysml/runtime/transform/GenTfMtdMR.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/transform/GenTfMtdMR.java b/src/main/java/org/apache/sysml/runtime/transform/GenTfMtdMR.java
index b1e79dd..09b9148 100644
--- a/src/main/java/org/apache/sysml/runtime/transform/GenTfMtdMR.java
+++ b/src/main/java/org/apache/sysml/runtime/transform/GenTfMtdMR.java
@@ -1,106 +1,106 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.sysml.runtime.transform;
-import java.io.IOException;
-
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.IntWritable;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapred.Counters;
-import org.apache.hadoop.mapred.FileInputFormat;
-import org.apache.hadoop.mapred.FileOutputFormat;
-import org.apache.hadoop.mapred.JobClient;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.RunningJob;
-import org.apache.hadoop.mapred.TextInputFormat;
-import org.apache.hadoop.mapred.lib.NullOutputFormat;
-
-import org.apache.sysml.runtime.matrix.data.CSVFileFormatProperties;
-import org.apache.sysml.runtime.matrix.mapred.MRJobConfiguration;
-
-/**
- * MR Job to Generate Transform Metadata based on a given transformation specification file (JSON format).
- *
- */
-
-public class GenTfMtdMR {
-
-	public static final String DELIM = ",";
-
-	public static long runJob(String inputPath, String txMtdPath, String specFileWithIDs, String smallestFile, String partOffsetsFile, CSVFileFormatProperties inputDataProperties, long numCols, int replication, String headerLine) throws IOException, ClassNotFoundException, InterruptedException {
-		JobConf job = new JobConf(GenTfMtdMR.class);
-		job.setJobName("GenTfMTD");
-		
-		/* Setup MapReduce Job */
-		job.setJarByClass(GenTfMtdMR.class);
-		
-		// set relevant classes
-		job.setMapperClass(GTFMTDMapper.class);
-		job.setReducerClass(GTFMTDReducer.class);
-	
-		// set input and output properties
-		job.setInputFormat(TextInputFormat.class);
-		job.setOutputFormat(NullOutputFormat.class);
-		
-		job.setMapOutputKeyClass(IntWritable.class);
-		job.setMapOutputValueClass(DistinctValue.class);
-		
-		job.setOutputKeyClass(Text.class);
-		job.setOutputValueClass(LongWritable.class);
-		
-		job.setInt("dfs.replication", replication);
-
-		FileInputFormat.addInputPath(job, new Path(inputPath));
-		// delete outputPath, if exists already.
-		Path outPath = new Path(txMtdPath);
-		FileSystem fs = FileSystem.get(job);
-		fs.delete(outPath, true);
-		FileOutputFormat.setOutputPath(job, outPath);
-
-		job.set(MRJobConfiguration.TF_HAS_HEADER, Boolean.toString(inputDataProperties.hasHeader()));
-		job.set(MRJobConfiguration.TF_DELIM, inputDataProperties.getDelim());
-		if ( inputDataProperties.getNAStrings() != null)
-			// Adding "dummy" string to handle the case of na_strings = ""
-			job.set(MRJobConfiguration.TF_NA_STRINGS, TfUtils.prepNAStrings(inputDataProperties.getNAStrings()) );
-		job.set(MRJobConfiguration.TF_SPEC_FILE, specFileWithIDs);
-		job.set(MRJobConfiguration.TF_SMALLEST_FILE, smallestFile);
-		job.setLong(MRJobConfiguration.TF_NUM_COLS, numCols);
-		job.set(MRJobConfiguration.TF_HEADER, headerLine);
-		
-		job.set(MRJobConfiguration.OUTPUT_MATRICES_DIRS_CONFIG, txMtdPath);
-		
-		// offsets file to store part-file names and offsets for each input split
-		job.set(MRJobConfiguration.TF_OFFSETS_FILE, partOffsetsFile);
-		
-		//turn off adaptivemr
-		job.setBoolean("adaptivemr.map.enable", false);
-		
-		// Run the job
-		RunningJob runjob = JobClient.runJob(job);
-		
-		Counters c = runjob.getCounters();
-		long tx_numRows = c.findCounter(MRJobConfiguration.DataTransformCounters.TRANSFORMED_NUM_ROWS).getCounter();
-
-		return tx_numRows;
-	}
-	
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.runtime.transform;
+import java.io.IOException;
+
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.Counters;
+import org.apache.hadoop.mapred.FileInputFormat;
+import org.apache.hadoop.mapred.FileOutputFormat;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.RunningJob;
+import org.apache.hadoop.mapred.TextInputFormat;
+import org.apache.hadoop.mapred.lib.NullOutputFormat;
+
+import org.apache.sysml.runtime.matrix.data.CSVFileFormatProperties;
+import org.apache.sysml.runtime.matrix.mapred.MRJobConfiguration;
+
+/**
+ * MR Job to Generate Transform Metadata based on a given transformation specification file (JSON format).
+ *
+ */
+
+public class GenTfMtdMR {
+
+	public static final String DELIM = ",";
+
+	public static long runJob(String inputPath, String txMtdPath, String specFileWithIDs, String smallestFile, String partOffsetsFile, CSVFileFormatProperties inputDataProperties, long numCols, int replication, String headerLine) throws IOException, ClassNotFoundException, InterruptedException {
+		JobConf job = new JobConf(GenTfMtdMR.class);
+		job.setJobName("GenTfMTD");
+		
+		/* Setup MapReduce Job */
+		job.setJarByClass(GenTfMtdMR.class);
+		
+		// set relevant classes
+		job.setMapperClass(GTFMTDMapper.class);
+		job.setReducerClass(GTFMTDReducer.class);
+	
+		// set input and output properties
+		job.setInputFormat(TextInputFormat.class);
+		job.setOutputFormat(NullOutputFormat.class);
+		
+		job.setMapOutputKeyClass(IntWritable.class);
+		job.setMapOutputValueClass(DistinctValue.class);
+		
+		job.setOutputKeyClass(Text.class);
+		job.setOutputValueClass(LongWritable.class);
+		
+		job.setInt("dfs.replication", replication);
+
+		FileInputFormat.addInputPath(job, new Path(inputPath));
+		// delete outputPath, if exists already.
+		Path outPath = new Path(txMtdPath);
+		FileSystem fs = FileSystem.get(job);
+		fs.delete(outPath, true);
+		FileOutputFormat.setOutputPath(job, outPath);
+
+		job.set(MRJobConfiguration.TF_HAS_HEADER, Boolean.toString(inputDataProperties.hasHeader()));
+		job.set(MRJobConfiguration.TF_DELIM, inputDataProperties.getDelim());
+		if ( inputDataProperties.getNAStrings() != null)
+			// Adding "dummy" string to handle the case of na_strings = ""
+			job.set(MRJobConfiguration.TF_NA_STRINGS, TfUtils.prepNAStrings(inputDataProperties.getNAStrings()) );
+		job.set(MRJobConfiguration.TF_SPEC_FILE, specFileWithIDs);
+		job.set(MRJobConfiguration.TF_SMALLEST_FILE, smallestFile);
+		job.setLong(MRJobConfiguration.TF_NUM_COLS, numCols);
+		job.set(MRJobConfiguration.TF_HEADER, headerLine);
+		
+		job.set(MRJobConfiguration.OUTPUT_MATRICES_DIRS_CONFIG, txMtdPath);
+		
+		// offsets file to store part-file names and offsets for each input split
+		job.set(MRJobConfiguration.TF_OFFSETS_FILE, partOffsetsFile);
+		
+		//turn off adaptivemr
+		job.setBoolean("adaptivemr.map.enable", false);
+		
+		// Run the job
+		RunningJob runjob = JobClient.runJob(job);
+		
+		Counters c = runjob.getCounters();
+		long tx_numRows = c.findCounter(MRJobConfiguration.DataTransformCounters.TRANSFORMED_NUM_ROWS).getCounter();
+
+		return tx_numRows;
+	}
+	
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/main/java/org/apache/sysml/runtime/transform/GenTfMtdSPARK.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/transform/GenTfMtdSPARK.java b/src/main/java/org/apache/sysml/runtime/transform/GenTfMtdSPARK.java
index 6b811ef..e0644ff 100644
--- a/src/main/java/org/apache/sysml/runtime/transform/GenTfMtdSPARK.java
+++ b/src/main/java/org/apache/sysml/runtime/transform/GenTfMtdSPARK.java
@@ -1,235 +1,235 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.sysml.runtime.transform;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.Iterator;
-
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.ByteWritable;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.SequenceFile;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.spark.api.java.JavaPairRDD;
-import org.apache.spark.api.java.JavaRDD;
-import org.apache.spark.api.java.function.FlatMapFunction;
-import org.apache.spark.api.java.function.Function2;
-import org.apache.wink.json4j.JSONException;
-import org.apache.wink.json4j.JSONObject;
-
-import scala.Tuple2;
-
-import org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext;
-import org.apache.sysml.runtime.matrix.CSVReblockMR.OffsetCount;
-import org.apache.sysml.runtime.matrix.data.CSVFileFormatProperties;
-
-public class GenTfMtdSPARK {
-
-	/**
-	 * Spark code to Generate Transform Metadata based on the given transformation
-	 * specification file (JSON format).
-	 * 
-	 */
-
-	public static long runSparkJob(SparkExecutionContext sec, JavaRDD<Tuple2<LongWritable, Text>> inputRDD, 
-									String tfMtdPath, String specFile, 
-									String partOffsetsFile, CSVFileFormatProperties prop, 
-									long numCols, String headerLine
-								) throws IOException, ClassNotFoundException, InterruptedException, IllegalArgumentException, JSONException {
-		
-		// Construct transformation metadata (map-side)
-		// Note: logic is similar to GTFMTDMapper
-		JavaRDD<Tuple2<Integer,DistinctValue>> tfMapOutput 
-			= inputRDD.mapPartitionsWithIndex(
-					new GenTfMtdMap(prop.hasHeader(), 
-									prop.getDelim(), 
-									prop.getNAStrings(), 
-									specFile, 
-									numCols, 
-									headerLine), 
-					true );
-		
-		// Shuffle to group by DistinctValue
-		JavaPairRDD<Integer,Iterable<DistinctValue>> rdd = JavaPairRDD.fromJavaRDD(tfMapOutput).groupByKey();
-		
-		// Construct transformation metadata (Reduce-side)
-		// Note: logic is similar to GTFMTDReducer
-		JavaRDD<Long> out 
-			= rdd.flatMap(new GenTfMtdReduce(prop.hasHeader(), 
-												prop.getDelim(), 
-												prop.getNAStrings(), 
-												headerLine, 
-												tfMtdPath, 
-												partOffsetsFile, 
-												specFile, 
-												numCols)  );
-		
-		// Compute the total number of transformed rows
-		long numRows = out.reduce(new Function2<Long,Long,Long>() {
-			private static final long serialVersionUID = 1263336168859959795L;
-
-			@Override
-			public Long call(Long v1, Long v2) throws Exception {
-				return v1+v2;
-			}
-			
-		});
-		
-		return numRows;
-	}
-	
-	// ----------------------------------------------------------------------------------------------------------------------
-	
-	public static class GenTfMtdMap implements Function2<Integer, Iterator<Tuple2<LongWritable, Text>>, Iterator<Tuple2<Integer,DistinctValue>>> {
-
-		private static final long serialVersionUID = -5622745445470598215L;
-		
-		TfUtils _agents = null;
-		
-		GenTfMtdMap(boolean hasHeader, String delim, String naStrings, String specFile, long numCols, String headerLine) throws IllegalArgumentException, IOException, JSONException {
-			
-			// Setup Transformation Agents
-			JobConf job = new JobConf();
-			FileSystem fs = FileSystem.get(job);
-			String[] nas = TfUtils.parseNAStrings(naStrings);
-			
-			JSONObject spec = TfUtils.readSpec(fs, specFile);
-			_agents = new TfUtils(headerLine, hasHeader, delim, nas, spec, numCols, null, null, null);
-
-		}
-		
-		@Override
-		public Iterator<Tuple2<Integer,DistinctValue>> call(Integer partitionID,
-				Iterator<Tuple2<LongWritable, Text>> csvLines) throws Exception {
-			
-			// Construct transformation metadata by looping through csvLines
-			// Note: logic is similar to GTFMTDMapper
-			
-			boolean first = true;
-			Tuple2<LongWritable, Text> rec = null;
-			long _offsetInPartFile = -1;
-			
-			while(csvLines.hasNext()) {
-				rec = csvLines.next();
-				
-				if (first) {
-					first = false;
-					_offsetInPartFile = rec._1().get();
-					
-					if (partitionID == 0 && _agents.hasHeader() && _offsetInPartFile == 0 )
-						continue; // skip the header line
-				}
-				
-				_agents.prepareTfMtd(rec._2().toString());
-			}
-			
-			// Prepare the output in the form of DistinctValues, which subsequently need to be grouped and aggregated. 
-			
-			ArrayList<Tuple2<Integer,DistinctValue>> outList = new ArrayList<Tuple2<Integer,DistinctValue>>();
-			
-			_agents.getMVImputeAgent().mapOutputTransformationMetadata(partitionID, outList, _agents);
-			_agents.getRecodeAgent().mapOutputTransformationMetadata(partitionID, outList, _agents);
-			_agents.getBinAgent().mapOutputTransformationMetadata(partitionID, outList, _agents);
-			
-			DistinctValue dv = new DistinctValue(new OffsetCount("Partition"+partitionID, _offsetInPartFile, _agents.getTotal()));
-			Tuple2<Integer, DistinctValue> tuple = new Tuple2<Integer, DistinctValue>((int) (_agents.getNumCols()+1), dv); 
-			outList.add(tuple);
-
-			return outList.iterator();
-		}
-		
-	}
-	
-	// ------------------------------------------------------------------------------------------------
-	
-	public static class GenTfMtdReduce implements FlatMapFunction<Tuple2<Integer, Iterable<DistinctValue>>, Long> {
-		
-		private static final long serialVersionUID = -2733233671193035242L;
-		TfUtils _agents = null;
-		
-		GenTfMtdReduce(boolean hasHeader, String delim, String naStrings, String headerLine, String tfMtdDir, String offsetFile, String specFile, long numCols) throws IOException, JSONException {
-			String[] nas = TfUtils.parseNAStrings(naStrings); 
-			FileSystem fs = FileSystem.get(new JobConf());
-
-			JSONObject spec = TfUtils.readSpec(fs, specFile);
-			_agents = new TfUtils(headerLine, hasHeader, delim, nas, spec, numCols, tfMtdDir, offsetFile, null);
-		}
-
-		@SuppressWarnings("unchecked")
-		@Override
-		public Iterable<Long> call(Tuple2<Integer, Iterable<DistinctValue>> t)
-				throws Exception {
-			
-			int colID = t._1();
-			Iterator<DistinctValue> iterDV = t._2().iterator();
-
-			JobConf job = new JobConf();
-			FileSystem fs = FileSystem.get(job);
-			
-			ArrayList<Long> numRows = new ArrayList<Long>();
-			
-			if(colID < 0) 
-			{
-				// process mapper output for MV and Bin agents
-				colID = colID*-1;
-				_agents.getMVImputeAgent().mergeAndOutputTransformationMetadata(iterDV, _agents.getTfMtdDir(), colID, fs, _agents);
-				numRows.add(0L);
-			}
-			else if ( colID == _agents.getNumCols() + 1)
-			{
-				// process mapper output for OFFSET_FILE
-				ArrayList<OffsetCount> list = new ArrayList<OffsetCount>();
-				while(iterDV.hasNext())
-					list.add(new OffsetCount(iterDV.next().getOffsetCount()));
-				Collections.sort(list);
-				
-				@SuppressWarnings("deprecation")
-				SequenceFile.Writer writer = new SequenceFile.Writer(fs, job, new Path(_agents.getOffsetFile()+"/part-00000"), ByteWritable.class, OffsetCount.class);
-				
-				long lineOffset=0;
-				for(OffsetCount oc: list)
-				{
-					long count=oc.count;
-					oc.count=lineOffset;
-					writer.append(new ByteWritable((byte)0), oc);
-					lineOffset+=count;
-				}
-				writer.close();
-				list.clear();
-				
-				numRows.add(lineOffset);
-			}
-			else 
-			{
-				// process mapper output for Recode agent
-				_agents.getRecodeAgent().mergeAndOutputTransformationMetadata(iterDV, _agents.getTfMtdDir(), colID, fs, _agents);
-				numRows.add(0L);
-			}
-			
-			return numRows;
-		}
-
-	}
-
-	
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.runtime.transform;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Iterator;
+
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.ByteWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.spark.api.java.JavaPairRDD;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.function.FlatMapFunction;
+import org.apache.spark.api.java.function.Function2;
+import org.apache.wink.json4j.JSONException;
+import org.apache.wink.json4j.JSONObject;
+
+import scala.Tuple2;
+
+import org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext;
+import org.apache.sysml.runtime.matrix.CSVReblockMR.OffsetCount;
+import org.apache.sysml.runtime.matrix.data.CSVFileFormatProperties;
+
+public class GenTfMtdSPARK {
+
+	/**
+	 * Spark code to Generate Transform Metadata based on the given transformation
+	 * specification file (JSON format).
+	 * 
+	 */
+
+	public static long runSparkJob(SparkExecutionContext sec, JavaRDD<Tuple2<LongWritable, Text>> inputRDD, 
+									String tfMtdPath, String specFile, 
+									String partOffsetsFile, CSVFileFormatProperties prop, 
+									long numCols, String headerLine
+								) throws IOException, ClassNotFoundException, InterruptedException, IllegalArgumentException, JSONException {
+		
+		// Construct transformation metadata (map-side)
+		// Note: logic is similar to GTFMTDMapper
+		JavaRDD<Tuple2<Integer,DistinctValue>> tfMapOutput 
+			= inputRDD.mapPartitionsWithIndex(
+					new GenTfMtdMap(prop.hasHeader(), 
+									prop.getDelim(), 
+									prop.getNAStrings(), 
+									specFile, 
+									numCols, 
+									headerLine), 
+					true );
+		
+		// Shuffle to group by DistinctValue
+		JavaPairRDD<Integer,Iterable<DistinctValue>> rdd = JavaPairRDD.fromJavaRDD(tfMapOutput).groupByKey();
+		
+		// Construct transformation metadata (Reduce-side)
+		// Note: logic is similar to GTFMTDReducer
+		JavaRDD<Long> out 
+			= rdd.flatMap(new GenTfMtdReduce(prop.hasHeader(), 
+												prop.getDelim(), 
+												prop.getNAStrings(), 
+												headerLine, 
+												tfMtdPath, 
+												partOffsetsFile, 
+												specFile, 
+												numCols)  );
+		
+		// Compute the total number of transformed rows
+		long numRows = out.reduce(new Function2<Long,Long,Long>() {
+			private static final long serialVersionUID = 1263336168859959795L;
+
+			@Override
+			public Long call(Long v1, Long v2) throws Exception {
+				return v1+v2;
+			}
+			
+		});
+		
+		return numRows;
+	}
+	
+	// ----------------------------------------------------------------------------------------------------------------------
+	
+	public static class GenTfMtdMap implements Function2<Integer, Iterator<Tuple2<LongWritable, Text>>, Iterator<Tuple2<Integer,DistinctValue>>> {
+
+		private static final long serialVersionUID = -5622745445470598215L;
+		
+		TfUtils _agents = null;
+		
+		GenTfMtdMap(boolean hasHeader, String delim, String naStrings, String specFile, long numCols, String headerLine) throws IllegalArgumentException, IOException, JSONException {
+			
+			// Setup Transformation Agents
+			JobConf job = new JobConf();
+			FileSystem fs = FileSystem.get(job);
+			String[] nas = TfUtils.parseNAStrings(naStrings);
+			
+			JSONObject spec = TfUtils.readSpec(fs, specFile);
+			_agents = new TfUtils(headerLine, hasHeader, delim, nas, spec, numCols, null, null, null);
+
+		}
+		
+		@Override
+		public Iterator<Tuple2<Integer,DistinctValue>> call(Integer partitionID,
+				Iterator<Tuple2<LongWritable, Text>> csvLines) throws Exception {
+			
+			// Construct transformation metadata by looping through csvLines
+			// Note: logic is similar to GTFMTDMapper
+			
+			boolean first = true;
+			Tuple2<LongWritable, Text> rec = null;
+			long _offsetInPartFile = -1;
+			
+			while(csvLines.hasNext()) {
+				rec = csvLines.next();
+				
+				if (first) {
+					first = false;
+					_offsetInPartFile = rec._1().get();
+					
+					if (partitionID == 0 && _agents.hasHeader() && _offsetInPartFile == 0 )
+						continue; // skip the header line
+				}
+				
+				_agents.prepareTfMtd(rec._2().toString());
+			}
+			
+			// Prepare the output in the form of DistinctValues, which subsequently need to be grouped and aggregated. 
+			
+			ArrayList<Tuple2<Integer,DistinctValue>> outList = new ArrayList<Tuple2<Integer,DistinctValue>>();
+			
+			_agents.getMVImputeAgent().mapOutputTransformationMetadata(partitionID, outList, _agents);
+			_agents.getRecodeAgent().mapOutputTransformationMetadata(partitionID, outList, _agents);
+			_agents.getBinAgent().mapOutputTransformationMetadata(partitionID, outList, _agents);
+			
+			DistinctValue dv = new DistinctValue(new OffsetCount("Partition"+partitionID, _offsetInPartFile, _agents.getTotal()));
+			Tuple2<Integer, DistinctValue> tuple = new Tuple2<Integer, DistinctValue>((int) (_agents.getNumCols()+1), dv); 
+			outList.add(tuple);
+
+			return outList.iterator();
+		}
+		
+	}
+	
+	// ------------------------------------------------------------------------------------------------
+	
+	public static class GenTfMtdReduce implements FlatMapFunction<Tuple2<Integer, Iterable<DistinctValue>>, Long> {
+		
+		private static final long serialVersionUID = -2733233671193035242L;
+		TfUtils _agents = null;
+		
+		GenTfMtdReduce(boolean hasHeader, String delim, String naStrings, String headerLine, String tfMtdDir, String offsetFile, String specFile, long numCols) throws IOException, JSONException {
+			String[] nas = TfUtils.parseNAStrings(naStrings); 
+			FileSystem fs = FileSystem.get(new JobConf());
+
+			JSONObject spec = TfUtils.readSpec(fs, specFile);
+			_agents = new TfUtils(headerLine, hasHeader, delim, nas, spec, numCols, tfMtdDir, offsetFile, null);
+		}
+
+		@SuppressWarnings("unchecked")
+		@Override
+		public Iterable<Long> call(Tuple2<Integer, Iterable<DistinctValue>> t)
+				throws Exception {
+			
+			int colID = t._1();
+			Iterator<DistinctValue> iterDV = t._2().iterator();
+
+			JobConf job = new JobConf();
+			FileSystem fs = FileSystem.get(job);
+			
+			ArrayList<Long> numRows = new ArrayList<Long>();
+			
+			if(colID < 0) 
+			{
+				// process mapper output for MV and Bin agents
+				colID = colID*-1;
+				_agents.getMVImputeAgent().mergeAndOutputTransformationMetadata(iterDV, _agents.getTfMtdDir(), colID, fs, _agents);
+				numRows.add(0L);
+			}
+			else if ( colID == _agents.getNumCols() + 1)
+			{
+				// process mapper output for OFFSET_FILE
+				ArrayList<OffsetCount> list = new ArrayList<OffsetCount>();
+				while(iterDV.hasNext())
+					list.add(new OffsetCount(iterDV.next().getOffsetCount()));
+				Collections.sort(list);
+				
+				@SuppressWarnings("deprecation")
+				SequenceFile.Writer writer = new SequenceFile.Writer(fs, job, new Path(_agents.getOffsetFile()+"/part-00000"), ByteWritable.class, OffsetCount.class);
+				
+				long lineOffset=0;
+				for(OffsetCount oc: list)
+				{
+					long count=oc.count;
+					oc.count=lineOffset;
+					writer.append(new ByteWritable((byte)0), oc);
+					lineOffset+=count;
+				}
+				writer.close();
+				list.clear();
+				
+				numRows.add(lineOffset);
+			}
+			else 
+			{
+				// process mapper output for Recode agent
+				_agents.getRecodeAgent().mergeAndOutputTransformationMetadata(iterDV, _agents.getTfMtdDir(), colID, fs, _agents);
+				numRows.add(0L);
+			}
+			
+			return numRows;
+		}
+
+	}
+
+	
+}


[24/55] [partial] incubator-systemml git commit: [SYSTEMML-482] [SYSTEMML-480] Adding a Git attributes file to enforce Unix-styled line endings, and normalizing all of the line endings.

Posted by du...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/impute/old/wfundInputGenerator.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/impute/old/wfundInputGenerator.dml b/src/test/scripts/applications/impute/old/wfundInputGenerator.dml
index 978dbe7..8f6836c 100644
--- a/src/test/scripts/applications/impute/old/wfundInputGenerator.dml
+++ b/src/test/scripts/applications/impute/old/wfundInputGenerator.dml
@@ -1,403 +1,403 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# hadoop jar SystemML.jar -f test/scripts/applications/impute/wfundInputGenerator.dml -exec singlenode
-#    -args
-#        test/scripts/applications/impute/initial_reports
-#        test/scripts/applications/impute/CReps 
-#        test/scripts/applications/impute/RegresValueMap
-#        test/scripts/applications/impute/RegresFactorDefault
-#        test/scripts/applications/impute/RegresParamMap
-#        test/scripts/applications/impute/RegresCoeffDefault
-#        test/scripts/applications/impute/RegresScaleMult
-
-is_GROUP_4_ENABLED = 0; #   = 1 or 0
-
-num_terms = 6;  # The number of term reports, feel free to change
-num_attrs = 19;  
-
-num_frees = 13;
-if (is_GROUP_4_ENABLED == 1) {
-    num_frees = 15; # The estimated last report had 15 degrees of freedom
-}
-
-zero = matrix (0.0, rows = 1, cols = 1);
-
-# ---------------------------------------------------------
-# GENERATE AN AFFINE MAP FROM FREE VARIABLES TO THE REPORTS
-# AFFINE MAP = LINEAR MAP + INITIAL (DEFAULT) REPORTS
-# ---------------------------------------------------------
-
-CReps = matrix (0.0, rows = (num_terms * num_attrs), cols = num_frees);
-
-# constraint that          row1 =  row2 +  row3 +  row4 +  row5 +  row6 + row7
-# translated to free vars: row1 = free1 + free2 + free3 + free4 + free5 + free6
-CReps [(num_terms-1) * num_attrs +  1,  1] = 1.0 + zero;
-CReps [(num_terms-1) * num_attrs +  1,  2] = 1.0 + zero;
-CReps [(num_terms-1) * num_attrs +  1,  3] = 1.0 + zero;
-CReps [(num_terms-1) * num_attrs +  1,  4] = 1.0 + zero;
-CReps [(num_terms-1) * num_attrs +  1,  5] = 1.0 + zero;
-CReps [(num_terms-1) * num_attrs +  1,  6] = 1.0 + zero;
-CReps [(num_terms-1) * num_attrs +  2,  1] = 1.0 + zero;
-CReps [(num_terms-1) * num_attrs +  3,  2] = 1.0 + zero;
-CReps [(num_terms-1) * num_attrs +  4,  3] = 1.0 + zero;
-CReps [(num_terms-1) * num_attrs +  5,  4] = 1.0 + zero;
-CReps [(num_terms-1) * num_attrs +  6,  5] = 1.0 + zero;
-CReps [(num_terms-1) * num_attrs +  7,  6] = 1.0 + zero;
-
-# row 8 is free variable not appearing in any non-free variable
-CReps [(num_terms-1) * num_attrs + 8, 7] = 1.0 + zero;
-
-# constraint that          row9 = row10 + row11 +  row12 +  row13 +  row14 +  row15
-# translated to free vars: row9 = free8 + free9 + free10 + free11 + free12 + free13
-CReps [(num_terms-1) * num_attrs +  9,  8] = 1.0 + zero;
-CReps [(num_terms-1) * num_attrs +  9,  9] = 1.0 + zero;
-CReps [(num_terms-1) * num_attrs +  9, 10] = 1.0 + zero;
-CReps [(num_terms-1) * num_attrs +  9, 11] = 1.0 + zero;
-CReps [(num_terms-1) * num_attrs +  9, 12] = 1.0 + zero;
-CReps [(num_terms-1) * num_attrs +  9, 13] = 1.0 + zero;
-CReps [(num_terms-1) * num_attrs + 10,  8] = 1.0 + zero;
-CReps [(num_terms-1) * num_attrs + 11,  9] = 1.0 + zero;
-CReps [(num_terms-1) * num_attrs + 12, 10] = 1.0 + zero;
-CReps [(num_terms-1) * num_attrs + 13, 11] = 1.0 + zero;
-CReps [(num_terms-1) * num_attrs + 14, 12] = 1.0 + zero;
-CReps [(num_terms-1) * num_attrs + 15, 13] = 1.0 + zero;
-
-# constraint that          row16 =  row14 +  row15
-# translated to free vars: row16 = free14 + free15
-if (is_GROUP_4_ENABLED == 1) {
-    CReps [(num_terms-1) * num_attrs + 16, 14] = 1.0 + zero;
-    CReps [(num_terms-1) * num_attrs + 16, 15] = 1.0 + zero;
-    CReps [(num_terms-1) * num_attrs + 17, 14] = 1.0 + zero;
-    CReps [(num_terms-1) * num_attrs + 18, 15] = 1.0 + zero;
-}
-
-# constraint that           row19 = total cost (all free variables)
-# translated to free vars:  row19 = all free variables
-CReps [(num_terms-1) * num_attrs + 19,  1] = 1.0 + zero;
-CReps [(num_terms-1) * num_attrs + 19,  2] = 1.0 + zero;
-CReps [(num_terms-1) * num_attrs + 19,  3] = 1.0 + zero;
-CReps [(num_terms-1) * num_attrs + 19,  4] = 1.0 + zero;
-CReps [(num_terms-1) * num_attrs + 19,  5] = 1.0 + zero;
-CReps [(num_terms-1) * num_attrs + 19,  6] = 1.0 + zero;
-CReps [(num_terms-1) * num_attrs + 19,  7] = 1.0 + zero;
-CReps [(num_terms-1) * num_attrs + 19,  8] = 1.0 + zero;
-CReps [(num_terms-1) * num_attrs + 19,  9] = 1.0 + zero;
-CReps [(num_terms-1) * num_attrs + 19, 10] = 1.0 + zero;
-CReps [(num_terms-1) * num_attrs + 19, 11] = 1.0 + zero;
-CReps [(num_terms-1) * num_attrs + 19, 12] = 1.0 + zero;
-CReps [(num_terms-1) * num_attrs + 19, 13] = 1.0 + zero;
-if (is_GROUP_4_ENABLED == 1) {
-    CReps [(num_terms-1) * num_attrs + 19, 14] = 1.0 + zero;
-    CReps [(num_terms-1) * num_attrs + 19, 15] = 1.0 + zero;
-}
-
-# ---------------------------------------------------------
-# GENERATE AN AFFINE MAP FROM REPORTS TO REGRESSION FACTORS
-# AFFINE MAP = LINER MAP + A VECTOR OF DEFAULTS
-# ---------------------------------------------------------
-
-# In all regressions, except the last few "special" ones, there are 4 factors:
-# x[t]  ~  x[t-1],  (x[t-1] - x[t-2]),  aggregate[t]
-# The last regressions are for regularization, but they also follow the 4-factor pattern.
-num_factors = 4; 
-
-# We have one regression equation per time-term, except the first two terms, for each
-# attribute, plus a few "special" regularization regression equations:
-num_special_regs = 12;
-if (is_GROUP_4_ENABLED == 1) {
-    num_special_regs = 16;
-}
-
-num_reg_eqs = (num_terms - 2) * num_attrs + num_special_regs;
-
-RegresValueMap = matrix (0.0, rows = (num_reg_eqs * num_factors), cols = (num_terms * num_attrs));
-RegresFactorDefault = matrix (0.0, rows = (num_reg_eqs * num_factors), cols = 1);
-
-# All regression equations for the same attribute share the same parameters, regardless
-# of the term; some parameters are shared across multiple attributes, (those attributes
-# whose behavior is believed to be similar) as specified in the table below:
-
-num_params = 28;
-if (is_GROUP_4_ENABLED == 1) {
-    num_params = 35;
-}
-
-# Factors: -self[t]  self[t-1]  self[t-1]-  total[t]
-#                                self[t-2]
-# PARAMS:
-# Group 1:   1.0     prm#01     prm#02      prm#03    Row #01 = free#01 + ... + free#06
-# Group 1:    "      prm#04     prm#05      prm#06    Row #02 = free#01
-# Group 1:    "        "          "         prm#07    Row #03 = free#02
-# Group 1:    "        "          "         prm#08    Row #04 = free#03
-# Group 1:    "        "          "         prm#09    Row #05 = free#04
-# Group 1:    "        "          "         prm#10    Row #06 = free#05
-# Group 1:    "        "          "         prm#11    Row #07 = free#06
-# Group 2:   1.0     prm#12     prm#13      prm#14    Row #08 = free#07
-# Group 3:   1.0     prm#15     prm#16      prm#17    Row #09 = free#08 + ... + free#13
-# Group 3:    "      prm#18     prm#19      prm#20    Row #10 = free#08
-# Group 3:    "        "          "         prm#21    Row #11 = free#09
-# Group 3:    "        "          "         prm#22    Row #12 = free#10
-# Group 3:    "        "          "         prm#23    Row #13 = free#11
-# Group 3:    "        "          "         prm#24    Row #14 = free#12
-# Group 3:    "        "          "         prm#25    Row #15 = free#13
-
-# GROUP-4 ZEROS: FIVE PARAMETERS REVOKED
-# Group 4:   1.0     prm#29     prm#30      prm#31    Row #16 = free#14 + free#15
-# Group 4:    "      prm#32     prm#33      prm#34    Row #17 = free#14
-# Group 4:    "        "          "         prm#35    Row #18 = free#15
-
-# Group 5:   1.0     prm#26     prm#27      prm#28    Row #19 = free#01 + ... + free#15
-# 
-# (The aggregates in Groups 1..4 regress on the total cost in Group 5;
-#  the total cost in Group 5 regresses on the intercept.)
-
-# THE LAST FEW "SPECIAL" REGULARIZATION EQUATIONS:
-# Factors:   1.0      -1.0       0.0        0.0
-# PARAMS:
-#          prm#26      1.0       0.0        0.0
-#          prm#27      0.0       0.0        0.0
-#          prm#01      0.0       0.0        0.0
-#          prm#02      0.0       0.0        0.0
-#          prm#04      0.0       0.0        0.0
-#          prm#05      0.0       0.0        0.0
-#          prm#12      0.0       0.0        0.0
-#          prm#13      0.0       0.0        0.0
-#          prm#15      0.0       0.0        0.0
-#          prm#16      0.0       0.0        0.0
-#          prm#18      0.0       0.0        0.0
-#          prm#19      0.0       0.0        0.0
-#          prm#29      0.0       0.0        0.0  # GROUP-4 ZEROS:
-#          prm#30      0.0       0.0        0.0  #   THESE EQUATIONS
-#          prm#32      0.0       0.0        0.0  #   USE REVOKED PARAMETERS
-#          prm#33      0.0       0.0        0.0  #   AND DO NOT APPEAR
-
-
-for (t in 3 : num_terms)
-{
-# Group 1 attributes:
-    for (i in 1 : 7) {
-        reg_index = ((t-3) * num_attrs - 1 + i) * num_factors;
-        RegresValueMap [reg_index + 1, (t-1) * num_attrs + i] = -1.0 + zero;  # First factor is -x[t]
-        RegresValueMap [reg_index + 2, (t-2) * num_attrs + i] =  1.0 + zero;  # Second factor is x[t-1]
-        RegresValueMap [reg_index + 3, (t-2) * num_attrs + i] =  1.0 + zero;  # Third factor is
-        RegresValueMap [reg_index + 3, (t-3) * num_attrs + i] = -1.0 + zero;  #   x[t-1] - x[t-2]
-        if (i == 1) {
-            RegresValueMap [reg_index + 4, (t-1) * num_attrs + 19] = 1.0 + zero; # 4th factor: Row#19[t]
-        } else {
-            RegresValueMap [reg_index + 4, (t-1) * num_attrs +  1] = 1.0 + zero; # 4th factor: Row#01[t]
-        }
-    }
-
-# Group 2 attribute:
-    reg_index = ((t-3) * num_attrs - 1 + 8) * num_factors;
-    RegresValueMap [reg_index + 1, (t-1) * num_attrs +  8] = -1.0 + zero;  # First factor is -x[t]
-    RegresValueMap [reg_index + 2, (t-2) * num_attrs +  8] =  1.0 + zero;  # Second factor is x[t-1]
-    RegresValueMap [reg_index + 3, (t-2) * num_attrs +  8] =  1.0 + zero;  # Third factor is
-    RegresValueMap [reg_index + 3, (t-3) * num_attrs +  8] = -1.0 + zero;  #   x[t-1] - x[t-2]
-    RegresValueMap [reg_index + 4, (t-1) * num_attrs + 19] =  1.0 + zero;  # 4th factor: Row#19[t]
-
-# Group 3 attributes:
-    for (i in 9 : 15) {
-        reg_index = ((t-3) * num_attrs - 1 + i) * num_factors;
-        RegresValueMap [reg_index + 1, (t-1) * num_attrs + i] = -1.0 + zero;  # First factor is -x[t]
-        RegresValueMap [reg_index + 2, (t-2) * num_attrs + i] =  1.0 + zero;  # Second factor is x[t-1]
-        RegresValueMap [reg_index + 3, (t-2) * num_attrs + i] =  1.0 + zero;  # Third factor is
-        RegresValueMap [reg_index + 3, (t-3) * num_attrs + i] = -1.0 + zero;  #   x[t-1] - x[t-2]
-        if (i == 9) {
-            RegresValueMap [reg_index + 4, (t-1) * num_attrs + 19] = 1.0 + zero; # 4th factor: Row#19[t]
-        } else {
-            RegresValueMap [reg_index + 4, (t-1) * num_attrs +  9] = 1.0 + zero; # 4th factor: Row#09[t]
-        }
-    }
-
-# Group 4 attributes:
-    for (i in 16 : 18) {
-        reg_index = ((t-3) * num_attrs - 1 + i) * num_factors;
-        RegresValueMap [reg_index + 1, (t-1) * num_attrs + i] = -1.0 + zero;  # First factor is -x[t]
-        RegresValueMap [reg_index + 2, (t-2) * num_attrs + i] =  1.0 + zero;  # Second factor is x[t-1]
-        RegresValueMap [reg_index + 3, (t-2) * num_attrs + i] =  1.0 + zero;  # Third factor is
-        RegresValueMap [reg_index + 3, (t-3) * num_attrs + i] = -1.0 + zero;  #   x[t-1] - x[t-2]
-        if (i == 16) {
-            RegresValueMap [reg_index + 4, (t-1) * num_attrs + 19] = 1.0 + zero; # 4th factor: Row#19[t]
-        } else {
-            RegresValueMap [reg_index + 4, (t-1) * num_attrs + 16] = 1.0 + zero; # 4th factor: Row#16[t]
-        }
-    }
-
-# Group 5 attribute:
-    reg_index = ((t-3) * num_attrs - 1 + 19) * num_factors;
-    RegresValueMap [reg_index + 1, (t-1) * num_attrs + 19] = -1.0 + zero;  # First factor is -x[t]
-    RegresValueMap [reg_index + 2, (t-2) * num_attrs + 19] =  1.0 + zero;  # Second factor is x[t-1]
-    RegresValueMap [reg_index + 3, (t-2) * num_attrs + 19] =  1.0 + zero;  # Third factor is
-    RegresValueMap [reg_index + 3, (t-3) * num_attrs + 19] = -1.0 + zero;  #   x[t-1] - x[t-2]
-    RegresFactorDefault [reg_index + 4, 1]                 =  1.0 + zero;  # The Intercept
-}
-
-for (i in 1:num_special_regs)
-{
-    reg_index = ((num_terms - 2) * num_attrs - 1 + i) * num_factors;
-    RegresFactorDefault [reg_index + 1, 1] =  1.0 + zero;
-    RegresFactorDefault [reg_index + 2, 1] = -1.0 + zero;
-}
-
-# ----------------------------------------------------------
-# GENERATE AN AFFINE MAP FROM PARAMETERS TO THE COEFFICIENTS
-# AT REGRESSION FACTORS:  A LINER MAP + A VECTOR OF DEFAULTS
-# ----------------------------------------------------------
-
-RegresParamMap = matrix (0.0, rows = (num_reg_eqs * num_factors), cols = num_params);
-RegresCoeffDefault = matrix (0.0, rows = (num_reg_eqs * num_factors), cols = 1);
-
-for (t in 3 : num_terms) {
-# Group 1 attributes:
-    reg_index = ((t-3) * num_attrs - 1 + 1) * num_factors;
-    RegresCoeffDefault [reg_index + 1, 1] = 1.0 + zero;  # Default coefficient = 1.0
-    RegresParamMap [reg_index + 2,  1]    = 1.0 + zero;  # Param #01
-    RegresParamMap [reg_index + 3,  2]    = 1.0 + zero;  # Param #02
-    RegresParamMap [reg_index + 4,  3]    = 1.0 + zero;  # Param #03
-    for (i in 2 : 7) {
-        reg_index = ((t-3) * num_attrs - 1 + i) * num_factors;
-        RegresCoeffDefault [reg_index + 1, 1]  = 1.0 + zero;  # Default coefficient = 1.0
-        RegresParamMap [reg_index + 2,  4]     = 1.0 + zero;  # Param #04
-        RegresParamMap [reg_index + 3,  5]     = 1.0 + zero;  # Param #05
-        RegresParamMap [reg_index + 4,  4 + i] = 1.0 + zero;  # Param #06-#11
-    }
-# Group 2 attribute:
-    reg_index = ((t-3) * num_attrs - 1 + 8) * num_factors;
-    RegresCoeffDefault [reg_index + 1, 1] = 1.0 + zero;  # Default coefficient = 1.0
-    RegresParamMap [reg_index + 2, 12] = 1.0 + zero;  # Param #12
-    RegresParamMap [reg_index + 3, 13] = 1.0 + zero;  # Param #13
-    RegresParamMap [reg_index + 4, 14] = 1.0 + zero;  # Param #14
-# Group 3 attributes:
-    reg_index = ((t-3) * num_attrs - 1 + 9) * num_factors;
-    RegresCoeffDefault [reg_index + 1, 1]  = 1.0 + zero;  # Default coefficient = 1.0
-    RegresParamMap [reg_index + 2, 15]     = 1.0 + zero;  # Param #15
-    RegresParamMap [reg_index + 3, 16]     = 1.0 + zero;  # Param #16
-    RegresParamMap [reg_index + 4, 17]     = 1.0 + zero;  # Param #17
-    for (i in 10 : 15) {
-        reg_index = ((t-3) * num_attrs - 1 + i) * num_factors;
-        RegresCoeffDefault [reg_index + 1, 1]  = 1.0 + zero;  # Default coefficient = 1.0
-        RegresParamMap [reg_index + 2, 18]     = 1.0 + zero;  # Param #18
-        RegresParamMap [reg_index + 3, 19]     = 1.0 + zero;  # Param #19
-        RegresParamMap [reg_index + 4, 10 + i] = 1.0 + zero;  # Param #20-#25
-    }
-    
-# Group 4 attributes:
-if (is_GROUP_4_ENABLED == 1) {
-    reg_index = ((t-3) * num_attrs - 1 + 16) * num_factors;
-    RegresCoeffDefault [reg_index + 1, 1]  = 1.0 + zero;  # Default coefficient = 1.0
-    RegresParamMap [reg_index + 2, 29]     = 1.0 + zero;  # Param #29
-    RegresParamMap [reg_index + 3, 30]     = 1.0 + zero;  # Param #30
-    RegresParamMap [reg_index + 4, 31]     = 1.0 + zero;  # Param #31
-    for (i in 17 : 18) {
-        reg_index = ((t-3) * num_attrs - 1 + i) * num_factors;
-        RegresCoeffDefault [reg_index + 1, 1]  = 1.0 + zero;  # Default coefficient = 1.0
-        RegresParamMap [reg_index + 2, 32]     = 1.0 + zero;  # Param #32
-        RegresParamMap [reg_index + 3, 33]     = 1.0 + zero;  # Param #33
-        RegresParamMap [reg_index + 4, 17 + i] = 1.0 + zero;  # Param #34-#35
-    }
-}
-
-# Group 5 attribute:
-    reg_index = ((t-3) * num_attrs - 1 + 19) * num_factors;
-    RegresCoeffDefault [reg_index + 1, 1] = 1.0 + zero;  # Default coefficient = 1.0
-    RegresParamMap [reg_index + 2, 26] = 1.0 + zero;  # Param #26
-    RegresParamMap [reg_index + 3, 27] = 1.0 + zero;  # Param #27
-    RegresParamMap [reg_index + 4, 28] = 1.0 + zero;  # Param #28
-}
-
-reg_index = ((num_terms - 2) * num_attrs) * num_factors;
-    RegresParamMap [reg_index + 1, 26] = 1.0 + zero;  # Param #26
-    RegresCoeffDefault [reg_index + 2, 1] = 1.0 + zero;
-reg_index = reg_index + num_factors;
-    RegresParamMap [reg_index + 1, 27] = 1.0 + zero;  # Param #27
-reg_index = reg_index + num_factors;
-    RegresParamMap [reg_index + 1, 01] = 1.0 + zero;  # Param #01
-reg_index = reg_index + num_factors;
-    RegresParamMap [reg_index + 1, 02] = 1.0 + zero;  # Param #02
-reg_index = reg_index + num_factors;
-    RegresParamMap [reg_index + 1, 04] = 1.0 + zero;  # Param #04
-reg_index = reg_index + num_factors;
-    RegresParamMap [reg_index + 1, 05] = 1.0 + zero;  # Param #05
-reg_index = reg_index + num_factors;
-    RegresParamMap [reg_index + 1, 12] = 1.0 + zero;  # Param #12
-reg_index = reg_index + num_factors;
-    RegresParamMap [reg_index + 1, 13] = 1.0 + zero;  # Param #13
-reg_index = reg_index + num_factors;
-    RegresParamMap [reg_index + 1, 15] = 1.0 + zero;  # Param #15
-reg_index = reg_index + num_factors;
-    RegresParamMap [reg_index + 1, 16] = 1.0 + zero;  # Param #16
-reg_index = reg_index + num_factors;
-    RegresParamMap [reg_index + 1, 18] = 1.0 + zero;  # Param #18
-reg_index = reg_index + num_factors;
-    RegresParamMap [reg_index + 1, 19] = 1.0 + zero;  # Param #19
-
-if (is_GROUP_4_ENABLED == 1) {
-    reg_index = reg_index + num_factors;
-        RegresParamMap [reg_index + 1, 29] = 1.0 + zero;  # Param #29
-    reg_index = reg_index + num_factors;
-        RegresParamMap [reg_index + 1, 30] = 1.0 + zero;  # Param #30
-    reg_index = reg_index + num_factors;
-        RegresParamMap [reg_index + 1, 32] = 1.0 + zero;  # Param #32
-    reg_index = reg_index + num_factors;
-        RegresParamMap [reg_index + 1, 33] = 1.0 + zero;  # Param #33
-}
-
-# ----------------------------------------------------------
-# GENERATE A VECTOR OF SCALE MULTIPLIERS, ONE PER REGRESSION
-# ----------------------------------------------------------
-
-RegresScaleMult = matrix (1.0, rows = num_reg_eqs, cols = 1);
-initial_reports = read ($1);
-
-global_weight = 0.5 + zero;
-
-attribute_size = rowMeans (abs (initial_reports [, 1:(num_terms-1)]));
-max_attr_size = max (attribute_size);
-
-for (t in 3 : num_terms) {
-    for (i in 1 : num_attrs) {
-        regeqn = (t-3) * num_attrs + i;
-        scale_down = sqrt (attribute_size [i, 1] / max_attr_size) * 0.999 + 0.001;
-        acceptable_drift = scale_down * max_attr_size * 0.001;
-        RegresScaleMult [regeqn, 1] = global_weight / (acceptable_drift^2);
-    }
-}
-
-regeqn = (num_terms - 2) * num_attrs + 1;
-for (i in 1 : num_special_regs) {
-    acceptable_drift = 0.01;
-    RegresScaleMult [regeqn, 1] = global_weight / (acceptable_drift^2);
-    regeqn = regeqn + 1;
-}
-
-# --------------------------------
-# WRITE OUT ALL GENERATED MATRICES
-# --------------------------------
-
-# write (initial_reports,    $1, format="text");
-write (CReps,              $2, format="text");
-write (RegresValueMap,     $3, format="text");
-write (RegresFactorDefault,$4, format="text");
-write (RegresParamMap,     $5, format="text");
-write (RegresCoeffDefault, $6, format="text");
-write (RegresScaleMult,    $7, format="text");
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# hadoop jar SystemML.jar -f test/scripts/applications/impute/wfundInputGenerator.dml -exec singlenode
+#    -args
+#        test/scripts/applications/impute/initial_reports
+#        test/scripts/applications/impute/CReps 
+#        test/scripts/applications/impute/RegresValueMap
+#        test/scripts/applications/impute/RegresFactorDefault
+#        test/scripts/applications/impute/RegresParamMap
+#        test/scripts/applications/impute/RegresCoeffDefault
+#        test/scripts/applications/impute/RegresScaleMult
+
+is_GROUP_4_ENABLED = 0; #   = 1 or 0
+
+num_terms = 6;  # The number of term reports, feel free to change
+num_attrs = 19;  
+
+num_frees = 13;
+if (is_GROUP_4_ENABLED == 1) {
+    num_frees = 15; # The estimated last report had 15 degrees of freedom
+}
+
+zero = matrix (0.0, rows = 1, cols = 1);
+
+# ---------------------------------------------------------
+# GENERATE AN AFFINE MAP FROM FREE VARIABLES TO THE REPORTS
+# AFFINE MAP = LINEAR MAP + INITIAL (DEFAULT) REPORTS
+# ---------------------------------------------------------
+
+CReps = matrix (0.0, rows = (num_terms * num_attrs), cols = num_frees);
+
+# constraint that          row1 =  row2 +  row3 +  row4 +  row5 +  row6 + row7
+# translated to free vars: row1 = free1 + free2 + free3 + free4 + free5 + free6
+CReps [(num_terms-1) * num_attrs +  1,  1] = 1.0 + zero;
+CReps [(num_terms-1) * num_attrs +  1,  2] = 1.0 + zero;
+CReps [(num_terms-1) * num_attrs +  1,  3] = 1.0 + zero;
+CReps [(num_terms-1) * num_attrs +  1,  4] = 1.0 + zero;
+CReps [(num_terms-1) * num_attrs +  1,  5] = 1.0 + zero;
+CReps [(num_terms-1) * num_attrs +  1,  6] = 1.0 + zero;
+CReps [(num_terms-1) * num_attrs +  2,  1] = 1.0 + zero;
+CReps [(num_terms-1) * num_attrs +  3,  2] = 1.0 + zero;
+CReps [(num_terms-1) * num_attrs +  4,  3] = 1.0 + zero;
+CReps [(num_terms-1) * num_attrs +  5,  4] = 1.0 + zero;
+CReps [(num_terms-1) * num_attrs +  6,  5] = 1.0 + zero;
+CReps [(num_terms-1) * num_attrs +  7,  6] = 1.0 + zero;
+
+# row 8 is free variable not appearing in any non-free variable
+CReps [(num_terms-1) * num_attrs + 8, 7] = 1.0 + zero;
+
+# constraint that          row9 = row10 + row11 +  row12 +  row13 +  row14 +  row15
+# translated to free vars: row9 = free8 + free9 + free10 + free11 + free12 + free13
+CReps [(num_terms-1) * num_attrs +  9,  8] = 1.0 + zero;
+CReps [(num_terms-1) * num_attrs +  9,  9] = 1.0 + zero;
+CReps [(num_terms-1) * num_attrs +  9, 10] = 1.0 + zero;
+CReps [(num_terms-1) * num_attrs +  9, 11] = 1.0 + zero;
+CReps [(num_terms-1) * num_attrs +  9, 12] = 1.0 + zero;
+CReps [(num_terms-1) * num_attrs +  9, 13] = 1.0 + zero;
+CReps [(num_terms-1) * num_attrs + 10,  8] = 1.0 + zero;
+CReps [(num_terms-1) * num_attrs + 11,  9] = 1.0 + zero;
+CReps [(num_terms-1) * num_attrs + 12, 10] = 1.0 + zero;
+CReps [(num_terms-1) * num_attrs + 13, 11] = 1.0 + zero;
+CReps [(num_terms-1) * num_attrs + 14, 12] = 1.0 + zero;
+CReps [(num_terms-1) * num_attrs + 15, 13] = 1.0 + zero;
+
+# constraint that          row16 =  row17 +  row18
+# translated to free vars: row16 = free14 + free15
+if (is_GROUP_4_ENABLED == 1) {
+    CReps [(num_terms-1) * num_attrs + 16, 14] = 1.0 + zero;
+    CReps [(num_terms-1) * num_attrs + 16, 15] = 1.0 + zero;
+    CReps [(num_terms-1) * num_attrs + 17, 14] = 1.0 + zero;
+    CReps [(num_terms-1) * num_attrs + 18, 15] = 1.0 + zero;
+}
+
+# constraint that           row19 = total cost (all free variables)
+# translated to free vars:  row19 = all free variables
+CReps [(num_terms-1) * num_attrs + 19,  1] = 1.0 + zero;
+CReps [(num_terms-1) * num_attrs + 19,  2] = 1.0 + zero;
+CReps [(num_terms-1) * num_attrs + 19,  3] = 1.0 + zero;
+CReps [(num_terms-1) * num_attrs + 19,  4] = 1.0 + zero;
+CReps [(num_terms-1) * num_attrs + 19,  5] = 1.0 + zero;
+CReps [(num_terms-1) * num_attrs + 19,  6] = 1.0 + zero;
+CReps [(num_terms-1) * num_attrs + 19,  7] = 1.0 + zero;
+CReps [(num_terms-1) * num_attrs + 19,  8] = 1.0 + zero;
+CReps [(num_terms-1) * num_attrs + 19,  9] = 1.0 + zero;
+CReps [(num_terms-1) * num_attrs + 19, 10] = 1.0 + zero;
+CReps [(num_terms-1) * num_attrs + 19, 11] = 1.0 + zero;
+CReps [(num_terms-1) * num_attrs + 19, 12] = 1.0 + zero;
+CReps [(num_terms-1) * num_attrs + 19, 13] = 1.0 + zero;
+if (is_GROUP_4_ENABLED == 1) {
+    CReps [(num_terms-1) * num_attrs + 19, 14] = 1.0 + zero;
+    CReps [(num_terms-1) * num_attrs + 19, 15] = 1.0 + zero;
+}
+
+# ---------------------------------------------------------
+# GENERATE AN AFFINE MAP FROM REPORTS TO REGRESSION FACTORS
+# AFFINE MAP = LINEAR MAP + A VECTOR OF DEFAULTS
+# ---------------------------------------------------------
+
+# In all regressions, except the last few "special" ones, there are 4 factors:
+# x[t]  ~  x[t-1],  (x[t-1] - x[t-2]),  aggregate[t]
+# The last regressions are for regularization, but they also follow the 4-factor pattern.
+num_factors = 4; 
+
+# We have one regression equation per time-term, except the first two terms, for each
+# attribute, plus a few "special" regularization regression equations:
+num_special_regs = 12;
+if (is_GROUP_4_ENABLED == 1) {
+    num_special_regs = 16;
+}
+
+num_reg_eqs = (num_terms - 2) * num_attrs + num_special_regs;
+
+RegresValueMap = matrix (0.0, rows = (num_reg_eqs * num_factors), cols = (num_terms * num_attrs));
+RegresFactorDefault = matrix (0.0, rows = (num_reg_eqs * num_factors), cols = 1);
+
+# All regression equations for the same attribute share the same parameters, regardless
+# of the term; some parameters are shared across multiple attributes, (those attributes
+# whose behavior is believed to be similar) as specified in the table below:
+
+num_params = 28;
+if (is_GROUP_4_ENABLED == 1) {
+    num_params = 35;
+}
+
+# Factors: -self[t]  self[t-1]  self[t-1]-  total[t]
+#                                self[t-2]
+# PARAMS:
+# Group 1:   1.0     prm#01     prm#02      prm#03    Row #01 = free#01 + ... + free#06
+# Group 1:    "      prm#04     prm#05      prm#06    Row #02 = free#01
+# Group 1:    "        "          "         prm#07    Row #03 = free#02
+# Group 1:    "        "          "         prm#08    Row #04 = free#03
+# Group 1:    "        "          "         prm#09    Row #05 = free#04
+# Group 1:    "        "          "         prm#10    Row #06 = free#05
+# Group 1:    "        "          "         prm#11    Row #07 = free#06
+# Group 2:   1.0     prm#12     prm#13      prm#14    Row #08 = free#07
+# Group 3:   1.0     prm#15     prm#16      prm#17    Row #09 = free#08 + ... + free#13
+# Group 3:    "      prm#18     prm#19      prm#20    Row #10 = free#08
+# Group 3:    "        "          "         prm#21    Row #11 = free#09
+# Group 3:    "        "          "         prm#22    Row #12 = free#10
+# Group 3:    "        "          "         prm#23    Row #13 = free#11
+# Group 3:    "        "          "         prm#24    Row #14 = free#12
+# Group 3:    "        "          "         prm#25    Row #15 = free#13
+
+# GROUP-4 ZEROS: FIVE PARAMETERS REVOKED
+# Group 4:   1.0     prm#29     prm#30      prm#31    Row #16 = free#14 + free#15
+# Group 4:    "      prm#32     prm#33      prm#34    Row #17 = free#14
+# Group 4:    "        "          "         prm#35    Row #18 = free#15
+
+# Group 5:   1.0     prm#26     prm#27      prm#28    Row #19 = free#01 + ... + free#15
+# 
+# (The aggregates in Groups 1..4 regress on the total cost in Group 5;
+#  the total cost in Group 5 regresses on the intercept.)
+
+# THE LAST FEW "SPECIAL" REGULARIZATION EQUATIONS:
+# Factors:   1.0      -1.0       0.0        0.0
+# PARAMS:
+#          prm#26      1.0       0.0        0.0
+#          prm#27      0.0       0.0        0.0
+#          prm#01      0.0       0.0        0.0
+#          prm#02      0.0       0.0        0.0
+#          prm#04      0.0       0.0        0.0
+#          prm#05      0.0       0.0        0.0
+#          prm#12      0.0       0.0        0.0
+#          prm#13      0.0       0.0        0.0
+#          prm#15      0.0       0.0        0.0
+#          prm#16      0.0       0.0        0.0
+#          prm#18      0.0       0.0        0.0
+#          prm#19      0.0       0.0        0.0
+#          prm#29      0.0       0.0        0.0  # GROUP-4 ZEROS:
+#          prm#30      0.0       0.0        0.0  #   THESE EQUATIONS
+#          prm#32      0.0       0.0        0.0  #   USE REVOKED PARAMETERS
+#          prm#33      0.0       0.0        0.0  #   AND DO NOT APPEAR
+
+
+for (t in 3 : num_terms)
+{
+# Group 1 attributes:
+    for (i in 1 : 7) {
+        reg_index = ((t-3) * num_attrs - 1 + i) * num_factors;
+        RegresValueMap [reg_index + 1, (t-1) * num_attrs + i] = -1.0 + zero;  # First factor is -x[t]
+        RegresValueMap [reg_index + 2, (t-2) * num_attrs + i] =  1.0 + zero;  # Second factor is x[t-1]
+        RegresValueMap [reg_index + 3, (t-2) * num_attrs + i] =  1.0 + zero;  # Third factor is
+        RegresValueMap [reg_index + 3, (t-3) * num_attrs + i] = -1.0 + zero;  #   x[t-1] - x[t-2]
+        if (i == 1) {
+            RegresValueMap [reg_index + 4, (t-1) * num_attrs + 19] = 1.0 + zero; # 4th factor: Row#19[t]
+        } else {
+            RegresValueMap [reg_index + 4, (t-1) * num_attrs +  1] = 1.0 + zero; # 4th factor: Row#01[t]
+        }
+    }
+
+# Group 2 attribute:
+    reg_index = ((t-3) * num_attrs - 1 + 8) * num_factors;
+    RegresValueMap [reg_index + 1, (t-1) * num_attrs +  8] = -1.0 + zero;  # First factor is -x[t]
+    RegresValueMap [reg_index + 2, (t-2) * num_attrs +  8] =  1.0 + zero;  # Second factor is x[t-1]
+    RegresValueMap [reg_index + 3, (t-2) * num_attrs +  8] =  1.0 + zero;  # Third factor is
+    RegresValueMap [reg_index + 3, (t-3) * num_attrs +  8] = -1.0 + zero;  #   x[t-1] - x[t-2]
+    RegresValueMap [reg_index + 4, (t-1) * num_attrs + 19] =  1.0 + zero;  # 4th factor: Row#19[t]
+
+# Group 3 attributes:
+    for (i in 9 : 15) {
+        reg_index = ((t-3) * num_attrs - 1 + i) * num_factors;
+        RegresValueMap [reg_index + 1, (t-1) * num_attrs + i] = -1.0 + zero;  # First factor is -x[t]
+        RegresValueMap [reg_index + 2, (t-2) * num_attrs + i] =  1.0 + zero;  # Second factor is x[t-1]
+        RegresValueMap [reg_index + 3, (t-2) * num_attrs + i] =  1.0 + zero;  # Third factor is
+        RegresValueMap [reg_index + 3, (t-3) * num_attrs + i] = -1.0 + zero;  #   x[t-1] - x[t-2]
+        if (i == 9) {
+            RegresValueMap [reg_index + 4, (t-1) * num_attrs + 19] = 1.0 + zero; # 4th factor: Row#19[t]
+        } else {
+            RegresValueMap [reg_index + 4, (t-1) * num_attrs +  9] = 1.0 + zero; # 4th factor: Row#09[t]
+        }
+    }
+
+# Group 4 attributes:
+    for (i in 16 : 18) {
+        reg_index = ((t-3) * num_attrs - 1 + i) * num_factors;
+        RegresValueMap [reg_index + 1, (t-1) * num_attrs + i] = -1.0 + zero;  # First factor is -x[t]
+        RegresValueMap [reg_index + 2, (t-2) * num_attrs + i] =  1.0 + zero;  # Second factor is x[t-1]
+        RegresValueMap [reg_index + 3, (t-2) * num_attrs + i] =  1.0 + zero;  # Third factor is
+        RegresValueMap [reg_index + 3, (t-3) * num_attrs + i] = -1.0 + zero;  #   x[t-1] - x[t-2]
+        if (i == 16) {
+            RegresValueMap [reg_index + 4, (t-1) * num_attrs + 19] = 1.0 + zero; # 4th factor: Row#19[t]
+        } else {
+            RegresValueMap [reg_index + 4, (t-1) * num_attrs + 16] = 1.0 + zero; # 4th factor: Row#16[t]
+        }
+    }
+
+# Group 5 attribute:
+    reg_index = ((t-3) * num_attrs - 1 + 19) * num_factors;
+    RegresValueMap [reg_index + 1, (t-1) * num_attrs + 19] = -1.0 + zero;  # First factor is -x[t]
+    RegresValueMap [reg_index + 2, (t-2) * num_attrs + 19] =  1.0 + zero;  # Second factor is x[t-1]
+    RegresValueMap [reg_index + 3, (t-2) * num_attrs + 19] =  1.0 + zero;  # Third factor is
+    RegresValueMap [reg_index + 3, (t-3) * num_attrs + 19] = -1.0 + zero;  #   x[t-1] - x[t-2]
+    RegresFactorDefault [reg_index + 4, 1]                 =  1.0 + zero;  # The Intercept
+}
+
+for (i in 1:num_special_regs)
+{
+    reg_index = ((num_terms - 2) * num_attrs - 1 + i) * num_factors;
+    RegresFactorDefault [reg_index + 1, 1] =  1.0 + zero;
+    RegresFactorDefault [reg_index + 2, 1] = -1.0 + zero;
+}
+
+# ----------------------------------------------------------
+# GENERATE AN AFFINE MAP FROM PARAMETERS TO THE COEFFICIENTS
+# AT REGRESSION FACTORS:  A LINEAR MAP + A VECTOR OF DEFAULTS
+# ----------------------------------------------------------
+
+RegresParamMap = matrix (0.0, rows = (num_reg_eqs * num_factors), cols = num_params);
+RegresCoeffDefault = matrix (0.0, rows = (num_reg_eqs * num_factors), cols = 1);
+
+for (t in 3 : num_terms) {
+# Group 1 attributes:
+    reg_index = ((t-3) * num_attrs - 1 + 1) * num_factors;
+    RegresCoeffDefault [reg_index + 1, 1] = 1.0 + zero;  # Default coefficient = 1.0
+    RegresParamMap [reg_index + 2,  1]    = 1.0 + zero;  # Param #01
+    RegresParamMap [reg_index + 3,  2]    = 1.0 + zero;  # Param #02
+    RegresParamMap [reg_index + 4,  3]    = 1.0 + zero;  # Param #03
+    for (i in 2 : 7) {
+        reg_index = ((t-3) * num_attrs - 1 + i) * num_factors;
+        RegresCoeffDefault [reg_index + 1, 1]  = 1.0 + zero;  # Default coefficient = 1.0
+        RegresParamMap [reg_index + 2,  4]     = 1.0 + zero;  # Param #04
+        RegresParamMap [reg_index + 3,  5]     = 1.0 + zero;  # Param #05
+        RegresParamMap [reg_index + 4,  4 + i] = 1.0 + zero;  # Param #06-#11
+    }
+# Group 2 attribute:
+    reg_index = ((t-3) * num_attrs - 1 + 8) * num_factors;
+    RegresCoeffDefault [reg_index + 1, 1] = 1.0 + zero;  # Default coefficient = 1.0
+    RegresParamMap [reg_index + 2, 12] = 1.0 + zero;  # Param #12
+    RegresParamMap [reg_index + 3, 13] = 1.0 + zero;  # Param #13
+    RegresParamMap [reg_index + 4, 14] = 1.0 + zero;  # Param #14
+# Group 3 attributes:
+    reg_index = ((t-3) * num_attrs - 1 + 9) * num_factors;
+    RegresCoeffDefault [reg_index + 1, 1]  = 1.0 + zero;  # Default coefficient = 1.0
+    RegresParamMap [reg_index + 2, 15]     = 1.0 + zero;  # Param #15
+    RegresParamMap [reg_index + 3, 16]     = 1.0 + zero;  # Param #16
+    RegresParamMap [reg_index + 4, 17]     = 1.0 + zero;  # Param #17
+    for (i in 10 : 15) {
+        reg_index = ((t-3) * num_attrs - 1 + i) * num_factors;
+        RegresCoeffDefault [reg_index + 1, 1]  = 1.0 + zero;  # Default coefficient = 1.0
+        RegresParamMap [reg_index + 2, 18]     = 1.0 + zero;  # Param #18
+        RegresParamMap [reg_index + 3, 19]     = 1.0 + zero;  # Param #19
+        RegresParamMap [reg_index + 4, 10 + i] = 1.0 + zero;  # Param #20-#25
+    }
+    
+# Group 4 attributes:
+if (is_GROUP_4_ENABLED == 1) {
+    reg_index = ((t-3) * num_attrs - 1 + 16) * num_factors;
+    RegresCoeffDefault [reg_index + 1, 1]  = 1.0 + zero;  # Default coefficient = 1.0
+    RegresParamMap [reg_index + 2, 29]     = 1.0 + zero;  # Param #29
+    RegresParamMap [reg_index + 3, 30]     = 1.0 + zero;  # Param #30
+    RegresParamMap [reg_index + 4, 31]     = 1.0 + zero;  # Param #31
+    for (i in 17 : 18) {
+        reg_index = ((t-3) * num_attrs - 1 + i) * num_factors;
+        RegresCoeffDefault [reg_index + 1, 1]  = 1.0 + zero;  # Default coefficient = 1.0
+        RegresParamMap [reg_index + 2, 32]     = 1.0 + zero;  # Param #32
+        RegresParamMap [reg_index + 3, 33]     = 1.0 + zero;  # Param #33
+        RegresParamMap [reg_index + 4, 17 + i] = 1.0 + zero;  # Param #34-#35
+    }
+}
+
+# Group 5 attribute:
+    reg_index = ((t-3) * num_attrs - 1 + 19) * num_factors;
+    RegresCoeffDefault [reg_index + 1, 1] = 1.0 + zero;  # Default coefficient = 1.0
+    RegresParamMap [reg_index + 2, 26] = 1.0 + zero;  # Param #26
+    RegresParamMap [reg_index + 3, 27] = 1.0 + zero;  # Param #27
+    RegresParamMap [reg_index + 4, 28] = 1.0 + zero;  # Param #28
+}
+
+reg_index = ((num_terms - 2) * num_attrs) * num_factors;
+    RegresParamMap [reg_index + 1, 26] = 1.0 + zero;  # Param #26
+    RegresCoeffDefault [reg_index + 2, 1] = 1.0 + zero;
+reg_index = reg_index + num_factors;
+    RegresParamMap [reg_index + 1, 27] = 1.0 + zero;  # Param #27
+reg_index = reg_index + num_factors;
+    RegresParamMap [reg_index + 1, 01] = 1.0 + zero;  # Param #01
+reg_index = reg_index + num_factors;
+    RegresParamMap [reg_index + 1, 02] = 1.0 + zero;  # Param #02
+reg_index = reg_index + num_factors;
+    RegresParamMap [reg_index + 1, 04] = 1.0 + zero;  # Param #04
+reg_index = reg_index + num_factors;
+    RegresParamMap [reg_index + 1, 05] = 1.0 + zero;  # Param #05
+reg_index = reg_index + num_factors;
+    RegresParamMap [reg_index + 1, 12] = 1.0 + zero;  # Param #12
+reg_index = reg_index + num_factors;
+    RegresParamMap [reg_index + 1, 13] = 1.0 + zero;  # Param #13
+reg_index = reg_index + num_factors;
+    RegresParamMap [reg_index + 1, 15] = 1.0 + zero;  # Param #15
+reg_index = reg_index + num_factors;
+    RegresParamMap [reg_index + 1, 16] = 1.0 + zero;  # Param #16
+reg_index = reg_index + num_factors;
+    RegresParamMap [reg_index + 1, 18] = 1.0 + zero;  # Param #18
+reg_index = reg_index + num_factors;
+    RegresParamMap [reg_index + 1, 19] = 1.0 + zero;  # Param #19
+
+if (is_GROUP_4_ENABLED == 1) {
+    reg_index = reg_index + num_factors;
+        RegresParamMap [reg_index + 1, 29] = 1.0 + zero;  # Param #29
+    reg_index = reg_index + num_factors;
+        RegresParamMap [reg_index + 1, 30] = 1.0 + zero;  # Param #30
+    reg_index = reg_index + num_factors;
+        RegresParamMap [reg_index + 1, 32] = 1.0 + zero;  # Param #32
+    reg_index = reg_index + num_factors;
+        RegresParamMap [reg_index + 1, 33] = 1.0 + zero;  # Param #33
+}
+
+# ----------------------------------------------------------
+# GENERATE A VECTOR OF SCALE MULTIPLIERS, ONE PER REGRESSION
+# ----------------------------------------------------------
+
+RegresScaleMult = matrix (1.0, rows = num_reg_eqs, cols = 1);
+initial_reports = read ($1);
+
+global_weight = 0.5 + zero;
+
+attribute_size = rowMeans (abs (initial_reports [, 1:(num_terms-1)]));
+max_attr_size = max (attribute_size);
+
+for (t in 3 : num_terms) {
+    for (i in 1 : num_attrs) {
+        regeqn = (t-3) * num_attrs + i;
+        scale_down = sqrt (attribute_size [i, 1] / max_attr_size) * 0.999 + 0.001;
+        acceptable_drift = scale_down * max_attr_size * 0.001;
+        RegresScaleMult [regeqn, 1] = global_weight / (acceptable_drift^2);
+    }
+}
+
+regeqn = (num_terms - 2) * num_attrs + 1;
+for (i in 1 : num_special_regs) {
+    acceptable_drift = 0.01;
+    RegresScaleMult [regeqn, 1] = global_weight / (acceptable_drift^2);
+    regeqn = regeqn + 1;
+}
+
+# --------------------------------
+# WRITE OUT ALL GENERATED MATRICES
+# --------------------------------
+
+# write (initial_reports,    $1, format="text");
+write (CReps,              $2, format="text");
+write (RegresValueMap,     $3, format="text");
+write (RegresFactorDefault,$4, format="text");
+write (RegresParamMap,     $5, format="text");
+write (RegresCoeffDefault, $6, format="text");
+write (RegresScaleMult,    $7, format="text");

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/impute/test/testInputGenerator.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/impute/test/testInputGenerator.dml b/src/test/scripts/applications/impute/test/testInputGenerator.dml
index 9eaf034..2040756 100644
--- a/src/test/scripts/applications/impute/test/testInputGenerator.dml
+++ b/src/test/scripts/applications/impute/test/testInputGenerator.dml
@@ -1,152 +1,152 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# Generates synthetic data to try inputeGaussMCMC.dml
-# How To Run:
-#
-# 
-# hadoop jar SystemML.jar -f test/scripts/applications/impute/test/testInputGenerator.dml -exec singlenode
-#    -args test/scripts/applications/impute/test/initial_reports test/scripts/applications/impute/test/CReps 
-#        test/scripts/applications/impute/test/RegresValueMap test/scripts/applications/impute/test/RegresParamMap
-
-num_terms = 40;   # The number of term reports, feel free to change
-num_attrs = 6;    # 4 regular attributes, 1 total cost, 1 auxiliary ("macroeconomic")
-num_frees = 4;    # We estimate the last report, which has 4 degrees of freedom
-num_factors = 4;  # In regressions: x[t] ~ x[t-1], (x[t-1] - x[t-2]), total_cost[t]
-
-# We have one regression equation per term, except the first two terms,
-# for each attribute except the auxiliary attribute:
-num_reg_eqs = (num_terms - 2) * (num_attrs - 1);
-
-# All regression equations for the same attribute share the same parameters,
-# regardless of the term: 
-num_params  = num_factors * (num_attrs - 1);
-
-# GENERATE THE INITIAL REPORTS MATRIX (with the last term report set to 0.0)
-
-initial_reports_matrix = matrix (0.0, rows = num_attrs, cols = num_terms);
-
-# We assume that the terms are quarterly.
-# Auxiliary attribute is = sqrt(1.1)^t, a steady exponential growth of 21% a year.
-# The total cost regresses on the auxiliary attribute and shows a combination of
-# exponential and cyclic behavior year after year.
-
-zero = matrix (0.0, rows = 1, cols = 1);
-
-initial_reports_matrix [6, 1] = zero + 1; # auxiliary attribute
-for (t in 2 : num_terms) {
-    initial_reports_matrix [6, t] = initial_reports_matrix [6, t-1] * sqrt (1.1);
-}
-
-initial_reports_matrix [1, 1] = zero + 1 * 0.4615107865026;
-initial_reports_matrix [2, 1] = zero + 1 * 0.0270996863066;
-initial_reports_matrix [3, 1] = zero + 1 * 0.3772761445953;
-initial_reports_matrix [4, 1] = zero + 1 * 0.1341133825954;
-initial_reports_matrix [5, 1] = zero + 1; # total cost attribute
-
-initial_reports_matrix [1, 2] = zero + 2 * 0.3281440348352;
-initial_reports_matrix [2, 2] = zero + 2 * 0.0345738029588;
-initial_reports_matrix [3, 2] = zero + 2 * 0.4052452565031;
-initial_reports_matrix [4, 2] = zero + 2 * 0.2320369057028;
-initial_reports_matrix [5, 2] = zero + 2; # total cost attribute
-
-for (t in 3 : (num_terms - 1))
-{
-    initial_reports_matrix [5, t] = 
-        - 1.1 *  initial_reports_matrix [5, t-1] 
-        + 1.1 * (initial_reports_matrix [5, t-1] - initial_reports_matrix [5, t-2])
-        + 3.0 * (initial_reports_matrix [6, t]);
-
-    initial_reports_matrix [1, t] = 
-          0.45 *  initial_reports_matrix [1, t-1] 
-        + 0.00 * (initial_reports_matrix [1, t-1] - initial_reports_matrix [1, t-2])
-        + 0.2243041078721 * (initial_reports_matrix [5, t]);
-
-    initial_reports_matrix [2, t] = 
-          0.00 *  initial_reports_matrix [2, t-1] 
-        + 0.45 * (initial_reports_matrix [2, t-1] - initial_reports_matrix [2, t-2])
-        + 0.0417492985298 * (initial_reports_matrix [5, t]);
-
-    initial_reports_matrix [3, t] = 
-        - 0.40 *  initial_reports_matrix [3, t-1] 
-        + 0.00 * (initial_reports_matrix [3, t-1] - initial_reports_matrix [3, t-2])
-        + 0.4807004854222 * (initial_reports_matrix [5, t]);
-
-    initial_reports_matrix [4, t] =
-        - 0.20 * initial_reports_matrix [4, t-1] 
-        + 0.30 * (initial_reports_matrix [4, t-1] - initial_reports_matrix [4, t-2])
-        + 0.2549604916594 * (initial_reports_matrix [5, t]);
-}
-
-# GENERATE A LINEAR MAP FROM FREE VARIABLES TO THE REPORTS
-
-CReps = matrix (0.0, rows = (num_terms * num_attrs), cols = num_frees);
-CReps [(num_terms-1) * num_attrs + 1, 1] = 1.0 + zero;
-CReps [(num_terms-1) * num_attrs + 2, 2] = 1.0 + zero;
-CReps [(num_terms-1) * num_attrs + 3, 3] = 1.0 + zero;
-CReps [(num_terms-1) * num_attrs + 4, 4] = 1.0 + zero;
-CReps [(num_terms-1) * num_attrs + 5, 1] = 1.0 + zero;
-CReps [(num_terms-1) * num_attrs + 5, 2] = 1.0 + zero;
-CReps [(num_terms-1) * num_attrs + 5, 3] = 1.0 + zero;
-CReps [(num_terms-1) * num_attrs + 5, 4] = 1.0 + zero;
-
-# GENERATE A LINEAR MAP FROM REPORTS TO REGRESSION FACTORS
-
-RegresValueMap = matrix (0.0, rows = (num_reg_eqs * num_factors), cols = (num_terms * num_attrs));
-
-for (t in 3 : num_terms) {
-    for (i in 1 : (num_attrs - 2)) {
-        reg_index = ((t-3)*(num_attrs-1)-1 + i) * num_factors;
-        RegresValueMap [reg_index + 1, (t-1) * num_attrs + i] =  1.0 + zero;  # First factor is x[t]
-        RegresValueMap [reg_index + 2, (t-2) * num_attrs + i] =  1.0 + zero;  # Second factor is x[t-1]
-        RegresValueMap [reg_index + 3, (t-2) * num_attrs + i] =  1.0 + zero;  # Third factor is
-        RegresValueMap [reg_index + 3, (t-3) * num_attrs + i] = -1.0 + zero;  #   x[t-1] - x[t-2]
-        RegresValueMap [reg_index + 4, (t-1) * num_attrs + 5] =  1.0 + zero;  # 4th factor = total_cost[t]
-    }
-    # For the total cost itself, the regression is almost the same, except the last line:
-    reg_index = ((t-3)*(num_attrs-1)-1 + 5) * num_factors;
-    RegresValueMap [reg_index + 1, (t-1) * num_attrs + 5] =  1.0 + zero;  # First factor is x[t]
-    RegresValueMap [reg_index + 2, (t-2) * num_attrs + 5] =  1.0 + zero;  # Second factor is x[t-1]
-    RegresValueMap [reg_index + 3, (t-2) * num_attrs + 5] =  1.0 + zero;  # Third factor is
-    RegresValueMap [reg_index + 3, (t-3) * num_attrs + 5] = -1.0 + zero;  #   x[t-1] - x[t-2]
-    RegresValueMap [reg_index + 4, (t-1) * num_attrs + 6] =  1.0 + zero;  # 4th factor = auxiliary[t]
-}
-
-# GENERATE A LINEAR MAP FROM PARAMETERS TO REGRESSION FACTORS
-
-RegresParamMap = matrix (0.0, rows = (num_reg_eqs * num_factors), cols = num_params);
-
-for (t in 3 : num_terms) {
-    for (i in 1 : (num_attrs - 1)) {
-        reg_index = ((t-3)*(num_attrs-1)-1 + i) * num_factors;
-        RegresParamMap [reg_index + 1, 0 * (num_attrs-1) + i] = 1.0 + zero;
-        RegresParamMap [reg_index + 2, 1 * (num_attrs-1) + i] = 1.0 + zero;
-        RegresParamMap [reg_index + 3, 2 * (num_attrs-1) + i] = 1.0 + zero;
-        RegresParamMap [reg_index + 4, 3 * (num_attrs-1) + i] = 1.0 + zero;
-    }
-}
-
-# WRITE OUT ALL GENERATED MATRICES
-
-write (initial_reports_matrix, $1, format="text");
-write (CReps, $2, format="text");
-write (RegresValueMap, $3, format="text");
-write (RegresParamMap, $4, format="text");
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# Generates synthetic data to try imputeGaussMCMC.dml
+# How To Run:
+#
+# 
+# hadoop jar SystemML.jar -f test/scripts/applications/impute/test/testInputGenerator.dml -exec singlenode
+#    -args test/scripts/applications/impute/test/initial_reports test/scripts/applications/impute/test/CReps 
+#        test/scripts/applications/impute/test/RegresValueMap test/scripts/applications/impute/test/RegresParamMap
+
+num_terms = 40;   # The number of term reports, feel free to change
+num_attrs = 6;    # 4 regular attributes, 1 total cost, 1 auxiliary ("macroeconomic")
+num_frees = 4;    # We estimate the last report, which has 4 degrees of freedom
+num_factors = 4;  # In regressions: x[t] ~ x[t-1], (x[t-1] - x[t-2]), total_cost[t]
+
+# We have one regression equation per term, except the first two terms,
+# for each attribute except the auxiliary attribute:
+num_reg_eqs = (num_terms - 2) * (num_attrs - 1);
+
+# All regression equations for the same attribute share the same parameters,
+# regardless of the term: 
+num_params  = num_factors * (num_attrs - 1);
+
+# GENERATE THE INITIAL REPORTS MATRIX (with the last term report set to 0.0)
+
+initial_reports_matrix = matrix (0.0, rows = num_attrs, cols = num_terms);
+
+# We assume that the terms are quarterly.
+# Auxiliary attribute is = sqrt(1.1)^t, a steady exponential growth of 21% a year.
+# The total cost regresses on the auxiliary attribute and shows a combination of
+# exponential and cyclic behavior year after year.
+
+zero = matrix (0.0, rows = 1, cols = 1);
+
+initial_reports_matrix [6, 1] = zero + 1; # auxiliary attribute
+for (t in 2 : num_terms) {
+    initial_reports_matrix [6, t] = initial_reports_matrix [6, t-1] * sqrt (1.1);
+}
+
+initial_reports_matrix [1, 1] = zero + 1 * 0.4615107865026;
+initial_reports_matrix [2, 1] = zero + 1 * 0.0270996863066;
+initial_reports_matrix [3, 1] = zero + 1 * 0.3772761445953;
+initial_reports_matrix [4, 1] = zero + 1 * 0.1341133825954;
+initial_reports_matrix [5, 1] = zero + 1; # total cost attribute
+
+initial_reports_matrix [1, 2] = zero + 2 * 0.3281440348352;
+initial_reports_matrix [2, 2] = zero + 2 * 0.0345738029588;
+initial_reports_matrix [3, 2] = zero + 2 * 0.4052452565031;
+initial_reports_matrix [4, 2] = zero + 2 * 0.2320369057028;
+initial_reports_matrix [5, 2] = zero + 2; # total cost attribute
+
+for (t in 3 : (num_terms - 1))
+{
+    initial_reports_matrix [5, t] = 
+        - 1.1 *  initial_reports_matrix [5, t-1] 
+        + 1.1 * (initial_reports_matrix [5, t-1] - initial_reports_matrix [5, t-2])
+        + 3.0 * (initial_reports_matrix [6, t]);
+
+    initial_reports_matrix [1, t] = 
+          0.45 *  initial_reports_matrix [1, t-1] 
+        + 0.00 * (initial_reports_matrix [1, t-1] - initial_reports_matrix [1, t-2])
+        + 0.2243041078721 * (initial_reports_matrix [5, t]);
+
+    initial_reports_matrix [2, t] = 
+          0.00 *  initial_reports_matrix [2, t-1] 
+        + 0.45 * (initial_reports_matrix [2, t-1] - initial_reports_matrix [2, t-2])
+        + 0.0417492985298 * (initial_reports_matrix [5, t]);
+
+    initial_reports_matrix [3, t] = 
+        - 0.40 *  initial_reports_matrix [3, t-1] 
+        + 0.00 * (initial_reports_matrix [3, t-1] - initial_reports_matrix [3, t-2])
+        + 0.4807004854222 * (initial_reports_matrix [5, t]);
+
+    initial_reports_matrix [4, t] =
+        - 0.20 * initial_reports_matrix [4, t-1] 
+        + 0.30 * (initial_reports_matrix [4, t-1] - initial_reports_matrix [4, t-2])
+        + 0.2549604916594 * (initial_reports_matrix [5, t]);
+}
+
+# GENERATE A LINEAR MAP FROM FREE VARIABLES TO THE REPORTS
+
+CReps = matrix (0.0, rows = (num_terms * num_attrs), cols = num_frees);
+CReps [(num_terms-1) * num_attrs + 1, 1] = 1.0 + zero;
+CReps [(num_terms-1) * num_attrs + 2, 2] = 1.0 + zero;
+CReps [(num_terms-1) * num_attrs + 3, 3] = 1.0 + zero;
+CReps [(num_terms-1) * num_attrs + 4, 4] = 1.0 + zero;
+CReps [(num_terms-1) * num_attrs + 5, 1] = 1.0 + zero;
+CReps [(num_terms-1) * num_attrs + 5, 2] = 1.0 + zero;
+CReps [(num_terms-1) * num_attrs + 5, 3] = 1.0 + zero;
+CReps [(num_terms-1) * num_attrs + 5, 4] = 1.0 + zero;
+
+# GENERATE A LINEAR MAP FROM REPORTS TO REGRESSION FACTORS
+
+RegresValueMap = matrix (0.0, rows = (num_reg_eqs * num_factors), cols = (num_terms * num_attrs));
+
+for (t in 3 : num_terms) {
+    for (i in 1 : (num_attrs - 2)) {
+        reg_index = ((t-3)*(num_attrs-1)-1 + i) * num_factors;
+        RegresValueMap [reg_index + 1, (t-1) * num_attrs + i] =  1.0 + zero;  # First factor is x[t]
+        RegresValueMap [reg_index + 2, (t-2) * num_attrs + i] =  1.0 + zero;  # Second factor is x[t-1]
+        RegresValueMap [reg_index + 3, (t-2) * num_attrs + i] =  1.0 + zero;  # Third factor is
+        RegresValueMap [reg_index + 3, (t-3) * num_attrs + i] = -1.0 + zero;  #   x[t-1] - x[t-2]
+        RegresValueMap [reg_index + 4, (t-1) * num_attrs + 5] =  1.0 + zero;  # 4th factor = total_cost[t]
+    }
+    # For the total cost itself, the regression is almost the same, except the last line:
+    reg_index = ((t-3)*(num_attrs-1)-1 + 5) * num_factors;
+    RegresValueMap [reg_index + 1, (t-1) * num_attrs + 5] =  1.0 + zero;  # First factor is x[t]
+    RegresValueMap [reg_index + 2, (t-2) * num_attrs + 5] =  1.0 + zero;  # Second factor is x[t-1]
+    RegresValueMap [reg_index + 3, (t-2) * num_attrs + 5] =  1.0 + zero;  # Third factor is
+    RegresValueMap [reg_index + 3, (t-3) * num_attrs + 5] = -1.0 + zero;  #   x[t-1] - x[t-2]
+    RegresValueMap [reg_index + 4, (t-1) * num_attrs + 6] =  1.0 + zero;  # 4th factor = auxiliary[t]
+}
+
+# GENERATE A LINEAR MAP FROM PARAMETERS TO REGRESSION FACTORS
+
+RegresParamMap = matrix (0.0, rows = (num_reg_eqs * num_factors), cols = num_params);
+
+for (t in 3 : num_terms) {
+    for (i in 1 : (num_attrs - 1)) {
+        reg_index = ((t-3)*(num_attrs-1)-1 + i) * num_factors;
+        RegresParamMap [reg_index + 1, 0 * (num_attrs-1) + i] = 1.0 + zero;
+        RegresParamMap [reg_index + 2, 1 * (num_attrs-1) + i] = 1.0 + zero;
+        RegresParamMap [reg_index + 3, 2 * (num_attrs-1) + i] = 1.0 + zero;
+        RegresParamMap [reg_index + 4, 3 * (num_attrs-1) + i] = 1.0 + zero;
+    }
+}
+
+# WRITE OUT ALL GENERATED MATRICES
+
+write (initial_reports_matrix, $1, format="text");
+write (CReps, $2, format="text");
+write (RegresValueMap, $3, format="text");
+write (RegresParamMap, $4, format="text");

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/impute/testShadowRecurrenceInputGenerator.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/impute/testShadowRecurrenceInputGenerator.dml b/src/test/scripts/applications/impute/testShadowRecurrenceInputGenerator.dml
index 9a20214..fcbf47c 100644
--- a/src/test/scripts/applications/impute/testShadowRecurrenceInputGenerator.dml
+++ b/src/test/scripts/applications/impute/testShadowRecurrenceInputGenerator.dml
@@ -1,174 +1,174 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# hadoop jar SystemML.jar -f test/scripts/applications/impute/testShadowRecurrenceInputGenerator.dml -exec singlenode
-#    -args
-#        test/scripts/applications/impute/initial_reports
-#        test/scripts/applications/impute/CReps 
-#        test/scripts/applications/impute/RegresValueMap
-#        test/scripts/applications/impute/RegresFactorDefault
-#        test/scripts/applications/impute/RegresParamMap
-#        test/scripts/applications/impute/RegresCoeffDefault
-#        test/scripts/applications/impute/RegresScaleMult
-
-
-# GENERATE SYNTHETIC "INITIAL REPORTS"
-
-num_terms = 10;
-num_series = 10;
-num_attrs = 2 * num_series;
-num_frees = num_series * (num_terms + 1);
-
-initial_reports = Rand (rows = num_attrs, cols = num_terms, min = -50.0, max = 50.0);
-
-for (s in 1:num_series) {
-    for (t in 1:(num_terms - 1)) {
-        val = 400 - (t - 14.16) * (t - 5.5) * (t + 3.16) / 2.463552;
-        initial_reports [2 * (s-1) + 1, t] = initial_reports [2 * (s-1) + 1, t] + val;
-    }
-}
-
-zero = matrix (0.0, rows = 1, cols = 1);
-
-# ---------------------------------------------------------
-# GENERATE AN AFFINE MAP FROM FREE VARIABLES TO THE REPORTS
-# AFFINE MAP = LINEAR MAP + INITIAL (DEFAULT) REPORTS
-# ---------------------------------------------------------
-
-CReps = matrix (0.0, rows = (num_terms * num_attrs), cols = num_frees);
-
-for (s in 1:num_series) {
-    for (t in 0:num_terms) {
-        ta_shift = (t - 1) * num_attrs + 2 * s;
-        if (t == 0) {
-            ta_shift = (num_terms - 1) * num_attrs + (2 * s - 1);
-        }        
-        CReps [ta_shift, t * num_series + s] = 1.0 + zero;
-}   }
-
-# In all regressions, except the last few "special" ones, there are 3 factors
-# (here "x" are the "states" and "y" are the "observations"):
-# Observation  regression:  y[t]-x[t] ~ a * 1      ###   + b * (y[t-1]-x[t-1])
-# State-change regression:  x[t] ~ c * x[t-1] + d * (x[t-1]-x[t-2])
-
-num_factors = 3; 
-num_reg_eqs = num_terms * 2 * num_series;
-num_params  = 3 * num_series;
-
-RegresValueMap = matrix (0.0, rows = (num_reg_eqs * num_factors), cols = (num_terms * num_attrs));
-RegresFactorDefault = matrix (0.0, rows = (num_reg_eqs * num_factors), cols = 1);
-
-
-# ---------------------------------------------------------
-# GENERATE AN AFFINE MAP FROM REPORTS TO REGRESSION FACTORS
-# AFFINE MAP = LINEAR MAP + A VECTOR OF DEFAULTS
-# ---------------------------------------------------------
-
-
-for (t in 1 : num_terms) {
-    for (s in 1 : num_series) {
-
-        reg_index =  ((t-1) * num_series + (s-1)) * 2 * num_factors;
-
-# Observation regression:
-
-        RegresValueMap [reg_index + 1, (t-1) * num_attrs + 2 * s - 1] = -1.0 + zero; # 1st factor: 
-        RegresValueMap [reg_index + 1, (t-1) * num_attrs + 2 * s    ] =  1.0 + zero; #   -(y[t]-x[t])
-        
-        RegresFactorDefault [reg_index + 2, 1] =  1.0 + zero; # 2nd factor: Intercept
-        
-#       RegresValueMap [reg_index + 3, (t-2) * num_attrs + 2 * s - 1] =  1.0 + zero; # 3rd factor: 
-#       RegresValueMap [reg_index + 3, (t-2) * num_attrs + 2 * s    ] = -1.0 + zero; #   y[t-1]-x[t-1]
-        
-        reg_index = reg_index + num_factors;
-        
-# State-change regression:
-
-        if (t >= 3) {
-            RegresValueMap [reg_index + 1, (t-1) * num_attrs + 2 * s] = -1.0 + zero; # 1st factor: -x[t]
-            RegresValueMap [reg_index + 2, (t-2) * num_attrs + 2 * s] =  1.0 + zero; # 2nd factor: x[t-1]
-            RegresValueMap [reg_index + 3, (t-2) * num_attrs + 2 * s] =  1.0 + zero; # 3rd factor: 
-            RegresValueMap [reg_index + 3, (t-3) * num_attrs + 2 * s] = -1.0 + zero; #   x[t-1]-x[t-2]
-        }
-    }
-}
-
-# ----------------------------------------------------------
-# GENERATE AN AFFINE MAP FROM PARAMETERS TO THE COEFFICIENTS
-# AT REGRESSION FACTORS: A LINEAR MAP + A VECTOR OF DEFAULTS
-# ----------------------------------------------------------
-
-RegresParamMap = matrix (0.0, rows = (num_reg_eqs * num_factors), cols = num_params);
-RegresCoeffDefault = matrix (0.0, rows = (num_reg_eqs * num_factors), cols = 1);
-
-for (t in 1 : num_terms) {
-    for (s in 1 : num_series) {
-
-        reg_index =  ((t-1) * num_series + (s-1)) * 2 * num_factors;
-
-# Observation regression:
-
-        RegresCoeffDefault [reg_index + 1, 1] =  1.0 + zero;
-        RegresParamMap [reg_index + 2, 3 * (s-1) + 1] = 1.0 + zero; 
-        
-#       RegresParamMap [reg_index + 3, 4 * (s-1) + 2] = 1.0 + zero; 
-        
-        reg_index = reg_index + num_factors;
-        
-# State-change regression:
-
-        if (t >= 3) {
-            RegresCoeffDefault [reg_index + 1, 1] =  1.0 + zero;
-            RegresParamMap [reg_index + 2, 3 * (s-1) + 2] = 1.0 + zero;
-            RegresParamMap [reg_index + 3, 3 * (s-1) + 3] = 1.0 + zero;
-        }
-    }
-}
-
-# ----------------------------------------------------------------------
-# GENERATE A VECTOR OF SCALE MULTIPLIERS ("WEIGHTS"), ONE PER REGRESSION
-# ----------------------------------------------------------------------
-
-RegresScaleMult = matrix (1.0, rows = num_reg_eqs, cols = 1);
-global_weight = 0.5 + zero;
-acceptable_drift = 1.0;
-
-for (t in 1 : num_terms) {
-    for (s in 1 : num_series) {
-        reg_id =  ((t-1) * num_series + (s-1)) * 2 + 1;
-        RegresScaleMult [reg_id    , 1] = global_weight / (acceptable_drift ^ 2);
-        RegresScaleMult [reg_id + 1, 1] = global_weight / (acceptable_drift ^ 2);
-    }
-}
-
-
-# --------------------------------
-# WRITE OUT ALL GENERATED MATRICES
-# --------------------------------
-
-
-write (initial_reports,    $1, format="text");
-write (CReps,              $2, format="text");
-write (RegresValueMap,     $3, format="text");
-write (RegresFactorDefault,$4, format="text");
-write (RegresParamMap,     $5, format="text");
-write (RegresCoeffDefault, $6, format="text");
-write (RegresScaleMult,    $7, format="text");
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# hadoop jar SystemML.jar -f test/scripts/applications/impute/testShadowRecurrenceInputGenerator.dml -exec singlenode
+#    -args
+#        test/scripts/applications/impute/initial_reports
+#        test/scripts/applications/impute/CReps 
+#        test/scripts/applications/impute/RegresValueMap
+#        test/scripts/applications/impute/RegresFactorDefault
+#        test/scripts/applications/impute/RegresParamMap
+#        test/scripts/applications/impute/RegresCoeffDefault
+#        test/scripts/applications/impute/RegresScaleMult
+
+
+# GENERATE SYNTHETIC "INITIAL REPORTS"
+
+num_terms = 10;
+num_series = 10;
+num_attrs = 2 * num_series;
+num_frees = num_series * (num_terms + 1);
+
+initial_reports = Rand (rows = num_attrs, cols = num_terms, min = -50.0, max = 50.0);
+
+for (s in 1:num_series) {
+    for (t in 1:(num_terms - 1)) {
+        val = 400 - (t - 14.16) * (t - 5.5) * (t + 3.16) / 2.463552;
+        initial_reports [2 * (s-1) + 1, t] = initial_reports [2 * (s-1) + 1, t] + val;
+    }
+}
+
+zero = matrix (0.0, rows = 1, cols = 1);
+
+# ---------------------------------------------------------
+# GENERATE AN AFFINE MAP FROM FREE VARIABLES TO THE REPORTS
+# AFFINE MAP = LINEAR MAP + INITIAL (DEFAULT) REPORTS
+# ---------------------------------------------------------
+
+CReps = matrix (0.0, rows = (num_terms * num_attrs), cols = num_frees);
+
+for (s in 1:num_series) {
+    for (t in 0:num_terms) {
+        ta_shift = (t - 1) * num_attrs + 2 * s;
+        if (t == 0) {
+            ta_shift = (num_terms - 1) * num_attrs + (2 * s - 1);
+        }        
+        CReps [ta_shift, t * num_series + s] = 1.0 + zero;
+}   }
+
+# In all regressions, except the last few "special" ones, there are 3 factors
+# (here "x" are the "states" and "y" are the "observations"):
+# Observation  regression:  y[t]-x[t] ~ a * 1      ###   + b * (y[t-1]-x[t-1])
+# State-change regression:  x[t] ~ c * x[t-1] + d * (x[t-1]-x[t-2])
+
+num_factors = 3; 
+num_reg_eqs = num_terms * 2 * num_series;
+num_params  = 3 * num_series;
+
+RegresValueMap = matrix (0.0, rows = (num_reg_eqs * num_factors), cols = (num_terms * num_attrs));
+RegresFactorDefault = matrix (0.0, rows = (num_reg_eqs * num_factors), cols = 1);
+
+
+# ---------------------------------------------------------
+# GENERATE AN AFFINE MAP FROM REPORTS TO REGRESSION FACTORS
+# AFFINE MAP = LINEAR MAP + A VECTOR OF DEFAULTS
+# ---------------------------------------------------------
+
+
+for (t in 1 : num_terms) {
+    for (s in 1 : num_series) {
+
+        reg_index =  ((t-1) * num_series + (s-1)) * 2 * num_factors;
+
+# Observation regression:
+
+        RegresValueMap [reg_index + 1, (t-1) * num_attrs + 2 * s - 1] = -1.0 + zero; # 1st factor: 
+        RegresValueMap [reg_index + 1, (t-1) * num_attrs + 2 * s    ] =  1.0 + zero; #   -(y[t]-x[t])
+        
+        RegresFactorDefault [reg_index + 2, 1] =  1.0 + zero; # 2nd factor: Intercept
+        
+#       RegresValueMap [reg_index + 3, (t-2) * num_attrs + 2 * s - 1] =  1.0 + zero; # 3rd factor: 
+#       RegresValueMap [reg_index + 3, (t-2) * num_attrs + 2 * s    ] = -1.0 + zero; #   y[t-1]-x[t-1]
+        
+        reg_index = reg_index + num_factors;
+        
+# State-change regression:
+
+        if (t >= 3) {
+            RegresValueMap [reg_index + 1, (t-1) * num_attrs + 2 * s] = -1.0 + zero; # 1st factor: -x[t]
+            RegresValueMap [reg_index + 2, (t-2) * num_attrs + 2 * s] =  1.0 + zero; # 2nd factor: x[t-1]
+            RegresValueMap [reg_index + 3, (t-2) * num_attrs + 2 * s] =  1.0 + zero; # 3rd factor: 
+            RegresValueMap [reg_index + 3, (t-3) * num_attrs + 2 * s] = -1.0 + zero; #   x[t-1]-x[t-2]
+        }
+    }
+}
+
+# ----------------------------------------------------------
+# GENERATE AN AFFINE MAP FROM PARAMETERS TO THE COEFFICIENTS
+# AT REGRESSION FACTORS: A LINEAR MAP + A VECTOR OF DEFAULTS
+# ----------------------------------------------------------
+
+RegresParamMap = matrix (0.0, rows = (num_reg_eqs * num_factors), cols = num_params);
+RegresCoeffDefault = matrix (0.0, rows = (num_reg_eqs * num_factors), cols = 1);
+
+for (t in 1 : num_terms) {
+    for (s in 1 : num_series) {
+
+        reg_index =  ((t-1) * num_series + (s-1)) * 2 * num_factors;
+
+# Observation regression:
+
+        RegresCoeffDefault [reg_index + 1, 1] =  1.0 + zero;
+        RegresParamMap [reg_index + 2, 3 * (s-1) + 1] = 1.0 + zero; 
+        
+#       RegresParamMap [reg_index + 3, 4 * (s-1) + 2] = 1.0 + zero; 
+        
+        reg_index = reg_index + num_factors;
+        
+# State-change regression:
+
+        if (t >= 3) {
+            RegresCoeffDefault [reg_index + 1, 1] =  1.0 + zero;
+            RegresParamMap [reg_index + 2, 3 * (s-1) + 2] = 1.0 + zero;
+            RegresParamMap [reg_index + 3, 3 * (s-1) + 3] = 1.0 + zero;
+        }
+    }
+}
+
+# ----------------------------------------------------------------------
+# GENERATE A VECTOR OF SCALE MULTIPLIERS ("WEIGHTS"), ONE PER REGRESSION
+# ----------------------------------------------------------------------
+
+RegresScaleMult = matrix (1.0, rows = num_reg_eqs, cols = 1);
+global_weight = 0.5 + zero;
+acceptable_drift = 1.0;
+
+for (t in 1 : num_terms) {
+    for (s in 1 : num_series) {
+        reg_id =  ((t-1) * num_series + (s-1)) * 2 + 1;
+        RegresScaleMult [reg_id    , 1] = global_weight / (acceptable_drift ^ 2);
+        RegresScaleMult [reg_id + 1, 1] = global_weight / (acceptable_drift ^ 2);
+    }
+}
+
+
+# --------------------------------
+# WRITE OUT ALL GENERATED MATRICES
+# --------------------------------
+
+
+write (initial_reports,    $1, format="text");
+write (CReps,              $2, format="text");
+write (RegresValueMap,     $3, format="text");
+write (RegresFactorDefault,$4, format="text");
+write (RegresParamMap,     $5, format="text");
+write (RegresCoeffDefault, $6, format="text");
+write (RegresScaleMult,    $7, format="text");

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/impute/tmp.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/impute/tmp.dml b/src/test/scripts/applications/impute/tmp.dml
index b70a4ad..9e0417e 100644
--- a/src/test/scripts/applications/impute/tmp.dml
+++ b/src/test/scripts/applications/impute/tmp.dml
@@ -1,128 +1,128 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-setwd ("test/scripts/applications/glm");
-source ("Misc.dml");
-
-blahblah = 0.0 / 0.0; # -0.00099999999;
-print (blahblah);
-x = matrix (0.0, rows = 55, cols = 1);
-x [55, 1] = blahblah;
-print (castAsScalar (x [55, 1]));
-for (i in 1:9) {
-    x [i, 1] = -0.001 * i;
-}
-for (i in 1:5) {
-    x [(9 * i + 1):(9 * i + 9), 1] = x [(9 * i - 8):(9 * i), 1] * 10;
-}
-y = atan_temporary (x);
-z = tan (y);
-for (i in 1:nrow(x)) {
-    [x_m, x_e] = round_to_print (castAsScalar (x[i,1]));
-    [a_m, a_e] = round_to_print (castAsScalar (y[i,1]));
-    [t_m, t_e] = round_to_print (castAsScalar (z[i,1]));
-    print ("x = " + x_m + "E" + x_e + ";  atan(x) = " + a_m + "E" + a_e + ";  tan(atan(x)) = " + t_m + "E" + t_e);
-}
-
-
-
-
-
-
-coeff_a = -3.14;
-coeff_b =  3.14 * (2 - 3 - 1.7);
-coeff_c = -3.14 * (2 * (-3) + 2 * (-1.7) + (-3) * (-1.7));
-coeff_d =  3.14 * (2 * (-3) * (-1.7));
-
-
-    coeff_aa = coeff_b / coeff_a;
-    coeff_bb = coeff_c / coeff_a;
-    coeff_cc = coeff_d / coeff_a;
-
-    coeff_Q = (coeff_aa * coeff_aa - 3.0 * coeff_bb) / 9.0;
-    coeff_R = (2.0 * coeff_aa * coeff_aa * coeff_aa - 9.0 * coeff_aa * coeff_bb + 27.0 * coeff_cc) / 54.0;
-
-    if (coeff_R * coeff_R < coeff_Q * coeff_Q * coeff_Q)
-    {
-        two_pi_third = 2.0943951023931954923084289221863;
-        acos_argument = coeff_R / sqrt (coeff_Q * coeff_Q * coeff_Q);
-        
-        x = abs (acos_argument);
-        acos_x = sqrt (1.0 - x) * (1.5707963050 + x * (-0.2145988016
-            + x * ( 0.0889789874 + x * (-0.0501743046
-            + x * ( 0.0308918810 + x * (-0.0170881256
-            + x * ( 0.0066700901 + x * (-0.0012624911))))))));
-        if (acos_argument >= 0.0) {
-            coeff_theta = acos_x;
-        } else {
-            coeff_theta = 3.1415926535897932384626433832795 - acos_x;
-        }
-        
-        root_1 = - coeff_aa / 3.0 - 2.0 * sqrt (coeff_Q) * cos (coeff_theta / 3.0);
-        root_2 = - coeff_aa / 3.0 - 2.0 * sqrt (coeff_Q) * cos (coeff_theta / 3.0 + two_pi_third);
-        root_3 = - coeff_aa / 3.0 - 2.0 * sqrt (coeff_Q) * cos (coeff_theta / 3.0 - two_pi_third);
-        
-        root_min = min (min (root_1, root_2), root_3);
-        root_max = max (max (root_1, root_2), root_3);
-        root_middle = root_1 + root_2 + root_3 - root_min - root_max;
-        
-        root_1 = root_min; root_2 = root_middle; root_3 = root_max;
-        
-        print ("Three roots:  " + (round (root_1 * 10000) / 10000) + ",  " + (round (root_2 * 10000) / 10000) + ",  " + (round (root_3 * 10000) / 10000));
-        
-    } else {
-        if (coeff_R >= 0.0) {
-            sgn_coeff_R = 1.0;
-        } else {
-            sgn_coeff_R = -1.0;
-        }
-        coeff_bigA = - sgn_coeff_R * (abs (coeff_R) + sqrt (coeff_R * coeff_R - coeff_Q * coeff_Q * coeff_Q)) ^ (1.0 / 3.0);
-        if (coeff_bigA != 0.0) {
-            root = coeff_bigA + coeff_Q / coeff_bigA - coeff_aa / 3.0;
-        } else {
-            root = - coeff_aa / 3.0;
-        }
-        print ("One root:  " + (round (root * 10000) / 10000));
-    }
-
-/*
-atan_temporary =
-    function (Matrix [double] Args) return (Matrix [double] AtanArgs)
-{
-    AbsArgs = abs (Args);
-    Eks = AbsArgs + ppred (AbsArgs, 0.0, "==") * 0.000000000001;
-    Eks = ppred (AbsArgs, 1.0, "<=") * Eks + ppred (AbsArgs, 1.0, ">") / Eks;
-    EksSq = Eks * Eks;
-    AtanEks = 
-        Eks   * ( 1.0000000000 + 
-        EksSq * (-0.3333314528 + # Milton Abramowitz and Irene A. Stegun, Eds.
-        EksSq * ( 0.1999355085 + # "Handbook of Mathematical Functions"
-        EksSq * (-0.1420889944 + # U.S. National Bureau of Standards, June 1964
-        EksSq * ( 0.1065626393 + # Section 4.4, page 81, Equation 4.4.49
-        EksSq * (-0.0752896400 +
-        EksSq * ( 0.0429096138 + 
-        EksSq * (-0.0161657367 + 
-        EksSq *   0.0028662257 ))))))));
-    pi_over_two = 1.5707963267948966192313216916398;
-    AtanAbsArgs = ppred (AbsArgs, 1.0, "<=") * AtanEks + ppred (AbsArgs, 1.0, ">") * (pi_over_two - AtanEks);
-    AtanArgs    = (ppred (Args, 0.0, ">=") - ppred (Args, 0.0, "<")) * AtanAbsArgs;
-}
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+setwd ("test/scripts/applications/glm");
+source ("Misc.dml");
+
+blahblah = 0.0 / 0.0; # -0.00099999999;
+print (blahblah);
+x = matrix (0.0, rows = 55, cols = 1);
+x [55, 1] = blahblah;
+print (castAsScalar (x [55, 1]));
+for (i in 1:9) {
+    x [i, 1] = -0.001 * i;
+}
+for (i in 1:5) {
+    x [(9 * i + 1):(9 * i + 9), 1] = x [(9 * i - 8):(9 * i), 1] * 10;
+}
+y = atan_temporary (x);
+z = tan (y);
+for (i in 1:nrow(x)) {
+    [x_m, x_e] = round_to_print (castAsScalar (x[i,1]));
+    [a_m, a_e] = round_to_print (castAsScalar (y[i,1]));
+    [t_m, t_e] = round_to_print (castAsScalar (z[i,1]));
+    print ("x = " + x_m + "E" + x_e + ";  atan(x) = " + a_m + "E" + a_e + ";  tan(atan(x)) = " + t_m + "E" + t_e);
+}
+
+
+
+
+
+
+coeff_a = -3.14;
+coeff_b =  3.14 * (2 - 3 - 1.7);
+coeff_c = -3.14 * (2 * (-3) + 2 * (-1.7) + (-3) * (-1.7));
+coeff_d =  3.14 * (2 * (-3) * (-1.7));
+
+
+    coeff_aa = coeff_b / coeff_a;
+    coeff_bb = coeff_c / coeff_a;
+    coeff_cc = coeff_d / coeff_a;
+
+    coeff_Q = (coeff_aa * coeff_aa - 3.0 * coeff_bb) / 9.0;
+    coeff_R = (2.0 * coeff_aa * coeff_aa * coeff_aa - 9.0 * coeff_aa * coeff_bb + 27.0 * coeff_cc) / 54.0;
+
+    if (coeff_R * coeff_R < coeff_Q * coeff_Q * coeff_Q)
+    {
+        two_pi_third = 2.0943951023931954923084289221863;
+        acos_argument = coeff_R / sqrt (coeff_Q * coeff_Q * coeff_Q);
+        
+        x = abs (acos_argument);
+        acos_x = sqrt (1.0 - x) * (1.5707963050 + x * (-0.2145988016
+            + x * ( 0.0889789874 + x * (-0.0501743046
+            + x * ( 0.0308918810 + x * (-0.0170881256
+            + x * ( 0.0066700901 + x * (-0.0012624911))))))));
+        if (acos_argument >= 0.0) {
+            coeff_theta = acos_x;
+        } else {
+            coeff_theta = 3.1415926535897932384626433832795 - acos_x;
+        }
+        
+        root_1 = - coeff_aa / 3.0 - 2.0 * sqrt (coeff_Q) * cos (coeff_theta / 3.0);
+        root_2 = - coeff_aa / 3.0 - 2.0 * sqrt (coeff_Q) * cos (coeff_theta / 3.0 + two_pi_third);
+        root_3 = - coeff_aa / 3.0 - 2.0 * sqrt (coeff_Q) * cos (coeff_theta / 3.0 - two_pi_third);
+        
+        root_min = min (min (root_1, root_2), root_3);
+        root_max = max (max (root_1, root_2), root_3);
+        root_middle = root_1 + root_2 + root_3 - root_min - root_max;
+        
+        root_1 = root_min; root_2 = root_middle; root_3 = root_max;
+        
+        print ("Three roots:  " + (round (root_1 * 10000) / 10000) + ",  " + (round (root_2 * 10000) / 10000) + ",  " + (round (root_3 * 10000) / 10000));
+        
+    } else {
+        if (coeff_R >= 0.0) {
+            sgn_coeff_R = 1.0;
+        } else {
+            sgn_coeff_R = -1.0;
+        }
+        coeff_bigA = - sgn_coeff_R * (abs (coeff_R) + sqrt (coeff_R * coeff_R - coeff_Q * coeff_Q * coeff_Q)) ^ (1.0 / 3.0);
+        if (coeff_bigA != 0.0) {
+            root = coeff_bigA + coeff_Q / coeff_bigA - coeff_aa / 3.0;
+        } else {
+            root = - coeff_aa / 3.0;
+        }
+        print ("One root:  " + (round (root * 10000) / 10000));
+    }
+
+/*
+atan_temporary =
+    function (Matrix [double] Args) return (Matrix [double] AtanArgs)
+{
+    AbsArgs = abs (Args);
+    Eks = AbsArgs + ppred (AbsArgs, 0.0, "==") * 0.000000000001;
+    Eks = ppred (AbsArgs, 1.0, "<=") * Eks + ppred (AbsArgs, 1.0, ">") / Eks;
+    EksSq = Eks * Eks;
+    AtanEks = 
+        Eks   * ( 1.0000000000 + 
+        EksSq * (-0.3333314528 + # Milton Abramowitz and Irene A. Stegun, Eds.
+        EksSq * ( 0.1999355085 + # "Handbook of Mathematical Functions"
+        EksSq * (-0.1420889944 + # U.S. National Bureau of Standards, June 1964
+        EksSq * ( 0.1065626393 + # Section 4.4, page 81, Equation 4.4.49
+        EksSq * (-0.0752896400 +
+        EksSq * ( 0.0429096138 + 
+        EksSq * (-0.0161657367 + 
+        EksSq *   0.0028662257 ))))))));
+    pi_over_two = 1.5707963267948966192313216916398;
+    AtanAbsArgs = ppred (AbsArgs, 1.0, "<=") * AtanEks + ppred (AbsArgs, 1.0, ">") * (pi_over_two - AtanEks);
+    AtanArgs    = (ppred (Args, 0.0, ">=") - ppred (Args, 0.0, "<")) * AtanAbsArgs;
+}
 */
\ No newline at end of file


[42/55] [partial] incubator-systemml git commit: [SYSTEMML-482] [SYSTEMML-480] Adding a Git attributes file to enfore Unix-styled line endings, and normalizing all of the line endings.

Posted by du...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/scripts/algorithms/l2-svm.dml
----------------------------------------------------------------------
diff --git a/scripts/algorithms/l2-svm.dml b/scripts/algorithms/l2-svm.dml
index 04b83b0..140ac5a 100644
--- a/scripts/algorithms/l2-svm.dml
+++ b/scripts/algorithms/l2-svm.dml
@@ -1,159 +1,159 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# Implements binary-class SVM with squared slack variables
-#
-# Example Usage:
-# Assume L2SVM_HOME is set to the home of the dml script
-# Assume input and output directories are on hdfs as INPUT_DIR and OUTPUT_DIR
-# Assume epsilon = 0.001, lambda = 1, maxiterations = 100
-#
-# hadoop jar SystemML.jar -f $L2SVM_HOME/l2-svm.dml -nvargs X=$INPUT_DIR/X Y=$INPUT_DIR/Y icpt=0 tol=0.001 reg=1 maxiter=100 model=$OUPUT_DIR/w Log=$OUTPUT_DIR/Log fmt="text"
-#
-# Note about inputs: 
-# Assumes that labels (entries in Y) 
-# are set to either -1 or +1
-# or the result of recoding
-#
-
-cmdLine_fmt = ifdef($fmt, "text")
-cmdLine_icpt = ifdef($icpt, 0)
-cmdLine_tol = ifdef($tol, 0.001)
-cmdLine_reg = ifdef($reg, 1.0)
-cmdLine_maxiter = ifdef($maxiter, 100)
-
-X = read($X)
-Y = read($Y)
-
-if(nrow(X) < 2)
-	stop("Stopping due to invalid inputs: Not possible to learn a binary class classifier without at least 2 rows")
-
-check_min = min(Y)
-check_max = max(Y)
-num_min = sum(ppred(Y, check_min, "=="))
-num_max = sum(ppred(Y, check_max, "=="))
-
-if(check_min == check_max)
-	stop("Stopping due to invalid inputs: Y seems to contain exactly one label")
-
-if(num_min + num_max != nrow(Y))
-	stop("Stopping due to invalid inputs: Y seems to contain more than 2 labels")
-	
-if(check_min != -1 | check_max != +1) 
-	Y = 2/(check_max - check_min)*Y - (check_min + check_max)/(check_max - check_min)
-
-positive_label = check_max
-negative_label = check_min
-
-continue = 1
-
-intercept = cmdLine_icpt
-if(intercept != 0 & intercept != 1)
-	stop("Stopping due to invalid argument: Currently supported intercept options are 0 and 1")
-
-epsilon = cmdLine_tol
-if(epsilon < 0)
-	stop("Stopping due to invalid argument: Tolerance (tol) must be non-negative")
-	
-lambda = cmdLine_reg
-if(lambda < 0)
-	stop("Stopping due to invalid argument: Regularization constant (reg) must be non-negative")
-	
-maxiterations = cmdLine_maxiter
-if(maxiterations < 1)
-	stop("Stopping due to invalid argument: Maximum iterations should be a positive integer")
-
-num_samples = nrow(X)
-dimensions = ncol(X)
-
-if (intercept == 1) {
-	ones  = matrix(1, rows=num_samples, cols=1)
-	X = append(X, ones);
-}
-
-num_rows_in_w = dimensions
-if(intercept == 1){
-	num_rows_in_w = num_rows_in_w + 1
-}
-w = matrix(0, rows=num_rows_in_w, cols=1)
-
-g_old = t(X) %*% Y
-s = g_old
-
-Xw = matrix(0, rows=nrow(X), cols=1)
-debug_str = "# Iter, Obj"
-iter = 0
-while(continue == 1 & iter < maxiterations)  {
-	# minimizing primal obj along direction s
-    step_sz = 0
-    Xd = X %*% s
-    wd = lambda * sum(w * s)
-    dd = lambda * sum(s * s)
-    continue1 = 1
-    while(continue1 == 1){
-		tmp_Xw = Xw + step_sz*Xd
-      	out = 1 - Y * (tmp_Xw)
-      	sv = ppred(out, 0, ">")
-      	out = out * sv
-      	g = wd + step_sz*dd - sum(out * Y * Xd)
-      	h = dd + sum(Xd * sv * Xd)
-      	step_sz = step_sz - g/h
-      	if (g*g/h < 0.0000000001){
-        	continue1 = 0
-      	}
-    }
-
-    #update weights
-    w = w + step_sz*s
-	Xw = Xw + step_sz*Xd
-	
-    out = 1 - Y * Xw
-    sv = ppred(out, 0, ">")
-    out = sv * out
-    obj = 0.5 * sum(out * out) + lambda/2 * sum(w * w)
-    g_new = t(X) %*% (out * Y) - lambda * w
-
-    print("ITER " + iter + ": OBJ=" + obj)
-	debug_str = append(debug_str, iter + "," + obj)
-	
-    tmp = sum(s * g_old)
-    if(step_sz*tmp < epsilon*obj){
-    	continue = 0
-    }
-
-    #non-linear CG step
-    be = sum(g_new * g_new)/sum(g_old * g_old)
-    s = be * s + g_new
-    g_old = g_new
-
-    iter = iter + 1
-}
-
-extra_model_params = matrix(0, rows=4, cols=1)
-extra_model_params[1,1] = positive_label
-extra_model_params[2,1] = negative_label
-extra_model_params[3,1] = intercept
-extra_model_params[4,1] = dimensions
-
-w = t(append(t(w), t(extra_model_params)))
-write(w, $model, format=cmdLine_fmt)
-
-write(debug_str, $Log)
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# Implements binary-class SVM with squared slack variables
+#
+# Example Usage:
+# Assume L2SVM_HOME is set to the home of the dml script
+# Assume input and output directories are on hdfs as INPUT_DIR and OUTPUT_DIR
+# Assume epsilon = 0.001, lambda = 1, maxiterations = 100
+#
+# hadoop jar SystemML.jar -f $L2SVM_HOME/l2-svm.dml -nvargs X=$INPUT_DIR/X Y=$INPUT_DIR/Y icpt=0 tol=0.001 reg=1 maxiter=100 model=$OUTPUT_DIR/w Log=$OUTPUT_DIR/Log fmt="text"
+#
+# Note about inputs: 
+# Assumes that labels (entries in Y) 
+# are set to either -1 or +1
+# or the result of recoding
+#
+
+cmdLine_fmt = ifdef($fmt, "text")
+cmdLine_icpt = ifdef($icpt, 0)
+cmdLine_tol = ifdef($tol, 0.001)
+cmdLine_reg = ifdef($reg, 1.0)
+cmdLine_maxiter = ifdef($maxiter, 100)
+
+X = read($X)
+Y = read($Y)
+
+if(nrow(X) < 2)
+	stop("Stopping due to invalid inputs: Not possible to learn a binary class classifier without at least 2 rows")
+
+check_min = min(Y)
+check_max = max(Y)
+num_min = sum(ppred(Y, check_min, "=="))
+num_max = sum(ppred(Y, check_max, "=="))
+
+if(check_min == check_max)
+	stop("Stopping due to invalid inputs: Y seems to contain exactly one label")
+
+if(num_min + num_max != nrow(Y))
+	stop("Stopping due to invalid inputs: Y seems to contain more than 2 labels")
+	
+if(check_min != -1 | check_max != +1) 
+	Y = 2/(check_max - check_min)*Y - (check_min + check_max)/(check_max - check_min)
+
+positive_label = check_max
+negative_label = check_min
+
+continue = 1
+
+intercept = cmdLine_icpt
+if(intercept != 0 & intercept != 1)
+	stop("Stopping due to invalid argument: Currently supported intercept options are 0 and 1")
+
+epsilon = cmdLine_tol
+if(epsilon < 0)
+	stop("Stopping due to invalid argument: Tolerance (tol) must be non-negative")
+	
+lambda = cmdLine_reg
+if(lambda < 0)
+	stop("Stopping due to invalid argument: Regularization constant (reg) must be non-negative")
+	
+maxiterations = cmdLine_maxiter
+if(maxiterations < 1)
+	stop("Stopping due to invalid argument: Maximum iterations should be a positive integer")
+
+num_samples = nrow(X)
+dimensions = ncol(X)
+
+if (intercept == 1) {
+	ones  = matrix(1, rows=num_samples, cols=1)
+	X = append(X, ones);
+}
+
+num_rows_in_w = dimensions
+if(intercept == 1){
+	num_rows_in_w = num_rows_in_w + 1
+}
+w = matrix(0, rows=num_rows_in_w, cols=1)
+
+g_old = t(X) %*% Y
+s = g_old
+
+Xw = matrix(0, rows=nrow(X), cols=1)
+debug_str = "# Iter, Obj"
+iter = 0
+while(continue == 1 & iter < maxiterations)  {
+	# minimizing primal obj along direction s
+    step_sz = 0
+    Xd = X %*% s
+    wd = lambda * sum(w * s)
+    dd = lambda * sum(s * s)
+    continue1 = 1
+    while(continue1 == 1){
+		tmp_Xw = Xw + step_sz*Xd
+      	out = 1 - Y * (tmp_Xw)
+      	sv = ppred(out, 0, ">")
+      	out = out * sv
+      	g = wd + step_sz*dd - sum(out * Y * Xd)
+      	h = dd + sum(Xd * sv * Xd)
+      	step_sz = step_sz - g/h
+      	if (g*g/h < 0.0000000001){
+        	continue1 = 0
+      	}
+    }
+
+    #update weights
+    w = w + step_sz*s
+	Xw = Xw + step_sz*Xd
+	
+    out = 1 - Y * Xw
+    sv = ppred(out, 0, ">")
+    out = sv * out
+    obj = 0.5 * sum(out * out) + lambda/2 * sum(w * w)
+    g_new = t(X) %*% (out * Y) - lambda * w
+
+    print("ITER " + iter + ": OBJ=" + obj)
+	debug_str = append(debug_str, iter + "," + obj)
+	
+    tmp = sum(s * g_old)
+    if(step_sz*tmp < epsilon*obj){
+    	continue = 0
+    }
+
+    #non-linear CG step
+    be = sum(g_new * g_new)/sum(g_old * g_old)
+    s = be * s + g_new
+    g_old = g_new
+
+    iter = iter + 1
+}
+
+extra_model_params = matrix(0, rows=4, cols=1)
+extra_model_params[1,1] = positive_label
+extra_model_params[2,1] = negative_label
+extra_model_params[3,1] = intercept
+extra_model_params[4,1] = dimensions
+
+w = t(append(t(w), t(extra_model_params)))
+write(w, $model, format=cmdLine_fmt)
+
+write(debug_str, $Log)

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/scripts/algorithms/m-svm-predict.dml
----------------------------------------------------------------------
diff --git a/scripts/algorithms/m-svm-predict.dml b/scripts/algorithms/m-svm-predict.dml
index 4d0c736..ba06cf6 100644
--- a/scripts/algorithms/m-svm-predict.dml
+++ b/scripts/algorithms/m-svm-predict.dml
@@ -1,84 +1,84 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# This script can be used to compute label predictions
-# Meant for use with an SVM model (learnt using m-svm.dml) on a held out test set
-#
-# Given ground truth labels, the script will compute an 
-# accuracy (%) for the predictions
-#
-# Example Usage:
-# hadoop jar SystemML.jar -f m-svm-predict.dml -nvargs X=data Y=labels model=model scores=scores accuracy=accuracy confusion=confusion fmt="text"
-#													 
-
-cmdLine_Y = ifdef($Y, " ")
-cmdLine_confusion = ifdef($confusion, " ")
-cmdLine_accuracy = ifdef($accuracy, " ")
-cmdLine_scores = ifdef($scores, " ")
-cmdLine_fmt = ifdef($fmt, "text")
-
-X = read($X);
-W = read($model);
-
-dimensions = as.scalar(W[nrow(W),1])
-if(dimensions != ncol(X))
-	stop("Stopping due to invalid input: Model dimensions do not seem to match input data dimensions")
-
-intercept = as.scalar(W[nrow(W)-1,1])
-W = W[1:(nrow(W)-2),]
-
-N = nrow(X);
-num_classes = ncol(W)
-m=ncol(X);
-
-b = matrix(0, rows=1, cols=num_classes)
-if (intercept == 1)
-	b = W[m+1,]
-
-ones = matrix(1, rows=N, cols=1)
-scores = X %*% W[1:m,] + ones %*% b;
-	
-if(cmdLine_scores != " ")
-	write(scores, cmdLine_scores, format=cmdLine_fmt);
-
-if(cmdLine_Y != " "){
-	y = read(cmdLine_Y);
-	
-	if(min(y) < 1)
-		stop("Stopping due to invalid argument: Label vector (Y) must be recoded")
-	
-	pred = rowIndexMax(scores);
-	correct_percentage = sum(ppred(pred - y, 0, "==")) / N * 100;
-	
-	acc_str = "Accuracy (%): " + correct_percentage
-	print(acc_str)
-	if(cmdLine_accuracy != " ")
-		write(acc_str, cmdLine_accuracy)
-
-	num_classes_ground_truth = max(y)
-	if(num_classes < num_classes_ground_truth)
-		num_classes = num_classes_ground_truth
-
-	if(cmdLine_confusion != " "){
-		confusion_mat = table(pred, y, num_classes, num_classes)
-		write(confusion_mat, cmdLine_confusion, format="csv")
-	}
-}
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# This script can be used to compute label predictions
+# Meant for use with an SVM model (learnt using m-svm.dml) on a held out test set
+#
+# Given ground truth labels, the script will compute an 
+# accuracy (%) for the predictions
+#
+# Example Usage:
+# hadoop jar SystemML.jar -f m-svm-predict.dml -nvargs X=data Y=labels model=model scores=scores accuracy=accuracy confusion=confusion fmt="text"
+#													 
+
+cmdLine_Y = ifdef($Y, " ")
+cmdLine_confusion = ifdef($confusion, " ")
+cmdLine_accuracy = ifdef($accuracy, " ")
+cmdLine_scores = ifdef($scores, " ")
+cmdLine_fmt = ifdef($fmt, "text")
+
+X = read($X);
+W = read($model);
+
+dimensions = as.scalar(W[nrow(W),1])
+if(dimensions != ncol(X))
+	stop("Stopping due to invalid input: Model dimensions do not seem to match input data dimensions")
+
+intercept = as.scalar(W[nrow(W)-1,1])
+W = W[1:(nrow(W)-2),]
+
+N = nrow(X);
+num_classes = ncol(W)
+m=ncol(X);
+
+b = matrix(0, rows=1, cols=num_classes)
+if (intercept == 1)
+	b = W[m+1,]
+
+ones = matrix(1, rows=N, cols=1)
+scores = X %*% W[1:m,] + ones %*% b;
+	
+if(cmdLine_scores != " ")
+	write(scores, cmdLine_scores, format=cmdLine_fmt);
+
+if(cmdLine_Y != " "){
+	y = read(cmdLine_Y);
+	
+	if(min(y) < 1)
+		stop("Stopping due to invalid argument: Label vector (Y) must be recoded")
+	
+	pred = rowIndexMax(scores);
+	correct_percentage = sum(ppred(pred - y, 0, "==")) / N * 100;
+	
+	acc_str = "Accuracy (%): " + correct_percentage
+	print(acc_str)
+	if(cmdLine_accuracy != " ")
+		write(acc_str, cmdLine_accuracy)
+
+	num_classes_ground_truth = max(y)
+	if(num_classes < num_classes_ground_truth)
+		num_classes = num_classes_ground_truth
+
+	if(cmdLine_confusion != " "){
+		confusion_mat = table(pred, y, num_classes, num_classes)
+		write(confusion_mat, cmdLine_confusion, format="csv")
+	}
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/scripts/algorithms/m-svm.dml
----------------------------------------------------------------------
diff --git a/scripts/algorithms/m-svm.dml b/scripts/algorithms/m-svm.dml
index c570872..560d46f 100644
--- a/scripts/algorithms/m-svm.dml
+++ b/scripts/algorithms/m-svm.dml
@@ -1,174 +1,174 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# Implements multiclass SVM with squared slack variables, 
-# learns one-against-the-rest binary-class classifiers
-# 
-# Example Usage:
-# Assume SVM_HOME is set to the home of the dml script
-# Assume input and output directories are on hdfs as INPUT_DIR and OUTPUT_DIR
-# Assume epsilon = 0.001, lambda=1.0, max_iterations = 100
-# 
-# hadoop jar SystemML.jar -f $SVM_HOME/m-svm.dml -nvargs X=$INPUT_DIR/X Y=$INPUT_DIR/y icpt=intercept tol=.001 reg=1.0 maxiter=100 model=$OUTPUT_DIR/w Log=$OUTPUT_DIR/Log fmt="text"
-#
-
-cmdLine_fmt = ifdef($fmt, "text")
-cmdLine_icpt = ifdef($icpt, 0)
-cmdLine_tol = ifdef($tol, 0.001)
-cmdLine_reg = ifdef($reg, 1.0)
-cmdLine_maxiter = ifdef($maxiter, 100)
-
-print("icpt=" + cmdLine_icpt + " tol=" + cmdLine_tol + " reg=" + cmdLine_reg + " maxiter=" + cmdLine_maxiter)
-
-X = read($X)
-
-if(nrow(X) < 2)
-	stop("Stopping due to invalid inputs: Not possible to learn a classifier without at least 2 rows")
-
-dimensions = ncol(X)
-
-Y = read($Y)
-
-if(nrow(X) != nrow(Y))
-	stop("Stopping due to invalid argument: Numbers of rows in X and Y must match")
-
-intercept = cmdLine_icpt
-if(intercept != 0 & intercept != 1)
-	stop("Stopping due to invalid argument: Currently supported intercept options are 0 and 1")
-
-min_y = min(Y)
-if(min_y < 1)
-	stop("Stopping due to invalid argument: Label vector (Y) must be recoded")
-num_classes = max(Y)
-if(num_classes == 1)
-	stop("Stopping due to invalid argument: Maximum label value is 1, need more than one class to learn a multi-class classifier")	
-mod1 = Y %% 1
-mod1_should_be_nrow = sum(abs(ppred(mod1, 0, "==")))
-if(mod1_should_be_nrow != nrow(Y))
-	stop("Stopping due to invalid argument: Please ensure that Y contains (positive) integral labels")
-	
-epsilon = cmdLine_tol
-if(epsilon < 0)
-	stop("Stopping due to invalid argument: Tolerance (tol) must be non-negative")
-
-lambda = cmdLine_reg
-if(lambda < 0)
-	stop("Stopping due to invalid argument: Regularization constant (reg) must be non-negative")
-
-max_iterations = cmdLine_maxiter
-if(max_iterations < 1)
-	stop("Stopping due to invalid argument: Maximum iterations should be a positive integer")
-
-num_samples = nrow(X)
-num_features = ncol(X)
-
-if (intercept == 1) {
-	ones  = matrix(1, rows=num_samples, cols=1);
-	X = append(X, ones);
-}
-
-num_rows_in_w = num_features
-if(intercept == 1){
-	num_rows_in_w = num_rows_in_w + 1
-}
-w = matrix(0, rows=num_rows_in_w, cols=num_classes)
-
-debug_mat = matrix(-1, rows=max_iterations, cols=num_classes)
-parfor(iter_class in 1:num_classes){		  
-	Y_local = 2 * ppred(Y, iter_class, "==") - 1
-	w_class = matrix(0, rows=num_features, cols=1)
-	if (intercept == 1) {
-		zero_matrix = matrix(0, rows=1, cols=1);
-		w_class = t(append(t(w_class), zero_matrix));
-	}
- 
-	g_old = t(X) %*% Y_local
-	s = g_old
-
-	Xw = matrix(0, rows=nrow(X), cols=1)
-	iter = 0
-	continue = 1
-	while(continue == 1)  {
-		# minimizing primal obj along direction s
- 		step_sz = 0
- 		Xd = X %*% s
- 		wd = lambda * sum(w_class * s)
-		dd = lambda * sum(s * s)
-		continue1 = 1
-		while(continue1 == 1){
- 			tmp_Xw = Xw + step_sz*Xd
- 			out = 1 - Y_local * (tmp_Xw)
- 			sv = ppred(out, 0, ">")
- 			out = out * sv
- 			g = wd + step_sz*dd - sum(out * Y_local * Xd)
- 			h = dd + sum(Xd * sv * Xd)
- 			step_sz = step_sz - g/h
- 			if (g*g/h < 0.0000000001){
-			continue1 = 0
-		}
-	}
- 
-		#update weights
-		w_class = w_class + step_sz*s
-		Xw = Xw + step_sz*Xd
- 
-		out = 1 - Y_local * Xw
-		sv = ppred(out, 0, ">")
-		out = sv * out
-		obj = 0.5 * sum(out * out) + lambda/2 * sum(w_class * w_class)
-  		g_new = t(X) %*% (out * Y_local) - lambda * w_class
-
-  		tmp = sum(s * g_old)
-  
-  		train_acc = sum(ppred(Y_local*(X%*%w_class), 0, ">="))/num_samples*100
-  		print("For class " + iter_class + " iteration " + iter + " training accuracy: " + train_acc)
-  		debug_mat[iter+1,iter_class] = obj	   
-   
-  		if((step_sz*tmp < epsilon*obj) | (iter >= max_iterations-1)){
-   			continue = 0
-  		}
- 
-  		#non-linear CG step
-  		be = sum(g_new * g_new)/sum(g_old * g_old)
-  		s = be * s + g_new
-  		g_old = g_new
-
-  		iter = iter + 1
- 	}
-
-	w[,iter_class] = w_class
-}
-
-extra_model_params = matrix(0, rows=2, cols=ncol(w))
-extra_model_params[1, 1] = intercept
-extra_model_params[2, 1] = dimensions
-w = t(append(t(w), t(extra_model_params)))
-write(w, $model, format=cmdLine_fmt)
-
-debug_str = "# Class, Iter, Obj"
-for(iter_class in 1:ncol(debug_mat)){
-	for(iter in 1:nrow(debug_mat)){
-		obj = castAsScalar(debug_mat[iter, iter_class])
-		if(obj != -1) 
-			debug_str = append(debug_str, iter_class + "," + iter + "," + obj)
-	}
-}
-write(debug_str, $Log)
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# Implements multiclass SVM with squared slack variables, 
+# learns one-against-the-rest binary-class classifiers
+# 
+# Example Usage:
+# Assume SVM_HOME is set to the home of the dml script
+# Assume input and output directories are on hdfs as INPUT_DIR and OUTPUT_DIR
+# Assume epsilon = 0.001, lambda=1.0, max_iterations = 100
+# 
+# hadoop jar SystemML.jar -f $SVM_HOME/m-svm.dml -nvargs X=$INPUT_DIR/X Y=$INPUT_DIR/y icpt=intercept tol=.001 reg=1.0 maxiter=100 model=$OUTPUT_DIR/w Log=$OUTPUT_DIR/Log fmt="text"
+#
+
+cmdLine_fmt = ifdef($fmt, "text")
+cmdLine_icpt = ifdef($icpt, 0)
+cmdLine_tol = ifdef($tol, 0.001)
+cmdLine_reg = ifdef($reg, 1.0)
+cmdLine_maxiter = ifdef($maxiter, 100)
+
+print("icpt=" + cmdLine_icpt + " tol=" + cmdLine_tol + " reg=" + cmdLine_reg + " maxiter=" + cmdLine_maxiter)
+
+X = read($X)
+
+if(nrow(X) < 2)
+	stop("Stopping due to invalid inputs: Not possible to learn a classifier without at least 2 rows")
+
+dimensions = ncol(X)
+
+Y = read($Y)
+
+if(nrow(X) != nrow(Y))
+	stop("Stopping due to invalid argument: Numbers of rows in X and Y must match")
+
+intercept = cmdLine_icpt
+if(intercept != 0 & intercept != 1)
+	stop("Stopping due to invalid argument: Currently supported intercept options are 0 and 1")
+
+min_y = min(Y)
+if(min_y < 1)
+	stop("Stopping due to invalid argument: Label vector (Y) must be recoded")
+num_classes = max(Y)
+if(num_classes == 1)
+	stop("Stopping due to invalid argument: Maximum label value is 1, need more than one class to learn a multi-class classifier")	
+mod1 = Y %% 1
+mod1_should_be_nrow = sum(abs(ppred(mod1, 0, "==")))
+if(mod1_should_be_nrow != nrow(Y))
+	stop("Stopping due to invalid argument: Please ensure that Y contains (positive) integral labels")
+	
+epsilon = cmdLine_tol
+if(epsilon < 0)
+	stop("Stopping due to invalid argument: Tolerance (tol) must be non-negative")
+
+lambda = cmdLine_reg
+if(lambda < 0)
+	stop("Stopping due to invalid argument: Regularization constant (reg) must be non-negative")
+
+max_iterations = cmdLine_maxiter
+if(max_iterations < 1)
+	stop("Stopping due to invalid argument: Maximum iterations should be a positive integer")
+
+num_samples = nrow(X)
+num_features = ncol(X)
+
+if (intercept == 1) {
+	ones  = matrix(1, rows=num_samples, cols=1);
+	X = append(X, ones);
+}
+
+num_rows_in_w = num_features
+if(intercept == 1){
+	num_rows_in_w = num_rows_in_w + 1
+}
+w = matrix(0, rows=num_rows_in_w, cols=num_classes)
+
+debug_mat = matrix(-1, rows=max_iterations, cols=num_classes)
+parfor(iter_class in 1:num_classes){		  
+	Y_local = 2 * ppred(Y, iter_class, "==") - 1
+	w_class = matrix(0, rows=num_features, cols=1)
+	if (intercept == 1) {
+		zero_matrix = matrix(0, rows=1, cols=1);
+		w_class = t(append(t(w_class), zero_matrix));
+	}
+ 
+	g_old = t(X) %*% Y_local
+	s = g_old
+
+	Xw = matrix(0, rows=nrow(X), cols=1)
+	iter = 0
+	continue = 1
+	while(continue == 1)  {
+		# minimizing primal obj along direction s
+ 		step_sz = 0
+ 		Xd = X %*% s
+ 		wd = lambda * sum(w_class * s)
+		dd = lambda * sum(s * s)
+		continue1 = 1
+		while(continue1 == 1){
+ 			tmp_Xw = Xw + step_sz*Xd
+ 			out = 1 - Y_local * (tmp_Xw)
+ 			sv = ppred(out, 0, ">")
+ 			out = out * sv
+ 			g = wd + step_sz*dd - sum(out * Y_local * Xd)
+ 			h = dd + sum(Xd * sv * Xd)
+ 			step_sz = step_sz - g/h
+ 			if (g*g/h < 0.0000000001){
+			continue1 = 0
+		}
+	}
+ 
+		#update weights
+		w_class = w_class + step_sz*s
+		Xw = Xw + step_sz*Xd
+ 
+		out = 1 - Y_local * Xw
+		sv = ppred(out, 0, ">")
+		out = sv * out
+		obj = 0.5 * sum(out * out) + lambda/2 * sum(w_class * w_class)
+  		g_new = t(X) %*% (out * Y_local) - lambda * w_class
+
+  		tmp = sum(s * g_old)
+  
+  		train_acc = sum(ppred(Y_local*(X%*%w_class), 0, ">="))/num_samples*100
+  		print("For class " + iter_class + " iteration " + iter + " training accuracy: " + train_acc)
+  		debug_mat[iter+1,iter_class] = obj	   
+   
+  		if((step_sz*tmp < epsilon*obj) | (iter >= max_iterations-1)){
+   			continue = 0
+  		}
+ 
+  		#non-linear CG step
+  		be = sum(g_new * g_new)/sum(g_old * g_old)
+  		s = be * s + g_new
+  		g_old = g_new
+
+  		iter = iter + 1
+ 	}
+
+	w[,iter_class] = w_class
+}
+
+extra_model_params = matrix(0, rows=2, cols=ncol(w))
+extra_model_params[1, 1] = intercept
+extra_model_params[2, 1] = dimensions
+w = t(append(t(w), t(extra_model_params)))
+write(w, $model, format=cmdLine_fmt)
+
+debug_str = "# Class, Iter, Obj"
+for(iter_class in 1:ncol(debug_mat)){
+	for(iter in 1:nrow(debug_mat)){
+		obj = castAsScalar(debug_mat[iter, iter_class])
+		if(obj != -1) 
+			debug_str = append(debug_str, iter_class + "," + iter + "," + obj)
+	}
+}
+write(debug_str, $Log)

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/scripts/algorithms/random-forest-predict.dml
----------------------------------------------------------------------
diff --git a/scripts/algorithms/random-forest-predict.dml b/scripts/algorithms/random-forest-predict.dml
index 2d99670..7bc6cd6 100644
--- a/scripts/algorithms/random-forest-predict.dml
+++ b/scripts/algorithms/random-forest-predict.dml
@@ -1,193 +1,193 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-#  
-# THIS SCRIPT COMPUTES LABEL PREDICTIONS MEANT FOR USE WITH A RANDOM FOREST MODEL ON A HELD OUT TEST SET 
-# OR FOR COMPUTING THE OUT-OF-BAG ERROR ON THE TRAINING SET.
-#
-# INPUT         PARAMETERS:
-# ---------------------------------------------------------------------------------------------
-# NAME          TYPE     DEFAULT      MEANING
-# ---------------------------------------------------------------------------------------------
-# X             String   ---          Location to read test feature matrix or training feature matrix for computing Out-Of-Bag error; 
-#									  note that X needs to be both recoded and dummy coded 
-# Y	 		    String   " "		  Location to read true label matrix Y if requested; note that Y needs to be both recoded and dummy coded
-# R   	  		String   " "	      Location to read the matrix R which for each feature in X contains the following information 
-#										- R[,1]: column ids
-#										- R[,2]: start indices 
-#										- R[,3]: end indices
-#									  If R is not provided by default all variables are assumed to be scale
-# M             String 	 ---	   	  Location to read matrix M containing the learned tree i the following format
-#								 		- M[1,j]: id of node j (in a complete binary tree)
-#										- M[2,j]: tree id 
-#	 									- M[3,j]: Offset (no. of columns) to left child of j if j is an internal node, otherwise 0
-#	 									- M[4,j]: Feature index of the feature that node j looks at if j is an internal node, otherwise 0
-#	 									- M[5,j]: Type of the feature that node j looks at if j is an internal node: 1 for scale and 2 for categorical features, 
-#		     									  otherwise the label that leaf node j is supposed to predict
-#	 									- M[6,j]: If j is an internal node: 1 if the feature chosen for j is scale, otherwise the size of the subset of values 
-#			 									  stored in rows 7,8,... if j is categorical 
-#						 						  If j is a leaf node: number of misclassified samples reaching at node j 
-#	 									- M[7:,j]: If j is an internal node: Threshold the example's feature value is compared to is stored at M[7,j] 
-#							   					   if the feature chosen for j is scale, otherwise if the feature chosen for j is categorical rows 7,8,... 
-#												   depict the value subset chosen for j
-#	          									   If j is a leaf node 1 if j is impure and the number of samples at j > threshold, otherwise 0
-# C 			String   " "		  Location to read the counts matrix containing the number of times samples are chosen in each tree of the random forest
-# P				String   ---		  Location to store the label predictions for X
-# A     		String   " "          Location to store the test accuracy (%) for the prediction if requested
-# OOB 			String   " "		  If C is provided location to store the Out-Of-Bag (OOB) error of the learned model 
-# CM     		String   " "		  Location to store the confusion matrix if requested 
-# fmt     	    String   "text"       The output format of the output, such as "text" or "csv"
-# ---------------------------------------------------------------------------------------------
-# OUTPUT: 
-#	1- Matrix Y containing the predicted labels for X 
-#   2- Test accuracy if requested
-#   3- Confusion matrix C if requested
-# -------------------------------------------------------------------------------------------
-# HOW TO INVOKE THIS SCRIPT - EXAMPLE:
-# hadoop jar SystemML.jar -f random-forest-predict.dml -nvargs X=INPUT_DIR/X Y=INPUT_DIR/Y R=INPUT_DIR/R M=INPUT_DIR/model P=OUTPUT_DIR/predictions
-#														A=OUTPUT_DIR/accurcay CM=OUTPUT_DIR/confusion fmt=csv
-
-fileX = $X;
-fileM = $M;
-fileP = $P;
-fileY = ifdef ($Y, " ");
-fileR = ifdef ($R, " ");
-fileC = ifdef ($C, " ");
-fileOOB = ifdef ($OOB, " ");
-fileCM = ifdef ($CM, " ");
-fileA = ifdef ($A, " ");
-fmtO = ifdef ($fmt, "text");
-X = read (fileX);
-M = read (fileM);
-
-num_records = nrow (X);
-Y_predicted = matrix (0, rows = num_records, cols = 1);
-num_trees  = max (M[2,]);
-num_labels = max (M[5,]);
-num_nodes_per_tree = aggregate (target = t (M[2,]), groups = t (M[2,]), fn = "count");
-num_nodes_per_tree_cum = cumsum (num_nodes_per_tree);
-
-R_cat = matrix (0, rows = 1, cols = 1);
-R_scale = matrix (0, rows = 1, cols = 1);
-
-if (fileR != " ") {
-	R = read (fileR);
-	dummy_coded = ppred (R[,2], R[,3], "!=");
-	R_scale = removeEmpty (target = R[,2] * (1 - dummy_coded), margin = "rows");
-	R_cat = removeEmpty (target = R[,2:3] * dummy_coded, margin = "rows");
-} else { # only scale features available
-	R_scale = seq (1, ncol (X));
-}
-
-if (fileC != " ") {
-	C = read (fileC);
-	label_counts_oob = matrix (0, rows = num_records, cols = num_labels);
-}
-
-label_counts = matrix (0, rows = num_records, cols = num_labels); 
-parfor (i in 1:num_records, check = 0) {
-	cur_sample = X[i,];
-	cur_node_pos = 1;
-	# cur_node = 1;
-	cur_tree = 1;
-	start_ind = 1;
-	labels_found = FALSE;
-	while (!labels_found) {
-		
-		cur_feature = as.scalar (M[4,cur_node_pos]);
-		type_label = as.scalar (M[5,cur_node_pos]);
-		if (cur_feature == 0) { # leaf found
-			label_counts[i,type_label] = label_counts[i,type_label] + 1;
-			if (fileC != " ") {
-				if (as.scalar (C[i,cur_tree]) == 0) label_counts_oob[i,type_label] = label_counts_oob[i,type_label] + 1;
-			}
-			if (cur_tree < num_trees) {
-				cur_node_pos = as.scalar (num_nodes_per_tree_cum[cur_tree,]) + 1;
-			} else if (cur_tree == num_trees) {
-				labels_found = TRUE;
-			}
-			cur_tree = cur_tree + 1;
-		} else {
-			# determine type: 1 for scale, 2 for categorical 
-			if (type_label == 1) { # scale feature
-				cur_start_ind = as.scalar (R_scale[cur_feature,]);
-				cur_value = as.scalar (cur_sample[,cur_start_ind]);
-				cur_split = as.scalar (M[7,cur_node_pos]);
-				if (cur_value < cur_split) { # go to left branch
-					cur_node_pos = cur_node_pos + as.scalar (M[3,cur_node_pos]);
-					# cur_node = as.scalar (cur_M[1,cur_node_pos]);
-				} else { # go to right branch
-					cur_node_pos = cur_node_pos + as.scalar (M[3,cur_node_pos]) + 1;
-					# cur_node = as.scalar (cur_M[1,cur_node_pos]);					
-				}
-			} else if (type_label == 2) { # categorical feature				
-				cur_start_ind = as.scalar (R_cat[cur_feature,1]);
-				cur_end_ind = as.scalar (R_cat[cur_feature,2]);					
-				cur_value = as.scalar (rowIndexMax(cur_sample[,cur_start_ind:cur_end_ind])); 
-				cur_offset = as.scalar (M[6,cur_node_pos]);
-				value_found = sum (ppred (M[7:(7 + cur_offset - 1),cur_node_pos], cur_value, "=="));
-				if (value_found >= 1) { # go to left branch
-					cur_node_pos = cur_node_pos + as.scalar (M[3,cur_node_pos]);
-					# cur_node = as.scalar (cur_M[1,cur_node_pos]);
-				} else { # go to right branch
-					cur_node_pos = cur_node_pos + as.scalar (M[3,cur_node_pos]) + 1;
-					# cur_node = as.scalar (cur_M[1,cur_node_pos]);						
-				}
-		
-			}
-}}}
-
-Y_predicted = rowIndexMax (label_counts);
-write (Y_predicted, fileP, format = fmtO);
-
-if (fileY != " ") {
-	Y_dummy = read (fileY);
-	num_classes = ncol (Y_dummy);
-	Y = rowSums (Y_dummy * t (seq (1, num_classes)));
-	result = ppred (Y, Y_predicted, "==");
-	result = sum (result);
-	accuracy = result / num_records * 100;
-	acc_str = "Accuracy (%): " + accuracy;
-	if (fileA != " ") {
-		write (acc_str, fileA, format = fmtO);
-	} else {
-		print (acc_str);
-	}
-	if (fileC != " ") {
-		oob_ind = ppred (rowSums (label_counts_oob), 0, ">")
-		label_counts_oob = removeEmpty (target = label_counts_oob, margin = "rows");
-		num_oob = nrow (label_counts_oob);
-		Y_predicted_oob = rowIndexMax (label_counts_oob);
-		Y_oob = removeEmpty (target = Y * oob_ind, margin = "rows");
-		result = ppred (Y_oob, Y_predicted_oob, "==");
-		oob_error = (1 - (sum (result) / num_oob)) * 100;
-		oob_str = "Out-Of-Bag error (%): " + oob_error;
-		if (fileOOB != " ") {
-			write (oob_str, fileOOB, format = fmtO);
-		} else {
-			print (oob_str);
-		}
-	}
-	if (fileCM != " ") {
-		confusion_mat = table(Y_predicted, Y, num_classes, num_classes)
-        write(confusion_mat, fileCM, format = fmtO)
-	}
-}
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+#  
+# THIS SCRIPT COMPUTES LABEL PREDICTIONS MEANT FOR USE WITH A RANDOM FOREST MODEL ON A HELD OUT TEST SET 
+# OR FOR COMPUTING THE OUT-OF-BAG ERROR ON THE TRAINING SET.
+#
+# INPUT         PARAMETERS:
+# ---------------------------------------------------------------------------------------------
+# NAME          TYPE     DEFAULT      MEANING
+# ---------------------------------------------------------------------------------------------
+# X             String   ---          Location to read test feature matrix or training feature matrix for computing Out-Of-Bag error; 
+#									  note that X needs to be both recoded and dummy coded 
+# Y	 		    String   " "		  Location to read true label matrix Y if requested; note that Y needs to be both recoded and dummy coded
+# R   	  		String   " "	      Location to read the matrix R which for each feature in X contains the following information 
+#										- R[,1]: column ids
+#										- R[,2]: start indices 
+#										- R[,3]: end indices
+#									  If R is not provided by default all variables are assumed to be scale
+# M             String 	 ---	   	  Location to read matrix M containing the learned tree in the following format
+#								 		- M[1,j]: id of node j (in a complete binary tree)
+#										- M[2,j]: tree id 
+#	 									- M[3,j]: Offset (no. of columns) to left child of j if j is an internal node, otherwise 0
+#	 									- M[4,j]: Feature index of the feature that node j looks at if j is an internal node, otherwise 0
+#	 									- M[5,j]: Type of the feature that node j looks at if j is an internal node: 1 for scale and 2 for categorical features, 
+#		     									  otherwise the label that leaf node j is supposed to predict
+#	 									- M[6,j]: If j is an internal node: 1 if the feature chosen for j is scale, otherwise the size of the subset of values 
+#			 									  stored in rows 7,8,... if j is categorical 
+#						 						  If j is a leaf node: number of misclassified samples reaching at node j 
+#	 									- M[7:,j]: If j is an internal node: Threshold the example's feature value is compared to is stored at M[7,j] 
+#							   					   if the feature chosen for j is scale, otherwise if the feature chosen for j is categorical rows 7,8,... 
+#												   depict the value subset chosen for j
+#	          									   If j is a leaf node: 1 if j is impure and the number of samples at j > threshold, otherwise 0
+# C 			String   " "		  Location to read the counts matrix containing the number of times samples are chosen in each tree of the random forest
+# P				String   ---		  Location to store the label predictions for X
+# A     		String   " "          Location to store the test accuracy (%) for the prediction if requested
+# OOB 			String   " "		  If C is provided location to store the Out-Of-Bag (OOB) error of the learned model 
+# CM     		String   " "		  Location to store the confusion matrix if requested 
+# fmt     	    String   "text"       The output format of the output, such as "text" or "csv"
+# ---------------------------------------------------------------------------------------------
+# OUTPUT: 
+#	1- Matrix Y containing the predicted labels for X 
+#   2- Test accuracy if requested
+#   3- Confusion matrix C if requested
+# -------------------------------------------------------------------------------------------
+# HOW TO INVOKE THIS SCRIPT - EXAMPLE:
+# hadoop jar SystemML.jar -f random-forest-predict.dml -nvargs X=INPUT_DIR/X Y=INPUT_DIR/Y R=INPUT_DIR/R M=INPUT_DIR/model P=OUTPUT_DIR/predictions
+#														A=OUTPUT_DIR/accuracy CM=OUTPUT_DIR/confusion fmt=csv
+
+fileX = $X;
+fileM = $M;
+fileP = $P;
+fileY = ifdef ($Y, " ");
+fileR = ifdef ($R, " ");
+fileC = ifdef ($C, " ");
+fileOOB = ifdef ($OOB, " ");
+fileCM = ifdef ($CM, " ");
+fileA = ifdef ($A, " ");
+fmtO = ifdef ($fmt, "text");
+X = read (fileX);
+M = read (fileM);
+
+num_records = nrow (X);
+Y_predicted = matrix (0, rows = num_records, cols = 1);
+num_trees  = max (M[2,]);
+num_labels = max (M[5,]);
+num_nodes_per_tree = aggregate (target = t (M[2,]), groups = t (M[2,]), fn = "count");
+num_nodes_per_tree_cum = cumsum (num_nodes_per_tree);
+
+R_cat = matrix (0, rows = 1, cols = 1);
+R_scale = matrix (0, rows = 1, cols = 1);
+
+if (fileR != " ") {
+	R = read (fileR);
+	dummy_coded = ppred (R[,2], R[,3], "!=");
+	R_scale = removeEmpty (target = R[,2] * (1 - dummy_coded), margin = "rows");
+	R_cat = removeEmpty (target = R[,2:3] * dummy_coded, margin = "rows");
+} else { # only scale features available
+	R_scale = seq (1, ncol (X));
+}
+
+if (fileC != " ") {
+	C = read (fileC);
+	label_counts_oob = matrix (0, rows = num_records, cols = num_labels);
+}
+
+label_counts = matrix (0, rows = num_records, cols = num_labels); 
+parfor (i in 1:num_records, check = 0) {
+	cur_sample = X[i,];
+	cur_node_pos = 1;
+	# cur_node = 1;
+	cur_tree = 1;
+	start_ind = 1;
+	labels_found = FALSE;
+	while (!labels_found) {
+		
+		cur_feature = as.scalar (M[4,cur_node_pos]);
+		type_label = as.scalar (M[5,cur_node_pos]);
+		if (cur_feature == 0) { # leaf found
+			label_counts[i,type_label] = label_counts[i,type_label] + 1;
+			if (fileC != " ") {
+				if (as.scalar (C[i,cur_tree]) == 0) label_counts_oob[i,type_label] = label_counts_oob[i,type_label] + 1;
+			}
+			if (cur_tree < num_trees) {
+				cur_node_pos = as.scalar (num_nodes_per_tree_cum[cur_tree,]) + 1;
+			} else if (cur_tree == num_trees) {
+				labels_found = TRUE;
+			}
+			cur_tree = cur_tree + 1;
+		} else {
+			# determine type: 1 for scale, 2 for categorical 
+			if (type_label == 1) { # scale feature
+				cur_start_ind = as.scalar (R_scale[cur_feature,]);
+				cur_value = as.scalar (cur_sample[,cur_start_ind]);
+				cur_split = as.scalar (M[7,cur_node_pos]);
+				if (cur_value < cur_split) { # go to left branch
+					cur_node_pos = cur_node_pos + as.scalar (M[3,cur_node_pos]);
+					# cur_node = as.scalar (cur_M[1,cur_node_pos]);
+				} else { # go to right branch
+					cur_node_pos = cur_node_pos + as.scalar (M[3,cur_node_pos]) + 1;
+					# cur_node = as.scalar (cur_M[1,cur_node_pos]);					
+				}
+			} else if (type_label == 2) { # categorical feature				
+				cur_start_ind = as.scalar (R_cat[cur_feature,1]);
+				cur_end_ind = as.scalar (R_cat[cur_feature,2]);					
+				cur_value = as.scalar (rowIndexMax(cur_sample[,cur_start_ind:cur_end_ind])); 
+				cur_offset = as.scalar (M[6,cur_node_pos]);
+				value_found = sum (ppred (M[7:(7 + cur_offset - 1),cur_node_pos], cur_value, "=="));
+				if (value_found >= 1) { # go to left branch
+					cur_node_pos = cur_node_pos + as.scalar (M[3,cur_node_pos]);
+					# cur_node = as.scalar (cur_M[1,cur_node_pos]);
+				} else { # go to right branch
+					cur_node_pos = cur_node_pos + as.scalar (M[3,cur_node_pos]) + 1;
+					# cur_node = as.scalar (cur_M[1,cur_node_pos]);						
+				}
+		
+			}
+}}}
+
+Y_predicted = rowIndexMax (label_counts);
+write (Y_predicted, fileP, format = fmtO);
+
+if (fileY != " ") {
+	Y_dummy = read (fileY);
+	num_classes = ncol (Y_dummy);
+	Y = rowSums (Y_dummy * t (seq (1, num_classes)));
+	result = ppred (Y, Y_predicted, "==");
+	result = sum (result);
+	accuracy = result / num_records * 100;
+	acc_str = "Accuracy (%): " + accuracy;
+	if (fileA != " ") {
+		write (acc_str, fileA, format = fmtO);
+	} else {
+		print (acc_str);
+	}
+	if (fileC != " ") {
+		oob_ind = ppred (rowSums (label_counts_oob), 0, ">")
+		label_counts_oob = removeEmpty (target = label_counts_oob, margin = "rows");
+		num_oob = nrow (label_counts_oob);
+		Y_predicted_oob = rowIndexMax (label_counts_oob);
+		Y_oob = removeEmpty (target = Y * oob_ind, margin = "rows");
+		result = ppred (Y_oob, Y_predicted_oob, "==");
+		oob_error = (1 - (sum (result) / num_oob)) * 100;
+		oob_str = "Out-Of-Bag error (%): " + oob_error;
+		if (fileOOB != " ") {
+			write (oob_str, fileOOB, format = fmtO);
+		} else {
+			print (oob_str);
+		}
+	}
+	if (fileCM != " ") {
+		confusion_mat = table(Y_predicted, Y, num_classes, num_classes)
+        write(confusion_mat, fileCM, format = fmtO)
+	}
+}



[54/55] incubator-systemml git commit: [SYSTEMML-480] [SYSTEMML-463] Fix Release Packaging in Prep for 0.9.0 Release.

Posted by du...@apache.org.
[SYSTEMML-480] [SYSTEMML-463] Fix Release Packaging in Prep for 0.9.0 Release.

This fix addresses additional issues with our release packaging that blocked our 0.9.0 release candidate.  Changes include cleaning up files, adding missing files, updating the naming from 'system-ml-*' to 'systemml-*', and fixing broken dependencies.  Additionally, this adds experimental support for a standalone JAR that we can use in the future.

Closes #54.


Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/d766fbff
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/d766fbff
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/d766fbff

Branch: refs/heads/branch-0.9
Commit: d766fbffbb5bea7fd9e26035858436e5b471a689
Parents: 048ac6d
Author: Mike Dusenberry <mw...@us.ibm.com>
Authored: Mon Jan 25 13:25:43 2016 -0800
Committer: Mike Dusenberry <mw...@us.ibm.com>
Committed: Mon Jan 25 13:25:43 2016 -0800

----------------------------------------------------------------------
 docs/Language Reference/README.txt              | 87 ------------------
 .../Language Reference/README_HADOOP_CONFIG.txt | 83 +++++++++++++++++
 pom.xml                                         | 27 +++++-
 src/assembly/distrib.xml                        | 50 ++++------
 src/assembly/source.xml                         |  5 +-
 src/assembly/standalone-jar.xml                 | 89 ++++++++++++++++++
 src/assembly/standalone.xml                     | 96 ++++++++------------
 src/main/standalone/log4j.properties            |  4 +-
 8 files changed, 253 insertions(+), 188 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/d766fbff/docs/Language Reference/README.txt
----------------------------------------------------------------------
diff --git a/docs/Language Reference/README.txt b/docs/Language Reference/README.txt
deleted file mode 100644
index 0f22aa6..0000000
--- a/docs/Language Reference/README.txt	
+++ /dev/null
@@ -1,87 +0,0 @@
-Usage
------
-The machine learning algorithms described in 
-$BIGINSIGHTS_HOME/machine-learning/docs/SystemML_Algorithms_Reference.pdf can be invoked
-from the hadoop command line using the described, algorithm-specific parameters. 
-
-Generic command line arguments arguments are provided by the help command below.
-
-   hadoop jar SystemML.jar -? or -help 
-
-
-Recommended configurations
---------------------------
-1) JVM Heap Sizes: 
-We recommend an equal-sized JVM configuration for clients, mappers, and reducers. For the client
-process this can be done via 
-
-   export HADOOP_CLIENT_OPTS="-Xmx2048m -Xms2048m -Xmn256m" 
-   
-where Xmx specifies the maximum heap size, Xms the initial heap size, and Xmn is size of the young 
-generation. For Xmn values of equal or less than 15% of the max heap size, we guarantee the memory budget.
-
-The above option may also be set through BigR setting the "ml.jvm" option, e.g.
-   bigr.set.server.option("jaql.fence.jvm.parameters", "-Xmx2g -Xms2g -Xmn256m")
-
-For mapper or reducer JVM configurations, the following properties can be specified in mapred-site.xml, 
-where 'child' refers to both mapper and reducer. If map and reduce are specified individually, they take 
-precedence over the generic property.
-
-  <property>
-    <name>mapreduce.child.java.opts</name> <!-- synonym: mapred.child.java.opts -->
-    <value>-Xmx2048m -Xms2048m -Xmn256m</value>
-  </property>
-  <property>
-    <name>mapreduce.map.java.opts</name> <!-- synonym: mapred.map.java.opts -->
-    <value>-Xmx2048m -Xms2048m -Xmn256m</value>
-  </property>
-  <property>
-    <name>mapreduce.reduce.java.opts</name> <!-- synonym: mapred.reduce.java.opts -->
-    <value>-Xmx2048m -Xms2048m -Xmn256m</value>
-  </property>
- 
-
-2) CP Memory Limitation:
-There exist size limitations for in-memory matrices. Dense in-memory matrices are limited to 16GB 
-independent of their dimension. Sparse in-memory matrices are limited to 2G rows and 2G columns 
-but the overall matrix can be larger. These limitations do only apply to in-memory matrices but 
-NOT in HDFS or involved in MR computations. Setting HADOOP_CLIENT_OPTS below those limitations 
-prevents runtime errors.
-
-3) Transparent Huge Pages (on Red Hat Enterprise Linux 6):
-Hadoop workloads might show very high System CPU utilization if THP is enabled. In case of such 
-behavior, we recommend to disable THP with
-   
-   echo never > /sys/kernel/mm/redhat_transparent_hugepage/enabled
-   
-4) JVM Reuse:
-Performance benefits from JVM reuse because data sets that fit into the mapper memory budget are 
-reused across tasks per slot. However, Hadoop 1.0.3 JVM Reuse is incompatible with security (when 
-using the LinuxTaskController). The workaround is to use the DefaultTaskController. SystemML provides 
-a configuration property in $BIGINSIGHTS_HOME/machine-learning/SystemML-config.xml to enable JVM reuse 
-on a per job level without changing the global cluster configuration. 
-   
-   <jvmreuse>false</jvmreuse> 
-   
-5) Number of Reducers:
-The number of reducers can have significant impact on performance. SystemML provides a configuration
-property to set the default number of reducers per job without changing the global cluster configuration.
-In general, we recommend a setting of twice the number of nodes. Smaller numbers create less intermediate
-files, larger numbers increase the degree of parallelism for compute and parallel write. In 
-$BIGINSIGHTS_HOME/machine-learning/SystemML-config.xml, set:
-   
-   <!-- default number of reduce tasks per MR job, default: 2 x number of nodes -->
-   <numreducers>12</numreducers> 
-
-6) SystemML temporary directories:
-SystemML uses temporary directories in two different locations: (1) on local file system for temping from 
-the client process, and (2) on HDFS for intermediate results between different MR jobs and between MR jobs 
-and in-memory operations. Locations of these directories can be configured in 
-$BIGINSIGHTS_HOME/machine-learning/SystemML-config.xml with the following properties
-
-   <!-- local fs tmp working directory-->
-   <localtmpdir>/tmp/systemml</localtmpdir>
-
-   <!-- hdfs tmp working directory--> 
-   <scratch>scratch_space</scratch> 
- 
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/d766fbff/docs/Language Reference/README_HADOOP_CONFIG.txt
----------------------------------------------------------------------
diff --git a/docs/Language Reference/README_HADOOP_CONFIG.txt b/docs/Language Reference/README_HADOOP_CONFIG.txt
new file mode 100644
index 0000000..e34d4f3
--- /dev/null
+++ b/docs/Language Reference/README_HADOOP_CONFIG.txt	
@@ -0,0 +1,83 @@
+Usage
+-----
+The machine learning algorithms described in SystemML_Algorithms_Reference.pdf can be invoked
+from the hadoop command line using the described, algorithm-specific parameters. 
+
+Generic command line arguments are provided by the help command below.
+
+   hadoop jar SystemML.jar -? or -help 
+
+
+Recommended configurations
+--------------------------
+1) JVM Heap Sizes: 
+We recommend an equal-sized JVM configuration for clients, mappers, and reducers. For the client
+process this can be done via
+
+   export HADOOP_CLIENT_OPTS="-Xmx2048m -Xms2048m -Xmn256m" 
+   
+where Xmx specifies the maximum heap size, Xms the initial heap size, and Xmn the size of the young
+generation. For Xmn values equal to or less than 15% of the max heap size, we guarantee the memory budget.
+
+For mapper or reducer JVM configurations, the following properties can be specified in mapred-site.xml,
+where 'child' refers to both mapper and reducer. If map and reduce are specified individually, they take 
+precedence over the generic property.
+
+  <property>
+    <name>mapreduce.child.java.opts</name> <!-- synonym: mapred.child.java.opts -->
+    <value>-Xmx2048m -Xms2048m -Xmn256m</value>
+  </property>
+  <property>
+    <name>mapreduce.map.java.opts</name> <!-- synonym: mapred.map.java.opts -->
+    <value>-Xmx2048m -Xms2048m -Xmn256m</value>
+  </property>
+  <property>
+    <name>mapreduce.reduce.java.opts</name> <!-- synonym: mapred.reduce.java.opts -->
+    <value>-Xmx2048m -Xms2048m -Xmn256m</value>
+  </property>
+ 
+
+2) CP Memory Limitation:
+There exist size limitations for in-memory matrices. Dense in-memory matrices are limited to 16GB 
+independent of their dimension. Sparse in-memory matrices are limited to 2G rows and 2G columns 
+but the overall matrix can be larger. These limitations apply only to in-memory matrices, NOT to
+matrices in HDFS or involved in MR computations. Setting HADOOP_CLIENT_OPTS below those limitations
+prevents runtime errors.
+
+3) Transparent Huge Pages (on Red Hat Enterprise Linux 6):
+Hadoop workloads might show very high System CPU utilization if THP is enabled. In case of such 
+behavior, we recommend disabling THP with
+   
+   echo never > /sys/kernel/mm/redhat_transparent_hugepage/enabled
+   
+4) JVM Reuse:
+Performance benefits from JVM reuse because data sets that fit into the mapper memory budget are 
+reused across tasks per slot. However, Hadoop 1.0.3 JVM Reuse is incompatible with security (when 
+using the LinuxTaskController). The workaround is to use the DefaultTaskController. SystemML provides 
+a configuration property in SystemML-config.xml to enable JVM reuse on a per job level without
+changing the global cluster configuration.
+   
+   <jvmreuse>false</jvmreuse> 
+   
+5) Number of Reducers:
+The number of reducers can have significant impact on performance. SystemML provides a configuration
+property to set the default number of reducers per job without changing the global cluster configuration.
+In general, we recommend a setting of twice the number of nodes. Smaller numbers create fewer intermediate
+files, larger numbers increase the degree of parallelism for compute and parallel write. In
+SystemML-config.xml, set:
+   
+   <!-- default number of reduce tasks per MR job, default: 2 x number of nodes -->
+   <numreducers>12</numreducers> 
+
+6) SystemML temporary directories:
+SystemML uses temporary directories in two different locations: (1) on local file system for temping from 
+the client process, and (2) on HDFS for intermediate results between different MR jobs and between MR jobs 
+and in-memory operations. Locations of these directories can be configured in SystemML-config.xml with the
+following properties:
+
+   <!-- local fs tmp working directory-->
+   <localtmpdir>/tmp/systemml</localtmpdir>
+
+   <!-- hdfs tmp working directory--> 
+   <scratch>scratch_space</scratch> 
+ 
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/d766fbff/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index ba0e893..ae654dd 100644
--- a/pom.xml
+++ b/pom.xml
@@ -25,11 +25,11 @@
 		<version>17</version>
 	</parent>
 	<groupId>org.apache.systemml</groupId>
-    <version>0.9.0-incubating-SNAPSHOT</version>
-	<artifactId>system-ml</artifactId>
+	<version>0.10.0-incubating-SNAPSHOT</version>
+	<artifactId>systemml</artifactId>
 	<packaging>jar</packaging>
 	<name>SystemML</name>
-	<description>Declarative machine learning</description>
+	<description>Declarative Machine Learning</description>
 	<url>http://systemml.apache.org/</url>
 	<licenses>
 		<license>
@@ -530,7 +530,7 @@
 							</execution>
 
 							<execution>
-								<id>create-binary-distribution-assembly</id>
+								<id>create-binary-cluster-distribution-assembly</id>
 								<phase>package</phase>
 								<goals>
 									<goal>single</goal>
@@ -544,6 +544,25 @@
 							</execution>
 
 							<execution>
+								<id>create-standalone-jar</id>
+								<phase>package</phase>
+								<goals>
+									<goal>single</goal>
+								</goals>
+								<configuration>
+									<descriptors>
+										<descriptor>src/assembly/standalone-jar.xml</descriptor>
+									</descriptors>
+									<archive>
+										<index>true</index>
+										<manifest>
+											<mainClass>org.apache.sysml.api.DMLScript</mainClass>
+										</manifest>
+									</archive>
+								</configuration>
+							</execution>
+
+							<execution>
 								<id>create-binary-standalone-distribution-assembly</id>
 								<phase>package</phase>
 								<goals>

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/d766fbff/src/assembly/distrib.xml
----------------------------------------------------------------------
diff --git a/src/assembly/distrib.xml b/src/assembly/distrib.xml
index f752ad3..9d70ee9 100644
--- a/src/assembly/distrib.xml
+++ b/src/assembly/distrib.xml
@@ -17,17 +17,20 @@
  * under the License.
 -->
 <assembly
-	xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.2"
-	xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
-	xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.2 http://maven.apache.org/xsd/assembly-1.1.2.xsd">
+		xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.2"
+		xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+		xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.2 http://maven.apache.org/xsd/assembly-1.1.2.xsd">
+	<!-- Assembly file for the "distributed" SystemML release for running on a
+		cluster with Spark or Hadoop. -->
 	<id>distrib</id>
+
 	<formats>
 		<format>tar.gz</format>
 		<format>zip</format>
 	</formats>
-	
+
 	<includeBaseDirectory>true</includeBaseDirectory>
-	<baseDirectory>system-ml-${version}</baseDirectory>
+	<baseDirectory>${artifactId}-${version}</baseDirectory>
 
 	<fileSets>
 		<fileSet>
@@ -37,7 +40,7 @@
 			</includes>
 			<outputDirectory>.</outputDirectory>
 		</fileSet>
-		
+
 		<fileSet>
 			<directory>${basedir}/scripts</directory>
 			<includes>
@@ -84,8 +87,8 @@
 			</includes>
 			<outputDirectory>./algorithms</outputDirectory>
 		</fileSet>
-    
-    <fileSet>
+
+		<fileSet>
 			<directory>${basedir}/scripts/utils</directory>
 			<includes>
 				<include>cbind.dml</include>
@@ -101,40 +104,23 @@
 			</includes>
 			<outputDirectory>./algorithms/utils</outputDirectory>
 		</fileSet>
-		
-		<fileSet>
-			<directory>${basedir}/docs/Language Reference</directory>
-			<includes>
-				<include>SystemML_Language_Reference.html</include>
-				<include>README.txt</include>
-			</includes>
-			<outputDirectory>./docs</outputDirectory>
-		</fileSet>
-		
+
 		<fileSet>
-			<directory>${basedir}/docs/Algorithms Reference</directory>
+			<directory>${basedir}</directory>
 			<includes>
-				<include>SystemML_Algorithms_Reference.pdf</include>
+				<include>DISCLAIMER</include>
+				<include>LICENSE</include>
+				<include>NOTICE</include>
 			</includes>
-			<outputDirectory>./docs</outputDirectory>
+			<outputDirectory>.</outputDirectory>
 		</fileSet>
 	</fileSets>
-	
+
 	<files>
 		<file>
 			<source>target/${artifactId}-${project.version}.jar</source>
 			<outputDirectory>.</outputDirectory>
 			<destName>SystemML.jar</destName>
 		</file>
-		<file>
-			<source>LICENSE</source>
-			<outputDirectory>.</outputDirectory>
-			<destName>LICENSE</destName>
-		</file>
-		<file>
-			<source>NOTICE</source>
-			<outputDirectory>.</outputDirectory>
-			<destName>NOTICE</destName>
-		</file>
 	</files>
 </assembly>

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/d766fbff/src/assembly/source.xml
----------------------------------------------------------------------
diff --git a/src/assembly/source.xml b/src/assembly/source.xml
index 1718756..97b59ae 100644
--- a/src/assembly/source.xml
+++ b/src/assembly/source.xml
@@ -20,6 +20,7 @@
         xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.2"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.2 http://maven.apache.org/xsd/assembly-1.1.2.xsd">
+    <!-- Assembly file for the "source" SystemML release containing all source files. -->
     <id>src</id>
     <formats>
         <format>tar.gz</format>
@@ -27,8 +28,7 @@
     </formats>
 
     <includeBaseDirectory>true</includeBaseDirectory>
-    <baseDirectory>system-ml-${version}-src</baseDirectory>
-
+    <baseDirectory>${artifactId}-${version}-src</baseDirectory>
 
     <fileSets>
         <fileSet>
@@ -54,5 +54,4 @@
             </excludes>
         </fileSet>
     </fileSets>
-
 </assembly>

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/d766fbff/src/assembly/standalone-jar.xml
----------------------------------------------------------------------
diff --git a/src/assembly/standalone-jar.xml b/src/assembly/standalone-jar.xml
new file mode 100644
index 0000000..184789a
--- /dev/null
+++ b/src/assembly/standalone-jar.xml
@@ -0,0 +1,89 @@
+<!--
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+-->
+<assembly
+		xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.2"
+		xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+		xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.2 http://maven.apache.org/xsd/assembly-1.1.2.xsd">
+	<!-- Assembly file for the "in-memory" SystemML release. -->
+	<id>standalone</id>
+
+	<formats>
+		<format>jar</format>
+	</formats>
+
+	<includeBaseDirectory>false</includeBaseDirectory>
+
+	<fileSets>
+		<fileSet>
+			<directory>${project.build.directory}/hadoop-test</directory>
+			<includes>
+				<include>**/*</include>
+			</includes>
+			<outputDirectory>.</outputDirectory>
+		</fileSet>
+
+		<fileSet>
+			<directory>${basedir}/src/main/standalone</directory>
+			<includes>
+				<include>log4j.properties</include>
+			</includes>
+			<outputDirectory>.</outputDirectory>
+		</fileSet>
+	</fileSets>
+
+	<!-- Include all the libraries needed to run in standalone mode. -->
+	<dependencySets>
+		<dependencySet>
+			<includes>
+				<include>*:wink-json4j*</include>
+				<include>*:antlr*</include>
+			</includes>
+			<scope>compile</scope>
+			<unpack>true</unpack>
+		</dependencySet>
+
+		<dependencySet>
+			<includes>
+				<include>*:avro*</include>
+				<include>*:commons-math3*</include>
+				<include>*:log4j*</include>
+				<include>*:opencsv*</include>
+				<include>*:hadoop-auth*</include>
+				<include>*:hadoop-client*</include>
+				<include>*:hadoop-common*</include>
+				<include>*:hadoop-hdfs*</include>
+				<include>*:hadoop-mapreduce-client*</include>
+				<include>*:hadoop-yarn*</include>
+				<include>*:commons-configuration*</include>
+				<include>*:commons-lang</include>
+				<include>*:commons-logging*</include>
+				<include>*:commons-httpclient*</include>
+				<include>*:commons-cli*</include>
+				<include>*:commons-collections*</include>
+				<include>*:jackson-core-asl*</include>
+				<include>*:jackson-mapper-asl*</include>
+				<include>*:slf4j-api*</include>
+				<include>*:slf4j-log4j*</include>
+				<include>*:${artifactId}*</include>
+			</includes>
+			<scope>provided</scope>
+			<unpack>true</unpack>
+		</dependencySet>
+	</dependencySets>
+</assembly>

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/d766fbff/src/assembly/standalone.xml
----------------------------------------------------------------------
diff --git a/src/assembly/standalone.xml b/src/assembly/standalone.xml
index 5979d86..fdb21d0 100644
--- a/src/assembly/standalone.xml
+++ b/src/assembly/standalone.xml
@@ -17,13 +17,11 @@
  * under the License.
 -->
 <assembly
-	xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.2"
-	xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
-	xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.2 http://maven.apache.org/xsd/assembly-1.1.2.xsd">
-
+		xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.2"
+		xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+		xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.2 http://maven.apache.org/xsd/assembly-1.1.2.xsd">
 	<!-- Assembly file for the "standalone" SystemML release for running on a
 		standalone machine. -->
-
 	<id>standalone</id>
 
 	<formats>
@@ -32,27 +30,10 @@
 	</formats>
 
 	<includeBaseDirectory>true</includeBaseDirectory>
-	<baseDirectory>system-ml-${version}-standalone</baseDirectory>
+	<baseDirectory>${artifactId}-${version}-standalone</baseDirectory>
 
 	<fileSets>
 		<fileSet>
-			<directory>${basedir}/src/assembly/standalone</directory>
-			<includes>
-				<include>LICENSE</include>
-				<include>NOTICE</include>
-			</includes>
-			<outputDirectory>.</outputDirectory>
-		</fileSet>
-
-		<fileSet>
-			<directory>${basedir}</directory>
-			<includes>
-				<include>SystemML-config.xml</include>
-			</includes>
-			<outputDirectory>.</outputDirectory>
-		</fileSet>
-
-		<fileSet>
 			<directory>${basedir}/scripts/algorithms</directory>
 			<includes>
 				<include>GLM-predict.dml</include>
@@ -90,16 +71,16 @@
 			</includes>
 			<outputDirectory>./scripts/algorithms</outputDirectory>
 		</fileSet>
-		
+
 		<fileSet>
 			<directory>${basedir}/scripts/datagen</directory>
 			<includes>
 				<include>genLinearRegressionData.dml</include>
 			</includes>
 			<outputDirectory>./scripts/datagen</outputDirectory>
-		</fileSet>	
-    
-    <fileSet>
+		</fileSet>
+
+		<fileSet>
 			<directory>${basedir}/scripts/utils</directory>
 			<includes>
 				<include>cbind.dml</include>
@@ -115,23 +96,6 @@
 		</fileSet>
 
 		<fileSet>
-			<directory>${basedir}/docs/Language Reference</directory>
-			<includes>
-				<include>SystemML_Language_Reference.html</include>
-				<include>README.txt</include>
-			</includes>
-			<outputDirectory>./docs</outputDirectory>
-		</fileSet>
-
-		<fileSet>
-			<directory>${basedir}/docs/Algorithms Reference</directory>
-			<includes>
-				<include>SystemML_Algorithms_Reference.pdf</include>
-			</includes>
-			<outputDirectory>./docs</outputDirectory>
-		</fileSet>
-
-		<fileSet>
 			<directory>${basedir}/src/main/standalone</directory>
 			<includes>
 				<include>log4j.properties</include>
@@ -140,7 +104,7 @@
 			</includes>
 			<outputDirectory>.</outputDirectory>
 		</fileSet>
-		
+
 		<fileSet>
 			<directory>${basedir}/src/test/config/hadoop_bin_windows/bin</directory>
 			<includes>
@@ -148,7 +112,7 @@
 			</includes>
 			<outputDirectory>./lib/hadoop/bin</outputDirectory>
 		</fileSet>
-		
+
 		<!--  Make scripts executable. -->
 		<fileSet>
 			<directory>${basedir}/src/main/standalone</directory>
@@ -159,31 +123,44 @@
 			<outputDirectory>.</outputDirectory>
 			<fileMode>0755</fileMode>
 		</fileSet>
+
+		<fileSet>
+			<directory>${basedir}/src/assembly/standalone</directory>
+			<includes>
+				<include>LICENSE</include>
+				<include>NOTICE</include>
+			</includes>
+			<outputDirectory>.</outputDirectory>
+		</fileSet>
+
+		<fileSet>
+			<directory>${basedir}</directory>
+			<includes>
+				<include>DISCLAIMER</include>
+			</includes>
+			<outputDirectory>.</outputDirectory>
+		</fileSet>
 	</fileSets>
 
-	<!-- 
-	<files>
+	<!--files>
 		<file>
-			<source>target/${artifactId}-${project.version}.jar</source>
+			<source>target/${artifactId}-${project.version}-standalone.jar</source>
 			<outputDirectory>.</outputDirectory>
-			<destName>SystemML.jar</destName> 
+			<destName>SystemML-standalone.jar</destName>
 		</file>
-	</files>
-	-->
-	
-	<!--  Include all the libraries needed to run in standalone mode. -->
-	
+	</files-->
+
+	<!-- Include all the libraries needed to run in standalone mode. -->
 	<dependencySets>
-	
 		<dependencySet>
 			<includes>
-				<include>*:JSON4J*</include>
+				<include>*:wink-json4j*</include>
 				<include>*:antlr*</include>
 			</includes>
 			<outputDirectory>./lib</outputDirectory>
 			<scope>compile</scope>
 		</dependencySet>
-	
+
 		<dependencySet>
 			<includes>
 				<include>*:avro*</include>
@@ -206,11 +183,10 @@
 				<include>*:jackson-mapper-asl*</include>
 				<include>*:slf4j-api*</include>
 				<include>*:slf4j-log4j*</include>
-				<include>*:system-ml*</include>
+				<include>*:${artifactId}*</include>
 			</includes>
 			<outputDirectory>./lib</outputDirectory>
 			<scope>provided</scope>
 		</dependencySet>
 	</dependencySets>
-
 </assembly>

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/d766fbff/src/main/standalone/log4j.properties
----------------------------------------------------------------------
diff --git a/src/main/standalone/log4j.properties b/src/main/standalone/log4j.properties
index a37845c..d73f680 100644
--- a/src/main/standalone/log4j.properties
+++ b/src/main/standalone/log4j.properties
@@ -280,5 +280,5 @@ log4j.logger.org.apache.hadoop.mapred.JobInProgress$JobSummary=${hadoop.mapreduc
 log4j.additivity.org.apache.hadoop.mapred.JobInProgress$JobSummary=false
 
 // setup of the logging level for various components
-log4j.logger.org.apache.hadoop=WARN
-log4j.logger.org.apache.hadoop.conf.Configuration.deprecation=WARN
\ No newline at end of file
+log4j.logger.org.apache.hadoop=ERROR
+log4j.logger.org.apache.hadoop.conf.Configuration.deprecation=ERROR


[41/55] [partial] incubator-systemml git commit: [SYSTEMML-482] [SYSTEMML-480] Adding a Git attributes file to enfore Unix-styled line endings, and normalizing all of the line endings.

Posted by du...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/scripts/algorithms/random-forest.dml
----------------------------------------------------------------------
diff --git a/scripts/algorithms/random-forest.dml b/scripts/algorithms/random-forest.dml
index 7bdc1fb..b68d711 100644
--- a/scripts/algorithms/random-forest.dml
+++ b/scripts/algorithms/random-forest.dml
@@ -1,1375 +1,1375 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-#  
-# THIS SCRIPT IMPLEMENTS CLASSIFICATION RANDOM FOREST WITH BOTH SCALE AND CATEGORICAL FEATURES
-#
-# INPUT         		PARAMETERS:
-# ---------------------------------------------------------------------------------------------
-# NAME          		TYPE     DEFAULT      MEANING
-# ---------------------------------------------------------------------------------------------
-# X             		String   ---          Location to read feature matrix X; note that X needs to be both recoded and dummy coded 
-# Y 					String   ---		  Location to read label matrix Y; note that Y needs to be both recoded and dummy coded
-# R   	  				String   " "	      Location to read the matrix R which for each feature in X contains the following information 
-#												- R[,1]: column ids
-#												- R[,2]: start indices 
-#												- R[,3]: end indices
-#											  If R is not provided by default all variables are assumed to be scale
-# bins          		Int 	 20			  Number of equiheight bins per scale feature to choose thresholds
-# depth         		Int 	 25			  Maximum depth of the learned tree
-# num_leaf      		Int      10           Number of samples when splitting stops and a leaf node is added
-# num_samples   		Int 	 3000		  Number of samples at which point we switch to in-memory subtree building
-# num_trees     		Int 	 10			  Number of trees to be learned in the random forest model
-# subsamp_rate  		Double   1.0		  Parameter controlling the size of each tree in the forest; samples are selected from a 
-#											  Poisson distribution with parameter subsamp_rate (the default value is 1.0)
-# feature_subset    	Double   0.5    	  Parameter that controls the number of feature used as candidates for splitting at each tree node 
-#											  as a power of number of features in the dataset;
-#											  by default square root of features (i.e., feature_subset = 0.5) are used at each tree node 
-# impurity      		String   "Gini"    	  Impurity measure: entropy or Gini (the default)
-# M             		String 	 ---	   	  Location to write matrix M containing the learned tree
-# C 					String   " "		  Location to write matrix C containing the number of times samples are chosen in each tree of the random forest 
-# S_map					String   " "		  Location to write the mappings from scale feature ids to global feature ids
-# C_map					String   " "		  Location to write the mappings from categorical feature ids to global feature ids
-# fmt     	    		String   "text"       The output format of the model (matrix M), such as "text" or "csv"
-# ---------------------------------------------------------------------------------------------
-# OUTPUT: 
-# Matrix M where each column corresponds to a node in the learned tree and each row contains the following information:
-#	 M[1,j]: id of node j (in a complete binary tree)
-#	 M[2,j]: tree id to which node j belongs
-#	 M[3,j]: Offset (no. of columns) to left child of j 
-#	 M[4,j]: Feature index of the feature that node j looks at if j is an internal node, otherwise 0
-#	 M[5,j]: Type of the feature that node j looks at if j is an internal node: 1 for scale and 2 for categorical features, 
-#		     otherwise the label that leaf node j is supposed to predict
-#	 M[6,j]: 1 if j is an internal node and the feature chosen for j is scale, otherwise the size of the subset of values 
-#			 stored in rows 7,8,... if j is categorical
-#	 M[7:,j]: Only applicable for internal nodes. Threshold the example's feature value is compared to is stored at M[7,j] if the feature chosen for j is scale;
-# 			  If the feature chosen for j is categorical rows 7,8,... depict the value subset chosen for j   
-# -------------------------------------------------------------------------------------------
-# HOW TO INVOKE THIS SCRIPT - EXAMPLE:
-# hadoop jar SystemML.jar -f random-forest.dml -nvargs X=INPUT_DIR/X Y=INPUT_DIR/Y R=INPUT_DIR/R M=OUTPUT_DIR/model
-#     				                 				   bins=20 depth=25 num_leaf=10 num_samples=3000 num_trees=10 impurity=Gini fmt=csv
-
-
-# External function for binning
-binning = externalFunction(Matrix[Double] A, Integer binsize, Integer numbins) return (Matrix[Double] B, Integer numbinsdef) 
-	implemented in (classname="org.apache.sysml.udf.lib.BinningWrapper",exectype="mem")
-
-	
-# Default values of some parameters	
-fileR = ifdef ($R, " ");
-fileC = ifdef ($C, " ");
-fileS_map = ifdef ($S_map, " ");
-fileC_map = ifdef ($C_map, " ");
-fileM = $M;	
-num_bins = ifdef($bins, 20); 
-depth = ifdef($depth, 25);
-num_leaf = ifdef($num_leaf, 10);
-num_trees = ifdef($num_trees, 1); 
-threshold = ifdef ($num_samples, 3000);
-imp = ifdef($impurity, "Gini");
-rate = ifdef ($subsamp_rate, 1);
-fpow = ifdef ($feature_subset, 0.5);
-fmtO = ifdef($fmt, "text");
-
-X = read($X);
-Y_bin = read($Y);
-num_records = nrow (X);
-num_classes = ncol (Y_bin);
-
-# check if there is only one class label
-Y_bin_sum = sum (ppred (colSums (Y_bin), num_records, "=="));
-if (Y_bin_sum == 1) {
-	stop ("Y contains only one class label. No model will be learned!");
-} else if (Y_bin_sum > 1) {
-	stop ("Y is not properly dummy coded. Multiple columns of Y contain only ones!")
-}
-
-# split data into X_scale and X_cat
-if (fileR != " ") {
-	R = read (fileR);
-	R = order (target = R, by = 2); # sort by start indices
-	dummy_coded = ppred (R[,2], R[,3], "!=");
-	R_scale = removeEmpty (target = R[,2:3] * (1 - dummy_coded), margin = "rows");
-	R_cat = removeEmpty (target = R[,2:3] * dummy_coded, margin = "rows");
-	if (fileS_map != " ") {
-		scale_feature_mapping = removeEmpty (target = (1 - dummy_coded) * seq (1, nrow (R)), margin = "rows");
-		write (scale_feature_mapping, fileS_map, format = fmtO);
-	}
-	if (fileC_map != " ") {
-		cat_feature_mapping = removeEmpty (target = dummy_coded * seq (1, nrow (R)), margin = "rows");	
-		write (cat_feature_mapping, fileC_map, format = fmtO);		
-	}
-	sum_dummy = sum (dummy_coded);	
-	if (sum_dummy == nrow (R)) { # all features categorical
-		print ("All features categorical");
-		num_cat_features = nrow (R_cat);
-		num_scale_features = 0;
-		X_cat = X;	
-		distinct_values = t (R_cat[,2] - R_cat[,1] + 1);
-		distinct_values_max = max (distinct_values);
-		distinct_values_offset = cumsum (t (distinct_values));
-		distinct_values_overall = sum (distinct_values);
-	} else if (sum_dummy == 0) { # all features scale
-		print ("All features scale");
-		num_scale_features = ncol (X);
-		num_cat_features = 0;
-		X_scale = X;
-		distinct_values_max = 1;
-	} else { # some features scale some features categorical 
-		num_cat_features = nrow (R_cat);
-		num_scale_features = nrow (R_scale);
-		distinct_values = t (R_cat[,2] - R_cat[,1] + 1);
-		distinct_values_max = max (distinct_values);
-		distinct_values_offset = cumsum (t (distinct_values));
-		distinct_values_overall = sum (distinct_values);
-		
-		W = matrix (1, rows = num_cat_features, cols = 1) %*% matrix ("1 -1", rows = 1, cols = 2);
-		W = matrix (W, rows = 2 * num_cat_features, cols = 1);
-		if (as.scalar (R_cat[num_cat_features, 2]) == ncol (X)) {
-			W[2 * num_cat_features,] = 0;
-		}
-		
-		last = ppred (R_cat[,2], ncol (X), "!=");
-		R_cat1 = (R_cat[,2] + 1) * last;
-		R_cat[,2] = (R_cat[,2] * (1 - last)) + R_cat1;
-		R_cat_vec = matrix (R_cat, rows = 2 * num_cat_features, cols = 1);	
-
-		col_tab = table (R_cat_vec, 1, W, ncol (X), 1);
-		col_ind = cumsum (col_tab);
-		
-		col_ind_cat = removeEmpty (target = col_ind * seq (1, ncol (X)), margin = "rows");
-		col_ind_scale = removeEmpty (target = (1 - col_ind) * seq (1, ncol (X)), margin = "rows");	
-		X_cat = X %*% table (col_ind_cat, seq (1, nrow (col_ind_cat)), ncol (X), nrow (col_ind_cat));
-		X_scale = X %*% table (col_ind_scale, seq (1, nrow (col_ind_scale)), ncol (X), nrow (col_ind_scale));		
-	}	
-} else { # only scale features exist
-	print ("All features scale");
-	num_scale_features = ncol (X);
-	num_cat_features = 0;
-	X_scale = X;
-	distinct_values_max = 1;
-}	
-
-if (num_scale_features > 0) {
-
-	print ("COMPUTING BINNING...");
-	bin_size = max (as.integer (num_records / num_bins), 1);
-	count_thresholds = matrix (0, rows = 1, cols = num_scale_features)
-	thresholds = matrix (0, rows = num_bins + 1, cols = num_scale_features)
-	parfor(i1 in 1:num_scale_features) { 
-		col = order (target = X_scale[,i1], by = 1, decreasing = FALSE);
-		[col_bins, num_bins_defined] = binning (col, bin_size, num_bins);
-		count_thresholds[,i1] = num_bins_defined;
-		thresholds[,i1] = col_bins;	
-	}
-	
-	print ("PREPROCESSING SCALE FEATURE MATRIX...");
-	min_num_bins = min (count_thresholds);
-	max_num_bins = max (count_thresholds);
-	total_num_bins = sum (count_thresholds);
-	cum_count_thresholds = t (cumsum (t (count_thresholds)));
-	X_scale_ext = matrix (0, rows = num_records, cols = total_num_bins);
-	parfor (i2 in 1:num_scale_features, check = 0) { 
-		Xi2 = X_scale[,i2];
-		count_threshold = as.scalar (count_thresholds[,i2]);
-		offset_feature = 1;
-		if (i2 > 1) {
-			offset_feature = offset_feature + as.integer (as.scalar (cum_count_thresholds[, (i2 - 1)]));
-		}
-
-		ti2 = t(thresholds[1:count_threshold, i2]);
-		X_scale_ext[,offset_feature:(offset_feature + count_threshold - 1)] = outer (Xi2, ti2, "<");
-	}
-}
-
-num_features_total = num_scale_features + num_cat_features;
-num_feature_samples = as.integer (floor (num_features_total ^ fpow));
-
-##### INITIALIZATION
-L = matrix (1, rows = num_records, cols = num_trees); # last visited node id for each training sample
-
-# create matrix of counts (generated by Poisson distribution) storing how many times each sample appears in each tree
-print ("CONPUTING COUNTS...");
-C = rand (rows = num_records, cols = num_trees, pdf = "poisson", lambda = rate);
-Ix_nonzero = ppred (C, 0, "!=");
-L = L * Ix_nonzero;
-total_counts = sum (C);
-
-
-# model
-# LARGE leaf nodes
-# NC_large[,1]: node id
-# NC_large[,2]: tree id
-# NC_large[,3]: class label
-# NC_large[,4]: no. of misclassified samples 
-# NC_large[,5]: 1 if special leaf (impure and 3 samples at that leaf > threshold) or 0 otherwise 
-NC_large = matrix (0, rows = 5, cols = 1); 
-
-# SMALL leaf nodes 
-# same schema as for LARGE leaf nodes (to be initialized)
-NC_small = matrix (0, rows = 5, cols = 1); 
-
-# LARGE internal nodes
-# Q_large[,1]: node id
-# Q_large[,2]: tree id
-Q_large = matrix (0, rows = 2, cols = num_trees); 
-Q_large[1,] = matrix (1, rows = 1, cols = num_trees);
-Q_large[2,] = t (seq (1, num_trees));
-
-# SMALL internal nodes
-# same schema as for LARGE internal nodes (to be initialized)
-Q_small = matrix (0, rows = 2, cols = 1); 
-
-# F_large[,1]: feature
-# F_large[,2]: type
-# F_large[,3]: offset 
-F_large = matrix (0, rows = 3, cols = 1);
-
-# same schema as for LARGE nodes
-F_small = matrix (0, rows = 3, cols = 1); 
-
-# split points for LARGE internal nodes
-S_large = matrix (0, rows = 1, cols = 1);
-
-# split points for SMALL internal nodes 
-S_small = matrix (0, rows = 1, cols = 1); 
-
-# initialize queue
-cur_nodes_large = matrix (1, rows = 2, cols = num_trees);
-cur_nodes_large[2,] = t (seq (1, num_trees));
-
-num_cur_nodes_large = num_trees;
-num_cur_nodes_small = 0;
-level = 0;
-
-while ((num_cur_nodes_large + num_cur_nodes_small) > 0 & level < depth) {
-	
-	level = level + 1;
-	print (" --- start level " + level + " --- ");
-	
-	##### PREPARE MODEL
-	if (num_cur_nodes_large > 0) { # LARGE nodes to process
-		cur_Q_large = matrix (0, rows = 2, cols = 2 * num_cur_nodes_large);
-		cur_NC_large = matrix (0, rows = 5, cols = 2 * num_cur_nodes_large); 
-		cur_F_large = matrix (0, rows = 3, cols = num_cur_nodes_large); 
-		cur_S_large = matrix (0, rows = 1, cols = num_cur_nodes_large * distinct_values_max); 
-		cur_nodes_small = matrix (0, rows = 3, cols = 2 * num_cur_nodes_large); 
-	}
-
-	##### LOOP OVER LARGE NODES...
-	parfor (i6 in 1:num_cur_nodes_large, check = 0) { 
-	
-		cur_node = as.scalar (cur_nodes_large[1,i6]);
-		cur_tree = as.scalar (cur_nodes_large[2,i6]);
-			
-		# select sample features WOR
-		feature_samples = sample (num_features_total, num_feature_samples);
-		feature_samples = order (target = feature_samples, by = 1);
-		num_scale_feature_samples = sum (ppred (feature_samples, num_scale_features, "<="));
-		num_cat_feature_samples = num_feature_samples - num_scale_feature_samples;
-		
-		# --- find best split ---
-		# samples that reach cur_node 
-		Ix = ppred (L[,cur_tree], cur_node, "==");		
-		
-		cur_Y_bin = Y_bin * (Ix * C[,cur_tree]);
-		label_counts_overall = colSums (cur_Y_bin);
-		label_sum_overall = sum (label_counts_overall);
-		label_dist_overall = label_counts_overall / label_sum_overall;
-
-		if (imp == "entropy") {
-			label_dist_zero = ppred (label_dist_overall, 0, "==");
-			cur_impurity = - sum (label_dist_overall * log (label_dist_overall + label_dist_zero)); # / log (2); # impurity before
-		} else { # imp == "Gini"
-			cur_impurity = sum (label_dist_overall * (1 - label_dist_overall)); # impurity before
-		}
-		best_scale_gain = 0;
-		best_cat_gain = 0;
-		if (num_scale_features > 0 & num_scale_feature_samples > 0) {
-			
-			scale_feature_samples = feature_samples[1:num_scale_feature_samples,];
-			
-			# main operation	
-			label_counts_left_scale = t (t (cur_Y_bin) %*% X_scale_ext); 
-		
-			# compute left and right label distribution
-			label_sum_left = rowSums (label_counts_left_scale);
-			label_dist_left = label_counts_left_scale / label_sum_left;
-			if (imp == "entropy") {
-				label_dist_left = replace (target = label_dist_left, pattern = 0, replacement = 1);
-				log_label_dist_left = log (label_dist_left); # / log (2)
-				impurity_left_scale = - rowSums (label_dist_left * log_label_dist_left); 
-			} else { # imp == "Gini"
-				impurity_left_scale = rowSums (label_dist_left * (1 - label_dist_left)); 
-			}
-			#
-			label_counts_right_scale = - label_counts_left_scale + label_counts_overall; 
-			label_sum_right = rowSums (label_counts_right_scale);
-			label_dist_right = label_counts_right_scale / label_sum_right;
-			if (imp == "entropy") {
-				label_dist_right = replace (target = label_dist_right, pattern = 0, replacement = 1);
-				log_label_dist_right = log (label_dist_right); # / log (2)
-				impurity_right_scale = - rowSums (label_dist_right * log_label_dist_right); 		
-			} else { # imp == "Gini"
-				impurity_right_scale = rowSums (label_dist_right * (1 - label_dist_right)); 
-			}
-			
-			I_gain_scale = cur_impurity - ( ( label_sum_left / label_sum_overall ) * impurity_left_scale + ( label_sum_right / label_sum_overall ) * impurity_right_scale); 
-		
-			I_gain_scale = replace (target = I_gain_scale, pattern = "NaN", replacement = 0);	
-			
-			# determine best feature to split on and the split value
-			feature_start_ind = matrix (0, rows = 1, cols = num_scale_features);
-			feature_start_ind[1,1] = 1;
-			if (num_scale_features > 1) {
-				feature_start_ind[1,2:num_scale_features] = cum_count_thresholds[1,1:(num_scale_features - 1)] + 1;
-			}
-			max_I_gain_found = 0;
-			max_I_gain_found_ind = 0;
-			best_i = 0;
-			
-			for (i in 1:num_scale_feature_samples) { # assuming feature_samples is 5x1
-				cur_feature_samples_bin = as.scalar (scale_feature_samples[i,]); 
-				cur_start_ind = as.scalar (feature_start_ind[,cur_feature_samples_bin]);
-				cur_end_ind = as.scalar (cum_count_thresholds[,cur_feature_samples_bin]);
-				I_gain_portion = I_gain_scale[cur_start_ind:cur_end_ind,];
-				cur_max_I_gain = max (I_gain_portion);
-				cur_max_I_gain_ind = as.scalar (rowIndexMax (t (I_gain_portion)));
-				if (cur_max_I_gain > max_I_gain_found) {
-					max_I_gain_found = cur_max_I_gain;
-					max_I_gain_found_ind = cur_max_I_gain_ind;
-					best_i = i;
-				}
-			}
-
-			best_scale_gain = max_I_gain_found;
-			max_I_gain_ind_scale = max_I_gain_found_ind;
-			best_scale_feature = 0;
-			if (best_i > 0) {
-				best_scale_feature = as.scalar (scale_feature_samples[best_i,]);
-			}
-			best_scale_split = max_I_gain_ind_scale;
-			if (best_scale_feature > 1) {
-				best_scale_split = best_scale_split + as.scalar(cum_count_thresholds[,(best_scale_feature - 1)]);
-			}					
-		}
-	
-		if (num_cat_features > 0 & num_cat_feature_samples > 0){
-			
-			cat_feature_samples = feature_samples[(num_scale_feature_samples + 1):(num_scale_feature_samples + num_cat_feature_samples),] - num_scale_features;
-			
-			# initialization
-			split_values_bin = matrix (0, rows = 1, cols = distinct_values_overall); 
-			split_values = split_values_bin; 
-			split_values_offset = matrix (0, rows = 1, cols = num_cat_features); 
-			I_gains = split_values_offset; 
-			impurities_left = split_values_offset;
-			impurities_right = split_values_offset;
-			best_label_counts_left = matrix (0, rows = num_cat_features, cols = num_classes);
-			best_label_counts_right = matrix (0, rows = num_cat_features, cols = num_classes);
-			
-			# main operation
-			label_counts = t (t (cur_Y_bin) %*% X_cat);  			
-			
-			parfor (i9 in 1:num_cat_feature_samples, check = 0){
-			
-				cur_cat_feature = as.scalar (cat_feature_samples[i9,1]);
-				start_ind = 1;
-				if (cur_cat_feature > 1) {
-					start_ind = start_ind + as.scalar (distinct_values_offset[(cur_cat_feature - 1),]);
-				}
-				offset = as.scalar (distinct_values[1,cur_cat_feature]);
-				
-				cur_label_counts = label_counts[start_ind:(start_ind + offset - 1),];
-								
-				label_sum = rowSums (cur_label_counts);
-				label_dist = cur_label_counts / label_sum;
-				if (imp == "entropy") {
-					label_dist = replace (target = label_dist, pattern = 0, replacement = 1);
-					log_label_dist = log (label_dist); # / log(2)
-					impurity = - rowSums (label_dist * log_label_dist); 
-					impurity = replace (target = impurity, pattern = "NaN", replacement = 1/0); 
-				} else { # imp == "Gini"
-					impurity = rowSums (label_dist * (1 - label_dist)); 				
-				}
-			
-				# sort cur feature by impurity
-				cur_distinct_values = seq (1, nrow (cur_label_counts));
-				cur_distinct_values_impurity = append (cur_distinct_values, impurity);
-				cur_feature_sorted = order (target = cur_distinct_values_impurity, by = 2, decreasing = FALSE);
-				P = table (cur_distinct_values, cur_feature_sorted); # permutation matrix
-				label_counts_sorted = P %*% cur_label_counts;
-
-				# compute left and right label distribution			
-				label_counts_left = cumsum (label_counts_sorted);
-			
-				label_sum_left = rowSums (label_counts_left);
-				label_dist_left = label_counts_left / label_sum_left;
-				label_dist_left = replace (target = label_dist_left, pattern = "NaN", replacement = 1);
-				if (imp == "entropy") {
-					label_dist_left = replace (target = label_dist_left, pattern = 0, replacement = 1);
-					log_label_dist_left = log (label_dist_left); # / log(2)
-					impurity_left = - rowSums (label_dist_left * log_label_dist_left);
-				} else { # imp == "Gini"
-					impurity_left = rowSums (label_dist_left * (1 - label_dist_left));					
-				}
-				#
-				label_counts_right = - label_counts_left + label_counts_overall;
-				label_sum_right = rowSums (label_counts_right);
-				label_dist_right = label_counts_right / label_sum_right;
-				label_dist_right = replace (target = label_dist_right, pattern = "NaN", replacement = 1);
-				if (imp == "entropy") {
-					label_dist_right = replace (target = label_dist_right, pattern = 0, replacement = 1);
-					log_label_dist_right = log (label_dist_right); # / log (2)
-					impurity_right = - rowSums (label_dist_right * log_label_dist_right);			
-				} else { # imp == "Gini"
-					impurity_right = rowSums (label_dist_right * (1 - label_dist_right));					
-				}
-				I_gain = cur_impurity - ( ( label_sum_left / label_sum_overall ) * impurity_left + ( label_sum_right / label_sum_overall ) * impurity_right);
-			
-				Ix_label_sum_left_zero = ppred (label_sum_left, 0, "==");
-				Ix_label_sum_right_zero = ppred (label_sum_right, 0, "==");
-				Ix_label_sum_zero = Ix_label_sum_left_zero * Ix_label_sum_right_zero;
-				I_gain = I_gain * (1 - Ix_label_sum_zero);	
-				
-				I_gain[nrow (I_gain),] = 0; # last entry invalid
-			
-				max_I_gain_ind = as.scalar (rowIndexMax (t (I_gain)));
-
-				split_values[1, start_ind:(start_ind + max_I_gain_ind - 1)] = t (cur_feature_sorted[1:max_I_gain_ind,1]);
-				for (i10 in 1:max_I_gain_ind) {
-					ind = as.scalar (cur_feature_sorted[i10,1]);
-					if (ind == 1) {
-						split_values_bin[1,start_ind] = 1.0; 
-					} else {
-						split_values_bin[1,(start_ind + ind - 1)] = 1.0;
-					}
-				}
-				split_values_offset[1,cur_cat_feature] = max_I_gain_ind;
-			
-				I_gains[1,cur_cat_feature] = max (I_gain);
-			
-				impurities_left[1,cur_cat_feature] = as.scalar (impurity_left[max_I_gain_ind,]);
-				impurities_right[1,cur_cat_feature] = as.scalar (impurity_right[max_I_gain_ind,]);
-				best_label_counts_left[cur_cat_feature,] = label_counts_left[max_I_gain_ind,];
-				best_label_counts_right[cur_cat_feature,] = label_counts_right[max_I_gain_ind,];				
-			}
-			
-			# determine best feature to split on and the split values
-			best_cat_feature = as.scalar (rowIndexMax (I_gains));
-			best_cat_gain = max (I_gains);
-			start_ind = 1;
-			if (best_cat_feature > 1) {
-				start_ind = start_ind + as.scalar (distinct_values_offset[(best_cat_feature - 1),]);
-			}
-			offset = as.scalar (distinct_values[1,best_cat_feature]);
-			best_split_values_bin = split_values_bin[1, start_ind:(start_ind + offset - 1)];		
-		}		
-	
-		# compare best scale feature to best cat. feature and pick the best one
-		if (num_scale_features > 0 & num_scale_feature_samples > 0 & best_scale_gain >= best_cat_gain & best_scale_gain > 0) {
-			
-			# --- update model ---
-			cur_F_large[1,i6] = best_scale_feature;
-			cur_F_large[2,i6] = 1;
-			cur_F_large[3,i6] = 1;			
-			cur_S_large[1,(i6 - 1) * distinct_values_max + 1] = thresholds[max_I_gain_ind_scale, best_scale_feature]; 
-				
-			left_child = 2 * (cur_node - 1) + 1 + 1;
-			right_child = 2 * (cur_node - 1) + 2 + 1;
-					
-			# samples going to the left subtree
-			Ix_left = X_scale_ext[,best_scale_split]; 
-											
-			Ix_left = Ix * Ix_left;		
-			Ix_right = Ix * (1 - Ix_left);
-				
-			L[,cur_tree] = L[,cur_tree] * (1 - Ix_left) + (Ix_left * left_child);
-			L[,cur_tree] = L[,cur_tree] * (1 - Ix_right) + (Ix_right * right_child);								
-			
-			left_child_size = sum (Ix_left * C[,cur_tree]);
-			right_child_size = sum (Ix_right * C[,cur_tree]);
-			
-			# check if left or right child is a leaf
-			left_pure = FALSE;
-			right_pure = FALSE;
-			cur_impurity_left = as.scalar(impurity_left_scale[best_scale_split,]); # max_I_gain_ind_scale
-			cur_impurity_right = as.scalar(impurity_right_scale[best_scale_split,]); # max_I_gain_ind_scale
-			if ( (left_child_size <= num_leaf | cur_impurity_left == 0 | (level == depth)) & 
-			   (right_child_size <= num_leaf | cur_impurity_right == 0 | (level == depth)) | 
-			   (left_child_size <= threshold & right_child_size <= threshold & (level == depth)) ) { # both left and right nodes are leaf	
-				
-				cur_label_counts_left = label_counts_left_scale[best_scale_split,]; # max_I_gain_ind_scale
-				cur_NC_large[1,(2 * (i6 - 1) + 1)] = left_child; 
-				cur_NC_large[2,(2 * (i6 - 1) + 1)] = cur_tree;
-				cur_NC_large[3,(2 * (i6 - 1) + 1)] = as.scalar( rowIndexMax (cur_label_counts_left)); # leaf class label
-				left_pure = TRUE;	
-				# compute number of misclassified points 
-				cur_NC_large[4,(2 * (i6 - 1) + 1)] = left_child_size - max (cur_label_counts_left); 
-				
-				cur_label_counts_right = label_counts_overall - cur_label_counts_left;
-				cur_NC_large[1,(2 * i6)] = right_child; 
-				cur_NC_large[2,(2 * i6)] = cur_tree;
-				cur_NC_large[3,(2 * i6)] = as.scalar( rowIndexMax (cur_label_counts_right)); # leaf class label
-				right_pure = TRUE;	
-				# compute number of misclassified pints
-				cur_NC_large[4,(2 * i6)] = right_child_size - max (cur_label_counts_right); 
-				
-			} else if (left_child_size <= num_leaf | cur_impurity_left == 0 | (level == depth) | 
-					  (left_child_size <= threshold & (level == depth))) {	
-				
-				cur_label_counts_left = label_counts_left_scale[best_scale_split,]; # max_I_gain_ind_scale
-				cur_NC_large[1,(2 * (i6 - 1) + 1)] = left_child; 
-				cur_NC_large[2,(2 * (i6 - 1) + 1)] = cur_tree;
-				cur_NC_large[3,(2 * (i6 - 1) + 1)] = as.scalar( rowIndexMax (cur_label_counts_left)); # leaf class label				
-				left_pure = TRUE;
-				# compute number of misclassified points 
-				cur_NC_large[4,(2 * (i6 - 1) + 1)] = left_child_size - max (cur_label_counts_left);
-				
-			} else if (right_child_size <= num_leaf | cur_impurity_right == 0 | (level == depth) |
-					  (right_child_size <= threshold & (level == depth))) {
-					  
-				cur_label_counts_right = label_counts_right_scale[best_scale_split,]; # max_I_gain_ind_scale
-				cur_NC_large[1,(2 * i6)] = right_child; 
-				cur_NC_large[2,(2 * i6)] = cur_tree;
-				cur_NC_large[3,(2 * i6)] = as.scalar( rowIndexMax (cur_label_counts_right)); # leaf class label
-				right_pure = TRUE;
-				# compute number of misclassified pints
-				cur_NC_large[4,(2 * i6)] = right_child_size - max (cur_label_counts_right);
-				
-			}
-		
-		} else if (num_cat_features > 0 & num_cat_feature_samples > 0 & best_cat_gain > 0) {
-			
-			# --- update model ---
-			cur_F_large[1,i6] = best_cat_feature;
-			cur_F_large[2,i6] = 2;
-			offset_nonzero = as.scalar (split_values_offset[1,best_cat_feature]);
-			S_start_ind = (i6 - 1) * distinct_values_max + 1;
-			cur_F_large[3,i6] = offset_nonzero;
-			cur_S_large[1,S_start_ind:(S_start_ind + offset_nonzero - 1)] = split_values[1,start_ind:(start_ind + offset_nonzero - 1)];		
-		
-			left_child = 2 * (cur_node - 1) + 1 + 1;
-			right_child = 2 * (cur_node - 1) + 2 + 1;
-					
-			# samples going to the left subtree
-			Ix_left = rowSums (X_cat[,start_ind:(start_ind + offset - 1)] * best_split_values_bin);
-			Ix_left = ppred (Ix_left, 1, ">=");
-											  
-			Ix_left = Ix * Ix_left;		
-			Ix_right = Ix * (1 - Ix_left);
-			
-			L[,cur_tree] = L[,cur_tree] * (1 - Ix_left) + (Ix_left * left_child);
-			L[,cur_tree] = L[,cur_tree] * (1 - Ix_right) + (Ix_right * right_child);								
-			
-			left_child_size = sum (Ix_left * C[,cur_tree]);
-			right_child_size = sum (Ix_right * C[,cur_tree]);
-			
-			# check if left or right child is a leaf
-			left_pure = FALSE;
-			right_pure = FALSE;
-			cur_impurity_left = as.scalar(impurities_left[,best_cat_feature]); 
-			cur_impurity_right = as.scalar(impurities_right[,best_cat_feature]); 
-			if ( (left_child_size <= num_leaf | cur_impurity_left == 0 | (level == depth)) & 
-			   (right_child_size <= num_leaf | cur_impurity_right == 0 | (level == depth)) | 
-			   (left_child_size <= threshold & right_child_size <= threshold & (level == depth)) ) { # both left and right nodes are leaf	
-				
-				cur_label_counts_left = best_label_counts_left[best_cat_feature,];
-				cur_NC_large[1,(2 * (i6 - 1) + 1)] = left_child; 
-				cur_NC_large[2,(2 * (i6 - 1) + 1)] = cur_tree;
-				cur_NC_large[3,(2 * (i6 - 1) + 1)] = as.scalar( rowIndexMax (cur_label_counts_left)); # leaf class label
-				left_pure = TRUE;
-				# compute number of misclassified points 
-				cur_NC_large[4,(2 * (i6 - 1) + 1)] = left_child_size - max (cur_label_counts_left); 
-				
-				cur_label_counts_right = label_counts_overall - cur_label_counts_left;
-				cur_NC_large[1,(2 * i6)] = right_child; 
-				cur_NC_large[2,(2 * i6)] = cur_tree;	
-				cur_NC_large[3,(2 * i6)] = as.scalar( rowIndexMax (cur_label_counts_right)); # leaf class label
-				right_pure = TRUE;
-				# compute number of misclassified pints
-				cur_NC_large[4,(2 * i6)] = right_child_size - max (cur_label_counts_right);			
-			
-			} else if (left_child_size <= num_leaf | cur_impurity_left == 0 | (level == depth) |
-					  (left_child_size <= threshold & (level == depth))) {	
-				
-				cur_label_counts_left = best_label_counts_left[best_cat_feature,];
-				cur_NC_large[1,(2 * (i6 - 1) + 1)] = left_child; 
-				cur_NC_large[2,(2 * (i6 - 1) + 1)] = cur_tree;	
-				cur_NC_large[3,(2 * (i6 - 1) + 1)] = as.scalar( rowIndexMax (cur_label_counts_left)); # leaf class label				
-				left_pure = TRUE;
-				# compute number of misclassified points 
-				cur_NC_large[4,(2 * (i6 - 1) + 1)] = left_child_size - max (cur_label_counts_left);				
-			
-			} else if (right_child_size <= num_leaf | cur_impurity_right == 0 | (level == depth) |
-					  (right_child_size <= threshold & (level == depth))) {
-				
-				cur_label_counts_right = best_label_counts_right[best_cat_feature,];
-				cur_NC_large[1,(2 * i6)] = right_child; 
-				cur_NC_large[2,(2 * i6)] = cur_tree;
-				cur_NC_large[3,(2 * i6)] = as.scalar( rowIndexMax (cur_label_counts_right)); # leaf class label
-				right_pure = TRUE;
-				# compute number of misclassified pints
-				cur_NC_large[4,(2 * i6)] = right_child_size - max (cur_label_counts_right);		
-			
-			}		
-		} else {
-			
-			print ("NUMBER OF SAMPLES AT NODE " + cur_node + " in tree " + cur_tree + " CANNOT BE REDUCED TO MATCH " + num_leaf + ". THIS NODE IS DECLARED AS LEAF!");
-			right_pure = TRUE;
-			left_pure = TRUE;
-			cur_NC_large[1,(2 * (i6 - 1) + 1)] = cur_node;
-			cur_NC_large[2,(2 * (i6 - 1) + 1)] = cur_tree;
-			class_label = as.scalar (rowIndexMax (label_counts_overall));
-			cur_NC_large[3,(2 * (i6 - 1) + 1)] = class_label;
-			cur_NC_large[4,(2 * (i6 - 1) + 1)] = label_sum_overall - max (label_counts_overall);
-			cur_NC_large[5,(2 * (i6 - 1) + 1)] = 1; # special leaf	
-						
-		}
-		
-		# add nodes to Q
-		if (!left_pure) {
-			if (left_child_size > threshold) {
-				cur_Q_large[1,(2 * (i6 - 1)+ 1)] = left_child; 
-				cur_Q_large[2,(2 * (i6 - 1)+ 1)] = cur_tree; 
-			} else {
-				cur_nodes_small[1,(2 * (i6 - 1)+ 1)] = left_child;
-				cur_nodes_small[2,(2 * (i6 - 1)+ 1)] = left_child_size;
-				cur_nodes_small[3,(2 * (i6 - 1)+ 1)] = cur_tree;				
-			}
-		}
-		if (!right_pure) {
-			if (right_child_size > threshold) {
-				cur_Q_large[1,(2 * i6)] = right_child;
-				cur_Q_large[2,(2 * i6)] = cur_tree;				
-			} else{
-				cur_nodes_small[1,(2 * i6)] = right_child;
-				cur_nodes_small[2,(2 * i6)] = right_child_size;
-				cur_nodes_small[3,(2 * i6)] = cur_tree;								
-			}	
-		}
-	}
-	
-	##### PREPARE MODEL FOR LARGE NODES
-	if (num_cur_nodes_large > 0) {
-		cur_Q_large = removeEmpty (target = cur_Q_large, margin = "cols");
-		if (as.scalar (cur_Q_large[1,1]) != 0) Q_large = append (Q_large, cur_Q_large);
-		cur_NC_large = removeEmpty (target = cur_NC_large, margin = "cols");
-		if (as.scalar (cur_NC_large[1,1]) != 0) NC_large = append (NC_large, cur_NC_large);
-
-		cur_F_large = removeEmpty (target = cur_F_large, margin = "cols");
-		if (as.scalar (cur_F_large[1,1]) != 0) F_large = append (F_large, cur_F_large);
-		cur_S_large = removeEmpty (target = cur_S_large, margin = "cols");
-		if (as.scalar (cur_S_large[1,1]) != 0) S_large = append (S_large, cur_S_large);	
-			
-		num_cur_nodes_large_pre = 2 * num_cur_nodes_large;
-		if (as.scalar (cur_Q_large[1,1]) == 0) {
-			num_cur_nodes_large = 0;
-		} else {
-			cur_nodes_large = cur_Q_large;
-			num_cur_nodes_large = ncol (cur_Q_large);
-		}	
-	}
-
-	##### PREPARE MODEL FOR SMALL NODES	
-	cur_nodes_small_nonzero = removeEmpty (target = cur_nodes_small, margin = "cols");
-	if (as.scalar (cur_nodes_small_nonzero[1,1]) != 0) { # if SMALL nodes exist
-		num_cur_nodes_small = ncol (cur_nodes_small_nonzero);
-	}
-		
-	if (num_cur_nodes_small > 0) { # SMALL nodes to process		
-		reserve_len = sum (2 ^ (ceil (log (cur_nodes_small_nonzero[2,]) / log (2)))) + num_cur_nodes_small;
-		cur_Q_small =  matrix (0, rows = 2, cols = reserve_len);
-		cur_F_small = matrix (0, rows = 3, cols = reserve_len); 
-		cur_NC_small = matrix (0, rows = 5, cols = reserve_len); 
-		cur_S_small = matrix (0, rows = 1, cols = reserve_len * distinct_values_max); # split values of the best feature	
-	}
-
-	##### LOOP OVER SMALL NODES...
-	parfor (i7 in 1:num_cur_nodes_small, check = 0) { 
-	
-		cur_node_small = as.scalar (cur_nodes_small_nonzero[1,i7]);	
-		cur_tree_small = as.scalar (cur_nodes_small_nonzero[3,i7]);
-		
-		# build dataset for SMALL node
-		Ix = ppred (L[,cur_tree_small], cur_node_small, "==");			
-		if (num_scale_features > 0) {
-			X_scale_ext_small = removeEmpty (target = X_scale_ext, margin = "rows", select = Ix);
-		}
-		if (num_cat_features > 0) {
-			X_cat_small = removeEmpty (target = X_cat, margin = "rows", select = Ix);
-		}
-		
-		L_small = removeEmpty (target = L * Ix, margin = "rows");
-		C_small = removeEmpty (target = C * Ix, margin = "rows");
-		Y_bin_small = removeEmpty (target = Y_bin * Ix, margin = "rows");
-			
-		# compute offset
-		offsets = cumsum (t (2 ^ ceil (log (cur_nodes_small_nonzero[2,]) / log (2))));
-		start_ind_global = 1;
-		if (i7 > 1) {
-			start_ind_global = start_ind_global + as.scalar (offsets[(i7 - 1),]);
-		}
-		start_ind_S_global = 1;
-		if (i7 > 1) {
-			start_ind_S_global = start_ind_S_global + (as.scalar (offsets[(i7 - 1),]) * distinct_values_max);
-		}
-			
-		Q = matrix (0, rows = 2, cols = 1); 
-		Q[1,1] = cur_node_small;
-		Q[2,1] = cur_tree_small;
-		F = matrix (0, rows = 3, cols = 1); 
-		NC = matrix (0, rows = 5, cols = 1); 
-		S = matrix (0, rows = 1, cols = 1); 
-
-		cur_nodes_ = matrix (cur_node_small, rows = 2, cols = 1);
-		cur_nodes_[1,1] = cur_node_small;
-		cur_nodes_[2,1] = cur_tree_small;
-		
-		num_cur_nodes = 1;
-		level_ = level;
-		while (num_cur_nodes > 0 & level_ < depth) {
-			
-			level_ = level_ + 1;
-				
-			cur_Q = matrix (0, rows = 2, cols = 2 * num_cur_nodes);
-			cur_F = matrix (0, rows = 3, cols = num_cur_nodes); 
-			cur_NC = matrix (0, rows = 5, cols = 2 * num_cur_nodes); 
-			cur_S = matrix (0, rows = 1, cols = num_cur_nodes * distinct_values_max);
-		
-			parfor (i8 in 1:num_cur_nodes, check = 0) { 
-			
-				cur_node = as.scalar (cur_nodes_[1,i8]);	
-				cur_tree = as.scalar (cur_nodes_[2,i8]);
-				
-				# select sample features WOR
-				feature_samples = sample (num_features_total, num_feature_samples);
-				feature_samples = order (target = feature_samples, by = 1);
-				num_scale_feature_samples = sum (ppred (feature_samples, num_scale_features, "<="));
-				num_cat_feature_samples = num_feature_samples - num_scale_feature_samples;
-		
-				# --- find best split ---
-				# samples that reach cur_node 
-				Ix = ppred (L_small[,cur_tree], cur_node, "==");		
-				cur_Y_bin = Y_bin_small * (Ix * C_small[,cur_tree]);
-				label_counts_overall = colSums (cur_Y_bin);
-				
-				label_sum_overall = sum (label_counts_overall);
-				label_dist_overall = label_counts_overall / label_sum_overall;
-				if (imp == "entropy") {
-					label_dist_zero = ppred (label_dist_overall, 0, "==");
-					cur_impurity = - sum (label_dist_overall * log (label_dist_overall + label_dist_zero)); # / log (2);
-				} else { # imp == "Gini"
-					cur_impurity = sum (label_dist_overall * (1 - label_dist_overall)); 			
-				}
-				best_scale_gain = 0;
-				best_cat_gain = 0;
-				if (num_scale_features > 0 & num_scale_feature_samples > 0) {
-					
-					scale_feature_samples = feature_samples[1:num_scale_feature_samples,];
-					
-					# main operation	
-					label_counts_left_scale = t (t (cur_Y_bin) %*% X_scale_ext_small); 
-		
-					# compute left and right label distribution
-					label_sum_left = rowSums (label_counts_left_scale);
-					label_dist_left = label_counts_left_scale / label_sum_left;
-					if (imp == "entropy") {
-						label_dist_left = replace (target = label_dist_left, pattern = 0, replacement = 1);
-						log_label_dist_left = log (label_dist_left); # / log (2)
-						impurity_left_scale = - rowSums (label_dist_left * log_label_dist_left); 
-					} else { # imp == "Gini"
-						impurity_left_scale = rowSums (label_dist_left * (1 - label_dist_left)); 
-					}
-					#
-					label_counts_right_scale = - label_counts_left_scale + label_counts_overall; 
-					label_sum_right = rowSums (label_counts_right_scale);
-					label_dist_right = label_counts_right_scale / label_sum_right;
-					if (imp == "entropy") {
-						label_dist_right = replace (target = label_dist_right, pattern = 0, replacement = 1);
-						log_label_dist_right = log (label_dist_right); # log (2)
-						impurity_right_scale = - rowSums (label_dist_right * log_label_dist_right); 		
-					} else { # imp == "Gini"
-						impurity_right_scale = rowSums (label_dist_right * (1 - label_dist_right)); 			
-					}
-					I_gain_scale = cur_impurity - ( ( label_sum_left / label_sum_overall ) * impurity_left_scale + ( label_sum_right / label_sum_overall ) * impurity_right_scale); 
-			
-					I_gain_scale = replace (target = I_gain_scale, pattern = "NaN", replacement = 0);		
-			
-					# determine best feature to split on and the split value
-					feature_start_ind = matrix (0, rows = 1, cols = num_scale_features);
-					feature_start_ind[1,1] = 1;
-					if (num_scale_features > 1) {
-						feature_start_ind[1,2:num_scale_features] = cum_count_thresholds[1,1:(num_scale_features - 1)] + 1;
-					}
-					max_I_gain_found = 0;
-					max_I_gain_found_ind = 0;
-					best_i = 0;			
-						
-					for (i in 1:num_scale_feature_samples) { # assuming feature_samples is 5x1
-						cur_feature_samples_bin = as.scalar (scale_feature_samples[i,]); 
-						cur_start_ind = as.scalar (feature_start_ind[,cur_feature_samples_bin]);
-						cur_end_ind = as.scalar (cum_count_thresholds[,cur_feature_samples_bin]);
-						I_gain_portion = I_gain_scale[cur_start_ind:cur_end_ind,];
-						cur_max_I_gain = max (I_gain_portion);
-						cur_max_I_gain_ind = as.scalar (rowIndexMax (t (I_gain_portion)));
-						if (cur_max_I_gain > max_I_gain_found) {
-							max_I_gain_found = cur_max_I_gain;
-							max_I_gain_found_ind = cur_max_I_gain_ind;
-							best_i = i;
-						}
-					}
-	
-					best_scale_gain = max_I_gain_found;
-					max_I_gain_ind_scale = max_I_gain_found_ind;
-					best_scale_feature = 0;
-					if (best_i > 0) {
-						best_scale_feature = as.scalar (scale_feature_samples[best_i,]);
-					}
-					best_scale_split = max_I_gain_ind_scale;
-					if (best_scale_feature > 1) {
-						best_scale_split = best_scale_split + as.scalar(cum_count_thresholds[,(best_scale_feature - 1)]);
-					}				
-				}
-				
-				if (num_cat_features > 0 & num_cat_feature_samples > 0){
-						
-					cat_feature_samples = feature_samples[(num_scale_feature_samples + 1):(num_scale_feature_samples + num_cat_feature_samples),] - num_scale_features;
-						
-					# initialization
-					split_values_bin = matrix (0, rows = 1, cols = distinct_values_overall); 
-					split_values = split_values_bin; 
-					split_values_offset = matrix (0, rows = 1, cols = num_cat_features); 
-					I_gains = split_values_offset; 
-					impurities_left = split_values_offset;
-					impurities_right = split_values_offset;
-					best_label_counts_left = matrix (0, rows = num_cat_features, cols = num_classes);
-					best_label_counts_right = matrix (0, rows = num_cat_features, cols = num_classes);
-			
-					# main operation
-					label_counts = t (t (cur_Y_bin) %*% X_cat_small);  		
-					
-					parfor (i9 in 1:num_cat_feature_samples, check = 0){
-				
-						cur_cat_feature = as.scalar (cat_feature_samples[i9,1]);
-						start_ind = 1;
-						if (cur_cat_feature > 1) {
-							start_ind = start_ind + as.scalar (distinct_values_offset[(cur_cat_feature - 1),]);
-						}
-						offset = as.scalar (distinct_values[1,cur_cat_feature]);
-				
-						cur_label_counts = label_counts[start_ind:(start_ind + offset - 1),];
-							
-						label_sum = rowSums (cur_label_counts);
-						label_dist = cur_label_counts / label_sum;
-						if (imp == "entropy") {
-							label_dist = replace (target = label_dist, pattern = 0, replacement = 1);
-							log_label_dist = log (label_dist); # / log(2)
-							impurity = - rowSums (label_dist * log_label_dist); 
-							impurity = replace (target = impurity, pattern = "NaN", replacement = 1/0); 
-						} else { # imp == "Gini"
-							impurity = rowSums (label_dist * (1 - label_dist)); 				
-						}
-				
-						# sort cur feature by impurity
-						cur_distinct_values = seq (1, nrow (cur_label_counts));
-						cur_distinct_values_impurity = append (cur_distinct_values, impurity);
-						cur_feature_sorted = order (target = cur_distinct_values_impurity, by = 2, decreasing = FALSE);
-						P = table (cur_distinct_values, cur_feature_sorted); # permutation matrix
-						label_counts_sorted = P %*% cur_label_counts;
-	
-						# compute left and right label distribution			
-						label_counts_left = cumsum (label_counts_sorted);
-			
-						label_sum_left = rowSums (label_counts_left);
-						label_dist_left = label_counts_left / label_sum_left;
-						label_dist_left = replace (target = label_dist_left, pattern = "NaN", replacement = 1);
-						if (imp == "entropy") {
-							label_dist_left = replace (target = label_dist_left, pattern = 0, replacement = 1);
-							log_label_dist_left = log (label_dist_left); # / log(2)
-							impurity_left = - rowSums (label_dist_left * log_label_dist_left);
-						} else { # imp == "Gini"
-							impurity_left = rowSums (label_dist_left * (1 - label_dist_left));					
-						}
-						#
-						label_counts_right = - label_counts_left + label_counts_overall;
-						label_sum_right = rowSums (label_counts_right);
-						label_dist_right = label_counts_right / label_sum_right;
-						label_dist_right = replace (target = label_dist_right, pattern = "NaN", replacement = 1);
-						if (imp == "entropy") {
-							label_dist_right = replace (target = label_dist_right, pattern = 0, replacement = 1);
-							log_label_dist_right = log (label_dist_right); # / log (2)
-							impurity_right = - rowSums (label_dist_right * log_label_dist_right);			
-						} else { # imp == "Gini"
-							impurity_right = rowSums (label_dist_right * (1 - label_dist_right));					
-						}
-						I_gain = cur_impurity - ( ( label_sum_left / label_sum_overall ) * impurity_left + ( label_sum_right / label_sum_overall ) * impurity_right);
-			
-						Ix_label_sum_left_zero = ppred (label_sum_left, 0, "==");
-						Ix_label_sum_right_zero = ppred (label_sum_right, 0, "==");
-						Ix_label_sum_zero = Ix_label_sum_left_zero * Ix_label_sum_right_zero;
-						I_gain = I_gain * (1 - Ix_label_sum_zero);	
-				
-						I_gain[nrow (I_gain),] = 0; # last entry invalid
-			
-						max_I_gain_ind = as.scalar (rowIndexMax (t (I_gain)));
-
-						split_values[1, start_ind:(start_ind + max_I_gain_ind - 1)] = t (cur_feature_sorted[1:max_I_gain_ind,1]);
-						for (i10 in 1:max_I_gain_ind) {
-							ind = as.scalar (cur_feature_sorted[i10,1]);
-							if (ind == 1) {
-								split_values_bin[1,start_ind] = 1.0; 
-							} else {
-								split_values_bin[1,(start_ind + ind - 1)] = 1.0;
-							}
-						}
-						split_values_offset[1,cur_cat_feature] = max_I_gain_ind;
-			
-						I_gains[1,cur_cat_feature] = max (I_gain);
-		
-						impurities_left[1,cur_cat_feature] = as.scalar (impurity_left[max_I_gain_ind,]);
-						impurities_right[1,cur_cat_feature] = as.scalar (impurity_right[max_I_gain_ind,]);
-						best_label_counts_left[cur_cat_feature,] = label_counts_left[max_I_gain_ind,];
-						best_label_counts_right[cur_cat_feature,] = label_counts_right[max_I_gain_ind,];				
-					}
-			
-					# determine best feature to split on and the split values
-					best_cat_feature = as.scalar (rowIndexMax (I_gains));
-					best_cat_gain = max (I_gains);
-					start_ind = 1;
-					if (best_cat_feature > 1) {
-						start_ind = start_ind + as.scalar (distinct_values_offset[(best_cat_feature - 1),]);
-					}
-					offset = as.scalar (distinct_values[1,best_cat_feature]);
-					best_split_values_bin = split_values_bin[1, start_ind:(start_ind + offset - 1)];
-				}
-				
-				# compare best scale feature to best cat. feature and pick the best one
-				if (num_scale_features > 0 & num_scale_feature_samples > 0 & best_scale_gain >= best_cat_gain & best_scale_gain > 0) {
-					
-					# --- update model ---
-					cur_F[1,i8] = best_scale_feature;
-					cur_F[2,i8] = 1;
-					cur_F[3,i8] = 1;		
-					cur_S[1,(i8 - 1) * distinct_values_max + 1] = thresholds[max_I_gain_ind_scale, best_scale_feature]; 
-					
-					left_child = 2 * (cur_node - 1) + 1 + 1;
-					right_child = 2 * (cur_node - 1) + 2 + 1;
-					
-					# samples going to the left subtree
-					Ix_left = X_scale_ext_small[, best_scale_split]; 
-											
-					Ix_left = Ix * Ix_left;		
-					Ix_right = Ix * (1 - Ix_left);
-				
-					L_small[,cur_tree] = L_small[,cur_tree] * (1 - Ix_left) + (Ix_left * left_child);
-					L_small[,cur_tree] = L_small[,cur_tree] * (1 - Ix_right) + (Ix_right * right_child);								
-			
-					left_child_size = sum (Ix_left * C_small[,cur_tree]);
-					right_child_size = sum (Ix_right * C_small[,cur_tree]);
-				
-					# check if left or right child is a leaf
-					left_pure = FALSE;
-					right_pure = FALSE;
-					cur_impurity_left = as.scalar(impurity_left_scale[best_scale_split,]); 
-					cur_impurity_right = as.scalar(impurity_right_scale[best_scale_split,]); 
-					if ( (left_child_size <= num_leaf | cur_impurity_left == 0 | level_ == depth) & 
-					   (right_child_size <= num_leaf | cur_impurity_right == 0 | level_ == depth) ) { # both left and right nodes are leaf	
-						
-						cur_label_counts_left = label_counts_left_scale[best_scale_split,]; 
-						cur_NC[1,(2 * (i8 - 1) + 1)] = left_child; 
-						cur_NC[2,(2 * (i8 - 1) + 1)] = cur_tree;
-						cur_NC[3,(2 * (i8 - 1) + 1)] = as.scalar( rowIndexMax (cur_label_counts_left)); # leaf class label
-						left_pure = TRUE;	
-						# compute number of misclassified points 
-						cur_NC[4,(2 * (i8 - 1) + 1)] = left_child_size - max (cur_label_counts_left); 
-				
-						cur_label_counts_right = label_counts_overall - cur_label_counts_left;
-						cur_NC[1,(2 * i8)] = right_child; 
-						cur_NC[2,(2 * i8)] = cur_tree;	
-						cur_NC[3,(2 * i8)] = as.scalar( rowIndexMax (cur_label_counts_right)); # leaf class label
-						right_pure = TRUE;	
-						# compute number of misclassified points 
-						cur_NC[4,(2 * i8)] = right_child_size - max (cur_label_counts_right);					
-						
-					} else if (left_child_size <= num_leaf | cur_impurity_left == 0 | level_ == depth) {
-						
-						cur_label_counts_left = label_counts_left_scale[best_scale_split,]; 
-						cur_NC[1,(2 * (i8 - 1) + 1)] = left_child;
-						cur_NC[2,(2 * (i8 - 1) + 1)] = cur_tree;						
-						cur_NC[3,(2 * (i8 - 1) + 1)] = as.scalar( rowIndexMax (cur_label_counts_left)); # leaf class label				
-						left_pure = TRUE;	
-						# compute number of misclassified points 
-						cur_NC[4,(2 * (i8 - 1) + 1)] = left_child_size - max (cur_label_counts_left); 
-						
-					} else if (right_child_size <= num_leaf | cur_impurity_right == 0 | level_ == depth) {
-						
-						cur_label_counts_right = label_counts_right_scale[best_scale_split,]; 
-						cur_NC[1,(2 * i8)] = right_child;
-						cur_NC[2,(2 * i8)] = cur_tree;							
-						cur_NC[3,(2 * i8)] = as.scalar( rowIndexMax (cur_label_counts_right)); # leaf class label
-						right_pure = TRUE;
-						# compute number of misclassified points 
-						cur_NC[4,(2 * i8)] = right_child_size - max (cur_label_counts_right);						
-						
-					}									
-														
-				} else if (num_cat_features > 0 & num_cat_feature_samples > 0 & best_cat_gain > 0) {
-					
-					# --- update model ---
-					cur_F[1,i8] = best_cat_feature;
-					cur_F[2,i8] = 2;
-					offset_nonzero = as.scalar (split_values_offset[1,best_cat_feature]);
-					S_start_ind = (i8 - 1) * distinct_values_max + 1;
-					cur_F[3,i8] = offset_nonzero;
-					cur_S[1,S_start_ind:(S_start_ind + offset_nonzero - 1)] = split_values[1,start_ind:(start_ind + offset_nonzero - 1)];	
-		
-					left_child = 2 * (cur_node - 1) + 1 + 1;
-					right_child = 2 * (cur_node - 1) + 2 + 1;
-					
-					# samples going to the left subtree
-					Ix_left = rowSums (X_cat_small[,start_ind:(start_ind + offset - 1)] * best_split_values_bin);
-					Ix_left = ppred (Ix_left, 1, ">=");
-											  
-					Ix_left = Ix * Ix_left;		
-					Ix_right = Ix * (1 - Ix_left);
-			
-					L_small[,cur_tree] = L_small[,cur_tree] * (1 - Ix_left) + (Ix_left * left_child);
-					L_small[,cur_tree] = L_small[,cur_tree] * (1 - Ix_right) + (Ix_right * right_child);								
-			
-					left_child_size = sum (Ix_left * C_small[,cur_tree]);
-					right_child_size = sum (Ix_right * C_small[,cur_tree]);
-		
-					# check if left or right child is a leaf
-					left_pure = FALSE;
-					right_pure = FALSE;
-					cur_impurity_left = as.scalar(impurities_left[,best_cat_feature]); 
-					cur_impurity_right = as.scalar(impurities_right[,best_cat_feature]); 
-					if ( (left_child_size <= num_leaf | cur_impurity_left == 0 | level_ == depth) & 
-					   (right_child_size <= num_leaf | cur_impurity_right == 0 | level_ == depth) ) { # both left and right nodes are leaf	
-						
-						cur_label_counts_left = best_label_counts_left[best_cat_feature,];
-						cur_NC[1,(2 * (i8 - 1) + 1)] = left_child;
-						cur_NC[2,(2 * (i8 - 1) + 1)] = cur_tree;						
-						cur_NC[3,(2 * (i8 - 1) + 1)] = as.scalar( rowIndexMax (cur_label_counts_left)); # leaf class label
-						left_pure = TRUE;
-						# compute number of misclassified points 
-						cur_NC[4,(2 * (i8 - 1) + 1)] = left_child_size - max (cur_label_counts_left); 
-						
-						cur_label_counts_right = label_counts_overall - cur_label_counts_left;
-						cur_NC[1,(2 * i8)] = right_child;
-						cur_NC[2,(2 * i8)] = cur_tree;						
-						cur_NC[3,(2 * i8)] = as.scalar( rowIndexMax (cur_label_counts_right)); # leaf class label
-						right_pure = TRUE;
-						# compute number of misclassified points 
-						cur_NC[4,(2 * i8)] = right_child_size - max (cur_label_counts_right);
-						
-					} else if (left_child_size <= num_leaf | cur_impurity_left == 0 | level_ == depth) {	
-					
-						cur_label_counts_left = best_label_counts_left[best_cat_feature,];
-						cur_NC[1,(2 * (i8 - 1) + 1)] = left_child; 
-						cur_NC[2,(2 * (i8 - 1) + 1)] = cur_tree;
-						cur_NC[3,(2 * (i8 - 1) + 1)] = as.scalar( rowIndexMax (cur_label_counts_left)); # leaf class label				
-						left_pure = TRUE;
-						# compute number of misclassified points 
-						cur_NC[4,(2 * (i8 - 1) + 1)] = left_child_size - max (cur_label_counts_left);
-						
-					} else if (right_child_size <= num_leaf | cur_impurity_right == 0 | level_ == depth) {
-						cur_label_counts_right = best_label_counts_right[best_cat_feature,];
-						cur_NC[1,(2 * i8)] = right_child; 
-						cur_NC[2,(2 * i8)] = cur_tree;
-						cur_NC[3,(2 * i8)] = as.scalar( rowIndexMax (cur_label_counts_right)); # leaf class label
-						right_pure = TRUE;
-						# compute number of misclassified points 
-						cur_NC[4,(2 * i8)] = right_child_size - max (cur_label_counts_right);
-						
-					}		
-				} else {
-							
-					print ("NUMBER OF SAMPLES AT NODE " + cur_node + " in tree " + cur_tree + " CANNOT BE REDUCED TO MATCH " + num_leaf + ". THIS NODE IS DECLARED AS LEAF!");							
-					right_pure = TRUE;
-					left_pure = TRUE;
-					cur_NC[1,(2 * (i8 - 1) + 1)] = cur_node;
-					cur_NC[2,(2 * (i8 - 1) + 1)] = cur_tree;
-					class_label = as.scalar (rowIndexMax (label_counts_overall));
-					cur_NC[3,(2 * (i8 - 1) + 1)] = class_label;
-					cur_NC[4,(2 * (i8 - 1) + 1)] = label_sum_overall - max (label_counts_overall);
-					cur_NC[5,(2 * (i8 - 1) + 1)] = 1; # special leaf
-					
-				}
-			
-				# add nodes to Q
-				if (!left_pure) {
-					cur_Q[1,(2 * (i8 - 1)+ 1)] = left_child; 
-					cur_Q[2,(2 * (i8 - 1)+ 1)] = cur_tree; 
-				}
-				if (!right_pure) {
-					cur_Q[1,(2 * i8)] = right_child;
-					cur_Q[2,(2 * i8)] = cur_tree;				
-				}
-			}
-		
-			cur_Q = removeEmpty (target = cur_Q, margin = "cols"); 
-			Q = append (Q, cur_Q);
-			NC = append (NC, cur_NC);
-			F = append (F, cur_F);
-			S = append (S, cur_S);
-		
-			num_cur_nodes_pre = 2 * num_cur_nodes;
-			if (as.scalar (cur_Q[1,1]) == 0) {
-				num_cur_nodes = 0;
-			} else {
-				cur_nodes_ = cur_Q;
-				num_cur_nodes = ncol (cur_Q);
-			}
-		}
-		
-		cur_Q_small[,start_ind_global:(start_ind_global + ncol (Q) - 1)] = Q;
-		cur_NC_small[,start_ind_global:(start_ind_global + ncol (NC) - 1)] = NC;
-		cur_F_small[,start_ind_global:(start_ind_global + ncol (F) - 1)] = F;	
-		cur_S_small[,start_ind_S_global:(start_ind_S_global + ncol (S) - 1)] = S; 
-	}
-
-	##### PREPARE MODEL FOR SMALL NODES	
-	if (num_cur_nodes_small > 0) {	# small nodes already processed
-		cur_Q_small = removeEmpty (target = cur_Q_small, margin = "cols");
-		if (as.scalar (cur_Q_small[1,1]) != 0) Q_small = append (Q_small, cur_Q_small);
-		cur_NC_small = removeEmpty (target = cur_NC_small, margin = "cols");
-		if (as.scalar (cur_NC_small[1,1]) != 0) NC_small = append (NC_small, cur_NC_small);
-	
-		cur_F_small = removeEmpty (target = cur_F_small, margin = "cols"); # 
-		if (as.scalar (cur_F_small[1,1]) != 0) F_small = append (F_small, cur_F_small);
-		cur_S_small = removeEmpty (target = cur_S_small, margin = "cols"); #		
-		if (as.scalar (cur_S_small[1,1]) != 0) S_small = append (S_small, cur_S_small); 
-		
-		num_cur_nodes_small = 0; # reset
-	} 
-		
-	print (" --- end level " + level + ", remaining no. of LARGE nodes to expand " + num_cur_nodes_large + " --- ");
-}
-
-#### prepare model
-print ("PREPARING MODEL...")
-# Each accumulator below may still begin with a column whose top entry is 0
-# (presumably the initial placeholder column; its creation is outside this section -- TODO confirm).
-# That leading column is dropped, but only when at least one other column remains,
-# so that none of the matrices ever becomes empty.
-
-### Q: node id / tree id of the nodes that were queued for expansion
-width = ncol (Q_large);
-if (width > 1 & as.scalar (Q_large[1,1]) == 0) {
-	Q_large = Q_large[,2:width];
-}
-width = ncol (Q_small);
-if (width > 1 & as.scalar (Q_small[1,1]) == 0) {
-	Q_small = Q_small[,2:width];
-}
-
-### NC: leaf nodes (node id, tree id, class label, misclassified count, special-leaf flag)
-width = ncol (NC_large);
-if (width > 1 & as.scalar (NC_large[1,1]) == 0) {
-	NC_large = NC_large[,2:width];
-}
-width = ncol (NC_small);
-if (width > 1 & as.scalar (NC_small[1,1]) == 0) {
-	NC_small = NC_small[,2:width];
-}
-
-### F: split feature information of the expanded nodes
-width = ncol (F_large);
-if (width > 1 & as.scalar (F_large[1,1]) == 0) {
-	F_large = F_large[,2:width];
-}
-width = ncol (F_small);
-if (width > 1 & as.scalar (F_small[1,1]) == 0) {
-	F_small = F_small[,2:width];
-}
-
-### S: split values of the expanded nodes
-width = ncol (S_large);
-if (width > 1 & as.scalar (S_large[1,1]) == 0) {
-	S_large = S_large[,2:width];
-}
-width = ncol (S_small);
-if (width > 1 & as.scalar (S_small[1,1]) == 0) {
-	S_small = S_small[,2:width];
-}
-
-# check for special leaves and if there are any remove them from Q_large and Q_small 
-# A "special leaf" is a node that was queued for expansion but for which no split with positive
-# gain was found; the training loop records it in NC_* with row 5 set to 1. Such a node must not
-# also be reported as an internal node, so its (node id, tree id) pair is masked out of Q_*.
-special_large_leaves_ind = NC_large[5,];
-num_special_large_leaf = sum (special_large_leaves_ind);
-if (num_special_large_leaf > 0) {
-	print ("PROCESSING " + num_special_large_leaf + " SPECIAL LARGE LEAVES...");
-	# (node id, tree id) of the special leaves only; all other columns are zeroed and dropped
-	special_large_leaves = removeEmpty (target = NC_large[1:2,] * special_large_leaves_ind, margin = "cols");
-	# 1 for every column of Q_large that matches no special leaf in both node id and tree id, 0 otherwise
-	large_internal_ind = 1 - colSums (outer (t (special_large_leaves[1,]), Q_large[1,], "==") * outer (t (special_large_leaves[2,]), Q_large[2,], "=="));
-	Q_large = removeEmpty (target = Q_large * large_internal_ind, margin = "cols");
-	# special leaves left all-zero columns in F_large (no feature was recorded for them); drop those columns
-	F_large = removeEmpty (target = F_large, margin = "cols"); # remove special leaves from F
-}
-
-# same procedure for the nodes that were processed in the SMALL (in-memory) phase
-special_small_leaves_ind = NC_small[5,];
-num_special_small_leaf = sum (special_small_leaves_ind);
-if (num_special_small_leaf > 0) {
-	print ("PROCESSING " + num_special_small_leaf + " SPECIAL SMALL LEAVES...");
-	# (node id, tree id) of the special leaves only
-	special_small_leaves = removeEmpty (target = NC_small[1:2,] * special_small_leaves_ind, margin = "cols");
-	# 1 for every column of Q_small that matches no special leaf in both node id and tree id, 0 otherwise
-	small_internal_ind = 1 - colSums (outer (t (special_small_leaves[1,]), Q_small[1,], "==") * outer (t (special_small_leaves[2,]), Q_small[2,], "=="));
-	Q_small = removeEmpty (target = Q_small * small_internal_ind, margin = "cols");
-	# drop the all-zero columns that special leaves left in F_small
-	F_small = removeEmpty (target = F_small, margin = "cols"); # remove special leaves from F
-}
-
-# ----- assemble the model matrix M (one column per node) -----
-# Row layout produced by this section:
-#   rows 1-2 : node id, tree id
-#   row  3   : left zero here; filled with the offset to the left child further below
-#   rows 4-6 : internal nodes: feature id, feature type (1 = scale, 2 = categorical), number of split values
-#   rows 5-7 : leaf nodes: class label, number of misclassified samples, special-leaf flag
-#   rows 7.. : internal nodes: the split values themselves
-#
-# Number of rows reserved for split values. It is computed once, before any branch, because all
-# four matrices below (M1_large, M1_small, M2_large, M2_small) need it; previously it was only
-# assigned inside the LARGE internal node branch and was undefined whenever that branch was skipped.
-# The floor of 1 guarantees that row 7 exists, which the leaf matrices write to via [5:7,].
-max_offset = max (1, max (max (F_large[3,]), max (F_small[3,])));
-
-# model corresponding to large internal nodes
-no_large_internal_node = FALSE;
-if (as.scalar (Q_large[1,1]) != 0) {
-	print ("PROCESSING LARGE INTERNAL NODES...");
-	num_large_internal = ncol (Q_large);
-	M1_large = matrix (0, rows = 6 + max_offset, cols = num_large_internal);
-	M1_large[1:2,] = Q_large;
-	M1_large[4:6,] = F_large;
-	# process S_large
-	# S_large stores the split values of all nodes back to back; F_large[3,] holds how many belong to each node
-	cum_offsets_large = cumsum (t (F_large[3,]));
-	parfor (it in 1:num_large_internal, check = 0) {
-		start_ind = 1;
-		if (it > 1) {
-			start_ind = start_ind + as.scalar (cum_offsets_large[(it - 1),]);
-		}
-		offset = as.scalar (F_large[3,it]);
-		M1_large[7:(7 + offset - 1),it] = t (S_large[1,start_ind:(start_ind + offset - 1)]); 
-	}	
-} else {
-	print ("No LARGE internal nodes available");
-	no_large_internal_node = TRUE;
-}
-
-# model corresponding to small internal nodes
-no_small_internal_node = FALSE;
-if (as.scalar (Q_small[1,1]) != 0) {
-	print ("PROCESSING SMALL INTERNAL NODES...");
-	num_small_internal = ncol (Q_small);
-	M1_small = matrix (0, rows = 6 + max_offset, cols = num_small_internal);
-	M1_small[1:2,] = Q_small;
-	M1_small[4:6,] = F_small;
-	# process S_small
-	# same packed layout as S_large: F_small[3,] gives the number of split values per node
-	cum_offsets_small = cumsum (t (F_small[3,]));
-	parfor (it in 1:num_small_internal, check = 0) {
-		start_ind = 1;
-		if (it > 1) {
-			start_ind = start_ind + as.scalar (cum_offsets_small[(it - 1),]);
-		}
-		offset = as.scalar (F_small[3,it]);
-		M1_small[7:(7 + offset - 1),it] = t (S_small[1,start_ind:(start_ind + offset - 1)]); 
-	}
-} else {
-	print ("No SMALL internal nodes available");	
-	no_small_internal_node = TRUE;
-}
-
-# model corresponding to large leaf nodes
-no_large_leaf_node = FALSE;
-if (as.scalar (NC_large[1,1]) != 0) {
-	print ("PROCESSING LARGE LEAF NODES...");
-	num_large_leaf = ncol (NC_large);
-	M2_large = matrix (0, rows = 6 + max_offset, cols = num_large_leaf);
-	M2_large[1:2,] = NC_large[1:2,];
-	M2_large[5:7,] = NC_large[3:5,];
-} else {
-	print ("No LARGE leaf nodes available");
-	no_large_leaf_node = TRUE;
-}
-
-# model corresponding to small leaf nodes
-no_small_leaf_node = FALSE;
-if (as.scalar (NC_small[1,1]) != 0) {
-	print ("PROCESSING SMALL LEAF NODES...");
-	num_small_leaf = ncol (NC_small);
-	M2_small = matrix (0, rows = 6 + max_offset, cols = num_small_leaf);
-	M2_small[1:2,] = NC_small[1:2,];
-	M2_small[5:7,] = NC_small[3:5,];
-} else {
-	print ("No SMALL leaf nodes available");
-	no_small_leaf_node = TRUE;
-}
-
-# all leaf nodes
-if (no_large_leaf_node) {
-	M2 = M2_small;
-} else if (no_small_leaf_node) {
-	M2 = M2_large;
-} else {
-	M2 = append (M2_large, M2_small);
-}
-
-# all nodes: internal nodes (if any) followed by the leaf nodes
-if (no_large_internal_node & no_small_internal_node) {
-	# no tree has been split at all: neither M1_large nor M1_small exists, so the model
-	# consists of the leaf columns only
-	M = M2;
-} else {
-	if (no_large_internal_node) {
-		M1 = M1_small;
-	} else if (no_small_internal_node) {
-		M1 = M1_large;
-	} else {
-		M1 = append (M1_large, M1_small);
-	}
-	M = append (M1, M2);
-}
-
-# the two sorts together order the columns by tree id and, within a tree, by node id
-# (assumes order() keeps the relative order of equal keys -- TODO confirm)
-M = t (order (target = t (M), by = 1)); # sort by node id
-M = t (order (target = t (M), by = 2)); # sort by tree id
-
-
-# removing redundant subtrees
-# A subtree is redundant when two sibling leaves predict the same class label: the split above
-# them does not change any prediction, so the parent is turned into a leaf and both children are
-# dropped. The check is repeated until no such pair is left, because a parent that has just
-# become a leaf may in turn form a redundant pair with its own sibling.
-# Siblings are detected as neighbouring columns of M, which relies on M being sorted by
-# tree id and node id at this point.
-if (ncol (M) > 1) {
-	print ("CHECKING FOR REDUNDANT SUBTREES...");
-	red_leaf = TRUE;
-	process_red_subtree = FALSE;
-	# 1 marks columns of M that have to be removed after the loop
-	invalid_node_ind = matrix (0, rows = 1, cols = ncol (M));
-	while (red_leaf & ncol (M) > 1) {
-		# row 4 is 0 for leaves; internal nodes carry a feature id there and removed nodes carry -1
-		leaf_ind = ppred (M[4,], 0, "==");
-		# class label for leaves, 0 for every other column
-		labels = M[5,] * leaf_ind;
-		tree_ids = M[2,];
-		# children of node k have the ids 2k and 2k + 1, hence floor (id / 2) is the parent id
-		parent_ids = floor (M[1,] /2);
-		# compare every column with its right neighbour
-		cond1 = ppred (labels[,1:(ncol (M) - 1)], labels[,2:ncol (M)], "=="); # sibling leaves with same label
-		cond2 = ppred (parent_ids[,1:(ncol (M) - 1)], parent_ids[,2:ncol (M)], "=="); # same parents
-		cond3 = ppred (tree_ids[,1:(ncol (M) - 1)], tree_ids[,2:ncol (M)], "=="); # same tree
-		# 1 at position j if column j + 1 is the right leaf of a redundant pair; only the right column is
-		# tested for being a leaf, the left one is covered by cond1 since non-leaf columns have label 0
-		# (assumes leaf labels are always >= 1 -- TODO confirm)
-		red_leaf_ind =  cond1 * cond2 * cond3 * leaf_ind[,2:ncol (M)];	
-		
-		if (sum (red_leaf_ind) > 0) { # if redundant subtrees exist
-			# (node id, tree id) of the right leaf of every redundant pair
-			red_leaf_ids = M[1:2,2:ncol (M)] * red_leaf_ind;
-			red_leaf_ids_nonzero = removeEmpty (target = red_leaf_ids, margin = "cols");
-			parfor (it in 1:ncol (red_leaf_ids_nonzero), check = 0){
-				cur_right_leaf_id = as.scalar (red_leaf_ids_nonzero[1,it]); 
-				cur_parent_id = floor (cur_right_leaf_id / 2);
-				cur_tree_id = as.scalar (red_leaf_ids_nonzero[2,it]); 
-				# column positions of the right leaf and of its parent within M
-				cur_right_leaf_pos = as.scalar (rowIndexMax (ppred (M[1,], cur_right_leaf_id, "==") * ppred (M[2,], cur_tree_id, "==")));
-				cur_parent_pos = as.scalar(rowIndexMax (ppred (M[1,], cur_parent_id, "==") * ppred (M[2,], cur_tree_id, "==")));
-				# the parent takes over rows 3.. of the right leaf and thereby becomes a leaf itself (row 4 = 0)
-				M[3:nrow (M), cur_parent_pos] = M[3:nrow (M), cur_right_leaf_pos];
-				# mark both children (the left sibling is the column directly before the right one) as removed,
-				# so that they are neither leaves nor internal nodes in the next iteration
-				M[4,cur_right_leaf_pos] = -1;
-				M[4,cur_right_leaf_pos - 1] = -1;
-				invalid_node_ind[1,cur_right_leaf_pos] = 1;
-				invalid_node_ind[1,cur_right_leaf_pos - 1] = 1;				
-			}
-			process_red_subtree = TRUE;
-		} else {
-			red_leaf = FALSE;
-		}
-	}
-	
-	if (process_red_subtree) {
-		print ("REMOVING REDUNDANT SUBTREES...");
-		# zero out the columns marked as removed and drop them from M
-		valid_node_ind = ppred (invalid_node_ind, 0, "==");
-		M = removeEmpty (target = M * valid_node_ind, margin = "cols");
-	}
-}
-
-# Fill row 3 of M: for every internal node, the distance (in columns of M) to its left child.
-# Internal nodes are the columns with a positive entry in row 4.
-internal_ind = ppred (M[4,], 0, ">");
-internal_ids = M[1:2,] * internal_ind; 
-internal_ids_nonzero = removeEmpty (target = internal_ids, margin = "cols");
-if (as.scalar (internal_ids_nonzero[1,1]) > 0) { # if internal nodes exist 
-    # a1: ids of the internal nodes, a2: ids of their left children (2 * node id)
-    a1 = internal_ids_nonzero[1,];
-    a2 = internal_ids_nonzero[1,] * 2;
-    vcur_tree_id = internal_ids_nonzero[2,];
-    # column positions in M of each internal node and of its left child (matched on node id and tree id)
-    pos_a1 = rowIndexMax( outer(t(a1), M[1,], "==") * outer(t(vcur_tree_id), M[2,], "==") );
-    pos_a2 = rowIndexMax( outer(t(a2), M[1,], "==") * outer(t(vcur_tree_id), M[2,], "==") );
-    # write pos_a2 - pos_a1 at position pos_a1; all other entries of row 3 become 0
-    M[3,] = t(table(pos_a1, 1, pos_a2 - pos_a1, ncol(M), 1));
-} 
-else {
-    print ("All trees in the random forest contain only one leaf!");
-}
-
-# write the outputs; C is written only if fileC differs from " "
-# (fileC, fileM and fmtO are defined outside this section)
-if (fileC != " ") {
-	write (C, fileC, format = fmtO);
-}
-write (M, fileM, format = fmtO);
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+#  
+# THIS SCRIPT IMPLEMENTS CLASSIFICATION RANDOM FOREST WITH BOTH SCALE AND CATEGORICAL FEATURES
+#
+# INPUT         		PARAMETERS:
+# ---------------------------------------------------------------------------------------------
+# NAME          		TYPE     DEFAULT      MEANING
+# ---------------------------------------------------------------------------------------------
+# X             		String   ---          Location to read feature matrix X; note that X needs to be both recoded and dummy coded 
+# Y 					String   ---		  Location to read label matrix Y; note that Y needs to be both recoded and dummy coded
+# R   	  				String   " "	      Location to read the matrix R which for each feature in X contains the following information 
+#												- R[,1]: column ids
+#												- R[,2]: start indices 
+#												- R[,3]: end indices
+#											  If R is not provided by default all variables are assumed to be scale
+# bins          		Int 	 20			  Number of equiheight bins per scale feature to choose thresholds
+# depth         		Int 	 25			  Maximum depth of the learned tree
+# num_leaf      		Int      10           Number of samples when splitting stops and a leaf node is added
+# num_samples   		Int 	 3000		  Number of samples at which point we switch to in-memory subtree building
+# num_trees     		Int 	 1			  Number of trees to be learned in the random forest model
+# subsamp_rate  		Double   1.0		  Parameter controlling the size of each tree in the forest; samples are selected from a 
+#											  Poisson distribution with parameter subsamp_rate (the default value is 1.0)
+# feature_subset    	Double   0.5    	  Parameter that controls the number of features used as candidates for splitting at each tree node 
+#											  as a power of number of features in the dataset;
+#											  by default square root of features (i.e., feature_subset = 0.5) are used at each tree node 
+# impurity      		String   "Gini"    	  Impurity measure: entropy or Gini (the default)
+# M             		String 	 ---	   	  Location to write matrix M containing the learned tree
+# C 					String   " "		  Location to write matrix C containing the number of times samples are chosen in each tree of the random forest 
+# S_map					String   " "		  Location to write the mappings from scale feature ids to global feature ids
+# C_map					String   " "		  Location to write the mappings from categorical feature ids to global feature ids
+# fmt     	    		String   "text"       The output format of the model (matrix M), such as "text" or "csv"
+# ---------------------------------------------------------------------------------------------
+# OUTPUT: 
+# Matrix M where each column corresponds to a node in the learned tree and each row contains the following information:
+#	 M[1,j]: id of node j (in a complete binary tree)
+#	 M[2,j]: tree id to which node j belongs
+#	 M[3,j]: Offset (no. of columns) to left child of j 
+#	 M[4,j]: Feature index of the feature that node j looks at if j is an internal node, otherwise 0
+#	 M[5,j]: Type of the feature that node j looks at if j is an internal node: 1 for scale and 2 for categorical features, 
+#		     otherwise the label that leaf node j is supposed to predict
+#	 M[6,j]: 1 if j is an internal node and the feature chosen for j is scale, otherwise the size of the subset of values 
+#			 stored in rows 7,8,... if j is categorical
+#	 M[7:,j]: Only applicable for internal nodes. Threshold the example's feature value is compared to is stored at M[7,j] if the feature chosen for j is scale;
+# 			  If the feature chosen for j is categorical rows 7,8,... depict the value subset chosen for j   
+# -------------------------------------------------------------------------------------------
+# HOW TO INVOKE THIS SCRIPT - EXAMPLE:
+# hadoop jar SystemML.jar -f random-forest.dml -nvargs X=INPUT_DIR/X Y=INPUT_DIR/Y R=INPUT_DIR/R M=OUTPUT_DIR/model
+#     				                 				   bins=20 depth=25 num_leaf=10 num_samples=3000 num_trees=10 impurity=Gini fmt=csv
+
+
+# External function for binning
+binning = externalFunction(Matrix[Double] A, Integer binsize, Integer numbins) return (Matrix[Double] B, Integer numbinsdef) 
+	implemented in (classname="org.apache.sysml.udf.lib.BinningWrapper",exectype="mem")
+
+	
+# Default values of some parameters	
+fileR = ifdef ($R, " ");
+fileC = ifdef ($C, " ");
+fileS_map = ifdef ($S_map, " ");
+fileC_map = ifdef ($C_map, " ");
+fileM = $M;	
+num_bins = ifdef($bins, 20); 
+depth = ifdef($depth, 25);
+num_leaf = ifdef($num_leaf, 10);
+num_trees = ifdef($num_trees, 1); 
+threshold = ifdef ($num_samples, 3000);
+imp = ifdef($impurity, "Gini");
+rate = ifdef ($subsamp_rate, 1);
+fpow = ifdef ($feature_subset, 0.5);
+fmtO = ifdef($fmt, "text");
+
+X = read($X);
+Y_bin = read($Y);
+num_records = nrow (X);
+num_classes = ncol (Y_bin);
+
+# check if there is only one class label
+Y_bin_sum = sum (ppred (colSums (Y_bin), num_records, "=="));
+if (Y_bin_sum == 1) {
+	stop ("Y contains only one class label. No model will be learned!");
+} else if (Y_bin_sum > 1) {
+	stop ("Y is not properly dummy coded. Multiple columns of Y contain only ones!")
+}
+
+# split data into X_scale and X_cat
+if (fileR != " ") {
+	R = read (fileR);
+	R = order (target = R, by = 2); # sort by start indices
+	dummy_coded = ppred (R[,2], R[,3], "!=");
+	R_scale = removeEmpty (target = R[,2:3] * (1 - dummy_coded), margin = "rows");
+	R_cat = removeEmpty (target = R[,2:3] * dummy_coded, margin = "rows");
+	if (fileS_map != " ") {
+		scale_feature_mapping = removeEmpty (target = (1 - dummy_coded) * seq (1, nrow (R)), margin = "rows");
+		write (scale_feature_mapping, fileS_map, format = fmtO);
+	}
+	if (fileC_map != " ") {
+		cat_feature_mapping = removeEmpty (target = dummy_coded * seq (1, nrow (R)), margin = "rows");	
+		write (cat_feature_mapping, fileC_map, format = fmtO);		
+	}
+	sum_dummy = sum (dummy_coded);	
+	if (sum_dummy == nrow (R)) { # all features categorical
+		print ("All features categorical");
+		num_cat_features = nrow (R_cat);
+		num_scale_features = 0;
+		X_cat = X;	
+		distinct_values = t (R_cat[,2] - R_cat[,1] + 1);
+		distinct_values_max = max (distinct_values);
+		distinct_values_offset = cumsum (t (distinct_values));
+		distinct_values_overall = sum (distinct_values);
+	} else if (sum_dummy == 0) { # all features scale
+		print ("All features scale");
+		num_scale_features = ncol (X);
+		num_cat_features = 0;
+		X_scale = X;
+		distinct_values_max = 1;
+	} else { # some features scale some features categorical 
+		num_cat_features = nrow (R_cat);
+		num_scale_features = nrow (R_scale);
+		distinct_values = t (R_cat[,2] - R_cat[,1] + 1);
+		distinct_values_max = max (distinct_values);
+		distinct_values_offset = cumsum (t (distinct_values));
+		distinct_values_overall = sum (distinct_values);
+		
+		W = matrix (1, rows = num_cat_features, cols = 1) %*% matrix ("1 -1", rows = 1, cols = 2);
+		W = matrix (W, rows = 2 * num_cat_features, cols = 1);
+		if (as.scalar (R_cat[num_cat_features, 2]) == ncol (X)) {
+			W[2 * num_cat_features,] = 0;
+		}
+		
+		last = ppred (R_cat[,2], ncol (X), "!=");
+		R_cat1 = (R_cat[,2] + 1) * last;
+		R_cat[,2] = (R_cat[,2] * (1 - last)) + R_cat1;
+		R_cat_vec = matrix (R_cat, rows = 2 * num_cat_features, cols = 1);	
+
+		col_tab = table (R_cat_vec, 1, W, ncol (X), 1);
+		col_ind = cumsum (col_tab);
+		
+		col_ind_cat = removeEmpty (target = col_ind * seq (1, ncol (X)), margin = "rows");
+		col_ind_scale = removeEmpty (target = (1 - col_ind) * seq (1, ncol (X)), margin = "rows");	
+		X_cat = X %*% table (col_ind_cat, seq (1, nrow (col_ind_cat)), ncol (X), nrow (col_ind_cat));
+		X_scale = X %*% table (col_ind_scale, seq (1, nrow (col_ind_scale)), ncol (X), nrow (col_ind_scale));		
+	}	
+} else { # only scale features exist
+	print ("All features scale");
+	num_scale_features = ncol (X);
+	num_cat_features = 0;
+	X_scale = X;
+	distinct_values_max = 1;
+}	
+
+if (num_scale_features > 0) {
+
+	print ("COMPUTING BINNING...");
+	bin_size = max (as.integer (num_records / num_bins), 1);
+	count_thresholds = matrix (0, rows = 1, cols = num_scale_features)
+	thresholds = matrix (0, rows = num_bins + 1, cols = num_scale_features)
+	parfor(i1 in 1:num_scale_features) { 
+		col = order (target = X_scale[,i1], by = 1, decreasing = FALSE);
+		[col_bins, num_bins_defined] = binning (col, bin_size, num_bins);
+		count_thresholds[,i1] = num_bins_defined;
+		thresholds[,i1] = col_bins;	
+	}
+	
+	print ("PREPROCESSING SCALE FEATURE MATRIX...");
+	min_num_bins = min (count_thresholds);
+	max_num_bins = max (count_thresholds);
+	total_num_bins = sum (count_thresholds);
+	cum_count_thresholds = t (cumsum (t (count_thresholds)));
+	X_scale_ext = matrix (0, rows = num_records, cols = total_num_bins);
+	parfor (i2 in 1:num_scale_features, check = 0) { 
+		Xi2 = X_scale[,i2];
+		count_threshold = as.scalar (count_thresholds[,i2]);
+		offset_feature = 1;
+		if (i2 > 1) {
+			offset_feature = offset_feature + as.integer (as.scalar (cum_count_thresholds[, (i2 - 1)]));
+		}
+
+		ti2 = t(thresholds[1:count_threshold, i2]);
+		X_scale_ext[,offset_feature:(offset_feature + count_threshold - 1)] = outer (Xi2, ti2, "<");
+	}
+}
+
+num_features_total = num_scale_features + num_cat_features;
+num_feature_samples = as.integer (floor (num_features_total ^ fpow));
+
+##### INITIALIZATION
+L = matrix (1, rows = num_records, cols = num_trees); # last visited node id for each training sample
+
+# create matrix of counts (generated by Poisson distribution) storing how many times each sample appears in each tree
+print ("CONPUTING COUNTS...");
+C = rand (rows = num_records, cols = num_trees, pdf = "poisson", lambda = rate);
+Ix_nonzero = ppred (C, 0, "!=");
+L = L * Ix_nonzero;
+total_counts = sum (C);
+
+
+# model
+# LARGE leaf nodes
+# NC_large[,1]: node id
+# NC_large[,2]: tree id
+# NC_large[,3]: class label
+# NC_large[,4]: no. of misclassified samples 
+# NC_large[,5]: 1 if special leaf (impure and no. of samples at that leaf > threshold) or 0 otherwise 
+NC_large = matrix (0, rows = 5, cols = 1); 
+
+# SMALL leaf nodes 
+# same schema as for LARGE leaf nodes (to be initialized)
+NC_small = matrix (0, rows = 5, cols = 1); 
+
+# LARGE internal nodes
+# Q_large[,1]: node id
+# Q_large[,2]: tree id
+Q_large = matrix (0, rows = 2, cols = num_trees); 
+Q_large[1,] = matrix (1, rows = 1, cols = num_trees);
+Q_large[2,] = t (seq (1, num_trees));
+
+# SMALL internal nodes
+# same schema as for LARGE internal nodes (to be initialized)
+Q_small = matrix (0, rows = 2, cols = 1); 
+
+# F_large[,1]: feature
+# F_large[,2]: type
+# F_large[,3]: offset 
+F_large = matrix (0, rows = 3, cols = 1);
+
+# same schema as for LARGE nodes
+F_small = matrix (0, rows = 3, cols = 1); 
+
+# split points for LARGE internal nodes
+S_large = matrix (0, rows = 1, cols = 1);
+
+# split points for SMALL internal nodes 
+S_small = matrix (0, rows = 1, cols = 1); 
+
+# initialize queue
+cur_nodes_large = matrix (1, rows = 2, cols = num_trees);
+cur_nodes_large[2,] = t (seq (1, num_trees));
+
+num_cur_nodes_large = num_trees;
+num_cur_nodes_small = 0;
+level = 0;
+
+while ((num_cur_nodes_large + num_cur_nodes_small) > 0 & level < depth) {
+	
+	level = level + 1;
+	print (" --- start level " + level + " --- ");
+	
+	##### PREPARE MODEL
+	if (num_cur_nodes_large > 0) { # LARGE nodes to process
+		cur_Q_large = matrix (0, rows = 2, cols = 2 * num_cur_nodes_large);
+		cur_NC_large = matrix (0, rows = 5, cols = 2 * num_cur_nodes_large); 
+		cur_F_large = matrix (0, rows = 3, cols = num_cur_nodes_large); 
+		cur_S_large = matrix (0, rows = 1, cols = num_cur_nodes_large * distinct_values_max); 
+		cur_nodes_small = matrix (0, rows = 3, cols = 2 * num_cur_nodes_large); 
+	}
+
+	##### LOOP OVER LARGE NODES...
+	parfor (i6 in 1:num_cur_nodes_large, check = 0) { 
+	
+		cur_node = as.scalar (cur_nodes_large[1,i6]);
+		cur_tree = as.scalar (cur_nodes_large[2,i6]);
+			
+		# select sample features WOR
+		feature_samples = sample (num_features_total, num_feature_samples);
+		feature_samples = order (target = feature_samples, by = 1);
+		num_scale_feature_samples = sum (ppred (feature_samples, num_scale_features, "<="));
+		num_cat_feature_samples = num_feature_samples - num_scale_feature_samples;
+		
+		# --- find best split ---
+		# samples that reach cur_node 
+		Ix = ppred (L[,cur_tree], cur_node, "==");		
+		
+		cur_Y_bin = Y_bin * (Ix * C[,cur_tree]);
+		label_counts_overall = colSums (cur_Y_bin);
+		label_sum_overall = sum (label_counts_overall);
+		label_dist_overall = label_counts_overall / label_sum_overall;
+
+		if (imp == "entropy") {
+			label_dist_zero = ppred (label_dist_overall, 0, "==");
+			cur_impurity = - sum (label_dist_overall * log (label_dist_overall + label_dist_zero)); # / log (2); # impurity before
+		} else { # imp == "Gini"
+			cur_impurity = sum (label_dist_overall * (1 - label_dist_overall)); # impurity before
+		}
+		best_scale_gain = 0;
+		best_cat_gain = 0;
+		if (num_scale_features > 0 & num_scale_feature_samples > 0) {
+			
+			scale_feature_samples = feature_samples[1:num_scale_feature_samples,];
+			
+			# main operation	
+			label_counts_left_scale = t (t (cur_Y_bin) %*% X_scale_ext); 
+		
+			# compute left and right label distribution
+			label_sum_left = rowSums (label_counts_left_scale);
+			label_dist_left = label_counts_left_scale / label_sum_left;
+			if (imp == "entropy") {
+				label_dist_left = replace (target = label_dist_left, pattern = 0, replacement = 1);
+				log_label_dist_left = log (label_dist_left); # / log (2)
+				impurity_left_scale = - rowSums (label_dist_left * log_label_dist_left); 
+			} else { # imp == "Gini"
+				impurity_left_scale = rowSums (label_dist_left * (1 - label_dist_left)); 
+			}
+			#
+			label_counts_right_scale = - label_counts_left_scale + label_counts_overall; 
+			label_sum_right = rowSums (label_counts_right_scale);
+			label_dist_right = label_counts_right_scale / label_sum_right;
+			if (imp == "entropy") {
+				label_dist_right = replace (target = label_dist_right, pattern = 0, replacement = 1);
+				log_label_dist_right = log (label_dist_right); # / log (2)
+				impurity_right_scale = - rowSums (label_dist_right * log_label_dist_right); 		
+			} else { # imp == "Gini"
+				impurity_right_scale = rowSums (label_dist_right * (1 - label_dist_right)); 
+			}
+			
+			I_gain_scale = cur_impurity - ( ( label_sum_left / label_sum_overall ) * impurity_left_scale + ( label_sum_right / label_sum_overall ) * impurity_right_scale); 
+		
+			I_gain_scale = replace (target = I_gain_scale, pattern = "NaN", replacement = 0);	
+			
+			# determine best feature to split on and the split value
+			feature_start_ind = matrix (0, rows = 1, cols = num_scale_features);
+			feature_start_ind[1,1] = 1;
+			if (num_scale_features > 1) {
+				feature_start_ind[1,2:num_scale_features] = cum_count_thresholds[1,1:(num_scale_features - 1)] + 1;
+			}
+			max_I_gain_found = 0;
+			max_I_gain_found_ind = 0;
+			best_i = 0;
+			
+			for (i in 1:num_scale_feature_samples) { # assuming feature_samples is 5x1
+				cur_feature_samples_bin = as.scalar (scale_feature_samples[i,]); 
+				cur_start_ind = as.scalar (feature_start_ind[,cur_feature_samples_bin]);
+				cur_end_ind = as.scalar (cum_count_thresholds[,cur_feature_samples_bin]);
+				I_gain_portion = I_gain_scale[cur_start_ind:cur_end_ind,];
+				cur_max_I_gain = max (I_gain_portion);
+				cur_max_I_gain_ind = as.scalar (rowIndexMax (t (I_gain_portion)));
+				if (cur_max_I_gain > max_I_gain_found) {
+					max_I_gain_found = cur_max_I_gain;
+					max_I_gain_found_ind = cur_max_I_gain_ind;
+					best_i = i;
+				}
+			}
+
+			best_scale_gain = max_I_gain_found;
+			max_I_gain_ind_scale = max_I_gain_found_ind;
+			best_scale_feature = 0;
+			if (best_i > 0) {
+				best_scale_feature = as.scalar (scale_feature_samples[best_i,]);
+			}
+			best_scale_split = max_I_gain_ind_scale;
+			if (best_scale_feature > 1) {
+				best_scale_split = best_scale_split + as.scalar(cum_count_thresholds[,(best_scale_feature - 1)]);
+			}					
+		}
+	
+		if (num_cat_features > 0 & num_cat_feature_samples > 0){
+			
+			cat_feature_samples = feature_samples[(num_scale_feature_samples + 1):(num_scale_feature_samples + num_cat_feature_samples),] - num_scale_features;
+			
+			# initialization
+			split_values_bin = matrix (0, rows = 1, cols = distinct_values_overall); 
+			split_values = split_values_bin; 
+			split_values_offset = matrix (0, rows = 1, cols = num_cat_features); 
+			I_gains = split_values_offset; 
+			impurities_left = split_values_offset;
+			impurities_right = split_values_offset;
+			best_label_counts_left = matrix (0, rows = num_cat_features, cols = num_classes);
+			best_label_counts_right = matrix (0, rows = num_cat_features, cols = num_classes);
+			
+			# main operation
+			label_counts = t (t (cur_Y_bin) %*% X_cat);  			
+			
+			parfor (i9 in 1:num_cat_feature_samples, check = 0){
+			
+				cur_cat_feature = as.scalar (cat_feature_samples[i9,1]);
+				start_ind = 1;
+				if (cur_cat_feature > 1) {
+					start_ind = start_ind + as.scalar (distinct_values_offset[(cur_cat_feature - 1),]);
+				}
+				offset = as.scalar (distinct_values[1,cur_cat_feature]);
+				
+				cur_label_counts = label_counts[start_ind:(start_ind + offset - 1),];
+								
+				label_sum = rowSums (cur_label_counts);
+				label_dist = cur_label_counts / label_sum;
+				if (imp == "entropy") {
+					label_dist = replace (target = label_dist, pattern = 0, replacement = 1);
+					log_label_dist = log (label_dist); # / log(2)
+					impurity = - rowSums (label_dist * log_label_dist); 
+					impurity = replace (target = impurity, pattern = "NaN", replacement = 1/0); 
+				} else { # imp == "Gini"
+					impurity = rowSums (label_dist * (1 - label_dist)); 				
+				}
+			
+				# sort cur feature by impurity
+				cur_distinct_values = seq (1, nrow (cur_label_counts));
+				cur_distinct_values_impurity = append (cur_distinct_values, impurity);
+				cur_feature_sorted = order (target = cur_distinct_values_impurity, by = 2, decreasing = FALSE);
+				P = table (cur_distinct_values, cur_feature_sorted); # permutation matrix
+				label_counts_sorted = P %*% cur_label_counts;
+
+				# compute left and right label distribution			
+				label_counts_left = cumsum (label_counts_sorted);
+			
+				label_sum_left = rowSums (label_counts_left);
+				label_dist_left = label_counts_left / label_sum_left;
+				label_dist_left = replace (target = label_dist_left, pattern = "NaN", replacement = 1);
+				if (imp == "entropy") {
+					label_dist_left = replace (target = label_dist_left, pattern = 0, replacement = 1);
+					log_label_dist_left = log (label_dist_left); # / log(2)
+					impurity_left = - rowSums (label_dist_left * log_label_dist_left);
+				} else { # imp == "Gini"
+					impurity_left = rowSums (label_dist_left * (1 - label_dist_left));					
+				}
+				#
+				label_counts_right = - label_counts_left + label_counts_overall;
+				label_sum_right = rowSums (label_counts_right);
+				label_dist_right = label_counts_right / label_sum_right;
+				label_dist_right = replace (target = label_dist_right, pattern = "NaN", replacement = 1);
+				if (imp == "entropy") {
+					label_dist_right = replace (target = label_dist_right, pattern = 0, replacement = 1);
+					log_label_dist_right = log (label_dist_right); # / log (2)
+					impurity_right = - rowSums (label_dist_right * log_label_dist_right);			
+				} else { # imp == "Gini"
+					impurity_right = rowSums (label_dist_right * (1 - label_dist_right));					
+				}
+				I_gain = cur_impurity - ( ( label_sum_left / label_sum_overall ) * impurity_left + ( label_sum_right / label_sum_overall ) * impurity_right);
+			
+				Ix_label_sum_left_zero = ppred (label_sum_left, 0, "==");
+				Ix_label_sum_right_zero = ppred (label_sum_right, 0, "==");
+				Ix_label_sum_zero = Ix_label_sum_left_zero * Ix_label_sum_right_zero;
+				I_gain = I_gain * (1 - Ix_label_sum_zero);	
+				
+				I_gain[nrow (I_gain),] = 0; # last entry invalid
+			
+				max_I_gain_ind = as.scalar (rowIndexMax (t (I_gain)));
+
+				split_values[1, start_ind:(start_ind + max_I_gain_ind - 1)] = t (cur_feature_sorted[1:max_I_gain_ind,1]);
+				for (i10 in 1:max_I_gain_ind) {
+					ind = as.scalar (cur_feature_sorted[i10,1]);
+					if (ind == 1) {
+						split_values_bin[1,start_ind] = 1.0; 
+					} else {
+						split_values_bin[1,(start_ind + ind - 1)] = 1.0;
+					}
+				}
+				split_values_offset[1,cur_cat_feature] = max_I_gain_ind;
+			
+				I_gains[1,cur_cat_feature] = max (I_gain);
+			
+				impurities_left[1,cur_cat_feature] = as.scalar (impurity_left[max_I_gain_ind,]);
+				impurities_right[1,cur_cat_feature] = as.scalar (impurity_right[max_I_gain_ind,]);
+				best_label_counts_left[cur_cat_feature,] = label_counts_left[max_I_gain_ind,];
+				best_label_counts_right[cur_cat_feature,] = label_counts_right[max_I_gain_ind,];				
+			}
+			
+			# determine best feature to split on and the split values
+			best_cat_feature = as.scalar (rowIndexMax (I_gains));
+			best_cat_gain = max (I_gains);
+			start_ind = 1;
+			if (best_cat_feature > 1) {
+				start_ind = start_ind + as.scalar (distinct_values_offset[(best_cat_feature - 1),]);
+			}
+			offset = as.scalar (distinct_values[1,best_cat_feature]);
+			best_split_values_bin = split_values_bin[1, start_ind:(start_ind + offset - 1)];		
+		}		
+	
+		# compare best scale feature to best cat. feature and pick the best one
+		if (num_scale_features > 0 & num_scale_feature_samples > 0 & best_scale_gain >= best_cat_gain & best_scale_gain > 0) {
+			
+			# --- update model ---
+			cur_F_large[1,i6] = best_scale_feature;
+			cur_F_large[2,i6] = 1;
+			cur_F_large[3,i6] = 1;			
+			cur_S_large[1,(i6 - 1) * distinct_values_max + 1] = thresholds[max_I_gain_ind_scale, best_scale_feature]; 
+				
+			left_child = 2 * (cur_node - 1) + 1 + 1;
+			right_child = 2 * (cur_node - 1) + 2 + 1;
+					
+			# samples going to the left subtree
+			Ix_left = X_scale_ext[,best_scale_split]; 
+											
+			Ix_left = Ix * Ix_left;		
+			Ix_right = Ix * (1 - Ix_left);
+				
+			L[,cur_tree] = L[,cur_tree] * (1 - Ix_left) + (Ix_left * left_child);
+			L[,cur_tree] = L[,cur_tree] * (1 - Ix_right) + (Ix_right * right_child);								
+			
+			left_child_size = sum (Ix_left * C[,cur_tree]);
+			right_child_size = sum (Ix_right * C[,cur_tree]);
+			
+			# check if left or right child is a leaf
+			left_pure = FALSE;
+			right_pure = FALSE;
+			cur_impurity_left = as.scalar(impurity_left_scale[best_scale_split,]); # max_I_gain_ind_scale
+			cur_impurity_right = as.scalar(impurity_right_scale[best_scale_split,]); # max_I_gain_ind_scale
+			if ( (left_child_size <= num_leaf | cur_impurity_left == 0 | (level == depth)) & 
+			   (right_child_size <= num_leaf | cur_impurity_right == 0 | (level == depth)) | 
+			   (left_child_size <= threshold & right_child_size <= threshold & (level == depth)) ) { # both left and right nodes are leaf	
+				
+				cur_label_counts_left = label_counts_left_scale[best_scale_split,]; # max_I_gain_ind_scale
+				cur_NC_large[1,(2 * (i6 - 1) + 1)] = left_child; 
+				cur_NC_large[2,(2 * (i6 - 1) + 1)] = cur_tree;
+				cur_NC_large[3,(2 * (i6 - 1) + 1)] = as.scalar( rowIndexMax (cur_label_counts_left)); # leaf class label
+				left_pure = TRUE;	
+				# compute number of misclassified points 
+				cur_NC_large[4,(2 * (i6 - 1) + 1)] = left_child_size - max (cur_label_counts_left); 
+				
+				cur_label_counts_right = label_counts_overall - cur_label_counts_left;
+				cur_NC_large[1,(2 * i6)] = right_child; 
+				cur_NC_large[2,(2 * i6)] = cur_tree;
+				cur_NC_large[3,(2 * i6)] = as.scalar( rowIndexMax (cur_label_counts_right)); # leaf class label
+				right_pure = TRUE;	
+				# compute number of misclassified points
+				cur_NC_large[4,(2 * i6)] = right_child_size - max (cur_label_counts_right); 
+				
+			} else if (left_child_size <= num_leaf | cur_impurity_left == 0 | (level == depth) | 
+					  (left_child_size <= threshold & (level == depth))) {	
+				
+				cur_label_counts_left = label_counts_left_scale[best_scale_split,]; # max_I_gain_ind_scale
+				cur_NC_large[1,(2 * (i6 - 1) + 1)] = left_child; 
+				cur_NC_large[2,(2 * (i6 - 1) + 1)] = cur_tree;
+				cur_NC_large[3,(2 * (i6 - 1) + 1)] = as.scalar( rowIndexMax (cur_label_counts_left)); # leaf class label				
+				left_pure = TRUE;
+				# compute number of misclassified points 
+				cur_NC_large[4,(2 * (i

<TRUNCATED>

[35/55] [partial] incubator-systemml git commit: [SYSTEMML-482] [SYSTEMML-480] Adding a Git attributes file to enfore Unix-styled line endings, and normalizing all of the line endings.

Posted by du...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/main/java/org/apache/sysml/runtime/transform/ApplyTfCSVMapper.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/transform/ApplyTfCSVMapper.java b/src/main/java/org/apache/sysml/runtime/transform/ApplyTfCSVMapper.java
index 0c0d399..7fb1ccc 100644
--- a/src/main/java/org/apache/sysml/runtime/transform/ApplyTfCSVMapper.java
+++ b/src/main/java/org/apache/sysml/runtime/transform/ApplyTfCSVMapper.java
@@ -1,112 +1,112 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.sysml.runtime.transform;
-import java.io.BufferedWriter;
-import java.io.IOException;
-import java.io.OutputStreamWriter;
-
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.Mapper;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reporter;
-import org.apache.wink.json4j.JSONException;
-
-import org.apache.sysml.runtime.DMLRuntimeException;
-
-public class ApplyTfCSVMapper implements Mapper<LongWritable, Text, NullWritable, Text> {
-	
-	boolean _firstRecordInSplit = true;
-	boolean _partFileWithHeader = false;
-	
-	TfUtils tfmapper = null;
-	Reporter _reporter = null;
-	BufferedWriter br = null;
-	JobConf _rJob = null;
-	
-	@Override
-	public void configure(JobConf job) {
-		try {
-			_rJob = job;
-			_partFileWithHeader = TfUtils.isPartFileWithHeader(job);
-			tfmapper = new TfUtils(job);
-			
-			tfmapper.loadTfMetadata(job, true);
-			
-		} catch (IOException e) { throw new RuntimeException(e); }
-		catch(JSONException e)  { throw new RuntimeException(e); }
-
-	}
-	
-	@Override
-	public void map(LongWritable rawKey, Text rawValue, OutputCollector<NullWritable, Text> out, Reporter reporter) throws IOException  {
-		
-		if(_firstRecordInSplit)
-		{
-			_firstRecordInSplit = false;
-			_reporter = reporter;
-			
-			// generate custom output paths so that order of rows in the 
-			// output (across part files) matches w/ that from input data set
-			String partFileSuffix = tfmapper.getPartFileID(_rJob, rawKey.get());
-			Path mapOutputPath = new Path(tfmapper.getOutputPath() + "/transform-part-" + partFileSuffix);
-			
-			// setup the writer for mapper's output
-			// the default part-..... files will be deleted later once the job finishes 
-			br = new BufferedWriter(new OutputStreamWriter(FileSystem.get(_rJob).create( mapOutputPath, true)));
-		}
-		
-		// output the header line
-		if ( rawKey.get() == 0 && _partFileWithHeader ) 
-		{
-			_reporter = reporter;
-			tfmapper.processHeaderLine();
-			if ( tfmapper.hasHeader() )
-				return;
-		}
-		
-		// parse the input line and apply transformation
-		String[] words = tfmapper.getWords(rawValue);
-		
-		if(!tfmapper.omit(words))
-		{
-			try {
-				words = tfmapper.apply(words);
-				String outStr = tfmapper.checkAndPrepOutputString(words);
-				//out.collect(NullWritable.get(), new Text(outStr));
-				br.write(outStr + "\n");
-			} 
-			catch(DMLRuntimeException e) {
-				throw new RuntimeException(e.getMessage() + ": " + rawValue.toString());
-			}
-		}
-	}
-
-	@Override
-	public void close() throws IOException {
-		if ( br != null ) 
-			br.close();
-	}
-
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.runtime.transform;
+import java.io.BufferedWriter;
+import java.io.IOException;
+import java.io.OutputStreamWriter;
+
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.Mapper;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.wink.json4j.JSONException;
+
+import org.apache.sysml.runtime.DMLRuntimeException;
+
+public class ApplyTfCSVMapper implements Mapper<LongWritable, Text, NullWritable, Text> {
+	
+	boolean _firstRecordInSplit = true;
+	boolean _partFileWithHeader = false;
+	
+	TfUtils tfmapper = null;
+	Reporter _reporter = null;
+	BufferedWriter br = null;
+	JobConf _rJob = null;
+	
+	@Override
+	public void configure(JobConf job) {
+		try {
+			_rJob = job;
+			_partFileWithHeader = TfUtils.isPartFileWithHeader(job);
+			tfmapper = new TfUtils(job);
+			
+			tfmapper.loadTfMetadata(job, true);
+			
+		} catch (IOException e) { throw new RuntimeException(e); }
+		catch(JSONException e)  { throw new RuntimeException(e); }
+
+	}
+	
+	@Override
+	public void map(LongWritable rawKey, Text rawValue, OutputCollector<NullWritable, Text> out, Reporter reporter) throws IOException  {
+		
+		if(_firstRecordInSplit)
+		{
+			_firstRecordInSplit = false;
+			_reporter = reporter;
+			
+			// generate custom output paths so that order of rows in the 
+			// output (across part files) matches w/ that from input data set
+			String partFileSuffix = tfmapper.getPartFileID(_rJob, rawKey.get());
+			Path mapOutputPath = new Path(tfmapper.getOutputPath() + "/transform-part-" + partFileSuffix);
+			
+			// setup the writer for mapper's output
+			// the default part-..... files will be deleted later once the job finishes 
+			br = new BufferedWriter(new OutputStreamWriter(FileSystem.get(_rJob).create( mapOutputPath, true)));
+		}
+		
+		// output the header line
+		if ( rawKey.get() == 0 && _partFileWithHeader ) 
+		{
+			_reporter = reporter;
+			tfmapper.processHeaderLine();
+			if ( tfmapper.hasHeader() )
+				return;
+		}
+		
+		// parse the input line and apply transformation
+		String[] words = tfmapper.getWords(rawValue);
+		
+		if(!tfmapper.omit(words))
+		{
+			try {
+				words = tfmapper.apply(words);
+				String outStr = tfmapper.checkAndPrepOutputString(words);
+				//out.collect(NullWritable.get(), new Text(outStr));
+				br.write(outStr + "\n");
+			} 
+			catch(DMLRuntimeException e) {
+				throw new RuntimeException(e.getMessage() + ": " + rawValue.toString());
+			}
+		}
+	}
+
+	@Override
+	public void close() throws IOException {
+		if ( br != null ) 
+			br.close();
+	}
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/main/java/org/apache/sysml/runtime/transform/ApplyTfCSVSPARK.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/transform/ApplyTfCSVSPARK.java b/src/main/java/org/apache/sysml/runtime/transform/ApplyTfCSVSPARK.java
index 693d687..061f2e3 100644
--- a/src/main/java/org/apache/sysml/runtime/transform/ApplyTfCSVSPARK.java
+++ b/src/main/java/org/apache/sysml/runtime/transform/ApplyTfCSVSPARK.java
@@ -1,160 +1,160 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.sysml.runtime.transform;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Iterator;
-
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.spark.api.java.JavaPairRDD;
-import org.apache.spark.api.java.JavaRDD;
-import org.apache.spark.api.java.function.Function2;
-import org.apache.spark.api.java.function.PairFunction;
-import org.apache.spark.broadcast.Broadcast;
-import org.apache.wink.json4j.JSONException;
-import org.apache.wink.json4j.JSONObject;
-
-import scala.Tuple2;
-
-import org.apache.sysml.runtime.DMLRuntimeException;
-import org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext;
-import org.apache.sysml.runtime.matrix.data.CSVFileFormatProperties;
-
-
-public class ApplyTfCSVSPARK {
-	
-	/**
-	 * Apply transformation metadata and generate the result in CSV format, as a
-	 * JavaRDD of Strings.
-	 */
-
-	public static JavaPairRDD<Long, String> runSparkJob(
-			SparkExecutionContext sec, JavaRDD<Tuple2<LongWritable, Text>> inputRDD, 
-			String tfMtdPath, String specFile, 
-			String tmpPath, CSVFileFormatProperties prop, 
-			int numCols, String headerLine
-		) throws IOException, ClassNotFoundException, InterruptedException, IllegalArgumentException, JSONException {
-
-		// Load transformation metadata and broadcast it
-		JobConf job = new JobConf();
-		FileSystem fs = FileSystem.get(job);
-		
-		String[] naStrings = TfUtils.parseNAStrings(prop.getNAStrings());
-		JSONObject spec = TfUtils.readSpec(fs, specFile);
-		TfUtils _tfmapper = new TfUtils(headerLine, prop.hasHeader(), prop.getDelim(), naStrings, spec, numCols, tfMtdPath, null, tmpPath);
-		
-		_tfmapper.loadTfMetadata();
-
-		Broadcast<TfUtils> bcast_tf = sec.getSparkContext().broadcast(_tfmapper);
-		
-		/*
-		 * Construct transformation metadata (map-side) -- the logic is similar
-		 * to GTFMTDMapper
-		 * 
-		 * Note: The result of mapPartitionsWithIndex is cached so that the
-		 * transformed data is not redundantly computed multiple times
-		 */
-		JavaPairRDD<Long, String> applyRDD = inputRDD
-				.mapPartitionsWithIndex( new ApplyTfCSVMap(bcast_tf),  true)
-				.mapToPair(
-						new PairFunction<String,Long,String>(){
-							private static final long serialVersionUID = 3868143093999082931L;
-							@Override
-							public Tuple2<Long, String> call(String t) throws Exception {
-								return new Tuple2<Long, String>(new Long(1), t);
-							}
-						}
-				).cache();
-
-		/*
-		 * An action to force execution of apply()
-		 * 
-		 * We need to trigger the execution of this RDD so as to ensure the
-		 * creation of a few metadata files (headers, dummycoded information,
-		 * etc.), which are referenced in the caller function.
-		 */
-		applyRDD.count();
-		
-		return applyRDD;
-	}
-
-	public static class ApplyTfCSVMap implements Function2<Integer, Iterator<Tuple2<LongWritable, Text>>, Iterator<String>> {
-
-		private static final long serialVersionUID = 1496686437276906911L;
-
-		TfUtils _tfmapper = null;
-		
-		ApplyTfCSVMap(boolean hasHeader, String delim, String naStrings, String specFile, String tmpPath, String tfMtdPath, long numCols, String headerLine, Broadcast<TfUtils> tf) throws IllegalArgumentException, IOException, JSONException {
-			_tfmapper = tf.getValue();
-		}
-		
-		ApplyTfCSVMap(Broadcast<TfUtils> tf) throws IllegalArgumentException, IOException, JSONException {
-			_tfmapper = tf.getValue();
-		}
-		
-		@Override
-		public Iterator<String> call(Integer partitionID,
-				Iterator<Tuple2<LongWritable, Text>> csvLines) throws Exception {
-			
-			boolean first = true;
-			Tuple2<LongWritable, Text> rec = null;
-			ArrayList<String> outLines = new ArrayList<String>();
-			
-			while(csvLines.hasNext()) {
-				rec = csvLines.next();
-				
-				if (first && partitionID == 0) {
-					first = false;
-					
-					_tfmapper.processHeaderLine();
-					
-					if (_tfmapper.hasHeader() ) {
-						//outLines.add(dcdHeader); // if the header needs to be preserved in the output file
-						continue; 
-					}
-				}
-				
-				// parse the input line and apply transformation
-			
-				String[] words = _tfmapper.getWords(rec._2());
-				
-				if(!_tfmapper.omit(words))
-				{
-					try {
-						words = _tfmapper.apply(words);
-						String outStr = _tfmapper.checkAndPrepOutputString(words);
-						outLines.add(outStr);
-					} 
-					catch(DMLRuntimeException e) {
-						throw new RuntimeException(e.getMessage() + ": " + rec._2().toString());
-					}
-				}
-			}
-			
-			return outLines.iterator();
-		}
-		
-	}
-
-	
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.runtime.transform;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Iterator;
+
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.spark.api.java.JavaPairRDD;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.function.Function2;
+import org.apache.spark.api.java.function.PairFunction;
+import org.apache.spark.broadcast.Broadcast;
+import org.apache.wink.json4j.JSONException;
+import org.apache.wink.json4j.JSONObject;
+
+import scala.Tuple2;
+
+import org.apache.sysml.runtime.DMLRuntimeException;
+import org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext;
+import org.apache.sysml.runtime.matrix.data.CSVFileFormatProperties;
+
+
+public class ApplyTfCSVSPARK {
+	
+	/**
+	 * Apply transformation metadata and generate the result in CSV format, as a
+	 * JavaPairRDD whose values are the transformed CSV lines. Every line is
+	 * emitted under the same key (1).
+	 * 
+	 * The returned RDD is cached and already materialized via count().
+	 * 
+	 * @param sec execution context providing the Spark context used for the broadcast
+	 * @param inputRDD input CSV records
+	 * @param tfMtdPath passed to TfUtils (transformation metadata path)
+	 * @param specFile transformation specification, read via TfUtils.readSpec()
+	 * @param tmpPath passed to TfUtils
+	 * @param prop CSV properties supplying NA strings, header flag and delimiter
+	 * @param numCols passed to TfUtils
+	 * @param headerLine passed to TfUtils
+	 * @return pair RDD of (1, transformed line)
+	 */
+	public static JavaPairRDD<Long, String> runSparkJob(
+			SparkExecutionContext sec, JavaRDD<Tuple2<LongWritable, Text>> inputRDD, 
+			String tfMtdPath, String specFile, 
+			String tmpPath, CSVFileFormatProperties prop, 
+			int numCols, String headerLine
+		) throws IOException, ClassNotFoundException, InterruptedException, IllegalArgumentException, JSONException {
+
+		// Load transformation metadata and broadcast it
+		JobConf job = new JobConf();
+		FileSystem fs = FileSystem.get(job);
+		
+		String[] naStrings = TfUtils.parseNAStrings(prop.getNAStrings());
+		JSONObject spec = TfUtils.readSpec(fs, specFile);
+		TfUtils _tfmapper = new TfUtils(headerLine, prop.hasHeader(), prop.getDelim(), naStrings, spec, numCols, tfMtdPath, null, tmpPath);
+		
+		_tfmapper.loadTfMetadata();
+
+		Broadcast<TfUtils> bcast_tf = sec.getSparkContext().broadcast(_tfmapper);
+		
+		/*
+		 * Apply the transformation partition by partition, then key every
+		 * output line with the constant 1.
+		 * 
+		 * Note: The resulting RDD is cached so that the transformed data is
+		 * not redundantly computed multiple times
+		 */
+		JavaPairRDD<Long, String> applyRDD = inputRDD
+				.mapPartitionsWithIndex( new ApplyTfCSVMap(bcast_tf),  true)
+				.mapToPair(
+						new PairFunction<String,Long,String>(){
+							private static final long serialVersionUID = 3868143093999082931L;
+							@Override
+							public Tuple2<Long, String> call(String t) throws Exception {
+								// Long.valueOf reuses the cached box instead of allocating one per line
+								return new Tuple2<Long, String>(Long.valueOf(1L), t);
+							}
+						}
+				).cache();
+
+		/*
+		 * An action to force execution of apply()
+		 * 
+		 * We need to trigger the execution of this RDD so as to ensure the
+		 * creation of a few metadata files (headers, dummycoded information,
+		 * etc.), which are referenced in the caller function.
+		 */
+		applyRDD.count();
+		
+		return applyRDD;
+	}
+
+	/**
+	 * Partition-level function that applies the broadcast TfUtils to every CSV
+	 * line of a partition and returns the transformed lines.
+	 */
+	public static class ApplyTfCSVMap implements Function2<Integer, Iterator<Tuple2<LongWritable, Text>>, Iterator<String>> {
+
+		private static final long serialVersionUID = 1496686437276906911L;
+
+		TfUtils _tfmapper = null;
+		
+		// NOTE(review): only the broadcast argument is used; all other parameters are ignored.
+		ApplyTfCSVMap(boolean hasHeader, String delim, String naStrings, String specFile, String tmpPath, String tfMtdPath, long numCols, String headerLine, Broadcast<TfUtils> tf) throws IllegalArgumentException, IOException, JSONException {
+			_tfmapper = tf.getValue();
+		}
+		
+		ApplyTfCSVMap(Broadcast<TfUtils> tf) throws IllegalArgumentException, IOException, JSONException {
+			_tfmapper = tf.getValue();
+		}
+		
+		/**
+		 * Transforms all lines of one partition.
+		 * 
+		 * @param partitionID index of the partition being processed
+		 * @param csvLines records of the partition
+		 * @return iterator over the transformed lines (omitted rows and a skipped header excluded)
+		 * @throws RuntimeException if a line cannot be transformed; the message carries the offending line
+		 */
+		@Override
+		public Iterator<String> call(Integer partitionID,
+				Iterator<Tuple2<LongWritable, Text>> csvLines) throws Exception {
+			
+			boolean first = true;
+			ArrayList<String> outLines = new ArrayList<String>();
+			
+			while(csvLines.hasNext()) {
+				Tuple2<LongWritable, Text> rec = csvLines.next();
+				
+				// only the first record of partition 0 is a header candidate
+				if (first && partitionID == 0) {
+					first = false;
+					
+					_tfmapper.processHeaderLine();
+					
+					if (_tfmapper.hasHeader() ) {
+						// the header itself is not part of the transformed output
+						continue; 
+					}
+				}
+				
+				// parse the input line; rows flagged by omit() are dropped
+				String[] words = _tfmapper.getWords(rec._2());
+				if(_tfmapper.omit(words))
+					continue;
+				
+				try {
+					words = _tfmapper.apply(words);
+					outLines.add(_tfmapper.checkAndPrepOutputString(words));
+				} 
+				catch(DMLRuntimeException e) {
+					// keep the original exception as the cause so its stack trace is not lost
+					throw new RuntimeException(e.getMessage() + ": " + rec._2().toString(), e);
+				}
+			}
+			
+			return outLines.iterator();
+		}
+		
+	}
+
+	
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/main/java/org/apache/sysml/runtime/transform/BinAgent.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/transform/BinAgent.java b/src/main/java/org/apache/sysml/runtime/transform/BinAgent.java
index f08c9ff..b61c781 100644
--- a/src/main/java/org/apache/sysml/runtime/transform/BinAgent.java
+++ b/src/main/java/org/apache/sysml/runtime/transform/BinAgent.java
@@ -1,355 +1,355 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.sysml.runtime.transform;
-
-import java.io.BufferedReader;
-import java.io.BufferedWriter;
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.io.OutputStreamWriter;
-import java.nio.charset.CharacterCodingException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Iterator;
-
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.IntWritable;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.wink.json4j.JSONArray;
-import org.apache.wink.json4j.JSONException;
-import org.apache.wink.json4j.JSONObject;
-
-import scala.Tuple2;
-
-import org.apache.sysml.runtime.transform.MVImputeAgent.MVMethod;
-import org.apache.sysml.runtime.util.UtilFunctions;
-
-public class BinAgent extends TransformationAgent {
-	
-	private static final long serialVersionUID = 1917445005206076078L;
-
-	public static final String MIN_PREFIX = "min";
-	public static final String MAX_PREFIX = "max";
-	public static final String NBINS_PREFIX = "nbins";
-
-	private int[] _binList = null;
-	//private byte[] _binMethodList = null;	// Not used, since only equi-width is supported for now. 
-	private int[] _numBins = null;
-
-	private double[] _min=null, _max=null;	// min and max among non-missing values
-
-	private double[] _binWidths = null;		// width of a bin for each attribute
-	
-	BinAgent() { }
-	
-	BinAgent(JSONObject parsedSpec) throws JSONException {
-		
-		if ( !parsedSpec.containsKey(TX_METHOD.BIN.toString()) )
-			return;
-		
-		JSONObject obj = (JSONObject) parsedSpec.get(TX_METHOD.BIN.toString());
-		
-		JSONArray attrs = (JSONArray) obj.get(JSON_ATTRS);
-		//JSONArray mthds = (JSONArray) obj.get(JSON_MTHD);
-		JSONArray nbins = (JSONArray) obj.get(JSON_NBINS);
-			
-		assert(attrs.size() == nbins.size());
-			
-		_binList = new int[attrs.size()];
-		_numBins = new int[attrs.size()];
-		for(int i=0; i < _binList.length; i++) {
-			_binList[i] = UtilFunctions.toInt(attrs.get(i));
-			_numBins[i] = UtilFunctions.toInt(nbins.get(i)); 
-		}
-		
-		// initialize internal transformation metadata
-		_min = new double[_binList.length];
-		Arrays.fill(_min, Double.MAX_VALUE);
-		_max = new double[_binList.length];
-		Arrays.fill(_max, -Double.MAX_VALUE);
-		
-		_binWidths = new double[_binList.length];
-	}
-	
-	public void prepare(String[] words, TfUtils agents) {
-		if ( _binList == null )
-			return;
-		
-		for(int i=0; i <_binList.length; i++) {
-			int colID = _binList[i];
-			
-			String w = null;
-			double d = 0;
-				
-			// equi-width
-			w = UtilFunctions.unquote(words[colID-1].trim());
-			if(!agents.isNA(w)) {
-				d = UtilFunctions.parseToDouble(w);
-				if(d < _min[i])
-					_min[i] = d;
-				if(d > _max[i])
-					_max[i] = d;
-			}
-		}
-	}
-	
-	private DistinctValue prepMinOutput(int idx) throws CharacterCodingException {
-		String s =  MIN_PREFIX + Double.toString(_min[idx]);
-		return  new DistinctValue(s, -1L);
-	}
-	
-	private DistinctValue prepMaxOutput(int idx) throws CharacterCodingException {
-		String s =  MAX_PREFIX + Double.toString(_max[idx]);
-		return  new DistinctValue(s, -1L);
-	}
-	
-	private DistinctValue prepNBinsOutput(int idx) throws CharacterCodingException {
-		String s =  NBINS_PREFIX + Double.toString(_numBins[idx]);
-		return  new DistinctValue(s, -1L);
-	}
-	
-	/**
-	 * Method to output transformation metadata from the mappers. 
-	 * This information is collected and merged by the reducers.
-	 * 
-	 * @param out
-	 * @throws IOException
-	 */
-	@Override
-	public void mapOutputTransformationMetadata(OutputCollector<IntWritable, DistinctValue> out, int taskID, TfUtils agents) throws IOException {
-		if ( _binList == null )
-			return;
-		
-		try { 
-			for(int i=0; i < _binList.length; i++) {
-				int colID = _binList[i];
-				IntWritable iw = new IntWritable(-colID);
-				
-				out.collect(iw,  prepMinOutput(i));
-				out.collect(iw,  prepMaxOutput(i));
-				out.collect(iw,  prepNBinsOutput(i));
-			}
-		} catch(Exception e) {
-			throw new IOException(e);
-		}
-	}
-	
-	public ArrayList<Tuple2<Integer, DistinctValue>> mapOutputTransformationMetadata(int taskID, ArrayList<Tuple2<Integer, DistinctValue>> list, TfUtils agents) throws IOException {
-		if ( _binList == null )
-			return list;
-		
-		try { 
-			for(int i=0; i < _binList.length; i++) {
-				int colID = _binList[i];
-				Integer iw = -colID;
-				
-				list.add( new Tuple2<Integer,DistinctValue>(iw, prepMinOutput(i)) );
-				list.add( new Tuple2<Integer,DistinctValue>(iw, prepMaxOutput(i)) );
-				list.add( new Tuple2<Integer,DistinctValue>(iw, prepNBinsOutput(i)) );
-			}
-		} catch(Exception e) {
-			throw new IOException(e);
-		}
-		return list;
-	}
-
-	private void writeTfMtd(int colID, String min, String max, String binwidth, String nbins, String tfMtdDir, FileSystem fs, TfUtils agents) throws IOException 
-	{
-		Path pt = new Path(tfMtdDir+"/Bin/"+ agents.getName(colID) + BIN_FILE_SUFFIX);
-		BufferedWriter br=new BufferedWriter(new OutputStreamWriter(fs.create(pt,true)));
-		br.write(colID + TXMTD_SEP + min + TXMTD_SEP + max + TXMTD_SEP + binwidth + TXMTD_SEP + nbins + "\n");
-		br.close();
-	}
-
-	/** 
-	 * Method to merge map output transformation metadata.
-	 * 
-	 * @param values
-	 * @return
-	 * @throws IOException 
-	 */
-	@Override
-	public void mergeAndOutputTransformationMetadata(Iterator<DistinctValue> values, String outputDir, int colID, FileSystem fs, TfUtils agents) throws IOException {
-		double min = Double.MAX_VALUE;
-		double max = -Double.MAX_VALUE;
-		int nbins = 0;
-		
-		DistinctValue val = new DistinctValue();
-		String w = null;
-		double d;
-		while(values.hasNext()) {
-			val.reset();
-			val = values.next();
-			w = val.getWord();
-			
-			if(w.startsWith(MIN_PREFIX)) {
-				d = UtilFunctions.parseToDouble(w.substring( MIN_PREFIX.length() ));
-				if ( d < min )
-					min = d;
-			}
-			else if(w.startsWith(MAX_PREFIX)) {
-				d = UtilFunctions.parseToDouble(w.substring( MAX_PREFIX.length() ));
-				if ( d > max )
-					max = d;
-			}
-			else if (w.startsWith(NBINS_PREFIX)) {
-				nbins = (int) UtilFunctions.parseToLong( w.substring(NBINS_PREFIX.length() ) );
-			}
-			else
-				throw new RuntimeException("MVImputeAgent: Invalid prefix while merging map output: " + w);
-		}
-		
-		// write merged metadata
-		double binwidth = (max-min)/nbins;
-		writeTfMtd(colID, Double.toString(min), Double.toString(max), Double.toString(binwidth), Integer.toString(nbins), outputDir, fs, agents);
-	}
-	
-	
-	public void outputTransformationMetadata(String outputDir, FileSystem fs, TfUtils agents) throws IOException {
-		if(_binList == null)
-			return;
-		
-		MVImputeAgent mvagent = agents.getMVImputeAgent();
-		for(int i=0; i < _binList.length; i++) {
-			int colID = _binList[i];
-			
-			// If the column is imputed with a constant, then adjust min and max based the value of the constant.
-			if ( mvagent.isImputed(colID) != -1 && mvagent.getMethod(colID) == MVMethod.CONSTANT ) 
-			{
-				double cst = UtilFunctions.parseToDouble( mvagent.getReplacement(colID) );
-				if ( cst < _min[i])
-					_min[i] = cst;
-				if ( cst > _max[i])
-					_max[i] = cst;
-			}
-			
-			double binwidth = (_max[i] - _min[i])/_numBins[i];
-			writeTfMtd(colID, Double.toString(_min[i]), Double.toString(_max[i]), Double.toString(binwidth), Integer.toString(_numBins[i]), outputDir, fs, agents);
-		}
-	}
-	
-	// ------------------------------------------------------------------------------------------------
-
-	public int[] getBinList() { return _binList; }
-	public int[] getNumBins() { return _numBins; }
-	public double[] getMin()  { return _min; }
-	public double[] getBinWidths() { return _binWidths; }
-	
-	/**
-	 * Method to load transform metadata for all attributes
-	 * 
-	 * @param job
-	 * @throws IOException
-	 */
-	@Override
-	public void loadTxMtd(JobConf job, FileSystem fs, Path txMtdDir, TfUtils agents) throws IOException {
-		if ( _binList == null )
-			return;
-		
-		if(fs.isDirectory(txMtdDir)) {
-			for(int i=0; i<_binList.length;i++) {
-				int colID = _binList[i];
-				
-				Path path = new Path( txMtdDir + "/Bin/" + agents.getName(colID) + BIN_FILE_SUFFIX);
-				TfUtils.checkValidInputFile(fs, path, true); 
-					
-				BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(path)));
-				// format: colID,min,max,nbins
-				String[] fields = br.readLine().split(TXMTD_SEP);
-				double min = UtilFunctions.parseToDouble(fields[1]);
-				//double max = UtilFunctions.parseToDouble(fields[2]);
-				double binwidth = UtilFunctions.parseToDouble(fields[3]);
-				int nbins = UtilFunctions.parseToInt(fields[4]);
-				
-				_numBins[i] = nbins;
-				_min[i] = min;
-				_binWidths[i] = binwidth; // (max-min)/nbins;
-				
-				br.close();
-			}
-		}
-		else {
-			fs.close();
-			throw new RuntimeException("Path to recode maps must be a directory: " + txMtdDir);
-		}
-	}
-	
-	/**
-	 * Method to apply transformations.
-	 * 
-	 * @param words
-	 * @return
-	 */
-	@Override
-	public String[] apply(String[] words, TfUtils agents) {
-		if ( _binList == null )
-			return words;
-	
-		for(int i=0; i < _binList.length; i++) {
-			int colID = _binList[i];
-			
-			try {
-			double val = UtilFunctions.parseToDouble(words[colID-1]);
-			int binid = 1;
-			double tmp = _min[i] + _binWidths[i];
-			while(val > tmp && binid < _numBins[i]) {
-				tmp += _binWidths[i];
-				binid++;
-			}
-			words[colID-1] = Integer.toString(binid);
-			} catch(NumberFormatException e)
-			{
-				throw new RuntimeException("Encountered \"" + words[colID-1] + "\" in column ID \"" + colID + "\", when expecting a numeric value. Consider adding \"" + words[colID-1] + "\" to na.strings, along with an appropriate imputation method.");
-			}
-		}
-		
-		return words;
-	}
-	
-	/**
-	 * Check if the given column ID is subjected to this transformation.
-	 * 
-	 */
-	public int isBinned(int colID)
-	{
-		if(_binList == null)
-			return -1;
-		
-		int idx = Arrays.binarySearch(_binList, colID);
-		return ( idx >= 0 ? idx : -1);
-	}
-
-
-	@Override
-	public void print() {
-		System.out.print("Binning List (Equi-width): \n    ");
-		for(int i : _binList) {
-			System.out.print(i + " ");
-		}
-		System.out.print("\n    ");
-		for(int b : _numBins) {
-			System.out.print(b + " ");
-		}
-		System.out.println();
-	}
-
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.runtime.transform;
+
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.OutputStreamWriter;
+import java.nio.charset.CharacterCodingException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Iterator;
+
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.wink.json4j.JSONArray;
+import org.apache.wink.json4j.JSONException;
+import org.apache.wink.json4j.JSONObject;
+
+import scala.Tuple2;
+
+import org.apache.sysml.runtime.transform.MVImputeAgent.MVMethod;
+import org.apache.sysml.runtime.util.UtilFunctions;
+
+/**
+ * Transformation agent for equi-width binning of numeric columns.
+ */
+public class BinAgent extends TransformationAgent {
+	
+	private static final long serialVersionUID = 1917445005206076078L;
+
+	public static final String MIN_PREFIX = "min";
+	public static final String MAX_PREFIX = "max";
+	public static final String NBINS_PREFIX = "nbins";
+
+	private int[] _binList = null;			// IDs of binned columns (1-based: used as words[colID-1]); null if no binning is specified
+	//private byte[] _binMethodList = null;	// Not used, since only equi-width is supported for now. 
+	private int[] _numBins = null;			// number of bins for each binned column
+
+	private double[] _min=null, _max=null;	// min and max among non-missing values
+
+	private double[] _binWidths = null;		// width of a bin for each attribute
+	
+	BinAgent() { }
+	
+	/**
+	 * Reads the list of binned columns and their bin counts from the parsed
+	 * specification. If the specification has no binning entry, all fields
+	 * stay null and every method of this agent becomes a no-op.
+	 * 
+	 * @param parsedSpec parsed transformation specification
+	 * @throws JSONException
+	 */
+	BinAgent(JSONObject parsedSpec) throws JSONException {
+		
+		if ( !parsedSpec.containsKey(TX_METHOD.BIN.toString()) )
+			return;
+		
+		JSONObject obj = (JSONObject) parsedSpec.get(TX_METHOD.BIN.toString());
+		
+		JSONArray attrs = (JSONArray) obj.get(JSON_ATTRS);
+		//JSONArray mthds = (JSONArray) obj.get(JSON_MTHD);
+		JSONArray nbins = (JSONArray) obj.get(JSON_NBINS);
+			
+		// NOTE(review): only checked when assertions are enabled (-ea)
+		assert(attrs.size() == nbins.size());
+			
+		int n = attrs.size();
+		_binList = new int[n];
+		_numBins = new int[n];
+		for(int i=0; i < n; i++) {
+			_binList[i] = UtilFunctions.toInt(attrs.get(i));
+			_numBins[i] = UtilFunctions.toInt(nbins.get(i)); 
+		}
+		
+		// initialize internal transformation metadata:
+		// min/max start at the extremes so that any observed value replaces them
+		_min = new double[n];
+		Arrays.fill(_min, Double.MAX_VALUE);
+		_max = new double[n];
+		Arrays.fill(_max, -Double.MAX_VALUE);
+		
+		_binWidths = new double[n];
+	}
+	
+	/**
+	 * Updates the running min and max of every binned column with the values
+	 * of the given row. Values recognized as NA are skipped.
+	 * 
+	 * @param words fields of one input row
+	 * @param agents used to test for NA strings
+	 */
+	public void prepare(String[] words, TfUtils agents) {
+		if ( _binList == null )
+			return;
+		
+		for(int i=0; i <_binList.length; i++) {
+			int colID = _binList[i];
+			
+			// equi-width
+			String w = UtilFunctions.unquote(words[colID-1].trim());
+			if(agents.isNA(w))
+				continue;
+			
+			double d = UtilFunctions.parseToDouble(w);
+			if(d < _min[i])
+				_min[i] = d;
+			if(d > _max[i])
+				_max[i] = d;
+		}
+	}
+	
+	private DistinctValue prepMinOutput(int idx) throws CharacterCodingException {
+		String s =  MIN_PREFIX + Double.toString(_min[idx]);
+		return  new DistinctValue(s, -1L);
+	}
+	
+	private DistinctValue prepMaxOutput(int idx) throws CharacterCodingException {
+		String s =  MAX_PREFIX + Double.toString(_max[idx]);
+		return  new DistinctValue(s, -1L);
+	}
+	
+	private DistinctValue prepNBinsOutput(int idx) throws CharacterCodingException {
+		// NOTE(review): the bin count is written in double format (e.g., "nbins5.0");
+		// kept unchanged since the merge step parses whatever is written here
+		String s =  NBINS_PREFIX + Double.toString(_numBins[idx]);
+		return  new DistinctValue(s, -1L);
+	}
+	
+	/**
+	 * Method to output transformation metadata from the mappers. 
+	 * This information is collected and merged by the reducers.
+	 * For every binned column, min, max, and number of bins are emitted under
+	 * the key -colID.
+	 * 
+	 * @param out
+	 * @throws IOException
+	 */
+	@Override
+	public void mapOutputTransformationMetadata(OutputCollector<IntWritable, DistinctValue> out, int taskID, TfUtils agents) throws IOException {
+		if ( _binList == null )
+			return;
+		
+		try { 
+			for(int i=0; i < _binList.length; i++) {
+				int colID = _binList[i];
+				IntWritable iw = new IntWritable(-colID);
+				
+				out.collect(iw,  prepMinOutput(i));
+				out.collect(iw,  prepMaxOutput(i));
+				out.collect(iw,  prepNBinsOutput(i));
+			}
+		} catch(Exception e) {
+			throw new IOException(e);
+		}
+	}
+	
+	/**
+	 * List-based variant of the above: appends min, max, and number of bins of
+	 * every binned column to the given list, keyed by -colID.
+	 * 
+	 * @param list list to append to
+	 * @return the given list
+	 * @throws IOException
+	 */
+	public ArrayList<Tuple2<Integer, DistinctValue>> mapOutputTransformationMetadata(int taskID, ArrayList<Tuple2<Integer, DistinctValue>> list, TfUtils agents) throws IOException {
+		if ( _binList == null )
+			return list;
+		
+		try { 
+			for(int i=0; i < _binList.length; i++) {
+				int colID = _binList[i];
+				Integer iw = -colID;
+				
+				list.add( new Tuple2<Integer,DistinctValue>(iw, prepMinOutput(i)) );
+				list.add( new Tuple2<Integer,DistinctValue>(iw, prepMaxOutput(i)) );
+				list.add( new Tuple2<Integer,DistinctValue>(iw, prepNBinsOutput(i)) );
+			}
+		} catch(Exception e) {
+			throw new IOException(e);
+		}
+		return list;
+	}
+
+	/**
+	 * Writes the single-line metadata file of one binned column, in the format
+	 * colID, min, max, binwidth, nbins (separated by TXMTD_SEP).
+	 */
+	private void writeTfMtd(int colID, String min, String max, String binwidth, String nbins, String tfMtdDir, FileSystem fs, TfUtils agents) throws IOException 
+	{
+		Path pt = new Path(tfMtdDir+"/Bin/"+ agents.getName(colID) + BIN_FILE_SUFFIX);
+		BufferedWriter br=new BufferedWriter(new OutputStreamWriter(fs.create(pt,true)));
+		try {
+			br.write(colID + TXMTD_SEP + min + TXMTD_SEP + max + TXMTD_SEP + binwidth + TXMTD_SEP + nbins + "\n");
+		}
+		finally {
+			// release the stream even if the write fails
+			br.close();
+		}
+	}
+
+	/** 
+	 * Method to merge map output transformation metadata.
+	 * Takes the overall min and max of all partial results, derives the bin
+	 * width, and writes the metadata file of the given column.
+	 * 
+	 * @param values
+	 * @throws IOException 
+	 */
+	@Override
+	public void mergeAndOutputTransformationMetadata(Iterator<DistinctValue> values, String outputDir, int colID, FileSystem fs, TfUtils agents) throws IOException {
+		double min = Double.MAX_VALUE;
+		double max = -Double.MAX_VALUE;
+		int nbins = 0;
+		
+		DistinctValue val = new DistinctValue();
+		String w = null;
+		double d;
+		while(values.hasNext()) {
+			// NOTE(review): reset() acts on the previously returned value; kept as in the original
+			val.reset();
+			val = values.next();
+			w = val.getWord();
+			
+			if(w.startsWith(MIN_PREFIX)) {
+				d = UtilFunctions.parseToDouble(w.substring( MIN_PREFIX.length() ));
+				if ( d < min )
+					min = d;
+			}
+			else if(w.startsWith(MAX_PREFIX)) {
+				d = UtilFunctions.parseToDouble(w.substring( MAX_PREFIX.length() ));
+				if ( d > max )
+					max = d;
+			}
+			else if (w.startsWith(NBINS_PREFIX)) {
+				nbins = (int) UtilFunctions.parseToLong( w.substring(NBINS_PREFIX.length() ) );
+			}
+			else
+				throw new RuntimeException("BinAgent: Invalid prefix while merging map output: " + w);
+		}
+		
+		// write merged metadata
+		double binwidth = (max-min)/nbins;
+		writeTfMtd(colID, Double.toString(min), Double.toString(max), Double.toString(binwidth), Integer.toString(nbins), outputDir, fs, agents);
+	}
+	
+	
+	/**
+	 * Writes the metadata files of all binned columns directly from the
+	 * min/max values collected by this agent.
+	 * 
+	 * @throws IOException
+	 */
+	public void outputTransformationMetadata(String outputDir, FileSystem fs, TfUtils agents) throws IOException {
+		if(_binList == null)
+			return;
+		
+		MVImputeAgent mvagent = agents.getMVImputeAgent();
+		for(int i=0; i < _binList.length; i++) {
+			int colID = _binList[i];
+			
+			// If the column is imputed with a constant, then adjust min and max based the value of the constant.
+			if ( mvagent.isImputed(colID) != -1 && mvagent.getMethod(colID) == MVMethod.CONSTANT ) 
+			{
+				double cst = UtilFunctions.parseToDouble( mvagent.getReplacement(colID) );
+				if ( cst < _min[i])
+					_min[i] = cst;
+				if ( cst > _max[i])
+					_max[i] = cst;
+			}
+			
+			double binwidth = (_max[i] - _min[i])/_numBins[i];
+			writeTfMtd(colID, Double.toString(_min[i]), Double.toString(_max[i]), Double.toString(binwidth), Integer.toString(_numBins[i]), outputDir, fs, agents);
+		}
+	}
+	
+	// ------------------------------------------------------------------------------------------------
+
+	public int[] getBinList() { return _binList; }
+	public int[] getNumBins() { return _numBins; }
+	public double[] getMin()  { return _min; }
+	public double[] getBinWidths() { return _binWidths; }
+	
+	/**
+	 * Method to load transform metadata for all attributes
+	 * 
+	 * @param job
+	 * @throws IOException if a metadata file cannot be read or is empty
+	 */
+	@Override
+	public void loadTxMtd(JobConf job, FileSystem fs, Path txMtdDir, TfUtils agents) throws IOException {
+		if ( _binList == null )
+			return;
+		
+		if(!fs.isDirectory(txMtdDir)) {
+			fs.close();
+			throw new RuntimeException("Path to recode maps must be a directory: " + txMtdDir);
+		}
+		
+		for(int i=0; i<_binList.length;i++) {
+			int colID = _binList[i];
+			
+			Path path = new Path( txMtdDir + "/Bin/" + agents.getName(colID) + BIN_FILE_SUFFIX);
+			TfUtils.checkValidInputFile(fs, path, true); 
+				
+			BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(path)));
+			try {
+				// format: colID,min,max,binwidth,nbins
+				String line = br.readLine();
+				if ( line == null )
+					throw new IOException("Empty bin metadata file: " + path);
+				
+				String[] fields = line.split(TXMTD_SEP);
+				double min = UtilFunctions.parseToDouble(fields[1]);
+				//double max = UtilFunctions.parseToDouble(fields[2]);
+				double binwidth = UtilFunctions.parseToDouble(fields[3]);
+				int nbins = UtilFunctions.parseToInt(fields[4]);
+				
+				_numBins[i] = nbins;
+				_min[i] = min;
+				_binWidths[i] = binwidth; // (max-min)/nbins;
+			}
+			finally {
+				// release the stream even if reading or parsing fails
+				br.close();
+			}
+		}
+	}
+	
+	/**
+	 * Method to apply transformations.
+	 * Replaces the value of every binned column by its bin ID, which lies
+	 * between 1 and the number of bins of that column.
+	 * 
+	 * @param words fields of one input row; modified in place
+	 * @return the given array
+	 */
+	@Override
+	public String[] apply(String[] words, TfUtils agents) {
+		if ( _binList == null )
+			return words;
+	
+		for(int i=0; i < _binList.length; i++) {
+			int colID = _binList[i];
+			
+			try {
+				double val = UtilFunctions.parseToDouble(words[colID-1]);
+				
+				// walk the upper bin boundaries until one covers the value;
+				// values beyond the last boundary fall into the last bin
+				int binid = 1;
+				double tmp = _min[i] + _binWidths[i];
+				while(val > tmp && binid < _numBins[i]) {
+					tmp += _binWidths[i];
+					binid++;
+				}
+				words[colID-1] = Integer.toString(binid);
+			} 
+			catch(NumberFormatException e)
+			{
+				// keep the parse failure as the cause
+				throw new RuntimeException("Encountered \"" + words[colID-1] + "\" in column ID \"" + colID + "\", when expecting a numeric value. Consider adding \"" + words[colID-1] + "\" to na.strings, along with an appropriate imputation method.", e);
+			}
+		}
+		
+		return words;
+	}
+	
+	/**
+	 * Check if the given column ID is subjected to this transformation.
+	 * 
+	 * @return position of colID in the bin list, or -1 if the column is not binned
+	 */
+	public int isBinned(int colID)
+	{
+		if(_binList == null)
+			return -1;
+		
+		// linear scan: the list is filled in specification order and never
+		// sorted here, so a binary search could miss existing entries
+		for(int i=0; i < _binList.length; i++) {
+			if( _binList[i] == colID )
+				return i;
+		}
+		return -1;
+	}
+
+
+	@Override
+	public void print() {
+		System.out.print("Binning List (Equi-width): \n    ");
+		if ( _binList == null ) {
+			// no binning specified, nothing to list
+			System.out.println();
+			return;
+		}
+		
+		for(int i : _binList) {
+			System.out.print(i + " ");
+		}
+		System.out.print("\n    ");
+		for(int b : _numBins) {
+			System.out.print(b + " ");
+		}
+		System.out.println();
+	}
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/main/java/org/apache/sysml/runtime/transform/DistinctValue.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/transform/DistinctValue.java b/src/main/java/org/apache/sysml/runtime/transform/DistinctValue.java
index d44a904..2e52657 100644
--- a/src/main/java/org/apache/sysml/runtime/transform/DistinctValue.java
+++ b/src/main/java/org/apache/sysml/runtime/transform/DistinctValue.java
@@ -1,108 +1,108 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.sysml.runtime.transform;
-
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-import java.io.Serializable;
-import java.nio.ByteBuffer;
-import java.nio.charset.CharacterCodingException;
-import java.nio.charset.Charset;
-
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.io.WritableUtils;
-
-import org.apache.sysml.runtime.matrix.CSVReblockMR.OffsetCount;
-import org.apache.sysml.runtime.util.UtilFunctions;
-
-public class DistinctValue implements Writable, Serializable {
-	
-	private static final long serialVersionUID = -8236705946336974836L;
-
-	private static final byte [] EMPTY_BYTES = new byte[0];
-	  
-	// word (distinct value)
-	private byte[] _bytes;
-	private int _length;
-	// count
-	private long _count;
-	
-	public DistinctValue() {
-		_bytes = EMPTY_BYTES;
-		_length = 0;
-		_count = -1;
-	}
-	
-	public DistinctValue(String w, long count) throws CharacterCodingException {
-	    ByteBuffer bb = Text.encode(w, true);
-	    _bytes = bb.array();
-	    _length = bb.limit();
-		_count = count;
-	}
-	
-	public DistinctValue(OffsetCount oc) throws CharacterCodingException 
-	{
-		this(oc.filename + "," + oc.fileOffset, oc.count);
-	}
-	
-	public void reset() {
-		_bytes = EMPTY_BYTES;
-		_length = 0;
-		_count = -1;
-	}
-	
-	public String getWord() {  return new String( _bytes, 0, _length, Charset.forName("UTF-8") ); }
-	public long getCount() { return _count; }
-	
-	@Override
-	public void write(DataOutput out) throws IOException {
-	    // write word
-		WritableUtils.writeVInt(out, _length);
-	    out.write(_bytes, 0, _length);
-		// write count
-	    out.writeLong(_count);
-	}
-	
-	@Override
-	public void readFields(DataInput in) throws IOException {
-	    // read word 
-		int newLength = WritableUtils.readVInt(in);
-	    _bytes = new byte[newLength];
-	    in.readFully(_bytes, 0, newLength);
-	    _length = newLength;
-	    if (_length != _bytes.length)
-	    	System.out.println("ERROR in DistinctValue.readFields()");
-	    // read count
-	    _count = in.readLong();
-	}
-	
-	public OffsetCount getOffsetCount() {
-		OffsetCount oc = new OffsetCount();
-		String[] parts = getWord().split(",");
-		oc.filename = parts[0];
-		oc.fileOffset = UtilFunctions.parseToLong(parts[1]);
-		oc.count = getCount();
-		
-		return oc;
-	}
-	
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.runtime.transform;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.io.Serializable;
+import java.nio.ByteBuffer;
+import java.nio.charset.CharacterCodingException;
+import java.nio.charset.Charset;
+
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.WritableUtils;
+
+import org.apache.sysml.runtime.matrix.CSVReblockMR.OffsetCount;
+import org.apache.sysml.runtime.util.UtilFunctions;
+
+public class DistinctValue implements Writable, Serializable {
+	
+	private static final long serialVersionUID = -8236705946336974836L;
+
+	private static final byte [] EMPTY_BYTES = new byte[0];
+	  
+	// word (distinct value)
+	private byte[] _bytes;
+	private int _length;
+	// count
+	private long _count;
+	
+	public DistinctValue() {
+		_bytes = EMPTY_BYTES;
+		_length = 0;
+		_count = -1;
+	}
+	
+	public DistinctValue(String w, long count) throws CharacterCodingException {
+	    ByteBuffer bb = Text.encode(w, true);
+	    _bytes = bb.array();
+	    _length = bb.limit();
+		_count = count;
+	}
+	
+	public DistinctValue(OffsetCount oc) throws CharacterCodingException 
+	{
+		this(oc.filename + "," + oc.fileOffset, oc.count);
+	}
+	
+	public void reset() {
+		_bytes = EMPTY_BYTES;
+		_length = 0;
+		_count = -1;
+	}
+	
+	public String getWord() {  return new String( _bytes, 0, _length, Charset.forName("UTF-8") ); }
+	public long getCount() { return _count; }
+	
+	@Override
+	public void write(DataOutput out) throws IOException {
+	    // write word
+		WritableUtils.writeVInt(out, _length);
+	    out.write(_bytes, 0, _length);
+		// write count
+	    out.writeLong(_count);
+	}
+	
+	@Override
+	public void readFields(DataInput in) throws IOException {
+	    // read word 
+		int newLength = WritableUtils.readVInt(in);
+	    _bytes = new byte[newLength];
+	    in.readFully(_bytes, 0, newLength);
+	    _length = newLength;
+	    if (_length != _bytes.length)
+	    	System.out.println("ERROR in DistinctValue.readFields()");
+	    // read count
+	    _count = in.readLong();
+	}
+	
+	public OffsetCount getOffsetCount() {
+		OffsetCount oc = new OffsetCount();
+		String[] parts = getWord().split(",");
+		oc.filename = parts[0];
+		oc.fileOffset = UtilFunctions.parseToLong(parts[1]);
+		oc.count = getCount();
+		
+		return oc;
+	}
+	
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/main/java/org/apache/sysml/runtime/transform/DummycodeAgent.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/transform/DummycodeAgent.java b/src/main/java/org/apache/sysml/runtime/transform/DummycodeAgent.java
index a1c76ba..079ad58 100644
--- a/src/main/java/org/apache/sysml/runtime/transform/DummycodeAgent.java
+++ b/src/main/java/org/apache/sysml/runtime/transform/DummycodeAgent.java
@@ -1,426 +1,426 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.sysml.runtime.transform;
-
-import java.io.BufferedWriter;
-import java.io.IOException;
-import java.io.OutputStreamWriter;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.List;
-import java.util.regex.Pattern;
-
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.IntWritable;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.wink.json4j.JSONArray;
-import org.apache.wink.json4j.JSONException;
-import org.apache.wink.json4j.JSONObject;
-
-import com.google.common.base.Functions;
-import com.google.common.collect.Ordering;
-import org.apache.sysml.runtime.util.UtilFunctions;
-
-public class DummycodeAgent extends TransformationAgent {	
-	
-	private static final long serialVersionUID = 5832130477659116489L;
-
-	private int[] _dcdList = null;
-	private long numCols = 0;
-	
-	private HashMap<Integer, HashMap<String,String>> _finalMaps = null;
-	private HashMap<Integer, HashMap<String,Long>> _finalMapsCP = null;
-	private int[] _binList = null;
-	private int[] _numBins = null;
-	
-	private int[] _domainSizes = null;			// length = #of dummycoded columns
-	private int[] _dcdColumnMap = null;			// to help in translating between original and dummycoded column IDs
-	private long _dummycodedLength = 0;			// #of columns after dummycoded
-	
-	DummycodeAgent(int[] list) {
-		_dcdList = list;
-	}
-	
-	DummycodeAgent(JSONObject parsedSpec, long ncol) throws JSONException {
-		numCols = ncol;
-		
-		if ( !parsedSpec.containsKey(TX_METHOD.DUMMYCODE.toString()) )
-			return;
-		
-		JSONObject obj = (JSONObject) parsedSpec.get(TX_METHOD.DUMMYCODE.toString());
-		JSONArray attrs = (JSONArray) obj.get(JSON_ATTRS);
-			
-		_dcdList = new int[attrs.size()];
-		for(int i=0; i < _dcdList.length; i++) 
-			_dcdList[i] = UtilFunctions.toInt(attrs.get(i));
-	}
-	
-	public int[] dcdList() {
-		return _dcdList;
-	}
-	
-	/**
-	 * Method to output transformation metadata from the mappers. 
-	 * This information is collected and merged by the reducers.
-	 * 
-	 * @param out
-	 * @throws IOException
-	 * 
-	 */
-	@Override
-	public void mapOutputTransformationMetadata(OutputCollector<IntWritable, DistinctValue> out, int taskID, TfUtils agents) throws IOException {
-		// There is no metadata required for dummycode.
-		// Required information is output from RecodeAgent.
-		return;
-	}
-	
-	@Override
-	public void mergeAndOutputTransformationMetadata(Iterator<DistinctValue> values,
-			String outputDir, int colID, FileSystem fs, TfUtils agents) throws IOException {
-		// Nothing to do here
-	}
-
-	public void setRecodeMaps(HashMap<Integer, HashMap<String,String>> maps) {
-		_finalMaps = maps;
-	}
-	
-	public void setRecodeMapsCP(HashMap<Integer, HashMap<String,Long>> maps) {
-		_finalMapsCP = maps;
-	}
-	
-	public void setNumBins(int[] binList, int[] numbins) {
-		_binList = binList;
-		_numBins = numbins;
-	}
-	
-	/**
-	 * Method to generate dummyCodedMaps.csv, with the range of column IDs for each variable in the original data.
-	 * 
-	 * Each line in dummyCodedMaps.csv file is of the form: [ColID, 1/0, st, end]
-	 * 		1/0 indicates if ColID is dummycoded or not
-	 * 		[st,end] is the range of dummycoded column numbers for the given ColID
-	 * 
-	 * It also generates coltypes.csv, with the type (scale, nominal, etc.) of columns in the output.
-	 * Recoded columns are of type nominal, binner columns are of type ordinal, dummycoded columns are of type 
-	 * dummycoded, and the remaining are of type scale.
-	 * 
-	 * @param fs
-	 * @param txMtdDir
-	 * @param numCols
-	 * @param ra
-	 * @param ba
-	 * @return Number of columns in the transformed data
-	 * @throws IOException
-	 */
-	public int genDcdMapsAndColTypes(FileSystem fs, String txMtdDir, int numCols, TfUtils agents) throws IOException {
-		
-		// initialize all column types in the transformed data to SCALE
-		ColumnTypes[] ctypes = new ColumnTypes[(int) _dummycodedLength];
-		for(int i=0; i < _dummycodedLength; i++)
-			ctypes[i] = ColumnTypes.SCALE;
-		
-		_dcdColumnMap = new int[numCols];
-
-		Path pt=new Path(txMtdDir+"/Dummycode/" + DCD_FILE_NAME);
-		BufferedWriter br=new BufferedWriter(new OutputStreamWriter(fs.create(pt,true)));
-		
-		int sum=1;
-		int idx = 0;
-		for(int colID=1; colID <= numCols; colID++) 
-		{
-			if ( _dcdList != null && idx < _dcdList.length && _dcdList[idx] == colID )
-			{
-				br.write(colID + "," + "1" + "," + sum + "," + (sum+_domainSizes[idx]-1) + "\n");
-				_dcdColumnMap[colID-1] = (sum+_domainSizes[idx]-1)-1;
-
-				for(int i=sum; i <=(sum+_domainSizes[idx]-1); i++)
-					ctypes[i-1] = ColumnTypes.DUMMYCODED;
-				
-				sum += _domainSizes[idx];
-				idx++;
-			}
-			else 
-			{
-				br.write(colID + "," + "0" + "," + sum + "," + sum + "\n");
-				_dcdColumnMap[colID-1] = sum-1;
-				
-				if ( agents.getBinAgent().isBinned(colID) != -1 )
-					ctypes[sum-1] = ColumnTypes.ORDINAL;	// binned variable results in an ordinal column
-				
-				if ( agents.getRecodeAgent().isRecoded(colID) != -1 )
-					ctypes[sum-1] = ColumnTypes.NOMINAL;
-				
-				sum += 1;
-			}
-		}
-		br.close();
-
-		// Write coltypes.csv
-		pt=new Path(txMtdDir+"/" + COLTYPES_FILE_NAME);
-		br=new BufferedWriter(new OutputStreamWriter(fs.create(pt,true)));
-		
-		br.write(columnTypeToID(ctypes[0]) + "");
-		for(int i = 1; i < _dummycodedLength; i++) 
-			br.write( "," + columnTypeToID(ctypes[i]));
-		br.close();
-		
-		return sum-1;
-	}
-	
-	/**
-	 * Given a dummycoded column id, find the corresponding original column ID.
-	 *  
-	 * @param colID
-	 * @return
-	 */
-	public int mapDcdColumnID(int colID) 
-	{
-		for(int i=0; i < _dcdColumnMap.length; i++)
-		{
-			int st = (i==0 ? 1 : _dcdColumnMap[i-1]+1+1);
-			int end = _dcdColumnMap[i]+1;
-			//System.out.println((i+1) + ": " + "[" + st + "," + end + "]");
-			
-			if ( colID >= st && colID <= end)
-				return i+1;
-		}
-		return -1;
-	}
-	
-	public String constructDummycodedHeader(String header, Pattern delim) {
-		
-		if(_dcdList == null && _binList == null )
-			// none of the columns are dummycoded, simply return the given header
-			return header;
-		
-		String[] names = delim.split(header, -1);
-		List<String> newNames = null;
-		
-		StringBuilder sb = new StringBuilder();
-		
-		// Dummycoding can be performed on either on a recoded column or on a binned column
-		
-		// process recoded columns
-		if(_finalMapsCP != null && _dcdList != null) 
-		{
-			for(int i=0; i <_dcdList.length; i++) 
-			{
-				int colID = _dcdList[i];
-				HashMap<String,Long> map = _finalMapsCP.get(colID);
-				String colName = UtilFunctions.unquote(names[colID-1]);
-				
-				if ( map != null  ) 
-				{
-					// order map entries by their recodeID
-					Ordering<String> valueComparator = Ordering.natural().onResultOf(Functions.forMap(map));
-					newNames = valueComparator.sortedCopy(map.keySet());
-					
-					// construct concatenated string of map entries
-					sb.setLength(0);
-					for(int idx=0; idx < newNames.size(); idx++) 
-					{
-						if(idx==0) 
-							sb.append( colName + DCD_NAME_SEP + newNames.get(idx));
-						else
-							sb.append( delim + colName + DCD_NAME_SEP + newNames.get(idx));
-					}
-					names[colID-1] = sb.toString();			// replace original column name with dcd name
-				}
-			}
-		}
-		else if(_finalMaps != null && _dcdList != null) {
-			for(int i=0; i <_dcdList.length; i++) {
-				int colID = _dcdList[i];
-				HashMap<String,String> map = _finalMaps.get(colID);
-				String colName = UtilFunctions.unquote(names[colID-1]);
-				
-				if ( map != null ) 
-				{
-					// order map entries by their recodeID (represented as Strings .. "1", "2", etc.)
-					Ordering<String> orderByID = new Ordering<String>() 
-					{
-			    		public int compare(String s1, String s2) {
-			        		return (Integer.parseInt(s1) - Integer.parseInt(s2));
-			    		}
-					};
-					
-					newNames = orderByID.onResultOf(Functions.forMap(map)).sortedCopy(map.keySet());
-					// construct concatenated string of map entries
-					sb.setLength(0);
-					for(int idx=0; idx < newNames.size(); idx++) 
-					{
-						if(idx==0) 
-							sb.append( colName + DCD_NAME_SEP + newNames.get(idx));
-						else
-							sb.append( delim + colName + DCD_NAME_SEP + newNames.get(idx));
-					}
-					names[colID-1] = sb.toString();			// replace original column name with dcd name
-				}
-			}
-		}
-		
-		// process binned columns
-		if (_binList != null) 
-			for(int i=0; i < _binList.length; i++) 
-			{
-				int colID = _binList[i];
-				
-				// need to consider only binned and dummycoded columns
-				if(isDummyCoded(colID) == -1)
-					continue;
-				
-				int numBins = _numBins[i];
-				String colName = UtilFunctions.unquote(names[colID-1]);
-				
-				sb.setLength(0);
-				for(int idx=0; idx < numBins; idx++) 
-					if(idx==0) 
-						sb.append( colName + DCD_NAME_SEP + "Bin" + (idx+1) );
-					else
-						sb.append( delim + colName + DCD_NAME_SEP + "Bin" + (idx+1) );
-				names[colID-1] = sb.toString();			// replace original column name with dcd name
-			}
-		
-		// Construct the full header
-		sb.setLength(0);
-		for(int colID=0; colID < names.length; colID++) 
-		{
-			if (colID == 0)
-				sb.append(names[colID]);
-			else
-				sb.append(delim + names[colID]);
-		}
-		//System.out.println("DummycodedHeader: " + sb.toString());
-		
-		return sb.toString();
-	}
-	
-	@Override
-	public void loadTxMtd(JobConf job, FileSystem fs, Path txMtdDir, TfUtils agents) throws IOException {
-		if ( _dcdList == null )
-		{
-			_dummycodedLength = numCols;
-			return;
-		}
-		
-		// sort to-be dummycoded column IDs in ascending order. This is the order in which the new dummycoded record is constructed in apply() function.
-		Arrays.sort(_dcdList);	
-		_domainSizes = new int[_dcdList.length];
-
-		_dummycodedLength = numCols;
-		
-		//HashMap<String, String> map = null;
-		for(int i=0; i<_dcdList.length; i++) {
-			int colID = _dcdList[i];
-			
-			// Find the domain size for colID using _finalMaps or _finalMapsCP
-			int domainSize = 0;
-			if(_finalMaps != null) {
-				if(_finalMaps.get(colID) != null)
-					domainSize = _finalMaps.get(colID).size();
-			}
-			else {
-				if(_finalMapsCP.get(colID) != null)
-					domainSize = _finalMapsCP.get(colID).size();
-			}
-			
-			if ( domainSize != 0 ) {
-				// dummycoded column
-				_domainSizes[i] = domainSize;
-			}
-			else {
-				// binned column
-				if ( _binList != null )
-				for(int j=0; j<_binList.length; j++) {
-					if (colID == _binList[j]) {
-						_domainSizes[i] = _numBins[j];
-						break;
-					}
-				}
-			}
-			_dummycodedLength += _domainSizes[i]-1;
-			//System.out.println("colID=" + colID + ", domainsize=" + _domainSizes[i] + ", dcdLength=" + _dummycodedLength);
-		}
-	}
-
-	/**
-	 * Method to apply transformations.
-	 * 
-	 * @param words
-	 * @return
-	 */
-	@Override
-	public String[] apply(String[] words, TfUtils agents) {
-		
-		if ( _dcdList == null )
-			return words;
-		
-		String[] nwords = new String[(int)_dummycodedLength];
-		
-		int rcdVal = 0;
-		
-		for(int colID=1, idx=0, ncolID=1; colID <= words.length; colID++) {
-			if(idx < _dcdList.length && colID==_dcdList[idx]) {
-				// dummycoded columns
-				try {
-				rcdVal = UtilFunctions.parseToInt(UtilFunctions.unquote(words[colID-1]));
-				nwords[ ncolID-1+rcdVal-1 ] = "1";
-				ncolID += _domainSizes[idx];
-				idx++;
-				} catch (Exception e) {
-					System.out.println("Error in dummycoding: colID="+colID + ", rcdVal=" + rcdVal+", word="+words[colID-1] + ", domainSize=" + _domainSizes[idx] + ", dummyCodedLength=" + _dummycodedLength);
-					throw new RuntimeException(e);
-				}
-			}
-			else {
-				nwords[ncolID-1] = words[colID-1];
-				ncolID++;
-			}
-		}
-		
-		return nwords;
-	}
-	
-	/**
-	 * Check if the given column ID is subjected to this transformation.
-	 * 
-	 */
-	public int isDummyCoded(int colID)
-	{
-		if(_dcdList == null)
-			return -1;
-		
-		int idx = Arrays.binarySearch(_dcdList, colID);
-		return ( idx >= 0 ? idx : -1);
-	}
-	
-	@Override
-	public void print() {
-		System.out.print("Dummycoding List: \n    ");
-		for(int i : _dcdList) {
-			System.out.print(i + " ");
-		}
-		System.out.println();
-	}
-
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.runtime.transform;
+
+import java.io.BufferedWriter;
+import java.io.IOException;
+import java.io.OutputStreamWriter;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.regex.Pattern;
+
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.wink.json4j.JSONArray;
+import org.apache.wink.json4j.JSONException;
+import org.apache.wink.json4j.JSONObject;
+
+import com.google.common.base.Functions;
+import com.google.common.collect.Ordering;
+import org.apache.sysml.runtime.util.UtilFunctions;
+
+public class DummycodeAgent extends TransformationAgent {	
+	
+	private static final long serialVersionUID = 5832130477659116489L;
+
+	private int[] _dcdList = null;
+	private long numCols = 0;
+	
+	private HashMap<Integer, HashMap<String,String>> _finalMaps = null;
+	private HashMap<Integer, HashMap<String,Long>> _finalMapsCP = null;
+	private int[] _binList = null;
+	private int[] _numBins = null;
+	
+	private int[] _domainSizes = null;			// length = #of dummycoded columns
+	private int[] _dcdColumnMap = null;			// to help in translating between original and dummycoded column IDs
+	private long _dummycodedLength = 0;			// #of columns after dummycoded
+	
+	DummycodeAgent(int[] list) {
+		_dcdList = list;
+	}
+	
+	DummycodeAgent(JSONObject parsedSpec, long ncol) throws JSONException {
+		numCols = ncol;
+		
+		if ( !parsedSpec.containsKey(TX_METHOD.DUMMYCODE.toString()) )
+			return;
+		
+		JSONObject obj = (JSONObject) parsedSpec.get(TX_METHOD.DUMMYCODE.toString());
+		JSONArray attrs = (JSONArray) obj.get(JSON_ATTRS);
+			
+		_dcdList = new int[attrs.size()];
+		for(int i=0; i < _dcdList.length; i++) 
+			_dcdList[i] = UtilFunctions.toInt(attrs.get(i));
+	}
+	
+	public int[] dcdList() {
+		return _dcdList;
+	}
+	
+	/**
+	 * Method to output transformation metadata from the mappers. 
+	 * This information is collected and merged by the reducers.
+	 * 
+	 * @param out
+	 * @throws IOException
+	 * 
+	 */
+	@Override
+	public void mapOutputTransformationMetadata(OutputCollector<IntWritable, DistinctValue> out, int taskID, TfUtils agents) throws IOException {
+		// There is no metadata required for dummycode.
+		// Required information is output from RecodeAgent.
+		return;
+	}
+	
+	@Override
+	public void mergeAndOutputTransformationMetadata(Iterator<DistinctValue> values,
+			String outputDir, int colID, FileSystem fs, TfUtils agents) throws IOException {
+		// Nothing to do here
+	}
+
+	public void setRecodeMaps(HashMap<Integer, HashMap<String,String>> maps) {
+		_finalMaps = maps;
+	}
+	
+	public void setRecodeMapsCP(HashMap<Integer, HashMap<String,Long>> maps) {
+		_finalMapsCP = maps;
+	}
+	
+	public void setNumBins(int[] binList, int[] numbins) {
+		_binList = binList;
+		_numBins = numbins;
+	}
+	
+	/**
+	 * Method to generate dummyCodedMaps.csv, with the range of column IDs for each variable in the original data.
+	 * 
+	 * Each line in dummyCodedMaps.csv file is of the form: [ColID, 1/0, st, end]
+	 * 		1/0 indicates if ColID is dummycoded or not
+	 * 		[st,end] is the range of dummycoded column numbers for the given ColID
+	 * 
+	 * It also generates coltypes.csv, with the type (scale, nominal, etc.) of columns in the output.
+	 * Recoded columns are of type nominal, binner columns are of type ordinal, dummycoded columns are of type 
+	 * dummycoded, and the remaining are of type scale.
+	 * 
+	 * @param fs
+	 * @param txMtdDir
+	 * @param numCols
+	 * @param ra
+	 * @param ba
+	 * @return Number of columns in the transformed data
+	 * @throws IOException
+	 */
+	public int genDcdMapsAndColTypes(FileSystem fs, String txMtdDir, int numCols, TfUtils agents) throws IOException {
+		
+		// initialize all column types in the transformed data to SCALE
+		ColumnTypes[] ctypes = new ColumnTypes[(int) _dummycodedLength];
+		for(int i=0; i < _dummycodedLength; i++)
+			ctypes[i] = ColumnTypes.SCALE;
+		
+		_dcdColumnMap = new int[numCols];
+
+		Path pt=new Path(txMtdDir+"/Dummycode/" + DCD_FILE_NAME);
+		BufferedWriter br=new BufferedWriter(new OutputStreamWriter(fs.create(pt,true)));
+		
+		int sum=1;
+		int idx = 0;
+		for(int colID=1; colID <= numCols; colID++) 
+		{
+			if ( _dcdList != null && idx < _dcdList.length && _dcdList[idx] == colID )
+			{
+				br.write(colID + "," + "1" + "," + sum + "," + (sum+_domainSizes[idx]-1) + "\n");
+				_dcdColumnMap[colID-1] = (sum+_domainSizes[idx]-1)-1;
+
+				for(int i=sum; i <=(sum+_domainSizes[idx]-1); i++)
+					ctypes[i-1] = ColumnTypes.DUMMYCODED;
+				
+				sum += _domainSizes[idx];
+				idx++;
+			}
+			else 
+			{
+				br.write(colID + "," + "0" + "," + sum + "," + sum + "\n");
+				_dcdColumnMap[colID-1] = sum-1;
+				
+				if ( agents.getBinAgent().isBinned(colID) != -1 )
+					ctypes[sum-1] = ColumnTypes.ORDINAL;	// binned variable results in an ordinal column
+				
+				if ( agents.getRecodeAgent().isRecoded(colID) != -1 )
+					ctypes[sum-1] = ColumnTypes.NOMINAL;
+				
+				sum += 1;
+			}
+		}
+		br.close();
+
+		// Write coltypes.csv
+		pt=new Path(txMtdDir+"/" + COLTYPES_FILE_NAME);
+		br=new BufferedWriter(new OutputStreamWriter(fs.create(pt,true)));
+		
+		br.write(columnTypeToID(ctypes[0]) + "");
+		for(int i = 1; i < _dummycodedLength; i++) 
+			br.write( "," + columnTypeToID(ctypes[i]));
+		br.close();
+		
+		return sum-1;
+	}
+	
+	/**
+	 * Given a dummycoded column id, find the corresponding original column ID.
+	 *  
+	 * @param colID
+	 * @return
+	 */
+	public int mapDcdColumnID(int colID) 
+	{
+		for(int i=0; i < _dcdColumnMap.length; i++)
+		{
+			int st = (i==0 ? 1 : _dcdColumnMap[i-1]+1+1);
+			int end = _dcdColumnMap[i]+1;
+			//System.out.println((i+1) + ": " + "[" + st + "," + end + "]");
+			
+			if ( colID >= st && colID <= end)
+				return i+1;
+		}
+		return -1;
+	}
+	
+	public String constructDummycodedHeader(String header, Pattern delim) {
+		
+		if(_dcdList == null && _binList == null )
+			// none of the columns are dummycoded, simply return the given header
+			return header;
+		
+		String[] names = delim.split(header, -1);
+		List<String> newNames = null;
+		
+		StringBuilder sb = new StringBuilder();
+		
+		// Dummycoding can be performed on either on a recoded column or on a binned column
+		
+		// process recoded columns
+		if(_finalMapsCP != null && _dcdList != null) 
+		{
+			for(int i=0; i <_dcdList.length; i++) 
+			{
+				int colID = _dcdList[i];
+				HashMap<String,Long> map = _finalMapsCP.get(colID);
+				String colName = UtilFunctions.unquote(names[colID-1]);
+				
+				if ( map != null  ) 
+				{
+					// order map entries by their recodeID
+					Ordering<String> valueComparator = Ordering.natural().onResultOf(Functions.forMap(map));
+					newNames = valueComparator.sortedCopy(map.keySet());
+					
+					// construct concatenated string of map entries
+					sb.setLength(0);
+					for(int idx=0; idx < newNames.size(); idx++) 
+					{
+						if(idx==0) 
+							sb.append( colName + DCD_NAME_SEP + newNames.get(idx));
+						else
+							sb.append( delim + colName + DCD_NAME_SEP + newNames.get(idx));
+					}
+					names[colID-1] = sb.toString();			// replace original column name with dcd name
+				}
+			}
+		}
+		else if(_finalMaps != null && _dcdList != null) {
+			for(int i=0; i <_dcdList.length; i++) {
+				int colID = _dcdList[i];
+				HashMap<String,String> map = _finalMaps.get(colID);
+				String colName = UtilFunctions.unquote(names[colID-1]);
+				
+				if ( map != null ) 
+				{
+					// order map entries by their recodeID (represented as Strings .. "1", "2", etc.)
+					Ordering<String> orderByID = new Ordering<String>() 
+					{
+			    		public int compare(String s1, String s2) {
+			        		return (Integer.parseInt(s1) - Integer.parseInt(s2));
+			    		}
+					};
+					
+					newNames = orderByID.onResultOf(Functions.forMap(map)).sortedCopy(map.keySet());
+					// construct concatenated string of map entries
+					sb.setLength(0);
+					for(int idx=0; idx < newNames.size(); idx++) 
+					{
+						if(idx==0) 
+							sb.append( colName + DCD_NAME_SEP + newNames.get(idx));
+						else
+							sb.append( delim + colName + DCD_NAME_SEP + newNames.get(idx));
+					}
+					names[colID-1] = sb.toString();			// replace original column name with dcd name
+				}
+			}
+		}
+		
+		// process binned columns
+		if (_binList != null) 
+			for(int i=0; i < _binList.length; i++) 
+			{
+				int colID = _binList[i];
+				
+				// need to consider only binned and dummycoded columns
+				if(isDummyCoded(colID) == -1)
+					continue;
+				
+				int numBins = _numBins[i];
+				String colName = UtilFunctions.unquote(names[colID-1]);
+				
+				sb.setLength(0);
+				for(int idx=0; idx < numBins; idx++) 
+					if(idx==0) 
+						sb.append( colName + DCD_NAME_SEP + "Bin" + (idx+1) );
+					else
+						sb.append( delim + colName + DCD_NAME_SEP + "Bin" + (idx+1) );
+				names[colID-1] = sb.toString();			// replace original column name with dcd name
+			}
+		
+		// Construct the full header
+		sb.setLength(0);
+		for(int colID=0; colID < names.length; colID++) 
+		{
+			if (colID == 0)
+				sb.append(names[colID]);
+			else
+				sb.append(delim + names[colID]);
+		}
+		//System.out.println("DummycodedHeader: " + sb.toString());
+		
+		return sb.toString();
+	}
+	
+	@Override
+	public void loadTxMtd(JobConf job, FileSystem fs, Path txMtdDir, TfUtils agents) throws IOException {
+		if ( _dcdList == null )
+		{
+			_dummycodedLength = numCols;
+			return;
+		}
+		
+		// sort to-be dummycoded column IDs in ascending order. This is the order in which the new dummycoded record is constructed in apply() function.
+		Arrays.sort(_dcdList);	
+		_domainSizes = new int[_dcdList.length];
+
+		_dummycodedLength = numCols;
+		
+		//HashMap<String, String> map = null;
+		for(int i=0; i<_dcdList.length; i++) {
+			int colID = _dcdList[i];
+			
+			// Find the domain size for colID using _finalMaps or _finalMapsCP
+			int domainSize = 0;
+			if(_finalMaps != null) {
+				if(_finalMaps.get(colID) != null)
+					domainSize = _finalMaps.get(colID).size();
+			}
+			else {
+				if(_finalMapsCP.get(colID) != null)
+					domainSize = _finalMapsCP.get(colID).size();
+			}
+			
+			if ( domainSize != 0 ) {
+				// dummycoded column
+				_domainSizes[i] = domainSize;
+			}
+			else {
+				// binned column
+				if ( _binList != null )
+				for(int j=0; j<_binList.length; j++) {
+					if (colID == _binList[j]) {
+						_domainSizes[i] = _numBins[j];
+						break;
+					}
+				}
+			}
+			_dummycodedLength += _domainSizes[i]-1;
+			//System.out.println("colID=" + colID + ", domainsize=" + _domainSizes[i] + ", dcdLength=" + _dummycodedLength);
+		}
+	}
+
+	/**
+	 * Method to apply transformations.
+	 * 
+	 * @param words
+	 * @return
+	 */
+	@Override
+	public String[] apply(String[] words, TfUtils agents) {
+		
+		if ( _dcdList == null )
+			return words;
+		
+		String[] nwords = new String[(int)_dummycodedLength];
+		
+		int rcdVal = 0;
+		
+		for(int colID=1, idx=0, ncolID=1; colID <= words.length; colID++) {
+			if(idx < _dcdList.length && colID==_dcdList[idx]) {
+				// dummycoded columns
+				try {
+				rcdVal = UtilFunctions.parseToInt(UtilFunctions.unquote(words[colID-1]));
+				nwords[ ncolID-1+rcdVal-1 ] = "1";
+				ncolID += _domainSizes[idx];
+				idx++;
+				} catch (Exception e) {
+					System.out.println("Error in dummycoding: colID="+colID + ", rcdVal=" + rcdVal+", word="+words[colID-1] + ", domainSize=" + _domainSizes[idx] + ", dummyCodedLength=" + _dummycodedLength);
+					throw new RuntimeException(e);
+				}
+			}
+			else {
+				nwords[ncolID-1] = words[colID-1];
+				ncolID++;
+			}
+		}
+		
+		return nwords;
+	}
+	
+	/**
+	 * Check if the given column ID is subjected to this transformation.
+	 * 
+	 */
+	public int isDummyCoded(int colID)
+	{
+		if(_dcdList == null)
+			return -1;
+		
+		int idx = Arrays.binarySearch(_dcdList, colID);
+		return ( idx >= 0 ? idx : -1);
+	}
+	
+	@Override
+	public void print() {
+		System.out.print("Dummycoding List: \n    ");
+		for(int i : _dcdList) {
+			System.out.print(i + " ");
+		}
+		System.out.println();
+	}
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/main/java/org/apache/sysml/runtime/transform/GTFMTDMapper.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/transform/GTFMTDMapper.java b/src/main/java/org/apache/sysml/runtime/transform/GTFMTDMapper.java
index e254403..4e3ece5 100644
--- a/src/main/java/org/apache/sysml/runtime/transform/GTFMTDMapper.java
+++ b/src/main/java/org/apache/sysml/runtime/transform/GTFMTDMapper.java
@@ -1,107 +1,107 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.sysml.runtime.transform;
-import java.io.IOException;
-
-import org.apache.hadoop.io.IntWritable;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.Mapper;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reporter;
-import org.apache.wink.json4j.JSONException;
-
-import org.apache.sysml.runtime.matrix.CSVReblockMR.OffsetCount;
-
-
-public class GTFMTDMapper implements Mapper<LongWritable, Text, IntWritable, DistinctValue>{
-	
-	private OutputCollector<IntWritable, DistinctValue> _collector = null; 
-	private int _mapTaskID = -1;
-	
-	TfUtils _agents = null;
-
-	private boolean _partFileWithHeader = false;
-	private boolean _firstRecordInSplit = true;
-	private String _partFileName = null;
-	private long _offsetInPartFile = -1;
-	
-	// ----------------------------------------------------------------------------------------------
-	
-	/**
-	 * Configure the information used in the mapper, and setup transformation agents.
-	 */
-	@Override
-	public void configure(JobConf job) {
-		String[] parts = job.get("mapred.task.id").split("_");
-		if ( parts[0].equalsIgnoreCase("task")) {
-			_mapTaskID = Integer.parseInt(parts[parts.length-1]);
-		}
-		else if ( parts[0].equalsIgnoreCase("attempt")) {
-			_mapTaskID = Integer.parseInt(parts[parts.length-2]);
-		}
-		else {
-			throw new RuntimeException("Unrecognized format for taskID: " + job.get("mapred.task.id"));
-		}
-
-		try {
-			_partFileName = TfUtils.getPartFileName(job);
-			_partFileWithHeader = TfUtils.isPartFileWithHeader(job);
-			_agents = new TfUtils(job);
-		} catch(IOException e) { throw new RuntimeException(e); }
-		  catch(JSONException e)  { throw new RuntimeException(e); }
-
-	}
-	
-	
-	public void map(LongWritable rawKey, Text rawValue, OutputCollector<IntWritable, DistinctValue> out, Reporter reporter) throws IOException  {
-		
-		if(_firstRecordInSplit)
-		{
-			_firstRecordInSplit = false;
-			_collector = out;
-			_offsetInPartFile = rawKey.get();
-		}
-		
-		// ignore header
-		if (_agents.hasHeader() && rawKey.get() == 0 && _partFileWithHeader)
-			return;
-		
-		_agents.prepareTfMtd(rawValue.toString());
-	}
-
-	@Override
-	public void close() throws IOException {
-		_agents.getMVImputeAgent().mapOutputTransformationMetadata(_collector, _mapTaskID, _agents);
-		_agents.getRecodeAgent().mapOutputTransformationMetadata(_collector, _mapTaskID, _agents);
-		_agents.getBinAgent().mapOutputTransformationMetadata(_collector, _mapTaskID, _agents);
-		
-		// Output part-file offsets to create OFFSETS_FILE, which is to be used in csv reblocking.
-		// OffsetCount is denoted as a DistinctValue by concatenating parfile name and offset within partfile.
-		_collector.collect(new IntWritable((int)_agents.getNumCols()+1), new DistinctValue(new OffsetCount(_partFileName, _offsetInPartFile, _agents.getValid())));
-		
-		// reset global variables, required when the jvm is reused.
-		_firstRecordInSplit = true;
-		_offsetInPartFile = -1;
-		_partFileWithHeader = false;
-	}
-	
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.runtime.transform;
+import java.io.IOException;
+
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.Mapper;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.wink.json4j.JSONException;
+
+import org.apache.sysml.runtime.matrix.CSVReblockMR.OffsetCount;
+
+/** Mapper that passes each input line to TfUtils.prepareTfMtd and, in close(), emits the agents' transformation metadata followed by one OffsetCount entry for the processed split. */
+public class GTFMTDMapper implements Mapper<LongWritable, Text, IntWritable, DistinctValue>{
+	// collector captured from the first map() call of a split so that close() can emit through it
+	private OutputCollector<IntWritable, DistinctValue> _collector = null; 
+	private int _mapTaskID = -1;
+	
+	TfUtils _agents = null;
+
+	private boolean _partFileWithHeader = false;
+	private boolean _firstRecordInSplit = true;
+	private String _partFileName = null;
+	private long _offsetInPartFile = -1;
+	
+	// ----------------------------------------------------------------------------------------------
+	
+	/**
+	 * Derives the map task ID from "mapred.task.id" ("task_..." uses the last "_"-separated field, "attempt_..." the second-to-last), reads the part-file name/header flag and creates the TfUtils agents; IOException and JSONException are rethrown as RuntimeException.
+	 */
+	@Override
+	public void configure(JobConf job) {
+		String[] parts = job.get("mapred.task.id").split("_");
+		if ( parts[0].equalsIgnoreCase("task")) {
+			_mapTaskID = Integer.parseInt(parts[parts.length-1]);
+		}
+		else if ( parts[0].equalsIgnoreCase("attempt")) {
+			_mapTaskID = Integer.parseInt(parts[parts.length-2]);
+		}
+		else {
+			throw new RuntimeException("Unrecognized format for taskID: " + job.get("mapred.task.id"));
+		}
+
+		try {
+			_partFileName = TfUtils.getPartFileName(job);
+			_partFileWithHeader = TfUtils.isPartFileWithHeader(job);
+			_agents = new TfUtils(job);
+		} catch(IOException e) { throw new RuntimeException(e); }
+		  catch(JSONException e)  { throw new RuntimeException(e); }
+
+	}
+	
+	/** Handles one input line: the first call of a split stores the collector and the record key as the split's start offset (the key is presumably the offset within the part file -- TODO confirm); every non-header line is handed to the agents. */
+	public void map(LongWritable rawKey, Text rawValue, OutputCollector<IntWritable, DistinctValue> out, Reporter reporter) throws IOException  {
+		
+		if(_firstRecordInSplit)
+		{
+			_firstRecordInSplit = false;
+			_collector = out;
+			_offsetInPartFile = rawKey.get();
+		}
+		
+		// skip the record with key 0 when the data has a header and this part file is the one that carries it
+		if (_agents.hasHeader() && rawKey.get() == 0 && _partFileWithHeader)
+			return;
+		
+		_agents.prepareTfMtd(rawValue.toString());
+	}
+	/** Emits metadata through the MV-impute, recode and bin agents, then the split's offset entry, then resets per-split state. NOTE(review): _collector is only assigned in map(), so a split without records reaches this method with a null collector -- confirm that cannot happen. */
+	@Override
+	public void close() throws IOException {
+		_agents.getMVImputeAgent().mapOutputTransformationMetadata(_collector, _mapTaskID, _agents);
+		_agents.getRecodeAgent().mapOutputTransformationMetadata(_collector, _mapTaskID, _agents);
+		_agents.getBinAgent().mapOutputTransformationMetadata(_collector, _mapTaskID, _agents);
+		
+		// Output the part-file offset to create OFFSETS_FILE, which is to be used in csv reblocking.
+		// The OffsetCount (part-file name, offset within the part file, valid count) is wrapped in a DistinctValue and keyed by numCols+1.
+		_collector.collect(new IntWritable((int)_agents.getNumCols()+1), new DistinctValue(new OffsetCount(_partFileName, _offsetInPartFile, _agents.getValid())));
+		
+		// reset per-split state; required when the JVM is reused across tasks.
+		_firstRecordInSplit = true;
+		_offsetInPartFile = -1;
+		_partFileWithHeader = false;
+	}
+	
+}


[44/55] [partial] incubator-systemml git commit: [SYSTEMML-482] [SYSTEMML-480] Adding a Git attributes file to enfore Unix-styled line endings, and normalizing all of the line endings.

Posted by du...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/scripts/algorithms/StepGLM.dml
----------------------------------------------------------------------
diff --git a/scripts/algorithms/StepGLM.dml b/scripts/algorithms/StepGLM.dml
index 443ae95..10737ff 100644
--- a/scripts/algorithms/StepGLM.dml
+++ b/scripts/algorithms/StepGLM.dml
@@ -1,1196 +1,1196 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-#  
-# THIS SCRIPT CHOOSES A GLM REGRESSION MODEL IN A STEPWISE ALGIRITHM USING AIC
-# EACH GLM REGRESSION IS SOLVED USING NEWTON/FISHER SCORING WITH TRUST REGIONS
-#
-# INPUT PARAMETERS:
-# ---------------------------------------------------------------------------------------------
-# NAME  TYPE     DEFAULT   MEANING
-# ---------------------------------------------------------------------------------------------
-# X     String    ---      Location to read the matrix X of feature vectors
-# Y     String    ---      Location to read response matrix Y with 1 column
-# B     String    ---      Location to store estimated regression parameters (the betas)
-# S     String    ---      Location to write the selected features ordered as computed by the algorithm
-# O     String    " "      Location to write the printed statistics; by default is standard output
-# link  Int       2        Link function code: 1 = log, 2 = Logit, 3 = Probit, 4 = Cloglog
-# yneg  Double    0.0      Response value for Bernoulli "No" label, usually 0.0 or -1.0
-# icpt  Int       0        Intercept presence, X columns shifting and rescaling:
-#                          0 = no intercept, no shifting, no rescaling;
-#                          1 = add intercept, but neither shift nor rescale X;
-#                          2 = add intercept, shift & rescale X columns to mean = 0, variance = 1
-# tol   Double    0.000001 Tolerance (epsilon)
-# disp  Double    0.0      (Over-)dispersion value, or 0.0 to estimate it from data
-# moi   Int       200      Maximum number of outer (Newton / Fisher Scoring) iterations
-# mii   Int       0        Maximum number of inner (Conjugate Gradient) iterations, 0 = no maximum
-# thr   Double    0.01     Threshold to stop the algorithm: if the decrease in the value of AIC falls below thr
-#                          no further features are being checked and the algorithm stops 
-# fmt   String   "text"    The betas matrix output format, such as "text" or "csv"
-# ---------------------------------------------------------------------------------------------
-# OUTPUT: Matrix beta, whose size depends on icpt:
-#     icpt=0: ncol(X) x 1;  icpt=1: (ncol(X) + 1) x 1;  icpt=2: (ncol(X) + 1) x 2
-#
-# In addition, in the last run of GLM some statistics are provided in CSV format, one comma-separated name-value
-# pair per each line, as follows:
-#
-# NAME                  MEANING
-# -------------------------------------------------------------------------------------------
-# TERMINATION_CODE      A positive integer indicating success/failure as follows:
-#                       1 = Converged successfully; 2 = Maximum number of iterations reached; 
-#                       3 = Input (X, Y) out of range; 4 = Distribution/link is not supported
-# BETA_MIN              Smallest beta value (regression coefficient), excluding the intercept
-# BETA_MIN_INDEX        Column index for the smallest beta value
-# BETA_MAX              Largest beta value (regression coefficient), excluding the intercept
-# BETA_MAX_INDEX        Column index for the largest beta value
-# INTERCEPT             Intercept value, or NaN if there is no intercept (if icpt=0)
-# DISPERSION            Dispersion used to scale deviance, provided as "disp" input parameter
-#                       or estimated (same as DISPERSION_EST) if the "disp" parameter is <= 0
-# DISPERSION_EST        Dispersion estimated from the dataset
-# DEVIANCE_UNSCALED     Deviance from the saturated model, assuming dispersion == 1.0
-# DEVIANCE_SCALED       Deviance from the saturated model, scaled by the DISPERSION value
-# -------------------------------------------------------------------------------------------
-#
-# HOW TO INVOKE THIS SCRIPT - EXAMPLE:
-# hadoop jar SystemML.jar -f StepGLM.dml -nvargs X=INPUT_DIR/X Y=INPUT_DIR/Y B=OUTPUT_DIR/betas
-# 			 S=OUTPUT_DIR_S/selected O=OUTPUT_DIR/stats link=2 yneg=-1.0 icpt=2 tol=0.00000001 
-#			 disp=1.0 moi=100 mii=10 thr=0.01 fmt=csv  
-#
-# THE StepGLM SCRIPT CURRENTLY SUPPORTS BERNOULLI DISTRIBUTION FAMILY AND THE FOLLOWING LINK FUNCTIONS ONLY!
-# 	- LOG
-#	- LOGIT
-#	- PROBIT
-#	- CLOGLOG
-	
-fileX = $X;
-fileY = $Y;
-fileB = $B;
-intercept_status = ifdef ($icpt, 0);   
-thr = ifdef ($thr, 0.01); 
-bernoulli_No_label = ifdef ($yneg, 0.0);    # $yneg = 0.0;
-distribution_type = 2;
-
-bernoulli_No_label = as.double (bernoulli_No_label);
-
-# currently only the forward selection strategy in supported: start from one feature and iteratively add 
-# features until AIC improves
-dir = "forward";
-
-print("BEGIN STEPWISE GLM SCRIPT");
-print ("Reading X and Y...");
-X_orig = read (fileX);
-Y = read (fileY);
-
-if (distribution_type == 2 & ncol(Y) == 1) {
-	is_Y_negative = ppred (Y, bernoulli_No_label, "==");
-	Y = append (1 - is_Y_negative, is_Y_negative);
-	count_Y_negative = sum (is_Y_negative);
-	if (count_Y_negative == 0) {
-		stop ("StepGLM Input Error: all Y-values encode Bernoulli YES-label, none encode NO-label");
-	}
-	if (count_Y_negative == nrow(Y)) {
-		stop ("StepGLM Input Error: all Y-values encode Bernoulli NO-label, none encode YES-label");
-	}
-}
-
-num_records = nrow (X_orig);
-num_features = ncol (X_orig);
-
-# BEGIN STEPWISE GENERALIZED LINEAR MODELS 
-
-if (dir == "forward") {  
-	
-	continue = TRUE;
-	columns_fixed = matrix (0, rows = 1, cols = num_features);
-	columns_fixed_ordered = matrix (0, rows = 1, cols = 1);
-  
-	# X_global stores the best model found at each step 
-	X_global = matrix (0, rows = num_records, cols = 1);
- 
-	if (intercept_status == 0) {
-		# Compute AIC of an empty model with no features and no intercept (all Ys are zero)
-		[AIC_best] = glm (X_global, Y, 0, num_features, columns_fixed_ordered, " ");
-	} else {
-		# compute AIC of an empty model with only intercept (all Ys are constant)
-		all_ones = matrix (1, rows = num_records, cols = 1);
-		[AIC_best] = glm (all_ones, Y, 0, num_features, columns_fixed_ordered, " ");
-	}
-	print ("Best AIC without any features: " + AIC_best);
-  
-	# First pass to examine single features
-	AICs = matrix (AIC_best, rows = 1, cols = num_features);
-	parfor (i in 1:num_features) { 	
-		[AIC_1] = glm (X_orig[,i], Y, intercept_status, num_features, columns_fixed_ordered, " ");
-		AICs[1,i] = AIC_1;
-	}
-  
-	# Determine the best AIC 
-	column_best = 0;	
-	for (k in 1:num_features) {
-		AIC_cur = as.scalar (AICs[1,k]);
-		if ( (AIC_cur < AIC_best) & ((AIC_best - AIC_cur) > abs (thr * AIC_best)) ) {
-			column_best = k;
-			AIC_best = as.scalar(AICs[1,k]);
-		}
-	}
-  
-	if (column_best == 0) {
-		print ("AIC of an empty model is " + AIC_best + " and adding no feature achieves more than " + (thr * 100) + "% decrease in AIC!");
-		if (intercept_status == 0) {
-			# Compute AIC of an empty model with no features and no intercept (all Ys are zero)
-			[AIC_best] = glm (X_global, Y, 0, num_features, columns_fixed_ordered, fileB);
-		} else {
-			# compute AIC of an empty model with only intercept (all Ys are constant)
-			###all_ones = matrix (1, rows = num_records, cols = 1);
-			[AIC_best] = glm (all_ones, Y, 0, num_features, columns_fixed_ordered, fileB);
-		}
-	};
-  
-	print ("Best AIC " + AIC_best + " achieved with feature: " + column_best);	
-	columns_fixed[1,column_best] = 1;
-	columns_fixed_ordered[1,1] = column_best;
-	X_global = X_orig[,column_best];		
-  
-	while (continue) {
-		# Subsequent passes over the features
-		parfor (i in 1:num_features) { 
-			if (as.scalar(columns_fixed[1,i]) == 0) {	
-        
-				# Construct the feature matrix
-				X = append (X_global, X_orig[,i]);
-        
-				[AIC_2] = glm (X, Y, intercept_status, num_features, columns_fixed_ordered, " ");
-				AICs[1,i] = AIC_2;
-			}		
-		}
-    
-		# Determine the best AIC
-		for (k in 1:num_features) {
-			AIC_cur = as.scalar (AICs[1,k]);
-			if ( (AIC_cur < AIC_best) & ((AIC_best - AIC_cur) > abs (thr * AIC_best)) & (as.scalar(columns_fixed[1,k]) == 0) ) {
-				column_best = k;
-				AIC_best = as.scalar(AICs[1,k]);
-			}
-		}
-    
-		# Append best found features (i.e., columns) to X_global
-		if (as.scalar(columns_fixed[1,column_best]) == 0) { # new best feature found
-			print ("Best AIC " + AIC_best + " achieved with feature: " + column_best);
-			columns_fixed[1,column_best] = 1;
-			columns_fixed_ordered = append (columns_fixed_ordered, as.matrix(column_best));
-			if (ncol(columns_fixed_ordered) == num_features) { # all features examined
-				X_global = append (X_global, X_orig[,column_best]);
-				continue = FALSE;
-			} else {
-				X_global = append (X_global, X_orig[,column_best]);
-			}
-		} else {
-		continue = FALSE;
-		}
-	}
-  
-	# run GLM with selected set of features
-	print ("Running GLM with selected features...");
-	[AIC] = glm (X_global, Y, intercept_status, num_features, columns_fixed_ordered, fileB);
-  
-} else {
-	stop ("Currently only forward selection strategy is supported!");
-}
-
-
-################### UDFS USED IN THIS SCRIPT ##################
-
-glm = function (Matrix[Double] X, Matrix[Double] Y, Int intercept_status, Double num_features_orig, Matrix[Double] Selected, String fileB) return (Double AIC) {
-		
-	# distribution family code: 1 = Power, 2 = Bernoulli/Binomial; currently only Bernouli distribution family is supported!		
-	distribution_type = 2;                		# $dfam = 2;
-	variance_as_power_of_the_mean = 0.0;  		# $vpow = 0.0;
-	# link function code: 0 = canonical (depends on distribution), 1 = Power, 2 = Logit, 3 = Probit, 4 = Cloglog, 5 = Cauchit;
-	# currently only log (link = 1), logit (link = 2), probit (link = 3), and cloglog (link = 4) are supported!
-	link_type = ifdef ($link, 2);         		# $link = 2;
-	link_as_power_of_the_mean = 0.0;      		# $lpow = 0.0;
-
-	dispersion = ifdef ($disp, 0.0);            # $disp = 0.0;
-	eps = ifdef ($tol, 0.000001);               # $tol  = 0.000001;
-	max_iteration_IRLS = ifdef ($moi, 200);     # $moi  = 200;
-	max_iteration_CG = ifdef ($mii, 0);         # $mii  = 0;
-
-	variance_as_power_of_the_mean = as.double (variance_as_power_of_the_mean);
-	link_as_power_of_the_mean = as.double (link_as_power_of_the_mean);
-
-	dispersion = as.double (dispersion);
-	eps = as.double (eps);              
-
-	# Default values for output statistics:
-	regularization = 0.0;
-	termination_code     = 0.0;
-	min_beta             = 0.0 / 0.0;
-	i_min_beta           = 0.0 / 0.0;
-	max_beta             = 0.0 / 0.0;
-	i_max_beta           = 0.0 / 0.0;
-	intercept_value      = 0.0 / 0.0;
-	dispersion           = 0.0 / 0.0;
-	estimated_dispersion = 0.0 / 0.0;
-	deviance_nodisp      = 0.0 / 0.0;
-	deviance             = 0.0 / 0.0;				  
-                  
-	#####   INITIALIZE THE PARAMETERS   #####
-                  
-    num_records  = nrow (X);
-    num_features = ncol (X);
-    zeros_r = matrix (0, rows = num_records, cols = 1);
-    ones_r = 1 + zeros_r;
-                  
-    # Introduce the intercept, shift and rescale the columns of X if needed
-                  
-    if (intercept_status == 1 | intercept_status == 2) { # add the intercept column
-		X = append (X, ones_r);
-        num_features = ncol (X);
-	}
-                  
-    scale_lambda = matrix (1, rows = num_features, cols = 1);
-    if (intercept_status == 1 | intercept_status == 2) {
-		scale_lambda [num_features, 1] = 0;
-    }
-                  
-    if (intercept_status == 2) {  # scale-&-shift X columns to mean 0, variance 1
-		# Important assumption: X [, num_features] = ones_r
-        avg_X_cols = t(colSums(X)) / num_records;
-        var_X_cols = (t(colSums (X ^ 2)) - num_records * (avg_X_cols ^ 2)) / (num_records - 1);
-        is_unsafe = ppred (var_X_cols, 0.0, "<=");
-        scale_X = 1.0 / sqrt (var_X_cols * (1 - is_unsafe) + is_unsafe);
-        scale_X [num_features, 1] = 1;
-        shift_X = - avg_X_cols * scale_X;
-        shift_X [num_features, 1] = 0;
-        rowSums_X_sq = (X ^ 2) %*% (scale_X ^ 2) + X %*% (2 * scale_X * shift_X) + sum (shift_X ^ 2);
-	} else {
-        scale_X = matrix (1, rows = num_features, cols = 1);
-        shift_X = matrix (0, rows = num_features, cols = 1);
-        rowSums_X_sq = rowSums (X ^ 2);
-    }	
-                  
-    # Henceforth we replace "X" with "X %*% (SHIFT/SCALE TRANSFORM)" and rowSums(X ^ 2)
-    # with "rowSums_X_sq" in order to preserve the sparsity of X under shift and scale.
-    # The transform is then associatively applied to the other side of the expression,
-    # and is rewritten via "scale_X" and "shift_X" as follows:
-    #
-    # ssX_A  = (SHIFT/SCALE TRANSFORM) %*% A    --- is rewritten as:
-    # ssX_A  = diag (scale_X) %*% A;
-    # ssX_A [num_features, ] = ssX_A [num_features, ] + t(shift_X) %*% A;
-    #
-    # tssX_A = t(SHIFT/SCALE TRANSFORM) %*% A   --- is rewritten as:
-    # tssX_A = diag (scale_X) %*% A + shift_X %*% A [num_features, ];
-                  
-    # Initialize other input-dependent parameters
-                  
-    lambda = scale_lambda * regularization;
-    if (max_iteration_CG == 0) {
-		max_iteration_CG = num_features;
-    }
-                  
-    # Set up the canonical link, if requested [Then we have: Var(mu) * (d link / d mu) = const]
-                  
-    if (link_type == 0) {
-		if (distribution_type == 1) {
-			link_type = 1;
-            link_as_power_of_the_mean = 1.0 - variance_as_power_of_the_mean;
-		} else { 
-			if (distribution_type == 2) {
-				link_type = 2;
-			}   
-		}   
-	}
-                                
-    # For power distributions and/or links, we use two constants,
-    # "variance as power of the mean" and "link_as_power_of_the_mean",
-    # to specify the variance and the link as arbitrary powers of the
-    # mean.  However, the variance-powers of 1.0 (Poisson family) and
-    # 2.0 (Gamma family) have to be treated as special cases, because
-    # these values integrate into logarithms.  The link-power of 0.0
-    # is also special as it represents the logarithm link.
-                  
-    num_response_columns = ncol (Y);
-    is_supported = 0;              
-	if (num_response_columns == 2 & distribution_type == 2 & link_type >= 1 & link_type <= 4) { # BERNOULLI DISTRIBUTION
-		is_supported = 1;			  
-	}
-	if (num_response_columns == 1 & distribution_type == 2) {
-		print ("Error: Bernoulli response matrix has not been converted into two-column format.");
-    }
-
-	if (is_supported == 1) {
-                    
-		#####   INITIALIZE THE BETAS   #####
-                    
-        [beta, saturated_log_l, isNaN] = 
-			glm_initialize (X, Y, distribution_type, variance_as_power_of_the_mean, link_type, link_as_power_of_the_mean, intercept_status, max_iteration_CG);
-					  
-		# print(" --- saturated logLik " + saturated_log_l);
-					  
-        if (isNaN == 0) {
-                      
-			#####  START OF THE MAIN PART  #####
-                      
-            sum_X_sq = sum (rowSums_X_sq);
-            trust_delta = 0.5 * sqrt (num_features) / max (sqrt (rowSums_X_sq));
-            ###  max_trust_delta = trust_delta * 10000.0;
-            log_l = 0.0;
-            deviance_nodisp = 0.0;
-            new_deviance_nodisp = 0.0;
-            isNaN_log_l = 2;
-            newbeta = beta;
-            g = matrix (0.0, rows = num_features, cols = 1);
-            g_norm = sqrt (sum ((g + lambda * beta) ^ 2));
-            accept_new_beta = 1;
-            reached_trust_boundary = 0;
-            neg_log_l_change_predicted = 0.0;
-            i_IRLS = 0;
-                      
-            # print ("BEGIN IRLS ITERATIONS...");
-                      
-            ssX_newbeta = diag (scale_X) %*% newbeta;
-            ssX_newbeta [num_features, ] = ssX_newbeta [num_features, ] + t(shift_X) %*% newbeta;
-            all_linear_terms = X %*% ssX_newbeta;
-                      
-            [new_log_l, isNaN_new_log_l] = glm_log_likelihood_part
-				(all_linear_terms, Y, distribution_type, variance_as_power_of_the_mean, link_type, link_as_power_of_the_mean);
-					  
-            if (isNaN_new_log_l == 0) {
-				new_deviance_nodisp = 2.0 * (saturated_log_l - new_log_l);
-                new_log_l = new_log_l - 0.5 * sum (lambda * newbeta ^ 2);
-            }
-                      
-            while (termination_code == 0) {
-				accept_new_beta = 1;
-                        
-                if (i_IRLS > 0) {
-					if (isNaN_log_l == 0) {
-						accept_new_beta = 0;
-                    }
-                          
-                    # Decide whether to accept a new iteration point and update the trust region
-                    # See Alg. 4.1 on p. 69 of "Numerical Optimization" 2nd ed. by Nocedal and Wright
-                          
-                    rho = (- new_log_l + log_l) / neg_log_l_change_predicted;
-					if (rho < 0.25 | isNaN_new_log_l == 1) {
-						trust_delta = 0.25 * trust_delta;
-					}
-					if (rho > 0.75 & isNaN_new_log_l == 0 & reached_trust_boundary == 1) {
-						trust_delta = 2 * trust_delta;
-                            
-						### if (trust_delta > max_trust_delta) {
-						###     trust_delta = max_trust_delta;
-						### }
-					}
-					if (rho > 0.1 & isNaN_new_log_l == 0) {
-						accept_new_beta = 1;
-					}
-				}
-                        
-                if (accept_new_beta == 1) {
-					beta = newbeta;  log_l = new_log_l;  deviance_nodisp = new_deviance_nodisp;  isNaN_log_l = isNaN_new_log_l;
-                          
-					[g_Y, w] = glm_dist (all_linear_terms, Y, distribution_type, variance_as_power_of_the_mean, link_type, link_as_power_of_the_mean);
-                          
-					# We introduced these variables to avoid roundoff errors:
-					#     g_Y = y_residual / (y_var * link_grad);
-					#     w   = 1.0 / (y_var * link_grad * link_grad);
-                          
-					gXY = - t(X) %*% g_Y;
-					g = diag (scale_X) %*% gXY + shift_X %*% gXY [num_features, ];
-					g_norm = sqrt (sum ((g + lambda * beta) ^ 2));
-				}
-                        
-                [z, neg_log_l_change_predicted, num_CG_iters, reached_trust_boundary] = 
-					get_CG_Steihaug_point (X, scale_X, shift_X, w, g, beta, lambda, trust_delta, max_iteration_CG);
-                        
-				newbeta = beta + z;
-                        
-				ssX_newbeta = diag (scale_X) %*% newbeta;
-				ssX_newbeta [num_features, ] = ssX_newbeta [num_features, ] + t(shift_X) %*% newbeta;
-				all_linear_terms = X %*% ssX_newbeta;
-                        
-				[new_log_l, isNaN_new_log_l] = glm_log_likelihood_part
-					(all_linear_terms, Y, distribution_type, variance_as_power_of_the_mean, link_type, link_as_power_of_the_mean);
-                        
-				if (isNaN_new_log_l == 0) {
-					new_deviance_nodisp = 2.0 * (saturated_log_l - new_log_l);
-					new_log_l = new_log_l - 0.5 * sum (lambda * newbeta ^ 2);
-				}
-                        
-				log_l_change = new_log_l - log_l;               # R's criterion for termination: |dev - devold|/(|dev| + 0.1) < eps
-                       
-				if (reached_trust_boundary == 0 & isNaN_new_log_l == 0 & 
-				   (2.0 * abs (log_l_change) < eps * (deviance_nodisp + 0.1) | abs (log_l_change) < (abs (log_l) + abs (new_log_l)) * 0.00000000000001) ) {
-					termination_code = 1;
-				}
-				rho = - log_l_change / neg_log_l_change_predicted;
-				z_norm = sqrt (sum (z * z));
-                                              
-				i_IRLS = i_IRLS + 1;                        
-                        
-				if (i_IRLS == max_iteration_IRLS) {
-					termination_code = 2;
-				}
-			}
-                      
-            beta = newbeta;
-            log_l = new_log_l;
-            deviance_nodisp = new_deviance_nodisp;
-                     
-            #---------------------------- last part
-
-			if (termination_code != 1) {
-				print ("One of the runs of GLM did not converged in " + i_IRLS + " steps!");
-			}
-                      
-            ##### COMPUTE AIC ##### 
-            
-			if (distribution_type == 2 & link_type >= 1 & link_type <= 4) {			
-				AIC = -2 * log_l;
-				if (sum (X) != 0) {
-					AIC = AIC + 2 * num_features;	
-				}
-			} else {
-				stop ("Currently only the Bernoulli distribution family the following link functions are supported: log, logit, probit, and cloglog!");
-			}
-					           
-            if (fileB != " ") {
-				fileO = ifdef ($O, " ");
-				fileS = $S;
-				fmt  = ifdef ($fmt, "text");	
-			
-				# Output which features give the best AIC and are being used for linear regression 
-				write (Selected, fileS, format=fmt);
-		
-				ssX_beta = diag (scale_X) %*% beta;
-                ssX_beta [num_features, ] = ssX_beta [num_features, ] + t(shift_X) %*% beta;
-				if (intercept_status == 2) {
-					beta_out = append (ssX_beta, beta);
-                } else {
-					beta_out = ssX_beta;
-                }
-                
-                if (intercept_status == 0 & num_features == 1) {
-					p = sum (ppred (X, 1, "=="));
-					if (p == num_records) {
-						beta_out = beta_out[1,];
-					}					
-                } 
-
-								
-                if (intercept_status == 1 | intercept_status == 2) {
-					intercept_value = castAsScalar (beta_out [num_features, 1]);
-                    beta_noicept = beta_out [1 : (num_features - 1), 1];
-                } else {
-					beta_noicept = beta_out [1 : num_features, 1];
-                }
-                min_beta = min (beta_noicept);
-                max_beta = max (beta_noicept);
-                tmp_i_min_beta = rowIndexMin (t(beta_noicept))
-                i_min_beta = castAsScalar (tmp_i_min_beta [1, 1]);
-                tmp_i_max_beta = rowIndexMax (t(beta_noicept))
-                i_max_beta = castAsScalar (tmp_i_max_beta [1, 1]);
-                        
-                #####  OVER-DISPERSION PART  #####
-                      
-                all_linear_terms = X %*% ssX_beta;
-				[g_Y, w] = glm_dist (all_linear_terms, Y, distribution_type, variance_as_power_of_the_mean, link_type, link_as_power_of_the_mean);
-                        
-                pearson_residual_sq = g_Y ^ 2 / w;
-                pearson_residual_sq = replace (target = pearson_residual_sq, pattern = 0.0/0.0, replacement = 0);
-                # pearson_residual_sq = (y_residual ^ 2) / y_var;
-                        
-                if (num_records > num_features) {
-					estimated_dispersion = sum (pearson_residual_sq) / (num_records - num_features);
-                }
-                if (dispersion <= 0.0) {
-					dispersion = estimated_dispersion;
-                }
-                deviance = deviance_nodisp / dispersion;
-                        
-                #####  END OF THE MAIN PART  #####
-                        
-                str = "BETA_MIN," + min_beta;
-                str = append (str, "BETA_MIN_INDEX," + i_min_beta);
-                str = append (str, "BETA_MAX," + max_beta);
-                str = append (str, "BETA_MAX_INDEX," + i_max_beta);
-                str = append (str, "INTERCEPT," + intercept_value);
-                str = append (str, "DISPERSION," + dispersion);
-                str = append (str, "DISPERSION_EST," + estimated_dispersion);
-                str = append (str, "DEVIANCE_UNSCALED," + deviance_nodisp);
-                str = append (str, "DEVIANCE_SCALED," + deviance);
-                        
-                if (fileO != " ") {
-					write (str, fileO);
-                } 
-				else {
-					print (str);
-                }
-			
-				# Prepare the output matrix
-				print ("Writing the output matrix...");
-                if (intercept_status == 0 & num_features == 1) { 
-					if (p == num_records) {
-						beta_out_tmp = matrix (0, rows = num_features_orig + 1, cols = 1); 
-						beta_out_tmp[num_features_orig + 1,] = beta_out;
-						beta_out = beta_out_tmp;
-						write (beta_out, fileB, format=fmt);
-						stop ("");
-					} else if (sum (X) == 0){
-						beta_out = matrix (0, rows = num_features_orig, cols = 1);
-						write (beta_out, fileB, format=fmt);
-						stop ("");
-					}
-				}
-
-				no_selected = ncol (Selected);
-				max_selected = max (Selected);
-				last = max_selected + 1;	
-		
-				if (intercept_status != 0) {
-		
-					Selected_ext = append (Selected, as.matrix (last));			
-					P1 = table (seq (1, ncol (Selected_ext)), t(Selected_ext)); 
-
-					if (intercept_status == 2) {
-			
-						P1_ssX_beta = P1 * ssX_beta;
-						P2_ssX_beta = colSums (P1_ssX_beta);
-						P1_beta = P1 * beta;
-						P2_beta = colSums (P1_beta);
-				
-						if (max_selected < num_features_orig) {
-						
-							P2_ssX_beta = append (P2_ssX_beta, matrix (0, rows=1, cols=(num_features_orig - max_selected)));
-							P2_beta = append (P2_beta, matrix (0, rows=1, cols=(num_features_orig - max_selected)));
-							
-							P2_ssX_beta[1, num_features_orig+1] = P2_ssX_beta[1, max_selected + 1]; 
-							P2_ssX_beta[1, max_selected + 1] = 0;
-							
-							P2_beta[1, num_features_orig+1] = P2_beta[1, max_selected + 1]; 
-							P2_beta[1, max_selected + 1] = 0;
-
-						}
-						beta_out = append (t(P2_ssX_beta), t(P2_beta));
-				
-					} else {
-			
-						P1_beta = P1 * beta;
-						P2_beta = colSums (P1_beta);
-				
-						if (max_selected < num_features_orig) {
-							P2_beta = append (P2_beta, matrix (0, rows=1, cols=(num_features_orig - max_selected)));
-							P2_beta[1, num_features_orig+1] = P2_beta[1, max_selected + 1] ; 
-							P2_beta[1, max_selected + 1] = 0;
-						}
-						beta_out = t(P2_beta);
-				
-					}
-				} else {
-		
-					P1 = table (seq (1, no_selected), t(Selected)); 
-					P1_beta = P1 * beta;
-					P2_beta = colSums (P1_beta);	
-
-					if (max_selected < num_features_orig) {
-						P2_beta = append (P2_beta, matrix (0, rows=1, cols=(num_features_orig - max_selected)));
-					}		
-
-					beta_out = t(P2_beta);	
-				}
-	
-				write ( beta_out, fileB, format=fmt );
-			
-			}
-                      
-		} else { 
-			stop ("Input matrices X and/or Y are out of range!"); 
-        }
-	} else { 
-		stop ("Response matrix with " + num_response_columns + " columns, distribution family (" + distribution_type + ", " + variance_as_power_of_the_mean
-               + ") and link family (" + link_type + ", " + link_as_power_of_the_mean + ") are NOT supported together.");
-    }
-}
-
-# Builds the starting point for the iterative GLM fit.
-# Returns:
-#   beta            : initial parameter vector with ncol(X) rows
-#   saturated_log_l : log-likelihood from glm_log_likelihood_part, evaluated at linear
-#                     terms obtained by applying the link directly to the responses;
-#                     stays 0.0 if those linear terms could not be formed
-#   isNaN           : 1 if the linear terms, or that log-likelihood, could not be
-#                     computed for the given responses and link; 0 otherwise
-# Arguments:
-#   X            : feature matrix; only ncol(X) and the call straightenX (X, ...) use it
-#   Y            : response matrix; column 1 is always read, and when dist_type == 2
-#                  the row sums over all columns are read as well
-#   dist_type, var_power, link_type, link_power :
-#                  distribution / link selectors, also passed on to glm_log_likelihood_part
-#   icept_status : for 1 or 2 the last entry of beta receives the target linear term
-#   max_iter_CG  : iteration cap handed to straightenX
-glm_initialize = function (Matrix[double] X, Matrix[double] Y, int dist_type, double var_power, int link_type, double link_power, int icept_status, int max_iter_CG)
-  return (Matrix[double] beta, double saturated_log_l, int isNaN)
-{
-    saturated_log_l = 0.0;
-    isNaN = 0;
-    y_corr = Y [, 1];
-    if (dist_type == 2) {
-      # Replace column 1 by its share of the row total; rows whose total is 0 become 0.5
-      n_corr = rowSums (Y);
-      is_n_zero = ppred (n_corr, 0.0, "==");
-      y_corr = Y [, 1] / (n_corr + is_n_zero) + (0.5 - Y [, 1]) * is_n_zero;    
-    }
-    # From here on "linear_terms" is the link function applied element-wise to y_corr
-    linear_terms = y_corr;
-    if (dist_type == 1 & link_type == 1) { # POWER DISTRIBUTION
-      if          (link_power ==  0.0) {
-        if (sum (ppred (y_corr, 0.0, "<")) == 0) {
-          # Zero-protected log: where y_corr is 0 this evaluates log (1) - 1/0
-          is_zero_y_corr = ppred (y_corr, 0.0, "==");
-          linear_terms = log (y_corr + is_zero_y_corr) - is_zero_y_corr / (1.0 - is_zero_y_corr);
-        } else { isNaN = 1; }
-      } else { if (link_power ==  1.0) {
-        linear_terms = y_corr;
-      } else { if (link_power == -1.0) {
-        linear_terms = 1.0 / y_corr;
-      } else { if (link_power ==  0.5) {
-        if (sum (ppred (y_corr, 0.0, "<")) == 0) {
-          linear_terms = sqrt (y_corr);
-        } else { isNaN = 1; }
-      } else { if (link_power >   0.0) {
-        if (sum (ppred (y_corr, 0.0, "<")) == 0) {
-          # Zero entries are raised as 1 ^ link_power and then reset to 0
-          is_zero_y_corr = ppred (y_corr, 0.0, "==");
-          linear_terms = (y_corr + is_zero_y_corr) ^ link_power - is_zero_y_corr;
-        } else { isNaN = 1; }
-      } else {
-        # Negative link power: any y_corr <= 0 is rejected
-        if (sum (ppred (y_corr, 0.0, "<=")) == 0) {
-          linear_terms = y_corr ^ link_power;
-        } else { isNaN = 1; }
-      }}}}}
-    }
-    if (dist_type == 2 & link_type >= 1 & link_type <= 5)
-    { # BINOMIAL/BERNOULLI DISTRIBUTION
-      if          (link_type == 1 & link_power == 0.0)  { # Binomial.log
-        if (sum (ppred (y_corr, 0.0, "<")) == 0) {
-          is_zero_y_corr = ppred (y_corr, 0.0, "==");
-          linear_terms = log (y_corr + is_zero_y_corr) - is_zero_y_corr / (1.0 - is_zero_y_corr);
-        } else { isNaN = 1; }
-      } else { if (link_type == 1 & link_power >  0.0)  { # Binomial.power_nonlog pos
-        if (sum (ppred (y_corr, 0.0, "<")) == 0) {
-          is_zero_y_corr = ppred (y_corr, 0.0, "==");
-          linear_terms = (y_corr + is_zero_y_corr) ^ link_power - is_zero_y_corr;
-        } else { isNaN = 1; }
-      } else { if (link_type == 1)                      { # Binomial.power_nonlog neg
-        if (sum (ppred (y_corr, 0.0, "<=")) == 0) {
-          linear_terms = y_corr ^ link_power;
-        } else { isNaN = 1; }
-      } else { 
-        # Link types 2..5: entries <= 0 or >= 1 are replaced by 0.5 before the link is
-        # applied; the "is_one / (1 - is_one)" and "is_zero / (1 - is_zero)" terms below
-        # then add 1/0, respectively subtract 1/0, on exactly those rows.
-        is_zero_y_corr = ppred (y_corr, 0.0, "<=");
-        is_one_y_corr  = ppred (y_corr, 1.0, ">=");
-        y_corr = y_corr * (1.0 - is_zero_y_corr) * (1.0 - is_one_y_corr) + 0.5 * (is_zero_y_corr + is_one_y_corr);
-        if (link_type == 2)                           { # Binomial.logit
-          linear_terms = log (y_corr / (1.0 - y_corr)) 
-          + is_one_y_corr / (1.0 - is_one_y_corr) - is_zero_y_corr / (1.0 - is_zero_y_corr);
-        } else { if (link_type == 3)                  { # Binomial.probit
-          # y_below_half equals y_corr where y_corr <= 0.5 and 1 - y_corr elsewhere;
-          # the sign is restored by the (1.0 - 2.0 * ppred (...)) factor further down
-          y_below_half = y_corr + (1.0 - 2.0 * y_corr) * ppred (y_corr, 0.5, ">");
-          t = sqrt (- 2.0 * log (y_below_half));
-          approx_inv_Gauss_CDF = - t + (2.515517 + t * (0.802853 + t * 0.010328)) / (1.0 + t * (1.432788 + t * (0.189269 + t * 0.001308)));
-          linear_terms = approx_inv_Gauss_CDF * (1.0 - 2.0 * ppred (y_corr, 0.5, ">"))
-          + is_one_y_corr / (1.0 - is_one_y_corr) - is_zero_y_corr / (1.0 - is_zero_y_corr);
-        } else { if (link_type == 4)                  { # Binomial.cloglog
-          # The second term cancels log (- log (0.5)) on the rows that were reset to 0.5
-          linear_terms = log (- log (1.0 - y_corr))
-          - log (- log (0.5)) * (is_zero_y_corr + is_one_y_corr)
-          + is_one_y_corr / (1.0 - is_one_y_corr) - is_zero_y_corr / (1.0 - is_zero_y_corr);
-        } else { if (link_type == 5)                  { # Binomial.cauchit
-          linear_terms = tan ((y_corr - 0.5) * 3.1415926535897932384626433832795)
-          + is_one_y_corr / (1.0 - is_one_y_corr) - is_zero_y_corr / (1.0 - is_zero_y_corr);
-        }}  }}}}}
-    }
-    
-    # The log-likelihood is evaluated only if the linear terms above were computable
-    if (isNaN == 0) {
-      [saturated_log_l, isNaN] = 
-        glm_log_likelihood_part (linear_terms, Y, dist_type, var_power, link_type, link_power);
-    }
-    
-    # Choose the value that the initial linear terms should take, depending on the link
-    if ((dist_type == 1 & link_type == 1 & link_power == 0.0) |
-          (dist_type == 2 & link_type >= 2))
-    {    
-      desired_eta = 0.0;
-    } else { if (link_type == 1 & link_power == 0.0) {
-      desired_eta = log (0.5);
-    } else { if (link_type == 1) {
-      desired_eta = 0.5 ^ link_power;
-    } else {
-      desired_eta = 0.5;
-    }}}
-    
-    # beta stays all-zero when desired_eta is 0.0
-    beta = matrix (0.0, rows = ncol(X), cols = 1);
-    
-    if (desired_eta != 0.0) {
-      if (icept_status == 1 | icept_status == 2) {
-        # Only the last entry of beta is set; presumably this is the intercept term
-        beta [nrow(beta), 1] = desired_eta;
-      } else {
-        # We want: avg (X %*% ssX_transform %*% beta) = desired_eta
-        # Note that "ssX_transform" is trivial here, hence ignored
-        
-        beta = straightenX (X, 0.000001, max_iter_CG);  
-        beta = beta * desired_eta;
-      }   }   }
-
-
-glm_dist = function (Matrix[double] linear_terms, Matrix[double] Y,
-                     int dist_type, double var_power, int link_type, double link_power)
-  return (Matrix[double] g_Y, Matrix[double] w)
-# ORIGINALLY we returned more meaningful vectors, namely:
-# Matrix[double] y_residual    : y - y_mean, i.e. y observed - y predicted
-# Matrix[double] link_gradient : derivative of the link function
-# Matrix[double] var_function  : variance without dispersion, i.e. the V(mu) function
-# BUT, this caused roundoff errors, so we had to compute "directly useful" vectors
-# and skip over the "meaningful intermediaries".  Now we output these two variables:
-#     g_Y = y_residual / (var_function * link_gradient);
-#     w   = 1.0 / (var_function * link_gradient ^ 2);
-#
-# Arguments:
-#   linear_terms : one column with nrow (linear_terms) records
-#   Y            : responses; the dist_type == 1 branch uses Y as a whole, the
-#                  dist_type == 2 branch reads columns 1 and 2 and the row sums
-#   dist_type, var_power, link_type, link_power : select the branch and its exponents
-# Both outputs are initialized to zero columns and keep that value when neither the
-# (dist_type == 1 & link_type == 1) branch nor the (dist_type == 2 & link_type in 1..5)
-# branch applies.
-{
-    num_records = nrow (linear_terms);
-    zeros_r = matrix (0.0, rows = num_records, cols = 1);
-    ones_r = 1 + zeros_r;
-    g_Y  = zeros_r;
-    w  = zeros_r;
-    
-    # Some constants
-    
-    # 1x2 row vectors: p_one_m_one = [1, -1], m_one_p_one = [-1, 1],
-    #                  zero_one = [0, 1],     one_zero = [1, 0]
-    # 2x2 matrices:    flip_pos = [[0, 1], [1, 0]],  flip_neg = [[0, -1], [1, 0]]
-    # NOTE(review): m_one_p_one is not referenced again inside this function.
-    one_over_sqrt_two_pi = 0.39894228040143267793994605993438;
-    ones_2 = matrix (1.0, rows = 1, cols = 2);
-    p_one_m_one = ones_2;
-    p_one_m_one [1, 2] = -1.0;
-    m_one_p_one = ones_2;
-    m_one_p_one [1, 1] = -1.0;
-    zero_one = ones_2;
-    zero_one [1, 1] = 0.0;
-    one_zero = ones_2;
-    one_zero [1, 2] = 0.0;
-    flip_pos = matrix (0, rows = 2, cols = 2);
-    flip_neg = flip_pos;
-    flip_pos [1, 2] = 1;
-    flip_pos [2, 1] = 1;
-    flip_neg [1, 2] = -1;
-    flip_neg [2, 1] = 1;
-    
-    if (dist_type == 1 & link_type == 1) { # POWER DISTRIBUTION
-      y_mean = zeros_r;
-      if          (link_power ==  0.0) {
-        y_mean = exp (linear_terms);
-        y_mean_pow = y_mean ^ (1 - var_power);
-        w   = y_mean_pow * y_mean;
-        g_Y = y_mean_pow * (Y - y_mean);
-      } else { if (link_power ==  1.0) {
-        y_mean = linear_terms;
-        w   = y_mean ^ (- var_power);
-        g_Y = w * (Y - y_mean);
-      } else {
-        # General power link: both outputs are written as powers of linear_terms
-        y_mean = linear_terms ^ (1.0 / link_power);
-        c1  = (1 - var_power) / link_power - 1;
-        c2  = (2 - var_power) / link_power - 2;
-        g_Y = (linear_terms ^ c1) * (Y - y_mean) / link_power;
-        w   = (linear_terms ^ c2) / (link_power ^ 2);
-      }   }}
-    if (dist_type == 2 & link_type >= 1 & link_type <= 5)
-    { # BINOMIAL/BERNOULLI DISTRIBUTION
-      if (link_type == 1) { # BINOMIAL.POWER LINKS
-        if (link_power == 0.0)  { # Binomial.log
-          vec1 = 1 / (exp (- linear_terms) - 1);
-          g_Y = Y [, 1] - Y [, 2] * vec1;
-          w   = rowSums (Y) * vec1;
-        } else {                  # Binomial.nonlog
-          vec1 = zeros_r;
-          if (link_power == 0.5)  {
-            vec1 = 1 / (1 - linear_terms ^ 2);
-          } else { if (sum (ppred (linear_terms, 0.0, "<")) == 0) {
-            vec1 = linear_terms ^ (- 2 + 1 / link_power) / (1 - linear_terms ^ (1 / link_power));
-          # NOTE(review): "isNaN" is not among this function's return values, so the
-          # assignment below is not visible to the caller; vec1 simply stays all-zero.
-          } else {isNaN = 1;}}
-          # We want a "zero-protected" version of
-          #     vec2 = Y [, 1] / linear_terms;
-          is_y_0 = ppred (Y [, 1], 0.0, "==");
-          vec2 = (Y [, 1] + is_y_0) / (linear_terms * (1 - is_y_0) + is_y_0) - is_y_0;
-          g_Y =  (vec2 - Y [, 2] * vec1 * linear_terms) / link_power;
-          w   =  rowSums (Y) * vec1 / link_power ^ 2;
-        }
-      } else {
-        # Infinite linear terms are flagged in a two-column indicator (+Inf in column 1,
-        # -Inf in column 2) and replaced by 0 in "finite_linear_terms"
-        is_LT_pos_infinite = ppred (linear_terms,  1.0/0.0, "==");
-        is_LT_neg_infinite = ppred (linear_terms, -1.0/0.0, "==");
-        is_LT_infinite = is_LT_pos_infinite %*% one_zero + is_LT_neg_infinite %*% zero_one;
-        finite_linear_terms = replace (target =        linear_terms, pattern =  1.0/0.0, replacement = 0);
-        finite_linear_terms = replace (target = finite_linear_terms, pattern = -1.0/0.0, replacement = 0);
-        if (link_type == 2)                           { # Binomial.logit
-          # Y_prob = [exp (lt), 1] normalized by its row sum; rows with an infinite
-          # linear term are then overwritten by their indicator row
-          Y_prob = exp (finite_linear_terms) %*% one_zero + ones_r %*% zero_one;
-          Y_prob = Y_prob / (rowSums (Y_prob) %*% ones_2);
-          Y_prob = Y_prob * ((1.0 - rowSums (is_LT_infinite)) %*% ones_2) + is_LT_infinite;
-          # Y_prob %*% flip_neg = [p2, -p1], hence g_Y = Y1 * p2 - Y2 * p1;
-          # Y_prob %*% flip_pos = [p2,  p1], hence w   = (Y1 + Y2) * p1 * p2
-          g_Y = rowSums (Y * (Y_prob %*% flip_neg));           ### = y_residual;
-          w   = rowSums (Y * (Y_prob %*% flip_pos) * Y_prob);  ### = y_variance;
-        } else { if (link_type == 3)                  { # Binomial.probit
-          is_lt_pos = ppred (linear_terms, 0.0, ">=");
-          t_gp = 1.0 / (1.0 + abs (finite_linear_terms) * 0.231641888);  # 0.231641888 = 0.3275911 / sqrt (2.0)
-          pt_gp = t_gp * ( 0.254829592 
-                           + t_gp * (-0.284496736 # "Handbook of Mathematical Functions", ed. by M. Abramowitz and I.A. Stegun,
-                                     + t_gp * ( 1.421413741 # U.S. Nat-l Bureau of Standards, 10th print (Dec 1972), Sec. 7.1.26, p. 299
-                                                + t_gp * (-1.453152027 
-                                                          + t_gp *   1.061405429))));
-          the_gauss_exp = exp (- (linear_terms ^ 2) / 2.0);
-          vec1 = 0.25 * pt_gp * (2 - the_gauss_exp * pt_gp);
-          vec2 = Y [, 1] - rowSums (Y) * is_lt_pos + the_gauss_exp * pt_gp * rowSums (Y) * (is_lt_pos - 0.5);
-          w   = the_gauss_exp * (one_over_sqrt_two_pi ^ 2) * rowSums (Y) / vec1;
-          g_Y = one_over_sqrt_two_pi * vec2 / vec1;
-        } else { if (link_type == 4)                  { # Binomial.cloglog
-          the_exp = exp (linear_terms)
-          the_exp_exp = exp (- the_exp);
-          # "is_too_small" marks entries so small that adding them to 10000000 changes
-          # nothing; for those, the ratio uses (1 - the_exp / 2) instead of a division
-          is_too_small = ppred (10000000 + the_exp, 10000000, "==");
-          the_exp_ratio = (1 - is_too_small) * (1 - the_exp_exp) / (the_exp + is_too_small) + is_too_small * (1 - the_exp / 2);
-          g_Y =  (rowSums (Y) * the_exp_exp - Y [, 2]) / the_exp_ratio;
-          w   =  the_exp_exp * the_exp * rowSums (Y) / the_exp_ratio;
-        } else { if (link_type == 5)                  { # Binomial.cauchit
-          # Y_prob = [0.5 + atan (lt) / pi, 0.5 - atan (lt) / pi], with infinite rows
-          # overwritten by their indicator row
-          Y_prob = 0.5 + (atan (finite_linear_terms) %*% p_one_m_one) / 3.1415926535897932384626433832795;
-          Y_prob = Y_prob * ((1.0 - rowSums (is_LT_infinite)) %*% ones_2) + is_LT_infinite;
-          y_residual = Y [, 1] * Y_prob [, 2] - Y [, 2] * Y_prob [, 1];
-          var_function = rowSums (Y) * Y_prob [, 1] * Y_prob [, 2];
-          link_gradient_normalized = (1 + linear_terms ^ 2) * 3.1415926535897932384626433832795;
-          g_Y =  rowSums (Y) * y_residual / (var_function * link_gradient_normalized);
-          w   = (rowSums (Y) ^ 2) / (var_function * link_gradient_normalized ^ 2);
-        }}}}   
-      }
-    }
-  }
-
-
-# Evaluates a log-likelihood value for the given linear terms and responses.
-#   dist_type == 1 & link_type == 1 : log_l = sum (Y * natural_parameters - b_cumulant),
-#       where both vectors are derived from linear_terms according to var_power / link_power
-#   dist_type == 2 & link_type 1..5 : log_l = sum (Y * log (Y_prob)), with Y_prob taken
-#       from binomial_probability_two_column
-# Returns:
-#   log_l : the value above, or -Infinity (written as -1.0 / 0.0) whenever isNaN == 1
-#   isNaN : 1 if the linear terms are outside the range accepted by the selected branch,
-#           if a zero-probability outcome has a nonzero response, or if the sum is
-#           NaN or infinite; 0 otherwise
-# If neither branch condition holds, the initial values log_l = 0.0, isNaN = 0 are returned.
-glm_log_likelihood_part = function (Matrix[double] linear_terms, Matrix[double] Y,
-                                    int dist_type, double var_power, int link_type, double link_power)
-  return (double log_l, int isNaN)
-{
-    isNaN = 0;
-    log_l = 0.0;
-    num_records = nrow (Y);
-    zeros_r = matrix (0.0, rows = num_records, cols = 1);
-    
-    if (dist_type == 1 & link_type == 1)
-    { # POWER DISTRIBUTION
-      b_cumulant = zeros_r;
-      natural_parameters = zeros_r;
-      # Marks records where the natural parameter would be log (0); only the Poisson
-      # branches below set it, and such records are tolerated only if Y is 0 there
-      is_natural_parameter_log_zero = zeros_r;
-      if          (var_power == 1.0 & link_power == 0.0)  { # Poisson.log
-        b_cumulant = exp (linear_terms);
-        is_natural_parameter_log_zero = ppred (linear_terms, -1.0/0.0, "==");
-        natural_parameters = replace (target = linear_terms, pattern = -1.0/0.0, replacement = 0);
-      } else { if (var_power == 1.0 & link_power == 1.0)  { # Poisson.id
-        if (sum (ppred (linear_terms, 0.0, "<")) == 0)  {
-          b_cumulant = linear_terms;
-          is_natural_parameter_log_zero = ppred (linear_terms, 0.0, "==");
-          natural_parameters = log (linear_terms + is_natural_parameter_log_zero);
-        } else {isNaN = 1;}
-      } else { if (var_power == 1.0 & link_power == 0.5)  { # Poisson.sqrt
-        if (sum (ppred (linear_terms, 0.0, "<")) == 0)  {
-          b_cumulant = linear_terms ^ 2;
-          is_natural_parameter_log_zero = ppred (linear_terms, 0.0, "==");
-          natural_parameters = 2.0 * log (linear_terms + is_natural_parameter_log_zero);
-        } else {isNaN = 1;}
-      } else { if (var_power == 1.0 & link_power  > 0.0)  { # Poisson.power_nonlog, pos
-        if (sum (ppred (linear_terms, 0.0, "<")) == 0)  {
-          is_natural_parameter_log_zero = ppred (linear_terms, 0.0, "==");
-          b_cumulant = (linear_terms + is_natural_parameter_log_zero) ^ (1.0 / link_power) - is_natural_parameter_log_zero;
-          natural_parameters = log (linear_terms + is_natural_parameter_log_zero) / link_power;
-        } else {isNaN = 1;}
-      } else { if (var_power == 1.0)                      { # Poisson.power_nonlog, neg
-        if (sum (ppred (linear_terms, 0.0, "<=")) == 0) {
-          b_cumulant = linear_terms ^ (1.0 / link_power);
-          natural_parameters = log (linear_terms) / link_power;
-        } else {isNaN = 1;}
-      } else { if (var_power == 2.0 & link_power == -1.0) { # Gamma.inverse
-        if (sum (ppred (linear_terms, 0.0, "<=")) == 0) {
-          b_cumulant = - log (linear_terms);
-          natural_parameters = - linear_terms;
-        } else {isNaN = 1;}
-      } else { if (var_power == 2.0 & link_power ==  1.0) { # Gamma.id
-        if (sum (ppred (linear_terms, 0.0, "<=")) == 0) {
-          b_cumulant = log (linear_terms);
-          natural_parameters = - 1.0 / linear_terms;
-        } else {isNaN = 1;}
-      } else { if (var_power == 2.0 & link_power ==  0.0) { # Gamma.log
-        b_cumulant = linear_terms;
-        natural_parameters = - exp (- linear_terms);
-      } else { if (var_power == 2.0)                      { # Gamma.power_nonlog
-        if (sum (ppred (linear_terms, 0.0, "<=")) == 0) {
-          b_cumulant = log (linear_terms) / link_power;
-          natural_parameters = - linear_terms ^ (- 1.0 / link_power);
-        } else {isNaN = 1;}
-      } else { if                    (link_power ==  0.0) { # PowerDist.log
-        natural_parameters = exp (linear_terms * (1.0 - var_power)) / (1.0 - var_power);
-        b_cumulant = exp (linear_terms * (2.0 - var_power)) / (2.0 - var_power);
-      } else {                                              # PowerDist.power_nonlog
-        # natural_parameters = linear_terms ^ ((1 - var_power) / link_power) / (1 - var_power);
-        # exponents -2, -1, 1 and 2 are spelled out without "^ power", and only the
-        # general exponent requires strictly positive linear terms
-        if          (-2 * link_power == 1.0 - var_power) {
-          natural_parameters = 1.0 / (linear_terms ^ 2) / (1.0 - var_power);
-        } else { if (-1 * link_power == 1.0 - var_power) {
-          natural_parameters = 1.0 / linear_terms / (1.0 - var_power);
-        } else { if (     link_power == 1.0 - var_power) {
-          natural_parameters = linear_terms / (1.0 - var_power);
-        } else { if ( 2 * link_power == 1.0 - var_power) {
-          natural_parameters = linear_terms ^ 2 / (1.0 - var_power);
-        } else {
-          if (sum (ppred (linear_terms, 0.0, "<=")) == 0) {
-            power = (1.0 - var_power) / link_power;
-            natural_parameters = (linear_terms ^ power) / (1.0 - var_power);
-          } else {isNaN = 1;}
-        }}}}
-        # Same case split for b_cumulant, with (2 - var_power) in place of (1 - var_power)
-        if          (-2 * link_power == 2.0 - var_power) {
-          b_cumulant = 1.0 / (linear_terms ^ 2) / (2.0 - var_power);
-        } else { if (-1 * link_power == 2.0 - var_power) {
-          b_cumulant = 1.0 / linear_terms / (2.0 - var_power);
-        } else { if (     link_power == 2.0 - var_power) {
-          b_cumulant = linear_terms / (2.0 - var_power);
-        } else { if ( 2 * link_power == 2.0 - var_power) {
-          b_cumulant = linear_terms ^ 2 / (2.0 - var_power);
-        } else {
-          if (sum (ppred (linear_terms, 0.0, "<=")) == 0) {
-            power = (2.0 - var_power) / link_power;
-            b_cumulant = (linear_terms ^ power) / (2.0 - var_power);
-          } else {isNaN = 1;}
-        }}}}
-      }}}}} }}}}}
-      # A record flagged as "log zero" with a nonzero response makes the result -Infinity
-      if (sum (is_natural_parameter_log_zero * abs (Y)) > 0.0) {
-        log_l = -1.0 / 0.0;
-        isNaN = 1;
-      }
-      if (isNaN == 0)
-      {
-        log_l = sum (Y * natural_parameters - b_cumulant);
-        # "log_l != log_l" holds only for NaN; "log_l == log_l + 1.0 & log_l == log_l * 2.0"
-        # holds for infinite values (0 satisfies only the second comparison)
-        if (log_l != log_l | (log_l == log_l + 1.0 & log_l == log_l * 2.0)) {
-          log_l = -1.0 / 0.0;
-          isNaN = 1;
-        }   }   }
-    
-    if (dist_type == 2 & link_type >= 1 & link_type <= 5)
-    { # BINOMIAL/BERNOULLI DISTRIBUTION
-      
-      [Y_prob, isNaN] = binomial_probability_two_column (linear_terms, link_type, link_power);
-      
-      if (isNaN == 0) {            
-        # Entries with probability <= 0 are acceptable only where the response is 0;
-        # they are replaced by 1 inside the log so that they contribute nothing
-        does_prob_contradict = ppred (Y_prob, 0.0, "<=");
-        if (sum (does_prob_contradict * abs (Y)) == 0.0) {
-          log_l = sum (Y * log (Y_prob * (1 - does_prob_contradict) + does_prob_contradict));
-          # Same NaN / infinity test as in the POWER DISTRIBUTION branch
-          if (log_l != log_l | (log_l == log_l + 1.0 & log_l == log_l * 2.0)) {
-            isNaN = 1;
-          }
-        } else {
-          log_l = -1.0 / 0.0;
-          isNaN = 1;
-        }   }   }
-    
-    # Every failure path ends with log_l = -Infinity
-    if (isNaN == 1) {
-      log_l = - 1.0 / 0.0; 
-    }
-  }
-
-
-
-# Maps a column of linear terms to a two-column matrix of probabilities.
-#   linear_terms : one column with nrow (linear_terms) records
-#   link_type    : 1 = power link (uses link_power), 2 = logit, 3 = probit,
-#                  4 = cloglog, 5 = cauchit (names as in the branch comments below)
-#   link_power   : exponent for link_type == 1; 0.0 selects the log link
-# Returns:
-#   Y_prob : nrow (linear_terms) x 2; in every branch column 2 is built as
-#            one minus the expression used for column 1
-#   isNaN  : 1 if the general power link is given a negative linear term, or if
-#            link_type is none of 1..5; 0 otherwise
-binomial_probability_two_column =
-  function (Matrix[double] linear_terms, int link_type, double link_power)
-    return   (Matrix[double] Y_prob, int isNaN)
-{
-      isNaN = 0;
-      num_records = nrow (linear_terms);
-      
-      # Define some auxiliary matrices
-      
-      # 1x2 row vectors: p_one_m_one = [1, -1], m_one_p_one = [-1, 1],
-      #                  zero_one = [0, 1],     one_zero = [1, 0]
-      # NOTE(review): m_one_p_one is not referenced again inside this function.
-      ones_2 = matrix (1.0, rows = 1, cols = 2);
-      p_one_m_one = ones_2;
-      p_one_m_one [1, 2] = -1.0;
-      m_one_p_one = ones_2;
-      m_one_p_one [1, 1] = -1.0;
-      zero_one = ones_2;
-      zero_one [1, 1] = 0.0;
-      one_zero = ones_2;
-      one_zero [1, 2] = 0.0;
-      
-      zeros_r = matrix (0.0, rows = num_records, cols = 1);
-      ones_r = 1.0 + zeros_r;
-      
-      # Begin the function body
-      
-      Y_prob = zeros_r %*% ones_2;
-      if (link_type == 1) { # Binomial.power
-        # "v %*% p_one_m_one + ones_r %*% zero_one" expands a column v into [v, 1 - v]
-        if          (link_power == 0.0) { # Binomial.log
-          Y_prob = exp (linear_terms) %*% p_one_m_one + ones_r %*% zero_one;    
-        } else { if (link_power == 0.5) { # Binomial.sqrt
-          Y_prob = (linear_terms ^ 2) %*% p_one_m_one + ones_r %*% zero_one;    
-        } else {                          # Binomial.power_nonlog
-          if (sum (ppred (linear_terms, 0.0, "<")) == 0) {
-            Y_prob = (linear_terms ^ (1.0 / link_power)) %*% p_one_m_one + ones_r %*% zero_one;    
-          } else {isNaN = 1;}
-        }}
-      } else {              # Binomial.non_power
-        # Infinite linear terms are flagged in a two-column indicator (+Inf in column 1,
-        # -Inf in column 2) and replaced by 0 before the link-specific formula is applied
-        is_LT_pos_infinite = ppred (linear_terms,  1.0/0.0, "==");
-        is_LT_neg_infinite = ppred (linear_terms, -1.0/0.0, "==");
-        is_LT_infinite = is_LT_pos_infinite %*% one_zero + is_LT_neg_infinite %*% zero_one;
-        finite_linear_terms = replace (target =        linear_terms, pattern =  1.0/0.0, replacement = 0);
-        finite_linear_terms = replace (target = finite_linear_terms, pattern = -1.0/0.0, replacement = 0);
-        if (link_type == 2)             { # Binomial.logit
-          # [exp (lt), 1] divided by its row sum
-          Y_prob = exp (finite_linear_terms) %*% one_zero + ones_r %*% zero_one;
-          Y_prob = Y_prob / (rowSums (Y_prob) %*% ones_2);
-        } else { if (link_type == 3)    { # Binomial.probit
-          # lt_pos_neg is [1, 0] where the linear term is >= 0 and [0, 1] elsewhere
-          lt_pos_neg = ppred (finite_linear_terms, 0.0, ">=") %*% p_one_m_one + ones_r %*% zero_one;
-          t_gp = 1.0 / (1.0 + abs (finite_linear_terms) * 0.231641888);  # 0.231641888 = 0.3275911 / sqrt (2.0)
-          pt_gp = t_gp * ( 0.254829592 
-                           + t_gp * (-0.284496736 # "Handbook of Mathematical Functions", ed. by M. Abramowitz and I.A. Stegun,
-                                     + t_gp * ( 1.421413741 # U.S. Nat-l Bureau of Standards, 10th print (Dec 1972), Sec. 7.1.26, p. 299
-                                                + t_gp * (-1.453152027 
-                                                          + t_gp *   1.061405429))));
-          the_gauss_exp = exp (- (finite_linear_terms ^ 2) / 2.0);
-          Y_prob = lt_pos_neg + ((the_gauss_exp * pt_gp) %*% ones_2) * (0.5 - lt_pos_neg);
-        } else { if (link_type == 4)    { # Binomial.cloglog
-          the_exp = exp (finite_linear_terms);
-          the_exp_exp = exp (- the_exp);
-          # Where the_exp is too small to change 10000000 when added to it, column 1
-          # uses the_exp * (1 - the_exp / 2) instead of 1 - exp (- the_exp)
-          is_too_small = ppred (10000000 + the_exp, 10000000, "==");
-          Y_prob [, 1] = (1 - is_too_small) * (1 - the_exp_exp) + is_too_small * the_exp * (1 - the_exp / 2);
-          Y_prob [, 2] = the_exp_exp;
-        } else { if (link_type == 5)    { # Binomial.cauchit
-          Y_prob = 0.5 + (atan (finite_linear_terms) %*% p_one_m_one) / 3.1415926535897932384626433832795;
-        } else {
-          isNaN = 1;
-        }}}}
-        # Rows with an infinite linear term are overwritten by their indicator row:
-        # [1, 0] for +Infinity and [0, 1] for -Infinity
-        Y_prob = Y_prob * ((1.0 - rowSums (is_LT_infinite)) %*% ones_2) + is_LT_infinite;
-      }   }            
-
-
-# THE CG-STEIHAUG PROCEDURE SCRIPT
-
-# Apply Conjugate Gradient - Steihaug algorithm in order to approximately minimize
-# 0.5 z^T (X^T diag(w) X + diag (lambda)) z + (g + lambda * beta)^T z
-# under constraint:  ||z|| <= trust_delta.
-# See Alg. 7.2 on p. 171 of "Numerical Optimization" 2nd ed. by Nocedal and Wright
-# IN THE ABOVE, "X" IS UNDERSTOOD TO BE "X %*% (SHIFT/SCALE TRANSFORM)"; this transform
-# is given separately because sparse "X" may become dense after applying the transform.
-#
-# Arguments:
-#   X, scale_X, shift_X : data matrix and the two vectors describing the shift/scale
-#                         transform; the transform is applied to each search direction
-#                         instead of to X itself
-#   w, g                : per-record weights and the gradient column (nrow (g) fixes the
-#                         problem size "size_CG")
-#   beta, lambda        : current parameters and regularization weights; they enter
-#                         through g_reg = g + lambda * beta and the "lambda * p_CG" term
-#   trust_delta         : trust-region radius; only its square is used
-#   max_iter_CG         : iteration cap; a value <= 0 is replaced by size_CG
-# Returns:
-#   z                      : the computed step
-#   neg_log_l_change       : value of the quadratic model at z
-#   i_CG                   : number of loop iterations started
-#   reached_trust_boundary : 1 if z was produced by get_trust_boundary_point, else 0
-get_CG_Steihaug_point =
-  function (Matrix[double] X, Matrix[double] scale_X, Matrix[double] shift_X, Matrix[double] w,
-            Matrix[double] g, Matrix[double] beta, Matrix[double] lambda, double trust_delta, int max_iter_CG)
-    return (Matrix[double] z, double neg_log_l_change, int i_CG, int reached_trust_boundary)
-{
-      trust_delta_sq = trust_delta ^ 2;
-      size_CG = nrow (g);
-      z = matrix (0.0, rows = size_CG, cols = 1);
-      neg_log_l_change = 0.0;
-      reached_trust_boundary = 0;
-      g_reg = g + lambda * beta;
-      r_CG = g_reg;
-      p_CG = -r_CG;
-      rr_CG = sum(r_CG * r_CG);
-      # Stopping threshold on the squared residual norm, relative to its initial value
-      eps_CG = rr_CG * min (0.25, sqrt (rr_CG));
-      converged_CG = 0;
-      if (rr_CG < eps_CG) {
-        converged_CG = 1;
-      }
-      
-      max_iteration_CG = max_iter_CG;
-      if (max_iteration_CG <= 0) {
-        max_iteration_CG = size_CG;
-      }
-      i_CG = 0;
-      while (converged_CG == 0)
-      {
-        i_CG = i_CG + 1;
-        # Apply the shift/scale transform to p_CG: scale every entry, then add the
-        # shift contribution to the last row
-        ssX_p_CG = diag (scale_X) %*% p_CG;
-        ssX_p_CG [size_CG, ] = ssX_p_CG [size_CG, ] + t(shift_X) %*% p_CG;
-        temp_CG = t(X) %*% (w * (X %*% ssX_p_CG));
-        # q_CG is the regularized matrix-vector product for direction p_CG; the last two
-        # terms map temp_CG back through the scale and shift vectors
-        q_CG = (lambda * p_CG) + diag (scale_X) %*% temp_CG + shift_X %*% temp_CG [size_CG, ];
-        pq_CG = sum (p_CG * q_CG);
-        if (pq_CG <= 0) {
-          # Non-positive curvature along p_CG: go to the trust-region boundary, unless
-          # p_CG is the zero vector, in which case the current z is kept
-          pp_CG = sum (p_CG * p_CG);  
-          if (pp_CG > 0) {
-            [z, neg_log_l_change] = 
-              get_trust_boundary_point (g_reg, z, p_CG, q_CG, r_CG, pp_CG, pq_CG, trust_delta_sq);
-            reached_trust_boundary = 1;
-          } else {
-            neg_log_l_change = 0.5 * sum (z * (r_CG + g_reg));
-          }
-          converged_CG = 1;
-        }
-        if (converged_CG == 0) {
-          alpha_CG = rr_CG / pq_CG;
-          new_z = z + alpha_CG * p_CG;
-          # The full step would reach or leave the trust region: stop on its boundary
-          if (sum(new_z * new_z) >= trust_delta_sq) {
-            pp_CG = sum (p_CG * p_CG);  
-            [z, neg_log_l_change] = 
-              get_trust_boundary_point (g_reg, z, p_CG, q_CG, r_CG, pp_CG, pq_CG, trust_delta_sq);
-            reached_trust_boundary = 1;
-            converged_CG = 1;
-          }
-          if (converged_CG == 0) {
-            z = new_z;
-            old_rr_CG = rr_CG;
-            r_CG = r_CG + alpha_CG * q_CG;
-            rr_CG = sum(r_CG * r_CG);
-            # Stop on the iteration cap or once the squared residual drops below eps_CG
-            if (i_CG == max_iteration_CG | rr_CG < eps_CG) {
-              neg_log_l_change = 0.5 * sum (z * (r_CG + g_reg));
-              reached_trust_boundary = 0;
-              converged_CG = 1;
-            }
-            if (converged_CG == 0) {
-              # Next search direction
-              p_CG = -r_CG + (rr_CG / old_rr_CG) * p_CG;
-            }   }   }   }   }
-
-
-# An auxiliary function used twice inside the CG-STEIHAUG loop:
-# Moves from z along direction p until ||z + tau * p||^2 = trust_delta_sq and reports
-# the resulting point together with the model value there.
-#   g, z, p, q, r  : gradient, current point, search direction, the product "q" computed
-#                    for p by the caller, and the current residual
-#   pp, pq         : sum (p * p) and sum (p * q), precomputed by the caller
-#   trust_delta_sq : squared trust-region radius
-# Returns new_z = z + tau * p and f_change for whichever of the two roots tau_1, tau_2
-# gives the smaller f_change; on a tie tau_2 is used.
-get_trust_boundary_point = 
-  function (Matrix[double] g, Matrix[double] z, Matrix[double] p, 
-            Matrix[double] q, Matrix[double] r, double pp, double pq, 
-            double trust_delta_sq)
-    return (Matrix[double] new_z, double f_change)
-{
-      zz = sum (z * z);  pz = sum (p * z);
-      # Roots of  pp * tau^2 + 2 * pz * tau + (zz - trust_delta_sq) = 0
-      sq_root_d = sqrt (pz * pz - pp * (zz - trust_delta_sq));
-      tau_1 = (- pz + sq_root_d) / pp;
-      tau_2 = (- pz - sq_root_d) / pp;
-      zq = sum (z * q);  gp = sum (g * p);
-      # f_extra is the model value at the current z (the same expression the caller
-      # uses when it stops without touching the boundary)
-      f_extra = 0.5 * sum (z * (r + g));
-      f_change_1 = f_extra + (0.5 * tau_1 * pq + zq + gp) * tau_1;
-      f_change_2 = f_extra + (0.5 * tau_2 * pq + zq + gp) * tau_2;
-      # Exactly one of ind1 / ind2 is 1, which selects a root without branching
-      ind1 = as.integer(f_change_1 < f_change_2);
-      ind2 = as.integer(f_change_1 >= f_change_2);
-      new_z = z + ((ind1 * tau_1 + ind2 * tau_2) * p);
-      f_change = ind1 * f_change_1 + ind2 * f_change_2;
-}
-
-
-# Computes vector w such that  ||X %*% w - 1|| -> MIN  given  avg(X %*% w) = 1
-# We find z_LS such that ||X %*% z_LS - 1|| -> MIN unconditionally, then scale
-# it to compute  w = c * z_LS  such that  sum(X %*% w) = nrow(X).
-#   X           : input matrix
-#   eps         : tolerance; the loop stops once the squared residual norm drops
-#                 below eps * nrow(X)
-#   max_iter_CG : cap on the number of loop iterations (ncol(X) is a second cap)
-# NOTE(review): with max_iter_CG <= 0 the loop body never runs, z_LS stays all-zero and
-# the final scaling divides by 0.  get_CG_Steihaug_point instead maps a non-positive cap
-# to the problem size -- confirm that callers never pass 0 here.
-straightenX =
-  function (Matrix[double] X, double eps, int max_iter_CG)
-    return   (Matrix[double] w)
-{
-      w_X = t(colSums(X));
-      # Small ridge term added to t(X) %*% X inside the loop
-      lambda_LS = 0.000001 * sum(X ^ 2) / ncol(X);
-      eps_LS = eps * nrow(X);
-      
-      # BEGIN LEAST SQUARES
-      
-      # Conjugate-gradient iteration for  (t(X) %*% X + lambda_LS * I) z_LS = w_X,
-      # started from z_LS = 0, so the initial residual is - w_X
-      r_LS = - w_X;
-      z_LS = matrix (0.0, rows = ncol(X), cols = 1);
-      p_LS = - r_LS;
-      norm_r2_LS = sum (r_LS ^ 2);
-      i_LS = 0;
-      while (i_LS < max_iter_CG & i_LS < ncol(X) & norm_r2_LS >= eps_LS)
-      {
-        q_LS = t(X) %*% X %*% p_LS;
-        q_LS = q_LS + lambda_LS * p_LS;
-        alpha_LS = norm_r2_LS / sum (p_LS * q_LS);
-        z_LS = z_LS + alpha_LS * p_LS;
-        old_norm_r2_LS = norm_r2_LS;
-        r_LS = r_LS + alpha_LS * q_LS;
-        norm_r2_LS = sum (r_LS ^ 2);
-        p_LS = -r_LS + (norm_r2_LS / old_norm_r2_LS) * p_LS;
-        i_LS = i_LS + 1;
-      }
-      
-      # END LEAST SQUARES
-      
-      # sum (w_X * z_LS) equals sum (X %*% z_LS), so this rescales z_LS to make
-      # sum (X %*% w) equal nrow(X)
-      w = (nrow(X) / sum (w_X * z_LS)) * z_LS;
-    }
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+#  
+# THIS SCRIPT CHOOSES A GLM REGRESSION MODEL IN A STEPWISE ALGORITHM USING AIC
+# EACH GLM REGRESSION IS SOLVED USING NEWTON/FISHER SCORING WITH TRUST REGIONS
+#
+# INPUT PARAMETERS:
+# ---------------------------------------------------------------------------------------------
+# NAME  TYPE     DEFAULT   MEANING
+# ---------------------------------------------------------------------------------------------
+# X     String    ---      Location to read the matrix X of feature vectors
+# Y     String    ---      Location to read response matrix Y with 1 column
+# B     String    ---      Location to store estimated regression parameters (the betas)
+# S     String    ---      Location to write the selected features ordered as computed by the algorithm
+# O     String    " "      Location to write the printed statistics; by default is standard output
+# link  Int       2        Link function code: 1 = log, 2 = Logit, 3 = Probit, 4 = Cloglog
+# yneg  Double    0.0      Response value for Bernoulli "No" label, usually 0.0 or -1.0
+# icpt  Int       0        Intercept presence, X columns shifting and rescaling:
+#                          0 = no intercept, no shifting, no rescaling;
+#                          1 = add intercept, but neither shift nor rescale X;
+#                          2 = add intercept, shift & rescale X columns to mean = 0, variance = 1
+# tol   Double    0.000001 Tolerance (epsilon)
+# disp  Double    0.0      (Over-)dispersion value, or 0.0 to estimate it from data
+# moi   Int       200      Maximum number of outer (Newton / Fisher Scoring) iterations
+# mii   Int       0        Maximum number of inner (Conjugate Gradient) iterations, 0 = no maximum
+# thr   Double    0.01     Threshold to stop the algorithm: if the decrease in the value of AIC falls below thr
+#                          no further features are being checked and the algorithm stops 
+# fmt   String   "text"    The betas matrix output format, such as "text" or "csv"
+# ---------------------------------------------------------------------------------------------
+# OUTPUT: Matrix beta, whose size depends on icpt:
+#     icpt=0: ncol(X) x 1;  icpt=1: (ncol(X) + 1) x 1;  icpt=2: (ncol(X) + 1) x 2
+#
+# In addition, the last run of GLM provides some statistics in CSV format, one comma-separated
+# name-value pair per line, as follows:
+#
+# NAME                  MEANING
+# -------------------------------------------------------------------------------------------
+# TERMINATION_CODE      A positive integer indicating success/failure as follows:
+#                       1 = Converged successfully; 2 = Maximum number of iterations reached; 
+#                       3 = Input (X, Y) out of range; 4 = Distribution/link is not supported
+# BETA_MIN              Smallest beta value (regression coefficient), excluding the intercept
+# BETA_MIN_INDEX        Column index for the smallest beta value
+# BETA_MAX              Largest beta value (regression coefficient), excluding the intercept
+# BETA_MAX_INDEX        Column index for the largest beta value
+# INTERCEPT             Intercept value, or NaN if there is no intercept (if icpt=0)
+# DISPERSION            Dispersion used to scale deviance, provided as "disp" input parameter
+#                       or estimated (same as DISPERSION_EST) if the "disp" parameter is <= 0
+# DISPERSION_EST        Dispersion estimated from the dataset
+# DEVIANCE_UNSCALED     Deviance from the saturated model, assuming dispersion == 1.0
+# DEVIANCE_SCALED       Deviance from the saturated model, scaled by the DISPERSION value
+# -------------------------------------------------------------------------------------------
+#
+# HOW TO INVOKE THIS SCRIPT - EXAMPLE:
+# hadoop jar SystemML.jar -f StepGLM.dml -nvargs X=INPUT_DIR/X Y=INPUT_DIR/Y B=OUTPUT_DIR/betas
+# 			 S=OUTPUT_DIR_S/selected O=OUTPUT_DIR/stats link=2 yneg=-1.0 icpt=2 tol=0.00000001 
+#			 disp=1.0 moi=100 mii=10 thr=0.01 fmt=csv  
+#
+# THE StepGLM SCRIPT CURRENTLY SUPPORTS BERNOULLI DISTRIBUTION FAMILY AND THE FOLLOWING LINK FUNCTIONS ONLY!
+# 	- LOG
+#	- LOGIT
+#	- PROBIT
+#	- CLOGLOG
+	
+# ----- Script arguments (see the parameter table in the header) -----
+fileX = $X;
+fileY = $Y;
+fileB = $B;
+thr = ifdef ($thr, 0.01);
+intercept_status = ifdef ($icpt, 0);
+no_label = ifdef ($yneg, 0.0);    # $yneg = 0.0;
+no_label = as.double (no_label);
+
+# Only the Bernoulli/Binomial family (code 2) is handled by this script.
+dist_family = 2;
+
+# Currently only the forward selection strategy is supported: start from one feature and
+# keep adding features for as long as the AIC improves.
+dir = "forward";
+
+print("BEGIN STEPWISE GLM SCRIPT");
+print ("Reading X and Y...");
+X_orig = read (fileX);
+Y = read (fileY);
+num_records = nrow (X_orig);
+num_features = ncol (X_orig);
+
+# A one-column Bernoulli response is expanded into two columns: column 1 flags the
+# records that are NOT equal to the "No" label, column 2 flags the records that are.
+# Both labels have to occur at least once, otherwise the script stops.
+if (ncol(Y) == 1 & dist_family == 2) {
+	is_no = ppred (Y, no_label, "==");
+	num_no = sum (is_no);
+	if (num_no == 0) {
+		stop ("StepGLM Input Error: all Y-values encode Bernoulli YES-label, none encode NO-label");
+	}
+	if (num_no == nrow(Y)) {
+		stop ("StepGLM Input Error: all Y-values encode Bernoulli NO-label, none encode YES-label");
+	}
+	Y = append (1 - is_no, is_no);
+}
+
+# BEGIN STEPWISE GENERALIZED LINEAR MODELS 
+
+if (dir == "forward") {
+
+	keep_searching = TRUE;
+	# columns_fixed: 0/1 flag per original feature, 1 = feature is already in the model
+	columns_fixed = matrix (0, rows = 1, cols = num_features);
+	# columns_fixed_ordered: indices of the selected features, in the order they were added
+	columns_fixed_ordered = matrix (0, rows = 1, cols = 1);
+
+	# X_global holds the columns of the best model found so far
+	X_global = matrix (0, rows = num_records, cols = 1);
+
+	# Baseline AIC of the empty model: an all-zero column when there is no intercept,
+	# an all-one column (i.e. intercept only) otherwise.
+	if (intercept_status == 0) {
+		[AIC_best] = glm (X_global, Y, 0, num_features, columns_fixed_ordered, " ");
+	} else {
+		all_ones = matrix (1, rows = num_records, cols = 1);
+		[AIC_best] = glm (all_ones, Y, 0, num_features, columns_fixed_ordered, " ");
+	}
+	print ("Best AIC without any features: " + AIC_best);
+
+	# First pass: fit every single-feature model
+	AICs = matrix (AIC_best, rows = 1, cols = num_features);
+	parfor (i in 1:num_features) {
+		[AIC_single] = glm (X_orig[,i], Y, intercept_status, num_features, columns_fixed_ordered, " ");
+		AICs[1,i] = AIC_single;
+	}
+
+	# Pick the feature whose AIC beats the current best by more than the relative threshold
+	column_best = 0;
+	for (j in 1:num_features) {
+		AIC_candidate = as.scalar (AICs[1,j]);
+		if (AIC_candidate < AIC_best) {
+			if ((AIC_best - AIC_candidate) > abs (thr * AIC_best)) {
+				column_best = j;
+				AIC_best = AIC_candidate;
+			}
+		}
+	}
+
+	if (column_best == 0) {
+		# No single feature helps: refit the empty model, this time with the output location.
+		# For such a single all-zero / all-one column glm writes the betas and then ends the
+		# script via stop (""), so execution is not expected to continue past this branch.
+		print ("AIC of an empty model is " + AIC_best + " and adding no feature achieves more than " + (thr * 100) + "% decrease in AIC!");
+		if (intercept_status == 0) {
+			[AIC_best] = glm (X_global, Y, 0, num_features, columns_fixed_ordered, fileB);
+		} else {
+			[AIC_best] = glm (all_ones, Y, 0, num_features, columns_fixed_ordered, fileB);
+		}
+	}
+
+	print ("Best AIC " + AIC_best + " achieved with feature: " + column_best);
+	columns_fixed[1,column_best] = 1;
+	columns_fixed_ordered[1,1] = column_best;
+	X_global = X_orig[,column_best];
+
+	while (keep_searching) {
+		# Subsequent passes: try each not-yet-selected feature on top of the current model
+		parfor (i in 1:num_features) {
+			if (as.scalar(columns_fixed[1,i]) == 0) {
+				X_trial = append (X_global, X_orig[,i]);
+				[AIC_trial] = glm (X_trial, Y, intercept_status, num_features, columns_fixed_ordered, " ");
+				AICs[1,i] = AIC_trial;
+			}
+		}
+
+		# Determine the best AIC among the features that are not in the model yet
+		for (j in 1:num_features) {
+			if (as.scalar(columns_fixed[1,j]) == 0) {
+				AIC_candidate = as.scalar (AICs[1,j]);
+				if (AIC_candidate < AIC_best) {
+					if ((AIC_best - AIC_candidate) > abs (thr * AIC_best)) {
+						column_best = j;
+						AIC_best = AIC_candidate;
+					}
+				}
+			}
+		}
+
+		# column_best still points at an already-fixed column when nothing improved
+		if (as.scalar(columns_fixed[1,column_best]) == 0) { # new best feature found
+			print ("Best AIC " + AIC_best + " achieved with feature: " + column_best);
+			columns_fixed[1,column_best] = 1;
+			columns_fixed_ordered = append (columns_fixed_ordered, as.matrix(column_best));
+			X_global = append (X_global, X_orig[,column_best]);
+			if (ncol(columns_fixed_ordered) == num_features) { # all features examined
+				keep_searching = FALSE;
+			}
+		} else {
+			keep_searching = FALSE;
+		}
+	}
+
+	# Final fit on the selected features; this call writes the outputs
+	print ("Running GLM with selected features...");
+	[AIC] = glm (X_global, Y, intercept_status, num_features, columns_fixed_ordered, fileB);
+
+} else {
+	stop ("Currently only forward selection strategy is supported!");
+}
+
+
+################### UDFS USED IN THIS SCRIPT ##################
+
+# glm: fits one Bernoulli GLM by Newton / Fisher scoring with a trust region and returns its AIC.
+#
+# Arguments:
+#   X                 feature matrix of the candidate model
+#   Y                 response matrix; must have two columns (a one-column Y is reported and rejected)
+#   intercept_status  0 = no intercept; 1 = append an intercept column to X;
+#                     2 = append an intercept column and shift/rescale the columns of X
+#   num_features_orig number of columns of the full input matrix; sizes the written beta matrix
+#   Selected          1-row matrix with the indices of the selected features
+#   fileB             output location for the betas; " " means: compute the AIC only, write nothing
+# Returns:
+#   AIC               -2 * log-likelihood, plus 2 * ncol(X incl. intercept) unless sum (X) is 0
+#
+# The script arguments $link, $disp, $tol, $moi, $mii (and, when fileB is set, $O, $S, $fmt)
+# are read inside this function.
+# NOTE: when fileB is set, there is no intercept, and X is a single all-one or all-zero column,
+# the function writes the betas and then ends the whole script through stop ("").
+glm = function (Matrix[Double] X, Matrix[Double] Y, Int intercept_status, Double num_features_orig, Matrix[Double] Selected, String fileB) return (Double AIC) {
+
+	# distribution family code: 1 = Power, 2 = Bernoulli/Binomial; currently only Bernoulli distribution family is supported!
+	distribution_type = 2;                		# $dfam = 2;
+	variance_as_power_of_the_mean = 0.0;  		# $vpow = 0.0;
+	# link function code: 0 = canonical (depends on distribution), 1 = Power, 2 = Logit, 3 = Probit, 4 = Cloglog, 5 = Cauchit;
+	# currently only log (link = 1), logit (link = 2), probit (link = 3), and cloglog (link = 4) are supported!
+	link_type = ifdef ($link, 2);         		# $link = 2;
+	link_as_power_of_the_mean = 0.0;      		# $lpow = 0.0;
+
+	dispersion = ifdef ($disp, 0.0);            # $disp = 0.0;
+	eps = ifdef ($tol, 0.000001);               # $tol  = 0.000001;
+	max_iteration_IRLS = ifdef ($moi, 200);     # $moi  = 200;
+	max_iteration_CG = ifdef ($mii, 0);         # $mii  = 0;
+
+	variance_as_power_of_the_mean = as.double (variance_as_power_of_the_mean);
+	link_as_power_of_the_mean = as.double (link_as_power_of_the_mean);
+
+	dispersion = as.double (dispersion);
+	eps = as.double (eps);
+
+	# Default values for output statistics.
+	# "dispersion" is deliberately NOT reset to NaN here: it carries the $disp argument read
+	# above, and a NaN would make the "dispersion <= 0.0" test further down always false, so
+	# that neither the user-supplied value nor the estimate would ever be used.
+	regularization = 0.0;
+	termination_code     = 0.0;
+	min_beta             = 0.0 / 0.0;
+	i_min_beta           = 0.0 / 0.0;
+	max_beta             = 0.0 / 0.0;
+	i_max_beta           = 0.0 / 0.0;
+	intercept_value      = 0.0 / 0.0;
+	estimated_dispersion = 0.0 / 0.0;
+	deviance_nodisp      = 0.0 / 0.0;
+	deviance             = 0.0 / 0.0;
+
+	#####   INITIALIZE THE PARAMETERS   #####
+
+    num_records  = nrow (X);
+    num_features = ncol (X);
+    zeros_r = matrix (0, rows = num_records, cols = 1);
+    ones_r = 1 + zeros_r;
+
+    # Introduce the intercept, shift and rescale the columns of X if needed
+
+    if (intercept_status == 1 | intercept_status == 2) { # add the intercept column
+		X = append (X, ones_r);
+        num_features = ncol (X);
+	}
+
+    # scale_lambda switches the regularization off for the intercept (last) column
+    scale_lambda = matrix (1, rows = num_features, cols = 1);
+    if (intercept_status == 1 | intercept_status == 2) {
+		scale_lambda [num_features, 1] = 0;
+    }
+
+    if (intercept_status == 2) {  # scale-&-shift X columns to mean 0, variance 1
+		# Important assumption: X [, num_features] = ones_r
+        avg_X_cols = t(colSums(X)) / num_records;
+        var_X_cols = (t(colSums (X ^ 2)) - num_records * (avg_X_cols ^ 2)) / (num_records - 1);
+        # columns with non-positive variance are left unscaled (scale factor 1)
+        is_unsafe = ppred (var_X_cols, 0.0, "<=");
+        scale_X = 1.0 / sqrt (var_X_cols * (1 - is_unsafe) + is_unsafe);
+        scale_X [num_features, 1] = 1;
+        shift_X = - avg_X_cols * scale_X;
+        shift_X [num_features, 1] = 0;
+        rowSums_X_sq = (X ^ 2) %*% (scale_X ^ 2) + X %*% (2 * scale_X * shift_X) + sum (shift_X ^ 2);
+	} else {
+        scale_X = matrix (1, rows = num_features, cols = 1);
+        shift_X = matrix (0, rows = num_features, cols = 1);
+        rowSums_X_sq = rowSums (X ^ 2);
+    }
+
+    # Henceforth we replace "X" with "X %*% (SHIFT/SCALE TRANSFORM)" and rowSums(X ^ 2)
+    # with "rowSums_X_sq" in order to preserve the sparsity of X under shift and scale.
+    # The transform is then associatively applied to the other side of the expression,
+    # and is rewritten via "scale_X" and "shift_X" as follows:
+    #
+    # ssX_A  = (SHIFT/SCALE TRANSFORM) %*% A    --- is rewritten as:
+    # ssX_A  = diag (scale_X) %*% A;
+    # ssX_A [num_features, ] = ssX_A [num_features, ] + t(shift_X) %*% A;
+    #
+    # tssX_A = t(SHIFT/SCALE TRANSFORM) %*% A   --- is rewritten as:
+    # tssX_A = diag (scale_X) %*% A + shift_X %*% A [num_features, ];
+
+    # Initialize other input-dependent parameters
+
+    lambda = scale_lambda * regularization;
+    if (max_iteration_CG == 0) {
+		max_iteration_CG = num_features;
+    }
+
+    # Set up the canonical link, if requested [Then we have: Var(mu) * (d link / d mu) = const]
+
+    if (link_type == 0) {
+		if (distribution_type == 1) {
+			link_type = 1;
+            link_as_power_of_the_mean = 1.0 - variance_as_power_of_the_mean;
+		} else {
+			if (distribution_type == 2) {
+				link_type = 2;
+			}
+		}
+	}
+
+    # For power distributions and/or links, we use two constants,
+    # "variance as power of the mean" and "link_as_power_of_the_mean",
+    # to specify the variance and the link as arbitrary powers of the
+    # mean.  However, the variance-powers of 1.0 (Poisson family) and
+    # 2.0 (Gamma family) have to be treated as special cases, because
+    # these values integrate into logarithms.  The link-power of 0.0
+    # is also special as it represents the logarithm link.
+
+    num_response_columns = ncol (Y);
+    is_supported = 0;
+	if (num_response_columns == 2 & distribution_type == 2 & link_type >= 1 & link_type <= 4) { # BERNOULLI DISTRIBUTION
+		is_supported = 1;
+	}
+	if (num_response_columns == 1 & distribution_type == 2) {
+		print ("Error: Bernoulli response matrix has not been converted into two-column format.");
+    }
+
+	if (is_supported == 1) {
+
+		#####   INITIALIZE THE BETAS   #####
+
+        [beta, saturated_log_l, isNaN] =
+			glm_initialize (X, Y, distribution_type, variance_as_power_of_the_mean, link_type, link_as_power_of_the_mean, intercept_status, max_iteration_CG);
+
+		# print(" --- saturated logLik " + saturated_log_l);
+
+        if (isNaN == 0) {
+
+			#####  START OF THE MAIN PART  #####
+
+            sum_X_sq = sum (rowSums_X_sq);
+            trust_delta = 0.5 * sqrt (num_features) / max (sqrt (rowSums_X_sq));
+            ###  max_trust_delta = trust_delta * 10000.0;
+            log_l = 0.0;
+            deviance_nodisp = 0.0;
+            new_deviance_nodisp = 0.0;
+            isNaN_log_l = 2;
+            newbeta = beta;
+            g = matrix (0.0, rows = num_features, cols = 1);
+            g_norm = sqrt (sum ((g + lambda * beta) ^ 2));
+            accept_new_beta = 1;
+            reached_trust_boundary = 0;
+            neg_log_l_change_predicted = 0.0;
+            i_IRLS = 0;
+
+            # print ("BEGIN IRLS ITERATIONS...");
+
+            ssX_newbeta = diag (scale_X) %*% newbeta;
+            ssX_newbeta [num_features, ] = ssX_newbeta [num_features, ] + t(shift_X) %*% newbeta;
+            all_linear_terms = X %*% ssX_newbeta;
+
+            [new_log_l, isNaN_new_log_l] = glm_log_likelihood_part
+				(all_linear_terms, Y, distribution_type, variance_as_power_of_the_mean, link_type, link_as_power_of_the_mean);
+
+            if (isNaN_new_log_l == 0) {
+				new_deviance_nodisp = 2.0 * (saturated_log_l - new_log_l);
+                new_log_l = new_log_l - 0.5 * sum (lambda * newbeta ^ 2);
+            }
+
+            while (termination_code == 0) {
+				accept_new_beta = 1;
+
+                if (i_IRLS > 0) {
+					if (isNaN_log_l == 0) {
+						accept_new_beta = 0;
+                    }
+
+                    # Decide whether to accept a new iteration point and update the trust region
+                    # See Alg. 4.1 on p. 69 of "Numerical Optimization" 2nd ed. by Nocedal and Wright
+
+                    rho = (- new_log_l + log_l) / neg_log_l_change_predicted;
+					if (rho < 0.25 | isNaN_new_log_l == 1) {
+						trust_delta = 0.25 * trust_delta;
+					}
+					if (rho > 0.75 & isNaN_new_log_l == 0 & reached_trust_boundary == 1) {
+						trust_delta = 2 * trust_delta;
+
+						### if (trust_delta > max_trust_delta) {
+						###     trust_delta = max_trust_delta;
+						### }
+					}
+					if (rho > 0.1 & isNaN_new_log_l == 0) {
+						accept_new_beta = 1;
+					}
+				}
+
+                if (accept_new_beta == 1) {
+					beta = newbeta;  log_l = new_log_l;  deviance_nodisp = new_deviance_nodisp;  isNaN_log_l = isNaN_new_log_l;
+
+					[g_Y, w] = glm_dist (all_linear_terms, Y, distribution_type, variance_as_power_of_the_mean, link_type, link_as_power_of_the_mean);
+
+					# We introduced these variables to avoid roundoff errors:
+					#     g_Y = y_residual / (y_var * link_grad);
+					#     w   = 1.0 / (y_var * link_grad * link_grad);
+
+					gXY = - t(X) %*% g_Y;
+					g = diag (scale_X) %*% gXY + shift_X %*% gXY [num_features, ];
+					g_norm = sqrt (sum ((g + lambda * beta) ^ 2));
+				}
+
+                [z, neg_log_l_change_predicted, num_CG_iters, reached_trust_boundary] =
+					get_CG_Steihaug_point (X, scale_X, shift_X, w, g, beta, lambda, trust_delta, max_iteration_CG);
+
+				newbeta = beta + z;
+
+				ssX_newbeta = diag (scale_X) %*% newbeta;
+				ssX_newbeta [num_features, ] = ssX_newbeta [num_features, ] + t(shift_X) %*% newbeta;
+				all_linear_terms = X %*% ssX_newbeta;
+
+				[new_log_l, isNaN_new_log_l] = glm_log_likelihood_part
+					(all_linear_terms, Y, distribution_type, variance_as_power_of_the_mean, link_type, link_as_power_of_the_mean);
+
+				if (isNaN_new_log_l == 0) {
+					new_deviance_nodisp = 2.0 * (saturated_log_l - new_log_l);
+					new_log_l = new_log_l - 0.5 * sum (lambda * newbeta ^ 2);
+				}
+
+				log_l_change = new_log_l - log_l;               # R's criterion for termination: |dev - devold|/(|dev| + 0.1) < eps
+
+				if (reached_trust_boundary == 0 & isNaN_new_log_l == 0 &
+				   (2.0 * abs (log_l_change) < eps * (deviance_nodisp + 0.1) | abs (log_l_change) < (abs (log_l) + abs (new_log_l)) * 0.00000000000001) ) {
+					termination_code = 1;
+				}
+				rho = - log_l_change / neg_log_l_change_predicted;
+				z_norm = sqrt (sum (z * z));
+
+				i_IRLS = i_IRLS + 1;
+
+				if (i_IRLS == max_iteration_IRLS) {
+					termination_code = 2;
+				}
+			}
+
+            beta = newbeta;
+            log_l = new_log_l;
+            deviance_nodisp = new_deviance_nodisp;
+
+            #---------------------------- last part
+
+			if (termination_code != 1) {
+				print ("One of the runs of GLM did not converged in " + i_IRLS + " steps!");
+			}
+
+            ##### COMPUTE AIC #####
+
+			if (distribution_type == 2 & link_type >= 1 & link_type <= 4) {
+				AIC = -2 * log_l;
+				# the parameter-count penalty is skipped when sum (X) is 0 (the all-zero empty model)
+				if (sum (X) != 0) {
+					AIC = AIC + 2 * num_features;
+				}
+			} else {
+				stop ("Currently only the Bernoulli distribution family the following link functions are supported: log, logit, probit, and cloglog!");
+			}
+
+            # Everything below only runs for the calls that pass an output location
+            if (fileB != " ") {
+				fileO = ifdef ($O, " ");
+				fileS = $S;
+				fmt  = ifdef ($fmt, "text");
+
+				# Output which features give the best AIC and are being used for linear regression
+				write (Selected, fileS, format=fmt);
+
+				ssX_beta = diag (scale_X) %*% beta;
+                ssX_beta [num_features, ] = ssX_beta [num_features, ] + t(shift_X) %*% beta;
+				if (intercept_status == 2) {
+					beta_out = append (ssX_beta, beta);
+                } else {
+					beta_out = ssX_beta;
+                }
+
+                # p counts the entries of X equal to 1; p == num_records identifies the
+                # single all-one column, i.e. the intercept-only model
+                if (intercept_status == 0 & num_features == 1) {
+					p = sum (ppred (X, 1, "=="));
+					if (p == num_records) {
+						beta_out = beta_out[1,];
+					}
+                }
+
+
+                if (intercept_status == 1 | intercept_status == 2) {
+					intercept_value = castAsScalar (beta_out [num_features, 1]);
+                    beta_noicept = beta_out [1 : (num_features - 1), 1];
+                } else {
+					beta_noicept = beta_out [1 : num_features, 1];
+                }
+                min_beta = min (beta_noicept);
+                max_beta = max (beta_noicept);
+                tmp_i_min_beta = rowIndexMin (t(beta_noicept))
+                i_min_beta = castAsScalar (tmp_i_min_beta [1, 1]);
+                tmp_i_max_beta = rowIndexMax (t(beta_noicept))
+                i_max_beta = castAsScalar (tmp_i_max_beta [1, 1]);
+
+                #####  OVER-DISPERSION PART  #####
+
+                all_linear_terms = X %*% ssX_beta;
+				[g_Y, w] = glm_dist (all_linear_terms, Y, distribution_type, variance_as_power_of_the_mean, link_type, link_as_power_of_the_mean);
+
+                pearson_residual_sq = g_Y ^ 2 / w;
+                pearson_residual_sq = replace (target = pearson_residual_sq, pattern = 0.0/0.0, replacement = 0);
+                # pearson_residual_sq = (y_residual ^ 2) / y_var;
+
+                if (num_records > num_features) {
+					estimated_dispersion = sum (pearson_residual_sq) / (num_records - num_features);
+                }
+                # a non-positive $disp (the default is 0.0) means "use the estimate"
+                if (dispersion <= 0.0) {
+					dispersion = estimated_dispersion;
+                }
+                deviance = deviance_nodisp / dispersion;
+
+                #####  END OF THE MAIN PART  #####
+
+                str = "BETA_MIN," + min_beta;
+                str = append (str, "BETA_MIN_INDEX," + i_min_beta);
+                str = append (str, "BETA_MAX," + max_beta);
+                str = append (str, "BETA_MAX_INDEX," + i_max_beta);
+                str = append (str, "INTERCEPT," + intercept_value);
+                str = append (str, "DISPERSION," + dispersion);
+                str = append (str, "DISPERSION_EST," + estimated_dispersion);
+                str = append (str, "DEVIANCE_UNSCALED," + deviance_nodisp);
+                str = append (str, "DEVIANCE_SCALED," + deviance);
+
+                if (fileO != " ") {
+					write (str, fileO);
+                }
+				else {
+					print (str);
+                }
+
+				# Prepare the output matrix
+				print ("Writing the output matrix...");
+                # Empty models (intercept-only or all-zero X): write the betas and end the script here
+                if (intercept_status == 0 & num_features == 1) {
+					if (p == num_records) {
+						beta_out_tmp = matrix (0, rows = num_features_orig + 1, cols = 1);
+						beta_out_tmp[num_features_orig + 1,] = beta_out;
+						beta_out = beta_out_tmp;
+						write (beta_out, fileB, format=fmt);
+						stop ("");
+					} else if (sum (X) == 0){
+						beta_out = matrix (0, rows = num_features_orig, cols = 1);
+						write (beta_out, fileB, format=fmt);
+						stop ("");
+					}
+				}
+
+				# Scatter the fitted betas back to the positions of the selected features in the
+				# original feature numbering; the intercept (if any) goes to row num_features_orig + 1
+				no_selected = ncol (Selected);
+				max_selected = max (Selected);
+				last = max_selected + 1;
+
+				if (intercept_status != 0) {
+
+					Selected_ext = append (Selected, as.matrix (last));
+					P1 = table (seq (1, ncol (Selected_ext)), t(Selected_ext));
+
+					if (intercept_status == 2) {
+
+						P1_ssX_beta = P1 * ssX_beta;
+						P2_ssX_beta = colSums (P1_ssX_beta);
+						P1_beta = P1 * beta;
+						P2_beta = colSums (P1_beta);
+
+						if (max_selected < num_features_orig) {
+
+							P2_ssX_beta = append (P2_ssX_beta, matrix (0, rows=1, cols=(num_features_orig - max_selected)));
+							P2_beta = append (P2_beta, matrix (0, rows=1, cols=(num_features_orig - max_selected)));
+
+							P2_ssX_beta[1, num_features_orig+1] = P2_ssX_beta[1, max_selected + 1];
+							P2_ssX_beta[1, max_selected + 1] = 0;
+
+							P2_beta[1, num_features_orig+1] = P2_beta[1, max_selected + 1];
+							P2_beta[1, max_selected + 1] = 0;
+
+						}
+						beta_out = append (t(P2_ssX_beta), t(P2_beta));
+
+					} else {
+
+						P1_beta = P1 * beta;
+						P2_beta = colSums (P1_beta);
+
+						if (max_selected < num_features_orig) {
+							P2_beta = append (P2_beta, matrix (0, rows=1, cols=(num_features_orig - max_selected)));
+							P2_beta[1, num_features_orig+1] = P2_beta[1, max_selected + 1] ;
+							P2_beta[1, max_selected + 1] = 0;
+						}
+						beta_out = t(P2_beta);
+
+					}
+				} else {
+
+					P1 = table (seq (1, no_selected), t(Selected));
+					P1_beta = P1 * beta;
+					P2_beta = colSums (P1_beta);
+
+					if (max_selected < num_features_orig) {
+						P2_beta = append (P2_beta, matrix (0, rows=1, cols=(num_features_orig - max_selected)));
+					}
+
+					beta_out = t(P2_beta);
+				}
+
+				write ( beta_out, fileB, format=fmt );
+
+			}
+
+		} else {
+			stop ("Input matrices X and/or Y are out of range!");
+        }
+	} else {
+		stop ("Response matrix with " + num_response_columns + " columns, distribution family (" + distribution_type + ", " + variance_as_power_of_the_mean
+               + ") and link family (" + link_type + ", " + link_as_power_of_the_mean + ") are NOT supported together.");
+    }
+}
+
+# glm_initialize: computes the starting point for the GLM iterations.
+#
+# Arguments:
+#   X             feature matrix; only ncol(X) is used here, plus X itself in the straightenX call
+#   Y             response matrix; column 1 is used, and for dist_type 2 also the row sums
+#   dist_type     distribution family code (1 and 2 are handled below)
+#   var_power     passed through to glm_log_likelihood_part
+#   link_type     link function code (1 = power/log, 2 = logit, 3 = probit, 4 = cloglog, 5 = cauchit)
+#   link_power    exponent of the power link; 0.0 selects the log link
+#   icept_status  1 or 2 means the last entry of beta is treated as the intercept
+#   max_iter_CG   iteration limit handed to straightenX
+# Returns:
+#   beta             initial coefficient vector, ncol(X) x 1
+#   saturated_log_l  value returned by glm_log_likelihood_part for the link-transformed
+#                    responses (the caller uses it to compute the deviance)
+#   isNaN            1 if the responses are outside the domain of the chosen link
+#                    (or if glm_log_likelihood_part reports a problem), else 0
+glm_initialize = function (Matrix[double] X, Matrix[double] Y, int dist_type, double var_power, int link_type, double link_power, int icept_status, int max_iter_CG)
+  return (Matrix[double] beta, double saturated_log_l, int isNaN)
+{
+    saturated_log_l = 0.0;
+    isNaN = 0;
+    y_corr = Y [, 1];
+    if (dist_type == 2) {
+      # y_corr = column 1 of Y divided by the row sum; rows whose sum is 0 evaluate to exactly 0.5
+      n_corr = rowSums (Y);
+      is_n_zero = ppred (n_corr, 0.0, "==");
+      y_corr = Y [, 1] / (n_corr + is_n_zero) + (0.5 - Y [, 1]) * is_n_zero;    
+    }
+    # linear_terms = link function applied to y_corr.  In the branches below, terms of the form
+    # "ind / (1.0 - ind)" are 0 where the 0/1 indicator is 0 and 1/0 where it is 1
+    # (presumably +Infinity under floating-point division -- confirm).
+    linear_terms = y_corr;
+    if (dist_type == 1 & link_type == 1) { # POWER DISTRIBUTION
+      if          (link_power ==  0.0) {
+        if (sum (ppred (y_corr, 0.0, "<")) == 0) {
+          is_zero_y_corr = ppred (y_corr, 0.0, "==");
+          linear_terms = log (y_corr + is_zero_y_corr) - is_zero_y_corr / (1.0 - is_zero_y_corr);
+        } else { isNaN = 1; }
+      } else { if (link_power ==  1.0) {
+        linear_terms = y_corr;
+      } else { if (link_power == -1.0) {
+        linear_terms = 1.0 / y_corr;
+      } else { if (link_power ==  0.5) {
+        if (sum (ppred (y_corr, 0.0, "<")) == 0) {
+          linear_terms = sqrt (y_corr);
+        } else { isNaN = 1; }
+      } else { if (link_power >   0.0) {
+        if (sum (ppred (y_corr, 0.0, "<")) == 0) {
+          is_zero_y_corr = ppred (y_corr, 0.0, "==");
+          linear_terms = (y_corr + is_zero_y_corr) ^ link_power - is_zero_y_corr;
+        } else { isNaN = 1; }
+      } else {
+        if (sum (ppred (y_corr, 0.0, "<=")) == 0) {
+          linear_terms = y_corr ^ link_power;
+        } else { isNaN = 1; }
+      }}}}}
+    }
+    if (dist_type == 2 & link_type >= 1 & link_type <= 5)
+    { # BINOMIAL/BERNOULLI DISTRIBUTION
+      if          (link_type == 1 & link_power == 0.0)  { # Binomial.log
+        if (sum (ppred (y_corr, 0.0, "<")) == 0) {
+          is_zero_y_corr = ppred (y_corr, 0.0, "==");
+          linear_terms = log (y_corr + is_zero_y_corr) - is_zero_y_corr / (1.0 - is_zero_y_corr);
+        } else { isNaN = 1; }
+      } else { if (link_type == 1 & link_power >  0.0)  { # Binomial.power_nonlog pos
+        if (sum (ppred (y_corr, 0.0, "<")) == 0) {
+          is_zero_y_corr = ppred (y_corr, 0.0, "==");
+          linear_terms = (y_corr + is_zero_y_corr) ^ link_power - is_zero_y_corr;
+        } else { isNaN = 1; }
+      } else { if (link_type == 1)                      { # Binomial.power_nonlog neg
+        if (sum (ppred (y_corr, 0.0, "<=")) == 0) {
+          linear_terms = y_corr ^ link_power;
+        } else { isNaN = 1; }
+      } else { 
+        # Links 2..5: entries of y_corr that are <= 0 or >= 1 are replaced by 0.5 before the
+        # link is evaluated; the indicator terms appended to each formula then subtract 1/0
+        # at the former and add 1/0 at the latter.
+        is_zero_y_corr = ppred (y_corr, 0.0, "<=");
+        is_one_y_corr  = ppred (y_corr, 1.0, ">=");
+        y_corr = y_corr * (1.0 - is_zero_y_corr) * (1.0 - is_one_y_corr) + 0.5 * (is_zero_y_corr + is_one_y_corr);
+        if (link_type == 2)                           { # Binomial.logit
+          linear_terms = log (y_corr / (1.0 - y_corr)) 
+          + is_one_y_corr / (1.0 - is_one_y_corr) - is_zero_y_corr / (1.0 - is_zero_y_corr);
+        } else { if (link_type == 3)                  { # Binomial.probit
+          # Rational approximation of the inverse Gaussian CDF, evaluated on the lower half
+          # (y mirrored to <= 0.5) and sign-flipped for y > 0.5.
+          # NOTE(review): the coefficients look like Abramowitz & Stegun 26.2.23 -- confirm.
+          y_below_half = y_corr + (1.0 - 2.0 * y_corr) * ppred (y_corr, 0.5, ">");
+          t = sqrt (- 2.0 * log (y_below_half));
+          approx_inv_Gauss_CDF = - t + (2.515517 + t * (0.802853 + t * 0.010328)) / (1.0 + t * (1.432788 + t * (0.189269 + t * 0.001308)));
+          linear_terms = approx_inv_Gauss_CDF * (1.0 - 2.0 * ppred (y_corr, 0.5, ">"))
+          + is_one_y_corr / (1.0 - is_one_y_corr) - is_zero_y_corr / (1.0 - is_zero_y_corr);
+        } else { if (link_type == 4)                  { # Binomial.cloglog
+          # the second line cancels the finite value log (- log (0.5)) produced at the replaced entries
+          linear_terms = log (- log (1.0 - y_corr))
+          - log (- log (0.5)) * (is_zero_y_corr + is_one_y_corr)
+          + is_one_y_corr / (1.0 - is_one_y_corr) - is_zero_y_corr / (1.0 - is_zero_y_corr);
+        } else { if (link_type == 5)                  { # Binomial.cauchit
+          linear_terms = tan ((y_corr - 0.5) * 3.1415926535897932384626433832795)
+          + is_one_y_corr / (1.0 - is_one_y_corr) - is_zero_y_corr / (1.0 - is_zero_y_corr);
+        }}  }}}}}
+    }
+    
+    # Log-likelihood at the link-transformed responses themselves
+    if (isNaN == 0) {
+      [saturated_log_l, isNaN] = 
+        glm_log_likelihood_part (linear_terms, Y, dist_type, var_power, link_type, link_power);
+    }
+    
+    # desired_eta: value of the linear predictor the initial beta should produce.
+    # It is 0.0 for (dist_type 1, log link) and for dist_type 2 with link_type >= 2;
+    # otherwise it is the link evaluated at 0.5 for the power/log link, and 0.5 in the remaining case.
+    if ((dist_type == 1 & link_type == 1 & link_power == 0.0) |
+          (dist_type == 2 & link_type >= 2))
+    {    
+      desired_eta = 0.0;
+    } else { if (link_type == 1 & link_power == 0.0) {
+      desired_eta = log (0.5);
+    } else { if (link_type == 1) {
+      desired_eta = 0.5 ^ link_power;
+    } else {
+      desired_eta = 0.5;
+    }}}
+    
+    beta = matrix (0.0, rows = ncol(X), cols = 1);
+    
+    if (desired_eta != 0.0) {
+      if (icept_status == 1 | icept_status == 2) {
+        # with an intercept, only the last beta entry is set and all other entries stay 0
+        beta [nrow(beta), 1] = desired_eta;
+      } else {
+        # We want: avg (X %*% ssX_transform %*% beta) = desired_eta
+        # Note that "ssX_transform" is trivial here, hence ignored
+        
+        # straightenX is defined elsewhere in this file (not visible here)
+        beta = straightenX (X, 0.000001, max_iter_CG);  
+        beta = beta * desired_eta;
+      }   }   }
+
+
+glm_dist = function (Matrix[double] linear_terms, Matrix[double] Y,
+                     int dist_type, double var_power, int link_type, double link_power)
+  return (Matrix[double] g_Y, Matrix[double] w)
+# ORIGINALLY we returned more meaningful vectors, namely:
+# Matrix[double] y_residual    : y - y_mean, i.e. y observed - y predicted
+# Matrix[double] link_gradient : derivative of the link function
+# Matrix[double] var_function  : variance without dispersion, i.e. the V(mu) function
+# BUT, this caused roundoff errors, so we had to compute "directly useful" vectors
+# and skip over the "meaningful intermediaries".  Now we output these two variables:
+#     g_Y = y_residual / (var_function * link_gradient);
+#     w   = 1.0 / (var_function * link_gradient ^ 2);
+{
+    num_records = nrow (linear_terms);
+    zeros_r = matrix (0.0, rows = num_records, cols = 1);
+    ones_r = 1 + zeros_r;
+    g_Y  = zeros_r;
+    w  = zeros_r;
+    
+    # Some constants
+    
+    one_over_sqrt_two_pi = 0.39894228040143267793994605993438;
+    ones_2 = matrix (1.0, rows = 1, cols = 2);
+    p_one_m_one = ones_2;
+    p_one_m_one [1, 2] = -1.0;
+    m_one_p_one = ones_2;
+    m_one_p_one [1, 1] = -1.0;
+    zero_one = ones_2;
+    zero_one [1, 1] = 0.0;
+    one_zero = ones_2;
+    one_zero [1, 2] =

<TRUNCATED>


[15/55] [partial] incubator-systemml git commit: [SYSTEMML-482] [SYSTEMML-480] Adding a Git attributes file to enfore Unix-styled line endings, and normalizing all of the line endings.

Posted by du...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/aggregate/AllMin.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/aggregate/AllMin.R b/src/test/scripts/functions/aggregate/AllMin.R
index e67ca08..827e4a7 100644
--- a/src/test/scripts/functions/aggregate/AllMin.R
+++ b/src/test/scripts/functions/aggregate/AllMin.R
@@ -1,29 +1,29 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-args <- commandArgs(TRUE)
-
-library("Matrix") 
-
-A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
-B <- as.matrix(min(A)); 
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+args <- commandArgs(TRUE)
+
+library("Matrix") 
+
+A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
+B <- as.matrix(min(A)); 
+
 writeMM(as(B, "CsparseMatrix"), paste(args[2], "B", sep="")); 
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/aggregate/AllProd.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/aggregate/AllProd.R b/src/test/scripts/functions/aggregate/AllProd.R
index b4c9907..a87a12c 100644
--- a/src/test/scripts/functions/aggregate/AllProd.R
+++ b/src/test/scripts/functions/aggregate/AllProd.R
@@ -1,29 +1,29 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-args <- commandArgs(TRUE)
-
-library("Matrix") 
-
-A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
-B <- as.matrix(prod(A)); 
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+args <- commandArgs(TRUE)
+
+library("Matrix") 
+
+A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
+B <- as.matrix(prod(A)); 
+
 writeMM(as(B, "CsparseMatrix"), paste(args[2], "B", sep="")); 
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/aggregate/AllSum.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/aggregate/AllSum.R b/src/test/scripts/functions/aggregate/AllSum.R
index 7f979c1..0ed03d9 100644
--- a/src/test/scripts/functions/aggregate/AllSum.R
+++ b/src/test/scripts/functions/aggregate/AllSum.R
@@ -1,29 +1,29 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-args <- commandArgs(TRUE)
-
-library("Matrix")
-
-A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
-B <- as.matrix(sum(A));
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+args <- commandArgs(TRUE)
+
+library("Matrix")
+
+A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
+B <- as.matrix(sum(A));
+
 writeMM(as(B, "CsparseMatrix"), paste(args[2], "B", sep="")); 
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/aggregate/ColMaxs.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/aggregate/ColMaxs.R b/src/test/scripts/functions/aggregate/ColMaxs.R
index 2c3ea32..e0dce9b 100644
--- a/src/test/scripts/functions/aggregate/ColMaxs.R
+++ b/src/test/scripts/functions/aggregate/ColMaxs.R
@@ -1,34 +1,34 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-args <- commandArgs(TRUE)
-
-if(!("matrixStats" %in% rownames(installed.packages()))){
-   install.packages("matrixStats")
-}
-
-library("Matrix")
-library("matrixStats") 
-
-A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
-B <- t(colMaxs(A));
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+args <- commandArgs(TRUE)
+
+if(!("matrixStats" %in% rownames(installed.packages()))){
+   install.packages("matrixStats")
+}
+
+library("Matrix")
+library("matrixStats") 
+
+A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
+B <- t(colMaxs(A));
+
 writeMM(as(B, "CsparseMatrix"), paste(args[2], "B", sep="")); 
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/aggregate/ColMeans.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/aggregate/ColMeans.R b/src/test/scripts/functions/aggregate/ColMeans.R
index 80226f9..e59b06b 100644
--- a/src/test/scripts/functions/aggregate/ColMeans.R
+++ b/src/test/scripts/functions/aggregate/ColMeans.R
@@ -1,29 +1,29 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-args <- commandArgs(TRUE)
-
-library("Matrix")
-
-A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
-B <- t(colMeans(A));
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+args <- commandArgs(TRUE)
+
+library("Matrix")
+
+A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
+B <- t(colMeans(A));
+
 writeMM(as(B, "CsparseMatrix"), paste(args[2], "B", sep="")); 
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/aggregate/ColMins.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/aggregate/ColMins.R b/src/test/scripts/functions/aggregate/ColMins.R
index 80b0f02..5177529 100644
--- a/src/test/scripts/functions/aggregate/ColMins.R
+++ b/src/test/scripts/functions/aggregate/ColMins.R
@@ -1,34 +1,34 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-args <- commandArgs(TRUE)
-
-if(!("matrixStats" %in% rownames(installed.packages()))){
-   install.packages("matrixStats")
-}
-
-library("Matrix")
-library("matrixStats") 
-
-A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
-B <- t(colMins(A)); 
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+args <- commandArgs(TRUE)
+
+if(!("matrixStats" %in% rownames(installed.packages()))){
+   install.packages("matrixStats")
+}
+
+library("Matrix")
+library("matrixStats") 
+
+A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
+B <- t(colMins(A)); 
+
 writeMM(as(B, "CsparseMatrix"), paste(args[2], "B", sep="")); 
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/aggregate/ColSums.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/aggregate/ColSums.R b/src/test/scripts/functions/aggregate/ColSums.R
index 0f3e6e2..459108b 100644
--- a/src/test/scripts/functions/aggregate/ColSums.R
+++ b/src/test/scripts/functions/aggregate/ColSums.R
@@ -1,29 +1,29 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-args <- commandArgs(TRUE)
-
-library("Matrix")
-
-A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
-B <- t(colSums(A));
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+args <- commandArgs(TRUE)
+
+library("Matrix")
+
+A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
+B <- t(colSums(A));
+
 writeMM(as(B, "CsparseMatrix"), paste(args[2], "B", sep="")); 
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/aggregate/DiagSum.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/aggregate/DiagSum.R b/src/test/scripts/functions/aggregate/DiagSum.R
index 5dc92ac..04b37c5 100644
--- a/src/test/scripts/functions/aggregate/DiagSum.R
+++ b/src/test/scripts/functions/aggregate/DiagSum.R
@@ -1,29 +1,29 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-args <- commandArgs(TRUE)
-
-library("Matrix")
-
-A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
-B <- as.matrix(sum(diag(A)));
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+args <- commandArgs(TRUE)
+
+library("Matrix")
+
+A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
+B <- as.matrix(sum(diag(A)));
+
 writeMM(as(B, "CsparseMatrix"), paste(args[2], "B", sep="")); 
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/aggregate/GroupedAggregate.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/aggregate/GroupedAggregate.R b/src/test/scripts/functions/aggregate/GroupedAggregate.R
index 34d4e28..63acceb 100644
--- a/src/test/scripts/functions/aggregate/GroupedAggregate.R
+++ b/src/test/scripts/functions/aggregate/GroupedAggregate.R
@@ -1,61 +1,61 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-args <- commandArgs(TRUE)
-
-library("Matrix")
-library("moments")
-
-A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
-B <- as.matrix(readMM(paste(args[1], "B.mtx", sep="")));
-fn = as.integer(args[2]);
-
-if( fn==0 )
-{
-   C = aggregate(as.vector(A), by=list(as.vector(B)), FUN=sum)[,2]
-}
-
-if( fn==1 ) 
-{
-   C = aggregate(as.vector(A), by=list(as.vector(B)), FUN=length)[,2]
-}
-
-if( fn==2 ) 
-{
-   C = aggregate(as.vector(A), by=list(as.vector(B)), FUN=mean)[,2]
-}
-
-if( fn==3 ) 
-{
-   C = aggregate(as.vector(A), by=list(as.vector(B)), FUN=var)[,2]
-}
-
-if( fn==4 ) 
-{
-   C = aggregate(as.vector(A), by=list(as.vector(B)), FUN=moment, order=3, central=TRUE)[,2]
-}
-
-if( fn==5 ) 
-{
-   C = aggregate(as.vector(A), by=list(as.vector(B)), FUN=moment, order=4, central=TRUE)[,2]
-}
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+args <- commandArgs(TRUE)
+
+library("Matrix")
+library("moments")
+
+A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
+B <- as.matrix(readMM(paste(args[1], "B.mtx", sep="")));
+fn = as.integer(args[2]);
+
+if( fn==0 )
+{
+   C = aggregate(as.vector(A), by=list(as.vector(B)), FUN=sum)[,2]
+}
+
+if( fn==1 ) 
+{
+   C = aggregate(as.vector(A), by=list(as.vector(B)), FUN=length)[,2]
+}
+
+if( fn==2 ) 
+{
+   C = aggregate(as.vector(A), by=list(as.vector(B)), FUN=mean)[,2]
+}
+
+if( fn==3 ) 
+{
+   C = aggregate(as.vector(A), by=list(as.vector(B)), FUN=var)[,2]
+}
+
+if( fn==4 ) 
+{
+   C = aggregate(as.vector(A), by=list(as.vector(B)), FUN=moment, order=3, central=TRUE)[,2]
+}
+
+if( fn==5 ) 
+{
+   C = aggregate(as.vector(A), by=list(as.vector(B)), FUN=moment, order=4, central=TRUE)[,2]
+}
+
 writeMM(as(C, "CsparseMatrix"), paste(args[3], "C", sep="")); 
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/aggregate/GroupedAggregateMatrix.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/aggregate/GroupedAggregateMatrix.R b/src/test/scripts/functions/aggregate/GroupedAggregateMatrix.R
index 76e2d79..d67b978 100644
--- a/src/test/scripts/functions/aggregate/GroupedAggregateMatrix.R
+++ b/src/test/scripts/functions/aggregate/GroupedAggregateMatrix.R
@@ -1,70 +1,70 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-args <- commandArgs(TRUE)
-
-library("Matrix")
-library("moments")
-
-A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
-B <- as.matrix(readMM(paste(args[1], "B.mtx", sep="")));
-fn = as.integer(args[2]);
-
-
-R = matrix(0,17,ncol(A));
-for( j in 1:ncol(A) )
-{
-Ai = A[,j];
-
-if( fn==0 )
-{
-   C = aggregate(as.vector(Ai), by=list(as.vector(B)), FUN=sum)[,2]
-}
-
-if( fn==1 ) 
-{
-   C = aggregate(as.vector(Ai), by=list(as.vector(B)), FUN=length)[,2]
-}
-
-if( fn==2 ) 
-{
-   C = aggregate(as.vector(Ai), by=list(as.vector(B)), FUN=mean)[,2]
-}
-
-if( fn==3 ) 
-{
-   C = aggregate(as.vector(Ai), by=list(as.vector(B)), FUN=var)[,2]
-}
-
-if( fn==4 ) 
-{
-   C = aggregate(as.vector(Ai), by=list(as.vector(B)), FUN=moment, order=3, central=TRUE)[,2]
-}
-
-if( fn==5 ) 
-{
-   C = aggregate(as.vector(Ai), by=list(as.vector(B)), FUN=moment, order=4, central=TRUE)[,2]
-}
-
-R[,j] = C;
-}
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+args <- commandArgs(TRUE)
+
+library("Matrix")
+library("moments")
+
+A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
+B <- as.matrix(readMM(paste(args[1], "B.mtx", sep="")));
+fn = as.integer(args[2]);
+
+
+R = matrix(0,17,ncol(A));
+for( j in 1:ncol(A) )
+{
+Ai = A[,j];
+
+if( fn==0 )
+{
+   C = aggregate(as.vector(Ai), by=list(as.vector(B)), FUN=sum)[,2]
+}
+
+if( fn==1 ) 
+{
+   C = aggregate(as.vector(Ai), by=list(as.vector(B)), FUN=length)[,2]
+}
+
+if( fn==2 ) 
+{
+   C = aggregate(as.vector(Ai), by=list(as.vector(B)), FUN=mean)[,2]
+}
+
+if( fn==3 ) 
+{
+   C = aggregate(as.vector(Ai), by=list(as.vector(B)), FUN=var)[,2]
+}
+
+if( fn==4 ) 
+{
+   C = aggregate(as.vector(Ai), by=list(as.vector(B)), FUN=moment, order=3, central=TRUE)[,2]
+}
+
+if( fn==5 ) 
+{
+   C = aggregate(as.vector(Ai), by=list(as.vector(B)), FUN=moment, order=4, central=TRUE)[,2]
+}
+
+R[,j] = C;
+}
+
 writeMM(as(R, "CsparseMatrix"), paste(args[3], "C", sep="")); 
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/aggregate/GroupedAggregateWeights.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/aggregate/GroupedAggregateWeights.R b/src/test/scripts/functions/aggregate/GroupedAggregateWeights.R
index 5aa3e63..eea2f94 100644
--- a/src/test/scripts/functions/aggregate/GroupedAggregateWeights.R
+++ b/src/test/scripts/functions/aggregate/GroupedAggregateWeights.R
@@ -1,71 +1,71 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-args <- commandArgs(TRUE)
-
-library("Matrix")
-library("moments")
-
-A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
-B <- as.matrix(readMM(paste(args[1], "B.mtx", sep="")));
-C <- as.matrix(readMM(paste(args[1], "C.mtx", sep="")));
-fn = as.integer(args[2]);
-
-if( nrow(A)==1 & ncol(A)>1 ){ #row vector
-   A = t(A);
-}
-
-if( fn==0 )
-{
-   #special case weights
-   D = aggregate(as.vector(A*C), by=list(as.vector(B)), FUN=sum)[,2]
-}
-
-if( fn==1 ) 
-{
-   #special case weights
-   D = aggregate(as.vector(C), by=list(as.vector(B)), FUN=sum)[,2]
-}
-
-if( fn==2 ) 
-{
-   #special case weights
-   D1 = aggregate(as.vector(A*C), by=list(as.vector(B)), FUN=sum)[,2]
-	 D2 = aggregate(as.vector(C), by=list(as.vector(B)), FUN=sum)[,2]
-   D = D1/D2;
-}
-
-if( fn==3 ) 
-{
-   D = aggregate(as.vector(A*C), by=list(as.vector(B)), FUN=var)[,2]
-}
-
-if( fn==4 ) 
-{
-   D = aggregate(as.vector(A*C), by=list(as.vector(B)), FUN=moment, order=3, central=TRUE)[,2]
-}
-
-if( fn==5 ) 
-{
-   D = aggregate(as.vector(A*C), by=list(as.vector(B)), FUN=moment, order=4, central=TRUE)[,2]
-}
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+args <- commandArgs(TRUE)
+
+library("Matrix")
+library("moments")
+
+A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
+B <- as.matrix(readMM(paste(args[1], "B.mtx", sep="")));
+C <- as.matrix(readMM(paste(args[1], "C.mtx", sep="")));
+fn = as.integer(args[2]);
+
+if( nrow(A)==1 & ncol(A)>1 ){ #row vector
+   A = t(A);
+}
+
+if( fn==0 )
+{
+   #special case weights
+   D = aggregate(as.vector(A*C), by=list(as.vector(B)), FUN=sum)[,2]
+}
+
+if( fn==1 ) 
+{
+   #special case weights
+   D = aggregate(as.vector(C), by=list(as.vector(B)), FUN=sum)[,2]
+}
+
+if( fn==2 ) 
+{
+   #special case weights
+   D1 = aggregate(as.vector(A*C), by=list(as.vector(B)), FUN=sum)[,2]
+	 D2 = aggregate(as.vector(C), by=list(as.vector(B)), FUN=sum)[,2]
+   D = D1/D2;
+}
+
+if( fn==3 ) 
+{
+   D = aggregate(as.vector(A*C), by=list(as.vector(B)), FUN=var)[,2]
+}
+
+if( fn==4 ) 
+{
+   D = aggregate(as.vector(A*C), by=list(as.vector(B)), FUN=moment, order=3, central=TRUE)[,2]
+}
+
+if( fn==5 ) 
+{
+   D = aggregate(as.vector(A*C), by=list(as.vector(B)), FUN=moment, order=4, central=TRUE)[,2]
+}
+
 writeMM(as(D, "CsparseMatrix"), paste(args[3], "D", sep="")); 
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/aggregate/RowIndexMaxs.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/aggregate/RowIndexMaxs.R b/src/test/scripts/functions/aggregate/RowIndexMaxs.R
index 2104ada..7c058d7 100644
--- a/src/test/scripts/functions/aggregate/RowIndexMaxs.R
+++ b/src/test/scripts/functions/aggregate/RowIndexMaxs.R
@@ -1,28 +1,28 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-args <- commandArgs(TRUE)
-
-library("Matrix")
-
-A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
-B <- max.col(A,ties.method="last");
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+args <- commandArgs(TRUE)
+
+library("Matrix")
+
+A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
+B <- max.col(A,ties.method="last");
 writeMM(as(B, "CsparseMatrix"), paste(args[2], "B", sep="")); 
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/aggregate/RowIndexMins.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/aggregate/RowIndexMins.R b/src/test/scripts/functions/aggregate/RowIndexMins.R
index 3fa734e..5d317e3 100644
--- a/src/test/scripts/functions/aggregate/RowIndexMins.R
+++ b/src/test/scripts/functions/aggregate/RowIndexMins.R
@@ -1,28 +1,28 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-args <- commandArgs(TRUE)
-
-library("Matrix")
-
-A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
-B <- max.col(-A,ties.method="last");
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+args <- commandArgs(TRUE)
+
+library("Matrix")
+
+A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
+B <- max.col(-A,ties.method="last");
 writeMM(as(B, "CsparseMatrix"), paste(args[2], "B", sep="")); 
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/aggregate/RowIndexMins.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/aggregate/RowIndexMins.dml b/src/test/scripts/functions/aggregate/RowIndexMins.dml
index f535021..0579aff 100644
--- a/src/test/scripts/functions/aggregate/RowIndexMins.dml
+++ b/src/test/scripts/functions/aggregate/RowIndexMins.dml
@@ -1,24 +1,24 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-A = read($1, rows=$2, cols=$3, format="text");
-B = rowIndexMin(A);
-write(B, $4, format="text");
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+A = read($1, rows=$2, cols=$3, format="text");
+B = rowIndexMin(A);
+write(B, $4, format="text");

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/aggregate/RowMaxs.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/aggregate/RowMaxs.R b/src/test/scripts/functions/aggregate/RowMaxs.R
index 7eb92ab..6e88a56 100644
--- a/src/test/scripts/functions/aggregate/RowMaxs.R
+++ b/src/test/scripts/functions/aggregate/RowMaxs.R
@@ -1,34 +1,34 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-args <- commandArgs(TRUE)
-
-if(!("matrixStats" %in% rownames(installed.packages()))){
-   install.packages("matrixStats")
-}
-
-library("Matrix")
-library("matrixStats") 
-
-A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
-B <- rowMaxs(A);
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+args <- commandArgs(TRUE)
+
+if(!("matrixStats" %in% rownames(installed.packages()))){
+   install.packages("matrixStats")
+}
+
+library("Matrix")
+library("matrixStats") 
+
+A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
+B <- rowMaxs(A);
+
 writeMM(as(B, "CsparseMatrix"), paste(args[2], "B", sep="")); 
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/aggregate/RowMeans.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/aggregate/RowMeans.R b/src/test/scripts/functions/aggregate/RowMeans.R
index b31c182..6c57537 100644
--- a/src/test/scripts/functions/aggregate/RowMeans.R
+++ b/src/test/scripts/functions/aggregate/RowMeans.R
@@ -1,29 +1,29 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-args <- commandArgs(TRUE)
-
-library("Matrix")
-
-A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
-B <- rowMeans(A);
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+args <- commandArgs(TRUE)
+
+library("Matrix")
+
+A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
+B <- rowMeans(A);
+
 writeMM(as(B, "CsparseMatrix"), paste(args[2], "B", sep="")); 
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/aggregate/RowMins.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/aggregate/RowMins.R b/src/test/scripts/functions/aggregate/RowMins.R
index 4c02e56..af77c57 100644
--- a/src/test/scripts/functions/aggregate/RowMins.R
+++ b/src/test/scripts/functions/aggregate/RowMins.R
@@ -1,34 +1,34 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-args <- commandArgs(TRUE)
-
-if(!("matrixStats" %in% rownames(installed.packages()))){
-   install.packages("matrixStats")
-}
-
-library("Matrix")
-library("matrixStats") 
-
-A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
-B <- rowMins(A);
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+args <- commandArgs(TRUE)
+
+if(!("matrixStats" %in% rownames(installed.packages()))){
+   install.packages("matrixStats")
+}
+
+library("Matrix")
+library("matrixStats") 
+
+A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
+B <- rowMins(A);
+
 writeMM(as(B, "CsparseMatrix"), paste(args[2], "B", sep="")); 
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/aggregate/RowSums.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/aggregate/RowSums.R b/src/test/scripts/functions/aggregate/RowSums.R
index 37c6f06..592d5ec 100644
--- a/src/test/scripts/functions/aggregate/RowSums.R
+++ b/src/test/scripts/functions/aggregate/RowSums.R
@@ -1,29 +1,29 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-args <- commandArgs(TRUE)
-
-library("Matrix")
-
-A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
-B <- rowSums(A);
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+args <- commandArgs(TRUE)
+
+library("Matrix")
+
+A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
+B <- rowSums(A);
+
 writeMM(as(B, "CsparseMatrix"), paste(args[2], "B", sep="")); 
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/append/AppendChainTest.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/append/AppendChainTest.R b/src/test/scripts/functions/append/AppendChainTest.R
index 32a3507..caea086 100644
--- a/src/test/scripts/functions/append/AppendChainTest.R
+++ b/src/test/scripts/functions/append/AppendChainTest.R
@@ -1,34 +1,34 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-args <- commandArgs(TRUE)
-options(digits=22)
-library("Matrix")
-
-A1=readMM(paste(args[1], "A.mtx", sep=""))
-A = as.matrix(A1);
-B1=readMM(paste(args[1], "B1.mtx", sep=""))
-B1 = as.matrix(B1);
-B2=readMM(paste(args[1], "B2.mtx", sep=""))
-B2 = as.matrix(B2);
-C=cbind2(A, B1)
-C=cbind2(C, B2)
-writeMM(as(C,"CsparseMatrix"), paste(args[2], "C", sep=""), format="text")
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+args <- commandArgs(TRUE)
+options(digits=22)
+library("Matrix")
+
+A1=readMM(paste(args[1], "A.mtx", sep=""))
+A = as.matrix(A1);
+B1=readMM(paste(args[1], "B1.mtx", sep=""))
+B1 = as.matrix(B1);
+B2=readMM(paste(args[1], "B2.mtx", sep=""))
+B2 = as.matrix(B2);
+C=cbind2(A, B1)
+C=cbind2(C, B2)
+writeMM(as(C,"CsparseMatrix"), paste(args[2], "C", sep=""), format="text")

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/append/AppendChainTest.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/append/AppendChainTest.dml b/src/test/scripts/functions/append/AppendChainTest.dml
index 56f75d1..69797c7 100644
--- a/src/test/scripts/functions/append/AppendChainTest.dml
+++ b/src/test/scripts/functions/append/AppendChainTest.dml
@@ -1,27 +1,27 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-A=read($1, rows=$2, cols=$3, format="text")
-B1=read($4, rows=$2, cols=$5, format="text")
-B2=read($6, rows=$2, cols=$7, format="text")
-C=append(A, B1)
-C=append(C, B2)
-write(C, $8, format="text")
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+A=read($1, rows=$2, cols=$3, format="text")
+B1=read($4, rows=$2, cols=$5, format="text")
+B2=read($6, rows=$2, cols=$7, format="text")
+C=append(A, B1)
+C=append(C, B2)
+write(C, $8, format="text")

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/append/AppendMatrixTest.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/append/AppendMatrixTest.R b/src/test/scripts/functions/append/AppendMatrixTest.R
index faba609..4e1902c 100644
--- a/src/test/scripts/functions/append/AppendMatrixTest.R
+++ b/src/test/scripts/functions/append/AppendMatrixTest.R
@@ -1,30 +1,30 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-args <- commandArgs(TRUE)
-options(digits=22)
-library("Matrix")
-A1=readMM(paste(args[1], "A.mtx", sep=""))
-A = as.matrix(A1);
-B1=readMM(paste(args[1], "B.mtx", sep=""))
-B = as.matrix(B1);
-C=cbind2(A, B)
-writeMM(as(C,"CsparseMatrix"), paste(args[2], "C", sep=""), format="text")
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+args <- commandArgs(TRUE)
+options(digits=22)
+library("Matrix")
+A1=readMM(paste(args[1], "A.mtx", sep=""))
+A = as.matrix(A1);
+B1=readMM(paste(args[1], "B.mtx", sep=""))
+B = as.matrix(B1);
+C=cbind2(A, B)
+writeMM(as(C,"CsparseMatrix"), paste(args[2], "C", sep=""), format="text")

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/append/AppendMatrixTest.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/append/AppendMatrixTest.dml b/src/test/scripts/functions/append/AppendMatrixTest.dml
index f4318b7..219e3c3 100644
--- a/src/test/scripts/functions/append/AppendMatrixTest.dml
+++ b/src/test/scripts/functions/append/AppendMatrixTest.dml
@@ -1,25 +1,25 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-A=read($1, rows=$2, cols=$3, format="text")
-B=read($4, rows=$2, cols=$5, format="text")
-C=append(A, B)
-write(C, $6, format="text")
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+A=read($1, rows=$2, cols=$3, format="text")
+B=read($4, rows=$2, cols=$5, format="text")
+C=append(A, B)
+write(C, $6, format="text")

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/append/AppendVectorTest.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/append/AppendVectorTest.R b/src/test/scripts/functions/append/AppendVectorTest.R
index faba609..4e1902c 100644
--- a/src/test/scripts/functions/append/AppendVectorTest.R
+++ b/src/test/scripts/functions/append/AppendVectorTest.R
@@ -1,30 +1,30 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-args <- commandArgs(TRUE)
-options(digits=22)
-library("Matrix")
-A1=readMM(paste(args[1], "A.mtx", sep=""))
-A = as.matrix(A1);
-B1=readMM(paste(args[1], "B.mtx", sep=""))
-B = as.matrix(B1);
-C=cbind2(A, B)
-writeMM(as(C,"CsparseMatrix"), paste(args[2], "C", sep=""), format="text")
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+args <- commandArgs(TRUE)
+options(digits=22)
+library("Matrix")
+A1=readMM(paste(args[1], "A.mtx", sep=""))
+A = as.matrix(A1);
+B1=readMM(paste(args[1], "B.mtx", sep=""))
+B = as.matrix(B1);
+C=cbind2(A, B)
+writeMM(as(C,"CsparseMatrix"), paste(args[2], "C", sep=""), format="text")

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/append/AppendVectorTest.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/append/AppendVectorTest.dml b/src/test/scripts/functions/append/AppendVectorTest.dml
index 691a56b..9bc5df2 100644
--- a/src/test/scripts/functions/append/AppendVectorTest.dml
+++ b/src/test/scripts/functions/append/AppendVectorTest.dml
@@ -1,25 +1,25 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-A=read($1, rows=$2, cols=$3, format="text")
-B=read($4, rows=$2, cols=1, format="text")
-C=append(A, B)
-write(C, $5, format="text")
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+A=read($1, rows=$2, cols=$3, format="text")
+B=read($4, rows=$2, cols=1, format="text")
+C=append(A, B)
+write(C, $5, format="text")

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/append/RBindCBindMatrixTest.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/append/RBindCBindMatrixTest.R b/src/test/scripts/functions/append/RBindCBindMatrixTest.R
index 016d80f..eac2241 100644
--- a/src/test/scripts/functions/append/RBindCBindMatrixTest.R
+++ b/src/test/scripts/functions/append/RBindCBindMatrixTest.R
@@ -1,31 +1,31 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-args <- commandArgs(TRUE)
-options(digits=22)
-library("Matrix")
-
-A=as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
-B=as.matrix(readMM(paste(args[1], "B.mtx", sep="")))
-
-C=t(cbind2(t(A), t(B)))
-
-writeMM(as(C,"CsparseMatrix"), paste(args[2], "C", sep=""), format="text")
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+args <- commandArgs(TRUE)
+options(digits=22)
+library("Matrix")
+
+A=as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
+B=as.matrix(readMM(paste(args[1], "B.mtx", sep="")))
+
+C=t(cbind2(t(A), t(B)))
+
+writeMM(as(C,"CsparseMatrix"), paste(args[2], "C", sep=""), format="text")

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/append/RBindCBindMatrixTest.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/append/RBindCBindMatrixTest.dml b/src/test/scripts/functions/append/RBindCBindMatrixTest.dml
index edd1080..22f654a 100644
--- a/src/test/scripts/functions/append/RBindCBindMatrixTest.dml
+++ b/src/test/scripts/functions/append/RBindCBindMatrixTest.dml
@@ -1,27 +1,27 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-A=read($1)
-B=read($2)
-
-C = t(cbind(t(A),t(B)));
-
-write(C, $3)
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+A=read($1)
+B=read($2)
+
+C = t(cbind(t(A),t(B)));
+
+write(C, $3)

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/append/RBindMatrixTest.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/append/RBindMatrixTest.R b/src/test/scripts/functions/append/RBindMatrixTest.R
index a5950a2..7abb796 100644
--- a/src/test/scripts/functions/append/RBindMatrixTest.R
+++ b/src/test/scripts/functions/append/RBindMatrixTest.R
@@ -1,31 +1,31 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-args <- commandArgs(TRUE)
-options(digits=22)
-library("Matrix")
-
-A=as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
-B=as.matrix(readMM(paste(args[1], "B.mtx", sep="")))
-
-C=rbind2(A, B)
-
-writeMM(as(C,"CsparseMatrix"), paste(args[2], "C", sep=""), format="text")
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+args <- commandArgs(TRUE)
+options(digits=22)
+library("Matrix")
+
+A=as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
+B=as.matrix(readMM(paste(args[1], "B.mtx", sep="")))
+
+C=rbind2(A, B)
+
+writeMM(as(C,"CsparseMatrix"), paste(args[2], "C", sep=""), format="text")

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/append/RBindMatrixTest.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/append/RBindMatrixTest.dml b/src/test/scripts/functions/append/RBindMatrixTest.dml
index cb06454..8b0dcde 100644
--- a/src/test/scripts/functions/append/RBindMatrixTest.dml
+++ b/src/test/scripts/functions/append/RBindMatrixTest.dml
@@ -1,27 +1,27 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-A=read($1)
-B=read($2)
-
-C = rbind(A,B);
-
-write(C, $3)
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+A=read($1)
+B=read($2)
+
+C = rbind(A,B);
+
+write(C, $3)

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/append/basic_string_append.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/append/basic_string_append.dml b/src/test/scripts/functions/append/basic_string_append.dml
index fca23c5..b4bd889 100644
--- a/src/test/scripts/functions/append/basic_string_append.dml
+++ b/src/test/scripts/functions/append/basic_string_append.dml
@@ -1,27 +1,27 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-s = "# Name Value";
-s = append(s, "A = " + (7 + $1 + 1));
-s = append(s, "B = " + (3 + $1 + 1));
-
-print(s);
-write(s, $2);
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+s = "# Name Value";
+s = append(s, "A = " + (7 + $1 + 1));
+s = append(s, "B = " + (3 + $1 + 1));
+
+print(s);
+write(s, $2);

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/append/loop_string_append.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/append/loop_string_append.dml b/src/test/scripts/functions/append/loop_string_append.dml
index 934538d..ffc9a1c 100644
--- a/src/test/scripts/functions/append/loop_string_append.dml
+++ b/src/test/scripts/functions/append/loop_string_append.dml
@@ -1,31 +1,31 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-numIter = $1;
-
-s = "# Name Value";
-for( i in 1:numIter )
-{
-   # more than 100 bytes (will throw an error for more than 10000 iterations)
-   s = append(s, "A["+i+"] = ABCDEFGHIJKLMNOPQRSTUVWXYZ ABCDEFGHIJKLMNOPQRSTUVWXYZ ABCDEFGHIJKLMNOPQRSTUVWXYZ ABCDEFGHIJKLMNOPQRSTUVWXYZ, " +i);
-}
-
-write(s, $2);
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+numIter = $1;
+
+s = "# Name Value";
+for( i in 1:numIter )
+{
+   # more than 100 bytes (will throw an error for more than 10000 iterations)
+   s = append(s, "A["+i+"] = ABCDEFGHIJKLMNOPQRSTUVWXYZ ABCDEFGHIJKLMNOPQRSTUVWXYZ ABCDEFGHIJKLMNOPQRSTUVWXYZ ABCDEFGHIJKLMNOPQRSTUVWXYZ, " +i);
+}
+
+write(s, $2);


[13/55] [partial] incubator-systemml git commit: [SYSTEMML-482] [SYSTEMML-480] Adding a Git attributes file to enforce Unix-styled line endings, and normalizing all of the line endings.

Posted by du...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/binary/matrix/UltraSparseMatrixMultiplication.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/binary/matrix/UltraSparseMatrixMultiplication.R b/src/test/scripts/functions/binary/matrix/UltraSparseMatrixMultiplication.R
index 4909173..d70c7a6 100644
--- a/src/test/scripts/functions/binary/matrix/UltraSparseMatrixMultiplication.R
+++ b/src/test/scripts/functions/binary/matrix/UltraSparseMatrixMultiplication.R
@@ -19,19 +19,19 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
-B <- as.matrix(readMM(paste(args[1], "B.mtx", sep="")))
-
-P <- diag( as.vector(B==2) )
-Px <- P[rowSums((P==0) | is.na(P)) != ncol(P),];
-
-C <- Px %*% A;
-
-writeMM(as(C, "CsparseMatrix"), paste(args[2], "C", sep="")); 
-
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
+B <- as.matrix(readMM(paste(args[1], "B.mtx", sep="")))
+
+P <- diag( as.vector(B==2) )
+Px <- P[rowSums((P==0) | is.na(P)) != ncol(P),];
+
+C <- Px %*% A;
+
+writeMM(as(C, "CsparseMatrix"), paste(args[2], "C", sep="")); 
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/binary/matrix/UltraSparseMatrixMultiplication2.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/binary/matrix/UltraSparseMatrixMultiplication2.R b/src/test/scripts/functions/binary/matrix/UltraSparseMatrixMultiplication2.R
index 4909173..d70c7a6 100644
--- a/src/test/scripts/functions/binary/matrix/UltraSparseMatrixMultiplication2.R
+++ b/src/test/scripts/functions/binary/matrix/UltraSparseMatrixMultiplication2.R
@@ -19,19 +19,19 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
-B <- as.matrix(readMM(paste(args[1], "B.mtx", sep="")))
-
-P <- diag( as.vector(B==2) )
-Px <- P[rowSums((P==0) | is.na(P)) != ncol(P),];
-
-C <- Px %*% A;
-
-writeMM(as(C, "CsparseMatrix"), paste(args[2], "C", sep="")); 
-
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
+B <- as.matrix(readMM(paste(args[1], "B.mtx", sep="")))
+
+P <- diag( as.vector(B==2) )
+Px <- P[rowSums((P==0) | is.na(P)) != ncol(P),];
+
+C <- Px %*% A;
+
+writeMM(as(C, "CsparseMatrix"), paste(args[2], "C", sep="")); 
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/binary/matrix/ZipMMTest.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/binary/matrix/ZipMMTest.R b/src/test/scripts/functions/binary/matrix/ZipMMTest.R
index 8b73634..bfc621a 100644
--- a/src/test/scripts/functions/binary/matrix/ZipMMTest.R
+++ b/src/test/scripts/functions/binary/matrix/ZipMMTest.R
@@ -1,34 +1,34 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
-B <- as.matrix(readMM(paste(args[1], "B.mtx", sep="")))
-
-C = t(A) %*% B;
-
-writeMM(as(C, "CsparseMatrix"), paste(args[2], "C", sep="")); 
-
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
+B <- as.matrix(readMM(paste(args[1], "B.mtx", sep="")))
+
+C = t(A) %*% B;
+
+writeMM(as(C, "CsparseMatrix"), paste(args[2], "C", sep="")); 
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/binary/matrix_full_cellwise/FullMatrixCellwiseOperation_Addition.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/binary/matrix_full_cellwise/FullMatrixCellwiseOperation_Addition.R b/src/test/scripts/functions/binary/matrix_full_cellwise/FullMatrixCellwiseOperation_Addition.R
index c93053c..94809b6 100644
--- a/src/test/scripts/functions/binary/matrix_full_cellwise/FullMatrixCellwiseOperation_Addition.R
+++ b/src/test/scripts/functions/binary/matrix_full_cellwise/FullMatrixCellwiseOperation_Addition.R
@@ -19,20 +19,20 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-A1 <- readMM(paste(args[1], "A.mtx", sep=""))
-A <- as.matrix(A1);
-B1 <- readMM(paste(args[1], "B.mtx", sep=""))
-B <- as.matrix(B1);
-
-
-C <- A+B;
-
-writeMM(as(C, "CsparseMatrix"), paste(args[2], "C", sep="")); 
-
-
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+A1 <- readMM(paste(args[1], "A.mtx", sep=""))
+A <- as.matrix(A1);
+B1 <- readMM(paste(args[1], "B.mtx", sep=""))
+B <- as.matrix(B1);
+
+
+C <- A+B;
+
+writeMM(as(C, "CsparseMatrix"), paste(args[2], "C", sep="")); 
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/binary/matrix_full_cellwise/FullMatrixCellwiseOperation_Division.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/binary/matrix_full_cellwise/FullMatrixCellwiseOperation_Division.R b/src/test/scripts/functions/binary/matrix_full_cellwise/FullMatrixCellwiseOperation_Division.R
index 2058e29..79afbc5 100644
--- a/src/test/scripts/functions/binary/matrix_full_cellwise/FullMatrixCellwiseOperation_Division.R
+++ b/src/test/scripts/functions/binary/matrix_full_cellwise/FullMatrixCellwiseOperation_Division.R
@@ -19,20 +19,20 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-A1 <- readMM(paste(args[1], "A.mtx", sep=""))
-A <- as.matrix(A1);
-B1 <- readMM(paste(args[1], "B.mtx", sep=""))
-B <- as.matrix(B1);
-
-
-C <- A/B;
-
-writeMM(as(C, "CsparseMatrix"), paste(args[2], "C", sep="")); 
-
-
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+A1 <- readMM(paste(args[1], "A.mtx", sep=""))
+A <- as.matrix(A1);
+B1 <- readMM(paste(args[1], "B.mtx", sep=""))
+B <- as.matrix(B1);
+
+
+C <- A/B;
+
+writeMM(as(C, "CsparseMatrix"), paste(args[2], "C", sep="")); 
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/binary/matrix_full_cellwise/FullMatrixCellwiseOperation_Multiplication.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/binary/matrix_full_cellwise/FullMatrixCellwiseOperation_Multiplication.R b/src/test/scripts/functions/binary/matrix_full_cellwise/FullMatrixCellwiseOperation_Multiplication.R
index d32c17c..70573e0 100644
--- a/src/test/scripts/functions/binary/matrix_full_cellwise/FullMatrixCellwiseOperation_Multiplication.R
+++ b/src/test/scripts/functions/binary/matrix_full_cellwise/FullMatrixCellwiseOperation_Multiplication.R
@@ -19,20 +19,20 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-A1 <- readMM(paste(args[1], "A.mtx", sep=""))
-A <- as.matrix(A1);
-B1 <- readMM(paste(args[1], "B.mtx", sep=""))
-B <- as.matrix(B1);
-
-
-C <- A*B;
-
-writeMM(as(C, "CsparseMatrix"), paste(args[2], "C", sep="")); 
-
-
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+A1 <- readMM(paste(args[1], "A.mtx", sep=""))
+A <- as.matrix(A1);
+B1 <- readMM(paste(args[1], "B.mtx", sep=""))
+B <- as.matrix(B1);
+
+
+C <- A*B;
+
+writeMM(as(C, "CsparseMatrix"), paste(args[2], "C", sep="")); 
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/binary/matrix_full_cellwise/FullMatrixCellwiseOperation_Substraction.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/binary/matrix_full_cellwise/FullMatrixCellwiseOperation_Substraction.R b/src/test/scripts/functions/binary/matrix_full_cellwise/FullMatrixCellwiseOperation_Substraction.R
index f07a257..2dcd410 100644
--- a/src/test/scripts/functions/binary/matrix_full_cellwise/FullMatrixCellwiseOperation_Substraction.R
+++ b/src/test/scripts/functions/binary/matrix_full_cellwise/FullMatrixCellwiseOperation_Substraction.R
@@ -19,20 +19,20 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-A1 <- readMM(paste(args[1], "A.mtx", sep=""))
-A <- as.matrix(A1);
-B1 <- readMM(paste(args[1], "B.mtx", sep=""))
-B <- as.matrix(B1);
-
-
-C <- A-B;
-
-writeMM(as(C, "CsparseMatrix"), paste(args[2], "C", sep="")); 
-
-
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+A1 <- readMM(paste(args[1], "A.mtx", sep=""))
+A <- as.matrix(A1);
+B1 <- readMM(paste(args[1], "B.mtx", sep=""))
+B <- as.matrix(B1);
+
+
+C <- A-B;
+
+writeMM(as(C, "CsparseMatrix"), paste(args[2], "C", sep="")); 
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/binary/matrix_full_cellwise/FullMatrixVectorColCellwiseOperation_Addition.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/binary/matrix_full_cellwise/FullMatrixVectorColCellwiseOperation_Addition.R b/src/test/scripts/functions/binary/matrix_full_cellwise/FullMatrixVectorColCellwiseOperation_Addition.R
index 8cf01d9..0956055 100644
--- a/src/test/scripts/functions/binary/matrix_full_cellwise/FullMatrixVectorColCellwiseOperation_Addition.R
+++ b/src/test/scripts/functions/binary/matrix_full_cellwise/FullMatrixVectorColCellwiseOperation_Addition.R
@@ -19,20 +19,20 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-A1 <- readMM(paste(args[1], "A.mtx", sep=""))
-A <- as.matrix(A1);
-B1 <- readMM(paste(args[1], "B.mtx", sep=""))
-B <- as.vector(B1);
-
-
-C <- A+B;
-
-writeMM(as(C, "CsparseMatrix"), paste(args[2], "C", sep="")); 
-
-
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+A1 <- readMM(paste(args[1], "A.mtx", sep=""))
+A <- as.matrix(A1);
+B1 <- readMM(paste(args[1], "B.mtx", sep=""))
+B <- as.vector(B1);
+
+
+C <- A+B;
+
+writeMM(as(C, "CsparseMatrix"), paste(args[2], "C", sep="")); 
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/binary/matrix_full_cellwise/FullMatrixVectorColCellwiseOperation_Division.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/binary/matrix_full_cellwise/FullMatrixVectorColCellwiseOperation_Division.R b/src/test/scripts/functions/binary/matrix_full_cellwise/FullMatrixVectorColCellwiseOperation_Division.R
index 0d64579..c7026bd 100644
--- a/src/test/scripts/functions/binary/matrix_full_cellwise/FullMatrixVectorColCellwiseOperation_Division.R
+++ b/src/test/scripts/functions/binary/matrix_full_cellwise/FullMatrixVectorColCellwiseOperation_Division.R
@@ -19,20 +19,20 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-A1 <- readMM(paste(args[1], "A.mtx", sep=""))
-A <- as.matrix(A1);
-B1 <- readMM(paste(args[1], "B.mtx", sep=""))
-B <- as.vector(B1);
-
-
-C <- A/B;
-
-writeMM(as(C, "CsparseMatrix"), paste(args[2], "C", sep="")); 
-
-
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+A1 <- readMM(paste(args[1], "A.mtx", sep=""))
+A <- as.matrix(A1);
+B1 <- readMM(paste(args[1], "B.mtx", sep=""))
+B <- as.vector(B1);
+
+
+C <- A/B;
+
+writeMM(as(C, "CsparseMatrix"), paste(args[2], "C", sep="")); 
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/binary/matrix_full_cellwise/FullMatrixVectorColCellwiseOperation_Multiplication.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/binary/matrix_full_cellwise/FullMatrixVectorColCellwiseOperation_Multiplication.R b/src/test/scripts/functions/binary/matrix_full_cellwise/FullMatrixVectorColCellwiseOperation_Multiplication.R
index 33bc86d..383a819 100644
--- a/src/test/scripts/functions/binary/matrix_full_cellwise/FullMatrixVectorColCellwiseOperation_Multiplication.R
+++ b/src/test/scripts/functions/binary/matrix_full_cellwise/FullMatrixVectorColCellwiseOperation_Multiplication.R
@@ -19,20 +19,20 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-A1 <- readMM(paste(args[1], "A.mtx", sep=""))
-A <- as.matrix(A1);
-B1 <- readMM(paste(args[1], "B.mtx", sep=""))
-B <- as.vector(B1);
-
-
-C <- A*B;
-
-writeMM(as(C, "CsparseMatrix"), paste(args[2], "C", sep="")); 
-
-
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+A1 <- readMM(paste(args[1], "A.mtx", sep=""))
+A <- as.matrix(A1);
+B1 <- readMM(paste(args[1], "B.mtx", sep=""))
+B <- as.vector(B1);
+
+
+C <- A*B;
+
+writeMM(as(C, "CsparseMatrix"), paste(args[2], "C", sep="")); 
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/binary/matrix_full_cellwise/FullMatrixVectorColCellwiseOperation_Substraction.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/binary/matrix_full_cellwise/FullMatrixVectorColCellwiseOperation_Substraction.R b/src/test/scripts/functions/binary/matrix_full_cellwise/FullMatrixVectorColCellwiseOperation_Substraction.R
index 50b7120..149cf25 100644
--- a/src/test/scripts/functions/binary/matrix_full_cellwise/FullMatrixVectorColCellwiseOperation_Substraction.R
+++ b/src/test/scripts/functions/binary/matrix_full_cellwise/FullMatrixVectorColCellwiseOperation_Substraction.R
@@ -19,20 +19,20 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-A1 <- readMM(paste(args[1], "A.mtx", sep=""))
-A <- as.matrix(A1);
-B1 <- readMM(paste(args[1], "B.mtx", sep=""))
-B <- as.vector(B1);
-
-
-C <- A-B;
-
-writeMM(as(C, "CsparseMatrix"), paste(args[2], "C", sep="")); 
-
-
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+A1 <- readMM(paste(args[1], "A.mtx", sep=""))
+A <- as.matrix(A1);
+B1 <- readMM(paste(args[1], "B.mtx", sep=""))
+B <- as.vector(B1);
+
+
+C <- A-B;
+
+writeMM(as(C, "CsparseMatrix"), paste(args[2], "C", sep="")); 
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/binary/matrix_full_cellwise/FullMatrixVectorRowCellwiseOperation_Addition.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/binary/matrix_full_cellwise/FullMatrixVectorRowCellwiseOperation_Addition.R b/src/test/scripts/functions/binary/matrix_full_cellwise/FullMatrixVectorRowCellwiseOperation_Addition.R
index 96ac330..a582836 100644
--- a/src/test/scripts/functions/binary/matrix_full_cellwise/FullMatrixVectorRowCellwiseOperation_Addition.R
+++ b/src/test/scripts/functions/binary/matrix_full_cellwise/FullMatrixVectorRowCellwiseOperation_Addition.R
@@ -19,21 +19,21 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-A1 <- readMM(paste(args[1], "A.mtx", sep=""))
-A <- as.matrix(A1);
-B1 <- readMM(paste(args[1], "B.mtx", sep=""))
-B <- as.matrix(B1);
-
-
-#C <- A+B; #not supported on row vectors
-C <- t(t(A)+as.vector(t(B)))
-
-writeMM(as(C, "CsparseMatrix"), paste(args[2], "C", sep="")); 
-
-
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+A1 <- readMM(paste(args[1], "A.mtx", sep=""))
+A <- as.matrix(A1);
+B1 <- readMM(paste(args[1], "B.mtx", sep=""))
+B <- as.matrix(B1);
+
+
+#C <- A+B; #not supported on row vectors
+C <- t(t(A)+as.vector(t(B)))
+
+writeMM(as(C, "CsparseMatrix"), paste(args[2], "C", sep="")); 
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/binary/matrix_full_cellwise/FullMatrixVectorRowCellwiseOperation_Division.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/binary/matrix_full_cellwise/FullMatrixVectorRowCellwiseOperation_Division.R b/src/test/scripts/functions/binary/matrix_full_cellwise/FullMatrixVectorRowCellwiseOperation_Division.R
index 9baa59d..3155b12 100644
--- a/src/test/scripts/functions/binary/matrix_full_cellwise/FullMatrixVectorRowCellwiseOperation_Division.R
+++ b/src/test/scripts/functions/binary/matrix_full_cellwise/FullMatrixVectorRowCellwiseOperation_Division.R
@@ -19,21 +19,21 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-A1 <- readMM(paste(args[1], "A.mtx", sep=""))
-A <- as.matrix(A1);
-B1 <- readMM(paste(args[1], "B.mtx", sep=""))
-B <- as.matrix(B1);
-
-
-#C <- A/B; #not supported on row vectors
-C <- t(t(A)/as.vector(t(B)))
-
-writeMM(as(C, "CsparseMatrix"), paste(args[2], "C", sep="")); 
-
-
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+A1 <- readMM(paste(args[1], "A.mtx", sep=""))
+A <- as.matrix(A1);
+B1 <- readMM(paste(args[1], "B.mtx", sep=""))
+B <- as.matrix(B1);
+
+
+#C <- A/B; #not supported on row vectors
+C <- t(t(A)/as.vector(t(B)))
+
+writeMM(as(C, "CsparseMatrix"), paste(args[2], "C", sep="")); 
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/binary/matrix_full_cellwise/FullMatrixVectorRowCellwiseOperation_Multiplication.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/binary/matrix_full_cellwise/FullMatrixVectorRowCellwiseOperation_Multiplication.R b/src/test/scripts/functions/binary/matrix_full_cellwise/FullMatrixVectorRowCellwiseOperation_Multiplication.R
index df21560..1e18d8f 100644
--- a/src/test/scripts/functions/binary/matrix_full_cellwise/FullMatrixVectorRowCellwiseOperation_Multiplication.R
+++ b/src/test/scripts/functions/binary/matrix_full_cellwise/FullMatrixVectorRowCellwiseOperation_Multiplication.R
@@ -19,21 +19,21 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-A1 <- readMM(paste(args[1], "A.mtx", sep=""))
-A <- as.matrix(A1);
-B1 <- readMM(paste(args[1], "B.mtx", sep=""))
-B <- as.matrix(B1);
-
-
-#C <- A*B; #not supported on row vectors
-C <- t(t(A)*as.vector(t(B)))
-
-writeMM(as(C, "CsparseMatrix"), paste(args[2], "C", sep="")); 
-
-
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+A1 <- readMM(paste(args[1], "A.mtx", sep=""))
+A <- as.matrix(A1);
+B1 <- readMM(paste(args[1], "B.mtx", sep=""))
+B <- as.matrix(B1);
+
+
+#C <- A*B; #not supported on row vectors
+C <- t(t(A)*as.vector(t(B)))
+
+writeMM(as(C, "CsparseMatrix"), paste(args[2], "C", sep="")); 
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/binary/matrix_full_cellwise/FullMatrixVectorRowCellwiseOperation_Substraction.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/binary/matrix_full_cellwise/FullMatrixVectorRowCellwiseOperation_Substraction.R b/src/test/scripts/functions/binary/matrix_full_cellwise/FullMatrixVectorRowCellwiseOperation_Substraction.R
index c8588be..659001e 100644
--- a/src/test/scripts/functions/binary/matrix_full_cellwise/FullMatrixVectorRowCellwiseOperation_Substraction.R
+++ b/src/test/scripts/functions/binary/matrix_full_cellwise/FullMatrixVectorRowCellwiseOperation_Substraction.R
@@ -19,21 +19,21 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-A1 <- readMM(paste(args[1], "A.mtx", sep=""))
-A <- as.matrix(A1);
-B1 <- readMM(paste(args[1], "B.mtx", sep=""))
-B <- as.matrix(B1);
-
-
-#C <- A-B; #not supported on row vectors
-C <- t(t(A)-as.vector(t(B)))
-
-writeMM(as(C, "CsparseMatrix"), paste(args[2], "C", sep="")); 
-
-
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+A1 <- readMM(paste(args[1], "A.mtx", sep=""))
+A <- as.matrix(A1);
+B1 <- readMM(paste(args[1], "B.mtx", sep=""))
+B <- as.matrix(B1);
+
+
+#C <- A-B; #not supported on row vectors
+C <- t(t(A)-as.vector(t(B)))
+
+writeMM(as(C, "CsparseMatrix"), paste(args[2], "C", sep="")); 
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/binary/matrix_full_cellwise/FullVectorVectorCellwiseOperation.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/binary/matrix_full_cellwise/FullVectorVectorCellwiseOperation.R b/src/test/scripts/functions/binary/matrix_full_cellwise/FullVectorVectorCellwiseOperation.R
index 6a59bc8..167356b 100644
--- a/src/test/scripts/functions/binary/matrix_full_cellwise/FullVectorVectorCellwiseOperation.R
+++ b/src/test/scripts/functions/binary/matrix_full_cellwise/FullVectorVectorCellwiseOperation.R
@@ -19,27 +19,27 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-A <- as.vector(readMM(paste(args[1], "A.mtx", sep="")))
-B <- as.vector(readMM(paste(args[1], "B.mtx", sep="")))
-
-opcode = args[2];
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+A <- as.vector(readMM(paste(args[1], "A.mtx", sep="")))
+B <- as.vector(readMM(paste(args[1], "B.mtx", sep="")))
+
+opcode = args[2];
 if( opcode == "lt" ) { opcode = "<" }
 if( opcode == "le" ) { opcode = "<=" }
 if( opcode == "gt" ) { opcode = ">" }
 if( opcode == "ge" ) { opcode = ">=" }
 if( opcode == "eq" ) { opcode = "==" }
-if( opcode == "ne" ) { opcode = "!=" }
-if( opcode == "mult" ) { opcode = "*" }
-
-C <- outer(A, B, opcode)
-C <- as.matrix(C)
-
-writeMM(as(C, "CsparseMatrix"), paste(args[3], "C", sep="")); 
-
-
+if( opcode == "ne" ) { opcode = "!=" }
+if( opcode == "mult" ) { opcode = "*" }
+
+C <- outer(A, B, opcode)
+C <- as.matrix(C)
+
+writeMM(as(C, "CsparseMatrix"), paste(args[3], "C", sep="")); 
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/binary/matrix_full_cellwise/Minus1MultTest.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/binary/matrix_full_cellwise/Minus1MultTest.R b/src/test/scripts/functions/binary/matrix_full_cellwise/Minus1MultTest.R
index f34b164..30e543e 100644
--- a/src/test/scripts/functions/binary/matrix_full_cellwise/Minus1MultTest.R
+++ b/src/test/scripts/functions/binary/matrix_full_cellwise/Minus1MultTest.R
@@ -1,39 +1,39 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-A = as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
-B = as.matrix(readMM(paste(args[1], "B.mtx", sep="")))
-
-if( nrow(A) == 1 ){
-   C = 1 - as.double(A) * B;
-} else if( nrow(B) == 1 ){
-   C = 1 - A * as.double(B);
-} else {
-   C = 1 - A * B;
-} 
-
-writeMM(as(C, "CsparseMatrix"), paste(args[2], "C", sep="")); 
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+A = as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
+B = as.matrix(readMM(paste(args[1], "B.mtx", sep="")))
+
+if( nrow(A) == 1 ){
+   C = 1 - as.double(A) * B;
+} else if( nrow(B) == 1 ){
+   C = 1 - A * as.double(B);
+} else {
+   C = 1 - A * B;
+} 
+
+writeMM(as(C, "CsparseMatrix"), paste(args[2], "C", sep="")); 

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/binary/matrix_full_other/FullDistributedMatrixMultiplication.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/binary/matrix_full_other/FullDistributedMatrixMultiplication.R b/src/test/scripts/functions/binary/matrix_full_other/FullDistributedMatrixMultiplication.R
index 184c144..94cd5d6 100644
--- a/src/test/scripts/functions/binary/matrix_full_other/FullDistributedMatrixMultiplication.R
+++ b/src/test/scripts/functions/binary/matrix_full_other/FullDistributedMatrixMultiplication.R
@@ -19,20 +19,20 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-A1 <- readMM(paste(args[1], "A.mtx", sep=""))
-A <- as.matrix(A1);
-B1 <- readMM(paste(args[1], "B.mtx", sep=""))
-B <- as.matrix(B1);
-
-
-C <- A%*%B;
-
-writeMM(as(C, "CsparseMatrix"), paste(args[2], "C", sep="")); 
-
-
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+A1 <- readMM(paste(args[1], "A.mtx", sep=""))
+A <- as.matrix(A1);
+B1 <- readMM(paste(args[1], "B.mtx", sep=""))
+B <- as.matrix(B1);
+
+
+C <- A%*%B;
+
+writeMM(as(C, "CsparseMatrix"), paste(args[2], "C", sep="")); 
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/binary/matrix_full_other/FullMatrixMultiplication.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/binary/matrix_full_other/FullMatrixMultiplication.R b/src/test/scripts/functions/binary/matrix_full_other/FullMatrixMultiplication.R
index 184c144..94cd5d6 100644
--- a/src/test/scripts/functions/binary/matrix_full_other/FullMatrixMultiplication.R
+++ b/src/test/scripts/functions/binary/matrix_full_other/FullMatrixMultiplication.R
@@ -19,20 +19,20 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-A1 <- readMM(paste(args[1], "A.mtx", sep=""))
-A <- as.matrix(A1);
-B1 <- readMM(paste(args[1], "B.mtx", sep=""))
-B <- as.matrix(B1);
-
-
-C <- A%*%B;
-
-writeMM(as(C, "CsparseMatrix"), paste(args[2], "C", sep="")); 
-
-
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+A1 <- readMM(paste(args[1], "A.mtx", sep=""))
+A <- as.matrix(A1);
+B1 <- readMM(paste(args[1], "B.mtx", sep=""))
+B <- as.matrix(B1);
+
+
+C <- A%*%B;
+
+writeMM(as(C, "CsparseMatrix"), paste(args[2], "C", sep="")); 
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/binary/matrix_full_other/FullMinMaxComparison.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/binary/matrix_full_other/FullMinMaxComparison.R b/src/test/scripts/functions/binary/matrix_full_other/FullMinMaxComparison.R
index 91d9068..a156e39 100644
--- a/src/test/scripts/functions/binary/matrix_full_other/FullMinMaxComparison.R
+++ b/src/test/scripts/functions/binary/matrix_full_other/FullMinMaxComparison.R
@@ -19,40 +19,40 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
-B <- as.matrix(readMM(paste(args[1], "B.mtx", sep="")))
-
-if( as.integer(args[2])==1 ){
-  # MIN
-  if( nrow(A)>1 | nrow(B)>1 ){
-     if( nrow(B)>nrow(A) ) {
-       C <- pmin(B, A);
-     }  
-     else {
-       C <- pmin(A, B);
-     }
-  }else{
-     C <- min(A, B);
-  }    
-} else{
-  # MAX  
-  if( nrow(A)>1 | nrow(B)>1 ){
-     if( nrow(B)>nrow(A) ){
-       C <- pmax(B, A);
-     }else{
-       C <- pmax(A, B);
-     }  
-  }else{
-     C <- max(A, B);
-  } 
-}
-
-writeMM(as(as.matrix(C), "CsparseMatrix"), paste(args[3], "C", sep="")); 
-
-
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
+B <- as.matrix(readMM(paste(args[1], "B.mtx", sep="")))
+
+if( as.integer(args[2])==1 ){
+  # MIN
+  if( nrow(A)>1 | nrow(B)>1 ){
+     if( nrow(B)>nrow(A) ) {
+       C <- pmin(B, A);
+     }  
+     else {
+       C <- pmin(A, B);
+     }
+  }else{
+     C <- min(A, B);
+  }    
+} else{
+  # MAX  
+  if( nrow(A)>1 | nrow(B)>1 ){
+     if( nrow(B)>nrow(A) ){
+       C <- pmax(B, A);
+     }else{
+       C <- pmax(A, B);
+     }  
+  }else{
+     C <- max(A, B);
+  } 
+}
+
+writeMM(as(as.matrix(C), "CsparseMatrix"), paste(args[3], "C", sep="")); 
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/binary/matrix_full_other/FullPower.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/binary/matrix_full_other/FullPower.R b/src/test/scripts/functions/binary/matrix_full_other/FullPower.R
index 1563697..0223632 100644
--- a/src/test/scripts/functions/binary/matrix_full_other/FullPower.R
+++ b/src/test/scripts/functions/binary/matrix_full_other/FullPower.R
@@ -19,23 +19,23 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
-B <- as.matrix(readMM(paste(args[1], "B.mtx", sep="")))
-if( nrow(A)==1 ){ #support for scalars        
-   A <- as.numeric(A);
-}
-if( nrow(B)==1 ){ #support for scalars
-   B <- as.numeric(B);
-}
-C <- A^B;
-
-#note: writeMM replaces NaN and Inf
-writeMM(as(C, "CsparseMatrix"), paste(args[2], "C", sep="")); 
-
-
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
+B <- as.matrix(readMM(paste(args[1], "B.mtx", sep="")))
+if( nrow(A)==1 ){ #support for scalars        
+   A <- as.numeric(A);
+}
+if( nrow(B)==1 ){ #support for scalars
+   B <- as.numeric(B);
+}
+C <- A^B;
+
+#note: writeMM replaces NaN and Inf
+writeMM(as(C, "CsparseMatrix"), paste(args[2], "C", sep="")); 
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/binary/matrix_full_other/IntegerDivision_div.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/binary/matrix_full_other/IntegerDivision_div.R b/src/test/scripts/functions/binary/matrix_full_other/IntegerDivision_div.R
index 9f528d5..1221f70 100644
--- a/src/test/scripts/functions/binary/matrix_full_other/IntegerDivision_div.R
+++ b/src/test/scripts/functions/binary/matrix_full_other/IntegerDivision_div.R
@@ -19,23 +19,23 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-#options(digits=22)
-
-library("Matrix")
-
-A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
-B <- as.matrix(readMM(paste(args[1], "B.mtx", sep="")))
-if( nrow(A)==1 ){ #support for scalars        
-   A <- as.numeric(A);
-}
-if( nrow(B)==1 ){ #support for scalars
-   B <- as.numeric(B);
-}
-C <- A%/%B;
-
-#note: writeMM replaces NaN and Inf
-writeMM(as(C, "CsparseMatrix"), paste(args[2], "C", sep="")); 
-
-
+
+args <- commandArgs(TRUE)
+#options(digits=22)
+
+library("Matrix")
+
+A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
+B <- as.matrix(readMM(paste(args[1], "B.mtx", sep="")))
+if( nrow(A)==1 ){ #support for scalars        
+   A <- as.numeric(A);
+}
+if( nrow(B)==1 ){ #support for scalars
+   B <- as.numeric(B);
+}
+C <- A%/%B;
+
+#note: writeMM replaces NaN and Inf
+writeMM(as(C, "CsparseMatrix"), paste(args[2], "C", sep="")); 
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/binary/matrix_full_other/IntegerDivision_mod.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/binary/matrix_full_other/IntegerDivision_mod.R b/src/test/scripts/functions/binary/matrix_full_other/IntegerDivision_mod.R
index f70d1b1..4588ef8 100644
--- a/src/test/scripts/functions/binary/matrix_full_other/IntegerDivision_mod.R
+++ b/src/test/scripts/functions/binary/matrix_full_other/IntegerDivision_mod.R
@@ -19,23 +19,23 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
-B <- as.matrix(readMM(paste(args[1], "B.mtx", sep="")))
-if( nrow(A)==1 ){ #support for scalars        
-   A <- as.numeric(A);
-}
-if( nrow(B)==1 ){ #support for scalars
-   B <- as.numeric(B);
-}
-C <- A%%B;
-
-#note: writeMM replaces NaN and Inf
-writeMM(as(C, "CsparseMatrix"), paste(args[2], "C", sep="")); 
-
-
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
+B <- as.matrix(readMM(paste(args[1], "B.mtx", sep="")))
+if( nrow(A)==1 ){ #support for scalars        
+   A <- as.numeric(A);
+}
+if( nrow(B)==1 ){ #support for scalars
+   B <- as.numeric(B);
+}
+C <- A%%B;
+
+#note: writeMM replaces NaN and Inf
+writeMM(as(C, "CsparseMatrix"), paste(args[2], "C", sep="")); 
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/binary/matrix_full_other/PPredMatrixTest.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/binary/matrix_full_other/PPredMatrixTest.R b/src/test/scripts/functions/binary/matrix_full_other/PPredMatrixTest.R
index 5990c0f..6f57134 100644
--- a/src/test/scripts/functions/binary/matrix_full_other/PPredMatrixTest.R
+++ b/src/test/scripts/functions/binary/matrix_full_other/PPredMatrixTest.R
@@ -19,41 +19,41 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-A <- readMM(paste(args[1], "A.mtx", sep=""))
-B <- readMM(paste(args[1], "B.mtx", sep=""))
-
-type = as.integer(args[2])
-
-if( type == 0 )
-{
-   C = (A > B)
-}
-if( type == 1 )
-{
-   C = (A < B)
-}
-if( type == 2 )
-{
-   C = (A == B)
-}
-if( type == 3 )
-{
-   C = (A != B)
-}
-if( type == 4 )
-{
-   C = (A >= B)
-}
-if( type == 5 )
-{
-   C = (A <= B)
-}
-
-
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+A <- readMM(paste(args[1], "A.mtx", sep=""))
+B <- readMM(paste(args[1], "B.mtx", sep=""))
+
+type = as.integer(args[2])
+
+if( type == 0 )
+{
+   C = (A > B)
+}
+if( type == 1 )
+{
+   C = (A < B)
+}
+if( type == 2 )
+{
+   C = (A == B)
+}
+if( type == 3 )
+{
+   C = (A != B)
+}
+if( type == 4 )
+{
+   C = (A >= B)
+}
+if( type == 5 )
+{
+   C = (A <= B)
+}
+
+
 writeMM(as(C, "CsparseMatrix"), paste(args[3], "C", sep="")); 
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/binary/matrix_full_other/PPredScalarLeftTest.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/binary/matrix_full_other/PPredScalarLeftTest.R b/src/test/scripts/functions/binary/matrix_full_other/PPredScalarLeftTest.R
index 5578776..c54b185 100644
--- a/src/test/scripts/functions/binary/matrix_full_other/PPredScalarLeftTest.R
+++ b/src/test/scripts/functions/binary/matrix_full_other/PPredScalarLeftTest.R
@@ -19,42 +19,42 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-A1 <- readMM(paste(args[1], "A.mtx", sep=""))
-A <- as.matrix(A1);
-
-type = as.integer(args[2])
-constant = as.double(args[3]);
-
-if( type == 0 )
-{
-   B = (constant > A)
-}
-if( type == 1 )
-{
-   B = (constant < A)
-}
-if( type == 2 )
-{
-   B = (constant == A)
-}
-if( type == 3 )
-{
-   B = (constant != A)
-}
-if( type == 4 )
-{
-   B = (constant >= A)
-}
-if( type == 5 )
-{
-   B = (constant <= A)
-}
-
-
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+A1 <- readMM(paste(args[1], "A.mtx", sep=""))
+A <- as.matrix(A1);
+
+type = as.integer(args[2])
+constant = as.double(args[3]);
+
+if( type == 0 )
+{
+   B = (constant > A)
+}
+if( type == 1 )
+{
+   B = (constant < A)
+}
+if( type == 2 )
+{
+   B = (constant == A)
+}
+if( type == 3 )
+{
+   B = (constant != A)
+}
+if( type == 4 )
+{
+   B = (constant >= A)
+}
+if( type == 5 )
+{
+   B = (constant <= A)
+}
+
+
 writeMM(as(B, "CsparseMatrix"), paste(args[4], "B", sep="")); 
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/binary/matrix_full_other/PPredScalarRightTest.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/binary/matrix_full_other/PPredScalarRightTest.R b/src/test/scripts/functions/binary/matrix_full_other/PPredScalarRightTest.R
index e8944da..cd39071 100644
--- a/src/test/scripts/functions/binary/matrix_full_other/PPredScalarRightTest.R
+++ b/src/test/scripts/functions/binary/matrix_full_other/PPredScalarRightTest.R
@@ -19,42 +19,42 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-A1 <- readMM(paste(args[1], "A.mtx", sep=""))
-A <- as.matrix(A1);
-
-type = as.integer(args[2])
-constant = as.double(args[3]);
-
-if( type == 0 )
-{
-   B = (A > constant)
-}
-if( type == 1 )
-{
-   B = (A < constant)
-}
-if( type == 2 )
-{
-   B = (A == constant)
-}
-if( type == 3 )
-{
-   B = (A != constant)
-}
-if( type == 4 )
-{
-   B = (A >= constant)
-}
-if( type == 5 )
-{
-   B = (A <= constant)
-}
-
-
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+A1 <- readMM(paste(args[1], "A.mtx", sep=""))
+A <- as.matrix(A1);
+
+type = as.integer(args[2])
+constant = as.double(args[3]);
+
+if( type == 0 )
+{
+   B = (A > constant)
+}
+if( type == 1 )
+{
+   B = (A < constant)
+}
+if( type == 2 )
+{
+   B = (A == constant)
+}
+if( type == 3 )
+{
+   B = (A != constant)
+}
+if( type == 4 )
+{
+   B = (A >= constant)
+}
+if( type == 5 )
+{
+   B = (A <= constant)
+}
+
+
 writeMM(as(B, "CsparseMatrix"), paste(args[4], "B", sep="")); 
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/binary/matrix_full_other/TransposeSelfMatrixMultiplication1.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/binary/matrix_full_other/TransposeSelfMatrixMultiplication1.R b/src/test/scripts/functions/binary/matrix_full_other/TransposeSelfMatrixMultiplication1.R
index 8e88c02..b6171d7 100644
--- a/src/test/scripts/functions/binary/matrix_full_other/TransposeSelfMatrixMultiplication1.R
+++ b/src/test/scripts/functions/binary/matrix_full_other/TransposeSelfMatrixMultiplication1.R
@@ -19,15 +19,15 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-A1 <- readMM(paste(args[1], "A.mtx", sep=""))
-A <- as.matrix(A1);
-
-B <- t(A)%*%A;
-
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+A1 <- readMM(paste(args[1], "A.mtx", sep=""))
+A <- as.matrix(A1);
+
+B <- t(A)%*%A;
+
 writeMM(as(B, "CsparseMatrix"), paste(args[2], "B", sep="")); 
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/binary/matrix_full_other/TransposeSelfMatrixMultiplication2.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/binary/matrix_full_other/TransposeSelfMatrixMultiplication2.R b/src/test/scripts/functions/binary/matrix_full_other/TransposeSelfMatrixMultiplication2.R
index 241a3d2..8b68e28 100644
--- a/src/test/scripts/functions/binary/matrix_full_other/TransposeSelfMatrixMultiplication2.R
+++ b/src/test/scripts/functions/binary/matrix_full_other/TransposeSelfMatrixMultiplication2.R
@@ -19,15 +19,15 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-A1 <- readMM(paste(args[1], "A.mtx", sep=""))
-A <- as.matrix(A1);
-
-B <- A%*%t(A);
-
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+A1 <- readMM(paste(args[1], "A.mtx", sep=""))
+A <- as.matrix(A1);
+
+B <- A%*%t(A);
+
 writeMM(as(B, "CsparseMatrix"), paste(args[2], "B", sep="")); 
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/binary/scalar/ModulusSingleTest.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/binary/scalar/ModulusSingleTest.dml b/src/test/scripts/functions/binary/scalar/ModulusSingleTest.dml
index 86ffde2..9c62581 100644
--- a/src/test/scripts/functions/binary/scalar/ModulusSingleTest.dml
+++ b/src/test/scripts/functions/binary/scalar/ModulusSingleTest.dml
@@ -19,12 +19,12 @@
 #
 #-------------------------------------------------------------
 
-
-# junit test class: org.apache.sysml.test.integration.functions.binary.scalar.ModulusTest.java
-
-$$readhelper$$
-
-$$vardeclaration$$
-Computation = $$op1$$ %% $$op2$$;
-ComputationHelper = Computation * Helper;
+
+# junit test class: org.apache.sysml.test.integration.functions.binary.scalar.ModulusTest.java
+
+$$readhelper$$
+
+$$vardeclaration$$
+Computation = $$op1$$ %% $$op2$$;
+ComputationHelper = Computation * Helper;
 write(ComputationHelper, "$$outdir$$computed", format="text");
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/binary/scalar/ModulusTest.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/binary/scalar/ModulusTest.dml b/src/test/scripts/functions/binary/scalar/ModulusTest.dml
index eb1167c..e798f4f 100644
--- a/src/test/scripts/functions/binary/scalar/ModulusTest.dml
+++ b/src/test/scripts/functions/binary/scalar/ModulusTest.dml
@@ -19,27 +19,27 @@
 #
 #-------------------------------------------------------------
 
-
-# junit test class: org.apache.sysml.test.integration.functions.binary.scalar.ModulusTest
-
-$$readhelper$$
-
-$$intintvardeclaration$$
-IntIntComputation = $$intintop1$$ %% $$intintop2$$;
-IntIntComputationHelper = IntIntComputation * Helper;
-write(IntIntComputationHelper, "$$outdir$$int_int", format="text");
-
-$$intdoublevardeclaration$$
-IntDoubleComputation = $$intdoubleop1$$ %% $$intdoubleop2$$;
-IntDoubleComputationHelper = IntDoubleComputation * Helper;
-write(IntDoubleComputationHelper, "$$outdir$$int_double", format="text");
-
-$$doubledoublevardeclaration$$
-DoubleDoubleComputation = $$doubledoubleop1$$ %% $$doubledoubleop2$$;
-DoubleDoubleComputationHelper = DoubleDoubleComputation * Helper;
-write(DoubleDoubleComputationHelper, "$$outdir$$double_double", format="text");
-
-$$doubleintvardeclaration$$
-DoubleIntComputation = $$doubleintop1$$ %% $$doubleintop2$$;
-DoubleIntComputationHelper = DoubleIntComputation * Helper;
+
+# junit test class: org.apache.sysml.test.integration.functions.binary.scalar.ModulusTest
+
+$$readhelper$$
+
+$$intintvardeclaration$$
+IntIntComputation = $$intintop1$$ %% $$intintop2$$;
+IntIntComputationHelper = IntIntComputation * Helper;
+write(IntIntComputationHelper, "$$outdir$$int_int", format="text");
+
+$$intdoublevardeclaration$$
+IntDoubleComputation = $$intdoubleop1$$ %% $$intdoubleop2$$;
+IntDoubleComputationHelper = IntDoubleComputation * Helper;
+write(IntDoubleComputationHelper, "$$outdir$$int_double", format="text");
+
+$$doubledoublevardeclaration$$
+DoubleDoubleComputation = $$doubledoubleop1$$ %% $$doubledoubleop2$$;
+DoubleDoubleComputationHelper = DoubleDoubleComputation * Helper;
+write(DoubleDoubleComputationHelper, "$$outdir$$double_double", format="text");
+
+$$doubleintvardeclaration$$
+DoubleIntComputation = $$doubleintop1$$ %% $$doubleintop2$$;
+DoubleIntComputationHelper = DoubleIntComputation * Helper;
 write(DoubleIntComputationHelper, "$$outdir$$double_int", format="text");
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/caching/export.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/caching/export.dml b/src/test/scripts/functions/caching/export.dml
index 76cd082..790cc9e 100644
--- a/src/test/scripts/functions/caching/export.dml
+++ b/src/test/scripts/functions/caching/export.dml
@@ -19,6 +19,6 @@
 #
 #-------------------------------------------------------------
 
-
-V = read($1,rows=$2,cols=$3, format="text");
+
+V = read($1,rows=$2,cols=$3, format="text");
 write(V, $4, format=$5);       
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/data/RandRuntimePlatformTest.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/data/RandRuntimePlatformTest.dml b/src/test/scripts/functions/data/RandRuntimePlatformTest.dml
index 7bc714a..ad91e7a 100644
--- a/src/test/scripts/functions/data/RandRuntimePlatformTest.dml
+++ b/src/test/scripts/functions/data/RandRuntimePlatformTest.dml
@@ -1,26 +1,26 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# This script simply generates a matrix with random numbers according to a given set of arguments.
-# This script must be invoked with via runtime platforms (for example, -exec hadoop -exec singlenode etc.)
-
-A = Rand(rows=$1, cols=$2, sparsity=$3, seed=$4, pdf=$5);
-write(A, $6);
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# This script simply generates a matrix with random numbers according to a given set of arguments.
+# This script must be invoked with via runtime platforms (for example, -exec hadoop -exec singlenode etc.)
+
+A = Rand(rows=$1, cols=$2, sparsity=$3, seed=$4, pdf=$5);
+write(A, $6);

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/data/RandRuntimePlatformTestPoisson.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/data/RandRuntimePlatformTestPoisson.dml b/src/test/scripts/functions/data/RandRuntimePlatformTestPoisson.dml
index d6147f3..f3e7ffd 100644
--- a/src/test/scripts/functions/data/RandRuntimePlatformTestPoisson.dml
+++ b/src/test/scripts/functions/data/RandRuntimePlatformTestPoisson.dml
@@ -1,27 +1,27 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-
-# This script simply generates a matrix with random numbers according to a given set of arguments.
-# This script must be invoked with via runtime platforms (for example, -exec hadoop -exec singlenode etc.)
-
-A = Rand(rows=$1, cols=$2, sparsity=$3, seed=$4, pdf=$5, lambda=$6);
-write(A, $7);
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+# This script simply generates a matrix with random numbers according to a given set of arguments.
+# This script must be invoked with via runtime platforms (for example, -exec hadoop -exec singlenode etc.)
+
+A = Rand(rows=$1, cols=$2, sparsity=$3, seed=$4, pdf=$5, lambda=$6);
+write(A, $7);

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/data/RandVarMinMax.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/data/RandVarMinMax.R b/src/test/scripts/functions/data/RandVarMinMax.R
index 811a210..ce07801 100644
--- a/src/test/scripts/functions/data/RandVarMinMax.R
+++ b/src/test/scripts/functions/data/RandVarMinMax.R
@@ -19,20 +19,20 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-library("Matrix")
-
-
-M = as.integer(args[1]);
-N = as.integer(args[2]);
-
-R = matrix(0, M, N);
-
-for (x in 1 : M) {
-    R[x,] = matrix (x, 1, N);
-}
-
-writeMM(as(R,"CsparseMatrix"), paste(args[3], "R", sep=""))
-
+
+args <- commandArgs(TRUE)
+options(digits=22)
+library("Matrix")
+
+
+M = as.integer(args[1]);
+N = as.integer(args[2]);
+
+R = matrix(0, M, N);
+
+for (x in 1 : M) {
+    R[x,] = matrix (x, 1, N);
+}
+
+writeMM(as(R,"CsparseMatrix"), paste(args[3], "R", sep=""))
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/data/Sample2.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/data/Sample2.dml b/src/test/scripts/functions/data/Sample2.dml
index 044ca8e..cb672e5 100644
--- a/src/test/scripts/functions/data/Sample2.dml
+++ b/src/test/scripts/functions/data/Sample2.dml
@@ -19,6 +19,6 @@
 #
 #-------------------------------------------------------------
 
-
-A = sample($1, $2);
-write(A, $3);
+
+A = sample($1, $2);
+write(A, $3);

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/data/Sample3.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/data/Sample3.dml b/src/test/scripts/functions/data/Sample3.dml
index 58065c0..124a6a5 100644
--- a/src/test/scripts/functions/data/Sample3.dml
+++ b/src/test/scripts/functions/data/Sample3.dml
@@ -19,6 +19,6 @@
 #
 #-------------------------------------------------------------
 
-
-A = sample($1, $2, $3);
-write(A, $4);
+
+A = sample($1, $2, $3);
+write(A, $4);

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/data/Sample4.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/data/Sample4.dml b/src/test/scripts/functions/data/Sample4.dml
index 05ca57a..d3de0be 100644
--- a/src/test/scripts/functions/data/Sample4.dml
+++ b/src/test/scripts/functions/data/Sample4.dml
@@ -19,6 +19,6 @@
 #
 #-------------------------------------------------------------
 
-
-A = sample($1, $2, $3, $4);
-write(A, $5);
+
+A = sample($1, $2, $3, $4);
+write(A, $5);

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/data/Sequence.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/data/Sequence.R b/src/test/scripts/functions/data/Sequence.R
index 6b25c5a..0113777 100644
--- a/src/test/scripts/functions/data/Sequence.R
+++ b/src/test/scripts/functions/data/Sequence.R
@@ -19,11 +19,11 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-library("Matrix")
-
-A = seq(as.numeric(args[1]), as.numeric(args[2]), as.numeric(args[3]));
-writeMM(as(A,"CsparseMatrix"), paste(args[4], "A", sep=""), format="text")
-
+
+args <- commandArgs(TRUE)
+options(digits=22)
+library("Matrix")
+
+A = seq(as.numeric(args[1]), as.numeric(args[2]), as.numeric(args[3]));
+writeMM(as(A,"CsparseMatrix"), paste(args[4], "A", sep=""), format="text")
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/data/Sequence.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/data/Sequence.dml b/src/test/scripts/functions/data/Sequence.dml
index 52b10e6..94c21b9 100644
--- a/src/test/scripts/functions/data/Sequence.dml
+++ b/src/test/scripts/functions/data/Sequence.dml
@@ -19,7 +19,7 @@
 #
 #-------------------------------------------------------------
 
-
-
-A = seq($1, $2, $3);
-write(A, $4);
+
+
+A = seq($1, $2, $3);
+write(A, $4);

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/data/Sequence2inputs.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/data/Sequence2inputs.R b/src/test/scripts/functions/data/Sequence2inputs.R
index 40abb37..7fecf5e 100644
--- a/src/test/scripts/functions/data/Sequence2inputs.R
+++ b/src/test/scripts/functions/data/Sequence2inputs.R
@@ -19,11 +19,11 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-library("Matrix")
-
-A = seq(as.numeric(args[1]), as.numeric(args[2]));
-writeMM(as(A,"CsparseMatrix"), paste(args[3], "A", sep=""), format="text")
-
+
+args <- commandArgs(TRUE)
+options(digits=22)
+library("Matrix")
+
+A = seq(as.numeric(args[1]), as.numeric(args[2]));
+writeMM(as(A,"CsparseMatrix"), paste(args[3], "A", sep=""), format="text")
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/data/Sequence2inputs.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/data/Sequence2inputs.dml b/src/test/scripts/functions/data/Sequence2inputs.dml
index c46b424..7d0c89f 100644
--- a/src/test/scripts/functions/data/Sequence2inputs.dml
+++ b/src/test/scripts/functions/data/Sequence2inputs.dml
@@ -19,7 +19,7 @@
 #
 #-------------------------------------------------------------
 
-
-
-A = seq($1, $2);
-write(A, $3);
+
+
+A = seq($1, $2);
+write(A, $3);

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/data/StrInit.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/data/StrInit.dml b/src/test/scripts/functions/data/StrInit.dml
index 04b6603..0c36b66 100644
--- a/src/test/scripts/functions/data/StrInit.dml
+++ b/src/test/scripts/functions/data/StrInit.dml
@@ -1,23 +1,23 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-A = matrix($1, rows=$2, cols=$3);
-write(A, $4);
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+A = matrix($1, rows=$2, cols=$3);
+write(A, $4);

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/data/WriteMMComplexTest.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/data/WriteMMComplexTest.dml b/src/test/scripts/functions/data/WriteMMComplexTest.dml
index 5e0674e..63ed24e 100644
--- a/src/test/scripts/functions/data/WriteMMComplexTest.dml
+++ b/src/test/scripts/functions/data/WriteMMComplexTest.dml
@@ -19,13 +19,13 @@
 #
 #-------------------------------------------------------------
 
-
-# junit test class: org.apache.sysml.test.integration.functions.data.WriteMMTest.java
-
-A = read ($1, rows=$2, cols=$3, format="text");
-
-i = 0;
-while( i < 2) {
-     write (A, $4, format="mm");
-     i = i +1;
-}
+
+# junit test class: org.apache.sysml.test.integration.functions.data.WriteMMTest.java
+
+A = read ($1, rows=$2, cols=$3, format="text");
+
+i = 0;
+while( i < 2) {
+     write (A, $4, format="mm");
+     i = i +1;
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/data/WriteMMTest.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/data/WriteMMTest.dml b/src/test/scripts/functions/data/WriteMMTest.dml
index edc0747..2fd7bcf 100644
--- a/src/test/scripts/functions/data/WriteMMTest.dml
+++ b/src/test/scripts/functions/data/WriteMMTest.dml
@@ -19,9 +19,9 @@
 #
 #-------------------------------------------------------------
 
-
-# junit test class: org.apache.sysml.test.integration.functions.data.WriteMMTest.java
-
-A = read($1, rows=$2, cols=$3, format="text");
-
-write(A, $4, format="mm");
+
+# junit test class: org.apache.sysml.test.integration.functions.data.WriteMMTest.java
+
+A = read($1, rows=$2, cols=$3, format="text");
+
+write(A, $4, format="mm");

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/external/DynProject.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/external/DynProject.R b/src/test/scripts/functions/external/DynProject.R
index 67f6009..0f476f0 100644
--- a/src/test/scripts/functions/external/DynProject.R
+++ b/src/test/scripts/functions/external/DynProject.R
@@ -19,20 +19,20 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-X <- as.matrix(readMM(paste(args[1], "X.mtx", sep="")));
-c <- as.matrix(readMM(paste(args[1], "c.mtx", sep="")));
-
-if( ncol(X)==1 )
-{
-   Y <- X[c];
-} else {
-   Y <- X[c,c];
-}
-
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+X <- as.matrix(readMM(paste(args[1], "X.mtx", sep="")));
+c <- as.matrix(readMM(paste(args[1], "c.mtx", sep="")));
+
+if( ncol(X)==1 )
+{
+   Y <- X[c];
+} else {
+   Y <- X[c,c];
+}
+
 writeMM(as(Y, "CsparseMatrix"), paste(args[2], "Y.mtx", sep="")); 
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/external/DynProject.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/external/DynProject.dml b/src/test/scripts/functions/external/DynProject.dml
index 6339575..2444a17 100644
--- a/src/test/scripts/functions/external/DynProject.dml
+++ b/src/test/scripts/functions/external/DynProject.dml
@@ -19,14 +19,14 @@
 #
 #-------------------------------------------------------------
 
-
-dynProject = externalFunction(Matrix[Double] B, Matrix[Double] c)
-             return (Matrix[Double] PD) 
-             implemented in (classname="org.apache.sysml.udf.lib.DynamicProjectMatrixCP",exectype="mem")  
-
-X = read($1, rows=$3, cols=$4, format="text");
-c = read($2, rows=1, cols=$5, format="text");
-
-Y = dynProject(X, c);
-
+
+dynProject = externalFunction(Matrix[Double] B, Matrix[Double] c)
+             return (Matrix[Double] PD) 
+             implemented in (classname="org.apache.sysml.udf.lib.DynamicProjectMatrixCP",exectype="mem")  
+
+X = read($1, rows=$3, cols=$4, format="text");
+c = read($2, rows=1, cols=$5, format="text");
+
+Y = dynProject(X, c);
+
 write(Y, $6);   
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/external/DynReadWrite.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/external/DynReadWrite.dml b/src/test/scripts/functions/external/DynReadWrite.dml
index aee88dc..eb98991 100644
--- a/src/test/scripts/functions/external/DynReadWrite.dml
+++ b/src/test/scripts/functions/external/DynReadWrite.dml
@@ -19,22 +19,22 @@
 #
 #-------------------------------------------------------------
 
-
-dynRead = externalFunction(String fname, Integer rows, Integer cols, String format)
-return (Matrix[Double] M) 
-implemented in (classname="org.apache.sysml.udf.lib.DynamicReadMatrixCP",exectype="mem")   
-
-dynWrite = externalFunction(Matrix[Double] input, String fname, String format)
-return(Boolean success)
-implemented in (classname="org.apache.sysml.udf.lib.DynamicWriteMatrixCP",exectype="mem")  
-
-
-X = read($1, rows=$2, cols=$3, format="text");
-
-s = $1+"2";
-ret1 = dynWrite(X, s, $4);
-Y = dynRead(s, $2, $3, $4);
-ret2 = dynWrite(Y, $5, $4);
-
-print("successfully written: "+ret2);
+
+dynRead = externalFunction(String fname, Integer rows, Integer cols, String format)
+return (Matrix[Double] M) 
+implemented in (classname="org.apache.sysml.udf.lib.DynamicReadMatrixCP",exectype="mem")   
+
+dynWrite = externalFunction(Matrix[Double] input, String fname, String format)
+return(Boolean success)
+implemented in (classname="org.apache.sysml.udf.lib.DynamicWriteMatrixCP",exectype="mem")  
+
+
+X = read($1, rows=$2, cols=$3, format="text");
+
+s = $1+"2";
+ret1 = dynWrite(X, s, $4);
+Y = dynRead(s, $2, $3, $4);
+ret2 = dynWrite(Y, $5, $4);
+
+print("successfully written: "+ret2);
    
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/external/FunctionExpressions1.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/external/FunctionExpressions1.dml b/src/test/scripts/functions/external/FunctionExpressions1.dml
index 816b043..94c5633 100644
--- a/src/test/scripts/functions/external/FunctionExpressions1.dml
+++ b/src/test/scripts/functions/external/FunctionExpressions1.dml
@@ -19,19 +19,19 @@
 #
 #-------------------------------------------------------------
 
-
-orderExternal = externalFunction(Matrix[Double] A, Integer col, Boolean desc) return (Matrix[Double] B) 
-			    implemented in (classname="org.apache.sysml.udf.lib.OrderWrapper",exectype="mem")
-
-foo = function( Matrix[Double] A ) return (Matrix[Double] B)  
-{
-   for( i in 1:ncol(A) ) {
-      B = orderExternal(A, i, TRUE);
-   }
-}
- 
-X = read( $1, rows=$2, cols=$3 );
-Y = foo( X*X+7 );
-Y = sqrt( Y-7 );
-write( Y, $4 ); #ordered input
-
+
+orderExternal = externalFunction(Matrix[Double] A, Integer col, Boolean desc) return (Matrix[Double] B) 
+			    implemented in (classname="org.apache.sysml.udf.lib.OrderWrapper",exectype="mem")
+
+foo = function( Matrix[Double] A ) return (Matrix[Double] B)  
+{
+   for( i in 1:ncol(A) ) {
+      B = orderExternal(A, i, TRUE);
+   }
+}
+ 
+X = read( $1, rows=$2, cols=$3 );
+Y = foo( X*X+7 );
+Y = sqrt( Y-7 );
+write( Y, $4 ); #ordered input
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/external/FunctionExpressions2.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/external/FunctionExpressions2.dml b/src/test/scripts/functions/external/FunctionExpressions2.dml
index 39c50cf..fb2fcba 100644
--- a/src/test/scripts/functions/external/FunctionExpressions2.dml
+++ b/src/test/scripts/functions/external/FunctionExpressions2.dml
@@ -19,19 +19,19 @@
 #
 #-------------------------------------------------------------
 
-
-orderExternal = externalFunction(Matrix[Double] A, Integer col, Boolean desc) return (Matrix[Double] B) 
-			    implemented in (classname="org.apache.sysml.udf.lib.OrderWrapper",exectype="mem")
-
-foo = function( Matrix[Double] A ) return (Matrix[Double] B)  
-{
-   for( i in 1:ncol(A) ) {
-      B = orderExternal(A*A+7, i, TRUE);
-   }
-}
- 
-X = read( $1, rows=$2, cols=$3 );
-Y = foo( X );
-Y = sqrt( Y-7 );
-write( Y, $4 ); #ordered input
-
+
+orderExternal = externalFunction(Matrix[Double] A, Integer col, Boolean desc) return (Matrix[Double] B) 
+			    implemented in (classname="org.apache.sysml.udf.lib.OrderWrapper",exectype="mem")
+
+foo = function( Matrix[Double] A ) return (Matrix[Double] B)  
+{
+   for( i in 1:ncol(A) ) {
+      B = orderExternal(A*A+7, i, TRUE);
+   }
+}
+ 
+X = read( $1, rows=$2, cols=$3 );
+Y = foo( X );
+Y = sqrt( Y-7 );
+write( Y, $4 ); #ordered input
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/external/Order.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/external/Order.R b/src/test/scripts/functions/external/Order.R
index b061c25..769b6a8 100644
--- a/src/test/scripts/functions/external/Order.R
+++ b/src/test/scripts/functions/external/Order.R
@@ -19,15 +19,15 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-A <- readMM(paste(args[1], "A.mtx", sep=""))
-col <- as.numeric(args[2]);
-
-B <- A[order(sign(col)*A[,abs(col)]),]
-
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+A <- readMM(paste(args[1], "A.mtx", sep=""))
+col <- as.numeric(args[2]);
+
+B <- A[order(sign(col)*A[,abs(col)]),]
+
 writeMM(as(B, "CsparseMatrix"), paste(args[3], "B.mtx", sep="")); 
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/external/Order1.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/external/Order1.dml b/src/test/scripts/functions/external/Order1.dml
index b49a910..c7e3346 100644
--- a/src/test/scripts/functions/external/Order1.dml
+++ b/src/test/scripts/functions/external/Order1.dml
@@ -19,12 +19,12 @@
 #
 #-------------------------------------------------------------
 
-
-orderExternal = externalFunction(Matrix[Double] A, Integer col, Boolean desc) return (Matrix[Double] B) 
-			    implemented in (classname="org.apache.sysml.udf.lib.OrderWrapper",exectype="mem")
- 
-A = read( $1, rows=$2, cols=$3);  
-B1 = orderExternal(A, $4, FALSE);
-
-write(B1, $5);
-
+
+orderExternal = externalFunction(Matrix[Double] A, Integer col, Boolean desc) return (Matrix[Double] B) 
+			    implemented in (classname="org.apache.sysml.udf.lib.OrderWrapper",exectype="mem")
+ 
+A = read( $1, rows=$2, cols=$3);  
+B1 = orderExternal(A, $4, FALSE);
+
+write(B1, $5);
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/external/Order2.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/external/Order2.dml b/src/test/scripts/functions/external/Order2.dml
index 8359560..69e90b5 100644
--- a/src/test/scripts/functions/external/Order2.dml
+++ b/src/test/scripts/functions/external/Order2.dml
@@ -19,12 +19,12 @@
 #
 #-------------------------------------------------------------
 
-
-orderExternal = externalFunction(Matrix[Double] A, Integer col, Boolean desc) return (Matrix[Double] B) 
-			    implemented in (classname="org.apache.sysml.udf.lib.OrderWrapper",exectype="mem")
- 
-A = read( $1, rows=$2, cols=$3);  
-B1 = orderExternal(A, $4, TRUE);
-
-write(B1, $5);
-
+
+orderExternal = externalFunction(Matrix[Double] A, Integer col, Boolean desc) return (Matrix[Double] B) 
+			    implemented in (classname="org.apache.sysml.udf.lib.OrderWrapper",exectype="mem")
+ 
+A = read( $1, rows=$2, cols=$3);  
+B1 = orderExternal(A, $4, TRUE);
+
+write(B1, $5);
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/external/Outlier.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/external/Outlier.dml b/src/test/scripts/functions/external/Outlier.dml
index 071125a..1e0668d 100644
--- a/src/test/scripts/functions/external/Outlier.dml
+++ b/src/test/scripts/functions/external/Outlier.dml
@@ -19,23 +19,23 @@
 #
 #-------------------------------------------------------------
 
-
-# Note this script is externalized to customers, please do not change w/o consulting component owner.
-# How to invoke this dml script Outlier.dml?
-# Assume OUTLIER_HOME is set to the home of the dml script
-# Assume input and output directories are on hdfs as INPUT_DIR and OUTPUT_DIR
-# Assume rows = 100, cols = 10 for input matrix M
-# hadoop jar SystemML.jar -f $OUTLIER_HOME/Outlier.dml -args "$INPUT_DIR/M" 100 10 "$OUPUT_DIR/o"
-
-outliers = externalFunction(Matrix[Double] D, Integer m, Integer k) 
-return (Matrix[Double] C) 
-implemented in (classname="org.apache.sysml.udf.lib.OutlierWrapper",execlocation="master")
-
-M = read( $1, rows=$2, cols=$3, format="text");
-
-k = 2;
-m = 5;
-
-o = outliers(M,m,k);
-
+
+# Note this script is externalized to customers, please do not change w/o consulting component owner.
+# How to invoke this dml script Outlier.dml?
+# Assume OUTLIER_HOME is set to the home of the dml script
+# Assume input and output directories are on hdfs as INPUT_DIR and OUTPUT_DIR
+# Assume rows = 100, cols = 10 for input matrix M
+# hadoop jar SystemML.jar -f $OUTLIER_HOME/Outlier.dml -args "$INPUT_DIR/M" 100 10 "$OUPUT_DIR/o"
+
+outliers = externalFunction(Matrix[Double] D, Integer m, Integer k) 
+return (Matrix[Double] C) 
+implemented in (classname="org.apache.sysml.udf.lib.OutlierWrapper",execlocation="master")
+
+M = read( $1, rows=$2, cols=$3, format="text");
+
+k = 2;
+m = 5;
+
+o = outliers(M,m,k);
+
 write(o, $4, format="text");
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/external/kMeans.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/external/kMeans.dml b/src/test/scripts/functions/external/kMeans.dml
index d5343b3..28b7a77 100644
--- a/src/test/scripts/functions/external/kMeans.dml
+++ b/src/test/scripts/functions/external/kMeans.dml
@@ -19,23 +19,23 @@
 #
 #-------------------------------------------------------------
 
-
-# Note this script is externalized to customers, please do not change w/o consulting component owner.
-# How to invoke this dml script kMeans.dml?
-# Assume KMEANS_HOME is set to the home of the dml script
-# Assume input and output directories are on hdfs as INPUT_DIR and OUTPUT_DIR
-# Assume rows = 100, cols = 10, clusters = 5 
-# hadoop jar SystemML.jar -f $KMEANS_HOME/kMeans.dml -args "$INPUT_DIR/M" 100 10 "$OUPUT_DIR/kcenters"
-
-kMeans = externalFunction(Matrix[Double] D, Integer k) 
-return (Matrix[Double] C) 
-implemented in (classname="org.apache.sysml.udf.lib.kMeansWrapper",execlocation="master")
-
-M = read($1, rows=$2, cols=$3, format="text");
-
-k = 5;
-
-kcenters = kMeans(M,k)
-
-write(kcenters, $4, format="text");
-
+
+# Note this script is externalized to customers, please do not change w/o consulting component owner.
+# How to invoke this dml script kMeans.dml?
+# Assume KMEANS_HOME is set to the home of the dml script
+# Assume input and output directories are on hdfs as INPUT_DIR and OUTPUT_DIR
+# Assume rows = 100, cols = 10, clusters = 5 
+# hadoop jar SystemML.jar -f $KMEANS_HOME/kMeans.dml -args "$INPUT_DIR/M" 100 10 "$OUPUT_DIR/kcenters"
+
+kMeans = externalFunction(Matrix[Double] D, Integer k) 
+return (Matrix[Double] C) 
+implemented in (classname="org.apache.sysml.udf.lib.kMeansWrapper",execlocation="master")
+
+M = read($1, rows=$2, cols=$3, format="text");
+
+k = 5;
+
+kcenters = kMeans(M,k)
+
+write(kcenters, $4, format="text");
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/external/kMeans2.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/external/kMeans2.dml b/src/test/scripts/functions/external/kMeans2.dml
index b1f4a76..ae1edd2 100644
--- a/src/test/scripts/functions/external/kMeans2.dml
+++ b/src/test/scripts/functions/external/kMeans2.dml
@@ -19,30 +19,30 @@
 #
 #-------------------------------------------------------------
 
-
-# Note this script is externalized to customers, please do not change w/o consulting component owner.
-# How to invoke this dml script kMeans.dml?
-# Assume KMEANS2_HOME is set to the home of the dml script
-# Assume input and output directories are on hdfs as INPUT_DIR and OUTPUT_DIR
-# Assume rows = 100, cols = 10, clusters = 5
-# hadoop jar SystemML.jar -f $KMEANS2_HOME/kMeans2.dml -args "$INPUT_DIR/M" 100 10 "$INPUT_DIR/initCenters" "$OUPUT_DIR/kcenters" "$OUTPUT_DIR/kcentersWithInit"
-
-kMeans = externalFunction(Matrix[Double] D, Integer k) 
-return (Matrix[Double] C) 
-implemented in (classname="org.apache.sysml.udf.lib.kMeansWrapper",execlocation="master")
-
-kMeansWithInit = externalFunction(Matrix[Double] D, Integer k, Matrix[Double] initK)
-return (Matrix[Double] C)
-implemented in (classname="org.apache.sysml.udf.lib.kMeansWrapper",execlocation="master")
-
-M = read($1, rows=$2, cols=$3, format="text");
-initialCenters = read($4, rows=5, cols=$3, format="text");
-
-k = 5;
-
-kcenters = kMeans(M,k)
-kcentersWithInit = kMeansWithInit(M,k,initialCenters);
-
-write(kcenters, $5, format="text");
-write(kcentersWithInit, $6, format="text");
-
+
+# Note this script is externalized to customers, please do not change w/o consulting component owner.
+# How to invoke this dml script kMeans.dml?
+# Assume KMEANS2_HOME is set to the home of the dml script
+# Assume input and output directories are on hdfs as INPUT_DIR and OUTPUT_DIR
+# Assume rows = 100, cols = 10, clusters = 5
+# hadoop jar SystemML.jar -f $KMEANS2_HOME/kMeans2.dml -args "$INPUT_DIR/M" 100 10 "$INPUT_DIR/initCenters" "$OUPUT_DIR/kcenters" "$OUTPUT_DIR/kcentersWithInit"
+
+kMeans = externalFunction(Matrix[Double] D, Integer k) 
+return (Matrix[Double] C) 
+implemented in (classname="org.apache.sysml.udf.lib.kMeansWrapper",execlocation="master")
+
+kMeansWithInit = externalFunction(Matrix[Double] D, Integer k, Matrix[Double] initK)
+return (Matrix[Double] C)
+implemented in (classname="org.apache.sysml.udf.lib.kMeansWrapper",execlocation="master")
+
+M = read($1, rows=$2, cols=$3, format="text");
+initialCenters = read($4, rows=5, cols=$3, format="text");
+
+k = 5;
+
+kcenters = kMeans(M,k)
+kcentersWithInit = kMeansWithInit(M,k,initialCenters);
+
+write(kcenters, $5, format="text");
+write(kcentersWithInit, $6, format="text");
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/gdfo/LinregCG.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/gdfo/LinregCG.R b/src/test/scripts/functions/gdfo/LinregCG.R
index 1d69385..5dcad95 100644
--- a/src/test/scripts/functions/gdfo/LinregCG.R
+++ b/src/test/scripts/functions/gdfo/LinregCG.R
@@ -19,39 +19,39 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-library("Matrix")
-
-X = readMM(paste(args[1], "X.mtx", sep=""))
-y = readMM(paste(args[1], "y.mtx", sep=""))
-
-intercept = as.integer(args[2]);
-eps = as.double(args[3]);
-maxiter = as.double(args[4]);
-
-if( intercept == 1 ){
-   ones = matrix(1, nrow(X), 1); 
-   X = cbind(X, ones);
-}
-
-r = -(t(X) %*% y);
-p = -r;
-norm_r2 = sum(r * r);
-w = matrix(0, ncol(X), 1);
-
-i = 0;
-while(i < maxiter) {
-	q = ((t(X) %*% (X %*% p)) + eps  * p);
-	alpha = norm_r2 / ((t(p) %*% q)[1:1]);
-	w = w + alpha * p;
-	old_norm_r2 = norm_r2;
-	r = r + alpha * q;
-	norm_r2 = sum(r * r);
-	beta = norm_r2 / old_norm_r2;
-	p = -r + beta * p;
-	i = i + 1;
-}
-
-writeMM(as(w,"CsparseMatrix"), paste(args[5], "w", sep=""))
+
+args <- commandArgs(TRUE)
+options(digits=22)
+library("Matrix")
+
+X = readMM(paste(args[1], "X.mtx", sep=""))
+y = readMM(paste(args[1], "y.mtx", sep=""))
+
+intercept = as.integer(args[2]);
+eps = as.double(args[3]);
+maxiter = as.double(args[4]);
+
+if( intercept == 1 ){
+   ones = matrix(1, nrow(X), 1); 
+   X = cbind(X, ones);
+}
+
+r = -(t(X) %*% y);
+p = -r;
+norm_r2 = sum(r * r);
+w = matrix(0, ncol(X), 1);
+
+i = 0;
+while(i < maxiter) {
+	q = ((t(X) %*% (X %*% p)) + eps  * p);
+	alpha = norm_r2 / ((t(p) %*% q)[1:1]);
+	w = w + alpha * p;
+	old_norm_r2 = norm_r2;
+	r = r + alpha * q;
+	norm_r2 = sum(r * r);
+	beta = norm_r2 / old_norm_r2;
+	p = -r + beta * p;
+	i = i + 1;
+}
+
+writeMM(as(w,"CsparseMatrix"), paste(args[5], "w", sep=""))

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/gdfo/LinregCG.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/gdfo/LinregCG.dml b/src/test/scripts/functions/gdfo/LinregCG.dml
index 02d0fad..85a66e4 100644
--- a/src/test/scripts/functions/gdfo/LinregCG.dml
+++ b/src/test/scripts/functions/gdfo/LinregCG.dml
@@ -19,38 +19,38 @@
 #
 #-------------------------------------------------------------
 
-
-X = read($1);
-y = read($2);
-intercept = $3;
-eps = $4;
-maxiter = $5;
-
-if( intercept == 1 ){
-   ones = matrix(1, nrow(X), 1); 
-   X = append(X, ones);
-}
-
-r = -(t(X) %*% y);
-p = -r;
-norm_r2 = sum(r * r);
-w = matrix(0, rows = ncol(X), cols = 1);
-
-i = 0;
-while(i < maxiter) {
-	q = ((t(X) %*% (X %*% p)) + eps  * p);
-	alpha = norm_r2 / castAsScalar(t(p) %*% q);
-	w = w + alpha * p;
-	old_norm_r2 = norm_r2;
-	r = r + alpha * q;
-	norm_r2 = sum(r * r);
-	beta = norm_r2 / old_norm_r2;
-	p = -r + beta * p;
-	i = i + 1;
-}
-
-write(w, $6);
-
-
-
-
+
+X = read($1);
+y = read($2);
+intercept = $3;
+eps = $4;
+maxiter = $5;
+
+if( intercept == 1 ){
+   ones = matrix(1, nrow(X), 1); 
+   X = append(X, ones);
+}
+
+r = -(t(X) %*% y);
+p = -r;
+norm_r2 = sum(r * r);
+w = matrix(0, rows = ncol(X), cols = 1);
+
+i = 0;
+while(i < maxiter) {
+	q = ((t(X) %*% (X %*% p)) + eps  * p);
+	alpha = norm_r2 / castAsScalar(t(p) %*% q);
+	w = w + alpha * p;
+	old_norm_r2 = norm_r2;
+	r = r + alpha * q;
+	norm_r2 = sum(r * r);
+	beta = norm_r2 / old_norm_r2;
+	p = -r + beta * p;
+	i = i + 1;
+}
+
+write(w, $6);
+
+
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/gdfo/LinregDS.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/gdfo/LinregDS.R b/src/test/scripts/functions/gdfo/LinregDS.R
index 588d2be..752e862 100644
--- a/src/test/scripts/functions/gdfo/LinregDS.R
+++ b/src/test/scripts/functions/gdfo/LinregDS.R
@@ -19,25 +19,25 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-library("Matrix")
-
-X = as.matrix(readMM(paste(args[1], "X.mtx", sep="")))
-y = as.matrix(readMM(paste(args[1], "y.mtx", sep="")))
-I = as.vector(matrix(1, ncol(X), 1));
-intercept = as.integer(args[2])
-lambda = as.double(args[3]);
-
-if( intercept == 1 ){
-   ones = matrix(1, nrow(X), 1); 
-   X = cbind(X, ones);
-   I = as.vector(matrix(1, ncol(X), 1));
-}
-
-A = t(X) %*% X + diag(I)*lambda;
-b = t(X) %*% y;
-beta = solve(A, b);
-
+
+args <- commandArgs(TRUE)
+options(digits=22)
+library("Matrix")
+
+X = as.matrix(readMM(paste(args[1], "X.mtx", sep="")))
+y = as.matrix(readMM(paste(args[1], "y.mtx", sep="")))
+I = as.vector(matrix(1, ncol(X), 1));
+intercept = as.integer(args[2])
+lambda = as.double(args[3]);
+
+if( intercept == 1 ){
+   ones = matrix(1, nrow(X), 1); 
+   X = cbind(X, ones);
+   I = as.vector(matrix(1, ncol(X), 1));
+}
+
+A = t(X) %*% X + diag(I)*lambda;
+b = t(X) %*% y;
+beta = solve(A, b);
+
 writeMM(as(beta,"CsparseMatrix"), paste(args[4], "B", sep=""))
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/gdfo/LinregDS.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/gdfo/LinregDS.dml b/src/test/scripts/functions/gdfo/LinregDS.dml
index b8549ee..3601830 100644
--- a/src/test/scripts/functions/gdfo/LinregDS.dml
+++ b/src/test/scripts/functions/gdfo/LinregDS.dml
@@ -19,21 +19,21 @@
 #
 #-------------------------------------------------------------
 
-
-X = read($1);
-y = read($2);
-I = matrix(1, ncol(X), 1);
-intercept = $3;
-lambda = $4;
-
-if( intercept == 1 ){
-   ones = matrix(1, nrow(X), 1); 
-   X = append(X, ones);
-   I = matrix(1, ncol(X), 1);
-}
-
-A = t(X) %*% X + diag(I)*lambda;
-b = t(X) %*% y;
-beta = solve(A, b);
-
-write(beta, $5);
+
+X = read($1);
+y = read($2);
+I = matrix(1, ncol(X), 1);
+intercept = $3;
+lambda = $4;
+
+if( intercept == 1 ){
+   ones = matrix(1, nrow(X), 1); 
+   X = append(X, ones);
+   I = matrix(1, ncol(X), 1);
+}
+
+A = t(X) %*% X + diag(I)*lambda;
+b = t(X) %*% y;
+beta = solve(A, b);
+
+write(beta, $5);



[33/55] [partial] incubator-systemml git commit: [SYSTEMML-482] [SYSTEMML-480] Adding a Git attributes file to enforce Unix-styled line endings, and normalizing all of the line endings.

Posted by du...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/main/java/org/apache/sysml/runtime/transform/MVImputeAgent.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/transform/MVImputeAgent.java b/src/main/java/org/apache/sysml/runtime/transform/MVImputeAgent.java
index bbc0234..1adee6c 100644
--- a/src/main/java/org/apache/sysml/runtime/transform/MVImputeAgent.java
+++ b/src/main/java/org/apache/sysml/runtime/transform/MVImputeAgent.java
@@ -1,977 +1,977 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.sysml.runtime.transform;
-
-import java.io.BufferedReader;
-import java.io.BufferedWriter;
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.io.OutputStreamWriter;
-import java.nio.charset.CharacterCodingException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.BitSet;
-import java.util.HashMap;
-import java.util.Iterator;
-
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.IntWritable;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.wink.json4j.JSONArray;
-import org.apache.wink.json4j.JSONException;
-import org.apache.wink.json4j.JSONObject;
-
-import scala.Tuple2;
-
-import org.apache.sysml.runtime.DMLRuntimeException;
-import org.apache.sysml.runtime.functionobjects.CM;
-import org.apache.sysml.runtime.functionobjects.KahanPlus;
-import org.apache.sysml.runtime.functionobjects.Mean;
-import org.apache.sysml.runtime.instructions.cp.CM_COV_Object;
-import org.apache.sysml.runtime.instructions.cp.KahanObject;
-import org.apache.sysml.runtime.matrix.operators.CMOperator;
-import org.apache.sysml.runtime.matrix.operators.CMOperator.AggregateOperationTypes;
-import org.apache.sysml.runtime.util.UtilFunctions;
-
-public class MVImputeAgent extends TransformationAgent {
-	
-	private static final long serialVersionUID = 9057868620144662194L;
-
-	public static final String MEAN_PREFIX = "mean";
-	public static final String VARIANCE_PREFIX = "var";
-	public static final String CORRECTION_PREFIX = "correction";
-	public static final String COUNT_PREFIX = "validcount";		// #of valid or non-missing values in a column
-	public static final String TOTAL_COUNT_PREFIX = "totalcount";	// #of total records processed by a mapper
-	public static final String CONSTANT_PREFIX = "constant";
-	
-	public enum MVMethod { INVALID, GLOBAL_MEAN, GLOBAL_MODE, CONSTANT };
-	
-	private int[] _mvList = null;
-	/* 
-	 * Imputation Methods:
-	 * 1 - global_mean
-	 * 2 - global_mode
-	 * 3 - constant
-	 * 
-	 */
-	private byte[] _mvMethodList = null;
-	private byte[] _mvscMethodList = null;	// scaling methods for attributes that are imputed and also scaled
-	
-	private BitSet _isMVScaled = null;
-	private CM _varFn = CM.getCMFnObject(AggregateOperationTypes.VARIANCE);		// function object that understands variance computation
-	
-	// objects required to compute mean and variance of all non-missing entries 
-	private Mean _meanFn = Mean.getMeanFnObject();	// function object that understands mean computation
-	private KahanObject[] _meanList = null; 		// column-level means, computed so far
-	private long[] _countList = null;				// #of non-missing values
-	
-	private CM_COV_Object[] _varList = null;		// column-level variances, computed so far (for scaling)
-	
-
-	private int[] 			_scnomvList = null;			// List of attributes that are scaled but not imputed
-	private byte[]			_scnomvMethodList = null;	// scaling methods: 0 for invalid; 1 for mean-subtraction; 2 for z-scoring
-	private KahanObject[] 	_scnomvMeanList = null;		// column-level means, for attributes scaled but not imputed
-	private long[] 			_scnomvCountList = null;	// #of non-missing values, for attributes scaled but not imputed
-	private CM_COV_Object[] _scnomvVarList = null;		// column-level variances, computed so far
-	
-	private String[] _replacementList = null;		// replacements: for global_mean, mean; and for global_mode, recode id of mode category
-	
-	public String[] getReplacements() { return _replacementList; }
-	public KahanObject[] getMeans()   { return _meanList; }
-	public CM_COV_Object[] getVars()  { return _varList; }
-	public KahanObject[] getMeans_scnomv()   { return _scnomvMeanList; }
-	public CM_COV_Object[] getVars_scnomv()  { return _scnomvVarList; }
-	
-	MVImputeAgent(JSONObject parsedSpec) throws JSONException {
-	
-		boolean isMV = parsedSpec.containsKey(TX_METHOD.IMPUTE.toString());
-		boolean isSC = parsedSpec.containsKey(TX_METHOD.SCALE.toString());
-		
-		if(!isMV) {
-			// MV Impute is not applicable
-			_mvList = null;
-			_mvMethodList = null;
-			_meanList = null;
-			_countList = null;
-			_replacementList = null;
-		}
-		else {
-			JSONObject mvobj = (JSONObject) parsedSpec.get(TX_METHOD.IMPUTE.toString());
-			JSONArray mvattrs = (JSONArray) mvobj.get(JSON_ATTRS);
-			JSONArray mvmthds = (JSONArray) mvobj.get(JSON_MTHD);
-			int mvLength = mvattrs.size();
-			
-			assert(mvLength == mvmthds.size());
-			
-			_mvList = new int[mvLength];
-			_mvMethodList = new byte[mvLength];
-			
-			_meanList = new KahanObject[mvLength];
-			_countList = new long[mvLength];
-			_varList = new CM_COV_Object[mvLength];
-			
-			_isMVScaled = new BitSet(_mvList.length);
-			_isMVScaled.clear();
-			
-			for(int i=0; i < _mvList.length; i++) {
-				_mvList[i] = UtilFunctions.toInt(mvattrs.get(i));
-				_mvMethodList[i] = (byte) UtilFunctions.toInt(mvmthds.get(i)); 
-				_meanList[i] = new KahanObject(0, 0);
-			}
-			
-			_replacementList = new String[mvLength]; 	// contains replacements for all columns (scale and categorical)
-			
-			JSONArray constants = (JSONArray)mvobj.get(JSON_CONSTS);
-			for(int i=0; i < constants.size(); i++) {
-				if ( constants.get(i) == null )
-					_replacementList[i] = "NaN";
-				else
-					_replacementList[i] = constants.get(i).toString();
-			}
-		}
-		
-		// Handle scaled attributes
-		if ( !isSC )
-		{
-			// scaling is not applicable
-			_scnomvCountList = null;
-			_scnomvMeanList = null;
-			_scnomvVarList = null;
-		}
-		else
-		{
-			if ( _mvList != null ) 
-				_mvscMethodList = new byte[_mvList.length];
-			
-			JSONObject scobj = (JSONObject) parsedSpec.get(TX_METHOD.SCALE.toString());
-			JSONArray scattrs = (JSONArray) scobj.get(JSON_ATTRS);
-			JSONArray scmthds = (JSONArray) scobj.get(JSON_MTHD);
-			int scLength = scattrs.size();
-			
-			int[] _allscaled = new int[scLength];
-			int scnomv = 0, colID;
-			byte mthd;
-			for(int i=0; i < scLength; i++)
-			{
-				colID = UtilFunctions.toInt(scattrs.get(i));
-				mthd = (byte) UtilFunctions.toInt(scmthds.get(i)); 
-						
-				_allscaled[i] = colID;
-				
-				// check if the attribute is also MV imputed
-				int mvidx = isImputed(colID);
-				if(mvidx != -1)
-				{
-					_isMVScaled.set(mvidx);
-					_mvscMethodList[mvidx] = mthd;
-					_varList[mvidx] = new CM_COV_Object();
-				}
-				else
-					scnomv++;	// count of scaled but not imputed 
-			}
-			
-			if(scnomv > 0)
-			{
-				_scnomvList = new int[scnomv];			
-				_scnomvMethodList = new byte[scnomv];	
-	
-				_scnomvMeanList = new KahanObject[scnomv];
-				_scnomvCountList = new long[scnomv];
-				_scnomvVarList = new CM_COV_Object[scnomv];
-				
-				for(int i=0, idx=0; i < scLength; i++)
-				{
-					colID = UtilFunctions.toInt(scattrs.get(i));
-					mthd = (byte)UtilFunctions.toInt(scmthds.get(i)); 
-							
-					if(isImputed(colID) == -1)
-					{	// scaled but not imputed
-						_scnomvList[idx] = colID;
-						_scnomvMethodList[idx] = mthd;
-						_scnomvMeanList[idx] = new KahanObject(0, 0);
-						_scnomvVarList[idx] = new CM_COV_Object();
-						idx++;
-					}
-				}
-			}
-		}
-	}
-	
-	public void prepare(String[] words, TfUtils agents) throws IOException {
-		
-		try {
-			String w = null;
-			if(_mvList != null)
-			for(int i=0; i <_mvList.length; i++) {
-				int colID = _mvList[i];
-				w = UtilFunctions.unquote(words[colID-1].trim());
-				
-				try {
-				if(!agents.isNA(w)) {
-					_countList[i]++;
-					
-					boolean computeMean = (_mvMethodList[i] == 1 || _isMVScaled.get(i) );
-					if(computeMean) {
-						// global_mean
-						double d = UtilFunctions.parseToDouble(w);
-						_meanFn.execute2(_meanList[i], d, _countList[i]);
-						
-						if (_isMVScaled.get(i) && _mvscMethodList[i] == 2)
-							_varFn.execute(_varList[i], d);
-					}
-					else {
-						// global_mode or constant
-						// Nothing to do here. Mode is computed using recode maps.
-					}
-				}
-				} catch (NumberFormatException e) 
-				{
-					throw new RuntimeException("Encountered \"" + w + "\" in column ID \"" + colID + "\", when expecting a numeric value. Consider adding \"" + w + "\" to na.strings, along with an appropriate imputation method.");
-				}
-			}
-			
-			// Compute mean and variance for attributes that are scaled but not imputed
-			if(_scnomvList != null)
-			for(int i=0; i < _scnomvList.length; i++) 
-			{
-				int colID = _scnomvList[i];
-				w = UtilFunctions.unquote(words[colID-1].trim());
-				double d = UtilFunctions.parseToDouble(w);
-				_scnomvCountList[i]++; 		// not required, this is always equal to total #records processed
-				_meanFn.execute2(_scnomvMeanList[i], d, _scnomvCountList[i]);
-				if(_scnomvMethodList[i] == 2)
-					_varFn.execute(_scnomvVarList[i], d);
-			}
-		} catch(Exception e) {
-			throw new IOException(e);
-		}
-	}
-	
-	// ----------------------------------------------------------------------------------------------------------
-	
-	private String encodeCMObj(CM_COV_Object obj)
-	{
-		StringBuilder sb = new StringBuilder();
-		sb.append(obj.w);
-		sb.append(",");
-		sb.append(obj.mean._sum);
-		sb.append(",");
-		sb.append(obj.mean._correction);
-		sb.append(",");
-		sb.append(obj.m2._sum);
-		sb.append(",");
-		sb.append(obj.m2._correction);
-		return sb.toString();
-	}
-	
-	private CM_COV_Object decodeCMObj(String s) 
-	{
-		CM_COV_Object obj = new CM_COV_Object();
-		String[] parts = s.split(",");
-		obj.w = UtilFunctions.parseToDouble(parts[0]);
-		obj.mean._sum = UtilFunctions.parseToDouble(parts[1]);
-		obj.mean._correction = UtilFunctions.parseToDouble(parts[2]);
-		obj.m2._sum = UtilFunctions.parseToDouble(parts[3]);
-		obj.m2._correction = UtilFunctions.parseToDouble(parts[4]);
-		
-		return obj;
-	}
-	
-	private DistinctValue prepMeanOutput(int taskID, int idx, StringBuilder sb, boolean scnomv) throws CharacterCodingException {
-		
-		byte mthd = (scnomv ? _scnomvMethodList[idx] : _mvMethodList[idx]);
-		
-		if ( scnomv || mthd == 1 || _isMVScaled.get(idx) ) {
-			String suffix = null;
-			if(scnomv)
-				suffix = "scnomv";
-			else if ( mthd ==1 && _isMVScaled.get(idx) )
-				suffix = "scmv"; 	// both scaled and mv imputed
-			else if ( mthd == 1 )
-				suffix = "noscmv";
-			else
-				suffix = "scnomv";
-			
-			sb.setLength(0);
-			sb.append(MEAN_PREFIX);
-			sb.append("_");
-			sb.append(taskID);
-			sb.append("_");
-			double mean = (scnomv ? _scnomvMeanList[idx]._sum : _meanList[idx]._sum);
-			sb.append(Double.toString(mean));
-			sb.append(",");
-			sb.append(suffix);
-			//String s = MEAN_PREFIX + "_" + taskID + "_" + Double.toString(_meanList[idx]._sum) + "," + suffix;
-			return new DistinctValue(sb.toString(), -1L);
-		}
-		
-		return null;
-	}
-	
-	private DistinctValue prepMeanCorrectionOutput(int taskID, int idx, StringBuilder sb, boolean scnomv) throws CharacterCodingException {
-		byte mthd = (scnomv ? _scnomvMethodList[idx] : _mvMethodList[idx]);
-		if ( scnomv || mthd == 1 || _isMVScaled.get(idx) ) {
-			sb.setLength(0);
-			//CORRECTION_PREFIX + "_" + taskID + "_" + Double.toString(mean._correction);
-			sb.append(CORRECTION_PREFIX);
-			sb.append("_");
-			sb.append(taskID);
-			sb.append("_");
-			double corr = (scnomv ? _scnomvMeanList[idx]._correction : _meanList[idx]._correction);
-			sb.append(Double.toString(corr));
-			return new DistinctValue(sb.toString(), -1L);
-		}
-		return null;
-	}
-	
-	private DistinctValue prepMeanCountOutput(int taskID, int idx, StringBuilder sb, boolean scnomv) throws CharacterCodingException {
-		byte mthd = (scnomv ? _scnomvMethodList[idx] : _mvMethodList[idx]);
-		if ( scnomv || mthd == 1 || _isMVScaled.get(idx) ) {
-			sb.setLength(0);
-			//s = COUNT_PREFIX + "_" + taskID + "_" + Long.toString(count);
-			sb.append(COUNT_PREFIX);
-			sb.append("_");
-			sb.append(taskID);
-			sb.append("_");
-			long count = (scnomv ? _scnomvCountList[idx] : _countList[idx]);
-			sb.append( Long.toString(count));
-			return new DistinctValue(sb.toString(), -1L);
-		}
-		return null;
-	}
-	
-	private DistinctValue prepTotalCountOutput(int taskID, int idx, StringBuilder sb, boolean scnomv, TfUtils agents) throws CharacterCodingException {
-		byte mthd = (scnomv ? _scnomvMethodList[idx] : _mvMethodList[idx]);
-		if ( scnomv || mthd == 1 || _isMVScaled.get(idx) ) {
-			sb.setLength(0);
-			//TOTAL_COUNT_PREFIX + "_" + taskID + "_" + Long.toString(TransformationAgent._numValidRecords);
-			sb.append(TOTAL_COUNT_PREFIX);
-			sb.append("_");
-			sb.append(taskID);
-			sb.append("_");
-			sb.append( Long.toString(agents.getValid()) );
-			return new DistinctValue(sb.toString(), -1L);
-		}
-		return null;
-	}
-	
-	/**
-	 * Builds the record that carries the constant replacement of a column imputed with method 3.
-	 * The word has the layout CONSTANT_PREFIX + "_" + replacement.
-	 * 
-	 * @param idx position of the column in the imputed list
-	 * @param sb scratch buffer; its previous content is discarded
-	 * @return the record, or null if no imputation is configured or the method is not 3
-	 * @throws CharacterCodingException
-	 */
-	private DistinctValue prepConstantOutput(int idx, StringBuilder sb) throws CharacterCodingException {
-		if ( _mvMethodList == null || _mvMethodList[idx] != 3 )
-			return null;
-		
-		sb.setLength(0);
-		sb.append(CONSTANT_PREFIX).append("_").append(_replacementList[idx]);
-		return new DistinctValue(sb.toString(), -1);
-	}
-	
-	/**
-	 * Builds the record that carries the encoded variance object of a column.
-	 * The word has the layout VARIANCE_PREFIX + "_" + taskID + "_" + encodeCMObj(object).
-	 * 
-	 * @param taskID identifier of the emitting task, embedded in the word
-	 * @param idx position in the imputed list, or in the scaled-but-not-imputed list if scnomv is set
-	 * @param sb scratch buffer; its previous content is discarded
-	 * @param scnomv true if idx refers to the scaled-but-not-imputed lists
-	 * @return the record; null unless scnomv is set or the imputed column is scaled with method 2
-	 * @throws CharacterCodingException
-	 */
-	private DistinctValue prepVarOutput(int taskID, int idx, StringBuilder sb, boolean scnomv) throws CharacterCodingException {
-		// '&&' binds tighter than '||'; the parentheses only spell that out
-		final boolean emit = scnomv || ( _isMVScaled.get(idx) && _mvscMethodList[idx] == 2 );
-		if ( !emit )
-			return null;
-		
-		sb.setLength(0);
-		sb.append(VARIANCE_PREFIX).append("_").append(taskID).append("_");
-		final CM_COV_Object moments = scnomv ? _scnomvVarList[idx] : _varList[idx];
-		sb.append(encodeCMObj(moments));
-		return new DistinctValue(sb.toString(), -1L);
-	}
-	
-	/** Forwards the record to the collector under the given key; null records are dropped. */
-	private void outDV(IntWritable iw, DistinctValue dv, OutputCollector<IntWritable, DistinctValue> out) throws IOException {
-		if ( dv == null )
-			return;
-		out.collect(iw, dv);
-	}
-	
-	/**
-	 * Emits, from a mapper, the partial statistics of every imputed and/or scaled column.
-	 * Each record is keyed by the negated column ID; the reducers collect and merge them.
-	 * 
-	 * @param out collector receiving the (negated column ID, record) pairs
-	 * @param taskID identifier of the emitting task, embedded in the records
-	 * @param agents passed on to the total-count record builder
-	 * @throws IOException wraps any exception raised while building or emitting a record
-	 */
-	@Override
-	public void mapOutputTransformationMetadata(OutputCollector<IntWritable, DistinctValue> out, int taskID, TfUtils agents) throws IOException {
-		try {
-			final StringBuilder buf = new StringBuilder();
-			
-			// attributes that are imputed (and possibly scaled as well)
-			final int numMV = (_mvList == null) ? 0 : _mvList.length;
-			for(int pos=0; pos < numMV; pos++) {
-				final IntWritable key = new IntWritable(-_mvList[pos]);
-				
-				outDV(key, prepMeanOutput(taskID, pos, buf, false), out);
-				outDV(key, prepMeanCorrectionOutput(taskID, pos, buf, false), out);
-				outDV(key, prepMeanCountOutput(taskID, pos, buf, false), out);
-				outDV(key, prepTotalCountOutput(taskID, pos, buf, false, agents), out);
-				outDV(key, prepConstantOutput(pos, buf), out);
-				// variance information relevant to scaling
-				outDV(key, prepVarOutput(taskID, pos, buf, false), out);
-			}
-			
-			// attributes that are scaled but not imputed
-			final int numSc = (_scnomvList == null) ? 0 : _scnomvList.length;
-			for(int pos=0; pos < numSc; pos++) {
-				final IntWritable key = new IntWritable(-_scnomvList[pos]);
-				
-				outDV(key, prepMeanOutput(taskID, pos, buf, true), out);
-				outDV(key, prepMeanCorrectionOutput(taskID, pos, buf, true), out);
-				outDV(key, prepMeanCountOutput(taskID, pos, buf, true), out);
-				outDV(key, prepTotalCountOutput(taskID, pos, buf, true, agents), out);
-				outDV(key, prepVarOutput(taskID, pos, buf, true), out);
-			}
-		}
-		catch(Exception e) {
-			throw new IOException(e);
-		}
-	}
-	
-	/**
-	 * Spark counterpart of the collector helper: appends the (key, record) pair to the
-	 * list that is handed to the shuffle. Null records are dropped.
-	 * 
-	 * @param iw key of the pair
-	 * @param dv record to append; may be null
-	 * @param list destination list
-	 * @throws IOException
-	 */
-	private void addDV(Integer iw, DistinctValue dv, ArrayList<Tuple2<Integer, DistinctValue>> list) throws IOException {
-		if ( dv == null )
-			return;
-		list.add( new Tuple2<Integer, DistinctValue>(iw, dv) );
-	}
-
-	/**
-	 * List-based variant of the map-side metadata output: appends the partial statistics of
-	 * every imputed and/or scaled column to the given list, keyed by the negated column ID.
-	 * 
-	 * @param taskID identifier of the emitting task, embedded in the records
-	 * @param list destination list; it is modified in place
-	 * @param agents passed on to the total-count record builder
-	 * @return the same list instance that was passed in
-	 * @throws IOException wraps any exception raised while building a record
-	 */
-	public ArrayList<Tuple2<Integer, DistinctValue>> mapOutputTransformationMetadata(int taskID, ArrayList<Tuple2<Integer, DistinctValue>> list, TfUtils agents) throws IOException {
-		try {
-			final StringBuilder buf = new StringBuilder();
-			
-			// attributes that are imputed (and possibly scaled as well)
-			final int numMV = (_mvList == null) ? 0 : _mvList.length;
-			for(int pos=0; pos < numMV; pos++) {
-				final Integer key = -_mvList[pos];
-				
-				addDV(key, prepMeanOutput(taskID, pos, buf, false), list);
-				addDV(key, prepMeanCorrectionOutput(taskID, pos, buf, false), list);
-				addDV(key, prepMeanCountOutput(taskID, pos, buf, false), list);
-				addDV(key, prepTotalCountOutput(taskID, pos, buf, false, agents), list);
-				addDV(key, prepConstantOutput(pos, buf), list);
-				// variance information relevant to scaling
-				addDV(key, prepVarOutput(taskID, pos, buf, false), list);
-			}
-			
-			// attributes that are scaled but not imputed
-			final int numSc = (_scnomvList == null) ? 0 : _scnomvList.length;
-			for(int pos=0; pos < numSc; pos++) {
-				final Integer key = -_scnomvList[pos];
-				
-				addDV(key, prepMeanOutput(taskID, pos, buf, true), list);
-				addDV(key, prepMeanCorrectionOutput(taskID, pos, buf, true), list);
-				addDV(key, prepMeanCountOutput(taskID, pos, buf, true), list);
-				addDV(key, prepTotalCountOutput(taskID, pos, buf, true, agents), list);
-				addDV(key, prepVarOutput(taskID, pos, buf, true), list);
-			}
-		}
-		catch(Exception e) {
-			throw new IOException(e);
-		}
-		return list;
-	}
-	
-	// ----------------------------------------------------------------------------------------------------------
-	
-	/**
-	 * Writes the imputation metadata file of a column: a single line holding the column ID
-	 * and the replacement value, separated by TXMTD_SEP. An existing file is overwritten.
-	 * 
-	 * @param colID ID of the column
-	 * @param mean replacement value to record
-	 * @param tfMtdDir root directory of the transformation metadata
-	 * @param fs file system on which the file is created
-	 * @param agents used to resolve the column name that forms the file name
-	 * @throws IOException if the file cannot be created or written
-	 */
-	private void writeTfMtd(int colID, String mean, String tfMtdDir, FileSystem fs, TfUtils agents) throws IOException 
-	{
-		Path pt=new Path(tfMtdDir+"/Impute/"+ agents.getName(colID) + MV_FILE_SUFFIX);
-		BufferedWriter br=new BufferedWriter(new OutputStreamWriter(fs.create(pt,true)));
-		try {
-			br.write(colID + TXMTD_SEP + mean + "\n");
-		}
-		finally {
-			// release the stream even if the write fails
-			br.close();
-		}
-	}
-	
-	/**
-	 * Writes the scaling metadata file of a column: a single line holding the column ID,
-	 * the mean and the standard deviation, separated by TXMTD_SEP. An existing file is overwritten.
-	 * 
-	 * @param colID ID of the column
-	 * @param mean mean to record
-	 * @param sdev standard deviation to record
-	 * @param tfMtdDir root directory of the transformation metadata
-	 * @param fs file system on which the file is created
-	 * @param agents used to resolve the column name that forms the file name
-	 * @throws IOException if the file cannot be created or written
-	 */
-	private void writeTfMtd(int colID, String mean, String sdev, String tfMtdDir, FileSystem fs, TfUtils agents) throws IOException 
-	{
-		Path pt=new Path(tfMtdDir+"/Scale/"+ agents.getName(colID) + SCALE_FILE_SUFFIX);
-		BufferedWriter br=new BufferedWriter(new OutputStreamWriter(fs.create(pt,true)));
-		try {
-			br.write(colID + TXMTD_SEP + mean + TXMTD_SEP + sdev + "\n");
-		}
-		finally {
-			// release the stream even if the write fails
-			br.close();
-		}
-	}
-	
-	/**
-	 * Writes the binning metadata file of a column: a single line holding the column ID, min,
-	 * max, bin width and number of bins, separated by TXMTD_SEP. An existing file is overwritten.
-	 * 
-	 * @param colID ID of the column
-	 * @param min minimum to record
-	 * @param max maximum to record
-	 * @param binwidth bin width to record
-	 * @param nbins number of bins to record
-	 * @param tfMtdDir root directory of the transformation metadata
-	 * @param fs file system on which the file is created
-	 * @param agents used to resolve the column name that forms the file name
-	 * @throws IOException if the file cannot be created or written
-	 */
-	private void writeTfMtd(int colID, String min, String max, String binwidth, String nbins, String tfMtdDir, FileSystem fs, TfUtils agents) throws IOException 
-	{
-		Path pt = new Path(tfMtdDir+"/Bin/"+ agents.getName(colID) + BIN_FILE_SUFFIX);
-		BufferedWriter br=new BufferedWriter(new OutputStreamWriter(fs.create(pt,true)));
-		try {
-			br.write(colID + TXMTD_SEP + min + TXMTD_SEP + max + TXMTD_SEP + binwidth + TXMTD_SEP + nbins + "\n");
-		}
-		finally {
-			// release the stream even if the write fails
-			br.close();
-		}
-	}
-	
-	/**
-	 * Writes imputation and scaling metadata files directly from the statistics held by this
-	 * agent (no merge of per-task records is involved here).
-	 * 
-	 * @param outputDir root directory of the transformation metadata
-	 * @param fs file system on which the files are created
-	 * @param agents supplies the valid-record count and is passed on to the metadata writers
-	 * @throws IOException if writing fails, or wrapping a DMLRuntimeException
-	 */
-	public void outputTransformationMetadata(String outputDir, FileSystem fs, TfUtils agents) throws IOException {
-		
-		try{
-			if (_mvList != null)
-				for(int i=0; i < _mvList.length; i++) {
-					int colID = _mvList[i];
-					
-					double imputedValue = Double.NaN;
-					KahanObject gmean = null;
-					if ( _mvMethodList[i] == 1 ) 
-					{
-						// method 1: the running mean is the replacement
-						gmean = _meanList[i];
-						imputedValue = _meanList[i]._sum;
-						
-						// a column without any non-missing value is recorded with mean 0.0
-						double mean = ( _countList[i] == 0 ? 0.0 : _meanList[i]._sum); 
-						writeTfMtd(colID, Double.toString(mean), outputDir, fs, agents);
-					}
-					else if ( _mvMethodList[i] == 3 ) 
-					{
-						// method 3: the configured constant is the replacement
-						writeTfMtd(colID, _replacementList[i], outputDir, fs, agents);
-						
-						if (_isMVScaled.get(i) )
-						{
-							imputedValue = UtilFunctions.parseToDouble(_replacementList[i]);
-							// adjust the global mean, by combining gmean with "replacement" (weight = #missing values)
-							// NOTE(review): the weight passed below is agents.getValid(), not a missing-value count -- confirm
-							gmean = new KahanObject(_meanList[i]._sum, _meanList[i]._correction);
-							_meanFn.execute(gmean, imputedValue, agents.getValid());
-						}
-					}
-						
-					if ( _isMVScaled.get(i) ) 
-					{
-						// sdev stays -1.0 unless the scaling method is 2
-						double sdev = -1.0;
-						if ( _mvscMethodList[i] == 2 ) {
-							// Adjust variance with missing values
-							long totalMissingCount = (agents.getValid() - _countList[i]);
-							_varFn.execute(_varList[i], imputedValue, totalMissingCount);
-							double var = _varList[i].getRequiredResult(new CMOperator(_varFn, AggregateOperationTypes.VARIANCE));
-							sdev = Math.sqrt(var);
-						}
-						// NOTE(review): gmean is still null here when the imputation method is 2 (neither
-						// branch above assigns it), so a scaled column with method 2 would fail with an NPE
-						writeTfMtd(colID, Double.toString(gmean._sum), Double.toString(sdev), outputDir, fs, agents);
-					}
-				}
-		
-			// attributes that are scaled but not imputed
-			if(_scnomvList != null)
-				for(int i=0; i < _scnomvList.length; i++ )
-				{
-					int colID = _scnomvList[i];
-					double mean = (_scnomvCountList[i] == 0 ? 0.0 : _scnomvMeanList[i]._sum);
-					double sdev = -1.0;
-					if ( _scnomvMethodList[i] == 2 ) 
-					{
-						double var = _scnomvVarList[i].getRequiredResult(new CMOperator(_varFn, AggregateOperationTypes.VARIANCE));
-						sdev = Math.sqrt(var);
-					}
-					writeTfMtd(colID, Double.toString(mean), Double.toString(sdev), outputDir, fs, agents);
-				}
-			
-		} catch(DMLRuntimeException e) {
-			throw new IOException(e); 
-		}
-	}
-	
-	/** 
-	 * Merges the per-task records emitted for one column and writes the resulting
-	 * imputation, binning and scaling metadata files.
-	 * 
-	 * @param values records emitted by the map tasks for this column
-	 * @param outputDir root directory of the transformation metadata
-	 * @param colID ID of the column being merged
-	 * @param fs file system on which the files are created
-	 * @param agents passed on to the metadata writers
-	 * @throws IOException if writing fails, or wrapping a DMLRuntimeException
-	 */
-	@Override
-	public void mergeAndOutputTransformationMetadata(Iterator<DistinctValue> values, String outputDir, int colID, FileSystem fs, TfUtils agents) throws IOException {
-		double min = Double.MAX_VALUE;
-		double max = -Double.MAX_VALUE;
-		int nbins = 0;
-		double d;
-		long totalRecordCount = 0, totalValidCount=0;
-		String mvConstReplacement = null;
-		
-		DistinctValue val = new DistinctValue();
-		String w = null;
-		
-		// per-task partial mean: value, Kahan correction and number of contributing values
-		class MeanObject {
-			double mean, correction;
-			long count;
-			
-			MeanObject() { }
-			public String toString() {
-				return mean + "," + correction + "," + count;
-			}
-		};
-		HashMap<Integer, MeanObject> mapMeans = new HashMap<Integer, MeanObject>();
-		HashMap<Integer, CM_COV_Object> mapVars = new HashMap<Integer, CM_COV_Object>();
-		boolean isImputed = false;
-		boolean isScaled = false;
-		boolean isBinned = false;
-		
-		while(values.hasNext()) {
-			val.reset();
-			val = values.next();
-			w = val.getWord();
-			
-			if(w.startsWith(MEAN_PREFIX)) {
-				// word layout: mean_<taskID>_<mean>,<suffix>
-				String[] parts = w.split("_");
-				int taskID = UtilFunctions.parseToInt(parts[1]);
-				MeanObject mo = mapMeans.get(taskID);
-				if ( mo==null ) 
-					mo = new MeanObject();
-				
-				String[] payload = parts[2].split(",");
-				mo.mean = UtilFunctions.parseToDouble(payload[0]);
-				
-				// check if this attribute is scaled
-				String s = payload[1]; 
-				if(s.equalsIgnoreCase("scmv"))
-					isScaled = isImputed = true;
-				else if ( s.equalsIgnoreCase("scnomv") )
-					isScaled = true;
-				else
-					isImputed = true;
-				
-				mapMeans.put(taskID, mo);
-			}
-			else if (w.startsWith(CORRECTION_PREFIX)) {
-				String[] parts = w.split("_");
-				int taskID = UtilFunctions.parseToInt(parts[1]);
-				MeanObject mo = mapMeans.get(taskID);
-				if ( mo==null ) 
-					mo = new MeanObject();
-				mo.correction = UtilFunctions.parseToDouble(parts[2]);
-				mapMeans.put(taskID, mo);
-			}
-			else if ( w.startsWith(CONSTANT_PREFIX) )
-			{
-				isImputed = true;
-				// Take everything after "<prefix>_": splitting on "_" would truncate a
-				// replacement that itself contains "_" and fail on an empty replacement.
-				mvConstReplacement = w.substring(CONSTANT_PREFIX.length() + 1);
-			}
-			else if (w.startsWith(COUNT_PREFIX)) {
-				String[] parts = w.split("_");
-				int taskID = UtilFunctions.parseToInt(parts[1]);
-				MeanObject mo = mapMeans.get(taskID);
-				if ( mo==null ) 
-					mo = new MeanObject();
-				mo.count = UtilFunctions.parseToLong(parts[2]);
-				totalValidCount += mo.count;
-				mapMeans.put(taskID, mo);
-			}
-			else if (w.startsWith(TOTAL_COUNT_PREFIX)) {
-				String[] parts = w.split("_");
-				//int taskID = UtilFunctions.parseToInt(parts[1]);
-				totalRecordCount += UtilFunctions.parseToLong(parts[2]);
-			}
-			else if (w.startsWith(VARIANCE_PREFIX)) {
-				isScaled = true;
-				String[] parts = w.split("_");
-				int taskID = UtilFunctions.parseToInt(parts[1]);
-				CM_COV_Object cm = decodeCMObj(parts[2]);
-				mapVars.put(taskID, cm);
-			}
-			else if(w.startsWith(BinAgent.MIN_PREFIX)) {
-				isBinned = true;
-				d = UtilFunctions.parseToDouble( w.substring( BinAgent.MIN_PREFIX.length() ) );
-				if ( d < min )
-					min = d;
-			}
-			else if(w.startsWith(BinAgent.MAX_PREFIX)) {
-				isBinned = true;
-				d = UtilFunctions.parseToDouble( w.substring( BinAgent.MAX_PREFIX.length() ) );
-				if ( d > max )
-					max = d;
-			}
-			else if (w.startsWith(BinAgent.NBINS_PREFIX)) {
-				isBinned = true;
-				nbins = (int) UtilFunctions.parseToLong( w.substring(BinAgent.NBINS_PREFIX.length() ) );
-			}
-			else
-				throw new RuntimeException("MVImputeAgent: Invalid prefix while merging map output: " + w);
-		}
-		
-		// compute global mean across all map outputs
-		KahanObject gmean = new KahanObject(0, 0);
-		KahanPlus kp = KahanPlus.getKahanPlusFnObject();
-		long gcount = 0;
-		for(MeanObject mo : mapMeans.values()) {
-			gcount = gcount + mo.count;
-			if ( gcount > 0) {
-				// incremental update: shift the running mean by the weighted difference
-				double delta = mo.mean - gmean._sum;
-				kp.execute2(gmean, delta*mo.count/gcount);
-				//_meanFn.execute2(gmean, mo.mean*mo.count, gcount);
-			}
-		}
-		
-		// compute global variance across all map outputs
-		CM_COV_Object gcm = new CM_COV_Object();
-		try {
-			for(CM_COV_Object cm : mapVars.values())
-				gcm = (CM_COV_Object) _varFn.execute(gcm, cm);
-		} catch (DMLRuntimeException e) {
-			throw new IOException(e);
-		}
-		
-		// If the column is imputed with a constant, then adjust min and max based on the value of the constant.
-		if(isImputed && isBinned && mvConstReplacement != null)
-		{
-			double cst = UtilFunctions.parseToDouble(mvConstReplacement);
-			if ( cst < min)
-				min = cst;
-			if ( cst > max)
-				max = cst;
-		}
-
-		// write merged metadata
-		if( isImputed ) 
-		{
-			String imputedValue = null;
-			if ( mvConstReplacement != null )
-				imputedValue = mvConstReplacement;
-			else 
-				imputedValue = Double.toString(gcount == 0 ? 0.0 : gmean._sum);
-			
-			writeTfMtd(colID, imputedValue, outputDir, fs, agents);
-		}
-		
-		if ( isBinned ) {
-			double binwidth = (max-min)/nbins;
-			writeTfMtd(colID, Double.toString(min), Double.toString(max), Double.toString(binwidth), Integer.toString(nbins), outputDir, fs, agents);
-		}
-		
-		if ( isScaled ) 
-		{
-			try {
-				if( totalValidCount != totalRecordCount) {
-					// In the presence of missing values, the variance needs to be adjusted.
-					// The mean does not need to be adjusted, when mv impute method is global_mean, 
-					// since missing values themselves are replaced with gmean.
-					long totalMissingCount = (totalRecordCount-totalValidCount);
-					int idx = isImputed(colID);
-					if(idx != -1 && _mvMethodList[idx] == 3) 
-						_meanFn.execute(gmean, UtilFunctions.parseToDouble(_replacementList[idx]), totalRecordCount);
-					_varFn.execute(gcm, gmean._sum, totalMissingCount);
-				}
-				
-				double mean = (gcount == 0 ? 0.0 : gmean._sum);
-				double var = gcm.getRequiredResult(new CMOperator(_varFn, AggregateOperationTypes.VARIANCE));
-				// -1.0 marks "no variance records received"
-				double sdev = (mapVars.size() > 0 ? Math.sqrt(var) : -1.0 );
-				
-				writeTfMtd(colID, Double.toString(mean), Double.toString(sdev), outputDir, fs, agents);
-				
-				
-			} catch (DMLRuntimeException e) {
-				throw new IOException(e);
-			}
-		}
-	}
-	
-	// ------------------------------------------------------------------------------------------------
-
-	/**
-	 * Reads the replacement value of a column from its imputation metadata file, i.e. the
-	 * second TXMTD_SEP-separated field of the first line, with surrounding quotes removed.
-	 * 
-	 * @param colID ID of the column
-	 * @param fs file system holding the metadata
-	 * @param txMtdDir root directory of the transformation metadata
-	 * @param agents used to resolve the column name that forms the file name
-	 * @return the replacement value
-	 * @throws IOException if the file cannot be read or is empty
-	 */
-	private String readReplacement(int colID, FileSystem fs, Path  txMtdDir, TfUtils agents) throws IOException
-	{
-		Path path = new Path( txMtdDir + "/Impute/" + agents.getName(colID) + MV_FILE_SUFFIX);
-		TfUtils.checkValidInputFile(fs, path, true); 
-		
-		BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(path)));
-		try {
-			String line = br.readLine();
-			if ( line == null )
-				throw new IOException("Empty imputation metadata file: " + path);
-			return UtilFunctions.unquote(line.split(TXMTD_SEP)[1]);
-		}
-		finally {
-			// release the stream even if reading or parsing fails
-			br.close();
-		}
-	}
-	
-	/**
-	 * Reads the first line of a column's scaling metadata file.
-	 * 
-	 * @param colID ID of the column
-	 * @param fs file system holding the metadata
-	 * @param txMtdDir root directory of the transformation metadata
-	 * @param agents used to resolve the column name that forms the file name
-	 * @return the first line of the file, or null if the file has no content
-	 * @throws IOException if the file cannot be read
-	 */
-	public String readScaleLine(int colID, FileSystem fs, Path txMtdDir, TfUtils agents) throws IOException
-	{
-		Path path = new Path( txMtdDir + "/Scale/" + agents.getName(colID) + SCALE_FILE_SUFFIX);
-		TfUtils.checkValidInputFile(fs, path, true); 
-		BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(path)));
-		try {
-			return br.readLine();
-		}
-		finally {
-			// release the stream even if reading fails
-			br.close();
-		}
-	}
-	
-	/**
-	 * Loads the mean and standard deviation of one scaled column from its scaling metadata
-	 * file into the given lists.
-	 * 
-	 * @param i position of the column in the given lists
-	 * @param list column IDs
-	 * @param meanList receives the mean in meanList[i]._sum
-	 * @param varList receives the standard deviation in varList[i].mean._sum
-	 * @param fs file system holding the metadata
-	 * @param tfMtdDir root directory of the transformation metadata
-	 * @param agents used to resolve the column name that forms the file name
-	 * @throws IOException if the file cannot be read or does not hold three fields
-	 */
-	private void processScalingFile(int i, int[] list, KahanObject[] meanList, CM_COV_Object[] varList, FileSystem fs, Path tfMtdDir, TfUtils agents ) throws IOException
-	{
-		int colID = list[i];
-		
-		String line = readScaleLine(colID, fs, tfMtdDir, agents);
-		if ( line == null )
-			throw new IOException("Empty scaling metadata for column ID " + colID + " in " + tfMtdDir);
-		
-		// the file is written with TXMTD_SEP, so it is split with the same separator
-		String[] parts = line.split(TXMTD_SEP);
-		if ( parts.length < 3 )
-			throw new IOException("Malformed scaling metadata for column ID " + colID + ": " + line);
-		
-		double mean = UtilFunctions.parseToDouble(parts[1]);
-		double sd = UtilFunctions.parseToDouble(parts[2]);
-		
-		meanList[i]._sum = mean;
-		// the standard deviation is kept in the "mean" slot of the variance object
-		varList[i].mean._sum = sd;
-	}
-	
-	// ------------------------------------------------------------------------------------------------
-
-	/**
-	 * Loads the stored transformation metadata of all attributes handled by this agent:
-	 * replacement values for imputed columns and scaling parameters for scaled columns.
-	 * 
-	 * @param job job configuration (not used by this method)
-	 * @param fs file system holding the metadata
-	 * @param tfMtdDir root directory of the transformation metadata
-	 * @param agents passed on to the metadata readers
-	 * @throws IOException if a metadata file cannot be read
-	 */
-	@Override
-	public void loadTxMtd(JobConf job, FileSystem fs, Path tfMtdDir, TfUtils agents) throws IOException {
-		
-		if( !fs.isDirectory(tfMtdDir) ) {
-			fs.close();
-			throw new RuntimeException("Path to recode maps must be a directory: " + tfMtdDir);
-		}
-		
-		if ( _mvList != null ) {
-			// replacement values for missing value imputation
-			for(int pos=0; pos < _mvList.length; pos++) {
-				switch( _mvMethodList[pos] ) {
-					case 1:		// global_mean
-					case 2:		// global_mode
-						_replacementList[pos] = readReplacement(_mvList[pos], fs, tfMtdDir, agents);
-						break;
-					case 3:
-						// constant: nothing to do, the constants are loaded already during configure
-						break;
-					default:
-						throw new RuntimeException("Invalid Missing Value Imputation methods: " + _mvMethodList[pos]);
-				}
-			}
-			
-			// scaling parameters of columns that are imputed as well as scaled
-			for(int pos=0; pos < _mvList.length; pos++) {
-				if ( _isMVScaled.get(pos) )
-					processScalingFile(pos, _mvList, _meanList, _varList, fs, tfMtdDir, agents);
-			}
-		}
-		
-		// scaling parameters of columns that are scaled but not imputed
-		if ( _scnomvList != null ) {
-			for(int pos=0; pos < _scnomvList.length; pos++)
-				processScalingFile(pos, _scnomvList, _scnomvMeanList, _scnomvVarList, fs, tfMtdDir, agents);
-		}
-	}
-	
-	/**
-	 * Applies imputation and scaling to one row, in place.
-	 * 
-	 * @param words field values of the row; column ID c is stored at words[c-1]
-	 * @param agents decides whether a value counts as missing
-	 * @return the same array that was passed in
-	 */
-	@Override
-	public String[] apply(String[] words, TfUtils agents) {
-		
-		// imputed columns: replace missing values, then scale if requested
-		final int numMV = (_mvList == null) ? 0 : _mvList.length;
-		for(int pos=0; pos < numMV; pos++) {
-			final int col = _mvList[pos] - 1;
-			String token = UtilFunctions.unquote(words[col]);
-			if( agents.isNA(token) ) {
-				token = _replacementList[pos];
-				words[col] = token;
-			}
-			
-			if ( !_isMVScaled.get(pos) )
-				continue;
-			
-			if ( _mvscMethodList[pos] == 1 ) {
-				// method 1: subtract the mean
-				words[col] = Double.toString( UtilFunctions.parseToDouble(token) - _meanList[pos]._sum );
-			}
-			else {
-				// otherwise: subtract the mean and divide by the stored deviation
-				words[col] = Double.toString( (UtilFunctions.parseToDouble(token) - _meanList[pos]._sum) / _varList[pos].mean._sum );
-			}
-		}
-		
-		// columns that are scaled but not imputed
-		final int numSc = (_scnomvList == null) ? 0 : _scnomvList.length;
-		for(int pos=0; pos < numSc; pos++) {
-			final int col = _scnomvList[pos] - 1;
-			final double centered = UtilFunctions.parseToDouble(words[col]) - _scnomvMeanList[pos]._sum;
-			if ( _scnomvMethodList[pos] == 1 )
-				words[col] = Double.toString( centered );
-			else
-				words[col] = Double.toString( centered / _scnomvVarList[pos].mean._sum );
-		}
-		
-		return words;
-	}
-	
-	/**
-	 * Looks up a column in the list of imputed columns.
-	 * The lookup is a binary search, so it relies on that list being in ascending order.
-	 * 
-	 * @param colID ID of the column
-	 * @return position of the column in the imputed list, or -1 if it is not imputed
-	 */
-	public int isImputed(int colID)
-	{
-		if(_mvList == null)
-			return -1;
-		
-		// a miss yields some negative value; collapse all of them to -1
-		final int pos = Arrays.binarySearch(_mvList, colID);
-		if ( pos < 0 )
-			return -1;
-		return pos;
-	}
-	
-	/**
-	 * Translates the numeric imputation method code of a column into its enum constant.
-	 * 
-	 * @param colID ID of the column
-	 * @return the imputation method; INVALID if the column is not imputed or the code is unknown
-	 */
-	public MVMethod getMethod(int colID) 
-	{
-		final int pos = isImputed(colID);
-		if(pos == -1)
-			return MVMethod.INVALID;
-		
-		final byte code = _mvMethodList[pos];
-		if ( code == 1 )
-			return MVMethod.GLOBAL_MEAN;
-		if ( code == 2 )
-			return MVMethod.GLOBAL_MODE;
-		if ( code == 3 )
-			return MVMethod.CONSTANT;
-		return MVMethod.INVALID;
-	}
-	
-	/**
-	 * @param colID ID of the column
-	 * @return number of non-missing values counted for the column, or 0 if it is not imputed
-	 */
-	public long getNonMVCount(int colID) 
-	{
-		final int pos = isImputed(colID);
-		return (pos == -1) ? 0 : _countList[pos];
-	}
-	
-	/**
-	 * @param colID ID of the column
-	 * @return replacement value stored for the column, or null if it is not imputed
-	 */
-	public String getReplacement(int colID) 
-	{
-		final int pos = isImputed(colID);
-		return (pos == -1) ? null : _replacementList[pos];
-	}
-	
-	/**
-	 * Prints the imputed column IDs and, on the next line, their method codes to standard
-	 * output. Both lists are null when no imputation is configured; the respective line is
-	 * then left empty instead of failing with a NullPointerException.
-	 */
-	public void print() {
-		System.out.print("MV Imputation List: \n    ");
-		if ( _mvList != null ) {
-			for(int i : _mvList) {
-				System.out.print(i + " ");
-			}
-		}
-		System.out.print("\n    ");
-		if ( _mvMethodList != null ) {
-			for(byte b : _mvMethodList) {
-				System.out.print(b + " ");
-			}
-		}
-		System.out.println();
-	}
-
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.runtime.transform;
+
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.OutputStreamWriter;
+import java.nio.charset.CharacterCodingException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.BitSet;
+import java.util.HashMap;
+import java.util.Iterator;
+
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.wink.json4j.JSONArray;
+import org.apache.wink.json4j.JSONException;
+import org.apache.wink.json4j.JSONObject;
+
+import scala.Tuple2;
+
+import org.apache.sysml.runtime.DMLRuntimeException;
+import org.apache.sysml.runtime.functionobjects.CM;
+import org.apache.sysml.runtime.functionobjects.KahanPlus;
+import org.apache.sysml.runtime.functionobjects.Mean;
+import org.apache.sysml.runtime.instructions.cp.CM_COV_Object;
+import org.apache.sysml.runtime.instructions.cp.KahanObject;
+import org.apache.sysml.runtime.matrix.operators.CMOperator;
+import org.apache.sysml.runtime.matrix.operators.CMOperator.AggregateOperationTypes;
+import org.apache.sysml.runtime.util.UtilFunctions;
+
+public class MVImputeAgent extends TransformationAgent {
+	
+	private static final long serialVersionUID = 9057868620144662194L;
+
+	// Prefixes of the words exchanged between the map side and the merge step
+	public static final String MEAN_PREFIX = "mean";
+	public static final String VARIANCE_PREFIX = "var";
+	public static final String CORRECTION_PREFIX = "correction";
+	public static final String COUNT_PREFIX = "validcount";		// #of valid or non-missing values in a column
+	public static final String TOTAL_COUNT_PREFIX = "totalcount";	// #of total records processed by a mapper
+	public static final String CONSTANT_PREFIX = "constant";
+	
+	public enum MVMethod { INVALID, GLOBAL_MEAN, GLOBAL_MODE, CONSTANT };
+	
+	// IDs of the columns subject to missing value imputation; null if imputation is not configured
+	private int[] _mvList = null;
+	/* 
+	 * Imputation Methods:
+	 * 1 - global_mean
+	 * 2 - global_mode
+	 * 3 - constant
+	 * 
+	 */
+	private byte[] _mvMethodList = null;
+	private byte[] _mvscMethodList = null;	// scaling methods for attributes that are imputed and also scaled
+	
+	// bit i is set if the i-th imputed column is scaled as well
+	private BitSet _isMVScaled = null;
+	private CM _varFn = CM.getCMFnObject(AggregateOperationTypes.VARIANCE);		// function object that understands variance computation
+	
+	// objects required to compute mean and variance of all non-missing entries 
+	private Mean _meanFn = Mean.getMeanFnObject();	// function object that understands mean computation
+	private KahanObject[] _meanList = null; 		// column-level means, computed so far
+	private long[] _countList = null;				// #of non-missing values
+	
+	private CM_COV_Object[] _varList = null;		// column-level variances, computed so far (for scaling)
+	
+
+	private int[] 			_scnomvList = null;			// List of attributes that are scaled but not imputed
+	private byte[]			_scnomvMethodList = null;	// scaling methods: 0 for invalid; 1 for mean-subtraction; 2 for z-scoring
+	private KahanObject[] 	_scnomvMeanList = null;		// column-level means, for attributes scaled but not imputed
+	private long[] 			_scnomvCountList = null;	// #of non-missing values, for attributes scaled but not imputed
+	private CM_COV_Object[] _scnomvVarList = null;		// column-level variances, computed so far
+	
+	private String[] _replacementList = null;		// replacements: for global_mean, mean; and for global_mode, recode id of mode category
+	
+	// Accessors; each returns the internal array itself, not a copy
+	public String[] getReplacements() { return _replacementList; }
+	public KahanObject[] getMeans()   { return _meanList; }
+	public CM_COV_Object[] getVars()  { return _varList; }
+	public KahanObject[] getMeans_scnomv()   { return _scnomvMeanList; }
+	public CM_COV_Object[] getVars_scnomv()  { return _scnomvVarList; }
+	
+	/**
+	 * Configures the agent from the parsed transformation specification: reads the imputed
+	 * columns with their methods and constants, then the scaled columns, which are split into
+	 * those that are also imputed and those that are scaled only.
+	 * 
+	 * @param parsedSpec parsed specification; the impute and scale sections are both optional
+	 * @throws JSONException if the specification cannot be read
+	 */
+	MVImputeAgent(JSONObject parsedSpec) throws JSONException {
+	
+		boolean isMV = parsedSpec.containsKey(TX_METHOD.IMPUTE.toString());
+		boolean isSC = parsedSpec.containsKey(TX_METHOD.SCALE.toString());
+		
+		if(!isMV) {
+			// MV Impute is not applicable
+			_mvList = null;
+			_mvMethodList = null;
+			_meanList = null;
+			_countList = null;
+			_replacementList = null;
+		}
+		else {
+			JSONObject mvobj = (JSONObject) parsedSpec.get(TX_METHOD.IMPUTE.toString());
+			JSONArray mvattrs = (JSONArray) mvobj.get(JSON_ATTRS);
+			JSONArray mvmthds = (JSONArray) mvobj.get(JSON_MTHD);
+			int mvLength = mvattrs.size();
+			
+			assert(mvLength == mvmthds.size());
+			
+			_mvList = new int[mvLength];
+			_mvMethodList = new byte[mvLength];
+			
+			_meanList = new KahanObject[mvLength];
+			_countList = new long[mvLength];
+			_varList = new CM_COV_Object[mvLength];
+			
+			_isMVScaled = new BitSet(_mvList.length);
+			_isMVScaled.clear();
+			
+			for(int i=0; i < _mvList.length; i++) {
+				_mvList[i] = UtilFunctions.toInt(mvattrs.get(i));
+				_mvMethodList[i] = (byte) UtilFunctions.toInt(mvmthds.get(i)); 
+				_meanList[i] = new KahanObject(0, 0);
+			}
+			
+			_replacementList = new String[mvLength]; 	// contains replacements for all columns (scale and categorical)
+			
+			// a missing constant is recorded as "NaN"
+			JSONArray constants = (JSONArray)mvobj.get(JSON_CONSTS);
+			for(int i=0; i < constants.size(); i++) {
+				if ( constants.get(i) == null )
+					_replacementList[i] = "NaN";
+				else
+					_replacementList[i] = constants.get(i).toString();
+			}
+		}
+		
+		// Handle scaled attributes
+		if ( !isSC )
+		{
+			// scaling is not applicable
+			_scnomvCountList = null;
+			_scnomvMeanList = null;
+			_scnomvVarList = null;
+		}
+		else
+		{
+			if ( _mvList != null ) 
+				_mvscMethodList = new byte[_mvList.length];
+			
+			JSONObject scobj = (JSONObject) parsedSpec.get(TX_METHOD.SCALE.toString());
+			JSONArray scattrs = (JSONArray) scobj.get(JSON_ATTRS);
+			JSONArray scmthds = (JSONArray) scobj.get(JSON_MTHD);
+			int scLength = scattrs.size();
+			
+			// first pass: mark imputed columns that are scaled, count the scaled-only ones
+			int scnomv = 0, colID;
+			byte mthd;
+			for(int i=0; i < scLength; i++)
+			{
+				colID = UtilFunctions.toInt(scattrs.get(i));
+				mthd = (byte) UtilFunctions.toInt(scmthds.get(i)); 
+				
+				// check if the attribute is also MV imputed
+				int mvidx = isImputed(colID);
+				if(mvidx != -1)
+				{
+					_isMVScaled.set(mvidx);
+					_mvscMethodList[mvidx] = mthd;
+					_varList[mvidx] = new CM_COV_Object();
+				}
+				else
+					scnomv++;	// count of scaled but not imputed 
+			}
+			
+			// second pass: fill the lists of the scaled-only columns
+			if(scnomv > 0)
+			{
+				_scnomvList = new int[scnomv];			
+				_scnomvMethodList = new byte[scnomv];	
+	
+				_scnomvMeanList = new KahanObject[scnomv];
+				_scnomvCountList = new long[scnomv];
+				_scnomvVarList = new CM_COV_Object[scnomv];
+				
+				for(int i=0, idx=0; i < scLength; i++)
+				{
+					colID = UtilFunctions.toInt(scattrs.get(i));
+					mthd = (byte)UtilFunctions.toInt(scmthds.get(i)); 
+							
+					if(isImputed(colID) == -1)
+					{	// scaled but not imputed
+						_scnomvList[idx] = colID;
+						_scnomvMethodList[idx] = mthd;
+						_scnomvMeanList[idx] = new KahanObject(0, 0);
+						_scnomvVarList[idx] = new CM_COV_Object();
+						idx++;
+					}
+				}
+			}
+		}
+	}
+	
+	/**
+	 * Updates the running statistics (counts, means, variances) with the values of one row.
+	 * 
+	 * @param words field values of the row; column ID c is stored at words[c-1]
+	 * @param agents decides whether a value counts as missing
+	 * @throws IOException wraps any exception raised while processing the row
+	 */
+	public void prepare(String[] words, TfUtils agents) throws IOException {
+		
+		try {
+			String w = null;
+			if(_mvList != null)
+			for(int i=0; i <_mvList.length; i++) {
+				int colID = _mvList[i];
+				w = UtilFunctions.unquote(words[colID-1].trim());
+				
+				try {
+				if(!agents.isNA(w)) {
+					_countList[i]++;
+					
+					boolean computeMean = (_mvMethodList[i] == 1 || _isMVScaled.get(i) );
+					if(computeMean) {
+						// global_mean
+						double d = UtilFunctions.parseToDouble(w);
+						_meanFn.execute2(_meanList[i], d, _countList[i]);
+						
+						if (_isMVScaled.get(i) && _mvscMethodList[i] == 2)
+							_varFn.execute(_varList[i], d);
+					}
+					else {
+						// global_mode or constant
+						// Nothing to do here. Mode is computed using recode maps.
+					}
+				}
+				} catch (NumberFormatException e) 
+				{
+					// keep the parse failure as the cause so the stack trace is not lost
+					throw new RuntimeException("Encountered \"" + w + "\" in column ID \"" + colID + "\", when expecting a numeric value. Consider adding \"" + w + "\" to na.strings, along with an appropriate imputation method.", e);
+				}
+			}
+			
+			// Compute mean and variance for attributes that are scaled but not imputed
+			if(_scnomvList != null)
+			for(int i=0; i < _scnomvList.length; i++) 
+			{
+				int colID = _scnomvList[i];
+				w = UtilFunctions.unquote(words[colID-1].trim());
+				double d = UtilFunctions.parseToDouble(w);
+				_scnomvCountList[i]++; 		// not required, this is always equal to total #records processed
+				_meanFn.execute2(_scnomvMeanList[i], d, _scnomvCountList[i]);
+				if(_scnomvMethodList[i] == 2)
+					_varFn.execute(_scnomvVarList[i], d);
+			}
+		} catch(Exception e) {
+			throw new IOException(e);
+		}
+	}
+	
+	// ----------------------------------------------------------------------------------------------------------
+	
+	/**
+	 * Serializes the fields w, mean (sum, correction) and m2 (sum, correction) of the given
+	 * object into one comma-separated string, in that order.
+	 */
+	private String encodeCMObj(CM_COV_Object obj)
+	{
+		return obj.w + "," 
+			+ obj.mean._sum + "," + obj.mean._correction + "," 
+			+ obj.m2._sum + "," + obj.m2._correction;
+	}
+	
+	/**
+	 * Rebuilds an object from a comma-separated string holding, in this order:
+	 * w, mean sum, mean correction, m2 sum, m2 correction.
+	 */
+	private CM_COV_Object decodeCMObj(String s) 
+	{
+		final CM_COV_Object decoded = new CM_COV_Object();
+		final String[] fields = s.split(",");
+		
+		// parse in field order first, then assign
+		final double[] nums = new double[5];
+		for(int k=0; k < nums.length; k++)
+			nums[k] = UtilFunctions.parseToDouble(fields[k]);
+		
+		decoded.w = nums[0];
+		decoded.mean._sum = nums[1];
+		decoded.mean._correction = nums[2];
+		decoded.m2._sum = nums[3];
+		decoded.m2._correction = nums[4];
+		return decoded;
+	}
+	
+	/**
+	 * Builds the record that carries the running mean of a column.
+	 * The word has the layout MEAN_PREFIX + "_" + taskID + "_" + mean + "," + suffix, where the
+	 * suffix is "scmv" (mean-imputed and scaled), "noscmv" (mean-imputed only) or "scnomv"
+	 * (scaled, but not mean-imputed).
+	 * 
+	 * @param taskID identifier of the emitting task, embedded in the word
+	 * @param idx position in the imputed list, or in the scaled-but-not-imputed list if scnomv is set
+	 * @param sb scratch buffer; its previous content is discarded
+	 * @param scnomv true if idx refers to the scaled-but-not-imputed lists
+	 * @return the record, or null if the column is neither mean-imputed (method 1) nor scaled
+	 * @throws CharacterCodingException
+	 */
+	private DistinctValue prepMeanOutput(int taskID, int idx, StringBuilder sb, boolean scnomv) throws CharacterCodingException {
+		
+		final byte method = scnomv ? _scnomvMethodList[idx] : _mvMethodList[idx];
+		
+		final String suffix;
+		if ( scnomv )
+			suffix = "scnomv";
+		else {
+			final boolean scaled = _isMVScaled.get(idx);
+			if ( method == 1 )
+				suffix = scaled ? "scmv" : "noscmv";
+			else if ( scaled )
+				suffix = "scnomv";
+			else
+				return null;
+		}
+		
+		sb.setLength(0);
+		sb.append(MEAN_PREFIX).append("_").append(taskID).append("_");
+		final KahanObject running = scnomv ? _scnomvMeanList[idx] : _meanList[idx];
+		sb.append(Double.toString(running._sum)).append(",").append(suffix);
+		return new DistinctValue(sb.toString(), -1L);
+	}
+	
+	/**
+	 * Builds the record that carries the Kahan correction term of a column's running mean.
+	 * The word has the layout CORRECTION_PREFIX + "_" + taskID + "_" + correction.
+	 * 
+	 * @param taskID identifier of the emitting task, embedded in the word
+	 * @param idx position in the imputed list, or in the scaled-but-not-imputed list if scnomv is set
+	 * @param sb scratch buffer; its previous content is discarded
+	 * @param scnomv true if idx refers to the scaled-but-not-imputed lists
+	 * @return the record, or null if the column is neither mean-imputed (method 1) nor scaled
+	 * @throws CharacterCodingException
+	 */
+	private DistinctValue prepMeanCorrectionOutput(int taskID, int idx, StringBuilder sb, boolean scnomv) throws CharacterCodingException {
+		final byte method = scnomv ? _scnomvMethodList[idx] : _mvMethodList[idx];
+		final boolean needsMean = scnomv || method == 1 || _isMVScaled.get(idx);
+		if ( !needsMean )
+			return null;
+		
+		sb.setLength(0);
+		sb.append(CORRECTION_PREFIX).append("_").append(taskID).append("_");
+		final KahanObject running = scnomv ? _scnomvMeanList[idx] : _meanList[idx];
+		sb.append(Double.toString(running._correction));
+		return new DistinctValue(sb.toString(), -1L);
+	}
+	
+	/**
+	 * Builds the map-side record carrying the per-task count stored for the
+	 * column, in the form COUNT_PREFIX_taskID_count.
+	 * 
+	 * @param taskID ID of the emitting task, embedded in the record
+	 * @param idx    position within the scnomv lists (scnomv=true) or the mv lists
+	 * @param sb     scratch builder; its previous content is discarded
+	 * @param scnomv true when the column is scaled but not imputed
+	 * @return the record, or null when the column needs no mean information
+	 */
+	private DistinctValue prepMeanCountOutput(int taskID, int idx, StringBuilder sb, boolean scnomv) throws CharacterCodingException {
+		final byte method = (scnomv ? _scnomvMethodList[idx] : _mvMethodList[idx]);
+		if ( !(scnomv || method == 1 || _isMVScaled.get(idx)) )
+			return null;
+		
+		sb.setLength(0);
+		sb.append(COUNT_PREFIX).append("_").append(taskID).append("_");
+		
+		final long count = (scnomv ? _scnomvCountList[idx] : _countList[idx]);
+		sb.append(count);
+		
+		return new DistinctValue(sb.toString(), -1L);
+	}
+	
+	/**
+	 * Builds the map-side record carrying the value of agents.getValid() for
+	 * this task, in the form TOTAL_COUNT_PREFIX_taskID_count.
+	 * 
+	 * @param taskID ID of the emitting task, embedded in the record
+	 * @param idx    position within the scnomv lists (scnomv=true) or the mv lists
+	 * @param sb     scratch builder; its previous content is discarded
+	 * @param scnomv true when the column is scaled but not imputed
+	 * @param agents supplies the count via getValid()
+	 * @return the record, or null when the column needs no mean information
+	 */
+	private DistinctValue prepTotalCountOutput(int taskID, int idx, StringBuilder sb, boolean scnomv, TfUtils agents) throws CharacterCodingException {
+		final byte method = (scnomv ? _scnomvMethodList[idx] : _mvMethodList[idx]);
+		if ( !(scnomv || method == 1 || _isMVScaled.get(idx)) )
+			return null;
+		
+		sb.setLength(0);
+		sb.append(TOTAL_COUNT_PREFIX).append("_").append(taskID).append("_");
+		sb.append( Long.toString(agents.getValid()) );
+		
+		return new DistinctValue(sb.toString(), -1L);
+	}
+	
+	/**
+	 * Builds the map-side record announcing the constant used to impute the
+	 * column, in the form CONSTANT_PREFIX_replacement. Only columns whose
+	 * imputation method code is 3 (constant) produce a record.
+	 * 
+	 * @param idx position of the column within the mv lists
+	 * @param sb  scratch builder; its previous content is discarded
+	 * @return the record, or null when there is no mv list or the method is not 3
+	 */
+	private DistinctValue prepConstantOutput(int idx, StringBuilder sb) throws CharacterCodingException {
+		if ( _mvMethodList == null || _mvMethodList[idx] != 3 )
+			return null;
+		
+		sb.setLength(0);
+		sb.append(CONSTANT_PREFIX).append("_").append(_replacementList[idx]);
+		
+		return new DistinctValue(sb.toString(), -1);
+	}
+	
+	/**
+	 * Builds the map-side record carrying the encoded variance accumulator,
+	 * in the form VARIANCE_PREFIX_taskID_encodedObject. A record is produced
+	 * for every scnomv column, and for an mv column only when it is scaled
+	 * with scaling method code 2.
+	 * 
+	 * @param taskID ID of the emitting task, embedded in the record
+	 * @param idx    position within the scnomv lists (scnomv=true) or the mv lists
+	 * @param sb     scratch builder; its previous content is discarded
+	 * @param scnomv true when the column is scaled but not imputed
+	 * @return the record, or null when no variance information is needed
+	 */
+	private DistinctValue prepVarOutput(int taskID, int idx, StringBuilder sb, boolean scnomv) throws CharacterCodingException {
+		// && binds tighter than ||; the parentheses only make that explicit
+		final boolean needsVariance = scnomv || (_isMVScaled.get(idx) && _mvscMethodList[idx] == 2);
+		if ( !needsVariance )
+			return null;
+		
+		sb.setLength(0);
+		sb.append(VARIANCE_PREFIX).append("_").append(taskID).append("_");
+		
+		final CM_COV_Object accumulator = (scnomv ? _scnomvVarList[idx] : _varList[idx]);
+		sb.append(encodeCMObj(accumulator));
+		
+		return new DistinctValue(sb.toString(), -1L);
+	}
+	
+	/**
+	 * Forwards a record to the collector under the given key; a null record
+	 * is silently skipped.
+	 */
+	private void outDV(IntWritable iw, DistinctValue dv, OutputCollector<IntWritable, DistinctValue> out) throws IOException {
+		if ( dv == null )
+			return;
+		out.collect(iw, dv);
+	}
+	
+	/**
+	 * Emits this agent's per-task transformation metadata from a mapper.
+	 * Every record is keyed by the negated column ID; the records are later
+	 * collected and merged by the reducers.
+	 * 
+	 * @param out    collector receiving the (key, record) pairs
+	 * @param taskID ID of the emitting task, embedded in the records
+	 * @param agents supplies the record count used for the total-count record
+	 * @throws IOException wrapping any exception raised while building or emitting records
+	 */
+	@Override
+	public void mapOutputTransformationMetadata(OutputCollector<IntWritable, DistinctValue> out, int taskID, TfUtils agents) throws IOException {
+		try {
+			final StringBuilder sb = new StringBuilder();
+			
+			// columns subject to missing value imputation (possibly scaled as well)
+			if ( _mvList != null ) {
+				for ( int i = 0; i < _mvList.length; i++ ) {
+					final IntWritable key = new IntWritable(-_mvList[i]);
+					
+					outDV(key, prepMeanOutput(taskID, i, sb, false), out);
+					outDV(key, prepMeanCorrectionOutput(taskID, i, sb, false), out);
+					outDV(key, prepMeanCountOutput(taskID, i, sb, false), out);
+					outDV(key, prepTotalCountOutput(taskID, i, sb, false, agents), out);
+					
+					outDV(key, prepConstantOutput(i, sb), out);
+					
+					// variance information relevant to scaling
+					outDV(key, prepVarOutput(taskID, i, sb, false), out);
+				}
+			}
+			
+			// columns that are scaled but not imputed
+			if ( _scnomvList != null ) {
+				for ( int i = 0; i < _scnomvList.length; i++ ) {
+					final IntWritable key = new IntWritable(-_scnomvList[i]);
+					
+					outDV(key, prepMeanOutput(taskID, i, sb, true), out);
+					outDV(key, prepMeanCorrectionOutput(taskID, i, sb, true), out);
+					outDV(key, prepMeanCountOutput(taskID, i, sb, true), out);
+					outDV(key, prepTotalCountOutput(taskID, i, sb, true, agents), out);
+					
+					outDV(key, prepVarOutput(taskID, i, sb, true), out);
+				}
+			}
+		}
+		catch(Exception e) {
+			throw new IOException(e);
+		}
+	}
+	
+	/**
+	 * Applicable when running on SPARK.
+	 * Appends a (key, record) pair to the list that feeds the shuffle; a null
+	 * record is silently skipped.
+	 * 
+	 * @param iw   key of the pair
+	 * @param dv   record to add, may be null
+	 * @param list destination list
+	 * @throws IOException declared for symmetry with the collector-based variant
+	 */
+	private void addDV(Integer iw, DistinctValue dv, ArrayList<Tuple2<Integer, DistinctValue>> list) throws IOException {
+		if ( dv == null )
+			return;
+		list.add( new Tuple2<Integer, DistinctValue>(iw, dv) );
+	}
+
+	/**
+	 * List-based variant of the map-side metadata output: appends the same
+	 * records, in the same order and keyed by the negated column ID, to the
+	 * supplied list instead of an output collector.
+	 * 
+	 * @param taskID ID of the emitting task, embedded in the records
+	 * @param list   list to append to
+	 * @param agents supplies the record count used for the total-count record
+	 * @return the list passed in
+	 * @throws IOException wrapping any exception raised while building records
+	 */
+	public ArrayList<Tuple2<Integer, DistinctValue>> mapOutputTransformationMetadata(int taskID, ArrayList<Tuple2<Integer, DistinctValue>> list, TfUtils agents) throws IOException {
+		try {
+			final StringBuilder sb = new StringBuilder();
+			
+			// columns subject to missing value imputation (possibly scaled as well)
+			if ( _mvList != null ) {
+				for ( int i = 0; i < _mvList.length; i++ ) {
+					final Integer key = -_mvList[i];
+					
+					addDV(key, prepMeanOutput(taskID, i, sb, false), list);
+					addDV(key, prepMeanCorrectionOutput(taskID, i, sb, false), list);
+					addDV(key, prepMeanCountOutput(taskID, i, sb, false), list);
+					addDV(key, prepTotalCountOutput(taskID, i, sb, false, agents), list);
+					
+					addDV(key, prepConstantOutput(i, sb), list);
+					
+					// variance information relevant to scaling
+					addDV(key, prepVarOutput(taskID, i, sb, false), list);
+				}
+			}
+			
+			// columns that are scaled but not imputed
+			if ( _scnomvList != null ) {
+				for ( int i = 0; i < _scnomvList.length; i++ ) {
+					final Integer key = -_scnomvList[i];
+					
+					addDV(key, prepMeanOutput(taskID, i, sb, true), list);
+					addDV(key, prepMeanCorrectionOutput(taskID, i, sb, true), list);
+					addDV(key, prepMeanCountOutput(taskID, i, sb, true), list);
+					addDV(key, prepTotalCountOutput(taskID, i, sb, true, agents), list);
+					
+					addDV(key, prepVarOutput(taskID, i, sb, true), list);
+				}
+			}
+		}
+		catch(Exception e) {
+			throw new IOException(e);
+		}
+		return list;
+	}
+	
+	// ----------------------------------------------------------------------------------------------------------
+	
+	/**
+	 * Writes the imputation metadata file of a column: a single line
+	 * "colID SEP value" stored under tfMtdDir/Impute/. An existing file is
+	 * overwritten.
+	 * 
+	 * @param colID    column ID written as the first field
+	 * @param mean     replacement value written as the second field
+	 * @param tfMtdDir root directory of the transformation metadata
+	 * @param fs       file system to write to
+	 * @param agents   resolves the column name used in the file name
+	 * @throws IOException if the file cannot be created or written
+	 */
+	private void writeTfMtd(int colID, String mean, String tfMtdDir, FileSystem fs, TfUtils agents) throws IOException 
+	{
+		Path pt=new Path(tfMtdDir+"/Impute/"+ agents.getName(colID) + MV_FILE_SUFFIX);
+		BufferedWriter br=new BufferedWriter(new OutputStreamWriter(fs.create(pt,true)));
+		try {
+			br.write(colID + TXMTD_SEP + mean + "\n");
+		}
+		finally {
+			// release the underlying stream even when the write fails
+			br.close();
+		}
+	}
+	
+	/**
+	 * Writes the scaling metadata file of a column: a single line
+	 * "colID SEP mean SEP sdev" stored under tfMtdDir/Scale/. An existing
+	 * file is overwritten.
+	 * 
+	 * @param colID    column ID written as the first field
+	 * @param mean     mean written as the second field
+	 * @param sdev     standard deviation written as the third field
+	 * @param tfMtdDir root directory of the transformation metadata
+	 * @param fs       file system to write to
+	 * @param agents   resolves the column name used in the file name
+	 * @throws IOException if the file cannot be created or written
+	 */
+	private void writeTfMtd(int colID, String mean, String sdev, String tfMtdDir, FileSystem fs, TfUtils agents) throws IOException 
+	{
+		Path pt=new Path(tfMtdDir+"/Scale/"+ agents.getName(colID) + SCALE_FILE_SUFFIX);
+		BufferedWriter br=new BufferedWriter(new OutputStreamWriter(fs.create(pt,true)));
+		try {
+			br.write(colID + TXMTD_SEP + mean + TXMTD_SEP + sdev + "\n");
+		}
+		finally {
+			// release the underlying stream even when the write fails
+			br.close();
+		}
+	}
+	
+	/**
+	 * Writes the binning metadata file of a column: a single line
+	 * "colID SEP min SEP max SEP binwidth SEP nbins" stored under
+	 * tfMtdDir/Bin/. An existing file is overwritten.
+	 * 
+	 * @param colID    column ID written as the first field
+	 * @param min      minimum value of the column
+	 * @param max      maximum value of the column
+	 * @param binwidth width of a single bin
+	 * @param nbins    number of bins
+	 * @param tfMtdDir root directory of the transformation metadata
+	 * @param fs       file system to write to
+	 * @param agents   resolves the column name used in the file name
+	 * @throws IOException if the file cannot be created or written
+	 */
+	private void writeTfMtd(int colID, String min, String max, String binwidth, String nbins, String tfMtdDir, FileSystem fs, TfUtils agents) throws IOException 
+	{
+		Path pt = new Path(tfMtdDir+"/Bin/"+ agents.getName(colID) + BIN_FILE_SUFFIX);
+		BufferedWriter br=new BufferedWriter(new OutputStreamWriter(fs.create(pt,true)));
+		try {
+			br.write(colID + TXMTD_SEP + min + TXMTD_SEP + max + TXMTD_SEP + binwidth + TXMTD_SEP + nbins + "\n");
+		}
+		finally {
+			// release the underlying stream even when the write fails
+			br.close();
+		}
+	}
+	
+	/**
+	 * Writes the imputation and scaling metadata held in this agent's own
+	 * lists directly to files under outputDir, without going through the
+	 * map-output/merge path.
+	 * 
+	 * @param outputDir root directory of the transformation metadata
+	 * @param fs        file system to write to
+	 * @param agents    supplies column names and the record count (getValid())
+	 * @throws IOException on write failure, or wrapping a DMLRuntimeException
+	 *                     raised by the mean/variance functions
+	 */
+	public void outputTransformationMetadata(String outputDir, FileSystem fs, TfUtils agents) throws IOException {
+		
+		try{
+			if (_mvList != null)
+				for(int i=0; i < _mvList.length; i++) {
+					int colID = _mvList[i];
+					
+					double imputedValue = Double.NaN;
+					KahanObject gmean = null;
+					// method code 1: impute with the mean held in _meanList
+					if ( _mvMethodList[i] == 1 ) 
+					{
+						gmean = _meanList[i];
+						imputedValue = _meanList[i]._sum;
+						
+						// a column without any counted value is written with mean 0.0
+						double mean = ( _countList[i] == 0 ? 0.0 : _meanList[i]._sum); 
+						writeTfMtd(colID, Double.toString(mean), outputDir, fs, agents);
+					}
+					// method code 3: impute with the configured constant
+					else if ( _mvMethodList[i] == 3 ) 
+					{
+						writeTfMtd(colID, _replacementList[i], outputDir, fs, agents);
+						
+						if (_isMVScaled.get(i) )
+						{
+							imputedValue = UtilFunctions.parseToDouble(_replacementList[i]);
+							// adjust the global mean, by combining gmean with "replacement" (weight = #missing values)
+							// NOTE(review): the count passed below is agents.getValid(), not a
+							// missing-value count -- confirm against the contract of _meanFn.execute
+							gmean = new KahanObject(_meanList[i]._sum, _meanList[i]._correction);
+							_meanFn.execute(gmean, imputedValue, agents.getValid());
+						}
+					}
+						
+					if ( _isMVScaled.get(i) ) 
+					{
+						// sdev stays -1.0 unless the scaling method code is 2
+						double sdev = -1.0;
+						if ( _mvscMethodList[i] == 2 ) {
+							// Adjust variance with missing values
+							long totalMissingCount = (agents.getValid() - _countList[i]);
+							_varFn.execute(_varList[i], imputedValue, totalMissingCount);
+							double var = _varList[i].getRequiredResult(new CMOperator(_varFn, AggregateOperationTypes.VARIANCE));
+							sdev = Math.sqrt(var);
+						}
+						// NOTE(review): gmean is only assigned for method 1, or for method 3 on a
+						// scaled column; a scaled column with any other method code would
+						// dereference null here -- confirm that combination cannot occur
+						writeTfMtd(colID, Double.toString(gmean._sum), Double.toString(sdev), outputDir, fs, agents);
+					}
+				}
+		
+			// columns that are scaled but not imputed
+			if(_scnomvList != null)
+				for(int i=0; i < _scnomvList.length; i++ )
+				{
+					int colID = _scnomvList[i];
+					double mean = (_scnomvCountList[i] == 0 ? 0.0 : _scnomvMeanList[i]._sum);
+					double sdev = -1.0;
+					if ( _scnomvMethodList[i] == 2 ) 
+					{
+						double var = _scnomvVarList[i].getRequiredResult(new CMOperator(_varFn, AggregateOperationTypes.VARIANCE));
+						sdev = Math.sqrt(var);
+					}
+					writeTfMtd(colID, Double.toString(mean), Double.toString(sdev), outputDir, fs, agents);
+				}
+			
+		} catch(DMLRuntimeException e) {
+			throw new IOException(e); 
+		}
+	}
+	
+	/** 
+	 * Merges the map-output records of one column and writes the resulting
+	 * metadata files. Each record is dispatched on its prefix (mean,
+	 * correction, constant, count, total count, variance, and the bin
+	 * min/max/nbins prefixes of BinAgent). Per-task means and variance
+	 * accumulators are then combined into global values, and the Impute, Bin
+	 * and Scale files are written for whichever of the three applies.
+	 * 
+	 * @param values    map-output records for the column
+	 * @param outputDir root directory of the transformation metadata
+	 * @param colID     ID of the column being merged
+	 * @param fs        file system to write to
+	 * @param agents    resolves column names for the output files
+	 * @throws IOException on write failure, or wrapping a DMLRuntimeException
+	 *                     raised by the mean/variance functions
+	 */
+	@Override
+	public void mergeAndOutputTransformationMetadata(Iterator<DistinctValue> values, String outputDir, int colID, FileSystem fs, TfUtils agents) throws IOException {
+		double min = Double.MAX_VALUE;
+		double max = -Double.MAX_VALUE;
+		int nbins = 0;
+		double d;
+		long totalRecordCount = 0, totalValidCount=0;
+		String mvConstReplacement = null;
+		
+		DistinctValue val = new DistinctValue();
+		String w = null;
+		
+		// per-task partial mean: value, its correction term, and the count it covers
+		class MeanObject {
+			double mean, correction;
+			long count;
+			
+			MeanObject() { }
+			public String toString() {
+				return mean + "," + correction + "," + count;
+			}
+		};
+		// both maps are keyed by task ID
+		HashMap<Integer, MeanObject> mapMeans = new HashMap<Integer, MeanObject>();
+		HashMap<Integer, CM_COV_Object> mapVars = new HashMap<Integer, CM_COV_Object>();
+		boolean isImputed = false;
+		boolean isScaled = false;
+		boolean isBinned = false;
+		
+		while(values.hasNext()) {
+			// NOTE(review): reset() acts on the previous object, which is then replaced
+			// by the next element -- it looks redundant unless the iterator reuses instances
+			val.reset();
+			val = values.next();
+			w = val.getWord();
+			
+			if(w.startsWith(MEAN_PREFIX)) {
+				// record layout: PREFIX_taskID_mean,suffix
+				String[] parts = w.split("_");
+				int taskID = UtilFunctions.parseToInt(parts[1]);
+				MeanObject mo = mapMeans.get(taskID);
+				if ( mo==null ) 
+					mo = new MeanObject();
+				
+				mo.mean = UtilFunctions.parseToDouble(parts[2].split(",")[0]);
+				
+				// check if this attribute is scaled
+				String s = parts[2].split(",")[1]; 
+				if(s.equalsIgnoreCase("scmv"))
+					isScaled = isImputed = true;
+				else if ( s.equalsIgnoreCase("scnomv") )
+					isScaled = true;
+				else
+					isImputed = true;
+				
+				mapMeans.put(taskID, mo);
+			}
+			else if (w.startsWith(CORRECTION_PREFIX)) {
+				// record layout: PREFIX_taskID_correction
+				String[] parts = w.split("_");
+				int taskID = UtilFunctions.parseToInt(parts[1]);
+				MeanObject mo = mapMeans.get(taskID);
+				if ( mo==null ) 
+					mo = new MeanObject();
+				mo.correction = UtilFunctions.parseToDouble(parts[2]);
+				mapMeans.put(taskID, mo);
+			}
+			else if ( w.startsWith(CONSTANT_PREFIX) )
+			{
+				// record layout: PREFIX_constant (no task ID)
+				isImputed = true;
+				String[] parts = w.split("_");
+				mvConstReplacement = parts[1];
+			}
+			else if (w.startsWith(COUNT_PREFIX)) {
+				// record layout: PREFIX_taskID_count
+				String[] parts = w.split("_");
+				int taskID = UtilFunctions.parseToInt(parts[1]);
+				MeanObject mo = mapMeans.get(taskID);
+				if ( mo==null ) 
+					mo = new MeanObject();
+				mo.count = UtilFunctions.parseToLong(parts[2]);
+				totalValidCount += mo.count;
+				mapMeans.put(taskID, mo);
+			}
+			else if (w.startsWith(TOTAL_COUNT_PREFIX)) {
+				// record layout: PREFIX_taskID_count; only the count is used
+				String[] parts = w.split("_");
+				//int taskID = UtilFunctions.parseToInt(parts[1]);
+				totalRecordCount += UtilFunctions.parseToLong(parts[2]);
+			}
+			else if (w.startsWith(VARIANCE_PREFIX)) {
+				// record layout: PREFIX_taskID_encodedObject
+				isScaled = true;
+				String[] parts = w.split("_");
+				int taskID = UtilFunctions.parseToInt(parts[1]);
+				CM_COV_Object cm = decodeCMObj(parts[2]);
+				mapVars.put(taskID, cm);
+			}
+			else if(w.startsWith(BinAgent.MIN_PREFIX)) {
+				// keep the smallest minimum seen so far
+				isBinned = true;
+				d = UtilFunctions.parseToDouble( w.substring( BinAgent.MIN_PREFIX.length() ) );
+				if ( d < min )
+					min = d;
+			}
+			else if(w.startsWith(BinAgent.MAX_PREFIX)) {
+				// keep the largest maximum seen so far
+				isBinned = true;
+				d = UtilFunctions.parseToDouble( w.substring( BinAgent.MAX_PREFIX.length() ) );
+				if ( d > max )
+					max = d;
+			}
+			else if (w.startsWith(BinAgent.NBINS_PREFIX)) {
+				isBinned = true;
+				nbins = (int) UtilFunctions.parseToLong( w.substring(BinAgent.NBINS_PREFIX.length() ) );
+			}
+			else
+				throw new RuntimeException("MVImputeAgent: Invalid prefix while merging map output: " + w);
+		}
+		
+		// compute global mean across all map outputs
+		// (incremental update: gmean += (mo.mean - gmean) * mo.count / gcount, added via KahanPlus)
+		KahanObject gmean = new KahanObject(0, 0);
+		KahanPlus kp = KahanPlus.getKahanPlusFnObject();
+		long gcount = 0;
+		for(MeanObject mo : mapMeans.values()) {
+			gcount = gcount + mo.count;
+			if ( gcount > 0) {
+				double delta = mo.mean - gmean._sum;
+				kp.execute2(gmean, delta*mo.count/gcount);
+				//_meanFn.execute2(gmean, mo.mean*mo.count, gcount);
+			}
+		}
+		
+		// compute global variance across all map outputs
+		CM_COV_Object gcm = new CM_COV_Object();
+		try {
+			for(CM_COV_Object cm : mapVars.values())
+				gcm = (CM_COV_Object) _varFn.execute(gcm, cm);
+		} catch (DMLRuntimeException e) {
+			throw new IOException(e);
+		}
+		
+		// If the column is imputed with a constant, then adjust min and max based on the value of the constant.
+		if(isImputed && isBinned && mvConstReplacement != null)
+		{
+			double cst = UtilFunctions.parseToDouble(mvConstReplacement);
+			if ( cst < min)
+				min = cst;
+			if ( cst > max)
+				max = cst;
+		}
+
+		// write merged metadata
+		if( isImputed ) 
+		{
+			// a constant replacement takes precedence over the computed global mean
+			String imputedValue = null;
+			if ( mvConstReplacement != null )
+				imputedValue = mvConstReplacement;
+			else 
+				imputedValue = Double.toString(gcount == 0 ? 0.0 : gmean._sum);
+			
+			writeTfMtd(colID, imputedValue, outputDir, fs, agents);
+		}
+		
+		if ( isBinned ) {
+			// NOTE(review): nbins stays 0 when no NBINS record was seen; the division is
+			// done in double arithmetic, so it yields Infinity/NaN rather than throwing
+			double binwidth = (max-min)/nbins;
+			writeTfMtd(colID, Double.toString(min), Double.toString(max), Double.toString(binwidth), Integer.toString(nbins), outputDir, fs, agents);
+		}
+		
+		if ( isScaled ) 
+		{
+			try {
+				if( totalValidCount != totalRecordCount) {
+					// In the presence of missing values, the variance needs to be adjusted.
+					// The mean does not need to be adjusted, when mv impute method is global_mean, 
+					// since missing values themselves are replaced with gmean.
+					long totalMissingCount = (totalRecordCount-totalValidCount);
+					// the constant is taken from this agent's _replacementList here,
+					// not from mvConstReplacement parsed above
+					int idx = isImputed(colID);
+					if(idx != -1 && _mvMethodList[idx] == 3) 
+						_meanFn.execute(gmean, UtilFunctions.parseToDouble(_replacementList[idx]), totalRecordCount);
+					_varFn.execute(gcm, gmean._sum, totalMissingCount);
+				}
+				
+				double mean = (gcount == 0 ? 0.0 : gmean._sum);
+				double var = gcm.getRequiredResult(new CMOperator(_varFn, AggregateOperationTypes.VARIANCE));
+				// -1.0 marks "no variance records received"
+				double sdev = (mapVars.size() > 0 ? Math.sqrt(var) : -1.0 );
+				
+				writeTfMtd(colID, Double.toString(mean), Double.toString(sdev), outputDir, fs, agents);
+				
+				
+			} catch (DMLRuntimeException e) {
+				throw new IOException(e);
+			}
+		}
+	}
+	
+	// ------------------------------------------------------------------------------------------------
+
+	/**
+	 * Reads the replacement value of a column from its imputation metadata
+	 * file under txMtdDir/Impute/. The file is expected to hold one line of
+	 * the form "colID SEP value"; the second field is returned unquoted.
+	 * 
+	 * @param colID    ID of the column
+	 * @param fs       file system to read from
+	 * @param txMtdDir root directory of the transformation metadata
+	 * @param agents   resolves the column name used in the file name
+	 * @return the unquoted replacement value
+	 * @throws IOException if the file cannot be read, is empty, or has no second field
+	 */
+	private String readReplacement(int colID, FileSystem fs, Path  txMtdDir, TfUtils agents) throws IOException
+	{
+		Path path = new Path( txMtdDir + "/Impute/" + agents.getName(colID) + MV_FILE_SUFFIX);
+		TfUtils.checkValidInputFile(fs, path, true); 
+		
+		BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(path)));
+		try {
+			String line = br.readLine();
+			if ( line == null )
+				throw new IOException("Empty imputation metadata file: " + path);
+			
+			String[] parts = line.split(TXMTD_SEP);
+			if ( parts.length < 2 )
+				throw new IOException("Malformed imputation metadata in " + path + ": " + line);
+			
+			return UtilFunctions.unquote(parts[1]);
+		}
+		finally {
+			// close the reader on every path, including parse failures
+			br.close();
+		}
+	}
+	
+	/**
+	 * Reads the first line of a column's scaling metadata file under
+	 * txMtdDir/Scale/.
+	 * 
+	 * @param colID    ID of the column
+	 * @param fs       file system to read from
+	 * @param txMtdDir root directory of the transformation metadata
+	 * @param agents   resolves the column name used in the file name
+	 * @return the first line of the file, or null when the file is empty
+	 * @throws IOException if the file cannot be read
+	 */
+	public String readScaleLine(int colID, FileSystem fs, Path txMtdDir, TfUtils agents) throws IOException
+	{
+		Path path = new Path( txMtdDir + "/Scale/" + agents.getName(colID) + SCALE_FILE_SUFFIX);
+		TfUtils.checkValidInputFile(fs, path, true); 
+		
+		BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(path)));
+		try {
+			return br.readLine();
+		}
+		finally {
+			// close the reader even when readLine fails
+			br.close();
+		}
+	}
+	
+	/**
+	 * Loads the scaling metadata (mean and standard deviation) of the column
+	 * at position i of the given list. The mean is stored in meanList[i]._sum;
+	 * the standard deviation is stored in varList[i].mean._sum, which is the
+	 * field apply() divides by.
+	 * 
+	 * @param i        position of the column within list
+	 * @param list     column IDs
+	 * @param meanList receives the mean at position i
+	 * @param varList  receives the standard deviation at position i
+	 * @param fs       file system to read from
+	 * @param tfMtdDir root directory of the transformation metadata
+	 * @param agents   resolves the column name used in the file name
+	 * @throws IOException if the file cannot be read, is empty, or has fewer than three fields
+	 */
+	private void processScalingFile(int i, int[] list, KahanObject[] meanList, CM_COV_Object[] varList, FileSystem fs, Path tfMtdDir, TfUtils agents ) throws IOException
+	{
+		int colID = list[i];
+		
+		String line = readScaleLine(colID, fs, tfMtdDir, agents);
+		if ( line == null )
+			throw new IOException("Empty scaling metadata file for column " + colID);
+		
+		// split on the same separator the scale file is written with
+		String[] parts = line.split(TXMTD_SEP);
+		if ( parts.length < 3 )
+			throw new IOException("Malformed scaling metadata for column " + colID + ": " + line);
+		
+		double mean = UtilFunctions.parseToDouble(parts[1]);
+		double sd = UtilFunctions.parseToDouble(parts[2]);
+		
+		meanList[i]._sum = mean;
+		varList[i].mean._sum = sd;
+	}
+	
+	// ------------------------------------------------------------------------------------------------
+
+	/**
+	 * Loads the transformation metadata of all columns handled by this agent:
+	 * replacement values for imputation methods 1 and 2, and mean/standard
+	 * deviation for every scaled column.
+	 * 
+	 * @param job      job configuration (not used by this method)
+	 * @param fs       file system holding the metadata
+	 * @param tfMtdDir root directory of the transformation metadata
+	 * @param agents   resolves column names for the metadata files
+	 * @throws IOException if a metadata file cannot be read
+	 */
+	@Override
+	public void loadTxMtd(JobConf job, FileSystem fs, Path tfMtdDir, TfUtils agents) throws IOException {
+		
+		if ( !fs.isDirectory(tfMtdDir) ) {
+			// NOTE(review): the message mentions "recode maps" although this agent loads
+			// imputation/scaling metadata, and the passed-in file system is closed here --
+			// confirm both are intended
+			fs.close();
+			throw new RuntimeException("Path to recode maps must be a directory: " + tfMtdDir);
+		}
+		
+		// Load information about missing value imputation
+		if ( _mvList != null ) {
+			for ( int i = 0; i < _mvList.length; i++ ) {
+				final int colID = _mvList[i];
+				
+				switch ( _mvMethodList[i] ) {
+					case 1: // global_mean
+					case 2: // global_mode
+						_replacementList[i] = readReplacement(colID, fs, tfMtdDir, agents);
+						break;
+					case 3:
+						// constant: nothing to load, the value is already set during configure
+						break;
+					default:
+						throw new RuntimeException("Invalid Missing Value Imputation methods: " + _mvMethodList[i]);
+				}
+			}
+		}
+		
+		// Load scaling information of imputed columns
+		if ( _mvList != null ) {
+			for ( int i = 0; i < _mvList.length; i++ ) {
+				if ( _isMVScaled.get(i) )
+					processScalingFile(i, _mvList, _meanList, _varList, fs, tfMtdDir, agents);
+			}
+		}
+		
+		// Load scaling information of columns that are scaled but not imputed
+		if ( _scnomvList != null ) {
+			for ( int i = 0; i < _scnomvList.length; i++ )
+				processScalingFile(i, _scnomvList, _scnomvMeanList, _scnomvVarList, fs, tfMtdDir, agents);
+		}
+	}
+	
+	/**
+	 * Applies imputation and scaling to one row, in place. For each imputed
+	 * column a missing value is replaced by the loaded replacement; scaled
+	 * columns are then centered by the mean (method code 1) or centered and
+	 * divided by the loaded standard deviation (any other method code).
+	 * 
+	 * @param words  column values of the row; column IDs are 1-based indexes into it
+	 * @param agents decides whether a value counts as missing
+	 * @return the same array, with the affected entries rewritten
+	 */
+	@Override
+	public String[] apply(String[] words, TfUtils agents) {
+		
+		if ( _mvList != null ) {
+			for ( int i = 0; i < _mvList.length; i++ ) {
+				final int pos = _mvList[i] - 1;
+				
+				String value = UtilFunctions.unquote(words[pos]);
+				if ( agents.isNA(value) ) {
+					value = _replacementList[i];
+					words[pos] = value;
+				}
+				
+				if ( !_isMVScaled.get(i) )
+					continue;
+				
+				final boolean centerOnly = (_mvscMethodList[i] == 1);
+				double scaled = UtilFunctions.parseToDouble(value) - _meanList[i]._sum;
+				if ( !centerOnly )
+					scaled = scaled / _varList[i].mean._sum;
+				words[pos] = Double.toString(scaled);
+			}
+		}
+		
+		if ( _scnomvList != null ) {
+			for ( int i = 0; i < _scnomvList.length; i++ ) {
+				final int pos = _scnomvList[i] - 1;
+				
+				final boolean centerOnly = (_scnomvMethodList[i] == 1);
+				double scaled = UtilFunctions.parseToDouble(words[pos]) - _scnomvMeanList[i]._sum;
+				if ( !centerOnly )
+					scaled = scaled / _scnomvVarList[i].mean._sum;
+				words[pos] = Double.toString(scaled);
+			}
+		}
+		
+		return words;
+	}
+	
+	/**
+	 * Looks up the given column ID among the imputed columns.
+	 * 
+	 * @param colID column ID to look up
+	 * @return position of colID within the imputation list, or -1 when the
+	 *         column is not imputed or no list is configured
+	 */
+	public int isImputed(int colID)
+	{
+		if ( _mvList != null ) {
+			// binary search: assumes _mvList is sorted ascending -- TODO confirm
+			final int pos = Arrays.binarySearch(_mvList, colID);
+			if ( pos >= 0 )
+				return pos;
+		}
+		return -1;
+	}
+	
+	/**
+	 * Maps the numeric imputation method code of a column to its MVMethod
+	 * constant (1: GLOBAL_MEAN, 2: GLOBAL_MODE, 3: CONSTANT).
+	 * 
+	 * @param colID column ID to look up
+	 * @return the method, or MVMethod.INVALID for a non-imputed column or an unknown code
+	 */
+	public MVMethod getMethod(int colID) 
+	{
+		final int idx = isImputed(colID);
+		if ( idx != -1 ) {
+			final byte code = _mvMethodList[idx];
+			if ( code == 1 )
+				return MVMethod.GLOBAL_MEAN;
+			if ( code == 2 )
+				return MVMethod.GLOBAL_MODE;
+			if ( code == 3 )
+				return MVMethod.CONSTANT;
+		}
+		return MVMethod.INVALID;
+	}
+	
+	/**
+	 * Returns the count stored for an imputed column.
+	 * 
+	 * @param colID column ID to look up
+	 * @return the stored count, or 0 when the column is not imputed
+	 */
+	public long getNonMVCount(int colID) 
+	{
+		final int idx = isImputed(colID);
+		return ( idx == -1 ? 0 : _countList[idx] );
+	}
+	
+	/**
+	 * Returns the replacement value stored for an imputed column.
+	 * 
+	 * @param colID column ID to look up
+	 * @return the stored replacement, or null when the column is not imputed
+	 */
+	public String getReplacement(int colID) 
+	{
+		final int idx = isImputed(colID);
+		return ( idx == -1 ? null : _replacementList[idx] );
+	}
+	
+	/**
+	 * Prints the imputed column IDs and, on the next line, their method codes
+	 * to standard output. When no imputation list is configured only the
+	 * header and empty lines are printed.
+	 */
+	public void print() {
+		System.out.print("MV Imputation List: \n    ");
+		// both lists may be null when imputation is not configured
+		if ( _mvList != null ) {
+			for(int i : _mvList) {
+				System.out.print(i + " ");
+			}
+		}
+		System.out.print("\n    ");
+		if ( _mvMethodList != null ) {
+			for(byte b : _mvMethodList) {
+				System.out.print(b + " ");
+			}
+		}
+		System.out.println();
+	}
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/main/java/org/apache/sysml/runtime/transform/OmitAgent.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/transform/OmitAgent.java b/src/main/java/org/apache/sysml/runtime/transform/OmitAgent.java
index 730f40d..bd2feb3 100644
--- a/src/main/java/org/apache/sysml/runtime/transform/OmitAgent.java
+++ b/src/main/java/org/apache/sysml/runtime/transform/OmitAgent.java
@@ -1,124 +1,124 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.sysml.runtime.transform;
-
-import java.io.IOException;
-import java.util.Arrays;
-import java.util.Iterator;
-
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.IntWritable;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.wink.json4j.JSONArray;
-import org.apache.wink.json4j.JSONException;
-import org.apache.wink.json4j.JSONObject;
-
-import org.apache.sysml.runtime.util.UtilFunctions;
-
-public class OmitAgent extends TransformationAgent {
-	
-	private static final long serialVersionUID = 1978852120416654195L;
-
-	private int[] _omitList = null;
-
-	OmitAgent() { }
-	
-	OmitAgent(int[] list) {
-		_omitList = list;
-	}
-	
-	public OmitAgent(JSONObject parsedSpec) throws JSONException {
-		if (!parsedSpec.containsKey(TX_METHOD.OMIT.toString()))
-			return;
-		JSONObject obj = (JSONObject) parsedSpec.get(TX_METHOD.OMIT.toString());
-		JSONArray attrs = (JSONArray) obj.get(JSON_ATTRS);
-		
-		_omitList = new int[attrs.size()];
-		for(int i=0; i < _omitList.length; i++) 
-			_omitList[i] = UtilFunctions.toInt(attrs.get(i));
-	}
-	
-	public boolean omit(String[] words, TfUtils agents) 
-	{
-		if(_omitList == null)
-			return false;
-		
-		for(int i=0; i<_omitList.length; i++) 
-		{
-			int colID = _omitList[i];
-			if(agents.isNA(UtilFunctions.unquote(words[colID-1].trim())))
-				return true;
-		}
-		return false;
-	}
-	
-	public boolean isApplicable() 
-	{
-		return (_omitList != null);
-	}
-	
-	/**
-	 * Check if the given column ID is subjected to this transformation.
-	 * 
-	 */
-	public int isOmitted(int colID)
-	{
-		if(_omitList == null)
-			return -1;
-		
-		int idx = Arrays.binarySearch(_omitList, colID);
-		return ( idx >= 0 ? idx : -1);
-	}
-
-	@Override
-	public void print() {
-		System.out.print("Omit List: \n    ");
-		for(int i : _omitList) 
-			System.out.print(i + " ");
-		System.out.println();
-	}
-
-	@Override
-	public void mapOutputTransformationMetadata(
-			OutputCollector<IntWritable, DistinctValue> out, int taskID,
-			TfUtils agents) throws IOException {
-	}
-
-	@Override
-	public void mergeAndOutputTransformationMetadata(
-			Iterator<DistinctValue> values, String outputDir, int colID,
-			FileSystem fs, TfUtils agents) throws IOException {
-	}
-
-	@Override
-	public void loadTxMtd(JobConf job, FileSystem fs, Path txMtdDir, TfUtils agents)
-			throws IOException {
-	}
-
-	@Override
-	public String[] apply(String[] words, TfUtils agents) {
-		return null;
-	}
-
-
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.runtime.transform;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.Iterator;
+
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.wink.json4j.JSONArray;
+import org.apache.wink.json4j.JSONException;
+import org.apache.wink.json4j.JSONObject;
+
+import org.apache.sysml.runtime.util.UtilFunctions;
+
+/**
+ * Transformation agent that decides whether a row is dropped because one of
+ * the configured columns holds a missing value. It produces and consumes no
+ * transformation metadata, so the metadata-related methods are empty.
+ */
+public class OmitAgent extends TransformationAgent {
+	
+	private static final long serialVersionUID = 1978852120416654195L;
+
+	// IDs of the columns checked for missing values; null when omit is not configured
+	private int[] _omitList = null;
+
+	OmitAgent() { }
+	
+	OmitAgent(int[] list) {
+		_omitList = list;
+	}
+	
+	/**
+	 * Reads the omit column list from the parsed specification. The agent
+	 * stays unconfigured when the specification has no omit entry.
+	 * 
+	 * @param parsedSpec parsed transformation specification
+	 * @throws JSONException if the omit entry cannot be read
+	 */
+	public OmitAgent(JSONObject parsedSpec) throws JSONException {
+		if (!parsedSpec.containsKey(TX_METHOD.OMIT.toString()))
+			return;
+		JSONObject obj = (JSONObject) parsedSpec.get(TX_METHOD.OMIT.toString());
+		JSONArray attrs = (JSONArray) obj.get(JSON_ATTRS);
+		
+		_omitList = new int[attrs.size()];
+		for(int i=0; i < _omitList.length; i++) 
+			_omitList[i] = UtilFunctions.toInt(attrs.get(i));
+	}
+	
+	/**
+	 * Tells whether the row must be dropped.
+	 * 
+	 * @param words  column values of the row; column IDs are 1-based indexes into it
+	 * @param agents decides whether a value counts as missing
+	 * @return true if any configured column holds a missing value (after
+	 *         trimming and unquoting); false otherwise or when unconfigured
+	 */
+	public boolean omit(String[] words, TfUtils agents) 
+	{
+		if(_omitList == null)
+			return false;
+		
+		for(int i=0; i<_omitList.length; i++) 
+		{
+			int colID = _omitList[i];
+			if(agents.isNA(UtilFunctions.unquote(words[colID-1].trim())))
+				return true;
+		}
+		return false;
+	}
+	
+	/**
+	 * @return true when an omit list is configured
+	 */
+	public boolean isApplicable() 
+	{
+		return (_omitList != null);
+	}
+	
+	/**
+	 * Check if the given column ID is subjected to this transformation.
+	 * 
+	 * @param colID column ID to look up
+	 * @return position of colID within the omit list, or -1 when absent or unconfigured
+	 */
+	public int isOmitted(int colID)
+	{
+		if(_omitList == null)
+			return -1;
+		
+		// binary search: assumes _omitList is sorted ascending -- TODO confirm
+		int idx = Arrays.binarySearch(_omitList, colID);
+		return ( idx >= 0 ? idx : -1);
+	}
+
+	/**
+	 * Prints the configured column IDs to standard output; only the header
+	 * is printed when no omit list is configured.
+	 */
+	@Override
+	public void print() {
+		System.out.print("Omit List: \n    ");
+		// the list is null when the agent was built without an omit entry
+		if ( _omitList != null ) {
+			for(int i : _omitList) 
+				System.out.print(i + " ");
+		}
+		System.out.println();
+	}
+
+	/** No map-side metadata is produced by this agent. */
+	@Override
+	public void mapOutputTransformationMetadata(
+			OutputCollector<IntWritable, DistinctValue> out, int taskID,
+			TfUtils agents) throws IOException {
+	}
+
+	/** No metadata is merged or written by this agent. */
+	@Override
+	public void mergeAndOutputTransformationMetadata(
+			Iterator<DistinctValue> values, String outputDir, int colID,
+			FileSystem fs, TfUtils agents) throws IOException {
+	}
+
+	/** No metadata is loaded by this agent. */
+	@Override
+	public void loadTxMtd(JobConf job, FileSystem fs, Path txMtdDir, TfUtils agents)
+			throws IOException {
+	}
+
+	/**
+	 * Performs no per-row rewrite; row dropping is decided through
+	 * {@link #omit(String[], TfUtils)}.
+	 * 
+	 * @return always null
+	 */
+	@Override
+	public String[] apply(String[] words, TfUtils agents) {
+		return null;
+	}
+
+
+}
  
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/main/java/org/apache/sysml/runtime/transform/RecodeAgent.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/transform/RecodeAgent.java b/src/main/java/org/apache/sysml/runtime/transform/RecodeAgent.java
index 55c9ebd..295c056 100644
--- a/src/main/java/org/apache/sysml/runtime/transform/RecodeAgent.java
+++ b/src/main/java/org/apache/sysml/runtime/transform/RecodeAgent.java
@@ -461,5 +461,5 @@ public class RecodeAgent extends TransformationAgent {
 		}
 		System.out.println();
 	}
-}
+}
  
\ No newline at end of file


[48/55] [partial] incubator-systemml git commit: [SYSTEMML-482] [SYSTEMML-480] Adding a Git attributes file to enforce Unix-styled line endings, and normalizing all of the line endings.

Posted by du...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/scripts/algorithms/GLM.dml
----------------------------------------------------------------------
diff --git a/scripts/algorithms/GLM.dml b/scripts/algorithms/GLM.dml
index bed88a1..25832c5 100644
--- a/scripts/algorithms/GLM.dml
+++ b/scripts/algorithms/GLM.dml
@@ -1,1167 +1,1167 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# 
-# THIS SCRIPT SOLVES GLM REGRESSION USING NEWTON/FISHER SCORING WITH TRUST REGIONS
-#
-# INPUT PARAMETERS:
-# ---------------------------------------------------------------------------------------------
-# NAME  TYPE   DEFAULT  MEANING
-# ---------------------------------------------------------------------------------------------
-# X     String  ---     Location to read the matrix X of feature vectors
-# Y     String  ---     Location to read response matrix Y with either 1 or 2 columns:
-#                       if dfam = 2, Y is 1-column Bernoulli or 2-column Binomial (#pos, #neg)
-# B     String  ---     Location to store estimated regression parameters (the betas)
-# fmt   String "text"   The betas matrix output format, such as "text" or "csv"
-# O     String  " "     Location to write the printed statistics; by default is standard output
-# Log   String  " "     Location to write per-iteration variables for log/debugging purposes
-# dfam  Int     1       Distribution family code: 1 = Power, 2 = Binomial
-# vpow  Double  0.0     Power for Variance defined as (mean)^power (ignored if dfam != 1):
-#                       0.0 = Gaussian, 1.0 = Poisson, 2.0 = Gamma, 3.0 = Inverse Gaussian
-# link  Int     0       Link function code: 0 = canonical (depends on distribution),
-#                       1 = Power, 2 = Logit, 3 = Probit, 4 = Cloglog, 5 = Cauchit
-# lpow  Double  1.0     Power for Link function defined as (mean)^power (ignored if link != 1):
-#                       -2.0 = 1/mu^2, -1.0 = reciprocal, 0.0 = log, 0.5 = sqrt, 1.0 = identity
-# yneg  Double  0.0     Response value for Bernoulli "No" label, usually 0.0 or -1.0
-# icpt  Int     0       Intercept presence, X columns shifting and rescaling:
-#                       0 = no intercept, no shifting, no rescaling;
-#                       1 = add intercept, but neither shift nor rescale X;
-#                       2 = add intercept, shift & rescale X columns to mean = 0, variance = 1
-# reg   Double  0.0     Regularization parameter (lambda) for L2 regularization
-# tol   Double 0.000001 Tolerance (epsilon)
-# disp  Double  0.0     (Over-)dispersion value, or 0.0 to estimate it from data
-# moi   Int     200     Maximum number of outer (Newton / Fisher Scoring) iterations
-# mii   Int     0       Maximum number of inner (Conjugate Gradient) iterations, 0 = no maximum
-# ---------------------------------------------------------------------------------------------
-# OUTPUT: Matrix beta, whose size depends on icpt:
-#     icpt=0: ncol(X) x 1;  icpt=1: (ncol(X) + 1) x 1;  icpt=2: (ncol(X) + 1) x 2
-#
-# In addition, some GLM statistics are provided in CSV format, one comma-separated name-value
-# pair per each line, as follows:
-#
-# NAME                  MEANING
-# -------------------------------------------------------------------------------------------
-# TERMINATION_CODE      A positive integer indicating success/failure as follows:
-#                       1 = Converged successfully; 2 = Maximum number of iterations reached; 
-#                       3 = Input (X, Y) out of range; 4 = Distribution/link is not supported
-# BETA_MIN              Smallest beta value (regression coefficient), excluding the intercept
-# BETA_MIN_INDEX        Column index for the smallest beta value
-# BETA_MAX              Largest beta value (regression coefficient), excluding the intercept
-# BETA_MAX_INDEX        Column index for the largest beta value
-# INTERCEPT             Intercept value, or NaN if there is no intercept (if icpt=0)
-# DISPERSION            Dispersion used to scale deviance, provided as "disp" input parameter
-#                       or estimated (same as DISPERSION_EST) if the "disp" parameter is <= 0
-# DISPERSION_EST        Dispersion estimated from the dataset
-# DEVIANCE_UNSCALED     Deviance from the saturated model, assuming dispersion == 1.0
-# DEVIANCE_SCALED       Deviance from the saturated model, scaled by the DISPERSION value
-# -------------------------------------------------------------------------------------------
-#
-# The Log file, when requested, contains the following per-iteration variables in CSV format,
-# each line containing triple (NAME, ITERATION, VALUE) with ITERATION = 0 for initial values:
-#
-# NAME                  MEANING
-# -------------------------------------------------------------------------------------------
-# NUM_CG_ITERS          Number of inner (Conj.Gradient) iterations in this outer iteration
-# IS_TRUST_REACHED      1 = trust region boundary was reached, 0 = otherwise
-# POINT_STEP_NORM       L2-norm of iteration step from old point (i.e. "beta") to new point
-# OBJECTIVE             The loss function we minimize (i.e. negative partial log-likelihood)
-# OBJ_DROP_REAL         Reduction in the objective during this iteration, actual value
-# OBJ_DROP_PRED         Reduction in the objective predicted by a quadratic approximation
-# OBJ_DROP_RATIO        Actual-to-predicted reduction ratio, used to update the trust region
-# GRADIENT_NORM         L2-norm of the loss function gradient (NOTE: sometimes omitted)
-# LINEAR_TERM_MIN       The minimum value of X %*% beta, used to check for overflows
-# LINEAR_TERM_MAX       The maximum value of X %*% beta, used to check for overflows
-# IS_POINT_UPDATED      1 = new point accepted; 0 = new point rejected, old point restored
-# TRUST_DELTA           Updated trust region size, the "delta"
-# -------------------------------------------------------------------------------------------
-#
-# Example with distribution = "Binomial.logit":
-# hadoop jar SystemML.jar -f GLM_HOME/GLM.dml -nvargs dfam=2 link=2 yneg=-1.0 icpt=2 reg=0.001
-#     tol=0.00000001 disp=1.0 moi=100 mii=10 X=INPUT_DIR/X Y=INPUT_DIR/Y B=OUTPUT_DIR/betas 
-#     fmt=csv O=OUTPUT_DIR/stats Log=OUTPUT_DIR/log
-#
-# SOME OF THE SUPPORTED GLM DISTRIBUTION FAMILIES
-# AND LINK FUNCTIONS:
-# -----------------------------------------------
-# INPUT PARAMETERS:    MEANING:            Cano-
-# dfam vpow link lpow  Distribution.link   nical?
-# -----------------------------------------------
-#  1   0.0   1  -1.0   Gaussian.inverse
-#  1   0.0   1   0.0   Gaussian.log
-#  1   0.0   1   1.0   Gaussian.id          Yes
-#  1   1.0   1   0.0   Poisson.log          Yes
-#  1   1.0   1   0.5   Poisson.sqrt
-#  1   1.0   1   1.0   Poisson.id
-#  1   2.0   1  -1.0   Gamma.inverse        Yes
-#  1   2.0   1   0.0   Gamma.log
-#  1   2.0   1   1.0   Gamma.id
-#  1   3.0   1  -2.0   InvGaussian.1/mu^2   Yes
-#  1   3.0   1  -1.0   InvGaussian.inverse
-#  1   3.0   1   0.0   InvGaussian.log
-#  1   3.0   1   1.0   InvGaussian.id
-#  1    *    1    *    AnyVariance.AnyLink
-# -----------------------------------------------
-#  2    *    1   0.0   Binomial.log
-#  2    *    1   0.5   Binomial.sqrt
-#  2    *    2    *    Binomial.logit       Yes
-#  2    *    3    *    Binomial.probit
-#  2    *    4    *    Binomial.cloglog
-#  2    *    5    *    Binomial.cauchit
-# -----------------------------------------------
-
-
-# Default values for input parameters
-
-fileX = $X;
-fileY = $Y;
-fileB = $B;
-fileO = ifdef ($O, " ");
-fileLog = ifdef ($Log, " ");
-fmtB = ifdef ($fmt, "text");
-
-distribution_type = ifdef ($dfam, 1);                # $dfam = 1;
-variance_as_power_of_the_mean = ifdef ($vpow, 0.0);  # $vpow = 0.0;
-link_type = ifdef ($link, 0);                        # $link = 0;
-link_as_power_of_the_mean = ifdef ($lpow, 1.0);      # $lpow = 1.0;
-bernoulli_No_label = ifdef ($yneg, 0.0);             # $yneg = 0.0;
-intercept_status = ifdef ($icpt, 0);                 # $icpt = 0;
-dispersion = ifdef ($disp, 0.0);                     # $disp = 0.0;
-regularization = ifdef ($reg, 0.0);                  # $reg  = 0.0;
-eps = ifdef ($tol, 0.000001);                        # $tol  = 0.000001;
-max_iteration_IRLS = ifdef ($moi, 200);              # $moi  = 200;
-max_iteration_CG = ifdef ($mii, 0);                  # $mii  = 0;
-
-variance_as_power_of_the_mean = as.double (variance_as_power_of_the_mean);
-link_as_power_of_the_mean = as.double (link_as_power_of_the_mean);
-bernoulli_No_label = as.double (bernoulli_No_label);
-dispersion = as.double (dispersion);
-eps = as.double (eps);
-
-
-# Default values for output statistics:
-
-termination_code     = 0.0;
-min_beta             = 0.0 / 0.0;
-i_min_beta           = 0.0 / 0.0;
-max_beta             = 0.0 / 0.0;
-i_max_beta           = 0.0 / 0.0;
-intercept_value      = 0.0 / 0.0;
-dispersion           = 0.0 / 0.0;
-estimated_dispersion = 0.0 / 0.0;
-deviance_nodisp      = 0.0 / 0.0;
-deviance             = 0.0 / 0.0;
-
-print("BEGIN GLM SCRIPT");
-print("Reading X...");
-X = read (fileX);
-print("Reading Y...");
-Y = read (fileY);
-
-num_records  = nrow (X);
-num_features = ncol (X);
-zeros_r = matrix (0, rows = num_records, cols = 1);
-ones_r = 1 + zeros_r;
-
-# Introduce the intercept, shift and rescale the columns of X if needed
-
-if (intercept_status == 1 | intercept_status == 2)  # add the intercept column
-{
-    X = append (X, ones_r);
-    num_features = ncol (X);
-}
-
-scale_lambda = matrix (1, rows = num_features, cols = 1);
-if (intercept_status == 1 | intercept_status == 2)
-{
-    scale_lambda [num_features, 1] = 0;
-}
-
-if (intercept_status == 2)  # scale-&-shift X columns to mean 0, variance 1
-{                           # Important assumption: X [, num_features] = ones_r
-    avg_X_cols = t(colSums(X)) / num_records;
-    var_X_cols = (t(colSums (X ^ 2)) - num_records * (avg_X_cols ^ 2)) / (num_records - 1);
-    is_unsafe = ppred (var_X_cols, 0.0, "<=");
-    scale_X = 1.0 / sqrt (var_X_cols * (1 - is_unsafe) + is_unsafe);
-    scale_X [num_features, 1] = 1;
-    shift_X = - avg_X_cols * scale_X;
-    shift_X [num_features, 1] = 0;
-    rowSums_X_sq = (X ^ 2) %*% (scale_X ^ 2) + X %*% (2 * scale_X * shift_X) + sum (shift_X ^ 2);
-} else {
-    scale_X = matrix (1, rows = num_features, cols = 1);
-    shift_X = matrix (0, rows = num_features, cols = 1);
-    rowSums_X_sq = rowSums (X ^ 2);
-}
-
-# Henceforth we replace "X" with "X %*% (SHIFT/SCALE TRANSFORM)" and rowSums(X ^ 2)
-# with "rowSums_X_sq" in order to preserve the sparsity of X under shift and scale.
-# The transform is then associatively applied to the other side of the expression,
-# and is rewritten via "scale_X" and "shift_X" as follows:
-#
-# ssX_A  = (SHIFT/SCALE TRANSFORM) %*% A    --- is rewritten as:
-# ssX_A  = diag (scale_X) %*% A;
-# ssX_A [num_features, ] = ssX_A [num_features, ] + t(shift_X) %*% A;
-#
-# tssX_A = t(SHIFT/SCALE TRANSFORM) %*% A   --- is rewritten as:
-# tssX_A = diag (scale_X) %*% A + shift_X %*% A [num_features, ];
-
-# Initialize other input-dependent parameters
-
-lambda = scale_lambda * regularization;
-if (max_iteration_CG == 0) {
-    max_iteration_CG = num_features;
-}
-
-# In Bernoulli case, convert one-column "Y" into two-column
-
-if (distribution_type == 2 & ncol(Y) == 1)
-{
-    is_Y_negative = ppred (Y, bernoulli_No_label, "==");
-    Y = append (1 - is_Y_negative, is_Y_negative);
-    count_Y_negative = sum (is_Y_negative);
-    if (count_Y_negative == 0) {
-        stop ("GLM Input Error: all Y-values encode Bernoulli YES-label, none encode NO-label");
-    }
-    if (count_Y_negative == nrow(Y)) {
-        stop ("GLM Input Error: all Y-values encode Bernoulli NO-label, none encode YES-label");
-    }
-}
-
-# Set up the canonical link, if requested [Then we have: Var(mu) * (d link / d mu) = const]
-
-if (link_type == 0)
-{
-    if (distribution_type == 1) {
-        link_type = 1;
-        link_as_power_of_the_mean = 1.0 - variance_as_power_of_the_mean;
-    } else { if (distribution_type == 2) {
-            link_type = 2;
-}   }   }
-
-# For power distributions and/or links, we use two constants,
-# "variance as power of the mean" and "link_as_power_of_the_mean",
-# to specify the variance and the link as arbitrary powers of the
-# mean.  However, the variance-powers of 1.0 (Poisson family) and
-# 2.0 (Gamma family) have to be treated as special cases, because
-# these values integrate into logarithms.  The link-power of 0.0
-# is also special as it represents the logarithm link.
-
-num_response_columns = ncol (Y);
-
-is_supported = check_if_supported (num_response_columns, distribution_type, variance_as_power_of_the_mean, link_type, link_as_power_of_the_mean);
-if (is_supported == 1)
-{
-
-#####   INITIALIZE THE BETAS   #####
-
-[beta, saturated_log_l, isNaN] = 
-    glm_initialize (X, Y, distribution_type, variance_as_power_of_the_mean, link_type, link_as_power_of_the_mean, intercept_status, max_iteration_CG);
-if (isNaN == 0)
-{
-
-#####  START OF THE MAIN PART  #####
-
-sum_X_sq = sum (rowSums_X_sq);
-trust_delta = 0.5 * sqrt (num_features) / max (sqrt (rowSums_X_sq));
-###  max_trust_delta = trust_delta * 10000.0;
-log_l = 0.0;
-deviance_nodisp = 0.0;
-new_deviance_nodisp = 0.0;
-isNaN_log_l = 2;
-newbeta = beta;
-g = matrix (0.0, rows = num_features, cols = 1);
-g_norm = sqrt (sum ((g + lambda * beta) ^ 2));
-accept_new_beta = 1;
-reached_trust_boundary = 0;
-neg_log_l_change_predicted = 0.0;
-i_IRLS = 0;
-
-print ("BEGIN IRLS ITERATIONS...");
-
-ssX_newbeta = diag (scale_X) %*% newbeta;
-ssX_newbeta [num_features, ] = ssX_newbeta [num_features, ] + t(shift_X) %*% newbeta;
-all_linear_terms = X %*% ssX_newbeta;
-
-[new_log_l, isNaN_new_log_l] = glm_log_likelihood_part
-    (all_linear_terms, Y, distribution_type, variance_as_power_of_the_mean, link_type, link_as_power_of_the_mean);
-
-if (isNaN_new_log_l == 0) {
-    new_deviance_nodisp = 2.0 * (saturated_log_l - new_log_l);
-    new_log_l = new_log_l - 0.5 * sum (lambda * newbeta ^ 2);
-}
-
-if (fileLog != " ") {
-    log_str = "POINT_STEP_NORM," + i_IRLS + "," + sqrt (sum (beta ^ 2));
-    log_str = append (log_str, "OBJECTIVE," + i_IRLS + "," + (- new_log_l));
-    log_str = append (log_str, "LINEAR_TERM_MIN," + i_IRLS + "," + min (all_linear_terms));
-    log_str = append (log_str, "LINEAR_TERM_MAX," + i_IRLS + "," + max (all_linear_terms));
-} else {
-    log_str = " ";
-}
-
-while (termination_code == 0)
-{
-    accept_new_beta = 1;
-    
-    if (i_IRLS > 0)
-    {
-        if (isNaN_log_l == 0) {
-            accept_new_beta = 0;
-        }
-
-# Decide whether to accept a new iteration point and update the trust region
-# See Alg. 4.1 on p. 69 of "Numerical Optimization" 2nd ed. by Nocedal and Wright
-
-        rho = (- new_log_l + log_l) / neg_log_l_change_predicted;
-        if (rho < 0.25 | isNaN_new_log_l == 1) {
-            trust_delta = 0.25 * trust_delta;
-        }
-        if (rho > 0.75 & isNaN_new_log_l == 0 & reached_trust_boundary == 1) {
-            trust_delta = 2 * trust_delta;
-            
-### if (trust_delta > max_trust_delta) {
-###     trust_delta = max_trust_delta;
-### }
-
-        }
-        if (rho > 0.1 & isNaN_new_log_l == 0) {
-            accept_new_beta = 1;
-        }
-    }
-
-    if (fileLog != " ") {
-        log_str = append (log_str, "IS_POINT_UPDATED," + i_IRLS + "," + accept_new_beta);
-        log_str = append (log_str, "TRUST_DELTA,"      + i_IRLS + "," + trust_delta);
-    }
-    if (accept_new_beta == 1)
-    {
-        beta = newbeta;  log_l = new_log_l;  deviance_nodisp = new_deviance_nodisp;  isNaN_log_l = isNaN_new_log_l;
-        
-        [g_Y, w] = glm_dist (all_linear_terms, Y, distribution_type, variance_as_power_of_the_mean, link_type, link_as_power_of_the_mean);
-        
-        # We introduced these variables to avoid roundoff errors:
-        #     g_Y = y_residual / (y_var * link_grad);
-        #     w   = 1.0 / (y_var * link_grad * link_grad);
-                      
-        gXY = - t(X) %*% g_Y;
-        g = diag (scale_X) %*% gXY + shift_X %*% gXY [num_features, ];
-        g_norm = sqrt (sum ((g + lambda * beta) ^ 2));
-        
-        if (fileLog != " ") {
-            log_str = append (log_str, "GRADIENT_NORM," + i_IRLS + "," + g_norm);
-        }
-    }
-    
-    [z, neg_log_l_change_predicted, num_CG_iters, reached_trust_boundary] = 
-        get_CG_Steihaug_point (X, scale_X, shift_X, w, g, beta, lambda, trust_delta, max_iteration_CG);
-
-    newbeta = beta + z;
-    
-    ssX_newbeta = diag (scale_X) %*% newbeta;
-    ssX_newbeta [num_features, ] = ssX_newbeta [num_features, ] + t(shift_X) %*% newbeta;
-    all_linear_terms = X %*% ssX_newbeta;
-    
-    [new_log_l, isNaN_new_log_l] = glm_log_likelihood_part
-        (all_linear_terms, Y, distribution_type, variance_as_power_of_the_mean, link_type, link_as_power_of_the_mean);
-
-    if (isNaN_new_log_l == 0) {
-        new_deviance_nodisp = 2.0 * (saturated_log_l - new_log_l);
-        new_log_l = new_log_l - 0.5 * sum (lambda * newbeta ^ 2);
-    }
-        
-    log_l_change = new_log_l - log_l;               # R's criterion for termination: |dev - devold|/(|dev| + 0.1) < eps
-
-    if (reached_trust_boundary == 0 & isNaN_new_log_l == 0 & 
-        (2.0 * abs (log_l_change) < eps * (deviance_nodisp + 0.1) | abs (log_l_change) < (abs (log_l) + abs (new_log_l)) * 0.00000000000001) )  
-    {
-        termination_code = 1;
-    }
-    rho = - log_l_change / neg_log_l_change_predicted;
-    z_norm = sqrt (sum (z * z));
-    
-    [z_norm_m, z_norm_e] = round_to_print (z_norm);
-    [trust_delta_m, trust_delta_e] = round_to_print (trust_delta);
-    [rho_m, rho_e] = round_to_print (rho);
-    [new_log_l_m, new_log_l_e] = round_to_print (new_log_l);
-    [log_l_change_m, log_l_change_e] = round_to_print (log_l_change);
-    [g_norm_m, g_norm_e] = round_to_print (g_norm);
-
-    i_IRLS = i_IRLS + 1;
-    print ("Iter #" + i_IRLS + " completed"
-        + ", ||z|| = " + z_norm_m + "E" + z_norm_e
-        + ", trust_delta = " + trust_delta_m + "E" + trust_delta_e
-        + ", reached = " + reached_trust_boundary
-        + ", ||g|| = " + g_norm_m + "E" + g_norm_e
-        + ", new_log_l = " + new_log_l_m + "E" + new_log_l_e
-        + ", log_l_change = " + log_l_change_m + "E" + log_l_change_e
-        + ", rho = " + rho_m + "E" + rho_e);
-        
-    if (fileLog != " ") {
-        log_str = append (log_str, "NUM_CG_ITERS,"     + i_IRLS + "," + num_CG_iters);
-        log_str = append (log_str, "IS_TRUST_REACHED," + i_IRLS + "," + reached_trust_boundary);
-        log_str = append (log_str, "POINT_STEP_NORM,"  + i_IRLS + "," + z_norm);
-        log_str = append (log_str, "OBJECTIVE,"        + i_IRLS + "," + (- new_log_l));
-        log_str = append (log_str, "OBJ_DROP_REAL,"    + i_IRLS + "," + log_l_change);
-        log_str = append (log_str, "OBJ_DROP_PRED,"    + i_IRLS + "," + (- neg_log_l_change_predicted));
-        log_str = append (log_str, "OBJ_DROP_RATIO,"   + i_IRLS + "," + rho);
-        log_str = append (log_str, "LINEAR_TERM_MIN,"  + i_IRLS + "," + min (all_linear_terms));
-        log_str = append (log_str, "LINEAR_TERM_MAX,"  + i_IRLS + "," + max (all_linear_terms));
-    }
-        
-    if (i_IRLS == max_iteration_IRLS) {
-        termination_code = 2;
-    }
-}
-
-beta = newbeta;
-log_l = new_log_l;
-deviance_nodisp = new_deviance_nodisp;
-
-if (termination_code == 1) {
-    print ("Converged in " + i_IRLS + " steps.");
-} else {
-    print ("Did not converge.");
-}
-
-ssX_beta = diag (scale_X) %*% beta;
-ssX_beta [num_features, ] = ssX_beta [num_features, ] + t(shift_X) %*% beta;
-if (intercept_status == 2) {
-    beta_out = append (ssX_beta, beta);
-} else {
-    beta_out = ssX_beta;
-}
-
-write (beta_out, fileB, format=fmtB);
-
-if (intercept_status == 1 | intercept_status == 2) {
-    intercept_value = castAsScalar (beta_out [num_features, 1]);
-    beta_noicept = beta_out [1 : (num_features - 1), 1];
-} else {
-    beta_noicept = beta_out [1 : num_features, 1];
-}
-min_beta = min (beta_noicept);
-max_beta = max (beta_noicept);
-tmp_i_min_beta = rowIndexMin (t(beta_noicept))
-i_min_beta = castAsScalar (tmp_i_min_beta [1, 1]);
-tmp_i_max_beta = rowIndexMax (t(beta_noicept))
-i_max_beta = castAsScalar (tmp_i_max_beta [1, 1]);
-
-#####  OVER-DISPERSION PART  #####
-
-all_linear_terms = X %*% ssX_beta;
-[g_Y, w] = glm_dist (all_linear_terms, Y, distribution_type, variance_as_power_of_the_mean, link_type, link_as_power_of_the_mean);
-    
-pearson_residual_sq = g_Y ^ 2 / w;
-pearson_residual_sq = replace (target = pearson_residual_sq, pattern = 0.0/0.0, replacement = 0);
-# pearson_residual_sq = (y_residual ^ 2) / y_var;
-
-if (num_records > num_features) {
-    estimated_dispersion = sum (pearson_residual_sq) / (num_records - num_features);
-}
-if (dispersion <= 0.0) {
-    dispersion = estimated_dispersion;
-}
-deviance = deviance_nodisp / dispersion;
-
-if (fileLog != " ") {
-    write (log_str, fileLog);
-}
-
-#####  END OF THE MAIN PART  #####
-
-} else { print ("Input matrices are out of range.  Terminating the DML."); termination_code = 3; }
-} else { print ("Distribution/Link not supported.  Terminating the DML."); termination_code = 4; }
-
-str = "TERMINATION_CODE," + termination_code;
-str = append (str, "BETA_MIN," + min_beta);
-str = append (str, "BETA_MIN_INDEX," + i_min_beta);
-str = append (str, "BETA_MAX," + max_beta);
-str = append (str, "BETA_MAX_INDEX," + i_max_beta);
-str = append (str, "INTERCEPT," + intercept_value);
-str = append (str, "DISPERSION," + dispersion);
-str = append (str, "DISPERSION_EST," + estimated_dispersion);
-str = append (str, "DEVIANCE_UNSCALED," + deviance_nodisp);
-str = append (str, "DEVIANCE_SCALED," + deviance);
-
-if (fileO != " ") {
-    write (str, fileO);
-} else {
-    print (str);
-}
-
-
-
-
-check_if_supported = 
-    function (int ncol_y, int dist_type, double var_power, int link_type, double link_power)
-    return   (int is_supported)
-{
-    is_supported = 0;
-    if (ncol_y == 1 & dist_type == 1 & link_type == 1)
-    { # POWER DISTRIBUTION
-        is_supported = 1;
-        if (var_power == 0.0 & link_power == -1.0) {print ("Gaussian.inverse");      } else {
-        if (var_power == 0.0 & link_power ==  0.0) {print ("Gaussian.log");          } else {
-        if (var_power == 0.0 & link_power ==  0.5) {print ("Gaussian.sqrt");         } else {
-        if (var_power == 0.0 & link_power ==  1.0) {print ("Gaussian.id");           } else {
-        if (var_power == 0.0                     ) {print ("Gaussian.power_nonlog"); } else {
-        if (var_power == 1.0 & link_power == -1.0) {print ("Poisson.inverse");       } else {
-        if (var_power == 1.0 & link_power ==  0.0) {print ("Poisson.log");           } else {
-        if (var_power == 1.0 & link_power ==  0.5) {print ("Poisson.sqrt");          } else {
-        if (var_power == 1.0 & link_power ==  1.0) {print ("Poisson.id");            } else {
-        if (var_power == 1.0                     ) {print ("Poisson.power_nonlog");  } else {
-        if (var_power == 2.0 & link_power == -1.0) {print ("Gamma.inverse");         } else {
-        if (var_power == 2.0 & link_power ==  0.0) {print ("Gamma.log");             } else {
-        if (var_power == 2.0 & link_power ==  0.5) {print ("Gamma.sqrt");            } else {
-        if (var_power == 2.0 & link_power ==  1.0) {print ("Gamma.id");              } else {
-        if (var_power == 2.0                     ) {print ("Gamma.power_nonlog");    } else {
-        if (var_power == 3.0 & link_power == -2.0) {print ("InvGaussian.1/mu^2");    } else {
-        if (var_power == 3.0 & link_power == -1.0) {print ("InvGaussian.inverse");   } else {
-        if (var_power == 3.0 & link_power ==  0.0) {print ("InvGaussian.log");       } else {
-        if (var_power == 3.0 & link_power ==  0.5) {print ("InvGaussian.sqrt");      } else {
-        if (var_power == 3.0 & link_power ==  1.0) {print ("InvGaussian.id");        } else {
-        if (var_power == 3.0                     ) {print ("InvGaussian.power_nonlog");}else{
-        if (                   link_power ==  0.0) {print ("PowerDist.log");         } else {
-                                                    print ("PowerDist.power_nonlog");
-    }   }}}}} }}}}} }}}}} }}}}} }}
-    if (ncol_y == 1 & dist_type == 2)
-    {
-        print ("Error: Bernoulli response matrix has not been converted into two-column format.");
-    }
-    if (ncol_y == 2 & dist_type == 2 & link_type >= 1 & link_type <= 5)
-    { # BINOMIAL/BERNOULLI DISTRIBUTION
-        is_supported = 1;
-        if (link_type == 1 & link_power == -1.0) {print ("Binomial.inverse");        } else {
-        if (link_type == 1 & link_power ==  0.0) {print ("Binomial.log");            } else {
-        if (link_type == 1 & link_power ==  0.5) {print ("Binomial.sqrt");           } else {
-        if (link_type == 1 & link_power ==  1.0) {print ("Binomial.id");             } else {
-        if (link_type == 1)                      {print ("Binomial.power_nonlog");   } else {
-        if (link_type == 2)                      {print ("Binomial.logit");          } else {
-        if (link_type == 3)                      {print ("Binomial.probit");         } else {
-        if (link_type == 4)                      {print ("Binomial.cloglog");        } else {
-        if (link_type == 5)                      {print ("Binomial.cauchit");        }
-    }   }}}}} }}}
-    if (is_supported == 0) {
-        print ("Response matrix with " + ncol_y + " columns, distribution family (" + dist_type + ", " + var_power
-             + ") and link family (" + link_type + ", " + link_power + ") are NOT supported together.");
-    }
-}
-
-glm_initialize = function (Matrix[double] X, Matrix[double] Y, int dist_type, double var_power, int link_type, double link_power, int icept_status, int max_iter_CG)
-return (Matrix[double] beta, double saturated_log_l, int isNaN)
-{
-    saturated_log_l = 0.0;
-    isNaN = 0;
-    y_corr = Y [, 1];
-    if (dist_type == 2) {
-        n_corr = rowSums (Y);
-        is_n_zero = ppred (n_corr, 0.0, "==");
-        y_corr = Y [, 1] / (n_corr + is_n_zero) + (0.5 - Y [, 1]) * is_n_zero;    
-    }
-    linear_terms = y_corr;
-    if (dist_type == 1 & link_type == 1) { # POWER DISTRIBUTION
-        if          (link_power ==  0.0) {
-            if (sum (ppred (y_corr, 0.0, "<")) == 0) {
-                is_zero_y_corr = ppred (y_corr, 0.0, "==");
-                linear_terms = log (y_corr + is_zero_y_corr) - is_zero_y_corr / (1.0 - is_zero_y_corr);
-            } else { isNaN = 1; }
-        } else { if (link_power ==  1.0) {
-            linear_terms = y_corr;
-        } else { if (link_power == -1.0) {
-            linear_terms = 1.0 / y_corr;
-        } else { if (link_power ==  0.5) {
-            if (sum (ppred (y_corr, 0.0, "<")) == 0) {
-                linear_terms = sqrt (y_corr);
-            } else { isNaN = 1; }
-        } else { if (link_power >   0.0) {
-            if (sum (ppred (y_corr, 0.0, "<")) == 0) {
-                is_zero_y_corr = ppred (y_corr, 0.0, "==");
-                linear_terms = (y_corr + is_zero_y_corr) ^ link_power - is_zero_y_corr;
-            } else { isNaN = 1; }
-        } else {
-            if (sum (ppred (y_corr, 0.0, "<=")) == 0) {
-                linear_terms = y_corr ^ link_power;
-            } else { isNaN = 1; }
-        }}}}}
-    }
-    if (dist_type == 2 & link_type >= 1 & link_type <= 5)
-    { # BINOMIAL/BERNOULLI DISTRIBUTION
-        if          (link_type == 1 & link_power == 0.0)  { # Binomial.log
-            if (sum (ppred (y_corr, 0.0, "<")) == 0) {
-                is_zero_y_corr = ppred (y_corr, 0.0, "==");
-                linear_terms = log (y_corr + is_zero_y_corr) - is_zero_y_corr / (1.0 - is_zero_y_corr);
-            } else { isNaN = 1; }
-        } else { if (link_type == 1 & link_power >  0.0)  { # Binomial.power_nonlog pos
-            if (sum (ppred (y_corr, 0.0, "<")) == 0) {
-                is_zero_y_corr = ppred (y_corr, 0.0, "==");
-                linear_terms = (y_corr + is_zero_y_corr) ^ link_power - is_zero_y_corr;
-            } else { isNaN = 1; }
-        } else { if (link_type == 1)                      { # Binomial.power_nonlog neg
-            if (sum (ppred (y_corr, 0.0, "<=")) == 0) {
-                linear_terms = y_corr ^ link_power;
-            } else { isNaN = 1; }
-        } else { 
-            is_zero_y_corr = ppred (y_corr, 0.0, "<=");
-            is_one_y_corr  = ppred (y_corr, 1.0, ">=");
-            y_corr = y_corr * (1.0 - is_zero_y_corr) * (1.0 - is_one_y_corr) + 0.5 * (is_zero_y_corr + is_one_y_corr);
-            if (link_type == 2)                           { # Binomial.logit
-                linear_terms = log (y_corr / (1.0 - y_corr)) 
-                    + is_one_y_corr / (1.0 - is_one_y_corr) - is_zero_y_corr / (1.0 - is_zero_y_corr);
-            } else { if (link_type == 3)                  { # Binomial.probit
-                y_below_half = y_corr + (1.0 - 2.0 * y_corr) * ppred (y_corr, 0.5, ">");
-                t = sqrt (- 2.0 * log (y_below_half));
-                approx_inv_Gauss_CDF = - t + (2.515517 + t * (0.802853 + t * 0.010328)) / (1.0 + t * (1.432788 + t * (0.189269 + t * 0.001308)));
-                linear_terms = approx_inv_Gauss_CDF * (1.0 - 2.0 * ppred (y_corr, 0.5, ">"))
-                    + is_one_y_corr / (1.0 - is_one_y_corr) - is_zero_y_corr / (1.0 - is_zero_y_corr);
-            } else { if (link_type == 4)                  { # Binomial.cloglog
-                linear_terms = log (- log (1.0 - y_corr))
-                    - log (- log (0.5)) * (is_zero_y_corr + is_one_y_corr)
-                    + is_one_y_corr / (1.0 - is_one_y_corr) - is_zero_y_corr / (1.0 - is_zero_y_corr);
-            } else { if (link_type == 5)                  { # Binomial.cauchit
-                linear_terms = tan ((y_corr - 0.5) * 3.1415926535897932384626433832795)
-                    + is_one_y_corr / (1.0 - is_one_y_corr) - is_zero_y_corr / (1.0 - is_zero_y_corr);
-        }}  }}}}}
-    }
-    
-    if (isNaN == 0) {
-        [saturated_log_l, isNaN] = 
-            glm_log_likelihood_part (linear_terms, Y, dist_type, var_power, link_type, link_power);
-    }
-    
-    if ((dist_type == 1 & link_type == 1 & link_power == 0.0) |
-        (dist_type == 2 & link_type >= 2))
-    {    
-        desired_eta = 0.0;
-    } else { if (link_type == 1 & link_power == 0.0) {
-        desired_eta = log (0.5);
-    } else { if (link_type == 1) {
-        desired_eta = 0.5 ^ link_power;
-    } else {
-        desired_eta = 0.5;
-    }}}
-    
-    beta = matrix (0.0, rows = ncol(X), cols = 1);
-    
-    if (desired_eta != 0.0) {
-        if (icept_status == 1 | icept_status == 2) {
-            beta [nrow(beta), 1] = desired_eta;
-        } else {
-            # We want: avg (X %*% ssX_transform %*% beta) = desired_eta
-            # Note that "ssX_transform" is trivial here, hence ignored
-            
-            beta = straightenX (X, 0.000001, max_iter_CG);  
-            beta = beta * desired_eta;
-}   }   }
-
-
-glm_dist = function (Matrix[double] linear_terms, Matrix[double] Y,
-                     int dist_type, double var_power, int link_type, double link_power)
-    return (Matrix[double] g_Y, Matrix[double] w)
-    # ORIGINALLY we returned more meaningful vectors, namely:
-    # Matrix[double] y_residual    : y - y_mean, i.e. y observed - y predicted
-    # Matrix[double] link_gradient : derivative of the link function
-    # Matrix[double] var_function  : variance without dispersion, i.e. the V(mu) function
-    # BUT, this caused roundoff errors, so we had to compute "directly useful" vectors
-    # and skip over the "meaningful intermediaries".  Now we output these two variables:
-    #     g_Y = y_residual / (var_function * link_gradient);
-    #     w   = 1.0 / (var_function * link_gradient ^ 2);
-{   # Computes g_Y and w (one row per row of linear_terms) for the given distribution and link codes
-    num_records = nrow (linear_terms);
-    zeros_r = matrix (0.0, rows = num_records, cols = 1);
-    ones_r = 1 + zeros_r;
-    g_Y  = zeros_r;   # both outputs start as all-zero columns and stay zero if no branch below applies
-    w  = zeros_r;
-
-    # Some constants
-
-    one_over_sqrt_two_pi = 0.39894228040143267793994605993438;
-    ones_2 = matrix (1.0, rows = 1, cols = 2);   # 1x2 row vectors; (column %*% row) spreads a column into two columns
-    p_one_m_one = ones_2;
-    p_one_m_one [1, 2] = -1.0;   # p_one_m_one = [ 1, -1]
-    m_one_p_one = ones_2;
-    m_one_p_one [1, 1] = -1.0;   # m_one_p_one = [-1,  1]; not referenced again in this function
-    zero_one = ones_2;
-    zero_one [1, 1] = 0.0;       # zero_one = [0, 1]
-    one_zero = ones_2;
-    one_zero [1, 2] = 0.0;       # one_zero = [1, 0]
-    flip_pos = matrix (0, rows = 2, cols = 2);
-    flip_neg = flip_pos;
-    flip_pos [1, 2] = 1;
-    flip_pos [2, 1] = 1;         # flip_pos = [0 1; 1 0], so (A %*% flip_pos) = [A[,2],  A[,1]]
-    flip_neg [1, 2] = -1;
-    flip_neg [2, 1] = 1;         # flip_neg = [0 -1; 1 0], so (A %*% flip_neg) = [A[,2], -A[,1]]
-    
-    if (dist_type == 1 & link_type == 1) { # POWER DISTRIBUTION
-        y_mean = zeros_r;
-        if          (link_power ==  0.0) {   # log link: y_mean = exp (linear_terms)
-            y_mean = exp (linear_terms);
-            y_mean_pow = y_mean ^ (1 - var_power);
-            w   = y_mean_pow * y_mean;
-            g_Y = y_mean_pow * (Y - y_mean);
-        } else { if (link_power ==  1.0) {   # identity link: y_mean = linear_terms
-            y_mean = linear_terms;
-            w   = y_mean ^ (- var_power);
-            g_Y = w * (Y - y_mean);
-        } else {                             # any other power link: y_mean = linear_terms ^ (1 / link_power)
-            y_mean = linear_terms ^ (1.0 / link_power);
-            c1  = (1 - var_power) / link_power - 1;
-            c2  = (2 - var_power) / link_power - 2;
-            g_Y = (linear_terms ^ c1) * (Y - y_mean) / link_power;
-            w   = (linear_terms ^ c2) / (link_power ^ 2);
-    }   }}
-    if (dist_type == 2 & link_type >= 1 & link_type <= 5)
-    { # BINOMIAL/BERNOULLI DISTRIBUTION; both Y [, 1] and Y [, 2] are read below, so Y needs two columns here
-        if (link_type == 1) { # BINOMIAL.POWER LINKS
-            if (link_power == 0.0)  { # Binomial.log
-                vec1 = 1 / (exp (- linear_terms) - 1);
-                g_Y = Y [, 1] - Y [, 2] * vec1;
-                w   = rowSums (Y) * vec1;
-            } else {                  # Binomial.nonlog
-                vec1 = zeros_r;
-                if (link_power == 0.5)  {
-                    vec1 = 1 / (1 - linear_terms ^ 2);
-                } else { if (sum (ppred (linear_terms, 0.0, "<")) == 0) {   # general power only when no linear term is negative
-                    vec1 = linear_terms ^ (- 2 + 1 / link_power) / (1 - linear_terms ^ (1 / link_power));
-                } else {isNaN = 1;}}   # NOTE(review): isNaN is not in this function's return list, so the flag is not returned; vec1 stays all-zero
-                # We want a "zero-protected" version of
-                #     vec2 = Y [, 1] / linear_terms;
-                is_y_0 = ppred (Y [, 1], 0.0, "==");
-                vec2 = (Y [, 1] + is_y_0) / (linear_terms * (1 - is_y_0) + is_y_0) - is_y_0;   # rows with Y [, 1] == 0 evaluate to (1 / 1) - 1 = 0
-                g_Y =  (vec2 - Y [, 2] * vec1 * linear_terms) / link_power;
-                w   =  rowSums (Y) * vec1 / link_power ^ 2;
-            }
-        } else {
-            is_LT_pos_infinite = ppred (linear_terms,  1.0/0.0, "==");
-            is_LT_neg_infinite = ppred (linear_terms, -1.0/0.0, "==");
-            is_LT_infinite = is_LT_pos_infinite %*% one_zero + is_LT_neg_infinite %*% zero_one;   # two columns: [is +infinity, is -infinity]
-            finite_linear_terms = replace (target =        linear_terms, pattern =  1.0/0.0, replacement = 0);
-            finite_linear_terms = replace (target = finite_linear_terms, pattern = -1.0/0.0, replacement = 0);   # infinite entries replaced by 0
-            if (link_type == 2)                           { # Binomial.logit
-                Y_prob = exp (finite_linear_terms) %*% one_zero + ones_r %*% zero_one;   # [exp (eta), 1] before normalization
-                Y_prob = Y_prob / (rowSums (Y_prob) %*% ones_2);
-                Y_prob = Y_prob * ((1.0 - rowSums (is_LT_infinite)) %*% ones_2) + is_LT_infinite;   # rows with infinite eta become [1, 0] or [0, 1]
-                g_Y = rowSums (Y * (Y_prob %*% flip_neg));           ### = y_residual;
-                w   = rowSums (Y * (Y_prob %*% flip_pos) * Y_prob);  ### = y_variance;
-            } else { if (link_type == 3)                  { # Binomial.probit
-                is_lt_pos = ppred (linear_terms, 0.0, ">=");
-                t_gp = 1.0 / (1.0 + abs (finite_linear_terms) * 0.231641888);  # 0.231641888 = 0.3275911 / sqrt (2.0)
-                pt_gp = t_gp * ( 0.254829592 
-                      + t_gp * (-0.284496736 # "Handbook of Mathematical Functions", ed. by M. Abramowitz and I.A. Stegun,
-                      + t_gp * ( 1.421413741 # U.S. Nat-l Bureau of Standards, 10th print (Dec 1972), Sec. 7.1.26, p. 299
-                      + t_gp * (-1.453152027 
-                      + t_gp *   1.061405429))));
-                the_gauss_exp = exp (- (linear_terms ^ 2) / 2.0);   # uses linear_terms, not finite_linear_terms
-                vec1 = 0.25 * pt_gp * (2 - the_gauss_exp * pt_gp);
-                vec2 = Y [, 1] - rowSums (Y) * is_lt_pos + the_gauss_exp * pt_gp * rowSums (Y) * (is_lt_pos - 0.5);
-                w   = the_gauss_exp * (one_over_sqrt_two_pi ^ 2) * rowSums (Y) / vec1;
-                g_Y = one_over_sqrt_two_pi * vec2 / vec1;
-            } else { if (link_type == 4)                  { # Binomial.cloglog
-                the_exp = exp (linear_terms)
-                the_exp_exp = exp (- the_exp);
-                is_too_small = ppred (10000000 + the_exp, 10000000, "==");   # 1 where adding the_exp to 10000000 does not change the sum
-                the_exp_ratio = (1 - is_too_small) * (1 - the_exp_exp) / (the_exp + is_too_small) + is_too_small * (1 - the_exp / 2);   # (1 - exp (-e)) / e, replaced by 1 - e / 2 where e is too small
-                g_Y =  (rowSums (Y) * the_exp_exp - Y [, 2]) / the_exp_ratio;
-                w   =  the_exp_exp * the_exp * rowSums (Y) / the_exp_ratio;
-            } else { if (link_type == 5)                  { # Binomial.cauchit
-                Y_prob = 0.5 + (atan (finite_linear_terms) %*% p_one_m_one) / 3.1415926535897932384626433832795;   # [0.5 + atan (eta) / pi, 0.5 - atan (eta) / pi]
-                Y_prob = Y_prob * ((1.0 - rowSums (is_LT_infinite)) %*% ones_2) + is_LT_infinite;   # rows with infinite eta become [1, 0] or [0, 1]
-                y_residual = Y [, 1] * Y_prob [, 2] - Y [, 2] * Y_prob [, 1];
-                var_function = rowSums (Y) * Y_prob [, 1] * Y_prob [, 2];
-                link_gradient_normalized = (1 + linear_terms ^ 2) * 3.1415926535897932384626433832795;
-                g_Y =  rowSums (Y) * y_residual / (var_function * link_gradient_normalized);
-                w   = (rowSums (Y) ^ 2) / (var_function * link_gradient_normalized ^ 2);
-            }}}}   
-        }
-    }
-}
-
-
-glm_log_likelihood_part = function (Matrix[double] linear_terms, Matrix[double] Y,
-        int dist_type, double var_power, int link_type, double link_power)
-    return (double log_l, int isNaN)
-{   # Returns log_l as a sum over records, plus isNaN = 1 (with log_l = -infinity) when the value is invalid or not finite
-    isNaN = 0;
-    log_l = 0.0;
-    num_records = nrow (Y);
-    zeros_r = matrix (0.0, rows = num_records, cols = 1);
-    
-    if (dist_type == 1 & link_type == 1)
-    { # POWER DISTRIBUTION
-        b_cumulant = zeros_r;
-        natural_parameters = zeros_r;
-        is_natural_parameter_log_zero = zeros_r;   # 0/1 marker set by the Poisson branches where the natural parameter would be log (0)
-        if          (var_power == 1.0 & link_power == 0.0)  { # Poisson.log
-            b_cumulant = exp (linear_terms);
-            is_natural_parameter_log_zero = ppred (linear_terms, -1.0/0.0, "==");   # -1.0/0.0 is negative infinity
-            natural_parameters = replace (target = linear_terms, pattern = -1.0/0.0, replacement = 0);
-        } else { if (var_power == 1.0 & link_power == 1.0)  { # Poisson.id
-            if (sum (ppred (linear_terms, 0.0, "<")) == 0)  {   # requires every linear term >= 0
-                b_cumulant = linear_terms;
-                is_natural_parameter_log_zero = ppred (linear_terms, 0.0, "==");
-                natural_parameters = log (linear_terms + is_natural_parameter_log_zero);   # adding the marker turns log (0) into log (1) = 0
-            } else {isNaN = 1;}
-        } else { if (var_power == 1.0 & link_power == 0.5)  { # Poisson.sqrt
-            if (sum (ppred (linear_terms, 0.0, "<")) == 0)  {
-                b_cumulant = linear_terms ^ 2;
-                is_natural_parameter_log_zero = ppred (linear_terms, 0.0, "==");
-                natural_parameters = 2.0 * log (linear_terms + is_natural_parameter_log_zero);
-            } else {isNaN = 1;}
-        } else { if (var_power == 1.0 & link_power  > 0.0)  { # Poisson.power_nonlog, pos
-            if (sum (ppred (linear_terms, 0.0, "<")) == 0)  {
-                is_natural_parameter_log_zero = ppred (linear_terms, 0.0, "==");
-                b_cumulant = (linear_terms + is_natural_parameter_log_zero) ^ (1.0 / link_power) - is_natural_parameter_log_zero;
-                natural_parameters = log (linear_terms + is_natural_parameter_log_zero) / link_power;
-            } else {isNaN = 1;}
-        } else { if (var_power == 1.0)                      { # Poisson.power_nonlog, neg
-            if (sum (ppred (linear_terms, 0.0, "<=")) == 0) {   # requires every linear term > 0
-                b_cumulant = linear_terms ^ (1.0 / link_power);
-                natural_parameters = log (linear_terms) / link_power;
-            } else {isNaN = 1;}
-        } else { if (var_power == 2.0 & link_power == -1.0) { # Gamma.inverse
-            if (sum (ppred (linear_terms, 0.0, "<=")) == 0) {
-                b_cumulant = - log (linear_terms);
-                natural_parameters = - linear_terms;
-            } else {isNaN = 1;}
-        } else { if (var_power == 2.0 & link_power ==  1.0) { # Gamma.id
-            if (sum (ppred (linear_terms, 0.0, "<=")) == 0) {
-                b_cumulant = log (linear_terms);
-                natural_parameters = - 1.0 / linear_terms;
-            } else {isNaN = 1;}
-        } else { if (var_power == 2.0 & link_power ==  0.0) { # Gamma.log
-            b_cumulant = linear_terms;
-            natural_parameters = - exp (- linear_terms);
-        } else { if (var_power == 2.0)                      { # Gamma.power_nonlog
-            if (sum (ppred (linear_terms, 0.0, "<=")) == 0) {
-                b_cumulant = log (linear_terms) / link_power;
-                natural_parameters = - linear_terms ^ (- 1.0 / link_power);
-            } else {isNaN = 1;}
-        } else { if                    (link_power ==  0.0) { # PowerDist.log
-            natural_parameters = exp (linear_terms * (1.0 - var_power)) / (1.0 - var_power);
-            b_cumulant = exp (linear_terms * (2.0 - var_power)) / (2.0 - var_power);
-        } else {                                              # PowerDist.power_nonlog
-            if          (-2 * link_power == 1.0 - var_power) {   # exponents -2, -1, 1 and 2 are written out explicitly; only the general case checks positivity
-                natural_parameters = 1.0 / (linear_terms ^ 2) / (1.0 - var_power);
-            } else { if (-1 * link_power == 1.0 - var_power) {
-                natural_parameters = 1.0 / linear_terms / (1.0 - var_power);
-            } else { if (     link_power == 1.0 - var_power) {
-                natural_parameters = linear_terms / (1.0 - var_power);
-            } else { if ( 2 * link_power == 1.0 - var_power) {
-                natural_parameters = linear_terms ^ 2 / (1.0 - var_power);
-            } else {
-                if (sum (ppred (linear_terms, 0.0, "<=")) == 0) {
-                    power = (1.0 - var_power) / link_power;
-                    natural_parameters = (linear_terms ^ power) / (1.0 - var_power);
-                } else {isNaN = 1;}
-            }}}}
-            if          (-2 * link_power == 2.0 - var_power) {   # same case split as above, now for b_cumulant with (2.0 - var_power)
-                b_cumulant = 1.0 / (linear_terms ^ 2) / (2.0 - var_power);
-            } else { if (-1 * link_power == 2.0 - var_power) {
-                b_cumulant = 1.0 / linear_terms / (2.0 - var_power);
-            } else { if (     link_power == 2.0 - var_power) {
-                b_cumulant = linear_terms / (2.0 - var_power);
-            } else { if ( 2 * link_power == 2.0 - var_power) {
-                b_cumulant = linear_terms ^ 2 / (2.0 - var_power);
-            } else {
-                if (sum (ppred (linear_terms, 0.0, "<=")) == 0) {
-                    power = (2.0 - var_power) / link_power;
-                    b_cumulant = (linear_terms ^ power) / (2.0 - var_power);
-                } else {isNaN = 1;}
-            }}}}
-        }}}}} }}}}}
-        if (sum (is_natural_parameter_log_zero * abs (Y)) > 0.0) {   # some record has a log-zero natural parameter but a nonzero Y
-            log_l = -1.0 / 0.0;
-            isNaN = 1;
-        }
-        if (isNaN == 0)
-        {
-            log_l = sum (Y * natural_parameters - b_cumulant);
-            if (log_l != log_l | (log_l == log_l + 1.0 & log_l == log_l * 2.0)) {   # first test catches NaN, second catches +/-infinity
-                log_l = -1.0 / 0.0;
-                isNaN = 1;
-    }   }   }
-    
-    if (dist_type == 2 & link_type >= 1 & link_type <= 5)
-    { # BINOMIAL/BERNOULLI DISTRIBUTION
-    
-        [Y_prob, isNaN] = binomial_probability_two_column (linear_terms, link_type, link_power);   # overwrites isNaN with the helper's flag
-        
-        if (isNaN == 0) {            
-            does_prob_contradict = ppred (Y_prob, 0.0, "<=");   # 1 where the computed probability is <= 0
-            if (sum (does_prob_contradict * abs (Y)) == 0.0) {   # no nonzero Y entry is paired with a non-positive probability
-                log_l = sum (Y * log (Y_prob * (1 - does_prob_contradict) + does_prob_contradict));   # marked entries become log (1) = 0
-                if (log_l != log_l | (log_l == log_l + 1.0 & log_l == log_l * 2.0)) {   # NaN or +/-infinity, as above
-                    isNaN = 1;
-                }
-            } else {
-                log_l = -1.0 / 0.0;
-                isNaN = 1;
-    }   }   }
-    
-    if (isNaN == 1) {   # every failure path reports log_l = -infinity
-        log_l = - 1.0 / 0.0; 
-    }
-}
-
-
-
-binomial_probability_two_column =
-    function (Matrix[double] linear_terms, int link_type, double link_power)
-    return   (Matrix[double] Y_prob, int isNaN)
-{   # Builds the two-column matrix Y_prob (one row per linear term) for the given link; isNaN = 1 marks an unsupported case
-    isNaN = 0;
-    num_records = nrow (linear_terms);
-
-    # Define some auxiliary matrices
-
-    ones_2 = matrix (1.0, rows = 1, cols = 2);
-    p_one_m_one = ones_2;
-    p_one_m_one [1, 2] = -1.0;   # p_one_m_one = [ 1, -1]
-    m_one_p_one = ones_2;
-    m_one_p_one [1, 1] = -1.0;   # m_one_p_one = [-1,  1]; not referenced again in this function
-    zero_one = ones_2;
-    zero_one [1, 1] = 0.0;       # zero_one = [0, 1]
-    one_zero = ones_2;
-    one_zero [1, 2] = 0.0;       # one_zero = [1, 0]
-
-    zeros_r = matrix (0.0, rows = num_records, cols = 1);
-    ones_r = 1.0 + zeros_r;
-
-    # Begin the function body
-
-    Y_prob = zeros_r %*% ones_2;   # num_records x 2 matrix of zeros
-    if (link_type == 1) { # Binomial.power
-        if          (link_power == 0.0) { # Binomial.log
-            Y_prob = exp (linear_terms) %*% p_one_m_one + ones_r %*% zero_one;    # [p, 1 - p] with p = exp (linear_terms)
-        } else { if (link_power == 0.5) { # Binomial.sqrt
-            Y_prob = (linear_terms ^ 2) %*% p_one_m_one + ones_r %*% zero_one;    # [p, 1 - p] with p = linear_terms ^ 2
-        } else {                          # Binomial.power_nonlog
-            if (sum (ppred (linear_terms, 0.0, "<")) == 0) {   # requires every linear term >= 0
-                Y_prob = (linear_terms ^ (1.0 / link_power)) %*% p_one_m_one + ones_r %*% zero_one;    # [p, 1 - p] with p = linear_terms ^ (1 / link_power)
-            } else {isNaN = 1;}
-        }}
-    } else {              # Binomial.non_power
-        is_LT_pos_infinite = ppred (linear_terms,  1.0/0.0, "==");
-        is_LT_neg_infinite = ppred (linear_terms, -1.0/0.0, "==");
-        is_LT_infinite = is_LT_pos_infinite %*% one_zero + is_LT_neg_infinite %*% zero_one;   # two columns: [is +infinity, is -infinity]
-        finite_linear_terms = replace (target =        linear_terms, pattern =  1.0/0.0, replacement = 0);
-        finite_linear_terms = replace (target = finite_linear_terms, pattern = -1.0/0.0, replacement = 0);   # infinite entries replaced by 0
-        if (link_type == 2)             { # Binomial.logit
-            Y_prob = exp (finite_linear_terms) %*% one_zero + ones_r %*% zero_one;   # [exp (eta), 1] before normalization
-            Y_prob = Y_prob / (rowSums (Y_prob) %*% ones_2);
-        } else { if (link_type == 3)    { # Binomial.probit
-            lt_pos_neg = ppred (finite_linear_terms, 0.0, ">=") %*% p_one_m_one + ones_r %*% zero_one;   # [1, 0] where eta >= 0, else [0, 1]
-            t_gp = 1.0 / (1.0 + abs (finite_linear_terms) * 0.231641888);  # 0.231641888 = 0.3275911 / sqrt (2.0)
-            pt_gp = t_gp * ( 0.254829592 
-                  + t_gp * (-0.284496736 # "Handbook of Mathematical Functions", ed. by M. Abramowitz and I.A. Stegun,
-                  + t_gp * ( 1.421413741 # U.S. Nat-l Bureau of Standards, 10th print (Dec 1972), Sec. 7.1.26, p. 299
-                  + t_gp * (-1.453152027 
-                  + t_gp *   1.061405429))));
-            the_gauss_exp = exp (- (finite_linear_terms ^ 2) / 2.0);
-            Y_prob = lt_pos_neg + ((the_gauss_exp * pt_gp) %*% ones_2) * (0.5 - lt_pos_neg);
-        } else { if (link_type == 4)    { # Binomial.cloglog
-            the_exp = exp (finite_linear_terms);
-            the_exp_exp = exp (- the_exp);
-            is_too_small = ppred (10000000 + the_exp, 10000000, "==");   # 1 where adding the_exp to 10000000 does not change the sum
-            Y_prob [, 1] = (1 - is_too_small) * (1 - the_exp_exp) + is_too_small * the_exp * (1 - the_exp / 2);   # 1 - exp (-e), replaced by e * (1 - e / 2) where e is too small
-            Y_prob [, 2] = the_exp_exp;
-        } else { if (link_type == 5)    { # Binomial.cauchit
-            Y_prob = 0.5 + (atan (finite_linear_terms) %*% p_one_m_one) / 3.1415926535897932384626433832795;   # [0.5 + atan (eta) / pi, 0.5 - atan (eta) / pi]
-        } else {
-            isNaN = 1;   # link_type is none of 1..5
-        }}}}
-        Y_prob = Y_prob * ((1.0 - rowSums (is_LT_infinite)) %*% ones_2) + is_LT_infinite;   # rows with infinite eta become [1, 0] or [0, 1]
-}   }            
-
-
-# THE CG-STEIHAUG PROCEDURE SCRIPT
-
-# Apply Conjugate Gradient - Steihaug algorithm in order to approximately minimize
-# 0.5 z^T (X^T diag(w) X + diag (lambda)) z + (g + lambda * beta)^T z
-# under constraint:  ||z|| <= trust_delta.
-# See Alg. 7.2 on p. 171 of "Numerical Optimization" 2nd ed. by Nocedal and Wright
-# IN THE ABOVE, "X" IS UNDERSTOOD TO BE "X %*% (SHIFT/SCALE TRANSFORM)"; this transform
-# is given separately because sparse "X" may become dense after applying the transform.
-#
-get_CG_Steihaug_point =
-    function (Matrix[double] X, Matrix[double] scale_X, Matrix[double] shift_X, Matrix[double] w,
-    Matrix[double] g, Matrix[double] beta, Matrix[double] lambda, double trust_delta, int max_iter_CG)
-    return (Matrix[double] z, double neg_log_l_change, int i_CG, int reached_trust_boundary)
-{   # Returns the step z, the value of the quadratic model at z, the iteration count, and a 0/1 trust-boundary flag
-    trust_delta_sq = trust_delta ^ 2;
-    size_CG = nrow (g);
-    z = matrix (0.0, rows = size_CG, cols = 1);
-    neg_log_l_change = 0.0;
-    reached_trust_boundary = 0;
-    g_reg = g + lambda * beta;   # gradient including the regularization term
-    r_CG = g_reg;
-    p_CG = -r_CG;
-    rr_CG = sum(r_CG * r_CG);
-    eps_CG = rr_CG * min (0.25, sqrt (rr_CG));   # stopping threshold for the squared residual norm, fixed from the initial residual
-    converged_CG = 0;
-    if (rr_CG < eps_CG) {
-        converged_CG = 1;
-    }
-    
-    max_iteration_CG = max_iter_CG;
-    if (max_iteration_CG <= 0) {   # a non-positive limit falls back to the problem size
-        max_iteration_CG = size_CG;
-    }
-    i_CG = 0;
-    while (converged_CG == 0)
-    {
-        i_CG = i_CG + 1;
-        ssX_p_CG = diag (scale_X) %*% p_CG;   # apply the scale part of the shift/scale transform to p_CG ...
-        ssX_p_CG [size_CG, ] = ssX_p_CG [size_CG, ] + t(shift_X) %*% p_CG;   # ... and add the shift part into the last row
-        temp_CG = t(X) %*% (w * (X %*% ssX_p_CG));
-        q_CG = (lambda * p_CG) + diag (scale_X) %*% temp_CG + shift_X %*% temp_CG [size_CG, ];   # transposed transform applied on the way back, plus the lambda term
-        pq_CG = sum (p_CG * q_CG);
-        if (pq_CG <= 0) {   # p' q is not positive: stop here, moving to the trust boundary when p_CG is nonzero
-            pp_CG = sum (p_CG * p_CG);  
-            if (pp_CG > 0) {
-                [z, neg_log_l_change] = 
-                    get_trust_boundary_point (g_reg, z, p_CG, q_CG, r_CG, pp_CG, pq_CG, trust_delta_sq);
-                reached_trust_boundary = 1;
-            } else {
-                neg_log_l_change = 0.5 * sum (z * (r_CG + g_reg));
-            }
-            converged_CG = 1;
-        }
-        if (converged_CG == 0) {
-            alpha_CG = rr_CG / pq_CG;
-            new_z = z + alpha_CG * p_CG;
-            if (sum(new_z * new_z) >= trust_delta_sq) {   # the full CG step would reach or leave the trust region
-                pp_CG = sum (p_CG * p_CG);  
-                [z, neg_log_l_change] = 
-                    get_trust_boundary_point (g_reg, z, p_CG, q_CG, r_CG, pp_CG, pq_CG, trust_delta_sq);
-                reached_trust_boundary = 1;
-                converged_CG = 1;
-            }
-            if (converged_CG == 0) {
-                z = new_z;
-                old_rr_CG = rr_CG;
-                r_CG = r_CG + alpha_CG * q_CG;
-                rr_CG = sum(r_CG * r_CG);
-                if (i_CG == max_iteration_CG | rr_CG < eps_CG) {   # iteration limit reached or residual small enough
-                    neg_log_l_change = 0.5 * sum (z * (r_CG + g_reg));
-                    reached_trust_boundary = 0;
-                    converged_CG = 1;
-                }
-                if (converged_CG == 0) {
-                    p_CG = -r_CG + (rr_CG / old_rr_CG) * p_CG;   # next search direction
-}   }   }   }   }
-
-
-# An auxiliary function used twice inside the CG-STEIHAUG loop:
-get_trust_boundary_point = 
-    function (Matrix[double] g, Matrix[double] z, Matrix[double] p, 
-              Matrix[double] q, Matrix[double] r, double pp, double pq, 
-              double trust_delta_sq)
-    return (Matrix[double] new_z, double f_change)
-{   # Moves from z along p to the point with ||new_z||^2 = trust_delta_sq that gives the smaller f_change
-    zz = sum (z * z);  pz = sum (p * z);
-    sq_root_d = sqrt (pz * pz - pp * (zz - trust_delta_sq));
-    tau_1 = (- pz + sq_root_d) / pp;   # tau_1, tau_2: the two roots of  pp * tau^2 + 2 * pz * tau + (zz - trust_delta_sq) = 0,
-    tau_2 = (- pz - sq_root_d) / pp;   # i.e. the step lengths with  sum ((z + tau * p) ^ 2) = trust_delta_sq
-    zq = sum (z * q);  gp = sum (g * p);
-    f_extra = 0.5 * sum (z * (r + g));   # same expression the caller uses for neg_log_l_change at z
-    f_change_1 = f_extra + (0.5 * tau_1 * pq + zq + gp) * tau_1;   # f_extra plus the additional change from stepping tau_1 * p
-    f_change_2 = f_extra + (0.5 * tau_2 * pq + zq + gp) * tau_2;
-    if (f_change_1 < f_change_2) {   # keep the boundary point with the smaller value
-        new_z = z + (tau_1 * p);
-        f_change = f_change_1;
-    }
-    else {
-        new_z = z + (tau_2 * p);
-        f_change = f_change_2;
-    }
-}
-
-
-# Computes vector w such that  ||X %*% w - 1|| -> MIN  given  avg(X %*% w) = 1
-# We find z_LS such that ||X %*% z_LS - 1|| -> MIN unconditionally, then scale
-# it to compute  w = c * z_LS  such that  sum(X %*% w) = nrow(X).
-straightenX =
-    function (Matrix[double] X, double eps, int max_iter_CG)
-    return   (Matrix[double] w)
-{   # Runs a regularized conjugate-gradient least-squares solve for z_LS, then rescales it into w
-    w_X = t(colSums(X));   # column sums of X, as a column vector
-    lambda_LS = 0.000001 * sum(X ^ 2) / ncol(X);   # small multiple of p_LS added to t(X) %*% X %*% p_LS inside the loop
-    eps_LS = eps * nrow(X);   # the loop stops once the squared residual norm drops below this
-
-    # BEGIN LEAST SQUARES
-    
-    r_LS = - w_X;
-    z_LS = matrix (0.0, rows = ncol(X), cols = 1);
-    p_LS = - r_LS;
-    norm_r2_LS = sum (r_LS ^ 2);
-    i_LS = 0;
-    while (i_LS < max_iter_CG & i_LS < ncol(X) & norm_r2_LS >= eps_LS)   # NOTE(review): if max_iter_CG <= 0 the body never runs, z_LS stays zero and the final scaling divides by zero -- confirm callers pass a positive limit
-    {
-        q_LS = t(X) %*% X %*% p_LS;
-        q_LS = q_LS + lambda_LS * p_LS;
-        alpha_LS = norm_r2_LS / sum (p_LS * q_LS);
-        z_LS = z_LS + alpha_LS * p_LS;
-        old_norm_r2_LS = norm_r2_LS;
-        r_LS = r_LS + alpha_LS * q_LS;
-        norm_r2_LS = sum (r_LS ^ 2);
-        p_LS = -r_LS + (norm_r2_LS / old_norm_r2_LS) * p_LS;   # next search direction
-        i_LS = i_LS + 1;
-    }
-    
-    # END LEAST SQUARES
-    
-    w = (nrow(X) / sum (w_X * z_LS)) * z_LS;   # sum (w_X * z_LS) equals sum (X %*% z_LS), so this scaling gives sum (X %*% w) = nrow(X)
-}
-
-
-round_to_print = function (double x_to_truncate)
-return (double mantissa, int eee)
-{   # Splits x_to_truncate into a signed mantissa rounded to 4 decimal places and a decimal exponent eee
-    mantissa = 1.0;
-    eee = 0;
-    positive_infinity = 1.0 / 0.0;   # not referenced again in this function
-    x = abs (x_to_truncate);
-    if (x != x / 2.0) {   # skips x = 0 and infinite x, for which x == x / 2.0
-        log_ten = log (10.0);
-        d_eee = round (log (x) / log_ten - 0.5);   # decimal exponent, computed as round (log10 (x) - 0.5)
-        mantissa = round (x * exp (log_ten * (4.0 - d_eee))) / 10000;   # x * 10 ^ (4 - d_eee), rounded, then divided by 10 ^ 4
-        if (mantissa == 10.0) {   # rounding carried the mantissa up to 10: renormalize to 1.0 and raise the exponent
-            mantissa = 1.0;
-            d_eee = d_eee + 1;
-        }
-        if (x_to_truncate < 0.0) {
-            mantissa = - mantissa;
-        }
-        eee = 0;
-        pow_two = 1;
-        res_eee = abs (d_eee);
-        while (res_eee != 0.0) {   # rebuilds abs (d_eee) in eee one binary digit at a time (presumably to obtain an int-typed value)
-            new_res_eee = round (res_eee / 2.0 - 0.3);   # res_eee halved and rounded down, for integer-valued res_eee
-            if (new_res_eee * 2.0 < res_eee) {   # the halving dropped a remainder, so this binary digit is 1
-                eee = eee + pow_two;
-            }
-            res_eee = new_res_eee;
-            pow_two = 2 * pow_two;
-        }
-        if (d_eee < 0.0) {
-            eee = - eee;
-        }
-    } else { mantissa = x_to_truncate; }   # zero or infinite input is returned unchanged as the mantissa, with eee left at 0
-}
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# 
+# THIS SCRIPT SOLVES GLM REGRESSION USING NEWTON/FISHER SCORING WITH TRUST REGIONS
+#
+# INPUT PARAMETERS:
+# ---------------------------------------------------------------------------------------------
+# NAME  TYPE   DEFAULT  MEANING
+# ---------------------------------------------------------------------------------------------
+# X     String  ---     Location to read the matrix X of feature vectors
+# Y     String  ---     Location to read response matrix Y with either 1 or 2 columns:
+#                       if dfam = 2, Y is 1-column Bernoulli or 2-column Binomial (#pos, #neg)
+# B     String  ---     Location to store estimated regression parameters (the betas)
+# fmt   String "text"   The betas matrix output format, such as "text" or "csv"
+# O     String  " "     Location to write the printed statistics; by default is standard output
+# Log   String  " "     Location to write per-iteration variables for log/debugging purposes
+# dfam  Int     1       Distribution family code: 1 = Power, 2 = Binomial
+# vpow  Double  0.0     Power for Variance defined as (mean)^power (ignored if dfam != 1):
+#                       0.0 = Gaussian, 1.0 = Poisson, 2.0 = Gamma, 3.0 = Inverse Gaussian
+# link  Int     0       Link function code: 0 = canonical (depends on distribution),
+#                       1 = Power, 2 = Logit, 3 = Probit, 4 = Cloglog, 5 = Cauchit
+# lpow  Double  1.0     Power for Link function defined as (mean)^power (ignored if link != 1):
+#                       -2.0 = 1/mu^2, -1.0 = reciprocal, 0.0 = log, 0.5 = sqrt, 1.0 = identity
+# yneg  Double  0.0     Response value for Bernoulli "No" label, usually 0.0 or -1.0
+# icpt  Int     0       Intercept presence, X columns shifting and rescaling:
+#                       0 = no intercept, no shifting, no rescaling;
+#                       1 = add intercept, but neither shift nor rescale X;
+#                       2 = add intercept, shift & rescale X columns to mean = 0, variance = 1
+# reg   Double  0.0     Regularization parameter (lambda) for L2 regularization
+# tol   Double 0.000001 Tolerance (epsilon)
+# disp  Double  0.0     (Over-)dispersion value, or 0.0 to estimate it from data
+# moi   Int     200     Maximum number of outer (Newton / Fisher Scoring) iterations
+# mii   Int     0       Maximum number of inner (Conjugate Gradient) iterations, 0 = no maximum
+# ---------------------------------------------------------------------------------------------
+# OUTPUT: Matrix beta, whose size depends on icpt:
+#     icpt=0: ncol(X) x 1;  icpt=1: (ncol(X) + 1) x 1;  icpt=2: (ncol(X) + 1) x 2
+#
+# In addition, some GLM statistics are provided in CSV format, one comma-separated name-value
+# pair per line, as follows:
+#
+# NAME                  MEANING
+# -------------------------------------------------------------------------------------------
+# TERMINATION_CODE      A positive integer indicating success/failure as follows:
+#                       1 = Converged successfully; 2 = Maximum number of iterations reached; 
+#                       3 = Input (X, Y) out of range; 4 = Distribution/link is not supported
+# BETA_MIN              Smallest beta value (regression coefficient), excluding the intercept
+# BETA_MIN_INDEX        Column index for the smallest beta value
+# BETA_MAX              Largest beta value (regression coefficient), excluding the intercept
+# BETA_MAX_INDEX        Column index for the largest beta value
+# INTERCEPT             Intercept value, or NaN if there is no intercept (if icpt=0)
+# DISPERSION            Dispersion used to scale deviance, provided as "disp" input parameter
+#                       or estimated (same as DISPERSION_EST) if the "disp" parameter is <= 0
+# DISPERSION_EST        Dispersion estimated from the dataset
+# DEVIANCE_UNSCALED     Deviance from the saturated model, assuming dispersion == 1.0
+# DEVIANCE_SCALED       Deviance from the saturated model, scaled by the DISPERSION value
+# -------------------------------------------------------------------------------------------
+#
+# The Log file, when requested, contains the following per-iteration variables in CSV format,
+# each line containing triple (NAME, ITERATION, VALUE) with ITERATION = 0 for initial values:
+#
+# NAME                  MEANING
+# -------------------------------------------------------------------------------------------
+# NUM_CG_ITERS          Number of inner (Conj.Gradient) iterations in this outer iteration
+# IS_TRUST_REACHED      1 = trust region boundary was reached, 0 = otherwise
+# POINT_STEP_NORM       L2-norm of iteration step from old point (i.e. "beta") to new point
+# OBJECTIVE             The loss function we minimize (i.e. negative partial log-likelihood)
+# OBJ_DROP_REAL         Reduction in the objective during this iteration, actual value
+# OBJ_DROP_PRED         Reduction in the objective predicted by a quadratic approximation
+# OBJ_DROP_RATIO        Actual-to-predicted reduction ratio, used to update the trust region
+# GRADIENT_NORM         L2-norm of the loss function gradient (NOTE: sometimes omitted)
+# LINEAR_TERM_MIN       The minimum value of X %*% beta, used to check for overflows
+# LINEAR_TERM_MAX       The maximum value of X %*% beta, used to check for overflows
+# IS_POINT_UPDATED      1 = new point accepted; 0 = new point rejected, old point restored
+# TRUST_DELTA           Updated trust region size, the "delta"
+# -------------------------------------------------------------------------------------------
+#
+# Example with distribution = "Binomial.logit":
+# hadoop jar SystemML.jar -f GLM_HOME/GLM.dml -nvargs dfam=2 link=2 yneg=-1.0 icpt=2 reg=0.001
+#     tol=0.00000001 disp=1.0 moi=100 mii=10 X=INPUT_DIR/X Y=INPUT_DIR/Y B=OUTPUT_DIR/betas 
+#     fmt=csv O=OUTPUT_DIR/stats Log=OUTPUT_DIR/log
+#
+# SOME OF THE SUPPORTED GLM DISTRIBUTION FAMILIES
+# AND LINK FUNCTIONS:
+# -----------------------------------------------
+# INPUT PARAMETERS:    MEANING:            Cano-
+# dfam vpow link lpow  Distribution.link   nical?
+# -----------------------------------------------
+#  1   0.0   1  -1.0   Gaussian.inverse
+#  1   0.0   1   0.0   Gaussian.log
+#  1   0.0   1   1.0   Gaussian.id          Yes
+#  1   1.0   1   0.0   Poisson.log          Yes
+#  1   1.0   1   0.5   Poisson.sqrt
+#  1   1.0   1   1.0   Poisson.id
+#  1   2.0   1  -1.0   Gamma.inverse        Yes
+#  1   2.0   1   0.0   Gamma.log
+#  1   2.0   1   1.0   Gamma.id
+#  1   3.0   1  -2.0   InvGaussian.1/mu^2   Yes
+#  1   3.0   1  -1.0   InvGaussian.inverse
+#  1   3.0   1   0.0   InvGaussian.log
+#  1   3.0   1   1.0   InvGaussian.id
+#  1    *    1    *    AnyVariance.AnyLink
+# -----------------------------------------------
+#  2    *    1   0.0   Binomial.log
+#  2    *    1   0.5   Binomial.sqrt
+#  2    *    2    *    Binomial.logit       Yes
+#  2    *    3    *    Binomial.probit
+#  2    *    4    *    Binomial.cloglog
+#  2    *    5    *    Binomial.cauchit
+# -----------------------------------------------
+
+
+# Default values for input parameters
+#
+# File names: X, Y and B are mandatory; O and Log are optional.  A single
+# blank " " marks an optional file name as "not supplied" (the script tests
+# fileO != " " and fileLog != " " before writing).
+
+fileX = $X;
+fileY = $Y;
+fileB = $B;
+fileO = ifdef ($O, " ");
+fileLog = ifdef ($Log, " ");
+fmtB = ifdef ($fmt, "text");
+
+# Special default values that are resolved further down in the script:
+#   link = 0   the canonical link for the distribution family is used
+#   mii  = 0   the number of features is used as the inner iteration limit
+#   disp = 0.0 (or any value <= 0.0): the dispersion is estimated from data
+
+distribution_type = ifdef ($dfam, 1);                # $dfam = 1;
+variance_as_power_of_the_mean = ifdef ($vpow, 0.0);  # $vpow = 0.0;
+link_type = ifdef ($link, 0);                        # $link = 0;
+link_as_power_of_the_mean = ifdef ($lpow, 1.0);      # $lpow = 1.0;
+bernoulli_No_label = ifdef ($yneg, 0.0);             # $yneg = 0.0;
+intercept_status = ifdef ($icpt, 0);                 # $icpt = 0;
+dispersion = ifdef ($disp, 0.0);                     # $disp = 0.0;
+regularization = ifdef ($reg, 0.0);                  # $reg  = 0.0;
+eps = ifdef ($tol, 0.000001);                        # $tol  = 0.000001;
+max_iteration_IRLS = ifdef ($moi, 200);              # $moi  = 200;
+max_iteration_CG = ifdef ($mii, 0);                  # $mii  = 0;
+
+# Force floating-point type even if an integer literal was passed in
+
+variance_as_power_of_the_mean = as.double (variance_as_power_of_the_mean);
+link_as_power_of_the_mean = as.double (link_as_power_of_the_mean);
+bernoulli_No_label = as.double (bernoulli_No_label);
+dispersion = as.double (dispersion);
+eps = as.double (eps);
+
+
+# Default values for output statistics:
+# NaN (0.0 / 0.0) marks a statistic that has not been computed, which is what
+# gets reported if the script terminates before the main part runs.
+#
+# "dispersion" is intentionally NOT reset to NaN here: it carries the $disp
+# input read above.  Resetting it discarded the user-supplied value and,
+# because a comparison against NaN is false, also disabled the later
+# "dispersion <= 0.0" fallback to the estimated dispersion, so the scaled
+# deviance always came out as NaN.
+
+termination_code     = 0.0;
+min_beta             = 0.0 / 0.0;
+i_min_beta           = 0.0 / 0.0;
+max_beta             = 0.0 / 0.0;
+i_max_beta           = 0.0 / 0.0;
+intercept_value      = 0.0 / 0.0;
+estimated_dispersion = 0.0 / 0.0;
+deviance_nodisp      = 0.0 / 0.0;
+deviance             = 0.0 / 0.0;
+
+# Read the feature matrix X and the response matrix Y, optionally add an
+# intercept column, and prepare the shift/scale vectors and the per-feature
+# regularization weights used by the rest of the script.
+
+print("BEGIN GLM SCRIPT");
+print("Reading X...");
+X = read (fileX);
+print("Reading Y...");
+Y = read (fileY);
+
+num_records  = nrow (X);
+num_features = ncol (X);
+zeros_r = matrix (0, rows = num_records, cols = 1);
+ones_r = 1 + zeros_r;
+
+# Introduce the intercept, shift and rescale the columns of X if needed
+
+if (intercept_status == 1 | intercept_status == 2)  # add the intercept column
+{
+    X = append (X, ones_r);
+    num_features = ncol (X);   # the intercept is now the LAST column of X
+}
+
+# scale_lambda is 1 for every regular feature and 0 for the intercept column,
+# so that "lambda = scale_lambda * regularization" leaves the intercept
+# unpenalized.
+scale_lambda = matrix (1, rows = num_features, cols = 1);
+if (intercept_status == 1 | intercept_status == 2)
+{
+    scale_lambda [num_features, 1] = 0;
+}
+
+if (intercept_status == 2)  # scale-&-shift X columns to mean 0, variance 1
+{                           # Important assumption: X [, num_features] = ones_r
+    avg_X_cols = t(colSums(X)) / num_records;
+    var_X_cols = (t(colSums (X ^ 2)) - num_records * (avg_X_cols ^ 2)) / (num_records - 1);
+    # Columns with non-positive variance get a scale factor of 1 instead of
+    # a division by zero: their variance is replaced by 1 under the sqrt.
+    is_unsafe = ppred (var_X_cols, 0.0, "<=");
+    scale_X = 1.0 / sqrt (var_X_cols * (1 - is_unsafe) + is_unsafe);
+    scale_X [num_features, 1] = 1;   # the intercept column is neither scaled...
+    shift_X = - avg_X_cols * scale_X;
+    shift_X [num_features, 1] = 0;   # ...nor shifted
+    # Row-wise sums of squares of the shifted-and-scaled X, expanded so that
+    # the transformed matrix itself is never materialized.
+    rowSums_X_sq = (X ^ 2) %*% (scale_X ^ 2) + X %*% (2 * scale_X * shift_X) + sum (shift_X ^ 2);
+} else {
+    # Trivial transform: scale by 1, shift by 0
+    scale_X = matrix (1, rows = num_features, cols = 1);
+    shift_X = matrix (0, rows = num_features, cols = 1);
+    rowSums_X_sq = rowSums (X ^ 2);
+}
+
+# Henceforth we replace "X" with "X %*% (SHIFT/SCALE TRANSFORM)" and rowSums(X ^ 2)
+# with "rowSums_X_sq" in order to preserve the sparsity of X under shift and scale.
+# The transform is then associatively applied to the other side of the expression,
+# and is rewritten via "scale_X" and "shift_X" as follows:
+#
+# ssX_A  = (SHIFT/SCALE TRANSFORM) %*% A    --- is rewritten as:
+# ssX_A  = diag (scale_X) %*% A;
+# ssX_A [num_features, ] = ssX_A [num_features, ] + t(shift_X) %*% A;
+#
+# tssX_A = t(SHIFT/SCALE TRANSFORM) %*% A   --- is rewritten as:
+# tssX_A = diag (scale_X) %*% A + shift_X %*% A [num_features, ];
+
+# Initialize other input-dependent parameters
+
+lambda = scale_lambda * regularization;
+# A limit of 0 means "not specified": fall back to the number of features
+if (max_iteration_CG == 0) {
+    max_iteration_CG = num_features;
+}
+
+# A one-column Bernoulli response is expanded into the two-column form
+# [is YES-label, is NO-label]; a record carries the NO-label exactly when
+# its Y-value equals "bernoulli_No_label".  Both labels must occur.
+
+if (distribution_type == 2 & ncol(Y) == 1)
+{
+    is_Y_negative = ppred (Y, bernoulli_No_label, "==");
+    count_Y_negative = sum (is_Y_negative);
+    num_Y_rows = nrow (Y);
+    Y = append (1 - is_Y_negative, is_Y_negative);
+    if (count_Y_negative == 0) {
+        stop ("GLM Input Error: all Y-values encode Bernoulli YES-label, none encode NO-label");
+    }
+    if (count_Y_negative == num_Y_rows) {
+        stop ("GLM Input Error: all Y-values encode Bernoulli NO-label, none encode YES-label");
+    }
+}
+
+# Link type 0 requests the canonical link [for which Var(mu) * (d link / d mu) = const]:
+# the power link with exponent (1 - variance power) for distribution type 1,
+# and link type 2 for distribution type 2.  Any other distribution type
+# leaves the link type at 0.
+
+if (link_type == 0 & distribution_type == 1)
+{
+    link_as_power_of_the_mean = 1.0 - variance_as_power_of_the_mean;
+    link_type = 1;
+}
+if (link_type == 0 & distribution_type == 2)
+{
+    link_type = 2;
+}
+
+# For power distributions and/or links, we use two constants,
+# "variance_as_power_of_the_mean" and "link_as_power_of_the_mean",
+# to specify the variance and the link as arbitrary powers of the
+# mean.  However, the variance-powers of 1.0 (Poisson family) and
+# 2.0 (Gamma family) have to be treated as special cases, because
+# these values integrate into logarithms.  The link-power of 0.0
+# is also special as it represents the logarithm link.
+
+# Main part of the script: check that the distribution/link pair is
+# supported, initialize the betas, run the IRLS iterations with a
+# trust-region conjugate-gradient inner solver, write the betas and compute
+# the dispersion and deviance statistics.
+#
+# termination_code values set in this part:
+#   0 = still iterating,  1 = converged,  2 = IRLS iteration limit reached,
+#   3 = initialization flagged the input as out of range,
+#   4 = distribution/link combination not supported.
+
+num_response_columns = ncol (Y);
+
+is_supported = check_if_supported (num_response_columns, distribution_type, variance_as_power_of_the_mean, link_type, link_as_power_of_the_mean);
+if (is_supported == 1)
+{
+
+#####   INITIALIZE THE BETAS   #####
+
+[beta, saturated_log_l, isNaN] = 
+    glm_initialize (X, Y, distribution_type, variance_as_power_of_the_mean, link_type, link_as_power_of_the_mean, intercept_status, max_iteration_CG);
+if (isNaN == 0)
+{
+
+#####  START OF THE MAIN PART  #####
+
+sum_X_sq = sum (rowSums_X_sq);
+# Initial trust-region radius, derived from the largest row norm of X
+trust_delta = 0.5 * sqrt (num_features) / max (sqrt (rowSums_X_sq));
+###  max_trust_delta = trust_delta * 10000.0;
+log_l = 0.0;
+deviance_nodisp = 0.0;
+new_deviance_nodisp = 0.0;
+isNaN_log_l = 2;   # neither 0 nor 1 -- presumably "no point accepted yet"; TODO confirm
+newbeta = beta;
+g = matrix (0.0, rows = num_features, cols = 1);
+g_norm = sqrt (sum ((g + lambda * beta) ^ 2));
+accept_new_beta = 1;
+reached_trust_boundary = 0;
+neg_log_l_change_predicted = 0.0;
+i_IRLS = 0;
+
+print ("BEGIN IRLS ITERATIONS...");
+
+# Linear terms at the initial point, with the shift/scale transform applied
+# to the beta side (see the "ssX_A" rewrite described earlier in the script)
+ssX_newbeta = diag (scale_X) %*% newbeta;
+ssX_newbeta [num_features, ] = ssX_newbeta [num_features, ] + t(shift_X) %*% newbeta;
+all_linear_terms = X %*% ssX_newbeta;
+
+[new_log_l, isNaN_new_log_l] = glm_log_likelihood_part
+    (all_linear_terms, Y, distribution_type, variance_as_power_of_the_mean, link_type, link_as_power_of_the_mean);
+
+if (isNaN_new_log_l == 0) {
+    # The deviance uses the unpenalized log-likelihood; the regularization
+    # penalty is subtracted only afterwards to form the objective
+    new_deviance_nodisp = 2.0 * (saturated_log_l - new_log_l);
+    new_log_l = new_log_l - 0.5 * sum (lambda * newbeta ^ 2);
+}
+
+# The log is accumulated as "NAME,iteration,value" records, only if a log
+# file was requested
+if (fileLog != " ") {
+    log_str = "POINT_STEP_NORM," + i_IRLS + "," + sqrt (sum (beta ^ 2));
+    log_str = append (log_str, "OBJECTIVE," + i_IRLS + "," + (- new_log_l));
+    log_str = append (log_str, "LINEAR_TERM_MIN," + i_IRLS + "," + min (all_linear_terms));
+    log_str = append (log_str, "LINEAR_TERM_MAX," + i_IRLS + "," + max (all_linear_terms));
+} else {
+    log_str = " ";
+}
+
+# Each pass of the loop:
+#   1. decides whether the trial point "newbeta" from the previous pass is
+#      accepted, and updates the trust-region radius;
+#   2. if accepted, recomputes the gradient "g" and the weights "w" there;
+#   3. obtains the next step "z" from the CG-Steihaug solver;
+#   4. evaluates the log-likelihood at the new trial point and tests for
+#      convergence.
+while (termination_code == 0)
+{
+    accept_new_beta = 1;
+    
+    if (i_IRLS > 0)
+    {
+        # Once the current point has a valid log-likelihood, a trial point
+        # must earn its acceptance through the rho test below
+        if (isNaN_log_l == 0) {
+            accept_new_beta = 0;
+        }
+
+# Decide whether to accept a new iteration point and update the trust region
+# See Alg. 4.1 on p. 69 of "Numerical Optimization" 2nd ed. by Nocedal and Wright
+
+        # rho = actual decrease of the objective / decrease predicted by the model
+        rho = (- new_log_l + log_l) / neg_log_l_change_predicted;
+        if (rho < 0.25 | isNaN_new_log_l == 1) {
+            trust_delta = 0.25 * trust_delta;
+        }
+        if (rho > 0.75 & isNaN_new_log_l == 0 & reached_trust_boundary == 1) {
+            trust_delta = 2 * trust_delta;
+            
+### if (trust_delta > max_trust_delta) {
+###     trust_delta = max_trust_delta;
+### }
+
+        }
+        if (rho > 0.1 & isNaN_new_log_l == 0) {
+            accept_new_beta = 1;
+        }
+    }
+
+    if (fileLog != " ") {
+        log_str = append (log_str, "IS_POINT_UPDATED," + i_IRLS + "," + accept_new_beta);
+        log_str = append (log_str, "TRUST_DELTA,"      + i_IRLS + "," + trust_delta);
+    }
+    if (accept_new_beta == 1)
+    {
+        beta = newbeta;  log_l = new_log_l;  deviance_nodisp = new_deviance_nodisp;  isNaN_log_l = isNaN_new_log_l;
+        
+        [g_Y, w] = glm_dist (all_linear_terms, Y, distribution_type, variance_as_power_of_the_mean, link_type, link_as_power_of_the_mean);
+        
+        # We introduced these variables to avoid roundoff errors:
+        #     g_Y = y_residual / (y_var * link_grad);
+        #     w   = 1.0 / (y_var * link_grad * link_grad);
+                      
+        # Gradient w.r.t. beta, with the transposed shift/scale transform
+        # applied (see the "tssX_A" rewrite described earlier in the script)
+        gXY = - t(X) %*% g_Y;
+        g = diag (scale_X) %*% gXY + shift_X %*% gXY [num_features, ];
+        g_norm = sqrt (sum ((g + lambda * beta) ^ 2));
+        
+        if (fileLog != " ") {
+            log_str = append (log_str, "GRADIENT_NORM," + i_IRLS + "," + g_norm);
+        }
+    }
+    
+    # If the trial point was rejected, the solver is re-run from the same
+    # beta, g and w but with the updated (shrunk) trust_delta
+    [z, neg_log_l_change_predicted, num_CG_iters, reached_trust_boundary] = 
+        get_CG_Steihaug_point (X, scale_X, shift_X, w, g, beta, lambda, trust_delta, max_iteration_CG);
+
+    newbeta = beta + z;
+    
+    ssX_newbeta = diag (scale_X) %*% newbeta;
+    ssX_newbeta [num_features, ] = ssX_newbeta [num_features, ] + t(shift_X) %*% newbeta;
+    all_linear_terms = X %*% ssX_newbeta;
+    
+    [new_log_l, isNaN_new_log_l] = glm_log_likelihood_part
+        (all_linear_terms, Y, distribution_type, variance_as_power_of_the_mean, link_type, link_as_power_of_the_mean);
+
+    if (isNaN_new_log_l == 0) {
+        new_deviance_nodisp = 2.0 * (saturated_log_l - new_log_l);
+        new_log_l = new_log_l - 0.5 * sum (lambda * newbeta ^ 2);
+    }
+        
+    log_l_change = new_log_l - log_l;               # R's criterion for termination: |dev - devold|/(|dev| + 0.1) < eps
+
+    # Converged if the step stayed strictly inside the trust region, the new
+    # log-likelihood is valid, and its change is small either relative to the
+    # deviance (tolerance eps) or relative to the log-likelihood magnitudes
+    if (reached_trust_boundary == 0 & isNaN_new_log_l == 0 & 
+        (2.0 * abs (log_l_change) < eps * (deviance_nodisp + 0.1) | abs (log_l_change) < (abs (log_l) + abs (new_log_l)) * 0.00000000000001) )  
+    {
+        termination_code = 1;
+    }
+    rho = - log_l_change / neg_log_l_change_predicted;
+    z_norm = sqrt (sum (z * z));
+    
+    # Split each value into the two parts printed as  <m>E<e>  below
+    [z_norm_m, z_norm_e] = round_to_print (z_norm);
+    [trust_delta_m, trust_delta_e] = round_to_print (trust_delta);
+    [rho_m, rho_e] = round_to_print (rho);
+    [new_log_l_m, new_log_l_e] = round_to_print (new_log_l);
+    [log_l_change_m, log_l_change_e] = round_to_print (log_l_change);
+    [g_norm_m, g_norm_e] = round_to_print (g_norm);
+
+    i_IRLS = i_IRLS + 1;
+    print ("Iter #" + i_IRLS + " completed"
+        + ", ||z|| = " + z_norm_m + "E" + z_norm_e
+        + ", trust_delta = " + trust_delta_m + "E" + trust_delta_e
+        + ", reached = " + reached_trust_boundary
+        + ", ||g|| = " + g_norm_m + "E" + g_norm_e
+        + ", new_log_l = " + new_log_l_m + "E" + new_log_l_e
+        + ", log_l_change = " + log_l_change_m + "E" + log_l_change_e
+        + ", rho = " + rho_m + "E" + rho_e);
+        
+    if (fileLog != " ") {
+        log_str = append (log_str, "NUM_CG_ITERS,"     + i_IRLS + "," + num_CG_iters);
+        log_str = append (log_str, "IS_TRUST_REACHED," + i_IRLS + "," + reached_trust_boundary);
+        log_str = append (log_str, "POINT_STEP_NORM,"  + i_IRLS + "," + z_norm);
+        log_str = append (log_str, "OBJECTIVE,"        + i_IRLS + "," + (- new_log_l));
+        log_str = append (log_str, "OBJ_DROP_REAL,"    + i_IRLS + "," + log_l_change);
+        log_str = append (log_str, "OBJ_DROP_PRED,"    + i_IRLS + "," + (- neg_log_l_change_predicted));
+        log_str = append (log_str, "OBJ_DROP_RATIO,"   + i_IRLS + "," + rho);
+        log_str = append (log_str, "LINEAR_TERM_MIN,"  + i_IRLS + "," + min (all_linear_terms));
+        log_str = append (log_str, "LINEAR_TERM_MAX,"  + i_IRLS + "," + max (all_linear_terms));
+    }
+        
+    # Note: this overrides a convergence code of 1 set above when both
+    # happen in the same (last allowed) iteration
+    if (i_IRLS == max_iteration_IRLS) {
+        termination_code = 2;
+    }
+}
+
+# NOTE(review): the last trial point is adopted unconditionally here; the
+# rho acceptance test at the top of the loop is never applied to it.
+beta = newbeta;
+log_l = new_log_l;
+deviance_nodisp = new_deviance_nodisp;
+
+if (termination_code == 1) {
+    print ("Converged in " + i_IRLS + " steps.");
+} else {
+    print ("Did not converge.");
+}
+
+# Betas expressed for the original (unshifted, unscaled) X.  With
+# intercept_status == 2 the output has two columns: these betas, followed
+# by the betas for the shifted-and-scaled X.
+ssX_beta = diag (scale_X) %*% beta;
+ssX_beta [num_features, ] = ssX_beta [num_features, ] + t(shift_X) %*% beta;
+if (intercept_status == 2) {
+    beta_out = append (ssX_beta, beta);
+} else {
+    beta_out = ssX_beta;
+}
+
+write (beta_out, fileB, format=fmtB);
+
+# Min/max statistics are taken over the first column of beta_out with the
+# intercept (the last row), if present, excluded
+if (intercept_status == 1 | intercept_status == 2) {
+    intercept_value = castAsScalar (beta_out [num_features, 1]);
+    beta_noicept = beta_out [1 : (num_features - 1), 1];
+} else {
+    beta_noicept = beta_out [1 : num_features, 1];
+}
+min_beta = min (beta_noicept);
+max_beta = max (beta_noicept);
+# rowIndexMin/rowIndexMax work along rows, hence the transpose of the column
+tmp_i_min_beta = rowIndexMin (t(beta_noicept))
+i_min_beta = castAsScalar (tmp_i_min_beta [1, 1]);
+tmp_i_max_beta = rowIndexMax (t(beta_noicept))
+i_max_beta = castAsScalar (tmp_i_max_beta [1, 1]);
+
+#####  OVER-DISPERSION PART  #####
+
+all_linear_terms = X %*% ssX_beta;
+[g_Y, w] = glm_dist (all_linear_terms, Y, distribution_type, variance_as_power_of_the_mean, link_type, link_as_power_of_the_mean);
+    
+pearson_residual_sq = g_Y ^ 2 / w;
+# NaN entries (e.g. from 0 / 0) are counted as zero residuals
+pearson_residual_sq = replace (target = pearson_residual_sq, pattern = 0.0/0.0, replacement = 0);
+# pearson_residual_sq = (y_residual ^ 2) / y_var;
+
+# The estimate needs a positive number of degrees of freedom
+if (num_records > num_features) {
+    estimated_dispersion = sum (pearson_residual_sq) / (num_records - num_features);
+}
+# A non-positive dispersion means "use the estimated one"
+if (dispersion <= 0.0) {
+    dispersion = estimated_dispersion;
+}
+deviance = deviance_nodisp / dispersion;
+
+if (fileLog != " ") {
+    write (log_str, fileLog);
+}
+
+#####  END OF THE MAIN PART  #####
+
+} else { print ("Input matrices are out of range.  Terminating the DML."); termination_code = 3; }
+} else { print ("Distribution/Link not supported.  Terminating the DML."); termination_code = 4; }
+
+# Collect the output statistics as "NAME,value" records chained together
+# with append, then write them to the statistics file if one was supplied,
+# or print them otherwise.
+
+stats_str = "TERMINATION_CODE," + termination_code;
+stats_str = append (stats_str, "BETA_MIN," + min_beta);
+stats_str = append (stats_str, "BETA_MIN_INDEX," + i_min_beta);
+stats_str = append (stats_str, "BETA_MAX," + max_beta);
+stats_str = append (stats_str, "BETA_MAX_INDEX," + i_max_beta);
+stats_str = append (stats_str, "INTERCEPT," + intercept_value);
+stats_str = append (stats_str, "DISPERSION," + dispersion);
+stats_str = append (stats_str, "DISPERSION_EST," + estimated_dispersion);
+stats_str = append (stats_str, "DEVIANCE_UNSCALED," + deviance_nodisp);
+stats_str = append (stats_str, "DEVIANCE_SCALED," + deviance);
+
+if (fileO == " ") {
+    print (stats_str);
+} else {
+    write (stats_str, fileO);
+}
+
+
+
+
+# Checks whether the response format, distribution family and link function
+# can be handled together, and prints the "Family.link" name of the
+# combination that was recognized.
+#   ncol_y      number of columns in the response matrix
+#   dist_type   1 = power distribution, 2 = binomial/Bernoulli
+#   var_power   variance as a power of the mean (power distributions)
+#   link_type   1 = power/log link; 2..5 = logit, probit, cloglog, cauchit
+#   link_power  link as a power of the mean (link_type 1 only)
+# Returns is_supported = 1 if the combination is supported, 0 otherwise.
+check_if_supported = 
+    function (int ncol_y, int dist_type, double var_power, int link_type, double link_power)
+    return   (int is_supported)
+{
+    is_supported = 0;
+
+    if (ncol_y == 1 & dist_type == 1 & link_type == 1)
+    { # POWER DISTRIBUTION
+        is_supported = 1;
+
+        # The variance power selects the family name
+        family_name = "PowerDist";
+        is_named_family = 0;
+        if (var_power == 0.0) { family_name = "Gaussian";    is_named_family = 1; }
+        if (var_power == 1.0) { family_name = "Poisson";     is_named_family = 1; }
+        if (var_power == 2.0) { family_name = "Gamma";       is_named_family = 1; }
+        if (var_power == 3.0) { family_name = "InvGaussian"; is_named_family = 1; }
+
+        # The link power selects the link name; outside of the named
+        # families only the log link is told apart from the other powers
+        link_name = "power_nonlog";
+        if (link_power == 0.0)                          { link_name = "log";     }
+        if (is_named_family == 1 & link_power == -1.0)  { link_name = "inverse"; }
+        if (is_named_family == 1 & link_power ==  0.5)  { link_name = "sqrt";    }
+        if (is_named_family == 1 & link_power ==  1.0)  { link_name = "id";      }
+        if (var_power == 3.0     & link_power == -2.0)  { link_name = "1/mu^2";  }
+
+        print (family_name + "." + link_name);
+    }
+
+    if (ncol_y == 1 & dist_type == 2)
+    {
+        print ("Error: Bernoulli response matrix has not been converted into two-column format.");
+    }
+
+    if (ncol_y == 2 & dist_type == 2 & link_type >= 1 & link_type <= 5)
+    { # BINOMIAL/BERNOULLI DISTRIBUTION
+        is_supported = 1;
+
+        # Default covers link_type == 1 with a power that has no own name
+        binomial_link_name = "power_nonlog";
+        if (link_type == 1 & link_power == -1.0) { binomial_link_name = "inverse"; }
+        if (link_type == 1 & link_power ==  0.0) { binomial_link_name = "log";     }
+        if (link_type == 1 & link_power ==  0.5) { binomial_link_name = "sqrt";    }
+        if (link_type == 1 & link_power ==  1.0) { binomial_link_name = "id";      }
+        if (link_type == 2)                      { binomial_link_name = "logit";   }
+        if (link_type == 3)                      { binomial_link_name = "probit";  }
+        if (link_type == 4)                      { binomial_link_name = "cloglog"; }
+        if (link_type == 5)                      { binomial_link_name = "cauchit"; }
+
+        print ("Binomial." + binomial_link_name);
+    }
+
+    if (is_supported == 0) {
+        print ("Response matrix with " + ncol_y + " columns, distribution family (" + dist_type + ", " + var_power
+             + ") and link family (" + link_type + ", " + link_power + ") are NOT supported together.");
+    }
+}
+
+# Computes the starting point for the IRLS iterations.
+#   X, Y          feature matrix and response matrix (for dist_type 2 the
+#                 rows of Y are summed to get the per-record totals)
+#   dist_type, var_power, link_type, link_power
+#                 distribution family and link function selectors
+#   icept_status  1 or 2 if the last column of X is the intercept column
+#   max_iter_CG   iteration limit passed on to straightenX
+# Returns:
+#   beta             initial regression coefficients, ncol(X) by 1
+#   saturated_log_l  log-likelihood part evaluated at linear terms derived
+#                    directly from the observed responses
+#   isNaN            1 if the responses are out of range for the requested
+#                    link, or if the log-likelihood evaluation reported it
+glm_initialize = function (Matrix[double] X, Matrix[double] Y, int dist_type, double var_power, int link_type, double link_power, int icept_status, int max_iter_CG)
+return (Matrix[double] beta, double saturated_log_l, int isNaN)
+{
+    saturated_log_l = 0.0;
+    isNaN = 0;
+    y_corr = Y [, 1];
+    if (dist_type == 2) {
+        # Fraction of the first-column count in each row's total; rows whose
+        # total is zero avoid the 0 / 0 and get (0.5 - Y [, 1]) instead
+        n_corr = rowSums (Y);
+        is_n_zero = ppred (n_corr, 0.0, "==");
+        y_corr = Y [, 1] / (n_corr + is_n_zero) + (0.5 - Y [, 1]) * is_n_zero;    
+    }
+    # linear_terms = link function applied to y_corr.  Where the link is not
+    # finite at y_corr, terms of the form  v / (1.0 - v)  with a 0/1 indicator
+    # v are used: they evaluate to 0 / 1 = 0 for regular entries and to 1 / 0
+    # (infinity under IEEE arithmetic) for the flagged ones.
+    linear_terms = y_corr;
+    if (dist_type == 1 & link_type == 1) { # POWER DISTRIBUTION
+        if          (link_power ==  0.0) {
+            # log link: negative responses are out of range; zeros map to -1/0
+            if (sum (ppred (y_corr, 0.0, "<")) == 0) {
+                is_zero_y_corr = ppred (y_corr, 0.0, "==");
+                linear_terms = log (y_corr + is_zero_y_corr) - is_zero_y_corr / (1.0 - is_zero_y_corr);
+            } else { isNaN = 1; }
+        } else { if (link_power ==  1.0) {
+            linear_terms = y_corr;
+        } else { if (link_power == -1.0) {
+            linear_terms = 1.0 / y_corr;
+        } else { if (link_power ==  0.5) {
+            if (sum (ppred (y_corr, 0.0, "<")) == 0) {
+                linear_terms = sqrt (y_corr);
+            } else { isNaN = 1; }
+        } else { if (link_power >   0.0) {
+            # positive power: zeros are raised as 1 ^ power and then reset to 0
+            if (sum (ppred (y_corr, 0.0, "<")) == 0) {
+                is_zero_y_corr = ppred (y_corr, 0.0, "==");
+                linear_terms = (y_corr + is_zero_y_corr) ^ link_power - is_zero_y_corr;
+            } else { isNaN = 1; }
+        } else {
+            # negative power: responses must be strictly positive
+            if (sum (ppred (y_corr, 0.0, "<=")) == 0) {
+                linear_terms = y_corr ^ link_power;
+            } else { isNaN = 1; }
+        }}}}}
+    }
+    if (dist_type == 2 & link_type >= 1 & link_type <= 5)
+    { # BINOMIAL/BERNOULLI DISTRIBUTION
+        if          (link_type == 1 & link_power == 0.0)  { # Binomial.log
+            if (sum (ppred (y_corr, 0.0, "<")) == 0) {
+                is_zero_y_corr = ppred (y_corr, 0.0, "==");
+                linear_terms = log (y_corr + is_zero_y_corr) - is_zero_y_corr / (1.0 - is_zero_y_corr);
+            } else { isNaN = 1; }
+        } else { if (link_type == 1 & link_power >  0.0)  { # Binomial.power_nonlog pos
+            if (sum (ppred (y_corr, 0.0, "<")) == 0) {
+                is_zero_y_corr = ppred (y_corr, 0.0, "==");
+                linear_terms = (y_corr + is_zero_y_corr) ^ link_power - is_zero_y_corr;
+            } else { isNaN = 1; }
+        } else { if (link_type == 1)                      { # Binomial.power_nonlog neg
+            if (sum (ppred (y_corr, 0.0, "<=")) == 0) {
+                linear_terms = y_corr ^ link_power;
+            } else { isNaN = 1; }
+        } else { 
+            # Link types 2..5: fractions at or beyond 0 and 1 are replaced by
+            # 0.5 before the link is applied, and then receive the -1/0 or
+            # +1/0 correction terms added to each formula below
+            is_zero_y_corr = ppred (y_corr, 0.0, "<=");
+            is_one_y_corr  = ppred (y_corr, 1.0, ">=");
+            y_corr = y_corr * (1.0 - is_zero_y_corr) * (1.0 - is_one_y_corr) + 0.5 * (is_zero_y_corr + is_one_y_corr);
+            if (link_type == 2)                           { # Binomial.logit
+                linear_terms = log (y_corr / (1.0 - y_corr)) 
+                    + is_one_y_corr / (1.0 - is_one_y_corr) - is_zero_y_corr / (1.0 - is_zero_y_corr);
+            } else { if (link_type == 3)                  { # Binomial.probit
+                # Evaluate the approximation on the lower half (y <= 0.5) and
+                # mirror the sign for y > 0.5.  NOTE(review): the constants
+                # look like the rational approximation of Abramowitz & Stegun
+                # formula 26.2.23 -- confirm.
+                y_below_half = y_corr + (1.0 - 2.0 * y_corr) * ppred (y_corr, 0.5, ">");
+                t = sqrt (- 2.0 * log (y_below_half));
+                approx_inv_Gauss_CDF = - t + (2.515517 + t * (0.802853 + t * 0.010328)) / (1.0 + t * (1.432788 + t * (0.189269 + t * 0.001308)));
+                linear_terms = approx_inv_Gauss_CDF * (1.0 - 2.0 * ppred (y_corr, 0.5, ">"))
+                    + is_one_y_corr / (1.0 - is_one_y_corr) - is_zero_y_corr / (1.0 - is_zero_y_corr);
+            } else { if (link_type == 4)                  { # Binomial.cloglog
+                # The second line cancels the finite value that the 0.5
+                # placeholder contributes for the flagged entries
+                linear_terms = log (- log (1.0 - y_corr))
+                    - log (- log (0.5)) * (is_zero_y_corr + is_one_y_corr)
+                    + is_one_y_corr / (1.0 - is_one_y_corr) - is_zero_y_corr / (1.0 - is_zero_y_corr);
+            } else { if (link_type == 5)                  { # Binomial.cauchit
+                linear_terms = tan ((y_corr - 0.5) * 3.1415926535897932384626433832795)
+                    + is_one_y_corr / (1.0 - is_one_y_corr) - is_zero_y_corr / (1.0 - is_zero_y_corr);
+        }}  }}}}}
+    }
+    
+    if (isNaN == 0) {
+        [saturated_log_l, isNaN] = 
+            glm_log_likelihood_part (linear_terms, Y, dist_type, var_power, link_type, link_power);
+    }
+    
+    # desired_eta is the common value of the linear term that the initial
+    # beta should produce: 0 for the power-distribution log link and for
+    # binomial link types >= 2, otherwise the power/log link applied to 0.5
+    if ((dist_type == 1 & link_type == 1 & link_power == 0.0) |
+        (dist_type == 2 & link_type >= 2))
+    {    
+        desired_eta = 0.0;
+    } else { if (link_type == 1 & link_power == 0.0) {
+        desired_eta = log (0.5);
+    } else { if (link_type == 1) {
+        desired_eta = 0.5 ^ link_power;
+    } else {
+        desired_eta = 0.5;
+    }}}
+    
+    beta = matrix (0.0, rows = ncol(X), cols = 1);
+    
+    if (desired_eta != 0.0) {
+        if (icept_status == 1 | icept_status == 2) {
+            # With an intercept, only the intercept coefficient (last row)
+            # is set; all other betas stay zero
+            beta [nrow(beta), 1] = desired_eta;
+        } else {
+            # We want: avg (X %*% ssX_transform %*% beta) = desired_eta
+            # Note that "ssX_transform" is trivial here, hence ignored
+            
+            # straightenX is defined elsewhere in the script; presumably it
+            # returns a beta for which X %*% beta is close to all-ones --
+            # TODO confirm against its definition
+            beta = straightenX (X, 0.000001, max_iter_CG);  
+            beta = beta * desired_eta;
+}   }   }
+
+
+glm_dist = function (Matrix[double] linear_terms, Matrix[double] Y,
+                     int dist_type, double var_power, int link_type, double link_power)
+    return (Matrix[double] g_Y, Matrix[double] w)
+    # ORIGINALLY we returned more meaningful vectors, namely:
+    # Matrix[double] y_residual    : y - y_mean, i.e. y observed - y predicted
+    # Matrix[double] link_gradient : derivative of the link function
+    # Matrix[double] var_function  : variance without dispersion, i.e. the V(mu) function
+    # BUT, this caused roundoff errors, so we had to compute "directly useful" vectors
+    # and skip over the "meaningful intermediaries".  Now we output these two variables:
+    #     g_Y = y_residual / (var_function * link_gradient);
+    #     w   = 1.0 / (var_function * link_gradient ^ 2);
+{
+    num_records = nrow (l

<TRUNCATED>


[25/55] [partial] incubator-systemml git commit: [SYSTEMML-482] [SYSTEMML-480] Adding a Git attributes file to enfore Unix-styled line endings, and normalizing all of the line endings.

Posted by du...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/impute/imputeGaussMCMC.nogradient.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/impute/imputeGaussMCMC.nogradient.dml b/src/test/scripts/applications/impute/imputeGaussMCMC.nogradient.dml
index f30698c..00210c5 100644
--- a/src/test/scripts/applications/impute/imputeGaussMCMC.nogradient.dml
+++ b/src/test/scripts/applications/impute/imputeGaussMCMC.nogradient.dml
@@ -1,453 +1,453 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# Implements the MCMC algorithm for imputation of missing data into a time-series of "reports".
-# Each report is a fixed-size vector of attribute values; reports come out each year/quarter/month ("term").
-# Hard linear equality constraints restrict values in/across the reports, e.g. total cost = sum of all costs.
-# Soft linear regression constraints define dependencies between values in/across the reports.
-# Linear regression parameters are unknown and sampled together with the missing values in the reports.
-#
-# INPUT 1: Initial reports matrix [1 : num_attrs, 1 : num_terms] with missing values usually set to zero,
-#          but it MUST BE CONSISTENT with hard constraints! Set some missing values to nonzero if needed.
-#          There are "num_terms" reports in the matrix, each having "num_attrs" attribute values.
-#
-# INPUT 2: Sparse matrix [1 : (num_terms * num_attrs), 1 : num_frees] that defines a linear map from
-#          "free" variables to the reports matrix. A tensor of size (num_terms * num_attrs * num_frees)
-#          where the reports matrix is stretched into a column-vector [1 : (num_terms * num_attrs)].
-#          Term = t, attribute = i  -->  index = (t-1) * num_attrs + i
-#
-# INPUT 3: Sparse matrix [1 : (num_reg_eqs * num_factors), 1 : (num_terms * num_attrs)] that defines
-#          a linear map from the stretched matrix of reports to the stretched matrix of regression factors.
-#
-# INPUT 4: Sparse vector [1 : (num_reg_eqs * num_factors), 1] that defines "default regression factors"
-#          (if nonzero) to be added to the regression factors before they are multiplied by parameters.
-#
-# INPUT 5: Sparse matrix [1 : (num_reg_eqs * num_factors), 1 : num_params] that defines a linear map
-#          from the vector of parameters to the stretched matrix of regression factors.
-#
-# INPUT 6: Sparse vector [1 : (num_reg_eqs * num_factors), 1] that defines default regression coefficients
-#          (if nonzero) to be added to the parameters (if any) before being multiplied by regression factors.
-#
-# INPUT 7: A vector [1 : num_reg_eqs, 1] of scale multipliers, one per regression
-#
-# INPUT 8 : Number of factors in a regression equation, including the estimated value
-# INPUT 9 : Maximum number of burn-in full iterations (that sample each variable and each parameter once)
-#           BUT the actual number of burn-in iterations may be smaller if "free fall" ends sooner
-# INPUT 10: Maximum number of observed full iterations (that sample each variable and each parameter once)
-#
-# INPUT 11: Output file name and path for the average MCMC reports table
-# INPUT 12: Output file for debugging (currently: the average parameters vector)
-#
-# Example:
-# hadoop jar SystemML.jar -f test/scripts/applications/impute/imputeGaussMCMC.dml -exec singlenode -args
-#    test/scripts/applications/impute/initial_reports
-#    test/scripts/applications/impute/CReps
-#    test/scripts/applications/impute/RegresValueMap
-#    test/scripts/applications/impute/RegresFactorDefault
-#    test/scripts/applications/impute/RegresParamMap
-#    test/scripts/applications/impute/RegresCoeffDefault
-#    test/scripts/applications/impute/RegresScaleMult
-#    4 1000 100
-#    test/scripts/applications/impute/output_reports
-#    test/scripts/applications/impute/debug_info
-
-
-print ("START ImputeGaussMCMC");
-print ("Reading the input files...");
-
-initial_reports = read ($1);
-CReps = read ($2);
-
-num_terms = ncol (initial_reports);   # Number of periods for which reports are produced
-num_attrs = nrow (initial_reports);   # Number of attribute values per each term report
-num_frees = ncol (CReps);   # Number of free variables used to describe all consistent reports
-
-dReps_size = num_terms * num_attrs;
-dReps = matrix (initial_reports, rows = dReps_size, cols = 1, byrow = FALSE);
-
-# We assume that all report-series consistent with hard constraints form an affine set:
-#     reports = CReps %*% freeVars + dReps
-# Here "freeVars" is a vector of "free variables" (degrees of freedom), "CReps" is a linear mapping,
-# and "dReps" are the default values for the reports that correspond to the initial reports matrix.
-
-RegresValueMap = read ($3);
-RegresFactorDefault = read ($4);
-RegresParamMap = read ($5); 
-RegresCoeffDefault = read ($6); 
-RegresScaleMult = read ($7);
-
-num_factors = $8;   # Number of factors in each regression equation, including the estimated value
-num_reg_eqs = nrow (RegresParamMap) / num_factors;   # Number of regression equations
-num_params  = ncol (RegresParamMap);   # Number of parameters used in all regressions
-max_num_burnin_iterations = $9;
-max_num_observed_iterations = $10;
-
-num_opt_iter = 20;
-print ("Performing initial optimization (" + num_opt_iter + " alternating CG steps)...");
-
-freeVars = matrix (0.0, rows = num_frees, cols = 1);
-params = matrix (1.0, rows = num_params, cols = 1);
-reports = CReps %*% freeVars + dReps;
-
-regresValues = RegresValueMap %*% reports + RegresFactorDefault;
-regresParams = RegresParamMap %*% params + RegresCoeffDefault;
-bilinear_vector = regresValues * regresParams;
-
-### DELETE: bilinear_form = matricize (bilinear_vector, num_factors);
-bilinear_form = matrix (bilinear_vector, rows = num_reg_eqs, cols = num_factors, byrow = TRUE);
-bilinear_form_value = sum (RegresScaleMult * rowSums (bilinear_form) * rowSums (bilinear_form));
-
-opt_iter = 1;
-is_step_params = 1;
-is_opt_converged = 0;
-
-print ("Before optimization:    Initial bilinear form value = " + bilinear_form_value);
-        
-
-while (is_opt_converged == 0)
-{
-    deg = is_step_params * num_params + (1 - is_step_params) * num_frees;
-
-    # Compute gradient
-
-    gradient = matrix (0.0, rows = deg, cols = 1);
-    for (i in 1:deg)
-    {
-        if (is_step_params == 1) {
-            bilinear_vector = regresValues * RegresParamMap [, i];
-        } else {
-            bilinear_vector = (RegresValueMap %*% CReps [, i]) * regresParams;
-        }
-        ### DELETE: bilinear_updater = matricize (bilinear_vector, num_factors);
-        bilinear_updater = matrix (bilinear_vector, rows = num_reg_eqs, cols = num_factors, byrow = TRUE);        
-        
-        q_minus_1 = sum (RegresScaleMult * rowSums (bilinear_form - bilinear_updater) * rowSums (bilinear_form - bilinear_updater));
-        q_plus_1  = sum (RegresScaleMult * rowSums (bilinear_form + bilinear_updater) * rowSums (bilinear_form + bilinear_updater));
-        gradient [i, 1] = 0.5 * (q_plus_1 - q_minus_1) + gradient [i, 1];
-    }
-    
-    # Make a few conjugate gradient steps
-    
-    shift_vector = matrix (0.0, rows = deg, cols = 1);
-    residual = gradient;
-    p = - residual;
-    norm_r2 = sum (residual * residual);
-
-    for (j in 1:3)
-    {
-        q = matrix (0.0, rows = deg, cols = 1);
-        for (i in 1:deg) # Want: q = A %*% p;
-        {
-            if (is_step_params == 1) {
-                bilinear_vector = regresValues * RegresParamMap [, i];
-            } else {
-                bilinear_vector = (RegresValueMap %*% CReps [, i]) * regresParams;
-            }
-            ### DELETE: bilinear_updater_1 = matricize (bilinear_vector, num_factors);
-            bilinear_updater_1 = matrix (bilinear_vector, rows = num_reg_eqs, cols = num_factors, byrow = TRUE);
-
-            if (is_step_params == 1) {
-                bilinear_vector = regresValues * (RegresParamMap %*% p);
-            } else {
-                bilinear_vector = (RegresValueMap %*% CReps %*% p) * regresParams;
-            }
-            ### DELETE: bilinear_updater_p = matricize (bilinear_vector, num_factors);
-            bilinear_updater_p = matrix (bilinear_vector, rows = num_reg_eqs, cols = num_factors, byrow = TRUE);
-
-            quadratic_plus_1  = 
-                sum (RegresScaleMult * rowSums (bilinear_form + bilinear_updater_1) * rowSums (bilinear_form + bilinear_updater_1));
-            quadratic_plus_p  = 
-                sum (RegresScaleMult * rowSums (bilinear_form + bilinear_updater_p) * rowSums (bilinear_form + bilinear_updater_p));
-            quadratic_plus_both = 
-                sum (RegresScaleMult * rowSums (bilinear_form + bilinear_updater_1 + bilinear_updater_p) * rowSums (bilinear_form + bilinear_updater_1 + bilinear_updater_p));
-            q [i, 1] = (quadratic_plus_both - quadratic_plus_1 - quadratic_plus_p + bilinear_form_value) + q [i, 1];
-        }
-        
-        alpha = norm_r2 / castAsScalar (t(p) %*% q);
-        shift_vector = shift_vector + alpha * p;
-        old_norm_r2 = norm_r2;
-        residual = residual + alpha * q;
-        norm_r2 = sum (residual * residual);
-        p = - residual + (norm_r2 / old_norm_r2) * p;
-    }
-
-    if (is_step_params == 1) {
-        params = params + shift_vector;
-        regresParams = RegresParamMap %*% params + RegresCoeffDefault;
-    } else {
-        freeVars = freeVars + shift_vector;
-        reports = CReps %*% freeVars + dReps;
-        regresValues = RegresValueMap %*% reports + RegresFactorDefault;
-    }
-    
-    # Update the bilinear form and check convergence
-
-    if (is_step_params == 1) {
-        old_bilinear_form_value = bilinear_form_value;
-    }
-    bilinear_vector = regresValues * regresParams;
-    
-    ### DELETE: bilinear_form = matricize (bilinear_vector, num_factors);
-    bilinear_form = matrix (bilinear_vector, rows = num_reg_eqs, cols = num_factors, byrow = TRUE);
-    bilinear_form_value = sum (RegresScaleMult * rowSums (bilinear_form) * rowSums (bilinear_form));
-        
-    if (is_step_params == 1) {
-        print ("Optimization step " + opt_iter + " (params) :  bilinear form value = " + bilinear_form_value);
-    } else {
-        print ("Optimization step " + opt_iter + " (reports):  bilinear form value = " + bilinear_form_value);
-    }
-    
-    is_step_params = 1 - is_step_params;
-    opt_iter = opt_iter + 1;
-
-    if (is_step_params == 1 & opt_iter > num_opt_iter) {
-        is_opt_converged = 1;
-    }
-}
-
-print ("Performing MCMC initialization...");
-
-max_num_iter = max_num_burnin_iterations + max_num_observed_iterations;
-dim_sample = num_frees + num_params;
-sample_ones = matrix (1.0, rows = dim_sample, cols = 1);
-
-# Generate a random permutation matrix for the sampling order of freeVars and params
-
-SampleOrder = diag (sample_ones);
-num_swaps = 10 * dim_sample;
-rnd = Rand (rows = num_swaps, cols = 1, min = 0.0, max = 1.0);
-left_swap  = round (0.5 + dim_sample * rnd);
-rnd = Rand (rows = num_swaps, cols = 1, min = 0.0, max = 1.0);
-right_swap = round (0.5 + dim_sample * rnd);
-for (swap_i in 1:num_swaps) {
-    l = castAsScalar (left_swap  [swap_i, 1]);
-    r = castAsScalar (right_swap [swap_i, 1]);
-    if (l != r) {
-        tmp_row = SampleOrder [l, ];
-        SampleOrder [l, ] = SampleOrder [r, ];
-        SampleOrder [r, ] = tmp_row;
-    }
-}
-
-pi = 3.1415926535897932384626433832795;
-zero = matrix (0.0, rows = 1, cols = 1);
-
-isVar = colSums (SampleOrder [1 : num_frees, ]);
-sum_of_observed_reports = matrix (0.0, rows = num_attrs, cols = num_terms);
-sum_of_observed_params = matrix (0.0, rows = num_params, cols = 1);
-num_of_observed_reports = 0;
-sum_of_observed_losses = 0.0;
-is_observed = 0;
-
-is_calculating_loss_change = 0;
-is_monitoring_loss_change = 0;
-avg_prob_of_loss_increase = 0;
-update_factor_for_avg_loss_change = 0.02;
-avg_loss_change = -50.0 * update_factor_for_avg_loss_change;
-old_bilinear_form_value = bilinear_form_value;
-
-# Starting MCMC iterations
-
-iter = 0;
-
-while ((iter < max_num_iter) & (num_of_observed_reports < max_num_observed_iterations))
-{
-    iter = iter + 1;
-
-    # Initialize (bi-)linear forms
-    
-    regresValues = RegresValueMap %*% reports + RegresFactorDefault;
-    regresParams = RegresParamMap %*% params + RegresCoeffDefault;
-    bilinear_form_vector = regresValues * regresParams;
-    
-    ### DELETE: bilinear_form = matricize (bilinear_form_vector, num_factors);
-    bilinear_form = matrix (bilinear_form_vector, rows = num_reg_eqs, cols = num_factors, byrow = TRUE);
-    bilinear_form_value = sum (RegresScaleMult * rowSums (bilinear_form) * rowSums (bilinear_form));
-    
-    if (bilinear_form_value > old_bilinear_form_value) {
-        avg_prob_of_loss_increase = avg_prob_of_loss_increase * (1 - update_factor_for_avg_loss_change) + 1 * update_factor_for_avg_loss_change;
-    } else {
-        avg_prob_of_loss_increase = avg_prob_of_loss_increase * (1 - update_factor_for_avg_loss_change);
-    }
-    if (is_calculating_loss_change == 0 & avg_prob_of_loss_increase > 0.4) {
-        is_calculating_loss_change = 1;
-    }
-    if (is_monitoring_loss_change == 0 & avg_prob_of_loss_increase > 0.5) {
-        is_calculating_loss_change = 1;
-        is_monitoring_loss_change = 1;
-        print ("Monitoring the average loss change is ON.        ");
-    }
-    if (is_calculating_loss_change == 1) {
-        avg_loss_change = avg_loss_change * (1 - update_factor_for_avg_loss_change) 
-            + (bilinear_form_value - old_bilinear_form_value) * update_factor_for_avg_loss_change;
-    }
-    if (is_observed == 0 & ((is_monitoring_loss_change == 1 & avg_loss_change > 0) | iter > max_num_burnin_iterations)) {
-        print ("Burn-in ENDS, observation STARTS.        ");
-        is_observed = 1;
-    }
-    
-    old_bilinear_form_value = bilinear_form_value;
-    
-    bilinear_form_value_to_print = bilinear_form_value;
-    if (bilinear_form_value < 100000) {
-        bilinear_form_value_to_print = round (10000 * bilinear_form_value) / 10000;
-    } else {
-    if (bilinear_form_value < 1000000000) {
-        bilinear_form_value_to_print = round (bilinear_form_value);
-    }}
-
-    if (is_monitoring_loss_change == 0) {
-        print ("MCMC iteration " + iter + ":  Prob [loss_increase] = " + (round (10000 * avg_prob_of_loss_increase) / 10000)
-            + ",  bilinear form value = " + bilinear_form_value_to_print);
-    } else {
-        print ("MCMC iteration " + iter + ":  Prob [loss_increase] = " + (round (10000 * avg_prob_of_loss_increase) / 10000) 
-            + ",  bilinear form value = " + bilinear_form_value_to_print + ",  avg_loss_change = " + (round (10000 * avg_loss_change) / 10000));
-    }
-    
-    # Create a normally distributed random sample
-    
-    dim_half_sample = castAsScalar (round (dim_sample / 2 + 0.1 + zero));
-    rnd1 = Rand (rows = dim_half_sample, cols = 1, min = 0.0, max = 1.0);
-    rnd2 = Rand (rows = dim_half_sample, cols = 1, min = 0.0, max = 1.0);
-    rnd_normal_1 = sqrt (- 2.0 * log (rnd1)) * sin (2 * pi * rnd2);
-    rnd_normal_2 = sqrt (- 2.0 * log (rnd1)) * cos (2 * pi * rnd2);
-    rnd_normal = matrix (0.0, rows = dim_sample, cols = 1);
-    rnd_normal [1 : dim_half_sample, ] = rnd_normal_1;
-    rnd_normal [(dim_sample - dim_half_sample + 1) : dim_sample, ] = rnd_normal_2;
-        
-    # Initialize updaters
-    
-    freeVars_updater = freeVars * 0.0;
-    params_updater = params * 0.0;
-    regresValues_updater = regresValues * 0.0;
-    regresParams_updater = regresParams * 0.0;
-    bilinear_updater_vector = bilinear_form_vector * 0.0;
-    
-    # Perform the sampling
-
-    for (idx in 1:dim_sample)
-    {
-        # Generate the sample unit-vector and updaters
-        
-        if (castAsScalar (isVar [1, idx]) > 0.5) {
-            freeVars_updater = SampleOrder [1 : num_frees, idx];
-            regresValues_updater = RegresValueMap %*% CReps %*% freeVars_updater;
-            bilinear_updater_vector = regresValues_updater * regresParams;
-        } else {
-            params_updater = SampleOrder [(num_frees + 1) : dim_sample, idx];
-            regresParams_updater = RegresParamMap %*% params_updater;
-            bilinear_updater_vector = regresValues * regresParams_updater;
-        }
-        ### DELETE: bilinear_updater = matricize (bilinear_updater_vector, num_factors);
-        bilinear_updater = matrix (bilinear_updater_vector, rows = num_reg_eqs, cols = num_factors, byrow = TRUE);
-            
-        # Compute the quadratic by three shift-points: -1, 0, +1
-
-        bilinear_form_value = sum (RegresScaleMult * rowSums (bilinear_form) * rowSums (bilinear_form));
-        q_minus_1 = sum (RegresScaleMult * rowSums (bilinear_form - bilinear_updater) * rowSums (bilinear_form - bilinear_updater));
-        q_plus_1  = sum (RegresScaleMult * rowSums (bilinear_form + bilinear_updater) * rowSums (bilinear_form + bilinear_updater));
-        coeff_b = (q_plus_1 - q_minus_1) / 2.0;
-        coeff_a = (q_plus_1 + q_minus_1) / 2.0 - bilinear_form_value;
-
-        # Find the mean and the sigma for f(x) ~ exp (- (ax^2 + bx + c)),
-        # then compute the shift to get the new sample
-            
-        mean_shift  = - coeff_b / (2.0 * coeff_a);
-        sigma_shift = 1.0 / sqrt (2.0 * coeff_a);
-        shift = mean_shift + sigma_shift * castAsScalar (rnd_normal [idx, 1]);
-            
-# BEGIN DEBUG INSERT
-# mmm = 1;
-# if (castAsScalar (isVar [1, idx]) > 0.5 &          # IT IS A FREE VARIABLE, NOT A PARAMETER
-#     castAsScalar (freeVars_updater [mmm, 1]) > 0)  # IT IS mmm-TH FREE VARIABLE
-# {
-# #   print ("freeVars[" + mmm + "]:  q_minus_1 = " + q_minus_1 + ",   q_plus_1 = " + q_plus_1 + ",   coeff_a = " + coeff_a + ",   coeff_b = " + coeff_b);
-#     print ("freeVars[" + mmm + "]:  q_minus_1 = " + q_minus_1 + ",   q_plus_1 = " + q_plus_1 + ",   mean_shift = " + mean_shift + ",   sigma_shift = " + sigma_shift + ",   shift = " + shift);
-# }
-# if (castAsScalar (isVar [1, idx]) <= 0.5 &       # IT IS A PARAMETER, NOT A FREE VARIABLE
-#     castAsScalar (params_updater [mmm, 1]) > 0)  # IT IS mmm-TH PARAMETER
-# {
-# #   print ("  params[" + mmm + "]:  q_minus_1 = " + q_minus_1 + ",   q_plus_1 = " + q_plus_1 + ",   coeff_a = " + coeff_a + ",   coeff_b = " + coeff_b);
-#     print ("  params[" + mmm + "]:  q_minus_1 = " + q_minus_1 + ",   q_plus_1 = " + q_plus_1 + ",   mean_shift = " + mean_shift + ",   sigma_shift = " + sigma_shift + ",   shift = " + shift);
-# }
-# END DEBUG INSERT
-
-        # Perform the updates
-
-        bilinear_form = bilinear_form + shift * bilinear_updater;
-        if (castAsScalar (isVar [1, idx]) > 0.5) {
-            freeVars = freeVars + shift * freeVars_updater;
-            regresValues = regresValues + shift * regresValues_updater;
-        } else {
-            params = params + shift * params_updater;
-            regresParams = regresParams + shift * regresParams_updater;
-        }
-    }
-    
-    # Update / adjust the reports and the parameters
-    
-    reports = CReps %*% freeVars + dReps;
-    ### DELETE: reports_matrix = matricize (reports, num_attrs);
-    reports_matrix = matrix (reports, rows = num_attrs, cols = num_terms, byrow = FALSE);
-        
-    # Make an observation of the reports and/or the parameters
-    
-    if (is_observed > 0)
-    {
-        sum_of_observed_reports = sum_of_observed_reports + reports_matrix;
-        num_of_observed_reports = num_of_observed_reports + 1;
-
-        sum_of_observed_params = sum_of_observed_params + params;
-        sum_of_observed_losses = sum_of_observed_losses + bilinear_form_value;
-    }
-
-# v1 =castAsScalar(round(10000*reports[1 + (num_terms - 1) * num_attrs, 1])/10000);
-# v2 =castAsScalar(round(10000*reports[2 + (num_terms - 1) * num_attrs, 1])/10000);
-# v3 =castAsScalar(round(10000*reports[3 + (num_terms - 1) * num_attrs, 1])/10000);
-# v4 =castAsScalar(round(10000*reports[4 + (num_terms - 1) * num_attrs, 1])/10000);
-# w1 =castAsScalar(round(10000*reports_matrix[ 1,num_terms])/10000);
-# w2 =castAsScalar(round(10000*reports_matrix[ 2,num_terms])/10000);
-# w3 =castAsScalar(round(10000*reports_matrix[ 3,num_terms])/10000);
-# w4 =castAsScalar(round(10000*reports_matrix[ 4,num_terms])/10000);
-
-# v5 =castAsScalar(round(reports_matrix[ 5,num_terms]));
-# v8 =castAsScalar(round(reports_matrix[ 8,num_terms]));
-# v9 =castAsScalar(round(reports_matrix[ 9,num_terms]));
-# v10=castAsScalar(round(reports_matrix[10,num_terms]));
-# v16=castAsScalar(round(reports_matrix[16,num_terms]));
-# v19=castAsScalar(round(reports_matrix[19,num_terms]));
-
-#print (" Sample = 1:" + v1 + ", 2:" + v2 + ", 3:" + v3 + ", 4:" + v4);
-## + ", 5:" + v5 + ", 8:" + v8 + ", 9:" + v9 + ", 10:" + v10 + ", 16:" + v16 + ", 19:" + v19);
-#print (" Sample = 1:" + w1 + ", 2:" + w2 + ", 3:" + w3 + ", 4:" + w4);
-## + ", 5:" + w5 + ", 8:" + w8 + ", 9:" + w9 + ", 10:" + w10 + ", 16:" + w16 + ", 19:" + w19);
-
-}
-
-print ("Average observed loss = " + (sum_of_observed_losses / num_of_observed_reports));
-print ("Writing out the results...");
-
-avg_reports_matrix = sum_of_observed_reports / num_of_observed_reports;
-avg_params = sum_of_observed_params / num_of_observed_reports;
-write (avg_reports_matrix, $11, format="text");
-write (avg_params, $12, format="text");
-
-print ("END ImputeGaussMCMC");
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# Implements the MCMC algorithm for imputation of missing data into a time-series of "reports".
+# Each report is a fixed-size vector of attribute values; reports come out each year/quarter/month ("term").
+# Hard linear equality constraints restrict values in/across the reports, e.g. total cost = sum of all costs.
+# Soft linear regression constraints define dependencies between values in/across the reports.
+# Linear regression parameters are unknown and sampled together with the missing values in the reports.
+#
+# INPUT 1: Initial reports matrix [1 : num_attrs, 1 : num_terms] with missing values usually set to zero,
+#          but it MUST BE CONSISTENT with hard constraints! Set some missing values to nonzero if needed.
+#          There are "num_terms" reports in the matrix, each having "num_attrs" attribute values.
+#
+# INPUT 2: Sparse matrix [1 : (num_terms * num_attrs), 1 : num_frees] that defines a linear map from
+#          "free" variables to the reports matrix. A tensor of size (num_terms * num_attrs * num_frees)
+#          where the reports matrix is stretched into a column-vector [1 : (num_terms * num_attrs)].
+#          Term = t, attribute = i  -->  index = (t-1) * num_attrs + i
+#
+# INPUT 3: Sparse matrix [1 : (num_reg_eqs * num_factors), 1 : (num_terms * num_attrs)] that defines
+#          a linear map from the stretched matrix of reports to the stretched matrix of regression factors.
+#
+# INPUT 4: Sparse vector [1 : (num_reg_eqs * num_factors), 1] that defines "default regression factors"
+#          (if nonzero) to be added to the regression factors before they are multiplied by parameters.
+#
+# INPUT 5: Sparse matrix [1 : (num_reg_eqs * num_factors), 1 : num_params] that defines a linear map
+#          from the vector of parameters to the stretched matrix of regression factors.
+#
+# INPUT 6: Sparse vector [1 : (num_reg_eqs * num_factors), 1] that defines default regression coefficients
+#          (if nonzero) to be added to the parameters (if any) before being multiplied by regression factors.
+#
+# INPUT 7: A vector [1 : num_reg_eqs, 1] of scale multipliers, one per regression
+#
+# INPUT 8 : Number of factors in a regression equation, including the estimated value
+# INPUT 9 : Maximum number of burn-in full iterations (that sample each variable and each parameter once)
+#           BUT the actual number of burn-in iterations may be smaller if "free fall" ends sooner
+# INPUT 10: Maximum number of observed full iterations (that sample each variable and each parameter once)
+#
+# INPUT 11: Output file name and path for the average MCMC reports table
+# INPUT 12: Output file for debugging (currently: the average parameters vector)
+#
+# Example:
+# hadoop jar SystemML.jar -f test/scripts/applications/impute/imputeGaussMCMC.dml -exec singlenode -args
+#    test/scripts/applications/impute/initial_reports
+#    test/scripts/applications/impute/CReps
+#    test/scripts/applications/impute/RegresValueMap
+#    test/scripts/applications/impute/RegresFactorDefault
+#    test/scripts/applications/impute/RegresParamMap
+#    test/scripts/applications/impute/RegresCoeffDefault
+#    test/scripts/applications/impute/RegresScaleMult
+#    4 1000 100
+#    test/scripts/applications/impute/output_reports
+#    test/scripts/applications/impute/debug_info
+
+
+print ("START ImputeGaussMCMC");
+print ("Reading the input files...");
+
+initial_reports = read ($1);
+CReps = read ($2);
+
+num_terms = ncol (initial_reports);   # Number of periods for which reports are produced
+num_attrs = nrow (initial_reports);   # Number of attribute values per each term report
+num_frees = ncol (CReps);   # Number of free variables used to describe all consistent reports
+
+dReps_size = num_terms * num_attrs;
+dReps = matrix (initial_reports, rows = dReps_size, cols = 1, byrow = FALSE);
+
+# We assume that all report-series consistent with hard constraints form an affine set:
+#     reports = CReps %*% freeVars + dReps
+# Here "freeVars" is a vector of "free variables" (degrees of freedom), "CReps" is a linear mapping,
+# and "dReps" are the default values for the reports that correspond to the initial reports matrix.
+
+RegresValueMap = read ($3);
+RegresFactorDefault = read ($4);
+RegresParamMap = read ($5); 
+RegresCoeffDefault = read ($6); 
+RegresScaleMult = read ($7);
+
+num_factors = $8;   # Number of factors in each regression equation, including the estimated value
+num_reg_eqs = nrow (RegresParamMap) / num_factors;   # Number of regression equations
+num_params  = ncol (RegresParamMap);   # Number of parameters used in all regressions
+max_num_burnin_iterations = $9;
+max_num_observed_iterations = $10;
+
+num_opt_iter = 20;
+print ("Performing initial optimization (" + num_opt_iter + " alternating CG steps)...");
+
+freeVars = matrix (0.0, rows = num_frees, cols = 1);
+params = matrix (1.0, rows = num_params, cols = 1);
+reports = CReps %*% freeVars + dReps;
+
+regresValues = RegresValueMap %*% reports + RegresFactorDefault;
+regresParams = RegresParamMap %*% params + RegresCoeffDefault;
+bilinear_vector = regresValues * regresParams;
+
+### DELETE: bilinear_form = matricize (bilinear_vector, num_factors);
+bilinear_form = matrix (bilinear_vector, rows = num_reg_eqs, cols = num_factors, byrow = TRUE);
+bilinear_form_value = sum (RegresScaleMult * rowSums (bilinear_form) * rowSums (bilinear_form));
+
+opt_iter = 1;
+is_step_params = 1;
+is_opt_converged = 0;
+
+print ("Before optimization:    Initial bilinear form value = " + bilinear_form_value);
+        
+
+while (is_opt_converged == 0)
+{
+    deg = is_step_params * num_params + (1 - is_step_params) * num_frees;
+
+    # Compute gradient
+
+    gradient = matrix (0.0, rows = deg, cols = 1);
+    for (i in 1:deg)
+    {
+        if (is_step_params == 1) {
+            bilinear_vector = regresValues * RegresParamMap [, i];
+        } else {
+            bilinear_vector = (RegresValueMap %*% CReps [, i]) * regresParams;
+        }
+        ### DELETE: bilinear_updater = matricize (bilinear_vector, num_factors);
+        bilinear_updater = matrix (bilinear_vector, rows = num_reg_eqs, cols = num_factors, byrow = TRUE);        
+        
+        q_minus_1 = sum (RegresScaleMult * rowSums (bilinear_form - bilinear_updater) * rowSums (bilinear_form - bilinear_updater));
+        q_plus_1  = sum (RegresScaleMult * rowSums (bilinear_form + bilinear_updater) * rowSums (bilinear_form + bilinear_updater));
+        gradient [i, 1] = 0.5 * (q_plus_1 - q_minus_1) + gradient [i, 1];
+    }
+    
+    # Make a few conjugate gradient steps
+    
+    shift_vector = matrix (0.0, rows = deg, cols = 1);
+    residual = gradient;
+    p = - residual;
+    norm_r2 = sum (residual * residual);
+
+    for (j in 1:3)
+    {
+        q = matrix (0.0, rows = deg, cols = 1);
+        for (i in 1:deg) # Want: q = A %*% p;
+        {
+            if (is_step_params == 1) {
+                bilinear_vector = regresValues * RegresParamMap [, i];
+            } else {
+                bilinear_vector = (RegresValueMap %*% CReps [, i]) * regresParams;
+            }
+            ### DELETE: bilinear_updater_1 = matricize (bilinear_vector, num_factors);
+            bilinear_updater_1 = matrix (bilinear_vector, rows = num_reg_eqs, cols = num_factors, byrow = TRUE);
+
+            if (is_step_params == 1) {
+                bilinear_vector = regresValues * (RegresParamMap %*% p);
+            } else {
+                bilinear_vector = (RegresValueMap %*% CReps %*% p) * regresParams;
+            }
+            ### DELETE: bilinear_updater_p = matricize (bilinear_vector, num_factors);
+            bilinear_updater_p = matrix (bilinear_vector, rows = num_reg_eqs, cols = num_factors, byrow = TRUE);
+
+            quadratic_plus_1  = 
+                sum (RegresScaleMult * rowSums (bilinear_form + bilinear_updater_1) * rowSums (bilinear_form + bilinear_updater_1));
+            quadratic_plus_p  = 
+                sum (RegresScaleMult * rowSums (bilinear_form + bilinear_updater_p) * rowSums (bilinear_form + bilinear_updater_p));
+            quadratic_plus_both = 
+                sum (RegresScaleMult * rowSums (bilinear_form + bilinear_updater_1 + bilinear_updater_p) * rowSums (bilinear_form + bilinear_updater_1 + bilinear_updater_p));
+            q [i, 1] = (quadratic_plus_both - quadratic_plus_1 - quadratic_plus_p + bilinear_form_value) + q [i, 1];
+        }
+        
+        alpha = norm_r2 / castAsScalar (t(p) %*% q);
+        shift_vector = shift_vector + alpha * p;
+        old_norm_r2 = norm_r2;
+        residual = residual + alpha * q;
+        norm_r2 = sum (residual * residual);
+        p = - residual + (norm_r2 / old_norm_r2) * p;
+    }
+
+    if (is_step_params == 1) {
+        params = params + shift_vector;
+        regresParams = RegresParamMap %*% params + RegresCoeffDefault;
+    } else {
+        freeVars = freeVars + shift_vector;
+        reports = CReps %*% freeVars + dReps;
+        regresValues = RegresValueMap %*% reports + RegresFactorDefault;
+    }
+    
+    # Update the bilinear form and check convergence
+
+    if (is_step_params == 1) {
+        old_bilinear_form_value = bilinear_form_value;
+    }
+    bilinear_vector = regresValues * regresParams;
+    
+    ### DELETE: bilinear_form = matricize (bilinear_vector, num_factors);
+    bilinear_form = matrix (bilinear_vector, rows = num_reg_eqs, cols = num_factors, byrow = TRUE);
+    bilinear_form_value = sum (RegresScaleMult * rowSums (bilinear_form) * rowSums (bilinear_form));
+        
+    if (is_step_params == 1) {
+        print ("Optimization step " + opt_iter + " (params) :  bilinear form value = " + bilinear_form_value);
+    } else {
+        print ("Optimization step " + opt_iter + " (reports):  bilinear form value = " + bilinear_form_value);
+    }
+    
+    is_step_params = 1 - is_step_params;
+    opt_iter = opt_iter + 1;
+
+    if (is_step_params == 1 & opt_iter > num_opt_iter) {
+        is_opt_converged = 1;
+    }
+}
+
+print ("Performing MCMC initialization...");
+
+max_num_iter = max_num_burnin_iterations + max_num_observed_iterations;
+dim_sample = num_frees + num_params;
+sample_ones = matrix (1.0, rows = dim_sample, cols = 1);
+
+# Generate a random permutation matrix for the sampling order of freeVars and params
+
+SampleOrder = diag (sample_ones);
+num_swaps = 10 * dim_sample;
+rnd = Rand (rows = num_swaps, cols = 1, min = 0.0, max = 1.0);
+left_swap  = round (0.5 + dim_sample * rnd);
+rnd = Rand (rows = num_swaps, cols = 1, min = 0.0, max = 1.0);
+right_swap = round (0.5 + dim_sample * rnd);
+for (swap_i in 1:num_swaps) {
+    l = castAsScalar (left_swap  [swap_i, 1]);
+    r = castAsScalar (right_swap [swap_i, 1]);
+    if (l != r) {
+        tmp_row = SampleOrder [l, ];
+        SampleOrder [l, ] = SampleOrder [r, ];
+        SampleOrder [r, ] = tmp_row;
+    }
+}
+
+pi = 3.1415926535897932384626433832795;
+zero = matrix (0.0, rows = 1, cols = 1);
+
+isVar = colSums (SampleOrder [1 : num_frees, ]);
+sum_of_observed_reports = matrix (0.0, rows = num_attrs, cols = num_terms);
+sum_of_observed_params = matrix (0.0, rows = num_params, cols = 1);
+num_of_observed_reports = 0;
+sum_of_observed_losses = 0.0;
+is_observed = 0;
+
+is_calculating_loss_change = 0;
+is_monitoring_loss_change = 0;
+avg_prob_of_loss_increase = 0;
+update_factor_for_avg_loss_change = 0.02;
+avg_loss_change = -50.0 * update_factor_for_avg_loss_change;
+old_bilinear_form_value = bilinear_form_value;
+
+# Starting MCMC iterations
+
+iter = 0;
+
+while ((iter < max_num_iter) & (num_of_observed_reports < max_num_observed_iterations))
+{
+    iter = iter + 1;
+
+    # Initialize (bi-)linear forms
+    
+    regresValues = RegresValueMap %*% reports + RegresFactorDefault;
+    regresParams = RegresParamMap %*% params + RegresCoeffDefault;
+    bilinear_form_vector = regresValues * regresParams;
+    
+    ### DELETE: bilinear_form = matricize (bilinear_form_vector, num_factors);
+    bilinear_form = matrix (bilinear_form_vector, rows = num_reg_eqs, cols = num_factors, byrow = TRUE);
+    bilinear_form_value = sum (RegresScaleMult * rowSums (bilinear_form) * rowSums (bilinear_form));
+    
+    if (bilinear_form_value > old_bilinear_form_value) {
+        avg_prob_of_loss_increase = avg_prob_of_loss_increase * (1 - update_factor_for_avg_loss_change) + 1 * update_factor_for_avg_loss_change;
+    } else {
+        avg_prob_of_loss_increase = avg_prob_of_loss_increase * (1 - update_factor_for_avg_loss_change);
+    }
+    if (is_calculating_loss_change == 0 & avg_prob_of_loss_increase > 0.4) {
+        is_calculating_loss_change = 1;
+    }
+    if (is_monitoring_loss_change == 0 & avg_prob_of_loss_increase > 0.5) {
+        is_calculating_loss_change = 1;
+        is_monitoring_loss_change = 1;
+        print ("Monitoring the average loss change is ON.        ");
+    }
+    if (is_calculating_loss_change == 1) {
+        avg_loss_change = avg_loss_change * (1 - update_factor_for_avg_loss_change) 
+            + (bilinear_form_value - old_bilinear_form_value) * update_factor_for_avg_loss_change;
+    }
+    if (is_observed == 0 & ((is_monitoring_loss_change == 1 & avg_loss_change > 0) | iter > max_num_burnin_iterations)) {
+        print ("Burn-in ENDS, observation STARTS.        ");
+        is_observed = 1;
+    }
+    
+    old_bilinear_form_value = bilinear_form_value;
+    
+    bilinear_form_value_to_print = bilinear_form_value;
+    if (bilinear_form_value < 100000) {
+        bilinear_form_value_to_print = round (10000 * bilinear_form_value) / 10000;
+    } else {
+    if (bilinear_form_value < 1000000000) {
+        bilinear_form_value_to_print = round (bilinear_form_value);
+    }}
+
+    if (is_monitoring_loss_change == 0) {
+        print ("MCMC iteration " + iter + ":  Prob [loss_increase] = " + (round (10000 * avg_prob_of_loss_increase) / 10000)
+            + ",  bilinear form value = " + bilinear_form_value_to_print);
+    } else {
+        print ("MCMC iteration " + iter + ":  Prob [loss_increase] = " + (round (10000 * avg_prob_of_loss_increase) / 10000) 
+            + ",  bilinear form value = " + bilinear_form_value_to_print + ",  avg_loss_change = " + (round (10000 * avg_loss_change) / 10000));
+    }
+    
+    # Create a normally distributed random sample
+    
+    dim_half_sample = castAsScalar (round (dim_sample / 2 + 0.1 + zero));
+    rnd1 = Rand (rows = dim_half_sample, cols = 1, min = 0.0, max = 1.0);
+    rnd2 = Rand (rows = dim_half_sample, cols = 1, min = 0.0, max = 1.0);
+    rnd_normal_1 = sqrt (- 2.0 * log (rnd1)) * sin (2 * pi * rnd2);
+    rnd_normal_2 = sqrt (- 2.0 * log (rnd1)) * cos (2 * pi * rnd2);
+    rnd_normal = matrix (0.0, rows = dim_sample, cols = 1);
+    rnd_normal [1 : dim_half_sample, ] = rnd_normal_1;
+    rnd_normal [(dim_sample - dim_half_sample + 1) : dim_sample, ] = rnd_normal_2;
+        
+    # Initialize updaters
+    
+    freeVars_updater = freeVars * 0.0;
+    params_updater = params * 0.0;
+    regresValues_updater = regresValues * 0.0;
+    regresParams_updater = regresParams * 0.0;
+    bilinear_updater_vector = bilinear_form_vector * 0.0;
+    
+    # Perform the sampling
+
+    for (idx in 1:dim_sample)
+    {
+        # Generate the sample unit-vector and updaters
+        
+        if (castAsScalar (isVar [1, idx]) > 0.5) {
+            freeVars_updater = SampleOrder [1 : num_frees, idx];
+            regresValues_updater = RegresValueMap %*% CReps %*% freeVars_updater;
+            bilinear_updater_vector = regresValues_updater * regresParams;
+        } else {
+            params_updater = SampleOrder [(num_frees + 1) : dim_sample, idx];
+            regresParams_updater = RegresParamMap %*% params_updater;
+            bilinear_updater_vector = regresValues * regresParams_updater;
+        }
+        ### DELETE: bilinear_updater = matricize (bilinear_updater_vector, num_factors);
+        bilinear_updater = matrix (bilinear_updater_vector, rows = num_reg_eqs, cols = num_factors, byrow = TRUE);
+            
+        # Compute the quadratic by three shift-points: -1, 0, +1
+
+        bilinear_form_value = sum (RegresScaleMult * rowSums (bilinear_form) * rowSums (bilinear_form));
+        q_minus_1 = sum (RegresScaleMult * rowSums (bilinear_form - bilinear_updater) * rowSums (bilinear_form - bilinear_updater));
+        q_plus_1  = sum (RegresScaleMult * rowSums (bilinear_form + bilinear_updater) * rowSums (bilinear_form + bilinear_updater));
+        coeff_b = (q_plus_1 - q_minus_1) / 2.0;
+        coeff_a = (q_plus_1 + q_minus_1) / 2.0 - bilinear_form_value;
+
+        # Find the mean and the sigma for f(x) ~ exp (- (ax^2 + bx + c)),
+        # then compute the shift to get the new sample
+            
+        mean_shift  = - coeff_b / (2.0 * coeff_a);
+        sigma_shift = 1.0 / sqrt (2.0 * coeff_a);
+        shift = mean_shift + sigma_shift * castAsScalar (rnd_normal [idx, 1]);
+            
+# BEGIN DEBUG INSERT
+# mmm = 1;
+# if (castAsScalar (isVar [1, idx]) > 0.5 &          # IT IS A FREE VARIABLE, NOT A PARAMETER
+#     castAsScalar (freeVars_updater [mmm, 1]) > 0)  # IT IS mmm-TH FREE VARIABLE
+# {
+# #   print ("freeVars[" + mmm + "]:  q_minus_1 = " + q_minus_1 + ",   q_plus_1 = " + q_plus_1 + ",   coeff_a = " + coeff_a + ",   coeff_b = " + coeff_b);
+#     print ("freeVars[" + mmm + "]:  q_minus_1 = " + q_minus_1 + ",   q_plus_1 = " + q_plus_1 + ",   mean_shift = " + mean_shift + ",   sigma_shift = " + sigma_shift + ",   shift = " + shift);
+# }
+# if (castAsScalar (isVar [1, idx]) <= 0.5 &       # IT IS A PARAMETER, NOT A FREE VARIABLE
+#     castAsScalar (params_updater [mmm, 1]) > 0)  # IT IS mmm-TH PARAMETER
+# {
+# #   print ("  params[" + mmm + "]:  q_minus_1 = " + q_minus_1 + ",   q_plus_1 = " + q_plus_1 + ",   coeff_a = " + coeff_a + ",   coeff_b = " + coeff_b);
+#     print ("  params[" + mmm + "]:  q_minus_1 = " + q_minus_1 + ",   q_plus_1 = " + q_plus_1 + ",   mean_shift = " + mean_shift + ",   sigma_shift = " + sigma_shift + ",   shift = " + shift);
+# }
+# END DEBUG INSERT
+
+        # Perform the updates
+
+        bilinear_form = bilinear_form + shift * bilinear_updater;
+        if (castAsScalar (isVar [1, idx]) > 0.5) {
+            freeVars = freeVars + shift * freeVars_updater;
+            regresValues = regresValues + shift * regresValues_updater;
+        } else {
+            params = params + shift * params_updater;
+            regresParams = regresParams + shift * regresParams_updater;
+        }
+    }
+    
+    # Update / adjust the reports and the parameters
+    
+    reports = CReps %*% freeVars + dReps;
+    ### DELETE: reports_matrix = matricize (reports, num_attrs);
+    reports_matrix = matrix (reports, rows = num_attrs, cols = num_terms, byrow = FALSE);
+        
+    # Make an observation of the reports and/or the parameters
+    
+    if (is_observed > 0)
+    {
+        sum_of_observed_reports = sum_of_observed_reports + reports_matrix;
+        num_of_observed_reports = num_of_observed_reports + 1;
+
+        sum_of_observed_params = sum_of_observed_params + params;
+        sum_of_observed_losses = sum_of_observed_losses + bilinear_form_value;
+    }
+
+# v1 =castAsScalar(round(10000*reports[1 + (num_terms - 1) * num_attrs, 1])/10000);
+# v2 =castAsScalar(round(10000*reports[2 + (num_terms - 1) * num_attrs, 1])/10000);
+# v3 =castAsScalar(round(10000*reports[3 + (num_terms - 1) * num_attrs, 1])/10000);
+# v4 =castAsScalar(round(10000*reports[4 + (num_terms - 1) * num_attrs, 1])/10000);
+# w1 =castAsScalar(round(10000*reports_matrix[ 1,num_terms])/10000);
+# w2 =castAsScalar(round(10000*reports_matrix[ 2,num_terms])/10000);
+# w3 =castAsScalar(round(10000*reports_matrix[ 3,num_terms])/10000);
+# w4 =castAsScalar(round(10000*reports_matrix[ 4,num_terms])/10000);
+
+# v5 =castAsScalar(round(reports_matrix[ 5,num_terms]));
+# v8 =castAsScalar(round(reports_matrix[ 8,num_terms]));
+# v9 =castAsScalar(round(reports_matrix[ 9,num_terms]));
+# v10=castAsScalar(round(reports_matrix[10,num_terms]));
+# v16=castAsScalar(round(reports_matrix[16,num_terms]));
+# v19=castAsScalar(round(reports_matrix[19,num_terms]));
+
+#print (" Sample = 1:" + v1 + ", 2:" + v2 + ", 3:" + v3 + ", 4:" + v4);
+## + ", 5:" + v5 + ", 8:" + v8 + ", 9:" + v9 + ", 10:" + v10 + ", 16:" + v16 + ", 19:" + v19);
+#print (" Sample = 1:" + w1 + ", 2:" + w2 + ", 3:" + w3 + ", 4:" + w4);
+## + ", 5:" + w5 + ", 8:" + w8 + ", 9:" + w9 + ", 10:" + w10 + ", 16:" + w16 + ", 19:" + w19);
+
+}
+
+print ("Average observed loss = " + (sum_of_observed_losses / num_of_observed_reports));
+print ("Writing out the results...");
+
+avg_reports_matrix = sum_of_observed_reports / num_of_observed_reports;
+avg_params = sum_of_observed_params / num_of_observed_reports;
+write (avg_reports_matrix, $11, format="text");
+write (avg_params, $12, format="text");
+
+print ("END ImputeGaussMCMC");

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/impute/old/imputeGaussMCMC.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/impute/old/imputeGaussMCMC.dml b/src/test/scripts/applications/impute/old/imputeGaussMCMC.dml
index 3b50c7e..77bd21c 100644
--- a/src/test/scripts/applications/impute/old/imputeGaussMCMC.dml
+++ b/src/test/scripts/applications/impute/old/imputeGaussMCMC.dml
@@ -1,420 +1,420 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# Implements the MCMC algorithm for imputation of missing data into a time-series of "reports".
-# Each report is a fixed-size vector of attribute values; reports come out each year/quarter/month ("term").
-# Hard linear equality constraints restrict values in/across the reports, e.g. total cost = sum of all costs.
-# Soft linear regression constraints define dependencies between values in/across the reports.
-# Linear regression parameters are unknown and sampled together with the missing values in the reports.
-#
-# INPUT 1: Initial reports matrix [1 : num_attrs, 1 : num_terms] with missing values usually set to zero,
-#          but it MUST BE CONSISTENT with hard constraints! Set some missing values to nonzero if needed.
-#          There are "num_terms" reports in the matrix, each having "num_attrs" attribute values.
-#
-# INPUT 2: Sparse matrix [1 : (num_terms * num_attrs), 1 : num_frees] that defines a linear map from
-#          "free" variables to the reports matrix. A tensor of size (num_terms * num_attrs * num_frees)
-#          where the reports matrix is stretched into a column-vector [1 : (num_terms * num_attrs)].
-#          Term = t, attribute = i  -->  index = (t-1) * num_attrs + i
-#
-# INPUT 3: Sparse matrix [1 : (num_reg_eqs * num_factors), 1 : (num_terms * num_attrs)] that defines
-#          a linear map from the stretched matrix of reports to the stretched matrix of regression factors.
-#
-# INPUT 4: Sparse vector [1 : (num_reg_eqs * num_factors), 1] that defines "default regression factors"
-#          (if nonzero) to be added to the regression factors before they are multiplied by parameters.
-#
-# INPUT 5: Sparse matrix [1 : (num_reg_eqs * num_factors), 1 : num_params] that defines a linear map
-#          from the vector of parameters to the stretched matrix of regression factors.
-#
-# INPUT 6: Sparse vector [1 : (num_reg_eqs * num_factors), 1] that defines default regression coefficients
-#          (if nonzero) to be added to the parameters (if any) before being multiplied by regression factors.
-#
-# INPUT 7: A vector [1 : num_reg_eqs, 1] of scale multipliers, one per regression
-#
-# INPUT 8 : Number of factors in a regression equation, including the estimated value
-# INPUT 9 : Number of burn-in  full iterations (that sample each variable and each parameter once)
-# INPUT 10: Number of observed full iterations (that sample each variable and each parameter once)
-#
-# INPUT 11: Output file name and path for the average MCMC reports table
-# INPUT 12: Output file for debugging (currently: the average parameters vector)
-#
-# Example:
-# hadoop jar SystemML.jar -f test/scripts/applications/impute/imputeGaussMCMC.dml -exec singlenode -args
-#    test/scripts/applications/impute/initial_reports
-#    test/scripts/applications/impute/CReps
-#    test/scripts/applications/impute/RegresValueMap
-#    test/scripts/applications/impute/RegresFactorDefault
-#    test/scripts/applications/impute/RegresParamMap
-#    test/scripts/applications/impute/RegresCoeffDefault
-#    test/scripts/applications/impute/RegresScaleMult
-#    4 200 1000
-#    test/scripts/applications/impute/output_reports
-#    test/scripts/applications/impute/debug_info
-
-
-print ("START ImputeGaussMCMC");
-print ("Reading the input files...");
-
-initial_reports = read ($1);
-dReps = vectorize (initial_reports);
-CReps = read ($2);
-
-num_terms = ncol (initial_reports);   # Number of periods for which reports are produced
-num_attrs = nrow (initial_reports);   # Number of attribute values per each term report
-num_frees = ncol (CReps);   # Number of free variables used to describe all consistent reports
-
-# We assume that all report-series consistent with hard constraints form an affine set:
-#     reports = CReps %*% freeVars + dReps
-# Here "freeVars" is a vector of "free variables" (degrees of freedom), "CReps" is a linear mapping,
-# and "dReps" are the default values for the reports that correspond to the initial reports matrix.
-
-RegresValueMap = read ($3);
-RegresFactorDefault = read ($4);
-RegresParamMap = read ($5); 
-RegresCoeffDefault = read ($6); 
-RegresScaleMult = read ($7);
-tRegresScaleMult = t(RegresScaleMult);
-
-num_factors = $8;   # Number of factors in each regression equation, including the estimated value
-num_reg_eqs = nrow (RegresParamMap) / num_factors;   # Number of regression equations
-num_params  = ncol (RegresParamMap);   # Number of parameters used in all regressions
-num_burnin_iterations = $9;
-num_observed_iterations = $10;
-
-num_opt_iter = 20;
-print ("Performing initial optimization (" + num_opt_iter + " alternating CG steps)...");
-
-freeVars = matrix (0.0, rows = num_frees, cols = 1);
-params = matrix (1.0, rows = num_params, cols = 1);
-reports = CReps %*% freeVars + dReps;
-
-regresValues = RegresValueMap %*% reports + RegresFactorDefault;
-regresParams = RegresParamMap %*% params + RegresCoeffDefault;
-bilinear_vector = regresValues * regresParams;
-bilinear_form = matricize (bilinear_vector, num_factors);
-bilinear_form_value = sum (tRegresScaleMult * colSums (bilinear_form) * colSums (bilinear_form));
-    
-opt_iter = 1;
-is_step_params = 1;
-is_opt_converged = 0;
-
-print ("Before optimization:    Initial bilinear form value = " + bilinear_form_value);
-        
-
-while (is_opt_converged == 0)
-{
-    deg = is_step_params * num_params + (1 - is_step_params) * num_frees;
-
-    # Compute gradient
-
-    gradient = matrix (0.0, rows = deg, cols = 1);
-    for (i in 1:deg)
-    {
-        if (is_step_params == 1) {
-            bilinear_vector = regresValues * RegresParamMap [, i];
-        } else {
-            bilinear_vector = (RegresValueMap %*% CReps [, i]) * regresParams;
-        }
-        bilinear_updater = matricize (bilinear_vector, num_factors);
-        q_minus_1 = sum (tRegresScaleMult * colSums (bilinear_form - bilinear_updater) * colSums (bilinear_form - bilinear_updater));
-        q_plus_1  = sum (tRegresScaleMult * colSums (bilinear_form + bilinear_updater) * colSums (bilinear_form + bilinear_updater));
-        gradient [i, 1] = 0.5 * (q_plus_1 - q_minus_1) + gradient [i, 1];
-    }
-    
-    # Make a few conjugate gradient steps
-    
-    shift_vector = matrix (0.0, rows = deg, cols = 1);
-    residual = gradient;
-    p = - residual;
-    norm_r2 = sum (residual * residual);
-
-    for (j in 1:3)
-    {
-        q = matrix (0.0, rows = deg, cols = 1);
-        for (i in 1:deg) # Want: q = A %*% p;
-        {
-            if (is_step_params == 1) {
-                bilinear_vector = regresValues * RegresParamMap [, i];
-            } else {
-                bilinear_vector = (RegresValueMap %*% CReps [, i]) * regresParams;
-            }
-            bilinear_updater_1 = matricize (bilinear_vector, num_factors);
-
-            if (is_step_params == 1) {
-                bilinear_vector = regresValues * (RegresParamMap %*% p);
-            } else {
-                bilinear_vector = (RegresValueMap %*% CReps %*% p) * regresParams;
-            }
-            bilinear_updater_p = matricize (bilinear_vector, num_factors);
-            
-            quadratic_plus_1  = 
-                sum (tRegresScaleMult * colSums (bilinear_form + bilinear_updater_1) * colSums (bilinear_form + bilinear_updater_1));
-            quadratic_plus_p  = 
-                sum (tRegresScaleMult * colSums (bilinear_form + bilinear_updater_p) * colSums (bilinear_form + bilinear_updater_p));
-            quadratic_plus_both = 
-                sum (tRegresScaleMult * colSums (bilinear_form + bilinear_updater_1 + bilinear_updater_p) * colSums (bilinear_form + bilinear_updater_1 + bilinear_updater_p));
-            q [i, 1] = (quadratic_plus_both - quadratic_plus_1 - quadratic_plus_p + bilinear_form_value) + q [i, 1];
-        }
-        
-        alpha = norm_r2 / castAsScalar (t(p) %*% q);
-        shift_vector = shift_vector + alpha * p;
-        old_norm_r2 = norm_r2;
-        residual = residual + alpha * q;
-        norm_r2 = sum (residual * residual);
-        p = - residual + (norm_r2 / old_norm_r2) * p;
-    }
-
-    if (is_step_params == 1) {
-        params = params + shift_vector;
-        regresParams = RegresParamMap %*% params + RegresCoeffDefault;
-    } else {
-        freeVars = freeVars + shift_vector;
-        reports = CReps %*% freeVars + dReps;
-        regresValues = RegresValueMap %*% reports + RegresFactorDefault;
-    }
-    
-    # Update the bilinear form and check convergence
-
-    if (is_step_params == 1) {
-        old_bilinear_form_value = bilinear_form_value;
-    }
-    bilinear_vector = regresValues * regresParams;
-    bilinear_form = matricize (bilinear_vector, num_factors);
-    bilinear_form_value = sum (tRegresScaleMult * colSums (bilinear_form) * colSums (bilinear_form));
-    
-    if (is_step_params == 1) {
-        print ("Optimization step " + opt_iter + " (params) :  bilinear form value = " + bilinear_form_value);
-    } else {
-        print ("Optimization step " + opt_iter + " (reports):  bilinear form value = " + bilinear_form_value);
-    }
-    
-    is_step_params = 1 - is_step_params;
-    opt_iter = opt_iter + 1;
-
-    if (is_step_params == 1 & opt_iter > num_opt_iter) {
-        is_opt_converged = 1;
-    }
-}
-
-print ("Performing MCMC initialization...");
-
-num_iter = num_burnin_iterations + num_observed_iterations;
-dim_sample = num_frees + num_params;
-sample_ones = matrix (1.0, rows = dim_sample, cols = 1);
-
-# Generate a random permutation matrix for the sampling order of freeVars and params
-
-SampleOrder = diag (sample_ones);
-num_swaps = 10 * dim_sample;
-rnd = Rand (rows = num_swaps, cols = 1, min = 0.0, max = 1.0);
-left_swap  = round (0.5 + dim_sample * rnd);
-rnd = Rand (rows = num_swaps, cols = 1, min = 0.0, max = 1.0);
-right_swap = round (0.5 + dim_sample * rnd);
-for (swap_i in 1:num_swaps) {
-    l = castAsScalar (left_swap  [swap_i, 1]);
-    r = castAsScalar (right_swap [swap_i, 1]);
-    if (l != r) {
-        tmp_row = SampleOrder [l, ];
-        SampleOrder [l, ] = SampleOrder [r, ];
-        SampleOrder [r, ] = tmp_row;
-    }
-}
-
-isVar = colSums (SampleOrder [1 : num_frees, ]);
-sum_of_observed_reports = matrix (0.0, rows = num_attrs, cols = num_terms);
-sum_of_observed_params = matrix (0.0, rows = num_params, cols = 1);
-
-num_of_observed_reports = 0;
-pi = 3.1415926535897932384626433832795;
-zero = matrix (0.0, rows = 1, cols = 1);
-
-# Starting MCMC iterations
-
-for (iter in 1:num_iter)
-{
-    # Initialize (bi-)linear forms
-    
-    regresValues = RegresValueMap %*% reports + RegresFactorDefault;
-    regresParams = RegresParamMap %*% params + RegresCoeffDefault;
-    bilinear_form_vector = regresValues * regresParams;
-    bilinear_form = matricize (bilinear_form_vector, num_factors);
-    bilinear_form_value = sum (tRegresScaleMult * colSums (bilinear_form) * colSums (bilinear_form));
-
-    if (iter <= num_burnin_iterations) {
-        print ("MCMC iteration " + iter + " (burn-in) :  bilinear form value = " + bilinear_form_value);
-    } else {
-        print ("MCMC iteration " + iter + " (observed):  bilinear form value = " + bilinear_form_value);
-    }
-    
-    # Create a normally distributed random sample
-    
-    dim_half_sample = castAsScalar (round (dim_sample / 2 + 0.1 + zero));
-    rnd1 = Rand (rows = dim_half_sample, cols = 1, min = 0.0, max = 1.0);
-    rnd2 = Rand (rows = dim_half_sample, cols = 1, min = 0.0, max = 1.0);
-    rnd_normal_1 = sqrt (- 2.0 * log (rnd1)) * sin (2 * pi * rnd2);
-    rnd_normal_2 = sqrt (- 2.0 * log (rnd1)) * cos (2 * pi * rnd2);
-    rnd_normal = matrix (0.0, rows = dim_sample, cols = 1);
-    rnd_normal [1 : dim_half_sample, ] = rnd_normal_1;
-    rnd_normal [(dim_sample - dim_half_sample + 1) : dim_sample, ] = rnd_normal_2;
-        
-    # Initialize updaters
-    
-    freeVars_updater = freeVars * 0.0;
-    params_updater = params * 0.0;
-    regresValues_updater = regresValues * 0.0;
-    regresParams_updater = regresParams * 0.0;
-    bilinear_updater_vector = bilinear_form_vector * 0.0;
-    
-    # Perform the sampling
-
-    for (idx in 1:dim_sample)
-    {
-        # Generate the sample unit-vector and updaters
-        
-        if (castAsScalar (isVar [1, idx]) > 0.5) {
-            freeVars_updater = SampleOrder [1 : num_frees, idx];
-            regresValues_updater = RegresValueMap %*% CReps %*% freeVars_updater;
-            bilinear_updater_vector = regresValues_updater * regresParams;
-        } else {
-            params_updater = SampleOrder [(num_frees + 1) : dim_sample, idx];
-            regresParams_updater = RegresParamMap %*% params_updater;
-            bilinear_updater_vector = regresValues * regresParams_updater;
-        }
-        bilinear_updater = matricize (bilinear_updater_vector, num_factors);
-            
-        # Compute the quadratic by three shift-points: -1, 0, +1
-        
-        bilinear_form_value = sum (tRegresScaleMult * colSums (bilinear_form) * colSums (bilinear_form));
-        q_minus_1 = sum (tRegresScaleMult * colSums (bilinear_form - bilinear_updater) * colSums (bilinear_form - bilinear_updater));
-        q_plus_1  = sum (tRegresScaleMult * colSums (bilinear_form + bilinear_updater) * colSums (bilinear_form + bilinear_updater));
-        coeff_b = (q_plus_1 - q_minus_1) / 2.0;
-        coeff_a = (q_plus_1 + q_minus_1) / 2.0 - bilinear_form_value;
-
-# BEGIN DEBUG INSERT
-# mmm = 1;
-# if (castAsScalar (isVar [1, idx]) > 0.5) {
-# for (iii in 2:num_frees) {if (castAsScalar (freeVars_updater [iii, 1] - freeVars_updater [mmm, 1]) > 0) {mmm = iii;}}
-# print ("freeVars[" + mmm + "]:  q_minus_1 = " + q_minus_1 + ",   q_plus_1 = " + q_plus_1 + ",   coeff_a = " + coeff_a);
-# } else {
-# for (iii in 2:num_params) {if (castAsScalar (params_updater [iii, 1] - params_updater [mmm, 1]) > 0) {mmm = iii;}}
-# print ("  params[" + mmm + "]:  q_minus_1 = " + q_minus_1 + ",   q_plus_1 = " + q_plus_1 + ",   coeff_a = " + coeff_a);
-# }
-# END DEBUG INSERT
-
-        # Find the mean and the sigma for f(x) ~ exp (- (ax^2 + bx + c)),
-        # then compute the shift to get the new sample
-            
-        mean_shift  = - coeff_b / (2.0 * coeff_a);
-        sigma_shift = 1.0 / sqrt (2.0 * coeff_a);
-        shift = mean_shift + sigma_shift * castAsScalar (rnd_normal [idx, 1]);
-            
-        # Perform the updates
-
-        bilinear_form = bilinear_form + shift * bilinear_updater;
-        if (castAsScalar (isVar [1, idx]) > 0.5) {
-            freeVars = freeVars + shift * freeVars_updater;
-            regresValues = regresValues + shift * regresValues_updater;
-        } else {
-            params = params + shift * params_updater;
-            regresParams = regresParams + shift * regresParams_updater;
-        }
-    }
-    
-    # Update / adjust the reports and the parameters
-    
-    reports = CReps %*% freeVars + dReps;
-    reports_matrix = matricize (reports, num_attrs);
-    
-    # Make an observation of the reports and/or the parameters
-    
-    if (iter > num_burnin_iterations)
-    {
-        sum_of_observed_reports = sum_of_observed_reports + reports_matrix;
-        num_of_observed_reports = num_of_observed_reports + 1;
-
-        sum_of_observed_params = sum_of_observed_params + params;
-    }
-
-
-v1 =castAsScalar(round(reports_matrix[ 1,num_terms]));
-v2 =castAsScalar(round(reports_matrix[ 2,num_terms]));
-v3 =castAsScalar(round(reports_matrix[ 3,num_terms]));
-v4 =castAsScalar(round(reports_matrix[ 4,num_terms]));
-v5 =castAsScalar(round(reports_matrix[ 5,num_terms]));
-v8 =castAsScalar(round(reports_matrix[ 8,num_terms]));
-v9 =castAsScalar(round(reports_matrix[ 9,num_terms]));
-v10=castAsScalar(round(reports_matrix[10,num_terms]));
-v16=castAsScalar(round(reports_matrix[16,num_terms]));
-v19=castAsScalar(round(reports_matrix[19,num_terms]));
-print (
-" Sample = 1:" + v1 + ", 2:" + v2 + ", 3:" + v3 + ", 4:" + v4 + ", 5:" + v5 +
-", 8:" + v8 + ", 9:" + v9 + ", 10:" + v10 + ", 16:" + v16 + ", 19:" + v19);
-
-}
-
-print ("Writing out the results...");
-
-avg_reports_matrix = sum_of_observed_reports / num_of_observed_reports;
-avg_params = sum_of_observed_params / num_of_observed_reports;
-write (avg_reports_matrix, $11, format="text");
-write (avg_params, $12, format="text");
-
-print ("END ImputeGaussMCMC");
-
-
-
-
-# Outputs a column vector that consists of the columns of the input matrix in sequential order
-# NEEDS TO BE PARALLELIZED
-vectorize = function (Matrix[double] M) return (Matrix[double] v)
-{
-    n_rows = nrow (M);
-    n_cols = ncol (M);
-    n = n_rows * n_cols;
-    v = matrix (0.0, rows = n, cols = 1);
-    for (i in 1:n_cols) {
-        left_row = n_rows * (i-1) + 1;
-        right_row = n_rows * i;
-        v [left_row : right_row, 1] = M [, i];
-    }
-}
-
-# Takes a column vector, splits it into columns of "n_rows" in each, and combines into a matrix
-# NEEDS TO BE PARALLELIZED
-matricize = function (Matrix[double] v, int n_rows) return (Matrix[double] M)
-{
-    zero = matrix (0.0, rows = 1, cols = 1);
-    n = nrow (v);
-    n_cols = castAsScalar (round (zero + (n / n_rows)));
-    if (n_cols * n_rows < n) {
-        n_cols = n_cols + 1;
-    }
-    M = matrix (0.0, rows = n_rows, cols = n_cols);
-    for (i in 1:n_cols) {
-        left_row = n_rows * (i-1) + 1;
-        right_row = n_rows * i;
-        if (right_row <= n) {
-            M [, i] = v [left_row : right_row, 1];
-        } else {
-            new_right = n - left_row + 1;
-            M [1 : new_right, i] = v [left_row : n, 1];
-        }
-    }
-}
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# Implements the MCMC algorithm for imputation of missing data into a time-series of "reports".
+# Each report is a fixed-size vector of attribute values; reports come out each year/quarter/month ("term").
+# Hard linear equality constraints restrict values in/across the reports, e.g. total cost = sum of all costs.
+# Soft linear regression constraints define dependencies between values in/across the reports.
+# Linear regression parameters are unknown and sampled together with the missing values in the reports.
+#
+# INPUT 1: Initial reports matrix [1 : num_attrs, 1 : num_terms] with missing values usually set to zero,
+#          but it MUST BE CONSISTENT with hard constraints! Set some missing values to nonzero if needed.
+#          There are "num_terms" reports in the matrix, each having "num_attrs" attribute values.
+#
+# INPUT 2: Sparse matrix [1 : (num_terms * num_attrs), 1 : num_frees] that defines a linear map from
+#          "free" variables to the reports matrix. A tensor of size (num_terms * num_attrs * num_frees)
+#          where the reports matrix is stretched into a column-vector [1 : (num_terms * num_attrs)].
+#          Term = t, attribute = i  -->  index = (t-1) * num_attrs + i
+#
+# INPUT 3: Sparse matrix [1 : (num_reg_eqs * num_factors), 1 : (num_terms * num_attrs)] that defines
+#          a linear map from the stretched matrix of reports to the stretched matrix of regression factors.
+#
+# INPUT 4: Sparse vector [1 : (num_reg_eqs * num_factors), 1] that defines "default regression factors"
+#          (if nonzero) to be added to the regression factors before they are multiplied by parameters.
+#
+# INPUT 5: Sparse matrix [1 : (num_reg_eqs * num_factors), 1 : num_params] that defines a linear map
+#          from the vector of parameters to the stretched matrix of regression factors.
+#
+# INPUT 6: Sparse vector [1 : (num_reg_eqs * num_factors), 1] that defines default regression coefficients
+#          (if nonzero) to be added to the parameters (if any) before being multiplied by regression factors.
+#
+# INPUT 7: A vector [1 : num_reg_eqs, 1] of scale multipliers, one per regression
+#
+# INPUT 8 : Number of factors in a regression equation, including the estimated value
+# INPUT 9 : Number of burn-in  full iterations (that sample each variable and each parameter once)
+# INPUT 10: Number of observed full iterations (that sample each variable and each parameter once)
+#
+# INPUT 11: Output file name and path for the average MCMC reports table
+# INPUT 12: Output file for debugging (currently: the average parameters vector)
+#
+# Example:
+# hadoop jar SystemML.jar -f test/scripts/applications/impute/imputeGaussMCMC.dml -exec singlenode -args
+#    test/scripts/applications/impute/initial_reports
+#    test/scripts/applications/impute/CReps
+#    test/scripts/applications/impute/RegresValueMap
+#    test/scripts/applications/impute/RegresFactorDefault
+#    test/scripts/applications/impute/RegresParamMap
+#    test/scripts/applications/impute/RegresCoeffDefault
+#    test/scripts/applications/impute/RegresScaleMult
+#    4 200 1000
+#    test/scripts/applications/impute/output_reports
+#    test/scripts/applications/impute/debug_info
+
+
+print ("START ImputeGaussMCMC");
+print ("Reading the input files...");
+
+initial_reports = read ($1);
+dReps = vectorize (initial_reports);
+CReps = read ($2);
+
+num_terms = ncol (initial_reports);   # Number of periods for which reports are produced
+num_attrs = nrow (initial_reports);   # Number of attribute values per each term report
+num_frees = ncol (CReps);   # Number of free variables used to describe all consistent reports
+
+# We assume that all report-series consistent with hard constraints form an affine set:
+#     reports = CReps %*% freeVars + dReps
+# Here "freeVars" is a vector of "free variables" (degrees of freedom), "CReps" is a linear mapping,
+# and "dReps" are the default values for the reports that correspond to the initial reports matrix.
+
+RegresValueMap = read ($3);
+RegresFactorDefault = read ($4);
+RegresParamMap = read ($5); 
+RegresCoeffDefault = read ($6); 
+RegresScaleMult = read ($7);
+tRegresScaleMult = t(RegresScaleMult);
+
+num_factors = $8;   # Number of factors in each regression equation, including the estimated value
+num_reg_eqs = nrow (RegresParamMap) / num_factors;   # Number of regression equations
+num_params  = ncol (RegresParamMap);   # Number of parameters used in all regressions
+num_burnin_iterations = $9;
+num_observed_iterations = $10;
+
+num_opt_iter = 20;
+print ("Performing initial optimization (" + num_opt_iter + " alternating CG steps)...");
+
+freeVars = matrix (0.0, rows = num_frees, cols = 1);
+params = matrix (1.0, rows = num_params, cols = 1);
+reports = CReps %*% freeVars + dReps;
+
+regresValues = RegresValueMap %*% reports + RegresFactorDefault;
+regresParams = RegresParamMap %*% params + RegresCoeffDefault;
+bilinear_vector = regresValues * regresParams;
+bilinear_form = matricize (bilinear_vector, num_factors);
+bilinear_form_value = sum (tRegresScaleMult * colSums (bilinear_form) * colSums (bilinear_form));
+    
+opt_iter = 1;
+is_step_params = 1;
+is_opt_converged = 0;
+
+print ("Before optimization:    Initial bilinear form value = " + bilinear_form_value);
+        
+
+while (is_opt_converged == 0)
+{
+    deg = is_step_params * num_params + (1 - is_step_params) * num_frees;
+
+    # Compute gradient
+
+    gradient = matrix (0.0, rows = deg, cols = 1);
+    for (i in 1:deg)
+    {
+        if (is_step_params == 1) {
+            bilinear_vector = regresValues * RegresParamMap [, i];
+        } else {
+            bilinear_vector = (RegresValueMap %*% CReps [, i]) * regresParams;
+        }
+        bilinear_updater = matricize (bilinear_vector, num_factors);
+        q_minus_1 = sum (tRegresScaleMult * colSums (bilinear_form - bilinear_updater) * colSums (bilinear_form - bilinear_updater));
+        q_plus_1  = sum (tRegresScaleMult * colSums (bilinear_form + bilinear_updater) * colSums (bilinear_form + bilinear_updater));
+        gradient [i, 1] = 0.5 * (q_plus_1 - q_minus_1) + gradient [i, 1];
+    }
+    
+    # Make a few conjugate gradient steps
+    
+    shift_vector = matrix (0.0, rows = deg, cols = 1);
+    residual = gradient;
+    p = - residual;
+    norm_r2 = sum (residual * residual);
+
+    for (j in 1:3)
+    {
+        q = matrix (0.0, rows = deg, cols = 1);
+        for (i in 1:deg) # Want: q = A %*% p;
+        {
+            if (is_step_params == 1) {
+                bilinear_vector = regresValues * RegresParamMap [, i];
+            } else {
+                bilinear_vector = (RegresValueMap %*% CReps [, i]) * regresParams;
+            }
+            bilinear_updater_1 = matricize (bilinear_vector, num_factors);
+
+            if (is_step_params == 1) {
+                bilinear_vector = regresValues * (RegresParamMap %*% p);
+            } else {
+                bilinear_vector = (RegresValueMap %*% CReps %*% p) * regresParams;
+            }
+            bilinear_updater_p = matricize (bilinear_vector, num_factors);
+            
+            quadratic_plus_1  = 
+                sum (tRegresScaleMult * colSums (bilinear_form + bilinear_updater_1) * colSums (bilinear_form + bilinear_updater_1));
+            quadratic_plus_p  = 
+                sum (tRegresScaleMult * colSums (bilinear_form + bilinear_updater_p) * colSums (bilinear_form + bilinear_updater_p));
+            quadratic_plus_both = 
+                sum (tRegresScaleMult * colSums (bilinear_form + bilinear_updater_1 + bilinear_updater_p) * colSums (bilinear_form + bilinear_updater_1 + bilinear_updater_p));
+            q [i, 1] = (quadratic_plus_both - quadratic_plus_1 - quadratic_plus_p + bilinear_form_value) + q [i, 1];
+        }
+        
+        alpha = norm_r2 / castAsScalar (t(p) %*% q);
+        shift_vector = shift_vector + alpha * p;
+        old_norm_r2 = norm_r2;
+        residual = residual + alpha * q;
+        norm_r2 = sum (residual * residual);
+        p = - residual + (norm_r2 / old_norm_r2) * p;
+    }
+
+    if (is_step_params == 1) {
+        params = params + shift_vector;
+        regresParams = RegresParamMap %*% params + RegresCoeffDefault;
+    } else {
+        freeVars = freeVars + shift_vector;
+        reports = CReps %*% freeVars + dReps;
+        regresValues = RegresValueMap %*% reports + RegresFactorDefault;
+    }
+    
+    # Update the bilinear form and check convergence
+
+    if (is_step_params == 1) {
+        old_bilinear_form_value = bilinear_form_value;
+    }
+    bilinear_vector = regresValues * regresParams;
+    bilinear_form = matricize (bilinear_vector, num_factors);
+    bilinear_form_value = sum (tRegresScaleMult * colSums (bilinear_form) * colSums (bilinear_form));
+    
+    if (is_step_params == 1) {
+        print ("Optimization step " + opt_iter + " (params) :  bilinear form value = " + bilinear_form_value);
+    } else {
+        print ("Optimization step " + opt_iter + " (reports):  bilinear form value = " + bilinear_form_value);
+    }
+    
+    is_step_params = 1 - is_step_params;
+    opt_iter = opt_iter + 1;
+
+    if (is_step_params == 1 & opt_iter > num_opt_iter) {
+        is_opt_converged = 1;
+    }
+}
+
+print ("Performing MCMC initialization...");
+
+num_iter = num_burnin_iterations + num_observed_iterations;
+dim_sample = num_frees + num_params;
+sample_ones = matrix (1.0, rows = dim_sample, cols = 1);
+
+# Generate a random permutation matrix for the sampling order of freeVars and params
+
+SampleOrder = diag (sample_ones);
+num_swaps = 10 * dim_sample;
+rnd = Rand (rows = num_swaps, cols = 1, min = 0.0, max = 1.0);
+left_swap  = round (0.5 + dim_sample * rnd);
+rnd = Rand (rows = num_swaps, cols = 1, min = 0.0, max = 1.0);
+right_swap = round (0.5 + dim_sample * rnd);
+for (swap_i in 1:num_swaps) {
+    l = castAsScalar (left_swap  [swap_i, 1]);
+    r = castAsScalar (right_swap [swap_i, 1]);
+    if (l != r) {
+        tmp_row = SampleOrder [l, ];
+        SampleOrder [l, ] = SampleOrder [r, ];
+        SampleOrder [r, ] = tmp_row;
+    }
+}
+
+isVar = colSums (SampleOrder [1 : num_frees, ]);
+sum_of_observed_reports = matrix (0.0, rows = num_attrs, cols = num_terms);
+sum_of_observed_params = matrix (0.0, rows = num_params, cols = 1);
+
+num_of_observed_reports = 0;
+pi = 3.1415926535897932384626433832795;
+zero = matrix (0.0, rows = 1, cols = 1);
+
+# Starting MCMC iterations
+
+for (iter in 1:num_iter)
+{
+    # Initialize (bi-)linear forms
+    
+    regresValues = RegresValueMap %*% reports + RegresFactorDefault;
+    regresParams = RegresParamMap %*% params + RegresCoeffDefault;
+    bilinear_form_vector = regresValues * regresParams;
+    bilinear_form = matricize (bilinear_form_vector, num_factors);
+    bilinear_form_value = sum (tRegresScaleMult * colSums (bilinear_form) * colSums (bilinear_form));
+
+    if (iter <= num_burnin_iterations) {
+        print ("MCMC iteration " + iter + " (burn-in) :  bilinear form value = " + bilinear_form_value);
+    } else {
+        print ("MCMC iteration " + iter + " (observed):  bilinear form value = " + bilinear_form_value);
+    }
+    
+    # Create a normally distributed random sample
+    
+    dim_half_sample = castAsScalar (round (dim_sample / 2 + 0.1 + zero));
+    rnd1 = Rand (rows = dim_half_sample, cols = 1, min = 0.0, max = 1.0);
+    rnd2 = Rand (rows = dim_half_sample, cols = 1, min = 0.0, max = 1.0);
+    rnd_normal_1 = sqrt (- 2.0 * log (rnd1)) * sin (2 * pi * rnd2);
+    rnd_normal_2 = sqrt (- 2.0 * log (rnd1)) * cos (2 * pi * rnd2);
+    rnd_normal = matrix (0.0, rows = dim_sample, cols = 1);
+    rnd_normal [1 : dim_half_sample, ] = rnd_normal_1;
+    rnd_normal [(dim_sample - dim_half_sample + 1) : dim_sample, ] = rnd_normal_2;
+        
+    # Initialize updaters
+    
+    freeVars_updater = freeVars * 0.0;
+    params_updater = params * 0.0;
+    regresValues_updater = regresValues * 0.0;
+    regresParams_updater = regresParams * 0.0;
+    bilinear_updater_vector = bilinear_form_vector * 0.0;
+    
+    # Perform the sampling
+
+    for (idx in 1:dim_sample)
+    {
+        # Generate the sample unit-vector and updaters
+        
+        if (castAsScalar (isVar [1, idx]) > 0.5) {
+            freeVars_updater = SampleOrder [1 : num_frees, idx];
+            regresValues_updater = RegresValueMap %*% CReps %*% freeVars_updater;
+            bilinear_updater_vector = regresValues_updater * regresParams;
+        } else {
+            params_updater = SampleOrder [(num_frees + 1) : dim_sample, idx];
+            regresParams_updater = RegresParamMap %*% params_updater;
+            bilinear_updater_vector = regresValues * regresParams_updater;
+        }
+        bilinear_updater = matricize (bilinear_updater_vector, num_factors);
+            
+        # Compute the quadratic by three shift-points: -1, 0, +1
+        
+        bilinear_form_value = sum (tRegresScaleMult * colSums (bilinear_form) * colSums (bilinear_form));
+        q_minus_1 = sum (tRegresScaleMult * colSums (bilinear_form - bilinear_updater) * colSums (bilinear_form - bilinear_updater));
+        q_plus_1  = sum (tRegresScaleMult * colSums (bilinear_form + bilinear_updater) * colSums (bilinear_form + bilinear_updater));
+        coeff_b = (q_plus_1 - q_minus_1) / 2.0;
+        coeff_a = (q_plus_1 + q_minus_1) / 2.0 - bilinear_form_value;
+
+# BEGIN DEBUG INSERT
+# mmm = 1;
+# if (castAsScalar (isVar [1, idx]) > 0.5) {
+# for (iii in 2:num_frees) {if (castAsScalar (freeVars_updater [iii, 1] - freeVars_updater [mmm, 1]) > 0) {mmm = iii;}}
+# print ("freeVars[" + mmm + "]:  q_minus_1 = " + q_minus_1 + ",   q_plus_1 = " + q_plus_1 + ",   coeff_a = " + coeff_a);
+# } else {
+# for (iii in 2:num_params) {if (castAsScalar (params_updater [iii, 1] - params_updater [mmm, 1]) > 0) {mmm = iii;}}
+# print ("  params[" + mmm + "]:  q_minus_1 = " + q_minus_1 + ",   q_plus_1 = " + q_plus_1 + ",   coeff_a = " + coeff_a);
+# }
+# END DEBUG INSERT
+
+        # Find the mean and the sigma for f(x) ~ exp (- (ax^2 + bx + c)),
+        # then compute the shift to get the new sample
+            
+        mean_shift  = - coeff_b / (2.0 * coeff_a);
+        sigma_shift = 1.0 / sqrt (2.0 * coeff_a);
+        shift = mean_shift + sigma_shift * castAsScalar (rnd_normal [idx, 1]);
+            
+        # Perform the updates
+
+        bilinear_form = bilinear_form + shift * bilinear_updater;
+        if (castAsScalar (isVar [1, idx]) > 0.5) {
+            freeVars = freeVars + shift * freeVars_updater;
+            regresValues = regresValues + shift * regresValues_updater;
+        } else {
+            params = params + shift * params_updater;
+            regresParams = regresParams + shift * regresParams_updater;
+        }
+    }
+    
+    # Update / adjust the reports and the parameters
+    
+    reports = CReps %*% freeVars + dReps;
+    reports_matrix = matricize (reports, num_attrs);
+    
+    # Make an observation of the reports and/or the parameters
+    
+    if (iter > num_burnin_iterations)
+    {
+        sum_of_observed_reports = sum_of_observed_reports + reports_matrix;
+        num_of_observed_reports = num_of_observed_reports + 1;
+
+        sum_of_observed_params = sum_of_observed_params + params;
+    }
+
+
+v1 =castAsScalar(round(reports_matrix[ 1,num_terms]));
+v2 =castAsScalar(round(reports_matrix[ 2,num_terms]));
+v3 =castAsScalar(round(reports_matrix[ 3,num_terms]));
+v4 =castAsScalar(round(reports_matrix[ 4,num_terms]));
+v5 =castAsScalar(round(reports_matrix[ 5,num_terms]));
+v8 =castAsScalar(round(reports_matrix[ 8,num_terms]));
+v9 =castAsScalar(round(reports_matrix[ 9,num_terms]));
+v10=castAsScalar(round(reports_matrix[10,num_terms]));
+v16=castAsScalar(round(reports_matrix[16,num_terms]));
+v19=castAsScalar(round(reports_matrix[19,num_terms]));
+print (
+" Sample = 1:" + v1 + ", 2:" + v2 + ", 3:" + v3 + ", 4:" + v4 + ", 5:" + v5 +
+", 8:" + v8 + ", 9:" + v9 + ", 10:" + v10 + ", 16:" + v16 + ", 19:" + v19);
+
+}
+
+print ("Writing out the results...");
+
+avg_reports_matrix = sum_of_observed_reports / num_of_observed_reports;
+avg_params = sum_of_observed_params / num_of_observed_reports;
+write (avg_reports_matrix, $11, format="text");
+write (avg_params, $12, format="text");
+
+print ("END ImputeGaussMCMC");
+
+
+
+
+# Outputs a column vector that consists of the columns of the input matrix in sequential order
+# NEEDS TO BE PARALLELIZED
+vectorize = function (Matrix[double] M) return (Matrix[double] v)
+{
+    n_rows = nrow (M);
+    n_cols = ncol (M);
+    n = n_rows * n_cols;
+    v = matrix (0.0, rows = n, cols = 1);
+    for (i in 1:n_cols) {
+        left_row = n_rows * (i-1) + 1;
+        right_row = n_rows * i;
+        v [left_row : right_row, 1] = M [, i];
+    }
+}
+
+# Takes a column vector, splits it into columns of "n_rows" in each, and combines into a matrix
+# NEEDS TO BE PARALLELIZED
+matricize = function (Matrix[double] v, int n_rows) return (Matrix[double] M)
+{
+    zero = matrix (0.0, rows = 1, cols = 1);
+    n = nrow (v);
+    n_cols = castAsScalar (round (zero + (n / n_rows)));
+    if (n_cols * n_rows < n) {
+        n_cols = n_cols + 1;
+    }
+    M = matrix (0.0, rows = n_rows, cols = n_cols);
+    for (i in 1:n_cols) {
+        left_row = n_rows * (i-1) + 1;
+        right_row = n_rows * i;
+        if (right_row <= n) {
+            M [, i] = v [left_row : right_row, 1];
+        } else {
+            new_right = n - left_row + 1;
+            M [1 : new_right, i] = v [left_row : n, 1];
+        }
+    }
+}


[21/55] [partial] incubator-systemml git commit: [SYSTEMML-482] [SYSTEMML-480] Adding a Git attributes file to enforce Unix-styled line endings, and normalizing all of the line endings.

Posted by du...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/kmeans/Kmeans.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/kmeans/Kmeans.dml b/src/test/scripts/applications/kmeans/Kmeans.dml
index 3e1f8c9..368b98d 100644
--- a/src/test/scripts/applications/kmeans/Kmeans.dml
+++ b/src/test/scripts/applications/kmeans/Kmeans.dml
@@ -1,108 +1,108 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# Implements the k-Means clustering algorithm
-# INPUT 1: Input file name for X input data (data records)
-# INPUT 2: The number k of centroids
-# INPUT 3: Output file name for the centroids
-# Example: hadoop jar SystemML.jar -f kMeans.dml -args X_input_file 5 centroids_file
-
-print( "Performing initialization..." );
-
-# X : matrix of data points as rows
-X = read( $1 );
-
-num_records = nrow( X );
-num_features = ncol( X );
-num_centroids = $2;
-
-one_per_record = matrix( 1.0, rows = num_records, cols = 1);
-one_per_feature = matrix( 1.0, rows = num_features, cols = 1);
-one_per_centroid = matrix( 1.0, rows = num_centroids, cols = 1);
-
-# Y : matrix of centroids as rows
-Y = matrix( 0.0, rows = num_centroids, cols = num_features );
-# D : matrix of squared distances from X rows to Y rows, up to a Y-independent term
-D = matrix( 0.0, rows = num_records, cols = num_centroids );
-
-print( "Taking a data sample to compute the convergence criterion..." );
-
-X_sample = X;
-sample_size = 1000;
-if (num_records > sample_size)
-{
-   # Sample approximately 1000 records (Bernoulli sampling) 
-   P = Rand( rows = num_records, cols = 1, min = 0.0, max = 1.0 );
-   P = ppred( P * num_records, sample_size, "<=" );
-   X_sample = X * (P %*% t( one_per_feature ));
-   X_sample = removeEmpty( target = X_sample, margin = "rows" );
-}
-
-sample_size = nrow( X_sample );
-one_per_sample = matrix( 1.0, rows = sample_size, cols = 1 );
-
-# Compute eps for the convergence criterion as the average square distance
-# between records in the sample times a small number
-
-eps = 0.0000001 * 
-    sum (one_per_sample %*% t( rowSums( X_sample * X_sample ) ) 
-    + rowSums( X_sample * X_sample ) %*% t( one_per_sample ) 
-    - 2.0 * X_sample %*% t( X_sample )) / (sample_size * sample_size);
-
-# Start iterations
-
-centroid_change = 10.0 + eps;
-iter_count = 0;
-print ("Starting the iterations...");
-
-while (centroid_change > eps)
-{
-    iter_count = iter_count + 1;
-    old_Y = matrix( 0.0, rows = num_centroids, cols = num_features );
-    if ( iter_count == 1 
-        | ( centroid_change != centroid_change )             #  Check if
-        | ( ( centroid_change == centroid_change + 1 )       #  centroid_change
-            & ( centroid_change == 2 * centroid_change ) ) ) #  is a "NaN"
-    {
-        # Start anew, by setting D to a random matrix
-        D = Rand (rows = num_records, cols = num_centroids, min = 0.0, max = 1.0);
-    } else {
-        old_Y = Y;
-        # Euclidean squared distances from records (X rows) to centroids (Y rows)
-        # without a redundant Y-independent term
-        D = one_per_record %*% t(rowSums (Y * Y)) - 2.0 * X %*% t(Y);
-    }
-    # Find the closest centroid for each record
-    P = ppred (D, rowMins (D) %*% t(one_per_centroid), "<=");
-    # If some records belong to multiple centroids, share them equally
-    P = P / (rowSums (P) %*% t(one_per_centroid));
-    # Normalize the columns of P to compute record weights for new centroids
-    P = P / (one_per_record %*% colSums (P));
-    # Compute new centroids as weighted averages over the records
-    Y = t(P) %*% X;
-    # Measure the squared difference between old and new centroids
-    centroid_change = sum ( (Y - old_Y) * (Y - old_Y) ) / num_centroids;
-    print ("Iteration " + iter_count + ":  centroid_change = " + centroid_change);
-}
-
-print( "Writing out the centroids..." );
-write( Y, $3, format = "text" );
-print( "Done." );
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# Implements the k-Means clustering algorithm
+# INPUT 1: Input file name for X input data (data records)
+# INPUT 2: The number k of centroids
+# INPUT 3: Output file name for the centroids
+# Example: hadoop jar SystemML.jar -f Kmeans.dml -args X_input_file 5 centroids_file
+
+print( "Performing initialization..." );
+
+# X : matrix of data points as rows
+X = read( $1 );
+
+num_records = nrow( X );
+num_features = ncol( X );
+num_centroids = $2;
+
+one_per_record = matrix( 1.0, rows = num_records, cols = 1);
+one_per_feature = matrix( 1.0, rows = num_features, cols = 1);
+one_per_centroid = matrix( 1.0, rows = num_centroids, cols = 1);
+
+# Y : matrix of centroids as rows
+Y = matrix( 0.0, rows = num_centroids, cols = num_features );
+# D : matrix of squared distances from X rows to Y rows, up to a Y-independent term
+D = matrix( 0.0, rows = num_records, cols = num_centroids );
+
+print( "Taking a data sample to compute the convergence criterion..." );
+
+X_sample = X;
+sample_size = 1000;
+if (num_records > sample_size)
+{
+   # Sample approximately 1000 records (Bernoulli sampling) 
+   P = Rand( rows = num_records, cols = 1, min = 0.0, max = 1.0 );
+   P = ppred( P * num_records, sample_size, "<=" );
+   X_sample = X * (P %*% t( one_per_feature ));
+   X_sample = removeEmpty( target = X_sample, margin = "rows" );
+}
+
+sample_size = nrow( X_sample );
+one_per_sample = matrix( 1.0, rows = sample_size, cols = 1 );
+
+# Compute eps for the convergence criterion as the average square distance
+# between records in the sample times a small number
+
+eps = 0.0000001 * 
+    sum (one_per_sample %*% t( rowSums( X_sample * X_sample ) ) 
+    + rowSums( X_sample * X_sample ) %*% t( one_per_sample ) 
+    - 2.0 * X_sample %*% t( X_sample )) / (sample_size * sample_size);
+
+# Start iterations
+
+centroid_change = 10.0 + eps;
+iter_count = 0;
+print ("Starting the iterations...");
+
+while (centroid_change > eps)
+{
+    iter_count = iter_count + 1;
+    old_Y = matrix( 0.0, rows = num_centroids, cols = num_features );
+    if ( iter_count == 1 
+        | ( centroid_change != centroid_change )             #  Check if
+        | ( ( centroid_change == centroid_change + 1 )       #  centroid_change
+            & ( centroid_change == 2 * centroid_change ) ) ) #  is NaN or infinite
+    {
+        # Start anew, by setting D to a random matrix
+        D = Rand (rows = num_records, cols = num_centroids, min = 0.0, max = 1.0);
+    } else {
+        old_Y = Y;
+        # Euclidean squared distances from records (X rows) to centroids (Y rows)
+        # without a redundant Y-independent term
+        D = one_per_record %*% t(rowSums (Y * Y)) - 2.0 * X %*% t(Y);
+    }
+    # Find the closest centroid for each record
+    P = ppred (D, rowMins (D) %*% t(one_per_centroid), "<=");
+    # If some records belong to multiple centroids, share them equally
+    P = P / (rowSums (P) %*% t(one_per_centroid));
+    # Normalize the columns of P to compute record weights for new centroids
+    P = P / (one_per_record %*% colSums (P));
+    # Compute new centroids as weighted averages over the records
+    Y = t(P) %*% X;
+    # Measure the squared difference between old and new centroids
+    centroid_change = sum ( (Y - old_Y) * (Y - old_Y) ) / num_centroids;
+    print ("Iteration " + iter_count + ":  centroid_change = " + centroid_change);
+}
+
+print( "Writing out the centroids..." );
+write( Y, $3, format = "text" );
+print( "Done." );

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/l2svm/L2SVM.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/l2svm/L2SVM.R b/src/test/scripts/applications/l2svm/L2SVM.R
index ccf6ca1..bf419f1 100644
--- a/src/test/scripts/applications/l2svm/L2SVM.R
+++ b/src/test/scripts/applications/l2svm/L2SVM.R
@@ -1,103 +1,103 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# JUnit test class: dml.test.integration.applications.L2SVMTest.java
-# command line invocation assuming $L2SVM_HOME is set to the home of the R script
-# Rscript $L2SVM_HOME/L2SVM.R $L2SVM_HOME/in/ 0.00000001 1 100 $L2SVM_HOME/expected/
-
-args <- commandArgs(TRUE)
-library("Matrix")
-
-X = readMM(paste(args[1], "X.mtx", sep=""));
-Y = readMM(paste(args[1], "Y.mtx", sep=""));
-
-check_min = min(Y)
-check_max = max(Y)
-num_min = sum(Y == check_min)
-num_max = sum(Y == check_max)
-if(num_min + num_max != nrow(Y)){ 
-	print("please check Y, it should contain only 2 labels") 
-}else{
-	if(check_min != -1 | check_max != +1) 
-		Y = 2/(check_max - check_min)*Y - (check_min + check_max)/(check_max - check_min)
-}
-
-intercept = as.integer(args[2]);
-epsilon = as.double(args[3]);
-lambda = as.double(args[4]);
-maxiterations = as.integer(args[5]);
-
-N = nrow(X)
-D = ncol(X)
-
-if (intercept == 1) {
-	ones  = matrix(1,N,1)
-	X = cbind(X, ones);
-}
-
-num_rows_in_w = D
-if(intercept == 1){
-	num_rows_in_w = num_rows_in_w + 1
-}
-w = matrix(0, num_rows_in_w, 1)
-
-g_old = t(X) %*% Y
-s = g_old
-
-Xw = matrix(0,nrow(X),1)
-iter = 0
-continue = TRUE
-while(continue && iter < maxiterations){
-	t = 0
-	Xd = X %*% s
-	wd = lambda * sum(w * s)
-	dd = lambda * sum(s * s)
-	continue1 = TRUE
-	while(continue1){
-		tmp_Xw = Xw + t*Xd
-		out = 1 - Y * (tmp_Xw)
-		sv = which(out > 0)
-		g = wd + t*dd - sum(out[sv] * Y[sv] * Xd[sv])
-		h = dd + sum(Xd[sv] * Xd[sv])
-		t = t - g/h
-		continue1 = (g*g/h >= 1e-10)
-	}
-	
-	w = w + t*s
-	Xw = Xw + t*Xd
-		
-	out = 1 - Y * (X %*% w)
-	sv = which(out > 0)
-	obj = 0.5 * sum(out[sv] * out[sv]) + lambda/2 * sum(w * w)
-	g_new = t(X[sv,]) %*% (out[sv] * Y[sv]) - lambda * w
-	
-	print(paste("OBJ : ", obj))
-
-	continue = (t*sum(s * g_old) >= epsilon*obj)
-	
-	be = sum(g_new * g_new)/sum(g_old * g_old)
-	s = be * s + g_new
-	g_old = g_new
-	
-	iter = iter + 1
-}
-
-writeMM(as(w,"CsparseMatrix"), paste(args[6], "w", sep=""));
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# JUnit test class: dml.test.integration.applications.L2SVMTest.java
+# command line invocation assuming $L2SVM_HOME is set to the home of the R script
+# Rscript $L2SVM_HOME/L2SVM.R $L2SVM_HOME/in/ 0 0.00000001 1 100 $L2SVM_HOME/expected/
+
+args <- commandArgs(TRUE)
+library("Matrix")
+
+X = readMM(paste(args[1], "X.mtx", sep=""));
+Y = readMM(paste(args[1], "Y.mtx", sep=""));
+
+check_min = min(Y)
+check_max = max(Y)
+num_min = sum(Y == check_min)
+num_max = sum(Y == check_max)
+if(num_min + num_max != nrow(Y)){ 
+	print("please check Y, it should contain only 2 labels") 
+}else{
+	if(check_min != -1 | check_max != +1) 
+		Y = 2/(check_max - check_min)*Y - (check_min + check_max)/(check_max - check_min)
+}
+
+intercept = as.integer(args[2]);
+epsilon = as.double(args[3]);
+lambda = as.double(args[4]);
+maxiterations = as.integer(args[5]);
+
+N = nrow(X)
+D = ncol(X)
+
+if (intercept == 1) {
+	ones  = matrix(1,N,1)
+	X = cbind(X, ones);
+}
+
+num_rows_in_w = D
+if(intercept == 1){
+	num_rows_in_w = num_rows_in_w + 1
+}
+w = matrix(0, num_rows_in_w, 1)
+
+g_old = t(X) %*% Y
+s = g_old
+
+Xw = matrix(0,nrow(X),1)
+iter = 0
+continue = TRUE
+while(continue && iter < maxiterations){
+	t = 0
+	Xd = X %*% s
+	wd = lambda * sum(w * s)
+	dd = lambda * sum(s * s)
+	continue1 = TRUE
+	while(continue1){
+		tmp_Xw = Xw + t*Xd
+		out = 1 - Y * (tmp_Xw)
+		sv = which(out > 0)
+		g = wd + t*dd - sum(out[sv] * Y[sv] * Xd[sv])
+		h = dd + sum(Xd[sv] * Xd[sv])
+		t = t - g/h
+		continue1 = (g*g/h >= 1e-10)
+	}
+	
+	w = w + t*s
+	Xw = Xw + t*Xd
+		
+	out = 1 - Y * (X %*% w)
+	sv = which(out > 0)
+	obj = 0.5 * sum(out[sv] * out[sv]) + lambda/2 * sum(w * w)
+	g_new = t(X[sv,]) %*% (out[sv] * Y[sv]) - lambda * w
+	
+	print(paste("OBJ : ", obj))
+
+	continue = (t*sum(s * g_old) >= epsilon*obj)
+	
+	be = sum(g_new * g_new)/sum(g_old * g_old)
+	s = be * s + g_new
+	g_old = g_new
+	
+	iter = iter + 1
+}
+
+writeMM(as(w,"CsparseMatrix"), paste(args[6], "w", sep=""));

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/l2svm/L2SVM.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/l2svm/L2SVM.dml b/src/test/scripts/applications/l2svm/L2SVM.dml
index bf7de14..13f2b4c 100644
--- a/src/test/scripts/applications/l2svm/L2SVM.dml
+++ b/src/test/scripts/applications/l2svm/L2SVM.dml
@@ -1,124 +1,124 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# How to invoke this dml script L2SVM.dml?
-# Assume L2SVM_HOME is set to the home of the dml script
-# Assume input and output directories are on hdfs as INPUT_DIR and OUTPUT_DIR
-# hadoop jar SystemML.jar -f $L2SVM_HOME/L2SVM.pydml -nvargs X="$INPUT_DIR/X" Y="$INPUT_DIR/Y" icpt=0 tol=1.0E-8 reg=1.0 maxiter=3 model="$OUTPUT_DIR/w" Log="$OUTPUT_DIR/Log"
-
-# Note about inputs: 
-# Assumes that labels (entries in Y) 
-# are set to either -1 or +1
-# or the result of recoding
-
-cmdLine_fmt=ifdef($fmt,"text")
-cmdLine_icpt=ifdef($icpt, 0)
-cmdLine_tol=ifdef($tol, 0.001)
-cmdLine_reg=ifdef($reg, 1.0)
-cmdLine_maxiter=ifdef($maxiter, 100)
-
-X = read($X)
-Y = read($Y)
-
-check_min = min(Y)
-check_max = max(Y)
-num_min = sum(ppred(Y, check_min, "=="))
-num_max = sum(ppred(Y, check_max, "=="))
-if(num_min + num_max != nrow(Y)) print("please check Y, it should contain only 2 labels")
-else{
-	if(check_min != -1 | check_max != +1) 
-		Y = 2/(check_max - check_min)*Y - (check_min + check_max)/(check_max - check_min)
-}
-
-epsilon = cmdLine_tol
-lambda = cmdLine_reg
-maxiterations = cmdLine_maxiter
-intercept = cmdLine_icpt
-
-num_samples = nrow(X)
-dimensions = ncol(X)
-
-if (intercept == 1) {
-	ones  = matrix(1, rows=num_samples, cols=1)
-	X = append(X, ones);
-}
-
-num_rows_in_w = dimensions
-if(intercept == 1){
-	num_rows_in_w = num_rows_in_w + 1
-}
-w = matrix(0, rows=num_rows_in_w, cols=1)
-
-g_old = t(X) %*% Y
-s = g_old
-
-Xw = matrix(0, rows=nrow(X), cols=1)
-debug_str = "# Iter, Obj"
-iter = 0
-continue = 1
-while(continue == 1 & iter < maxiterations)  {
-	# minimizing primal obj along direction s
-	step_sz = 0
-	Xd = X %*% s
-	wd = lambda * sum(w * s)
-	dd = lambda * sum(s * s)
-	continue1 = 1
-	while(continue1 == 1){
-		tmp_Xw = Xw + step_sz*Xd
-		out = 1 - Y * (tmp_Xw)
-		sv = ppred(out, 0, ">")
-		out = out * sv
-		g = wd + step_sz*dd - sum(out * Y * Xd)
-		h = dd + sum(Xd * sv * Xd)
-		step_sz = step_sz - g/h
-		if (g*g/h < 0.0000000001){
-			continue1 = 0
-		}
-	}
-	
-	#update weights
-	w = w + step_sz*s
-	Xw = Xw + step_sz*Xd
-	
-	out = 1 - Y * Xw
-	sv = ppred(out, 0, ">")
-	out = sv * out
-	obj = 0.5 * sum(out * out) + lambda/2 * sum(w * w)
-	g_new = t(X) %*% (out * Y) - lambda * w
-	
-	print("OBJ = " + obj)
-	debug_str = append(debug_str, iter + "," + obj)
-	
-	tmp = sum(s * g_old)
-	if(step_sz*tmp < epsilon*obj){
-		continue = 0
-	}
-	
-	#non-linear CG step
-	be = sum(g_new * g_new)/sum(g_old * g_old)
-	s = be * s + g_new
-	g_old = g_new
-
-	iter = iter + 1
-}
-
-write(w, $model, format=cmdLine_fmt)
-write(debug_str, $Log)
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# How to invoke this dml script L2SVM.dml?
+# Assume L2SVM_HOME is set to the home of the dml script
+# Assume input and output directories are on hdfs as INPUT_DIR and OUTPUT_DIR
+# hadoop jar SystemML.jar -f $L2SVM_HOME/L2SVM.pydml -nvargs X="$INPUT_DIR/X" Y="$INPUT_DIR/Y" icpt=0 tol=1.0E-8 reg=1.0 maxiter=3 model="$OUTPUT_DIR/w" Log="$OUTPUT_DIR/Log"
+
+# Note about inputs: 
+# Assumes that labels (entries in Y) 
+# are set to either -1 or +1
+# or the result of recoding
+
+cmdLine_fmt=ifdef($fmt,"text")
+cmdLine_icpt=ifdef($icpt, 0)
+cmdLine_tol=ifdef($tol, 0.001)
+cmdLine_reg=ifdef($reg, 1.0)
+cmdLine_maxiter=ifdef($maxiter, 100)
+
+X = read($X)
+Y = read($Y)
+
+check_min = min(Y)
+check_max = max(Y)
+num_min = sum(ppred(Y, check_min, "=="))
+num_max = sum(ppred(Y, check_max, "=="))
+if(num_min + num_max != nrow(Y)) print("please check Y, it should contain only 2 labels")
+else{
+	if(check_min != -1 | check_max != +1) 
+		Y = 2/(check_max - check_min)*Y - (check_min + check_max)/(check_max - check_min)
+}
+
+epsilon = cmdLine_tol
+lambda = cmdLine_reg
+maxiterations = cmdLine_maxiter
+intercept = cmdLine_icpt
+
+num_samples = nrow(X)
+dimensions = ncol(X)
+
+if (intercept == 1) {
+	ones  = matrix(1, rows=num_samples, cols=1)
+	X = append(X, ones);
+}
+
+num_rows_in_w = dimensions
+if(intercept == 1){
+	num_rows_in_w = num_rows_in_w + 1
+}
+w = matrix(0, rows=num_rows_in_w, cols=1)
+
+g_old = t(X) %*% Y
+s = g_old
+
+Xw = matrix(0, rows=nrow(X), cols=1)
+debug_str = "# Iter, Obj"
+iter = 0
+continue = 1
+while(continue == 1 & iter < maxiterations)  {
+	# minimizing primal obj along direction s
+	step_sz = 0
+	Xd = X %*% s
+	wd = lambda * sum(w * s)
+	dd = lambda * sum(s * s)
+	continue1 = 1
+	while(continue1 == 1){
+		tmp_Xw = Xw + step_sz*Xd
+		out = 1 - Y * (tmp_Xw)
+		sv = ppred(out, 0, ">")
+		out = out * sv
+		g = wd + step_sz*dd - sum(out * Y * Xd)
+		h = dd + sum(Xd * sv * Xd)
+		step_sz = step_sz - g/h
+		if (g*g/h < 0.0000000001){
+			continue1 = 0
+		}
+	}
+	
+	#update weights
+	w = w + step_sz*s
+	Xw = Xw + step_sz*Xd
+	
+	out = 1 - Y * Xw
+	sv = ppred(out, 0, ">")
+	out = sv * out
+	obj = 0.5 * sum(out * out) + lambda/2 * sum(w * w)
+	g_new = t(X) %*% (out * Y) - lambda * w
+	
+	print("OBJ = " + obj)
+	debug_str = append(debug_str, iter + "," + obj)
+	
+	tmp = sum(s * g_old)
+	if(step_sz*tmp < epsilon*obj){
+		continue = 0
+	}
+	
+	#non-linear CG step
+	be = sum(g_new * g_new)/sum(g_old * g_old)
+	s = be * s + g_new
+	g_old = g_new
+
+	iter = iter + 1
+}
+
+write(w, $model, format=cmdLine_fmt)
+write(debug_str, $Log)

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/l2svm/L2SVM.pydml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/l2svm/L2SVM.pydml b/src/test/scripts/applications/l2svm/L2SVM.pydml
index d2f89e6..119ff44 100644
--- a/src/test/scripts/applications/l2svm/L2SVM.pydml
+++ b/src/test/scripts/applications/l2svm/L2SVM.pydml
@@ -1,119 +1,119 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# How to invoke this pydml script L2SVM.pydml?
-# Assume L2SVM_HOME is set to the home of the pydml script
-# Assume input and output directories are on hdfs as INPUT_DIR and OUTPUT_DIR
-# hadoop jar SystemML.jar -f $L2SVM_HOME/L2SVM.pydml -python -nvargs X="$INPUT_DIR/X" Y="$INPUT_DIR/Y" icpt=0 tol=1.0E-8 reg=1.0 maxiter=3 model="$OUTPUT_DIR/w" Log="$OUTPUT_DIR/Log"
-
-# Note about inputs: 
-# Assumes that labels (entries in Y) 
-# are set to either -1 or +1
-# or the result of recoding
-
-cmdLine_fmt=ifdef($fmt,"text")
-cmdLine_icpt=ifdef($icpt, 0)
-cmdLine_tol=ifdef($tol, 0.001)
-cmdLine_reg=ifdef($reg, 1.0)
-cmdLine_maxiter=ifdef($maxiter, 100)
-
-X = load($X)
-Y = load($Y)
-
-check_min = min(Y)
-check_max = max(Y)
-num_min = sum(ppred(Y, check_min, "=="))
-num_max = sum(ppred(Y, check_max, "=="))
-if(num_min + num_max != nrow(Y)):
-    print("please check Y, it should contain only 2 labels")
-else:
-    if(check_min != -1 | check_max != +1):
-        Y = 2/(check_max - check_min)*Y - (check_min + check_max)/(check_max - check_min)
-
-epsilon = cmdLine_tol
-lambda = cmdLine_reg
-maxiterations = cmdLine_maxiter
-intercept = cmdLine_icpt
-
-num_samples = nrow(X)
-dimensions = ncol(X)
-
-if (intercept == 1):
-    ones  = full(1, rows=num_samples, cols=1)
-    X = append(X, ones)
-
-num_rows_in_w = dimensions
-if(intercept == 1):
-    num_rows_in_w = num_rows_in_w + 1
-w = full(0, rows=num_rows_in_w, cols=1)
-
-g_old = dot(transpose(X), Y)
-s = g_old
-
-Xw = full(0, rows=nrow(X), cols=1)
-debug_str = "# Iter, Obj"
-iter = 0
-continue = 1
-while(continue == 1 & iter < maxiterations):
-    # minimizing primal obj along direction s
-    step_sz = 0
-    Xd = dot(X, s)
-    wd = lambda * sum(w * s)
-    dd = lambda * sum(s * s)
-    continue1 = 1
-    while(continue1 == 1):
-        tmp_Xw = Xw + step_sz*Xd
-        out = 1 - Y * (tmp_Xw)
-        sv = ppred(out, 0, ">")
-        out = out * sv
-        g = wd + step_sz*dd - sum(out * Y * Xd)
-        h = dd + sum(Xd * sv * Xd)
-        step_sz = step_sz - g/h
-        if (g*g/h < 0.0000000001):
-            continue1 = 0
-    
-    #update weights
-    w = w + step_sz*s
-    Xw = Xw + step_sz*Xd
-    
-    out = 1 - Y * Xw
-    sv = ppred(out, 0, ">")
-    out = sv * out
-    obj = 0.5 * sum(out * out) + lambda/2 * sum(w * w)
-    g_new = dot(transpose(X), (out * Y)) - lambda * w
-    
-    print("OBJ = " + obj)
-    debug_str = append(debug_str, iter + "," + obj)
-    
-    tmp = sum(s * g_old)
-    if(step_sz*tmp < epsilon*obj):
-        continue = 0
-    
-    #non-linear CG step
-    be = sum(g_new * g_new)/sum(g_old * g_old)
-    s = be * s + g_new
-    g_old = g_new
-    
-    iter = iter + 1
-
-
-save(w, $model, format=cmdLine_fmt)
-save(debug_str, $Log)
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# How to invoke this pydml script L2SVM.pydml?
+# Assume L2SVM_HOME is set to the home of the pydml script
+# Assume input and output directories are on hdfs as INPUT_DIR and OUTPUT_DIR
+# hadoop jar SystemML.jar -f $L2SVM_HOME/L2SVM.pydml -python -nvargs X="$INPUT_DIR/X" Y="$INPUT_DIR/Y" icpt=0 tol=1.0E-8 reg=1.0 maxiter=3 model="$OUTPUT_DIR/w" Log="$OUTPUT_DIR/Log"
+
+# Note about inputs: 
+# Assumes that labels (entries in Y) 
+# are set to either -1 or +1
+# or the result of recoding
+
+cmdLine_fmt=ifdef($fmt,"text")
+cmdLine_icpt=ifdef($icpt, 0)
+cmdLine_tol=ifdef($tol, 0.001)
+cmdLine_reg=ifdef($reg, 1.0)
+cmdLine_maxiter=ifdef($maxiter, 100)
+
+X = load($X)
+Y = load($Y)
+
+check_min = min(Y)
+check_max = max(Y)
+num_min = sum(ppred(Y, check_min, "=="))
+num_max = sum(ppred(Y, check_max, "=="))
+if(num_min + num_max != nrow(Y)):
+    print("please check Y, it should contain only 2 labels")
+else:
+    if(check_min != -1 | check_max != +1):
+        Y = 2/(check_max - check_min)*Y - (check_min + check_max)/(check_max - check_min)
+
+epsilon = cmdLine_tol
+lambda = cmdLine_reg
+maxiterations = cmdLine_maxiter
+intercept = cmdLine_icpt
+
+num_samples = nrow(X)
+dimensions = ncol(X)
+
+if (intercept == 1):
+    ones  = full(1, rows=num_samples, cols=1)
+    X = append(X, ones)
+
+num_rows_in_w = dimensions
+if(intercept == 1):
+    num_rows_in_w = num_rows_in_w + 1
+w = full(0, rows=num_rows_in_w, cols=1)
+
+g_old = dot(transpose(X), Y)
+s = g_old
+
+Xw = full(0, rows=nrow(X), cols=1)
+debug_str = "# Iter, Obj"
+iter = 0
+continue = 1
+while(continue == 1 & iter < maxiterations):
+    # minimizing primal obj along direction s
+    step_sz = 0
+    Xd = dot(X, s)
+    wd = lambda * sum(w * s)
+    dd = lambda * sum(s * s)
+    continue1 = 1
+    while(continue1 == 1):
+        tmp_Xw = Xw + step_sz*Xd
+        out = 1 - Y * (tmp_Xw)
+        sv = ppred(out, 0, ">")
+        out = out * sv
+        g = wd + step_sz*dd - sum(out * Y * Xd)
+        h = dd + sum(Xd * sv * Xd)
+        step_sz = step_sz - g/h
+        if (g*g/h < 0.0000000001):
+            continue1 = 0
+    
+    #update weights
+    w = w + step_sz*s
+    Xw = Xw + step_sz*Xd
+    
+    out = 1 - Y * Xw
+    sv = ppred(out, 0, ">")
+    out = sv * out
+    obj = 0.5 * sum(out * out) + lambda/2 * sum(w * w)
+    g_new = dot(transpose(X), (out * Y)) - lambda * w
+    
+    print("OBJ = " + obj)
+    debug_str = append(debug_str, iter + "," + obj)
+    
+    tmp = sum(s * g_old)
+    if(step_sz*tmp < epsilon*obj):
+        continue = 0
+    
+    #non-linear CG step
+    be = sum(g_new * g_new)/sum(g_old * g_old)
+    s = be * s + g_new
+    g_old = g_new
+    
+    iter = iter + 1
+
+
+save(w, $model, format=cmdLine_fmt)
+save(debug_str, $Log)

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/l2svm/L2SVMTest.Rt
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/l2svm/L2SVMTest.Rt b/src/test/scripts/applications/l2svm/L2SVMTest.Rt
index cb0bce7..8bd3e90 100644
--- a/src/test/scripts/applications/l2svm/L2SVMTest.Rt
+++ b/src/test/scripts/applications/l2svm/L2SVMTest.Rt
@@ -1,71 +1,71 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# JUnit test class: dml.test.integration.applications.L2SVMTest.java
-library("Matrix")
-
-X = readMM("./test/scripts/applications/l2svm/in/X.mtx")
-Y = readMM("./test/scripts/applications/l2svm/in/Y.mtx")
-epsilon = 0.00000001
-lambda = 1
-
-N = nrow(X)
-D = ncol(X)
-
-w = matrix(0,D,1)
-
-g_old = t(X) %*% Y
-s = g_old
-
-continue = TRUE
-while(continue){
-	t = 0
-	Xd = X %*% s
-	wd = lambda * sum(w * s)
-	dd = lambda * sum(s * s)
-	continue1 = TRUE
-	while(continue1){
-		tmp_w = w + t*s
-		out = 1 - Y * (X %*% tmp_w)
-		sv = which(out > 0)
-		g = wd + t*dd - sum(out[sv] * Y[sv] * Xd[sv])
-		h = dd + sum(Xd[sv] * Xd[sv])
-		t = t - g/h
-		continue1 = (g*g/h >= 1e-10)
-	}
-	
-	w = w + t*s
-	
-	out = 1 - Y * (X %*% w)
-	sv = which(out > 0)
-	obj = 0.5 * sum(out[sv] * out[sv]) + lambda/2 * sum(w * w)
-	g_new = t(X[sv,]) %*% (out[sv] * Y[sv]) - lambda * w
-	
-	print(paste("OBJ : ", obj))
-
-	continue = (t*sum(s * g_old) >= epsilon*obj)
-	
-	be = sum(g_new * g_new)/sum(g_old * g_old)
-	s = be * s + g_new
-	g_old = g_new
-}
-
-writeMM(w, "./test/scripts/applications/l2svm/expected/w");
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# JUnit test class: dml.test.integration.applications.L2SVMTest.java
+library("Matrix")
+
+X = readMM("./test/scripts/applications/l2svm/in/X.mtx")
+Y = readMM("./test/scripts/applications/l2svm/in/Y.mtx")
+epsilon = 0.00000001
+lambda = 1
+
+N = nrow(X)
+D = ncol(X)
+
+w = matrix(0,D,1)
+
+g_old = t(X) %*% Y
+s = g_old
+
+continue = TRUE
+while(continue){
+	t = 0
+	Xd = X %*% s
+	wd = lambda * sum(w * s)
+	dd = lambda * sum(s * s)
+	continue1 = TRUE
+	while(continue1){
+		tmp_w = w + t*s
+		out = 1 - Y * (X %*% tmp_w)
+		sv = which(out > 0)
+		g = wd + t*dd - sum(out[sv] * Y[sv] * Xd[sv])
+		h = dd + sum(Xd[sv] * Xd[sv])
+		t = t - g/h
+		continue1 = (g*g/h >= 1e-10)
+	}
+	
+	w = w + t*s
+	
+	out = 1 - Y * (X %*% w)
+	sv = which(out > 0)
+	obj = 0.5 * sum(out[sv] * out[sv]) + lambda/2 * sum(w * w)
+	g_new = t(X[sv,]) %*% (out[sv] * Y[sv]) - lambda * w
+	
+	print(paste("OBJ : ", obj))
+
+	continue = (t*sum(s * g_old) >= epsilon*obj)
+	
+	be = sum(g_new * g_new)/sum(g_old * g_old)
+	s = be * s + g_new
+	g_old = g_new
+}
+
+writeMM(w, "./test/scripts/applications/l2svm/expected/w");

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/l2svm/L2SVMTest.dmlt
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/l2svm/L2SVMTest.dmlt b/src/test/scripts/applications/l2svm/L2SVMTest.dmlt
index 79f4252..5145aa3 100644
--- a/src/test/scripts/applications/l2svm/L2SVMTest.dmlt
+++ b/src/test/scripts/applications/l2svm/L2SVMTest.dmlt
@@ -1,80 +1,80 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-X = read("./test/scripts/applications/l2svm/in/X", rows=1000, cols=100, format="text")
-Y = read("./test/scripts/applications/l2svm/in/y", rows=1000, cols=1, format="text")
-epsilon = 0.00000001
-lambda = 1
-
-num_samples = nrow(X)
-dimensions = ncol(X)
-
-g_old = t(X) %*% Y
-s = g_old
-w = Rand(rows=dimensions, cols=1, min=0, max=0, pdf="uniform")
-
-iter = 0
-continue = 1
-while(continue == 1)  {
-	# minimizing primal obj along direction s
-	step_sz = 0
-	Xd = X %*% s
-	wd = lambda * sum(w * s)
-	dd = lambda * sum(s * s)
-	continue1 = 1
-	while(continue1 == 1){
-		tmp_w = w + step_sz*s
-		out = 1 - Y * (X %*% tmp_w)
-		sv = ppred(out, 0, ">")
-		out = out * sv
-		g = wd + step_sz*dd - sum(out * Y * Xd)
-		h = dd + sum(Xd * sv * Xd)
-		step_sz = step_sz - g/h
-		if (g*g/h < 0.0000000001){
-			continue1 = 0
-		}
-	}
-	
-	#update weights
-	w = w + step_sz*s
-	
-	out = 1 - Y * (X %*% w)
-	sv = ppred(out, 0, ">")
-	out = sv * out
-	obj = 0.5 * sum(out * out) + lambda/2 * sum(w * w)
-	g_new = t(X) %*% (out * Y) - lambda * w
-	
-	print("OBJ = " + obj)
-	
-	tmp = sum(s * g_old)
-	if(step_sz*tmp < epsilon*obj){
-		continue = 0
-	}
-	
-	#non-linear CG step
-	be = sum(g_new * g_new)/sum(g_old * g_old)
-	s = be * s + g_new
-	g_old = g_new
-
-	iter = iter + 1
-}
-
-write(w, "./test/scripts/applications/l2svm/out/w", format="text")
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+X = read("./test/scripts/applications/l2svm/in/X", rows=1000, cols=100, format="text")
+Y = read("./test/scripts/applications/l2svm/in/y", rows=1000, cols=1, format="text")
+epsilon = 0.00000001
+lambda = 1
+
+num_samples = nrow(X)
+dimensions = ncol(X)
+
+g_old = t(X) %*% Y
+s = g_old
+w = Rand(rows=dimensions, cols=1, min=0, max=0, pdf="uniform")
+
+iter = 0
+continue = 1
+while(continue == 1)  {
+	# minimizing primal obj along direction s
+	step_sz = 0
+	Xd = X %*% s
+	wd = lambda * sum(w * s)
+	dd = lambda * sum(s * s)
+	continue1 = 1
+	while(continue1 == 1){
+		tmp_w = w + step_sz*s
+		out = 1 - Y * (X %*% tmp_w)
+		sv = ppred(out, 0, ">")
+		out = out * sv
+		g = wd + step_sz*dd - sum(out * Y * Xd)
+		h = dd + sum(Xd * sv * Xd)
+		step_sz = step_sz - g/h
+		if (g*g/h < 0.0000000001){
+			continue1 = 0
+		}
+	}
+	
+	#update weights
+	w = w + step_sz*s
+	
+	out = 1 - Y * (X %*% w)
+	sv = ppred(out, 0, ">")
+	out = sv * out
+	obj = 0.5 * sum(out * out) + lambda/2 * sum(w * w)
+	g_new = t(X) %*% (out * Y) - lambda * w
+	
+	print("OBJ = " + obj)
+	
+	tmp = sum(s * g_old)
+	if(step_sz*tmp < epsilon*obj){
+		continue = 0
+	}
+	
+	#non-linear CG step
+	be = sum(g_new * g_new)/sum(g_old * g_old)
+	s = be * s + g_new
+	g_old = g_new
+
+	iter = iter + 1
+}
+
+write(w, "./test/scripts/applications/l2svm/out/w", format="text")

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/linearLogReg/LinearLogReg.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/linearLogReg/LinearLogReg.R b/src/test/scripts/applications/linearLogReg/LinearLogReg.R
index fe22c8f..3a35d3f 100644
--- a/src/test/scripts/applications/linearLogReg/LinearLogReg.R
+++ b/src/test/scripts/applications/linearLogReg/LinearLogReg.R
@@ -1,217 +1,217 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# JUnit test class: dml.test.integration.applications.LinearLogReg.java
-# command line invocation assuming $LLR_HOME is set to the home of the R script
-# Rscript $LLR_HOME/LinearLogReg.R $LLR_HOME/in/ $LLR_HOME/expected/
-
-args <- commandArgs(TRUE)
-
-library("Matrix")
-#library("batch")
-# Usage:  /home/vikas/R-2.10.1/bin/R --vanilla --args Xfile X yfile y Cval 2 tol 0.01 maxiter 100 < linearLogReg.r
-
-# Solves Linear Logistic Regression using Trust Region methods. 
-# Can be adapted for L2-SVMs and more general unconstrained optimization problems also
-# setup optimization parameters (See: Trust Region Newton Method for Logistic Regression, Lin, Weng and Keerthi, JMLR 9 (2008) 627-650)
-
-options(warn=-1)
-
-C = 2; 
-tol = 0.001
-maxiter = 3
-maxinneriter = 3
-
-eta0 = 0.0001
-eta1 = 0.25
-eta2 = 0.75
-sigma1 = 0.25
-sigma2 = 0.5
-sigma3 = 4.0
-psi = 0.1 
-
-# read (training and test) data files -- should be in matrix market format. see data.mtx 
-X = readMM(paste(args[1], "X.mtx", sep=""));
-Xt = readMM(paste(args[1], "Xt.mtx", sep=""));
-
-N = nrow(X)
-D = ncol(X)
-Nt = nrow(Xt)
-
-# read (training and test) labels
-y = readMM(paste(args[1], "y.mtx", sep=""));
-yt = readMM(paste(args[1], "yt.mtx", sep=""));
-
-# initialize w
-w = matrix(0,D,1)
-o = X %*% w
-logistic = 1.0/(1.0 + exp(-y*o))
-
-# VS : change
-obj = 0.5 * t(w) %*% w + C*sum(-log(logistic))
-grad = w + C*t(X) %*% ((logistic - 1)*y)
-logisticD = logistic*(1-logistic)
-delta = sqrt(sum(grad*grad))
-
-# number of iterations
-iter = 0
-
-# starting point for CG
-zeros_D = matrix(0,D,1)
-# VS: change
-zeros_N = matrix(0,N,1)
-
-# boolean for convergence check
-
-converge = (delta < tol)
-norm_r2 = sum(grad*grad)
-gnorm = sqrt(norm_r2)
-# VS: change
-norm_grad = sqrt(norm_r2)
-norm_grad_initial = norm_grad
-
-while(!converge) {
- 	
-	norm_grad = sqrt(sum(grad*grad))
-
-	print("next iteration..")
-	print(paste("Iterations : ",iter, "Objective : ", obj[1,1],  "Gradient Norm : ", norm_grad))
-    	 
-	# SOLVE TRUST REGION SUB-PROBLEM
-	s = zeros_D
-	os = zeros_N
-	r = -grad
-	d = r
-	innerconverge = (sqrt(sum(r*r)) <= psi*norm_grad)
-	inneriter = 0; 	
-	while(!innerconverge) {
-		inneriter = inneriter + 1
-		norm_r2 = sum(r*r)
-		od = X %*% d
-		Hd = d + C*(t(X) %*% (logisticD*od))
-		alpha_deno = t(d) %*% Hd 
-		alpha = norm_r2/alpha_deno
-		
-		s = s + alpha[1,1]*d
-		os = os + alpha[1,1]*od
-
-		sts = t(s) %*% s
-		delta2 = delta*delta 
-		
-		if (sts[1,1] > delta2) {
-			# VS: change 
-			print("cg reaches trust region boundary")
-			# VS: change
-			s = s - alpha[1,1]*d
-			os = os - alpha[1,1]*od
-			std = t(s) %*% d
-			dtd = t(d) %*% d
-			# VS:change
-			sts = t(s) %*% s
-			rad = sqrt(std*std + dtd*(delta2 - sts))
-			if(std[1,1]>=0) {
-				tau = (delta2 - sts)/(std + rad)
-			} 
-			else {
-				tau = (rad - std)/dtd
-			}	 
-			s = s + tau[1,1]*d
-			os = os + tau[1,1]*od
-			r = r - tau[1,1]*Hd
-			break
-		}
-		r = r - alpha[1,1]*Hd
-		old_norm_r2 = norm_r2 
-		norm_r2 = sum(r*r)
-		beta = norm_r2/old_norm_r2
-		d = r + beta*d
-		innerconverge = (sqrt(norm_r2) <= psi * norm_grad) | (inneriter > maxinneriter) # innerconverge = (sqrt(norm_r2) <= psi*norm_grad)
-	}
-	
-	print(paste("Inner CG Iteration = ", inneriter))
-	# END TRUST REGION SUB-PROBLEM
-	# compute rho, update w, obtain delta
-	gs = t(s) %*% grad
-	qk = -0.5*(gs - (t(s) %*% r))
-	
-	wnew = w + s
-	# VS Change X %*% wnew removed	
-	onew = o + os 
-	# VS: change
-	logisticnew = 1.0/(1.0 + exp(-y*onew))
-	objnew = 0.5 * t(wnew) %*% wnew + C*sum(-log(logisticnew))
-
-	# VS: change
-	actred = (obj - objnew)	
-	rho = actred/qk
-
-	print(paste("Actual :", actred[1,1], "Predicted :", qk[1,1]))
-
-	rho = rho[1,1]
-	snorm = sqrt(sum(s*s))
-
-	if(iter==0) {
-		delta = min(delta, snorm)
-	}
-	if (objnew[1,1] - obj[1,1] - gs[1,1] <= 0) {
-		alpha = sigma3;
-	}
-	else {
-		alpha = max(sigma1, -0.5*gs[1,1]/(objnew[1,1] - obj[1,1] - gs[1,1]))
-	}
-
-
-
-	if (rho > eta0) {
-	
-		w = wnew
-		o = onew
-		grad = w + C*t(X) %*% ((logisticnew - 1)*y)
-		# VS: change
-		norm_grad = sqrt(sum(grad*grad))
-		logisticD = logisticnew*(1-logisticnew)
-		obj = objnew
-
-	}
-	
-	if (rho < eta0)
-		{delta = min(max(alpha, sigma1)*snorm, sigma2*delta)}
-	else if (rho < eta1)
-		{delta = max(sigma1*delta, min(alpha*snorm, sigma2*delta))}
-	else if (rho < eta2)
-		{delta = max(sigma1*delta, min(alpha*snorm, sigma3*delta))}
-	else
-		{delta = max(delta, min(alpha*snorm, sigma3*delta))}
-
-	ot = Xt %*% w
-	correct = sum((yt*ot)>0)
-	iter = iter + 1
-	converge = (norm_grad < tol*norm_grad_initial) | (iter>maxiter)
-	
-	print(paste("Delta :", delta))
-	print(paste("Accuracy=", correct*100/Nt))
-	print(paste("OuterIter=", iter))
-	print(paste("Converge=", converge))
-}
-
-writeMM(as(w,"CsparseMatrix"), paste(args[2],"w", sep=""), format = "text")
-
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# JUnit test class: dml.test.integration.applications.LinearLogReg.java
+# command line invocation assuming $LLR_HOME is set to the home of the R script
+# Rscript $LLR_HOME/LinearLogReg.R $LLR_HOME/in/ $LLR_HOME/expected/
+
+args <- commandArgs(TRUE)
+
+library("Matrix")
+#library("batch")
+# Usage:  /home/vikas/R-2.10.1/bin/R --vanilla --args Xfile X yfile y Cval 2 tol 0.01 maxiter 100 < linearLogReg.r
+
+# Solves Linear Logistic Regression using Trust Region methods. 
+# Can be adapted for L2-SVMs and more general unconstrained optimization problems also
+# setup optimization parameters (See: Trust Region Newton Method for Logistic Regression, Lin, Weng and Keerthi, JMLR 9 (2008) 627-650)
+
+options(warn=-1)
+
+C = 2; 
+tol = 0.001
+maxiter = 3
+maxinneriter = 3
+
+eta0 = 0.0001
+eta1 = 0.25
+eta2 = 0.75
+sigma1 = 0.25
+sigma2 = 0.5
+sigma3 = 4.0
+psi = 0.1 
+
+# read (training and test) data files -- should be in matrix market format. see data.mtx 
+X = readMM(paste(args[1], "X.mtx", sep=""));
+Xt = readMM(paste(args[1], "Xt.mtx", sep=""));
+
+N = nrow(X)
+D = ncol(X)
+Nt = nrow(Xt)
+
+# read (training and test) labels
+y = readMM(paste(args[1], "y.mtx", sep=""));
+yt = readMM(paste(args[1], "yt.mtx", sep=""));
+
+# initialize w
+w = matrix(0,D,1)
+o = X %*% w
+logistic = 1.0/(1.0 + exp(-y*o))
+
+# VS : change
+obj = 0.5 * t(w) %*% w + C*sum(-log(logistic))
+grad = w + C*t(X) %*% ((logistic - 1)*y)
+logisticD = logistic*(1-logistic)
+delta = sqrt(sum(grad*grad))
+
+# number of iterations
+iter = 0
+
+# starting point for CG
+zeros_D = matrix(0,D,1)
+# VS: change
+zeros_N = matrix(0,N,1)
+
+# boolean for convergence check
+
+converge = (delta < tol)
+norm_r2 = sum(grad*grad)
+gnorm = sqrt(norm_r2)
+# VS: change
+norm_grad = sqrt(norm_r2)
+norm_grad_initial = norm_grad
+
+while(!converge) {
+ 	
+	norm_grad = sqrt(sum(grad*grad))
+
+	print("next iteration..")
+	print(paste("Iterations : ",iter, "Objective : ", obj[1,1],  "Gradient Norm : ", norm_grad))
+    	 
+	# SOLVE TRUST REGION SUB-PROBLEM
+	s = zeros_D
+	os = zeros_N
+	r = -grad
+	d = r
+	innerconverge = (sqrt(sum(r*r)) <= psi*norm_grad)
+	inneriter = 0; 	
+	while(!innerconverge) {
+		inneriter = inneriter + 1
+		norm_r2 = sum(r*r)
+		od = X %*% d
+		Hd = d + C*(t(X) %*% (logisticD*od))
+		alpha_deno = t(d) %*% Hd 
+		alpha = norm_r2/alpha_deno
+		
+		s = s + alpha[1,1]*d
+		os = os + alpha[1,1]*od
+
+		sts = t(s) %*% s
+		delta2 = delta*delta 
+		
+		if (sts[1,1] > delta2) {
+			# VS: change 
+			print("cg reaches trust region boundary")
+			# VS: change
+			s = s - alpha[1,1]*d
+			os = os - alpha[1,1]*od
+			std = t(s) %*% d
+			dtd = t(d) %*% d
+			# VS:change
+			sts = t(s) %*% s
+			rad = sqrt(std*std + dtd*(delta2 - sts))
+			if(std[1,1]>=0) {
+				tau = (delta2 - sts)/(std + rad)
+			} 
+			else {
+				tau = (rad - std)/dtd
+			}	 
+			s = s + tau[1,1]*d
+			os = os + tau[1,1]*od
+			r = r - tau[1,1]*Hd
+			break
+		}
+		r = r - alpha[1,1]*Hd
+		old_norm_r2 = norm_r2 
+		norm_r2 = sum(r*r)
+		beta = norm_r2/old_norm_r2
+		d = r + beta*d
+		innerconverge = (sqrt(norm_r2) <= psi * norm_grad) | (inneriter > maxinneriter) # innerconverge = (sqrt(norm_r2) <= psi*norm_grad)
+	}
+	
+	print(paste("Inner CG Iteration = ", inneriter))
+	# END TRUST REGION SUB-PROBLEM
+	# compute rho, update w, obtain delta
+	gs = t(s) %*% grad
+	qk = -0.5*(gs - (t(s) %*% r))
+	
+	wnew = w + s
+	# VS Change X %*% wnew removed	
+	onew = o + os 
+	# VS: change
+	logisticnew = 1.0/(1.0 + exp(-y*onew))
+	objnew = 0.5 * t(wnew) %*% wnew + C*sum(-log(logisticnew))
+
+	# VS: change
+	actred = (obj - objnew)	
+	rho = actred/qk
+
+	print(paste("Actual :", actred[1,1], "Predicted :", qk[1,1]))
+
+	rho = rho[1,1]
+	snorm = sqrt(sum(s*s))
+
+	if(iter==0) {
+		delta = min(delta, snorm)
+	}
+	if (objnew[1,1] - obj[1,1] - gs[1,1] <= 0) {
+		alpha = sigma3;
+	}
+	else {
+		alpha = max(sigma1, -0.5*gs[1,1]/(objnew[1,1] - obj[1,1] - gs[1,1]))
+	}
+
+
+
+	if (rho > eta0) {
+	
+		w = wnew
+		o = onew
+		grad = w + C*t(X) %*% ((logisticnew - 1)*y)
+		# VS: change
+		norm_grad = sqrt(sum(grad*grad))
+		logisticD = logisticnew*(1-logisticnew)
+		obj = objnew
+
+	}
+	
+	if (rho < eta0)
+		{delta = min(max(alpha, sigma1)*snorm, sigma2*delta)}
+	else if (rho < eta1)
+		{delta = max(sigma1*delta, min(alpha*snorm, sigma2*delta))}
+	else if (rho < eta2)
+		{delta = max(sigma1*delta, min(alpha*snorm, sigma3*delta))}
+	else
+		{delta = max(delta, min(alpha*snorm, sigma3*delta))}
+
+	ot = Xt %*% w
+	correct = sum((yt*ot)>0)
+	iter = iter + 1
+	converge = (norm_grad < tol*norm_grad_initial) | (iter>maxiter)
+	
+	print(paste("Delta :", delta))
+	print(paste("Accuracy=", correct*100/Nt))
+	print(paste("OuterIter=", iter))
+	print(paste("Converge=", converge))
+}
+
+writeMM(as(w,"CsparseMatrix"), paste(args[2],"w", sep=""), format = "text")
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/linearLogReg/LinearLogReg.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/linearLogReg/LinearLogReg.dml b/src/test/scripts/applications/linearLogReg/LinearLogReg.dml
index fb0bf00..cf2f7ad 100644
--- a/src/test/scripts/applications/linearLogReg/LinearLogReg.dml
+++ b/src/test/scripts/applications/linearLogReg/LinearLogReg.dml
@@ -1,231 +1,231 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# Solves Linear Logistic Regression using Trust Region methods. 
-# Can be adapted for L2-SVMs and more general unconstrained optimization problems also
-# setup optimization parameters (See: Trust Region Newton Method for Logistic Regression, Lin, Weng and Keerthi, JMLR 9 (2008) 627-650)
-
-# Note this script is externalized to customers, please do not change w/o consulting component owner.
-# How to invoke this dml script LinearLogReg.dml?
-# Assume LLR_HOME is set to the home of the dml script
-# Assume input and output directories are on hdfs as INPUT_DIR and OUTPUT_DIR
-# Assume rows = 100 and cols = 50 for x, rows_test= 25 and cols_test = 50 for Xt
-# hadoop jar SystemML.jar -f $LLR_HOME/LinearLogReg.dml -args "$INPUT_DIR/X" "$INPUT_DIR/Xt" "$INPUT_DIR/y" "$INPUT_DIR/yt" "$OUTPUT_DIR/w"
-
-C = 2
-tol = 0.001
-maxiter = 3
-maxinneriter = 3
-
-eta0 = 0.0001
-eta1 = 0.25
-eta2 = 0.75
-sigma1 = 0.25
-sigma2 = 0.5
-sigma3 = 4.0
-psi = 0.1 
-
-# read (training and test) data files
-X = read($1)
-Xt = read($2)
-N = nrow(X)
-D = ncol(X)
-Nt = nrow(Xt)
-
-# read (training and test) labels
-y = read($3)
-yt = read($4)
-
-#initialize w
-w = Rand(rows=D, cols=1, min=0.0, max=0.0);
-e = Rand(rows=1, cols=1, min=1.0, max=1.0); 
-o = X %*% w
-logistic = 1.0/(1.0 + exp( -y * o))
-
-obj = 0.5 * t(w) %*% w + C*sum(-log(logistic))
-grad = w + C*t(X) %*% ((logistic - 1)*y)
-logisticD = logistic*(1-logistic)
-delta = sqrt(sum(grad*grad))
-
-# number of iterations
-iter = 0
-
-# starting point for CG
-zeros_D = Rand(rows = D, cols = 1, min = 0.0, max = 0.0);
-# VS: change
-zeros_N = Rand(rows = N, cols = 1, min = 0.0, max = 0.0);
-
-# boolean for convergence check
-
-converge = (delta < tol) | (iter > maxiter)
-norm_r2 = sum(grad*grad)
-
-# VS: change
-norm_grad = sqrt(norm_r2)
-norm_grad_initial = norm_grad
-
-alpha = t(w) %*% w
-alpha2 = alpha
-
-while(!converge) {
-	
-	norm_grad = sqrt(sum(grad*grad))
-	
-	print("-- Outer Iteration = " + iter)
-	objScalar = castAsScalar(obj)
-	print("     Iterations = " + iter + ", Objective = " + objScalar + ", Gradient Norm = " + norm_grad)
-	
-	# SOLVE TRUST REGION SUB-PROBLEM
-	s = zeros_D
-	os = zeros_N
-	r = -grad
-	d = r
-	inneriter = 0
-	innerconverge = ( sqrt(sum(r*r)) <= psi * norm_grad) 
-	while (!innerconverge) {
-	    inneriter = inneriter + 1
-		norm_r2 = sum(r*r)
-		od = X %*% d
-		Hd = d + C*(t(X) %*% (logisticD*od))
-		alpha_deno = t(d) %*% Hd 
-		alpha = norm_r2 / alpha_deno
-	
-		s = s + castAsScalar(alpha) * d
-		os = os + castAsScalar(alpha) * od
-
-		sts = t(s) %*% s
-		delta2 = delta*delta 
-		stsScalar = castAsScalar(sts)
-		
-		shouldBreak = FALSE;  # to mimic "break" in the following 'if' condition
-		if (stsScalar > delta2) {
-		   	print("      --- cg reaches trust region boundary")
-			s = s - castAsScalar(alpha) * d
-			os = os - castAsScalar(alpha) * od
-			std = t(s) %*% d
-			dtd = t(d) %*% d
-			sts = t(s) %*% s
-			rad = sqrt(std*std + dtd*(delta2 - sts))
-			stdScalar = castAsScalar(std)
-			if(stdScalar >= 0) {
-				tau = (delta2 - sts)/(std + rad)
-			} 
-			else {
-				tau = (rad - std)/dtd
-			}
-						
-			s = s + castAsScalar(tau) * d
-			os = os + castAsScalar(tau) * od
-			r = r - castAsScalar(tau) * Hd
-			
-			#break
-			shouldBreak = TRUE;
-			innerconverge = TRUE;
-		
-		} 
-		
-		if (!shouldBreak) {
-			r = r - castAsScalar(alpha) * Hd
-			old_norm_r2 = norm_r2 
-			norm_r2 = sum(r*r)
-			beta = norm_r2/old_norm_r2
-			d = r + beta*d
-			innerconverge = (sqrt(norm_r2) <= psi * norm_grad) | (inneriter > maxinneriter)
-		}
-	}  
-	
-	print("      --- Inner CG Iteration =  " + inneriter)
-	# END TRUST REGION SUB-PROBLEM
-	# compute rho, update w, obtain delta
-	gs = t(s) %*% grad
-	qk = -0.5*(gs - (t(s) %*% r))
-	
-	wnew = w + s	
-	onew = o + os
-	logisticnew = 1.0/(1.0 + exp(-y * onew ))
-	objnew = 0.5 * t(wnew) %*% wnew + C * sum(-log(logisticnew))
-	
-	actred = (obj - objnew)
-	actredScalar = castAsScalar(actred)
-	rho = actred / qk
-	qkScalar = castAsScalar(qk)
-	rhoScalar = castAsScalar(rho);
-	snorm = sqrt(sum( s * s ))
-	print("     Actual    = " + actredScalar)
-	print("     Predicted = " + qkScalar)
-	
-	if (iter==0) {
-	   delta = min(delta, snorm)
-	}
-	alpha2 = objnew - obj - gs
-	alpha2Scalar = castAsScalar(alpha2)
-	if (alpha2Scalar <= 0) {
-	   alpha = sigma3*e
-	} 
-	else {
-	   ascalar = max(sigma1, -0.5*castAsScalar(gs)/alpha2Scalar)  
-	   alpha = ascalar*e
-	}
-
-	if (rhoScalar > eta0) {
-		
-		w = wnew
-		o = onew
-		grad = w + C*t(X) %*% ((logisticnew - 1) * y )
-		norm_grad = sqrt(sum(grad*grad))
-		logisticD = logisticnew * (1 - logisticnew)
-		obj = objnew	
-	} 
-
-	alphaScalar = castAsScalar(alpha)
-	if (rhoScalar < eta0){
-		delta = min(max( alphaScalar , sigma1) * snorm, sigma2 * delta )
-	}
-	else {
-		if (rhoScalar < eta1){
-			delta = max(sigma1 * delta, min( alphaScalar  * snorm, sigma2 * delta))
-		}
-		else { 
-			if (rhoScalar < eta2) {
-				delta = max(sigma1 * delta, min( alphaScalar * snorm, sigma3 * delta))
-			}
-			else {
-				delta = max(delta, min( alphaScalar * snorm, sigma3 * delta))
-			}
-		}
-	} 
-	
-	
-	ot = Xt %*% w
-	ot2 = yt * ot
-	correct = sum(ppred(ot2, 0, ">"))
-	accuracy = correct*100.0/Nt 
-	iter = iter + 1
-	converge = (norm_grad < (tol * norm_grad_initial)) | (iter > maxiter)
-
-	print("     Delta =  " + delta)
-	print("     Accuracy =  " +  accuracy)
-	print("     Correct =  " + correct)
-	print("     OuterIter =  " + iter)
-	print("     Converge =  " + converge)
-} 
-
-write(w, $5, format="text");
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# Solves Linear Logistic Regression using Trust Region methods. 
+# Can be adapted for L2-SVMs and more general unconstrained optimization problems also
+# setup optimization parameters (See: Trust Region Newton Method for Logistic Regression, Lin, Weng and Keerthi, JMLR 9 (2008) 627-650)
+
+# Note this script is externalized to customers, please do not change w/o consulting component owner.
+# How to invoke this dml script LinearLogReg.dml?
+# Assume LLR_HOME is set to the home of the dml script
+# Assume input and output directories are on hdfs as INPUT_DIR and OUTPUT_DIR
+# Assume rows = 100 and cols = 50 for x, rows_test= 25 and cols_test = 50 for Xt
+# hadoop jar SystemML.jar -f $LLR_HOME/LinearLogReg.dml -args "$INPUT_DIR/X" "$INPUT_DIR/Xt" "$INPUT_DIR/y" "$INPUT_DIR/yt" "$OUTPUT_DIR/w"
+
+C = 2
+tol = 0.001
+maxiter = 3
+maxinneriter = 3
+
+eta0 = 0.0001
+eta1 = 0.25
+eta2 = 0.75
+sigma1 = 0.25
+sigma2 = 0.5
+sigma3 = 4.0
+psi = 0.1 
+
+# read (training and test) data files
+X = read($1)
+Xt = read($2)
+N = nrow(X)
+D = ncol(X)
+Nt = nrow(Xt)
+
+# read (training and test) labels
+y = read($3)
+yt = read($4)
+
+#initialize w
+w = Rand(rows=D, cols=1, min=0.0, max=0.0);
+e = Rand(rows=1, cols=1, min=1.0, max=1.0); 
+o = X %*% w
+logistic = 1.0/(1.0 + exp( -y * o))
+
+obj = 0.5 * t(w) %*% w + C*sum(-log(logistic))
+grad = w + C*t(X) %*% ((logistic - 1)*y)
+logisticD = logistic*(1-logistic)
+delta = sqrt(sum(grad*grad))
+
+# number of iterations
+iter = 0
+
+# starting point for CG
+zeros_D = Rand(rows = D, cols = 1, min = 0.0, max = 0.0);
+# VS: change
+zeros_N = Rand(rows = N, cols = 1, min = 0.0, max = 0.0);
+
+# boolean for convergence check
+
+converge = (delta < tol) | (iter > maxiter)
+norm_r2 = sum(grad*grad)
+
+# VS: change
+norm_grad = sqrt(norm_r2)
+norm_grad_initial = norm_grad
+
+alpha = t(w) %*% w
+alpha2 = alpha
+
+while(!converge) {
+	
+	norm_grad = sqrt(sum(grad*grad))
+	
+	print("-- Outer Iteration = " + iter)
+	objScalar = castAsScalar(obj)
+	print("     Iterations = " + iter + ", Objective = " + objScalar + ", Gradient Norm = " + norm_grad)
+	
+	# SOLVE TRUST REGION SUB-PROBLEM
+	s = zeros_D
+	os = zeros_N
+	r = -grad
+	d = r
+	inneriter = 0
+	innerconverge = ( sqrt(sum(r*r)) <= psi * norm_grad) 
+	while (!innerconverge) {
+	    inneriter = inneriter + 1
+		norm_r2 = sum(r*r)
+		od = X %*% d
+		Hd = d + C*(t(X) %*% (logisticD*od))
+		alpha_deno = t(d) %*% Hd 
+		alpha = norm_r2 / alpha_deno
+	
+		s = s + castAsScalar(alpha) * d
+		os = os + castAsScalar(alpha) * od
+
+		sts = t(s) %*% s
+		delta2 = delta*delta 
+		stsScalar = castAsScalar(sts)
+		
+		shouldBreak = FALSE;  # to mimic "break" in the following 'if' condition
+		if (stsScalar > delta2) {
+		   	print("      --- cg reaches trust region boundary")
+			s = s - castAsScalar(alpha) * d
+			os = os - castAsScalar(alpha) * od
+			std = t(s) %*% d
+			dtd = t(d) %*% d
+			sts = t(s) %*% s
+			rad = sqrt(std*std + dtd*(delta2 - sts))
+			stdScalar = castAsScalar(std)
+			if(stdScalar >= 0) {
+				tau = (delta2 - sts)/(std + rad)
+			} 
+			else {
+				tau = (rad - std)/dtd
+			}
+						
+			s = s + castAsScalar(tau) * d
+			os = os + castAsScalar(tau) * od
+			r = r - castAsScalar(tau) * Hd
+			
+			#break
+			shouldBreak = TRUE;
+			innerconverge = TRUE;
+		
+		} 
+		
+		if (!shouldBreak) {
+			r = r - castAsScalar(alpha) * Hd
+			old_norm_r2 = norm_r2 
+			norm_r2 = sum(r*r)
+			beta = norm_r2/old_norm_r2
+			d = r + beta*d
+			innerconverge = (sqrt(norm_r2) <= psi * norm_grad) | (inneriter > maxinneriter)
+		}
+	}  
+	
+	print("      --- Inner CG Iteration =  " + inneriter)
+	# END TRUST REGION SUB-PROBLEM
+	# compute rho, update w, obtain delta
+	gs = t(s) %*% grad
+	qk = -0.5*(gs - (t(s) %*% r))
+	
+	wnew = w + s	
+	onew = o + os
+	logisticnew = 1.0/(1.0 + exp(-y * onew ))
+	objnew = 0.5 * t(wnew) %*% wnew + C * sum(-log(logisticnew))
+	
+	actred = (obj - objnew)
+	actredScalar = castAsScalar(actred)
+	rho = actred / qk
+	qkScalar = castAsScalar(qk)
+	rhoScalar = castAsScalar(rho);
+	snorm = sqrt(sum( s * s ))
+	print("     Actual    = " + actredScalar)
+	print("     Predicted = " + qkScalar)
+	
+	if (iter==0) {
+	   delta = min(delta, snorm)
+	}
+	alpha2 = objnew - obj - gs
+	alpha2Scalar = castAsScalar(alpha2)
+	if (alpha2Scalar <= 0) {
+	   alpha = sigma3*e
+	} 
+	else {
+	   ascalar = max(sigma1, -0.5*castAsScalar(gs)/alpha2Scalar)  
+	   alpha = ascalar*e
+	}
+
+	if (rhoScalar > eta0) {
+		
+		w = wnew
+		o = onew
+		grad = w + C*t(X) %*% ((logisticnew - 1) * y )
+		norm_grad = sqrt(sum(grad*grad))
+		logisticD = logisticnew * (1 - logisticnew)
+		obj = objnew	
+	} 
+
+	alphaScalar = castAsScalar(alpha)
+	if (rhoScalar < eta0){
+		delta = min(max( alphaScalar , sigma1) * snorm, sigma2 * delta )
+	}
+	else {
+		if (rhoScalar < eta1){
+			delta = max(sigma1 * delta, min( alphaScalar  * snorm, sigma2 * delta))
+		}
+		else { 
+			if (rhoScalar < eta2) {
+				delta = max(sigma1 * delta, min( alphaScalar * snorm, sigma3 * delta))
+			}
+			else {
+				delta = max(delta, min( alphaScalar * snorm, sigma3 * delta))
+			}
+		}
+	} 
+	
+	
+	ot = Xt %*% w
+	ot2 = yt * ot
+	correct = sum(ppred(ot2, 0, ">"))
+	accuracy = correct*100.0/Nt 
+	iter = iter + 1
+	converge = (norm_grad < (tol * norm_grad_initial)) | (iter > maxiter)
+
+	print("     Delta =  " + delta)
+	print("     Accuracy =  " +  accuracy)
+	print("     Correct =  " + correct)
+	print("     OuterIter =  " + iter)
+	print("     Converge =  " + converge)
+} 
+
+write(w, $5, format="text");

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/linearLogReg/LinearLogReg.pydml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/linearLogReg/LinearLogReg.pydml b/src/test/scripts/applications/linearLogReg/LinearLogReg.pydml
index a8dc934..1a9e769 100644
--- a/src/test/scripts/applications/linearLogReg/LinearLogReg.pydml
+++ b/src/test/scripts/applications/linearLogReg/LinearLogReg.pydml
@@ -1,214 +1,214 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# Solves Linear Logistic Regression using Trust Region methods. 
-# Can be adapted for L2-SVMs and more general unconstrained optimization problems also
-# setup optimization parameters (See: Trust Region Newton Method for Logistic Regression, Lin, Weng and Keerthi, JMLR 9 (2008) 627-650)
-
-# Note this script is externalized to customers, please do not change w/o consulting component owner.
-# How to invoke this pydml script LinearLogReg.pydml?
-# Assume LLR_HOME is set to the home of the pydml script
-# Assume input and output directories are on hdfs as INPUT_DIR and OUTPUT_DIR
-# Assume rows = 100 and cols = 50 for x, rows_test= 25 and cols_test = 50 for Xt
-# hadoop jar SystemML.jar -f $LLR_HOME/LinearLogReg.pydml -python -args "$INPUT_DIR/X" "$INPUT_DIR/Xt" "$INPUT_DIR/y" "$INPUT_DIR/yt" "$OUTPUT_DIR/w"
-
-C = 2
-tol = 0.001
-maxiter = 3
-maxinneriter = 3
-
-eta0 = 0.0001
-eta1 = 0.25
-eta2 = 0.75
-sigma1 = 0.25
-sigma2 = 0.5
-sigma3 = 4.0
-psi = 0.1
-
-# load (training and test) data files
-X = load($1)
-Xt = load($2)
-N = nrow(X)
-D = ncol(X)
-Nt = nrow(Xt)
-
-# load (training and test) labels
-y = load($3)
-yt = load($4)
-
-#initialize w
-w = Rand(rows=D, cols=1, min=0.0, max=0.0)
-e = Rand(rows=1, cols=1, min=1.0, max=1.0)
-o = dot(X, w)
-logistic = 1.0/(1.0 + exp( -y * o))
-
-# CHECK ORDER OF OPERATIONS HERE
-obj = dot((0.5 * transpose(w)), w) + C*sum(-log(logistic))
-grad = w + dot(C*transpose(X), ((logistic - 1)*y))
-logisticD = logistic*(1-logistic)
-delta = sqrt(sum(grad*grad))
-
-# number of iterations
-iter = 0
-
-# starting point for CG
-zeros_D = Rand(rows = D, cols = 1, min = 0.0, max = 0.0)
-# VS: change
-zeros_N = Rand(rows = N, cols = 1, min = 0.0, max = 0.0)
-
-# boolean for convergence check
-
-converge = (delta < tol) | (iter > maxiter)
-norm_r2 = sum(grad*grad)
-
-# VS: change
-norm_grad = sqrt(norm_r2)
-norm_grad_initial = norm_grad
-
-alpha = dot(transpose(w), w)
-alpha2 = alpha
-
-while(!converge):
-    
-    norm_grad = sqrt(sum(grad*grad))
-    
-    print("-- Outer Iteration = " + iter)
-    objScalar = castAsScalar(obj)
-    print("     Iterations = " + iter + ", Objective = " + objScalar + ", Gradient Norm = " + norm_grad)
-    
-    # SOLVE TRUST REGION SUB-PROBLEM
-    s = zeros_D
-    os = zeros_N
-    r = -grad
-    d = r
-    inneriter = 0
-    innerconverge = ( sqrt(sum(r*r)) <= psi * norm_grad)
-    while (!innerconverge):
-        inneriter = inneriter + 1
-        norm_r2 = sum(r*r)
-        od = dot(X, d)
-        Hd = d + C*(dot(transpose(X), (logisticD*od)))
-        alpha_deno = dot(transpose(d), Hd)
-        alpha = norm_r2 / alpha_deno
-        
-        s = s + castAsScalar(alpha) * d
-        os = os + castAsScalar(alpha) * od
-        
-        sts = dot(transpose(s), s)
-        delta2 = delta*delta 
-        stsScalar = castAsScalar(sts)
-        
-        shouldBreak = False  # to mimic "break" in the following 'if' condition
-        if (stsScalar > delta2):
-            print("      --- cg reaches trust region boundary")
-            s = s - castAsScalar(alpha) * d
-            os = os - castAsScalar(alpha) * od
-            std = dot(transpose(s), d)
-            dtd = dot(transpose(d), d)
-            sts = dot(transpose(s), s)
-            rad = sqrt(std*std + dtd*(delta2 - sts))
-            stdScalar = castAsScalar(std)
-            if(stdScalar >= 0):
-                tau = (delta2 - sts)/(std + rad)
-            else:
-                tau = (rad - std)/dtd
-            
-            s = s + castAsScalar(tau) * d
-            os = os + castAsScalar(tau) * od
-            r = r - castAsScalar(tau) * Hd
-            
-            #break
-            shouldBreak = True
-            innerconverge = True
-        if (!shouldBreak):
-            r = r - castAsScalar(alpha) * Hd
-            old_norm_r2 = norm_r2
-            norm_r2 = sum(r*r)
-            beta = norm_r2/old_norm_r2
-            d = r + beta*d
-            innerconverge = (sqrt(norm_r2) <= psi * norm_grad) | (inneriter > maxinneriter)
-    # end while (!innerconverge)
-    
-    print("      --- Inner CG Iteration =  " + inneriter)
-    # END TRUST REGION SUB-PROBLEM
-    # compute rho, update w, obtain delta
-    gs = dot(transpose(s), grad)
-    qk = -0.5*(gs - (dot(transpose(s), r)))
-    
-    wnew = w + s    
-    onew = o + os
-    logisticnew = 1.0/(1.0 + exp(-y * onew ))
-    objnew = dot((0.5 * transpose(wnew)), wnew) + C * sum(-log(logisticnew))
-    
-    actred = (obj - objnew)
-    actredScalar = castAsScalar(actred)
-    rho = actred / qk
-    qkScalar = castAsScalar(qk)
-    rhoScalar = castAsScalar(rho)
-    snorm = sqrt(sum( s * s ))
-    print("     Actual    = " + actredScalar)
-    print("     Predicted = " + qkScalar)
-    
-    if (iter==0):
-        delta = min(delta, snorm)
-    alpha2 = objnew - obj - gs
-    alpha2Scalar = castAsScalar(alpha2)
-    if (alpha2Scalar <= 0):
-        alpha = sigma3*e
-    else:
-        ascalar = max(sigma1, -0.5*castAsScalar(gs)/alpha2Scalar)
-        alpha = ascalar*e
-    
-    if (rhoScalar > eta0):
-        w = wnew
-        o = onew
-        grad = w + dot(C*transpose(X), ((logisticnew - 1) * y ))
-        norm_grad = sqrt(sum(grad*grad))
-        logisticD = logisticnew * (1 - logisticnew)
-        obj = objnew
-    
-    alphaScalar = castAsScalar(alpha)
-    if (rhoScalar < eta0):
-        delta = min(max( alphaScalar , sigma1) * snorm, sigma2 * delta )
-    else:
-        if (rhoScalar < eta1):
-            delta = max(sigma1 * delta, min( alphaScalar  * snorm, sigma2 * delta))
-        else:
-            if (rhoScalar < eta2):
-                delta = max(sigma1 * delta, min( alphaScalar * snorm, sigma3 * delta))
-            else:
-                delta = max(delta, min( alphaScalar * snorm, sigma3 * delta))
-    
-    ot = dot(Xt, w)
-    ot2 = yt * ot
-    correct = sum(ppred(ot2, 0, ">"))
-    accuracy = correct*100.0/Nt 
-    iter = iter + 1
-    converge = (norm_grad < (tol * norm_grad_initial)) | (iter > maxiter)
-    
-    print("     Delta =  " + delta)
-    print("     Accuracy =  " +  accuracy)
-    print("     Correct =  " + correct)
-    print("     OuterIter =  " + iter)
-    print("     Converge =  " + converge)
-# end while(!converge)
-
-save(w, $5, format="text")
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# Solves Linear Logistic Regression using Trust Region methods. 
+# Can be adapted for L2-SVMs and more general unconstrained optimization problems also
+# setup optimization parameters (See: Trust Region Newton Method for Logistic Regression, Lin, Weng and Keerthi, JMLR 9 (2008) 627-650)
+
+# Note this script is externalized to customers, please do not change w/o consulting component owner.
+# How to invoke this pydml script LinearLogReg.pydml?
+# Assume LLR_HOME is set to the home of the pydml script
+# Assume input and output directories are on hdfs as INPUT_DIR and OUTPUT_DIR
+# Assume rows = 100 and cols = 50 for x, rows_test= 25 and cols_test = 50 for Xt
+# hadoop jar SystemML.jar -f $LLR_HOME/LinearLogReg.pydml -python -args "$INPUT_DIR/X" "$INPUT_DIR/Xt" "$INPUT_DIR/y" "$INPUT_DIR/yt" "$OUTPUT_DIR/w"
+
+C = 2
+tol = 0.001
+maxiter = 3
+maxinneriter = 3
+
+eta0 = 0.0001
+eta1 = 0.25
+eta2 = 0.75
+sigma1 = 0.25
+sigma2 = 0.5
+sigma3 = 4.0
+psi = 0.1
+
+# load (training and test) data files
+X = load($1)
+Xt = load($2)
+N = nrow(X)
+D = ncol(X)
+Nt = nrow(Xt)
+
+# load (training and test) labels
+y = load($3)
+yt = load($4)
+
+#initialize w
+w = Rand(rows=D, cols=1, min=0.0, max=0.0)
+e = Rand(rows=1, cols=1, min=1.0, max=1.0)
+o = dot(X, w)
+logistic = 1.0/(1.0 + exp( -y * o))
+
+# CHECK ORDER OF OPERATIONS HERE
+obj = dot((0.5 * transpose(w)), w) + C*sum(-log(logistic))
+grad = w + dot(C*transpose(X), ((logistic - 1)*y))
+logisticD = logistic*(1-logistic)
+delta = sqrt(sum(grad*grad))
+
+# number of iterations
+iter = 0
+
+# starting point for CG
+zeros_D = Rand(rows = D, cols = 1, min = 0.0, max = 0.0)
+# VS: change
+zeros_N = Rand(rows = N, cols = 1, min = 0.0, max = 0.0)
+
+# boolean for convergence check
+
+converge = (delta < tol) | (iter > maxiter)
+norm_r2 = sum(grad*grad)
+
+# VS: change
+norm_grad = sqrt(norm_r2)
+norm_grad_initial = norm_grad
+
+alpha = dot(transpose(w), w)
+alpha2 = alpha
+
+while(!converge):
+    
+    norm_grad = sqrt(sum(grad*grad))
+    
+    print("-- Outer Iteration = " + iter)
+    objScalar = castAsScalar(obj)
+    print("     Iterations = " + iter + ", Objective = " + objScalar + ", Gradient Norm = " + norm_grad)
+    
+    # SOLVE TRUST REGION SUB-PROBLEM
+    s = zeros_D
+    os = zeros_N
+    r = -grad
+    d = r
+    inneriter = 0
+    innerconverge = ( sqrt(sum(r*r)) <= psi * norm_grad)
+    while (!innerconverge):
+        inneriter = inneriter + 1
+        norm_r2 = sum(r*r)
+        od = dot(X, d)
+        Hd = d + C*(dot(transpose(X), (logisticD*od)))
+        alpha_deno = dot(transpose(d), Hd)
+        alpha = norm_r2 / alpha_deno
+        
+        s = s + castAsScalar(alpha) * d
+        os = os + castAsScalar(alpha) * od
+        
+        sts = dot(transpose(s), s)
+        delta2 = delta*delta 
+        stsScalar = castAsScalar(sts)
+        
+        shouldBreak = False  # to mimic "break" in the following 'if' condition
+        if (stsScalar > delta2):
+            print("      --- cg reaches trust region boundary")
+            s = s - castAsScalar(alpha) * d
+            os = os - castAsScalar(alpha) * od
+            std = dot(transpose(s), d)
+            dtd = dot(transpose(d), d)
+            sts = dot(transpose(s), s)
+            rad = sqrt(std*std + dtd*(delta2 - sts))
+            stdScalar = castAsScalar(std)
+            if(stdScalar >= 0):
+                tau = (delta2 - sts)/(std + rad)
+            else:
+                tau = (rad - std)/dtd
+            
+            s = s + castAsScalar(tau) * d
+            os = os + castAsScalar(tau) * od
+            r = r - castAsScalar(tau) * Hd
+            
+            #break
+            shouldBreak = True
+            innerconverge = True
+        if (!shouldBreak):
+            r = r - castAsScalar(alpha) * Hd
+            old_norm_r2 = norm_r2
+            norm_r2 = sum(r*r)
+            beta = norm_r2/old_norm_r2
+            d = r + beta*d
+            innerconverge = (sqrt(norm_r2) <= psi * norm_grad) | (inneriter > maxinneriter)
+    # end while (!innerconverge)
+    
+    print("      --- Inner CG Iteration =  " + inneriter)
+    # END TRUST REGION SUB-PROBLEM
+    # compute rho, update w, obtain delta
+    gs = dot(transpose(s), grad)
+    qk = -0.5*(gs - (dot(transpose(s), r)))
+    
+    wnew = w + s    
+    onew = o + os
+    logisticnew = 1.0/(1.0 + exp(-y * onew ))
+    objnew = dot((0.5 * transpose(wnew)), wnew) + C * sum(-log(logisticnew))
+    
+    actred = (obj - objnew)
+    actredScalar = castAsScalar(actred)
+    rho = actred / qk
+    qkScalar = castAsScalar(qk)
+    rhoScalar = castAsScalar(rho)
+    snorm = sqrt(sum( s * s ))
+    print("     Actual    = " + actredScalar)
+    print("     Predicted = " + qkScalar)
+    
+    if (iter==0):
+        delta = min(delta, snorm)
+    alpha2 = objnew - obj - gs
+    alpha2Scalar = castAsScalar(alpha2)
+    if (alpha2Scalar <= 0):
+        alpha = sigma3*e
+    else:
+        ascalar = max(sigma1, -0.5*castAsScalar(gs)/alpha2Scalar)
+        alpha = ascalar*e
+    
+    if (rhoScalar > eta0):
+        w = wnew
+        o = onew
+        grad = w + dot(C*transpose(X), ((logisticnew - 1) * y ))
+        norm_grad = sqrt(sum(grad*grad))
+        logisticD = logisticnew * (1 - logisticnew)
+        obj = objnew
+    
+    alphaScalar = castAsScalar(alpha)
+    if (rhoScalar < eta0):
+        delta = min(max( alphaScalar , sigma1) * snorm, sigma2 * delta )
+    else:
+        if (rhoScalar < eta1):
+            delta = max(sigma1 * delta, min( alphaScalar  * snorm, sigma2 * delta))
+        else:
+            if (rhoScalar < eta2):
+                delta = max(sigma1 * delta, min( alphaScalar * snorm, sigma3 * delta))
+            else:
+                delta = max(delta, min( alphaScalar * snorm, sigma3 * delta))
+    
+    ot = dot(Xt, w)
+    ot2 = yt * ot
+    correct = sum(ppred(ot2, 0, ">"))
+    accuracy = correct*100.0/Nt 
+    iter = iter + 1
+    converge = (norm_grad < (tol * norm_grad_initial)) | (iter > maxiter)
+    
+    print("     Delta =  " + delta)
+    print("     Accuracy =  " +  accuracy)
+    print("     Correct =  " + correct)
+    print("     OuterIter =  " + iter)
+    print("     Converge =  " + converge)
+# end while(!converge)
+
+save(w, $5, format="text")

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/m-svm/m-svm.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/m-svm/m-svm.R b/src/test/scripts/applications/m-svm/m-svm.R
index 01f5d6a..e4eb562 100644
--- a/src/test/scripts/applications/m-svm/m-svm.R
+++ b/src/test/scripts/applications/m-svm/m-svm.R
@@ -1,120 +1,120 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-args <- commandArgs(TRUE)
-
-library("Matrix")
-
-X = as.matrix(readMM(paste(args[1], "X.mtx", sep="")))
-
-check_X = sum(X)
-if(check_X == 0){
-	print("X has no non-zeros")
-}else{
-	Y = as.matrix(readMM(paste(args[1], "Y.mtx", sep="")))
-	intercept = as.integer(args[6])
-	num_classes = as.integer(args[2])
-	epsilon = as.double(args[3])
-	lambda = as.double(args[4])
-	max_iterations = as.integer(args[5])
- 
-	num_samples = nrow(X)
-	num_features = ncol(X)
-
-	if (intercept == 1) {
- 		ones  = matrix(1, num_samples, 1);
- 		X = cbind(X, ones);
-	}
-
-	num_rows_in_w = num_features
-	if(intercept == 1){
-		num_rows_in_w = num_rows_in_w + 1
-	}
-	w = matrix(0, num_rows_in_w, num_classes)
-
-	debug_mat = matrix(-1, max_iterations, num_classes)
-	for(iter_class in 1:num_classes){		  
-		Y_local = 2 * (Y == iter_class) - 1
-		w_class = matrix(0, num_features, 1)
-		if (intercept == 1) {
-			zero_matrix = matrix(0, 1, 1);
- 			w_class = t(cbind(t(w_class), zero_matrix));
- 		}
- 
-		g_old = t(X) %*% Y_local
- 		s = g_old
-
-		Xw = matrix(0, nrow(X), 1)
-		iter = 0
- 		continue = 1
- 		while(continue == 1)  {
-  			# minimizing primal obj along direction s
-  			step_sz = 0
-  			Xd = X %*% s
-  			wd = lambda * sum(w_class * s)
-  			dd = lambda * sum(s * s)
-  			continue1 = 1
-  			while(continue1 == 1){
-   				tmp_Xw = Xw + step_sz*Xd
-   				out = 1 - Y_local * (tmp_Xw)
-   				sv = (out > 0)
-   				out = out * sv
-   				g = wd + step_sz*dd - sum(out * Y_local * Xd)
-   				h = dd + sum(Xd * sv * Xd)
-   				step_sz = step_sz - g/h
-   				if (g*g/h < 0.0000000001){
-    				continue1 = 0
-   				}
-  			}
- 
-  			#update weights
-  			w_class = w_class + step_sz*s
- 			Xw = Xw + step_sz*Xd
- 
-  			out = 1 - Y_local * Xw
-  			sv = (out > 0)
-  			out = sv * out
-  			obj = 0.5 * sum(out * out) + lambda/2 * sum(w_class * w_class)
-  			g_new = t(X) %*% (out * Y_local) - lambda * w_class
-
-  			tmp = sum(s * g_old)
-  
-  			train_acc = sum(Y_local*(X%*%w_class) >= 0)/num_samples*100
-  			print(paste("For class ", iter_class, " iteration ", iter, " training accuracy: ", train_acc, sep=""))
-  			debug_mat[iter+1,iter_class] = obj	   
-   
-  			if((step_sz*tmp < epsilon*obj) | (iter >= max_iterations-1)){
-   				continue = 0
-  			}
- 
-  			#non-linear CG step
-  			be = sum(g_new * g_new)/sum(g_old * g_old)
-  			s = be * s + g_new
-  			g_old = g_new
-
-  			iter = iter + 1
- 		}
-
-		w[,iter_class] = w_class
-	}
-
-	writeMM(as(w, "CsparseMatrix"), paste(args[7], "w", sep=""))
-}
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+args <- commandArgs(TRUE)
+
+library("Matrix")
+
+X = as.matrix(readMM(paste(args[1], "X.mtx", sep="")))
+
+check_X = sum(X)
+if(check_X == 0){
+	print("X has no non-zeros")
+}else{
+	Y = as.matrix(readMM(paste(args[1], "Y.mtx", sep="")))
+	intercept = as.integer(args[6])
+	num_classes = as.integer(args[2])
+	epsilon = as.double(args[3])
+	lambda = as.double(args[4])
+	max_iterations = as.integer(args[5])
+ 
+	num_samples = nrow(X)
+	num_features = ncol(X)
+
+	if (intercept == 1) {
+ 		ones  = matrix(1, num_samples, 1);
+ 		X = cbind(X, ones);
+	}
+
+	num_rows_in_w = num_features
+	if(intercept == 1){
+		num_rows_in_w = num_rows_in_w + 1
+	}
+	w = matrix(0, num_rows_in_w, num_classes)
+
+	debug_mat = matrix(-1, max_iterations, num_classes)
+	for(iter_class in 1:num_classes){		  
+		Y_local = 2 * (Y == iter_class) - 1
+		w_class = matrix(0, num_features, 1)
+		if (intercept == 1) {
+			zero_matrix = matrix(0, 1, 1);
+ 			w_class = t(cbind(t(w_class), zero_matrix));
+ 		}
+ 
+		g_old = t(X) %*% Y_local
+ 		s = g_old
+
+		Xw = matrix(0, nrow(X), 1)
+		iter = 0
+ 		continue = 1
+ 		while(continue == 1)  {
+  			# minimizing primal obj along direction s
+  			step_sz = 0
+  			Xd = X %*% s
+  			wd = lambda * sum(w_class * s)
+  			dd = lambda * sum(s * s)
+  			continue1 = 1
+  			while(continue1 == 1){
+   				tmp_Xw = Xw + step_sz*Xd
+   				out = 1 - Y_local * (tmp_Xw)
+   				sv = (out > 0)
+   				out = out * sv
+   				g = wd + step_sz*dd - sum(out * Y_local * Xd)
+   				h = dd + sum(Xd * sv * Xd)
+   				step_sz = step_sz - g/h
+   				if (g*g/h < 0.0000000001){
+    				continue1 = 0
+   				}
+  			}
+ 
+  			#update weights
+  			w_class = w_class + step_sz*s
+ 			Xw = Xw + step_sz*Xd
+ 
+  			out = 1 - Y_local * Xw
+  			sv = (out > 0)
+  			out = sv * out
+  			obj = 0.5 * sum(out * out) + lambda/2 * sum(w_class * w_class)
+  			g_new = t(X) %*% (out * Y_local) - lambda * w_class
+
+  			tmp = sum(s * g_old)
+  
+  			train_acc = sum(Y_local*(X%*%w_class) >= 0)/num_samples*100
+  			print(paste("For class ", iter_class, " iteration ", iter, " training accuracy: ", train_acc, sep=""))
+  			debug_mat[iter+1,iter_class] = obj	   
+   
+  			if((step_sz*tmp < epsilon*obj) | (iter >= max_iterations-1)){
+   				continue = 0
+  			}
+ 
+  			#non-linear CG step
+  			be = sum(g_new * g_new)/sum(g_old * g_old)
+  			s = be * s + g_new
+  			g_old = g_new
+
+  			iter = iter + 1
+ 		}
+
+		w[,iter_class] = w_class
+	}
+
+	writeMM(as(w, "CsparseMatrix"), paste(args[7], "w", sep=""))
+}



[10/55] [partial] incubator-systemml git commit: [SYSTEMML-482] [SYSTEMML-480] Adding a Git attributes file to enfore Unix-styled line endings, and normalizing all of the line endings.

Posted by du...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/io/csv/in/transfusion_3.data.single
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/io/csv/in/transfusion_3.data.single b/src/test/scripts/functions/io/csv/in/transfusion_3.data.single
index 1ec0901..1a35bfe 100644
--- a/src/test/scripts/functions/io/csv/in/transfusion_3.data.single
+++ b/src/test/scripts/functions/io/csv/in/transfusion_3.data.single
@@ -1,749 +1,749 @@
-Recency (months),Frequency (times),Monetary (c.c. blood),Time (months),"whether he/she donated blood in March 2007"
-2 ,50,12500,98 ,1
-0 ,13,3250,28 ,1
-1 ,16,4000,35 ,1
-2 ,20,5000,45 ,1
-1 ,24,6000,77 ,0
-4 ,4,1000,4 ,0
-2 ,7,1750,14 ,1
-1 ,12,3000,35 ,0
-2 ,9,2250,22 ,1
-5 ,46,11500,98 ,1
-4 ,23,5750,58 ,0
-0 ,3,750,4 ,0
-2 ,10,2500,28 ,1
-1 ,13,3250,47 ,0
-2 ,6,1500,15 ,1
-2 ,5,1250,11 ,1
-2 ,14,3500,48 ,1
-2 ,15,3750,49 ,1
-2 ,6,1500,15 ,1
-2 ,3,750,4 ,1
-2 ,3,750,4 ,1
-4 ,11,2750,28 ,0
-2 ,6,1500,16 ,1
-2 ,6,1500,16 ,1
-9 ,9,2250,16 ,0
-4 ,14,3500,40 ,0
-4 ,6,1500,14 ,0
-4 ,12,3000,34 ,1
-4 ,5,1250,11 ,1
-4 ,8,2000,21 ,0
-1 ,14,3500,58 ,0
-4 ,10,2500,28 ,1
-4 ,10,2500,28 ,1
-4 ,9,2250,26 ,1
-2 ,16,4000,64 ,0
-2 ,8,2000,28 ,1
-2 ,12,3000,47 ,1
-4 ,6,1500,16 ,1
-2 ,14,3500,57 ,1
-4 ,7,1750,22 ,1
-2 ,13,3250,53 ,1
-2 ,5,1250,16 ,0
-2 ,5,1250,16 ,1
-2 ,5,1250,16 ,0
-4 ,20,5000,69 ,1
-4 ,9,2250,28 ,1
-2 ,9,2250,36 ,0
-2 ,2,500,2 ,0
-2 ,2,500,2 ,0
-2 ,2,500,2 ,0
-2 ,11,2750,46 ,0
-2 ,11,2750,46 ,1
-2 ,6,1500,22 ,0
-2 ,12,3000,52 ,0
-4 ,5,1250,14 ,1
-4 ,19,4750,69 ,1
-4 ,8,2000,26 ,1
-2 ,7,1750,28 ,1
-2 ,16,4000,81 ,0
-3 ,6,1500,21 ,0
-2 ,7,1750,29 ,0
-2 ,8,2000,35 ,1
-2 ,10,2500,49 ,0
-4 ,5,1250,16 ,1
-2 ,3,750,9 ,1
-3 ,16,4000,74 ,0
-2 ,4,1000,14 ,1
-0 ,2,500,4 ,0
-4 ,7,1750,25 ,0
-1 ,9,2250,51 ,0
-2 ,4,1000,16 ,0
-2 ,4,1000,16 ,0
-4 ,17,4250,71 ,1
-2 ,2,500,4 ,0
-2 ,2,500,4 ,1
-2 ,2,500,4 ,1
-2 ,4,1000,16 ,1
-2 ,2,500,4 ,0
-2 ,2,500,4 ,0
-2 ,2,500,4 ,0
-4 ,6,1500,23 ,1
-2 ,4,1000,16 ,0
-2 ,4,1000,16 ,0
-2 ,4,1000,16 ,0
-2 ,6,1500,28 ,1
-2 ,6,1500,28 ,0
-4 ,2,500,4 ,0
-4 ,2,500,4 ,0
-4 ,2,500,4 ,0
-2 ,7,1750,35 ,1
-4 ,2,500,4 ,1
-4 ,2,500,4 ,0
-4 ,2,500,4 ,0
-4 ,2,500,4 ,0
-12 ,11,2750,23 ,0
-4 ,7,1750,28 ,0
-3 ,17,4250,86 ,0
-4 ,9,2250,38 ,1
-4 ,4,1000,14 ,1
-5 ,7,1750,26 ,1
-4 ,8,2000,34 ,1
-2 ,13,3250,76 ,1
-4 ,9,2250,40 ,0
-2 ,5,1250,26 ,0
-2 ,5,1250,26 ,0
-6 ,17,4250,70 ,0
-0 ,8,2000,59 ,0
-3 ,5,1250,26 ,0
-2 ,3,750,14 ,0
-2 ,10,2500,64 ,0
-4 ,5,1250,23 ,1
-4 ,9,2250,46 ,0
-4 ,5,1250,23 ,0
-4 ,8,2000,40 ,1
-2 ,12,3000,82 ,0
-11 ,24,6000,64 ,0
-2 ,7,1750,46 ,1
-4 ,11,2750,61 ,0
-1 ,7,1750,57 ,0
-2 ,11,2750,79 ,1
-2 ,3,750,16 ,1
-4 ,5,1250,26 ,1
-2 ,6,1500,41 ,1
-2 ,5,1250,33 ,1
-2 ,4,1000,26 ,0
-2 ,5,1250,34 ,0
-4 ,8,2000,46 ,1
-2 ,4,1000,26 ,0
-4 ,8,2000,48 ,1
-2 ,2,500,10 ,1
-4 ,5,1250,28 ,0
-2 ,12,3000,95 ,0
-2 ,2,500,10 ,0
-4 ,6,1500,35 ,0
-2 ,11,2750,88 ,0
-2 ,3,750,19 ,0
-2 ,5,1250,37 ,0
-2 ,12,3000,98 ,0
-9 ,5,1250,19 ,0
-2 ,2,500,11 ,0
-2 ,9,2250,74 ,0
-5 ,14,3500,86 ,0
-4 ,3,750,16 ,0
-4 ,3,750,16 ,0
-4 ,2,500,9 ,1
-4 ,3,750,16 ,1
-6 ,3,750,14 ,0
-2 ,2,500,11 ,0
-2 ,2,500,11 ,1
-2 ,2,500,11 ,0
-2 ,7,1750,58 ,1
-4 ,6,1500,39 ,0
-4 ,11,2750,78 ,0
-2 ,1,250,2 ,1
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,1
-2 ,1,250,2 ,1
-2 ,1,250,2 ,1
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-11 ,10,2500,35 ,0
-11 ,4,1000,16 ,1
-4 ,5,1250,33 ,1
-4 ,6,1500,41 ,1
-2 ,3,750,22 ,0
-4 ,4,1000,26 ,1
-10 ,4,1000,16 ,0
-2 ,4,1000,35 ,0
-4 ,12,3000,88 ,0
-13 ,8,2000,26 ,0
-11 ,9,2250,33 ,0
-4 ,5,1250,34 ,0
-4 ,4,1000,26 ,0
-8 ,15,3750,77 ,0
-4 ,5,1250,35 ,1
-4 ,7,1750,52 ,0
-4 ,7,1750,52 ,0
-2 ,4,1000,35 ,0
-11 ,11,2750,42 ,0
-2 ,2,500,14 ,0
-2 ,5,1250,47 ,1
-9 ,8,2000,38 ,1
-4 ,6,1500,47 ,0
-11 ,7,1750,29 ,0
-9 ,9,2250,45 ,0
-4 ,6,1500,52 ,0
-4 ,7,1750,58 ,0
-6 ,2,500,11 ,1
-4 ,7,1750,58 ,0
-11 ,9,2250,38 ,0
-11 ,6,1500,26 ,0
-2 ,2,500,16 ,0
-2 ,7,1750,76 ,0
-11 ,6,1500,27 ,0
-11 ,3,750,14 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,1
-4 ,1,250,4 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,1
-4 ,1,250,4 ,1
-4 ,1,250,4 ,0
-4 ,1,250,4 ,1
-4 ,1,250,4 ,1
-4 ,1,250,4 ,0
-4 ,3,750,24 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,1
-4 ,1,250,4 ,0
-10 ,8,2000,39 ,0
-14 ,7,1750,26 ,0
-8 ,10,2500,63 ,0
-11 ,3,750,15 ,0
-4 ,2,500,14 ,0
-2 ,4,1000,43 ,0
-8 ,9,2250,58 ,0
-8 ,8,2000,52 ,1
-11 ,22,5500,98 ,0
-4 ,3,750,25 ,1
-11 ,17,4250,79 ,1
-9 ,2,500,11 ,0
-4 ,5,1250,46 ,0
-11 ,12,3000,58 ,0
-7 ,12,3000,86 ,0
-11 ,2,500,11 ,0
-11 ,2,500,11 ,0
-11 ,2,500,11 ,0
-2 ,6,1500,75 ,0
-11 ,8,2000,41 ,1
-11 ,3,750,16 ,1
-12 ,13,3250,59 ,0
-2 ,3,750,35 ,0
-16 ,8,2000,28 ,0
-11 ,7,1750,37 ,0
-4 ,3,750,28 ,0
-12 ,12,3000,58 ,0
-4 ,4,1000,41 ,0
-11 ,14,3500,73 ,1
-2 ,2,500,23 ,0
-2 ,3,750,38 ,1
-4 ,5,1250,58 ,0
-4 ,4,1000,43 ,1
-3 ,2,500,23 ,0
-11 ,8,2000,46 ,0
-4 ,7,1750,82 ,0
-13 ,4,1000,21 ,0
-16 ,11,2750,40 ,0
-16 ,7,1750,28 ,0
-7 ,2,500,16 ,0
-4 ,5,1250,58 ,0
-4 ,5,1250,58 ,0
-4 ,4,1000,46 ,0
-14 ,13,3250,57 ,0
-4 ,3,750,34 ,0
-14 ,18,4500,78 ,0
-11 ,8,2000,48 ,0
-14 ,16,4000,70 ,0
-14 ,4,1000,22 ,1
-14 ,5,1250,26 ,0
-8 ,2,500,16 ,0
-11 ,5,1250,33 ,0
-11 ,2,500,14 ,0
-4 ,2,500,23 ,0
-9 ,2,500,16 ,1
-14 ,5,1250,28 ,1
-14 ,3,750,19 ,1
-14 ,4,1000,23 ,1
-16 ,12,3000,50 ,0
-11 ,4,1000,28 ,0
-11 ,5,1250,35 ,0
-11 ,5,1250,35 ,0
-2 ,4,1000,70 ,0
-14 ,5,1250,28 ,0
-14 ,2,500,14 ,0
-14 ,2,500,14 ,0
-14 ,2,500,14 ,0
-14 ,2,500,14 ,0
-14 ,2,500,14 ,0
-14 ,2,500,14 ,0
-2 ,3,750,52 ,0
-14 ,6,1500,34 ,0
-11 ,5,1250,37 ,1
-4 ,5,1250,74 ,0
-11 ,3,750,23 ,0
-16 ,4,1000,23 ,0
-16 ,3,750,19 ,0
-11 ,5,1250,38 ,0
-11 ,2,500,16 ,0
-12 ,9,2250,60 ,0
-9 ,1,250,9 ,0
-9 ,1,250,9 ,0
-4 ,2,500,29 ,0
-11 ,2,500,17 ,0
-14 ,4,1000,26 ,0
-11 ,9,2250,72 ,1
-11 ,5,1250,41 ,0
-15 ,16,4000,82 ,0
-9 ,5,1250,51 ,1
-11 ,4,1000,34 ,0
-14 ,8,2000,50 ,1
-16 ,7,1750,38 ,0
-14 ,2,500,16 ,0
-2 ,2,500,41 ,0
-14 ,16,4000,98 ,0
-14 ,4,1000,28 ,1
-16 ,7,1750,39 ,0
-14 ,7,1750,47 ,0
-16 ,6,1500,35 ,0
-16 ,6,1500,35 ,1
-11 ,7,1750,62 ,1
-16 ,2,500,16 ,0
-16 ,3,750,21 ,1
-11 ,3,750,28 ,0
-11 ,7,1750,64 ,0
-11 ,1,250,11 ,1
-9 ,3,750,34 ,0
-14 ,4,1000,30 ,0
-23 ,38,9500,98 ,0
-11 ,6,1500,58 ,0
-11 ,1,250,11 ,0
-11 ,1,250,11 ,0
-11 ,1,250,11 ,0
-11 ,1,250,11 ,0
-11 ,1,250,11 ,0
-11 ,1,250,11 ,0
-11 ,1,250,11 ,0
-11 ,1,250,11 ,0
-11 ,2,500,21 ,0
-11 ,5,1250,50 ,0
-11 ,2,500,21 ,0
-16 ,4,1000,28 ,0
-4 ,2,500,41 ,0
-16 ,6,1500,40 ,0
-14 ,3,750,26 ,0
-9 ,2,500,26 ,0
-21 ,16,4000,64 ,0
-14 ,6,1500,51 ,0
-11 ,2,500,24 ,0
-4 ,3,750,71 ,0
-21 ,13,3250,57 ,0
-11 ,6,1500,71 ,0
-14 ,2,500,21 ,1
-23 ,15,3750,57 ,0
-14 ,4,1000,38 ,0
-11 ,2,500,26 ,0
-16 ,5,1250,40 ,1
-4 ,2,500,51 ,1
-14 ,3,750,31 ,0
-4 ,2,500,52 ,0
-9 ,4,1000,65 ,0
-14 ,4,1000,40 ,0
-11 ,3,750,40 ,1
-14 ,5,1250,50 ,0
-14 ,1,250,14 ,0
-14 ,1,250,14 ,0
-14 ,1,250,14 ,0
-14 ,1,250,14 ,0
-14 ,1,250,14 ,0
-14 ,1,250,14 ,0
-14 ,1,250,14 ,0
-14 ,1,250,14 ,0
-14 ,7,1750,72 ,0
-14 ,1,250,14 ,0
-14 ,1,250,14 ,0
-9 ,3,750,52 ,0
-14 ,7,1750,73 ,0
-11 ,4,1000,58 ,0
-11 ,4,1000,59 ,0
-4 ,2,500,59 ,0
-11 ,4,1000,61 ,0
-16 ,4,1000,40 ,0
-16 ,10,2500,89 ,0
-21 ,2,500,21 ,1
-21 ,3,750,26 ,0
-16 ,8,2000,76 ,0
-21 ,3,750,26 ,1
-18 ,2,500,23 ,0
-23 ,5,1250,33 ,0
-23 ,8,2000,46 ,0
-16 ,3,750,34 ,0
-14 ,5,1250,64 ,0
-14 ,3,750,41 ,0
-16 ,1,250,16 ,0
-16 ,1,250,16 ,0
-16 ,1,250,16 ,0
-16 ,1,250,16 ,0
-16 ,1,250,16 ,0
-16 ,1,250,16 ,0
-16 ,1,250,16 ,0
-16 ,4,1000,45 ,0
-16 ,1,250,16 ,0
-16 ,1,250,16 ,0
-16 ,1,250,16 ,0
-16 ,1,250,16 ,0
-16 ,1,250,16 ,0
-16 ,2,500,26 ,0
-21 ,2,500,23 ,0
-16 ,2,500,27 ,0
-21 ,2,500,23 ,0
-21 ,2,500,23 ,0
-14 ,4,1000,57 ,0
-16 ,5,1250,60 ,0
-23 ,2,500,23 ,0
-14 ,5,1250,74 ,0
-23 ,3,750,28 ,0
-16 ,3,750,40 ,0
-9 ,2,500,52 ,0
-9 ,2,500,52 ,0
-16 ,7,1750,87 ,1
-14 ,4,1000,64 ,0
-14 ,2,500,35 ,0
-16 ,7,1750,93 ,0
-21 ,2,500,25 ,0
-14 ,3,750,52 ,0
-23 ,14,3500,93 ,0
-18 ,8,2000,95 ,0
-16 ,3,750,46 ,0
-11 ,3,750,76 ,0
-11 ,2,500,52 ,0
-11 ,3,750,76 ,0
-23 ,12,3000,86 ,0
-21 ,3,750,35 ,0
-23 ,2,500,26 ,0
-23 ,2,500,26 ,0
-23 ,8,2000,64 ,0
-16 ,3,750,50 ,0
-23 ,3,750,33 ,0
-21 ,3,750,38 ,0
-23 ,2,500,28 ,0
-21 ,1,250,21 ,0
-21 ,1,250,21 ,0
-21 ,1,250,21 ,0
-21 ,1,250,21 ,0
-21 ,1,250,21 ,0
-21 ,1,250,21 ,0
-21 ,1,250,21 ,0
-21 ,1,250,21 ,0
-21 ,1,250,21 ,0
-21 ,1,250,21 ,1
-21 ,1,250,21 ,0
-21 ,1,250,21 ,0
-21 ,5,1250,60 ,0
-23 ,4,1000,45 ,0
-21 ,4,1000,52 ,0
-22 ,1,250,22 ,1
-11 ,2,500,70 ,0
-23 ,5,1250,58 ,0
-23 ,3,750,40 ,0
-23 ,3,750,41 ,0
-14 ,3,750,83 ,0
-21 ,2,500,35 ,0
-26 ,5,1250,49 ,1
-23 ,6,1500,70 ,0
-23 ,1,250,23 ,0
-23 ,1,250,23 ,0
-23 ,1,250,23 ,0
-23 ,1,250,23 ,0
-23 ,1,250,23 ,0
-23 ,1,250,23 ,0
-23 ,1,250,23 ,0
-23 ,1,250,23 ,0
-23 ,4,1000,53 ,0
-21 ,6,1500,86 ,0
-23 ,3,750,48 ,0
-21 ,2,500,41 ,0
-21 ,3,750,64 ,0
-16 ,2,500,70 ,0
-21 ,3,750,70 ,0
-23 ,4,1000,87 ,0
-23 ,3,750,89 ,0
-23 ,2,500,87 ,0
-35 ,3,750,64 ,0
-38 ,1,250,38 ,0
-38 ,1,250,38 ,0
-40 ,1,250,40 ,0
-74 ,1,250,74 ,0
-2 ,43,10750,86 ,1
-6 ,22,5500,28 ,1
-2 ,34,8500,77 ,1
-2 ,44,11000,98 ,0
-0 ,26,6500,76 ,1
-2 ,41,10250,98 ,1
-3 ,21,5250,42 ,1
-2 ,11,2750,23 ,0
-2 ,21,5250,52 ,1
-2 ,13,3250,32 ,1
-4 ,4,1000,4 ,1
-2 ,11,2750,26 ,0
-2 ,11,2750,28 ,0
-3 ,14,3500,35 ,0
-4 ,16,4000,38 ,1
-4 ,6,1500,14 ,0
-3 ,5,1250,12 ,1
-4 ,33,8250,98 ,1
-3 ,10,2500,33 ,1
-4 ,10,2500,28 ,1
-2 ,11,2750,40 ,1
-2 ,11,2750,41 ,1
-4 ,13,3250,39 ,1
-1 ,10,2500,43 ,1
-4 ,9,2250,28 ,0
-2 ,4,1000,11 ,0
-2 ,5,1250,16 ,1
-2 ,15,3750,64 ,0
-5 ,24,6000,79 ,0
-2 ,6,1500,22 ,1
-4 ,5,1250,16 ,1
-2 ,4,1000,14 ,1
-4 ,8,2000,28 ,0
-2 ,4,1000,14 ,0
-2 ,6,1500,26 ,0
-4 ,5,1250,16 ,1
-2 ,7,1750,32 ,1
-2 ,6,1500,26 ,1
-2 ,8,2000,38 ,1
-2 ,2,500,4 ,1
-2 ,6,1500,28 ,1
-2 ,10,2500,52 ,0
-4 ,16,4000,70 ,1
-4 ,2,500,4 ,1
-1 ,14,3500,95 ,0
-4 ,2,500,4 ,1
-7 ,14,3500,48 ,0
-2 ,3,750,11 ,0
-2 ,12,3000,70 ,1
-4 ,7,1750,32 ,1
-4 ,4,1000,16 ,0
-2 ,6,1500,35 ,1
-4 ,6,1500,28 ,1
-2 ,3,750,14 ,0
-2 ,4,1000,23 ,0
-4 ,4,1000,18 ,0
-5 ,6,1500,28 ,0
-4 ,6,1500,30 ,0
-14 ,5,1250,14 ,0
-3 ,8,2000,50 ,0
-4 ,11,2750,64 ,1
-4 ,9,2250,52 ,0
-4 ,16,4000,98 ,1
-7 ,10,2500,47 ,0
-4 ,14,3500,86 ,0
-2 ,9,2250,75 ,0
-4 ,6,1500,35 ,0
-4 ,9,2250,55 ,0
-4 ,6,1500,35 ,1
-2 ,6,1500,45 ,0
-2 ,6,1500,47 ,0
-4 ,2,500,9 ,0
-2 ,2,500,11 ,1
-2 ,2,500,11 ,0
-2 ,2,500,11 ,1
-4 ,6,1500,38 ,1
-3 ,4,1000,29 ,1
-9 ,9,2250,38 ,0
-11 ,5,1250,18 ,0
-2 ,3,750,21 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,1
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,1
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-11 ,11,2750,38 ,0
-2 ,3,750,22 ,0
-9 ,11,2750,49 ,1
-5 ,11,2750,75 ,0
-3 ,5,1250,38 ,0
-3 ,1,250,3 ,1
-4 ,6,1500,43 ,0
-2 ,3,750,24 ,0
-12 ,11,2750,39 ,0
-2 ,2,500,14 ,0
-4 ,6,1500,46 ,0
-9 ,3,750,14 ,0
-14 ,8,2000,26 ,0
-4 ,2,500,13 ,0
-4 ,11,2750,95 ,0
-2 ,7,1750,77 ,0
-2 ,7,1750,77 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,1
-4 ,1,250,4 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,1
-4 ,1,250,4 ,0
-4 ,7,1750,62 ,0
-4 ,1,250,4 ,0
-4 ,4,1000,34 ,1
-11 ,6,1500,28 ,0
-13 ,3,750,14 ,1
-7 ,5,1250,35 ,0
-9 ,9,2250,54 ,0
-11 ,2,500,11 ,0
-2 ,5,1250,63 ,0
-7 ,11,2750,89 ,0
-8 ,9,2250,64 ,0
-2 ,2,500,22 ,0
-6 ,3,750,26 ,0
-12 ,15,3750,71 ,0
-13 ,3,750,16 ,0
-11 ,16,4000,89 ,0
-4 ,5,1250,58 ,0
-14 ,7,1750,35 ,0
-11 ,4,1000,27 ,0
-7 ,9,2250,89 ,1
-11 ,8,2000,52 ,1
-7 ,5,1250,52 ,0
-11 ,6,1500,41 ,0
-10 ,5,1250,38 ,0
-14 ,2,500,14 ,1
-14 ,2,500,14 ,0
-14 ,2,500,14 ,0
-2 ,2,500,33 ,0
-11 ,3,750,23 ,0
-14 ,8,2000,46 ,0
-9 ,1,250,9 ,0
-16 ,5,1250,27 ,0
-14 ,4,1000,26 ,0
-4 ,2,500,30 ,0
-14 ,3,750,21 ,0
-16 ,16,4000,77 ,0
-4 ,2,500,31 ,0
-14 ,8,2000,50 ,0
-11 ,3,750,26 ,0
-14 ,7,1750,45 ,0
-15 ,5,1250,33 ,0
-16 ,2,500,16 ,0
-16 ,3,750,21 ,0
-11 ,8,2000,72 ,0
-11 ,1,250,11 ,0
-11 ,1,250,11 ,0
-11 ,1,250,11 ,0
-11 ,1,250,11 ,1
-11 ,1,250,11 ,0
-2 ,3,750,75 ,1
-2 ,3,750,77 ,0
-16 ,4,1000,28 ,0
-16 ,15,3750,87 ,0
-16 ,14,3500,83 ,0
-16 ,10,2500,62 ,0
-16 ,3,750,23 ,0
-14 ,3,750,26 ,0
-23 ,19,4750,62 ,0
-11 ,7,1750,75 ,0
-14 ,3,750,28 ,0
-20 ,14,3500,69 ,1
-4 ,2,500,46 ,0
-11 ,2,500,25 ,0
-11 ,3,750,37 ,0
-16 ,4,1000,33 ,0
-21 ,7,1750,38 ,0
-13 ,7,1750,76 ,0
-16 ,6,1500,50 ,0
-14 ,3,750,33 ,0
-14 ,1,250,14 ,0
-14 ,1,250,14 ,0
-14 ,1,250,14 ,0
-14 ,1,250,14 ,0
-14 ,1,250,14 ,0
-14 ,1,250,14 ,0
-17 ,7,1750,58 ,1
-14 ,3,750,35 ,0
-14 ,3,750,35 ,0
-16 ,7,1750,64 ,0
-21 ,2,500,21 ,0
-16 ,3,750,35 ,0
-16 ,1,250,16 ,0
-16 ,1,250,16 ,0
-16 ,1,250,16 ,0
-16 ,1,250,16 ,0
-16 ,1,250,16 ,0
-14 ,2,500,29 ,0
-11 ,4,1000,74 ,0
-11 ,2,500,38 ,1
-21 ,6,1500,48 ,0
-23 ,2,500,23 ,0
-23 ,6,1500,45 ,0
-14 ,2,500,35 ,1
-16 ,6,1500,81 ,0
-16 ,4,1000,58 ,0
-16 ,5,1250,71 ,0
-21 ,2,500,26 ,0
-21 ,3,750,35 ,0
-21 ,3,750,35 ,0
-23 ,8,2000,69 ,0
-21 ,3,750,38 ,0
-23 ,3,750,35 ,0
-21 ,3,750,40 ,0
-23 ,2,500,28 ,0
-21 ,1,250,21 ,0
-21 ,1,250,21 ,0
-25 ,6,1500,50 ,0
-21 ,1,250,21 ,0
-21 ,1,250,21 ,0
-23 ,3,750,39 ,0
-21 ,2,500,33 ,0
-14 ,3,750,79 ,0
-23 ,1,250,23 ,1
-23 ,1,250,23 ,0
-23 ,1,250,23 ,0
-23 ,1,250,23 ,0
-23 ,1,250,23 ,0
-23 ,1,250,23 ,0
-23 ,1,250,23 ,0
-23 ,4,1000,52 ,0
-23 ,1,250,23 ,0
-23 ,7,1750,88 ,0
-16 ,3,750,86 ,0
-23 ,2,500,38 ,0
-21 ,2,500,52 ,0
-23 ,3,750,62 ,0
-39 ,1,250,39 ,0
+Recency (months),Frequency (times),Monetary (c.c. blood),Time (months),"whether he/she donated blood in March 2007"
+2 ,50,12500,98 ,1
+0 ,13,3250,28 ,1
+1 ,16,4000,35 ,1
+2 ,20,5000,45 ,1
+1 ,24,6000,77 ,0
+4 ,4,1000,4 ,0
+2 ,7,1750,14 ,1
+1 ,12,3000,35 ,0
+2 ,9,2250,22 ,1
+5 ,46,11500,98 ,1
+4 ,23,5750,58 ,0
+0 ,3,750,4 ,0
+2 ,10,2500,28 ,1
+1 ,13,3250,47 ,0
+2 ,6,1500,15 ,1
+2 ,5,1250,11 ,1
+2 ,14,3500,48 ,1
+2 ,15,3750,49 ,1
+2 ,6,1500,15 ,1
+2 ,3,750,4 ,1
+2 ,3,750,4 ,1
+4 ,11,2750,28 ,0
+2 ,6,1500,16 ,1
+2 ,6,1500,16 ,1
+9 ,9,2250,16 ,0
+4 ,14,3500,40 ,0
+4 ,6,1500,14 ,0
+4 ,12,3000,34 ,1
+4 ,5,1250,11 ,1
+4 ,8,2000,21 ,0
+1 ,14,3500,58 ,0
+4 ,10,2500,28 ,1
+4 ,10,2500,28 ,1
+4 ,9,2250,26 ,1
+2 ,16,4000,64 ,0
+2 ,8,2000,28 ,1
+2 ,12,3000,47 ,1
+4 ,6,1500,16 ,1
+2 ,14,3500,57 ,1
+4 ,7,1750,22 ,1
+2 ,13,3250,53 ,1
+2 ,5,1250,16 ,0
+2 ,5,1250,16 ,1
+2 ,5,1250,16 ,0
+4 ,20,5000,69 ,1
+4 ,9,2250,28 ,1
+2 ,9,2250,36 ,0
+2 ,2,500,2 ,0
+2 ,2,500,2 ,0
+2 ,2,500,2 ,0
+2 ,11,2750,46 ,0
+2 ,11,2750,46 ,1
+2 ,6,1500,22 ,0
+2 ,12,3000,52 ,0
+4 ,5,1250,14 ,1
+4 ,19,4750,69 ,1
+4 ,8,2000,26 ,1
+2 ,7,1750,28 ,1
+2 ,16,4000,81 ,0
+3 ,6,1500,21 ,0
+2 ,7,1750,29 ,0
+2 ,8,2000,35 ,1
+2 ,10,2500,49 ,0
+4 ,5,1250,16 ,1
+2 ,3,750,9 ,1
+3 ,16,4000,74 ,0
+2 ,4,1000,14 ,1
+0 ,2,500,4 ,0
+4 ,7,1750,25 ,0
+1 ,9,2250,51 ,0
+2 ,4,1000,16 ,0
+2 ,4,1000,16 ,0
+4 ,17,4250,71 ,1
+2 ,2,500,4 ,0
+2 ,2,500,4 ,1
+2 ,2,500,4 ,1
+2 ,4,1000,16 ,1
+2 ,2,500,4 ,0
+2 ,2,500,4 ,0
+2 ,2,500,4 ,0
+4 ,6,1500,23 ,1
+2 ,4,1000,16 ,0
+2 ,4,1000,16 ,0
+2 ,4,1000,16 ,0
+2 ,6,1500,28 ,1
+2 ,6,1500,28 ,0
+4 ,2,500,4 ,0
+4 ,2,500,4 ,0
+4 ,2,500,4 ,0
+2 ,7,1750,35 ,1
+4 ,2,500,4 ,1
+4 ,2,500,4 ,0
+4 ,2,500,4 ,0
+4 ,2,500,4 ,0
+12 ,11,2750,23 ,0
+4 ,7,1750,28 ,0
+3 ,17,4250,86 ,0
+4 ,9,2250,38 ,1
+4 ,4,1000,14 ,1
+5 ,7,1750,26 ,1
+4 ,8,2000,34 ,1
+2 ,13,3250,76 ,1
+4 ,9,2250,40 ,0
+2 ,5,1250,26 ,0
+2 ,5,1250,26 ,0
+6 ,17,4250,70 ,0
+0 ,8,2000,59 ,0
+3 ,5,1250,26 ,0
+2 ,3,750,14 ,0
+2 ,10,2500,64 ,0
+4 ,5,1250,23 ,1
+4 ,9,2250,46 ,0
+4 ,5,1250,23 ,0
+4 ,8,2000,40 ,1
+2 ,12,3000,82 ,0
+11 ,24,6000,64 ,0
+2 ,7,1750,46 ,1
+4 ,11,2750,61 ,0
+1 ,7,1750,57 ,0
+2 ,11,2750,79 ,1
+2 ,3,750,16 ,1
+4 ,5,1250,26 ,1
+2 ,6,1500,41 ,1
+2 ,5,1250,33 ,1
+2 ,4,1000,26 ,0
+2 ,5,1250,34 ,0
+4 ,8,2000,46 ,1
+2 ,4,1000,26 ,0
+4 ,8,2000,48 ,1
+2 ,2,500,10 ,1
+4 ,5,1250,28 ,0
+2 ,12,3000,95 ,0
+2 ,2,500,10 ,0
+4 ,6,1500,35 ,0
+2 ,11,2750,88 ,0
+2 ,3,750,19 ,0
+2 ,5,1250,37 ,0
+2 ,12,3000,98 ,0
+9 ,5,1250,19 ,0
+2 ,2,500,11 ,0
+2 ,9,2250,74 ,0
+5 ,14,3500,86 ,0
+4 ,3,750,16 ,0
+4 ,3,750,16 ,0
+4 ,2,500,9 ,1
+4 ,3,750,16 ,1
+6 ,3,750,14 ,0
+2 ,2,500,11 ,0
+2 ,2,500,11 ,1
+2 ,2,500,11 ,0
+2 ,7,1750,58 ,1
+4 ,6,1500,39 ,0
+4 ,11,2750,78 ,0
+2 ,1,250,2 ,1
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,1
+2 ,1,250,2 ,1
+2 ,1,250,2 ,1
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+11 ,10,2500,35 ,0
+11 ,4,1000,16 ,1
+4 ,5,1250,33 ,1
+4 ,6,1500,41 ,1
+2 ,3,750,22 ,0
+4 ,4,1000,26 ,1
+10 ,4,1000,16 ,0
+2 ,4,1000,35 ,0
+4 ,12,3000,88 ,0
+13 ,8,2000,26 ,0
+11 ,9,2250,33 ,0
+4 ,5,1250,34 ,0
+4 ,4,1000,26 ,0
+8 ,15,3750,77 ,0
+4 ,5,1250,35 ,1
+4 ,7,1750,52 ,0
+4 ,7,1750,52 ,0
+2 ,4,1000,35 ,0
+11 ,11,2750,42 ,0
+2 ,2,500,14 ,0
+2 ,5,1250,47 ,1
+9 ,8,2000,38 ,1
+4 ,6,1500,47 ,0
+11 ,7,1750,29 ,0
+9 ,9,2250,45 ,0
+4 ,6,1500,52 ,0
+4 ,7,1750,58 ,0
+6 ,2,500,11 ,1
+4 ,7,1750,58 ,0
+11 ,9,2250,38 ,0
+11 ,6,1500,26 ,0
+2 ,2,500,16 ,0
+2 ,7,1750,76 ,0
+11 ,6,1500,27 ,0
+11 ,3,750,14 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,1
+4 ,1,250,4 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,1
+4 ,1,250,4 ,1
+4 ,1,250,4 ,0
+4 ,1,250,4 ,1
+4 ,1,250,4 ,1
+4 ,1,250,4 ,0
+4 ,3,750,24 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,1
+4 ,1,250,4 ,0
+10 ,8,2000,39 ,0
+14 ,7,1750,26 ,0
+8 ,10,2500,63 ,0
+11 ,3,750,15 ,0
+4 ,2,500,14 ,0
+2 ,4,1000,43 ,0
+8 ,9,2250,58 ,0
+8 ,8,2000,52 ,1
+11 ,22,5500,98 ,0
+4 ,3,750,25 ,1
+11 ,17,4250,79 ,1
+9 ,2,500,11 ,0
+4 ,5,1250,46 ,0
+11 ,12,3000,58 ,0
+7 ,12,3000,86 ,0
+11 ,2,500,11 ,0
+11 ,2,500,11 ,0
+11 ,2,500,11 ,0
+2 ,6,1500,75 ,0
+11 ,8,2000,41 ,1
+11 ,3,750,16 ,1
+12 ,13,3250,59 ,0
+2 ,3,750,35 ,0
+16 ,8,2000,28 ,0
+11 ,7,1750,37 ,0
+4 ,3,750,28 ,0
+12 ,12,3000,58 ,0
+4 ,4,1000,41 ,0
+11 ,14,3500,73 ,1
+2 ,2,500,23 ,0
+2 ,3,750,38 ,1
+4 ,5,1250,58 ,0
+4 ,4,1000,43 ,1
+3 ,2,500,23 ,0
+11 ,8,2000,46 ,0
+4 ,7,1750,82 ,0
+13 ,4,1000,21 ,0
+16 ,11,2750,40 ,0
+16 ,7,1750,28 ,0
+7 ,2,500,16 ,0
+4 ,5,1250,58 ,0
+4 ,5,1250,58 ,0
+4 ,4,1000,46 ,0
+14 ,13,3250,57 ,0
+4 ,3,750,34 ,0
+14 ,18,4500,78 ,0
+11 ,8,2000,48 ,0
+14 ,16,4000,70 ,0
+14 ,4,1000,22 ,1
+14 ,5,1250,26 ,0
+8 ,2,500,16 ,0
+11 ,5,1250,33 ,0
+11 ,2,500,14 ,0
+4 ,2,500,23 ,0
+9 ,2,500,16 ,1
+14 ,5,1250,28 ,1
+14 ,3,750,19 ,1
+14 ,4,1000,23 ,1
+16 ,12,3000,50 ,0
+11 ,4,1000,28 ,0
+11 ,5,1250,35 ,0
+11 ,5,1250,35 ,0
+2 ,4,1000,70 ,0
+14 ,5,1250,28 ,0
+14 ,2,500,14 ,0
+14 ,2,500,14 ,0
+14 ,2,500,14 ,0
+14 ,2,500,14 ,0
+14 ,2,500,14 ,0
+14 ,2,500,14 ,0
+2 ,3,750,52 ,0
+14 ,6,1500,34 ,0
+11 ,5,1250,37 ,1
+4 ,5,1250,74 ,0
+11 ,3,750,23 ,0
+16 ,4,1000,23 ,0
+16 ,3,750,19 ,0
+11 ,5,1250,38 ,0
+11 ,2,500,16 ,0
+12 ,9,2250,60 ,0
+9 ,1,250,9 ,0
+9 ,1,250,9 ,0
+4 ,2,500,29 ,0
+11 ,2,500,17 ,0
+14 ,4,1000,26 ,0
+11 ,9,2250,72 ,1
+11 ,5,1250,41 ,0
+15 ,16,4000,82 ,0
+9 ,5,1250,51 ,1
+11 ,4,1000,34 ,0
+14 ,8,2000,50 ,1
+16 ,7,1750,38 ,0
+14 ,2,500,16 ,0
+2 ,2,500,41 ,0
+14 ,16,4000,98 ,0
+14 ,4,1000,28 ,1
+16 ,7,1750,39 ,0
+14 ,7,1750,47 ,0
+16 ,6,1500,35 ,0
+16 ,6,1500,35 ,1
+11 ,7,1750,62 ,1
+16 ,2,500,16 ,0
+16 ,3,750,21 ,1
+11 ,3,750,28 ,0
+11 ,7,1750,64 ,0
+11 ,1,250,11 ,1
+9 ,3,750,34 ,0
+14 ,4,1000,30 ,0
+23 ,38,9500,98 ,0
+11 ,6,1500,58 ,0
+11 ,1,250,11 ,0
+11 ,1,250,11 ,0
+11 ,1,250,11 ,0
+11 ,1,250,11 ,0
+11 ,1,250,11 ,0
+11 ,1,250,11 ,0
+11 ,1,250,11 ,0
+11 ,1,250,11 ,0
+11 ,2,500,21 ,0
+11 ,5,1250,50 ,0
+11 ,2,500,21 ,0
+16 ,4,1000,28 ,0
+4 ,2,500,41 ,0
+16 ,6,1500,40 ,0
+14 ,3,750,26 ,0
+9 ,2,500,26 ,0
+21 ,16,4000,64 ,0
+14 ,6,1500,51 ,0
+11 ,2,500,24 ,0
+4 ,3,750,71 ,0
+21 ,13,3250,57 ,0
+11 ,6,1500,71 ,0
+14 ,2,500,21 ,1
+23 ,15,3750,57 ,0
+14 ,4,1000,38 ,0
+11 ,2,500,26 ,0
+16 ,5,1250,40 ,1
+4 ,2,500,51 ,1
+14 ,3,750,31 ,0
+4 ,2,500,52 ,0
+9 ,4,1000,65 ,0
+14 ,4,1000,40 ,0
+11 ,3,750,40 ,1
+14 ,5,1250,50 ,0
+14 ,1,250,14 ,0
+14 ,1,250,14 ,0
+14 ,1,250,14 ,0
+14 ,1,250,14 ,0
+14 ,1,250,14 ,0
+14 ,1,250,14 ,0
+14 ,1,250,14 ,0
+14 ,1,250,14 ,0
+14 ,7,1750,72 ,0
+14 ,1,250,14 ,0
+14 ,1,250,14 ,0
+9 ,3,750,52 ,0
+14 ,7,1750,73 ,0
+11 ,4,1000,58 ,0
+11 ,4,1000,59 ,0
+4 ,2,500,59 ,0
+11 ,4,1000,61 ,0
+16 ,4,1000,40 ,0
+16 ,10,2500,89 ,0
+21 ,2,500,21 ,1
+21 ,3,750,26 ,0
+16 ,8,2000,76 ,0
+21 ,3,750,26 ,1
+18 ,2,500,23 ,0
+23 ,5,1250,33 ,0
+23 ,8,2000,46 ,0
+16 ,3,750,34 ,0
+14 ,5,1250,64 ,0
+14 ,3,750,41 ,0
+16 ,1,250,16 ,0
+16 ,1,250,16 ,0
+16 ,1,250,16 ,0
+16 ,1,250,16 ,0
+16 ,1,250,16 ,0
+16 ,1,250,16 ,0
+16 ,1,250,16 ,0
+16 ,4,1000,45 ,0
+16 ,1,250,16 ,0
+16 ,1,250,16 ,0
+16 ,1,250,16 ,0
+16 ,1,250,16 ,0
+16 ,1,250,16 ,0
+16 ,2,500,26 ,0
+21 ,2,500,23 ,0
+16 ,2,500,27 ,0
+21 ,2,500,23 ,0
+21 ,2,500,23 ,0
+14 ,4,1000,57 ,0
+16 ,5,1250,60 ,0
+23 ,2,500,23 ,0
+14 ,5,1250,74 ,0
+23 ,3,750,28 ,0
+16 ,3,750,40 ,0
+9 ,2,500,52 ,0
+9 ,2,500,52 ,0
+16 ,7,1750,87 ,1
+14 ,4,1000,64 ,0
+14 ,2,500,35 ,0
+16 ,7,1750,93 ,0
+21 ,2,500,25 ,0
+14 ,3,750,52 ,0
+23 ,14,3500,93 ,0
+18 ,8,2000,95 ,0
+16 ,3,750,46 ,0
+11 ,3,750,76 ,0
+11 ,2,500,52 ,0
+11 ,3,750,76 ,0
+23 ,12,3000,86 ,0
+21 ,3,750,35 ,0
+23 ,2,500,26 ,0
+23 ,2,500,26 ,0
+23 ,8,2000,64 ,0
+16 ,3,750,50 ,0
+23 ,3,750,33 ,0
+21 ,3,750,38 ,0
+23 ,2,500,28 ,0
+21 ,1,250,21 ,0
+21 ,1,250,21 ,0
+21 ,1,250,21 ,0
+21 ,1,250,21 ,0
+21 ,1,250,21 ,0
+21 ,1,250,21 ,0
+21 ,1,250,21 ,0
+21 ,1,250,21 ,0
+21 ,1,250,21 ,0
+21 ,1,250,21 ,1
+21 ,1,250,21 ,0
+21 ,1,250,21 ,0
+21 ,5,1250,60 ,0
+23 ,4,1000,45 ,0
+21 ,4,1000,52 ,0
+22 ,1,250,22 ,1
+11 ,2,500,70 ,0
+23 ,5,1250,58 ,0
+23 ,3,750,40 ,0
+23 ,3,750,41 ,0
+14 ,3,750,83 ,0
+21 ,2,500,35 ,0
+26 ,5,1250,49 ,1
+23 ,6,1500,70 ,0
+23 ,1,250,23 ,0
+23 ,1,250,23 ,0
+23 ,1,250,23 ,0
+23 ,1,250,23 ,0
+23 ,1,250,23 ,0
+23 ,1,250,23 ,0
+23 ,1,250,23 ,0
+23 ,1,250,23 ,0
+23 ,4,1000,53 ,0
+21 ,6,1500,86 ,0
+23 ,3,750,48 ,0
+21 ,2,500,41 ,0
+21 ,3,750,64 ,0
+16 ,2,500,70 ,0
+21 ,3,750,70 ,0
+23 ,4,1000,87 ,0
+23 ,3,750,89 ,0
+23 ,2,500,87 ,0
+35 ,3,750,64 ,0
+38 ,1,250,38 ,0
+38 ,1,250,38 ,0
+40 ,1,250,40 ,0
+74 ,1,250,74 ,0
+2 ,43,10750,86 ,1
+6 ,22,5500,28 ,1
+2 ,34,8500,77 ,1
+2 ,44,11000,98 ,0
+0 ,26,6500,76 ,1
+2 ,41,10250,98 ,1
+3 ,21,5250,42 ,1
+2 ,11,2750,23 ,0
+2 ,21,5250,52 ,1
+2 ,13,3250,32 ,1
+4 ,4,1000,4 ,1
+2 ,11,2750,26 ,0
+2 ,11,2750,28 ,0
+3 ,14,3500,35 ,0
+4 ,16,4000,38 ,1
+4 ,6,1500,14 ,0
+3 ,5,1250,12 ,1
+4 ,33,8250,98 ,1
+3 ,10,2500,33 ,1
+4 ,10,2500,28 ,1
+2 ,11,2750,40 ,1
+2 ,11,2750,41 ,1
+4 ,13,3250,39 ,1
+1 ,10,2500,43 ,1
+4 ,9,2250,28 ,0
+2 ,4,1000,11 ,0
+2 ,5,1250,16 ,1
+2 ,15,3750,64 ,0
+5 ,24,6000,79 ,0
+2 ,6,1500,22 ,1
+4 ,5,1250,16 ,1
+2 ,4,1000,14 ,1
+4 ,8,2000,28 ,0
+2 ,4,1000,14 ,0
+2 ,6,1500,26 ,0
+4 ,5,1250,16 ,1
+2 ,7,1750,32 ,1
+2 ,6,1500,26 ,1
+2 ,8,2000,38 ,1
+2 ,2,500,4 ,1
+2 ,6,1500,28 ,1
+2 ,10,2500,52 ,0
+4 ,16,4000,70 ,1
+4 ,2,500,4 ,1
+1 ,14,3500,95 ,0
+4 ,2,500,4 ,1
+7 ,14,3500,48 ,0
+2 ,3,750,11 ,0
+2 ,12,3000,70 ,1
+4 ,7,1750,32 ,1
+4 ,4,1000,16 ,0
+2 ,6,1500,35 ,1
+4 ,6,1500,28 ,1
+2 ,3,750,14 ,0
+2 ,4,1000,23 ,0
+4 ,4,1000,18 ,0
+5 ,6,1500,28 ,0
+4 ,6,1500,30 ,0
+14 ,5,1250,14 ,0
+3 ,8,2000,50 ,0
+4 ,11,2750,64 ,1
+4 ,9,2250,52 ,0
+4 ,16,4000,98 ,1
+7 ,10,2500,47 ,0
+4 ,14,3500,86 ,0
+2 ,9,2250,75 ,0
+4 ,6,1500,35 ,0
+4 ,9,2250,55 ,0
+4 ,6,1500,35 ,1
+2 ,6,1500,45 ,0
+2 ,6,1500,47 ,0
+4 ,2,500,9 ,0
+2 ,2,500,11 ,1
+2 ,2,500,11 ,0
+2 ,2,500,11 ,1
+4 ,6,1500,38 ,1
+3 ,4,1000,29 ,1
+9 ,9,2250,38 ,0
+11 ,5,1250,18 ,0
+2 ,3,750,21 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,1
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,1
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+11 ,11,2750,38 ,0
+2 ,3,750,22 ,0
+9 ,11,2750,49 ,1
+5 ,11,2750,75 ,0
+3 ,5,1250,38 ,0
+3 ,1,250,3 ,1
+4 ,6,1500,43 ,0
+2 ,3,750,24 ,0
+12 ,11,2750,39 ,0
+2 ,2,500,14 ,0
+4 ,6,1500,46 ,0
+9 ,3,750,14 ,0
+14 ,8,2000,26 ,0
+4 ,2,500,13 ,0
+4 ,11,2750,95 ,0
+2 ,7,1750,77 ,0
+2 ,7,1750,77 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,1
+4 ,1,250,4 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,1
+4 ,1,250,4 ,0
+4 ,7,1750,62 ,0
+4 ,1,250,4 ,0
+4 ,4,1000,34 ,1
+11 ,6,1500,28 ,0
+13 ,3,750,14 ,1
+7 ,5,1250,35 ,0
+9 ,9,2250,54 ,0
+11 ,2,500,11 ,0
+2 ,5,1250,63 ,0
+7 ,11,2750,89 ,0
+8 ,9,2250,64 ,0
+2 ,2,500,22 ,0
+6 ,3,750,26 ,0
+12 ,15,3750,71 ,0
+13 ,3,750,16 ,0
+11 ,16,4000,89 ,0
+4 ,5,1250,58 ,0
+14 ,7,1750,35 ,0
+11 ,4,1000,27 ,0
+7 ,9,2250,89 ,1
+11 ,8,2000,52 ,1
+7 ,5,1250,52 ,0
+11 ,6,1500,41 ,0
+10 ,5,1250,38 ,0
+14 ,2,500,14 ,1
+14 ,2,500,14 ,0
+14 ,2,500,14 ,0
+2 ,2,500,33 ,0
+11 ,3,750,23 ,0
+14 ,8,2000,46 ,0
+9 ,1,250,9 ,0
+16 ,5,1250,27 ,0
+14 ,4,1000,26 ,0
+4 ,2,500,30 ,0
+14 ,3,750,21 ,0
+16 ,16,4000,77 ,0
+4 ,2,500,31 ,0
+14 ,8,2000,50 ,0
+11 ,3,750,26 ,0
+14 ,7,1750,45 ,0
+15 ,5,1250,33 ,0
+16 ,2,500,16 ,0
+16 ,3,750,21 ,0
+11 ,8,2000,72 ,0
+11 ,1,250,11 ,0
+11 ,1,250,11 ,0
+11 ,1,250,11 ,0
+11 ,1,250,11 ,1
+11 ,1,250,11 ,0
+2 ,3,750,75 ,1
+2 ,3,750,77 ,0
+16 ,4,1000,28 ,0
+16 ,15,3750,87 ,0
+16 ,14,3500,83 ,0
+16 ,10,2500,62 ,0
+16 ,3,750,23 ,0
+14 ,3,750,26 ,0
+23 ,19,4750,62 ,0
+11 ,7,1750,75 ,0
+14 ,3,750,28 ,0
+20 ,14,3500,69 ,1
+4 ,2,500,46 ,0
+11 ,2,500,25 ,0
+11 ,3,750,37 ,0
+16 ,4,1000,33 ,0
+21 ,7,1750,38 ,0
+13 ,7,1750,76 ,0
+16 ,6,1500,50 ,0
+14 ,3,750,33 ,0
+14 ,1,250,14 ,0
+14 ,1,250,14 ,0
+14 ,1,250,14 ,0
+14 ,1,250,14 ,0
+14 ,1,250,14 ,0
+14 ,1,250,14 ,0
+17 ,7,1750,58 ,1
+14 ,3,750,35 ,0
+14 ,3,750,35 ,0
+16 ,7,1750,64 ,0
+21 ,2,500,21 ,0
+16 ,3,750,35 ,0
+16 ,1,250,16 ,0
+16 ,1,250,16 ,0
+16 ,1,250,16 ,0
+16 ,1,250,16 ,0
+16 ,1,250,16 ,0
+14 ,2,500,29 ,0
+11 ,4,1000,74 ,0
+11 ,2,500,38 ,1
+21 ,6,1500,48 ,0
+23 ,2,500,23 ,0
+23 ,6,1500,45 ,0
+14 ,2,500,35 ,1
+16 ,6,1500,81 ,0
+16 ,4,1000,58 ,0
+16 ,5,1250,71 ,0
+21 ,2,500,26 ,0
+21 ,3,750,35 ,0
+21 ,3,750,35 ,0
+23 ,8,2000,69 ,0
+21 ,3,750,38 ,0
+23 ,3,750,35 ,0
+21 ,3,750,40 ,0
+23 ,2,500,28 ,0
+21 ,1,250,21 ,0
+21 ,1,250,21 ,0
+25 ,6,1500,50 ,0
+21 ,1,250,21 ,0
+21 ,1,250,21 ,0
+23 ,3,750,39 ,0
+21 ,2,500,33 ,0
+14 ,3,750,79 ,0
+23 ,1,250,23 ,1
+23 ,1,250,23 ,0
+23 ,1,250,23 ,0
+23 ,1,250,23 ,0
+23 ,1,250,23 ,0
+23 ,1,250,23 ,0
+23 ,1,250,23 ,0
+23 ,4,1000,52 ,0
+23 ,1,250,23 ,0
+23 ,7,1750,88 ,0
+16 ,3,750,86 ,0
+23 ,2,500,38 ,0
+21 ,2,500,52 ,0
+23 ,3,750,62 ,0
+39 ,1,250,39 ,0
 72 ,1,250,72 ,0
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/io/csv/in/transfusion_3.data/part-0
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/io/csv/in/transfusion_3.data/part-0 b/src/test/scripts/functions/io/csv/in/transfusion_3.data/part-0
index 831646e..c1b8ba5 100644
--- a/src/test/scripts/functions/io/csv/in/transfusion_3.data/part-0
+++ b/src/test/scripts/functions/io/csv/in/transfusion_3.data/part-0
@@ -1,691 +1,691 @@
-Recency (months),Frequency (times),Monetary (c.c. blood),Time (months),"whether he/she donated blood in March 2007"
-2 ,50,12500,98 ,1
-0 ,13,3250,28 ,1
-1 ,16,4000,35 ,1
-2 ,20,5000,45 ,1
-1 ,24,6000,77 ,0
-4 ,4,1000,4 ,0
-2 ,7,1750,14 ,1
-1 ,12,3000,35 ,0
-2 ,9,2250,22 ,1
-5 ,46,11500,98 ,1
-4 ,23,5750,58 ,0
-0 ,3,750,4 ,0
-2 ,10,2500,28 ,1
-1 ,13,3250,47 ,0
-2 ,6,1500,15 ,1
-2 ,5,1250,11 ,1
-2 ,14,3500,48 ,1
-2 ,15,3750,49 ,1
-2 ,6,1500,15 ,1
-2 ,3,750,4 ,1
-2 ,3,750,4 ,1
-4 ,11,2750,28 ,0
-2 ,6,1500,16 ,1
-2 ,6,1500,16 ,1
-9 ,9,2250,16 ,0
-4 ,14,3500,40 ,0
-4 ,6,1500,14 ,0
-4 ,12,3000,34 ,1
-4 ,5,1250,11 ,1
-4 ,8,2000,21 ,0
-1 ,14,3500,58 ,0
-4 ,10,2500,28 ,1
-4 ,10,2500,28 ,1
-4 ,9,2250,26 ,1
-2 ,16,4000,64 ,0
-2 ,8,2000,28 ,1
-2 ,12,3000,47 ,1
-4 ,6,1500,16 ,1
-2 ,14,3500,57 ,1
-4 ,7,1750,22 ,1
-2 ,13,3250,53 ,1
-2 ,5,1250,16 ,0
-2 ,5,1250,16 ,1
-2 ,5,1250,16 ,0
-4 ,20,5000,69 ,1
-4 ,9,2250,28 ,1
-2 ,9,2250,36 ,0
-2 ,2,500,2 ,0
-2 ,2,500,2 ,0
-2 ,2,500,2 ,0
-2 ,11,2750,46 ,0
-2 ,11,2750,46 ,1
-2 ,6,1500,22 ,0
-2 ,12,3000,52 ,0
-4 ,5,1250,14 ,1
-4 ,19,4750,69 ,1
-4 ,8,2000,26 ,1
-2 ,7,1750,28 ,1
-2 ,16,4000,81 ,0
-3 ,6,1500,21 ,0
-2 ,7,1750,29 ,0
-2 ,8,2000,35 ,1
-2 ,10,2500,49 ,0
-4 ,5,1250,16 ,1
-2 ,3,750,9 ,1
-3 ,16,4000,74 ,0
-2 ,4,1000,14 ,1
-0 ,2,500,4 ,0
-4 ,7,1750,25 ,0
-1 ,9,2250,51 ,0
-2 ,4,1000,16 ,0
-2 ,4,1000,16 ,0
-4 ,17,4250,71 ,1
-2 ,2,500,4 ,0
-2 ,2,500,4 ,1
-2 ,2,500,4 ,1
-2 ,4,1000,16 ,1
-2 ,2,500,4 ,0
-2 ,2,500,4 ,0
-2 ,2,500,4 ,0
-4 ,6,1500,23 ,1
-2 ,4,1000,16 ,0
-2 ,4,1000,16 ,0
-2 ,4,1000,16 ,0
-2 ,6,1500,28 ,1
-2 ,6,1500,28 ,0
-4 ,2,500,4 ,0
-4 ,2,500,4 ,0
-4 ,2,500,4 ,0
-2 ,7,1750,35 ,1
-4 ,2,500,4 ,1
-4 ,2,500,4 ,0
-4 ,2,500,4 ,0
-4 ,2,500,4 ,0
-12 ,11,2750,23 ,0
-4 ,7,1750,28 ,0
-3 ,17,4250,86 ,0
-4 ,9,2250,38 ,1
-4 ,4,1000,14 ,1
-5 ,7,1750,26 ,1
-4 ,8,2000,34 ,1
-2 ,13,3250,76 ,1
-4 ,9,2250,40 ,0
-2 ,5,1250,26 ,0
-2 ,5,1250,26 ,0
-6 ,17,4250,70 ,0
-0 ,8,2000,59 ,0
-3 ,5,1250,26 ,0
-2 ,3,750,14 ,0
-2 ,10,2500,64 ,0
-4 ,5,1250,23 ,1
-4 ,9,2250,46 ,0
-4 ,5,1250,23 ,0
-4 ,8,2000,40 ,1
-2 ,12,3000,82 ,0
-11 ,24,6000,64 ,0
-2 ,7,1750,46 ,1
-4 ,11,2750,61 ,0
-1 ,7,1750,57 ,0
-2 ,11,2750,79 ,1
-2 ,3,750,16 ,1
-4 ,5,1250,26 ,1
-2 ,6,1500,41 ,1
-2 ,5,1250,33 ,1
-2 ,4,1000,26 ,0
-2 ,5,1250,34 ,0
-4 ,8,2000,46 ,1
-2 ,4,1000,26 ,0
-4 ,8,2000,48 ,1
-2 ,2,500,10 ,1
-4 ,5,1250,28 ,0
-2 ,12,3000,95 ,0
-2 ,2,500,10 ,0
-4 ,6,1500,35 ,0
-2 ,11,2750,88 ,0
-2 ,3,750,19 ,0
-2 ,5,1250,37 ,0
-2 ,12,3000,98 ,0
-9 ,5,1250,19 ,0
-2 ,2,500,11 ,0
-2 ,9,2250,74 ,0
-5 ,14,3500,86 ,0
-4 ,3,750,16 ,0
-4 ,3,750,16 ,0
-4 ,2,500,9 ,1
-4 ,3,750,16 ,1
-6 ,3,750,14 ,0
-2 ,2,500,11 ,0
-2 ,2,500,11 ,1
-2 ,2,500,11 ,0
-2 ,7,1750,58 ,1
-4 ,6,1500,39 ,0
-4 ,11,2750,78 ,0
-2 ,1,250,2 ,1
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,1
-2 ,1,250,2 ,1
-2 ,1,250,2 ,1
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-11 ,10,2500,35 ,0
-11 ,4,1000,16 ,1
-4 ,5,1250,33 ,1
-4 ,6,1500,41 ,1
-2 ,3,750,22 ,0
-4 ,4,1000,26 ,1
-10 ,4,1000,16 ,0
-2 ,4,1000,35 ,0
-4 ,12,3000,88 ,0
-13 ,8,2000,26 ,0
-11 ,9,2250,33 ,0
-4 ,5,1250,34 ,0
-4 ,4,1000,26 ,0
-8 ,15,3750,77 ,0
-4 ,5,1250,35 ,1
-4 ,7,1750,52 ,0
-4 ,7,1750,52 ,0
-2 ,4,1000,35 ,0
-11 ,11,2750,42 ,0
-2 ,2,500,14 ,0
-2 ,5,1250,47 ,1
-9 ,8,2000,38 ,1
-4 ,6,1500,47 ,0
-11 ,7,1750,29 ,0
-9 ,9,2250,45 ,0
-4 ,6,1500,52 ,0
-4 ,7,1750,58 ,0
-6 ,2,500,11 ,1
-4 ,7,1750,58 ,0
-11 ,9,2250,38 ,0
-11 ,6,1500,26 ,0
-2 ,2,500,16 ,0
-2 ,7,1750,76 ,0
-11 ,6,1500,27 ,0
-11 ,3,750,14 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,1
-4 ,1,250,4 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,1
-4 ,1,250,4 ,1
-4 ,1,250,4 ,0
-4 ,1,250,4 ,1
-4 ,1,250,4 ,1
-4 ,1,250,4 ,0
-4 ,3,750,24 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,1
-4 ,1,250,4 ,0
-10 ,8,2000,39 ,0
-14 ,7,1750,26 ,0
-8 ,10,2500,63 ,0
-11 ,3,750,15 ,0
-4 ,2,500,14 ,0
-2 ,4,1000,43 ,0
-8 ,9,2250,58 ,0
-8 ,8,2000,52 ,1
-11 ,22,5500,98 ,0
-4 ,3,750,25 ,1
-11 ,17,4250,79 ,1
-9 ,2,500,11 ,0
-4 ,5,1250,46 ,0
-11 ,12,3000,58 ,0
-7 ,12,3000,86 ,0
-11 ,2,500,11 ,0
-11 ,2,500,11 ,0
-11 ,2,500,11 ,0
-2 ,6,1500,75 ,0
-11 ,8,2000,41 ,1
-11 ,3,750,16 ,1
-12 ,13,3250,59 ,0
-2 ,3,750,35 ,0
-16 ,8,2000,28 ,0
-11 ,7,1750,37 ,0
-4 ,3,750,28 ,0
-12 ,12,3000,58 ,0
-4 ,4,1000,41 ,0
-11 ,14,3500,73 ,1
-2 ,2,500,23 ,0
-2 ,3,750,38 ,1
-4 ,5,1250,58 ,0
-4 ,4,1000,43 ,1
-3 ,2,500,23 ,0
-11 ,8,2000,46 ,0
-4 ,7,1750,82 ,0
-13 ,4,1000,21 ,0
-16 ,11,2750,40 ,0
-16 ,7,1750,28 ,0
-7 ,2,500,16 ,0
-4 ,5,1250,58 ,0
-4 ,5,1250,58 ,0
-4 ,4,1000,46 ,0
-14 ,13,3250,57 ,0
-4 ,3,750,34 ,0
-14 ,18,4500,78 ,0
-11 ,8,2000,48 ,0
-14 ,16,4000,70 ,0
-14 ,4,1000,22 ,1
-14 ,5,1250,26 ,0
-8 ,2,500,16 ,0
-11 ,5,1250,33 ,0
-11 ,2,500,14 ,0
-4 ,2,500,23 ,0
-9 ,2,500,16 ,1
-14 ,5,1250,28 ,1
-14 ,3,750,19 ,1
-14 ,4,1000,23 ,1
-16 ,12,3000,50 ,0
-11 ,4,1000,28 ,0
-11 ,5,1250,35 ,0
-11 ,5,1250,35 ,0
-2 ,4,1000,70 ,0
-14 ,5,1250,28 ,0
-14 ,2,500,14 ,0
-14 ,2,500,14 ,0
-14 ,2,500,14 ,0
-14 ,2,500,14 ,0
-14 ,2,500,14 ,0
-14 ,2,500,14 ,0
-2 ,3,750,52 ,0
-14 ,6,1500,34 ,0
-11 ,5,1250,37 ,1
-4 ,5,1250,74 ,0
-11 ,3,750,23 ,0
-16 ,4,1000,23 ,0
-16 ,3,750,19 ,0
-11 ,5,1250,38 ,0
-11 ,2,500,16 ,0
-12 ,9,2250,60 ,0
-9 ,1,250,9 ,0
-9 ,1,250,9 ,0
-4 ,2,500,29 ,0
-11 ,2,500,17 ,0
-14 ,4,1000,26 ,0
-11 ,9,2250,72 ,1
-11 ,5,1250,41 ,0
-15 ,16,4000,82 ,0
-9 ,5,1250,51 ,1
-11 ,4,1000,34 ,0
-14 ,8,2000,50 ,1
-16 ,7,1750,38 ,0
-14 ,2,500,16 ,0
-2 ,2,500,41 ,0
-14 ,16,4000,98 ,0
-14 ,4,1000,28 ,1
-16 ,7,1750,39 ,0
-14 ,7,1750,47 ,0
-16 ,6,1500,35 ,0
-16 ,6,1500,35 ,1
-11 ,7,1750,62 ,1
-16 ,2,500,16 ,0
-16 ,3,750,21 ,1
-11 ,3,750,28 ,0
-11 ,7,1750,64 ,0
-11 ,1,250,11 ,1
-9 ,3,750,34 ,0
-14 ,4,1000,30 ,0
-23 ,38,9500,98 ,0
-11 ,6,1500,58 ,0
-11 ,1,250,11 ,0
-11 ,1,250,11 ,0
-11 ,1,250,11 ,0
-11 ,1,250,11 ,0
-11 ,1,250,11 ,0
-11 ,1,250,11 ,0
-11 ,1,250,11 ,0
-11 ,1,250,11 ,0
-11 ,2,500,21 ,0
-11 ,5,1250,50 ,0
-11 ,2,500,21 ,0
-16 ,4,1000,28 ,0
-4 ,2,500,41 ,0
-16 ,6,1500,40 ,0
-14 ,3,750,26 ,0
-9 ,2,500,26 ,0
-21 ,16,4000,64 ,0
-14 ,6,1500,51 ,0
-11 ,2,500,24 ,0
-4 ,3,750,71 ,0
-21 ,13,3250,57 ,0
-11 ,6,1500,71 ,0
-14 ,2,500,21 ,1
-23 ,15,3750,57 ,0
-14 ,4,1000,38 ,0
-11 ,2,500,26 ,0
-16 ,5,1250,40 ,1
-4 ,2,500,51 ,1
-14 ,3,750,31 ,0
-4 ,2,500,52 ,0
-9 ,4,1000,65 ,0
-14 ,4,1000,40 ,0
-11 ,3,750,40 ,1
-14 ,5,1250,50 ,0
-14 ,1,250,14 ,0
-14 ,1,250,14 ,0
-14 ,1,250,14 ,0
-14 ,1,250,14 ,0
-14 ,1,250,14 ,0
-14 ,1,250,14 ,0
-14 ,1,250,14 ,0
-14 ,1,250,14 ,0
-14 ,7,1750,72 ,0
-14 ,1,250,14 ,0
-14 ,1,250,14 ,0
-9 ,3,750,52 ,0
-14 ,7,1750,73 ,0
-11 ,4,1000,58 ,0
-11 ,4,1000,59 ,0
-4 ,2,500,59 ,0
-11 ,4,1000,61 ,0
-16 ,4,1000,40 ,0
-16 ,10,2500,89 ,0
-21 ,2,500,21 ,1
-21 ,3,750,26 ,0
-16 ,8,2000,76 ,0
-21 ,3,750,26 ,1
-18 ,2,500,23 ,0
-23 ,5,1250,33 ,0
-23 ,8,2000,46 ,0
-16 ,3,750,34 ,0
-14 ,5,1250,64 ,0
-14 ,3,750,41 ,0
-16 ,1,250,16 ,0
-16 ,1,250,16 ,0
-16 ,1,250,16 ,0
-16 ,1,250,16 ,0
-16 ,1,250,16 ,0
-16 ,1,250,16 ,0
-16 ,1,250,16 ,0
-16 ,4,1000,45 ,0
-16 ,1,250,16 ,0
-16 ,1,250,16 ,0
-16 ,1,250,16 ,0
-16 ,1,250,16 ,0
-16 ,1,250,16 ,0
-16 ,2,500,26 ,0
-21 ,2,500,23 ,0
-16 ,2,500,27 ,0
-21 ,2,500,23 ,0
-21 ,2,500,23 ,0
-14 ,4,1000,57 ,0
-16 ,5,1250,60 ,0
-23 ,2,500,23 ,0
-14 ,5,1250,74 ,0
-23 ,3,750,28 ,0
-16 ,3,750,40 ,0
-9 ,2,500,52 ,0
-9 ,2,500,52 ,0
-16 ,7,1750,87 ,1
-14 ,4,1000,64 ,0
-14 ,2,500,35 ,0
-16 ,7,1750,93 ,0
-21 ,2,500,25 ,0
-14 ,3,750,52 ,0
-23 ,14,3500,93 ,0
-18 ,8,2000,95 ,0
-16 ,3,750,46 ,0
-11 ,3,750,76 ,0
-11 ,2,500,52 ,0
-11 ,3,750,76 ,0
-23 ,12,3000,86 ,0
-21 ,3,750,35 ,0
-23 ,2,500,26 ,0
-23 ,2,500,26 ,0
-23 ,8,2000,64 ,0
-16 ,3,750,50 ,0
-23 ,3,750,33 ,0
-21 ,3,750,38 ,0
-23 ,2,500,28 ,0
-21 ,1,250,21 ,0
-21 ,1,250,21 ,0
-21 ,1,250,21 ,0
-21 ,1,250,21 ,0
-21 ,1,250,21 ,0
-21 ,1,250,21 ,0
-21 ,1,250,21 ,0
-21 ,1,250,21 ,0
-21 ,1,250,21 ,0
-21 ,1,250,21 ,1
-21 ,1,250,21 ,0
-21 ,1,250,21 ,0
-21 ,5,1250,60 ,0
-23 ,4,1000,45 ,0
-21 ,4,1000,52 ,0
-22 ,1,250,22 ,1
-11 ,2,500,70 ,0
-23 ,5,1250,58 ,0
-23 ,3,750,40 ,0
-23 ,3,750,41 ,0
-14 ,3,750,83 ,0
-21 ,2,500,35 ,0
-26 ,5,1250,49 ,1
-23 ,6,1500,70 ,0
-23 ,1,250,23 ,0
-23 ,1,250,23 ,0
-23 ,1,250,23 ,0
-23 ,1,250,23 ,0
-23 ,1,250,23 ,0
-23 ,1,250,23 ,0
-23 ,1,250,23 ,0
-23 ,1,250,23 ,0
-23 ,4,1000,53 ,0
-21 ,6,1500,86 ,0
-23 ,3,750,48 ,0
-21 ,2,500,41 ,0
-21 ,3,750,64 ,0
-16 ,2,500,70 ,0
-21 ,3,750,70 ,0
-23 ,4,1000,87 ,0
-23 ,3,750,89 ,0
-23 ,2,500,87 ,0
-35 ,3,750,64 ,0
-38 ,1,250,38 ,0
-38 ,1,250,38 ,0
-40 ,1,250,40 ,0
-74 ,1,250,74 ,0
-2 ,43,10750,86 ,1
-6 ,22,5500,28 ,1
-2 ,34,8500,77 ,1
-2 ,44,11000,98 ,0
-0 ,26,6500,76 ,1
-2 ,41,10250,98 ,1
-3 ,21,5250,42 ,1
-2 ,11,2750,23 ,0
-2 ,21,5250,52 ,1
-2 ,13,3250,32 ,1
-4 ,4,1000,4 ,1
-2 ,11,2750,26 ,0
-2 ,11,2750,28 ,0
-3 ,14,3500,35 ,0
-4 ,16,4000,38 ,1
-4 ,6,1500,14 ,0
-3 ,5,1250,12 ,1
-4 ,33,8250,98 ,1
-3 ,10,2500,33 ,1
-4 ,10,2500,28 ,1
-2 ,11,2750,40 ,1
-2 ,11,2750,41 ,1
-4 ,13,3250,39 ,1
-1 ,10,2500,43 ,1
-4 ,9,2250,28 ,0
-2 ,4,1000,11 ,0
-2 ,5,1250,16 ,1
-2 ,15,3750,64 ,0
-5 ,24,6000,79 ,0
-2 ,6,1500,22 ,1
-4 ,5,1250,16 ,1
-2 ,4,1000,14 ,1
-4 ,8,2000,28 ,0
-2 ,4,1000,14 ,0
-2 ,6,1500,26 ,0
-4 ,5,1250,16 ,1
-2 ,7,1750,32 ,1
-2 ,6,1500,26 ,1
-2 ,8,2000,38 ,1
-2 ,2,500,4 ,1
-2 ,6,1500,28 ,1
-2 ,10,2500,52 ,0
-4 ,16,4000,70 ,1
-4 ,2,500,4 ,1
-1 ,14,3500,95 ,0
-4 ,2,500,4 ,1
-7 ,14,3500,48 ,0
-2 ,3,750,11 ,0
-2 ,12,3000,70 ,1
-4 ,7,1750,32 ,1
-4 ,4,1000,16 ,0
-2 ,6,1500,35 ,1
-4 ,6,1500,28 ,1
-2 ,3,750,14 ,0
-2 ,4,1000,23 ,0
-4 ,4,1000,18 ,0
-5 ,6,1500,28 ,0
-4 ,6,1500,30 ,0
-14 ,5,1250,14 ,0
-3 ,8,2000,50 ,0
-4 ,11,2750,64 ,1
-4 ,9,2250,52 ,0
-4 ,16,4000,98 ,1
-7 ,10,2500,47 ,0
-4 ,14,3500,86 ,0
-2 ,9,2250,75 ,0
-4 ,6,1500,35 ,0
-4 ,9,2250,55 ,0
-4 ,6,1500,35 ,1
-2 ,6,1500,45 ,0
-2 ,6,1500,47 ,0
-4 ,2,500,9 ,0
-2 ,2,500,11 ,1
-2 ,2,500,11 ,0
-2 ,2,500,11 ,1
-4 ,6,1500,38 ,1
-3 ,4,1000,29 ,1
-9 ,9,2250,38 ,0
-11 ,5,1250,18 ,0
-2 ,3,750,21 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,1
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,1
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-11 ,11,2750,38 ,0
-2 ,3,750,22 ,0
-9 ,11,2750,49 ,1
-5 ,11,2750,75 ,0
-3 ,5,1250,38 ,0
-3 ,1,250,3 ,1
-4 ,6,1500,43 ,0
-2 ,3,750,24 ,0
-12 ,11,2750,39 ,0
-2 ,2,500,14 ,0
-4 ,6,1500,46 ,0
-9 ,3,750,14 ,0
-14 ,8,2000,26 ,0
-4 ,2,500,13 ,0
-4 ,11,2750,95 ,0
-2 ,7,1750,77 ,0
-2 ,7,1750,77 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,1
-4 ,1,250,4 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,1
-4 ,1,250,4 ,0
-4 ,7,1750,62 ,0
-4 ,1,250,4 ,0
-4 ,4,1000,34 ,1
-11 ,6,1500,28 ,0
-13 ,3,750,14 ,1
-7 ,5,1250,35 ,0
-9 ,9,2250,54 ,0
-11 ,2,500,11 ,0
-2 ,5,1250,63 ,0
-7 ,11,2750,89 ,0
-8 ,9,2250,64 ,0
-2 ,2,500,22 ,0
-6 ,3,750,26 ,0
-12 ,15,3750,71 ,0
-13 ,3,750,16 ,0
-11 ,16,4000,89 ,0
-4 ,5,1250,58 ,0
-14 ,7,1750,35 ,0
-11 ,4,1000,27 ,0
-7 ,9,2250,89 ,1
-11 ,8,2000,52 ,1
-7 ,5,1250,52 ,0
-11 ,6,1500,41 ,0
-10 ,5,1250,38 ,0
-14 ,2,500,14 ,1
-14 ,2,500,14 ,0
-14 ,2,500,14 ,0
-2 ,2,500,33 ,0
-11 ,3,750,23 ,0
-14 ,8,2000,46 ,0
-9 ,1,250,9 ,0
-16 ,5,1250,27 ,0
-14 ,4,1000,26 ,0
-4 ,2,500,30 ,0
-14 ,3,750,21 ,0
-16 ,16,4000,77 ,0
-4 ,2,500,31 ,0
-14 ,8,2000,50 ,0
-11 ,3,750,26 ,0
-14 ,7,1750,45 ,0
-15 ,5,1250,33 ,0
-16 ,2,500,16 ,0
-16 ,3,750,21 ,0
-11 ,8,2000,72 ,0
-11 ,1,250,11 ,0
-11 ,1,250,11 ,0
-11 ,1,250,11 ,0
-11 ,1,250,11 ,1
-11 ,1,250,11 ,0
-2 ,3,750,75 ,1
-2 ,3,750,77 ,0
-16 ,4,1000,28 ,0
-16 ,15,3750,87 ,0
-16 ,14,3500,83 ,0
-16 ,10,2500,62 ,0
-16 ,3,750,23 ,0
-14 ,3,750,26 ,0
-23 ,19,4750,62 ,0
-11 ,7,1750,75 ,0
-14 ,3,750,28 ,0
-20 ,14,3500,69 ,1
-4 ,2,500,46 ,0
-11 ,2,500,25 ,0
-11 ,3,750,37 ,0
-16 ,4,1000,33 ,0
-21 ,7,1750,38 ,0
-13 ,7,1750,76 ,0
-16 ,6,1500,50 ,0
-14 ,3,750,33 ,0
+Recency (months),Frequency (times),Monetary (c.c. blood),Time (months),"whether he/she donated blood in March 2007"
+2 ,50,12500,98 ,1
+0 ,13,3250,28 ,1
+1 ,16,4000,35 ,1
+2 ,20,5000,45 ,1
+1 ,24,6000,77 ,0
+4 ,4,1000,4 ,0
+2 ,7,1750,14 ,1
+1 ,12,3000,35 ,0
+2 ,9,2250,22 ,1
+5 ,46,11500,98 ,1
+4 ,23,5750,58 ,0
+0 ,3,750,4 ,0
+2 ,10,2500,28 ,1
+1 ,13,3250,47 ,0
+2 ,6,1500,15 ,1
+2 ,5,1250,11 ,1
+2 ,14,3500,48 ,1
+2 ,15,3750,49 ,1
+2 ,6,1500,15 ,1
+2 ,3,750,4 ,1
+2 ,3,750,4 ,1
+4 ,11,2750,28 ,0
+2 ,6,1500,16 ,1
+2 ,6,1500,16 ,1
+9 ,9,2250,16 ,0
+4 ,14,3500,40 ,0
+4 ,6,1500,14 ,0
+4 ,12,3000,34 ,1
+4 ,5,1250,11 ,1
+4 ,8,2000,21 ,0
+1 ,14,3500,58 ,0
+4 ,10,2500,28 ,1
+4 ,10,2500,28 ,1
+4 ,9,2250,26 ,1
+2 ,16,4000,64 ,0
+2 ,8,2000,28 ,1
+2 ,12,3000,47 ,1
+4 ,6,1500,16 ,1
+2 ,14,3500,57 ,1
+4 ,7,1750,22 ,1
+2 ,13,3250,53 ,1
+2 ,5,1250,16 ,0
+2 ,5,1250,16 ,1
+2 ,5,1250,16 ,0
+4 ,20,5000,69 ,1
+4 ,9,2250,28 ,1
+2 ,9,2250,36 ,0
+2 ,2,500,2 ,0
+2 ,2,500,2 ,0
+2 ,2,500,2 ,0
+2 ,11,2750,46 ,0
+2 ,11,2750,46 ,1
+2 ,6,1500,22 ,0
+2 ,12,3000,52 ,0
+4 ,5,1250,14 ,1
+4 ,19,4750,69 ,1
+4 ,8,2000,26 ,1
+2 ,7,1750,28 ,1
+2 ,16,4000,81 ,0
+3 ,6,1500,21 ,0
+2 ,7,1750,29 ,0
+2 ,8,2000,35 ,1
+2 ,10,2500,49 ,0
+4 ,5,1250,16 ,1
+2 ,3,750,9 ,1
+3 ,16,4000,74 ,0
+2 ,4,1000,14 ,1
+0 ,2,500,4 ,0
+4 ,7,1750,25 ,0
+1 ,9,2250,51 ,0
+2 ,4,1000,16 ,0
+2 ,4,1000,16 ,0
+4 ,17,4250,71 ,1
+2 ,2,500,4 ,0
+2 ,2,500,4 ,1
+2 ,2,500,4 ,1
+2 ,4,1000,16 ,1
+2 ,2,500,4 ,0
+2 ,2,500,4 ,0
+2 ,2,500,4 ,0
+4 ,6,1500,23 ,1
+2 ,4,1000,16 ,0
+2 ,4,1000,16 ,0
+2 ,4,1000,16 ,0
+2 ,6,1500,28 ,1
+2 ,6,1500,28 ,0
+4 ,2,500,4 ,0
+4 ,2,500,4 ,0
+4 ,2,500,4 ,0
+2 ,7,1750,35 ,1
+4 ,2,500,4 ,1
+4 ,2,500,4 ,0
+4 ,2,500,4 ,0
+4 ,2,500,4 ,0
+12 ,11,2750,23 ,0
+4 ,7,1750,28 ,0
+3 ,17,4250,86 ,0
+4 ,9,2250,38 ,1
+4 ,4,1000,14 ,1
+5 ,7,1750,26 ,1
+4 ,8,2000,34 ,1
+2 ,13,3250,76 ,1
+4 ,9,2250,40 ,0
+2 ,5,1250,26 ,0
+2 ,5,1250,26 ,0
+6 ,17,4250,70 ,0
+0 ,8,2000,59 ,0
+3 ,5,1250,26 ,0
+2 ,3,750,14 ,0
+2 ,10,2500,64 ,0
+4 ,5,1250,23 ,1
+4 ,9,2250,46 ,0
+4 ,5,1250,23 ,0
+4 ,8,2000,40 ,1
+2 ,12,3000,82 ,0
+11 ,24,6000,64 ,0
+2 ,7,1750,46 ,1
+4 ,11,2750,61 ,0
+1 ,7,1750,57 ,0
+2 ,11,2750,79 ,1
+2 ,3,750,16 ,1
+4 ,5,1250,26 ,1
+2 ,6,1500,41 ,1
+2 ,5,1250,33 ,1
+2 ,4,1000,26 ,0
+2 ,5,1250,34 ,0
+4 ,8,2000,46 ,1
+2 ,4,1000,26 ,0
+4 ,8,2000,48 ,1
+2 ,2,500,10 ,1
+4 ,5,1250,28 ,0
+2 ,12,3000,95 ,0
+2 ,2,500,10 ,0
+4 ,6,1500,35 ,0
+2 ,11,2750,88 ,0
+2 ,3,750,19 ,0
+2 ,5,1250,37 ,0
+2 ,12,3000,98 ,0
+9 ,5,1250,19 ,0
+2 ,2,500,11 ,0
+2 ,9,2250,74 ,0
+5 ,14,3500,86 ,0
+4 ,3,750,16 ,0
+4 ,3,750,16 ,0
+4 ,2,500,9 ,1
+4 ,3,750,16 ,1
+6 ,3,750,14 ,0
+2 ,2,500,11 ,0
+2 ,2,500,11 ,1
+2 ,2,500,11 ,0
+2 ,7,1750,58 ,1
+4 ,6,1500,39 ,0
+4 ,11,2750,78 ,0
+2 ,1,250,2 ,1
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,1
+2 ,1,250,2 ,1
+2 ,1,250,2 ,1
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+11 ,10,2500,35 ,0
+11 ,4,1000,16 ,1
+4 ,5,1250,33 ,1
+4 ,6,1500,41 ,1
+2 ,3,750,22 ,0
+4 ,4,1000,26 ,1
+10 ,4,1000,16 ,0
+2 ,4,1000,35 ,0
+4 ,12,3000,88 ,0
+13 ,8,2000,26 ,0
+11 ,9,2250,33 ,0
+4 ,5,1250,34 ,0
+4 ,4,1000,26 ,0
+8 ,15,3750,77 ,0
+4 ,5,1250,35 ,1
+4 ,7,1750,52 ,0
+4 ,7,1750,52 ,0
+2 ,4,1000,35 ,0
+11 ,11,2750,42 ,0
+2 ,2,500,14 ,0
+2 ,5,1250,47 ,1
+9 ,8,2000,38 ,1
+4 ,6,1500,47 ,0
+11 ,7,1750,29 ,0
+9 ,9,2250,45 ,0
+4 ,6,1500,52 ,0
+4 ,7,1750,58 ,0
+6 ,2,500,11 ,1
+4 ,7,1750,58 ,0
+11 ,9,2250,38 ,0
+11 ,6,1500,26 ,0
+2 ,2,500,16 ,0
+2 ,7,1750,76 ,0
+11 ,6,1500,27 ,0
+11 ,3,750,14 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,1
+4 ,1,250,4 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,1
+4 ,1,250,4 ,1
+4 ,1,250,4 ,0
+4 ,1,250,4 ,1
+4 ,1,250,4 ,1
+4 ,1,250,4 ,0
+4 ,3,750,24 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,1
+4 ,1,250,4 ,0
+10 ,8,2000,39 ,0
+14 ,7,1750,26 ,0
+8 ,10,2500,63 ,0
+11 ,3,750,15 ,0
+4 ,2,500,14 ,0
+2 ,4,1000,43 ,0
+8 ,9,2250,58 ,0
+8 ,8,2000,52 ,1
+11 ,22,5500,98 ,0
+4 ,3,750,25 ,1
+11 ,17,4250,79 ,1
+9 ,2,500,11 ,0
+4 ,5,1250,46 ,0
+11 ,12,3000,58 ,0
+7 ,12,3000,86 ,0
+11 ,2,500,11 ,0
+11 ,2,500,11 ,0
+11 ,2,500,11 ,0
+2 ,6,1500,75 ,0
+11 ,8,2000,41 ,1
+11 ,3,750,16 ,1
+12 ,13,3250,59 ,0
+2 ,3,750,35 ,0
+16 ,8,2000,28 ,0
+11 ,7,1750,37 ,0
+4 ,3,750,28 ,0
+12 ,12,3000,58 ,0
+4 ,4,1000,41 ,0
+11 ,14,3500,73 ,1
+2 ,2,500,23 ,0
+2 ,3,750,38 ,1
+4 ,5,1250,58 ,0
+4 ,4,1000,43 ,1
+3 ,2,500,23 ,0
+11 ,8,2000,46 ,0
+4 ,7,1750,82 ,0
+13 ,4,1000,21 ,0
+16 ,11,2750,40 ,0
+16 ,7,1750,28 ,0
+7 ,2,500,16 ,0
+4 ,5,1250,58 ,0
+4 ,5,1250,58 ,0
+4 ,4,1000,46 ,0
+14 ,13,3250,57 ,0
+4 ,3,750,34 ,0
+14 ,18,4500,78 ,0
+11 ,8,2000,48 ,0
+14 ,16,4000,70 ,0
+14 ,4,1000,22 ,1
+14 ,5,1250,26 ,0
+8 ,2,500,16 ,0
+11 ,5,1250,33 ,0
+11 ,2,500,14 ,0
+4 ,2,500,23 ,0
+9 ,2,500,16 ,1
+14 ,5,1250,28 ,1
+14 ,3,750,19 ,1
+14 ,4,1000,23 ,1
+16 ,12,3000,50 ,0
+11 ,4,1000,28 ,0
+11 ,5,1250,35 ,0
+11 ,5,1250,35 ,0
+2 ,4,1000,70 ,0
+14 ,5,1250,28 ,0
+14 ,2,500,14 ,0
+14 ,2,500,14 ,0
+14 ,2,500,14 ,0
+14 ,2,500,14 ,0
+14 ,2,500,14 ,0
+14 ,2,500,14 ,0
+2 ,3,750,52 ,0
+14 ,6,1500,34 ,0
+11 ,5,1250,37 ,1
+4 ,5,1250,74 ,0
+11 ,3,750,23 ,0
+16 ,4,1000,23 ,0
+16 ,3,750,19 ,0
+11 ,5,1250,38 ,0
+11 ,2,500,16 ,0
+12 ,9,2250,60 ,0
+9 ,1,250,9 ,0
+9 ,1,250,9 ,0
+4 ,2,500,29 ,0
+11 ,2,500,17 ,0
+14 ,4,1000,26 ,0
+11 ,9,2250,72 ,1
+11 ,5,1250,41 ,0
+15 ,16,4000,82 ,0
+9 ,5,1250,51 ,1
+11 ,4,1000,34 ,0
+14 ,8,2000,50 ,1
+16 ,7,1750,38 ,0
+14 ,2,500,16 ,0
+2 ,2,500,41 ,0
+14 ,16,4000,98 ,0
+14 ,4,1000,28 ,1
+16 ,7,1750,39 ,0
+14 ,7,1750,47 ,0
+16 ,6,1500,35 ,0
+16 ,6,1500,35 ,1
+11 ,7,1750,62 ,1
+16 ,2,500,16 ,0
+16 ,3,750,21 ,1
+11 ,3,750,28 ,0
+11 ,7,1750,64 ,0
+11 ,1,250,11 ,1
+9 ,3,750,34 ,0
+14 ,4,1000,30 ,0
+23 ,38,9500,98 ,0
+11 ,6,1500,58 ,0
+11 ,1,250,11 ,0
+11 ,1,250,11 ,0
+11 ,1,250,11 ,0
+11 ,1,250,11 ,0
+11 ,1,250,11 ,0
+11 ,1,250,11 ,0
+11 ,1,250,11 ,0
+11 ,1,250,11 ,0
+11 ,2,500,21 ,0
+11 ,5,1250,50 ,0
+11 ,2,500,21 ,0
+16 ,4,1000,28 ,0
+4 ,2,500,41 ,0
+16 ,6,1500,40 ,0
+14 ,3,750,26 ,0
+9 ,2,500,26 ,0
+21 ,16,4000,64 ,0
+14 ,6,1500,51 ,0
+11 ,2,500,24 ,0
+4 ,3,750,71 ,0
+21 ,13,3250,57 ,0
+11 ,6,1500,71 ,0
+14 ,2,500,21 ,1
+23 ,15,3750,57 ,0
+14 ,4,1000,38 ,0
+11 ,2,500,26 ,0
+16 ,5,1250,40 ,1
+4 ,2,500,51 ,1
+14 ,3,750,31 ,0
+4 ,2,500,52 ,0
+9 ,4,1000,65 ,0
+14 ,4,1000,40 ,0
+11 ,3,750,40 ,1
+14 ,5,1250,50 ,0
+14 ,1,250,14 ,0
+14 ,1,250,14 ,0
+14 ,1,250,14 ,0
+14 ,1,250,14 ,0
+14 ,1,250,14 ,0
+14 ,1,250,14 ,0
+14 ,1,250,14 ,0
+14 ,1,250,14 ,0
+14 ,7,1750,72 ,0
+14 ,1,250,14 ,0
+14 ,1,250,14 ,0
+9 ,3,750,52 ,0
+14 ,7,1750,73 ,0
+11 ,4,1000,58 ,0
+11 ,4,1000,59 ,0
+4 ,2,500,59 ,0
+11 ,4,1000,61 ,0
+16 ,4,1000,40 ,0
+16 ,10,2500,89 ,0
+21 ,2,500,21 ,1
+21 ,3,750,26 ,0
+16 ,8,2000,76 ,0
+21 ,3,750,26 ,1
+18 ,2,500,23 ,0
+23 ,5,1250,33 ,0
+23 ,8,2000,46 ,0
+16 ,3,750,34 ,0
+14 ,5,1250,64 ,0
+14 ,3,750,41 ,0
+16 ,1,250,16 ,0
+16 ,1,250,16 ,0
+16 ,1,250,16 ,0
+16 ,1,250,16 ,0
+16 ,1,250,16 ,0
+16 ,1,250,16 ,0
+16 ,1,250,16 ,0
+16 ,4,1000,45 ,0
+16 ,1,250,16 ,0
+16 ,1,250,16 ,0
+16 ,1,250,16 ,0
+16 ,1,250,16 ,0
+16 ,1,250,16 ,0
+16 ,2,500,26 ,0
+21 ,2,500,23 ,0
+16 ,2,500,27 ,0
+21 ,2,500,23 ,0
+21 ,2,500,23 ,0
+14 ,4,1000,57 ,0
+16 ,5,1250,60 ,0
+23 ,2,500,23 ,0
+14 ,5,1250,74 ,0
+23 ,3,750,28 ,0
+16 ,3,750,40 ,0
+9 ,2,500,52 ,0
+9 ,2,500,52 ,0
+16 ,7,1750,87 ,1
+14 ,4,1000,64 ,0
+14 ,2,500,35 ,0
+16 ,7,1750,93 ,0
+21 ,2,500,25 ,0
+14 ,3,750,52 ,0
+23 ,14,3500,93 ,0
+18 ,8,2000,95 ,0
+16 ,3,750,46 ,0
+11 ,3,750,76 ,0
+11 ,2,500,52 ,0
+11 ,3,750,76 ,0
+23 ,12,3000,86 ,0
+21 ,3,750,35 ,0
+23 ,2,500,26 ,0
+23 ,2,500,26 ,0
+23 ,8,2000,64 ,0
+16 ,3,750,50 ,0
+23 ,3,750,33 ,0
+21 ,3,750,38 ,0
+23 ,2,500,28 ,0
+21 ,1,250,21 ,0
+21 ,1,250,21 ,0
+21 ,1,250,21 ,0
+21 ,1,250,21 ,0
+21 ,1,250,21 ,0
+21 ,1,250,21 ,0
+21 ,1,250,21 ,0
+21 ,1,250,21 ,0
+21 ,1,250,21 ,0
+21 ,1,250,21 ,1
+21 ,1,250,21 ,0
+21 ,1,250,21 ,0
+21 ,5,1250,60 ,0
+23 ,4,1000,45 ,0
+21 ,4,1000,52 ,0
+22 ,1,250,22 ,1
+11 ,2,500,70 ,0
+23 ,5,1250,58 ,0
+23 ,3,750,40 ,0
+23 ,3,750,41 ,0
+14 ,3,750,83 ,0
+21 ,2,500,35 ,0
+26 ,5,1250,49 ,1
+23 ,6,1500,70 ,0
+23 ,1,250,23 ,0
+23 ,1,250,23 ,0
+23 ,1,250,23 ,0
+23 ,1,250,23 ,0
+23 ,1,250,23 ,0
+23 ,1,250,23 ,0
+23 ,1,250,23 ,0
+23 ,1,250,23 ,0
+23 ,4,1000,53 ,0
+21 ,6,1500,86 ,0
+23 ,3,750,48 ,0
+21 ,2,500,41 ,0
+21 ,3,750,64 ,0
+16 ,2,500,70 ,0
+21 ,3,750,70 ,0
+23 ,4,1000,87 ,0
+23 ,3,750,89 ,0
+23 ,2,500,87 ,0
+35 ,3,750,64 ,0
+38 ,1,250,38 ,0
+38 ,1,250,38 ,0
+40 ,1,250,40 ,0
+74 ,1,250,74 ,0
+2 ,43,10750,86 ,1
+6 ,22,5500,28 ,1
+2 ,34,8500,77 ,1
+2 ,44,11000,98 ,0
+0 ,26,6500,76 ,1
+2 ,41,10250,98 ,1
+3 ,21,5250,42 ,1
+2 ,11,2750,23 ,0
+2 ,21,5250,52 ,1
+2 ,13,3250,32 ,1
+4 ,4,1000,4 ,1
+2 ,11,2750,26 ,0
+2 ,11,2750,28 ,0
+3 ,14,3500,35 ,0
+4 ,16,4000,38 ,1
+4 ,6,1500,14 ,0
+3 ,5,1250,12 ,1
+4 ,33,8250,98 ,1
+3 ,10,2500,33 ,1
+4 ,10,2500,28 ,1
+2 ,11,2750,40 ,1
+2 ,11,2750,41 ,1
+4 ,13,3250,39 ,1
+1 ,10,2500,43 ,1
+4 ,9,2250,28 ,0
+2 ,4,1000,11 ,0
+2 ,5,1250,16 ,1
+2 ,15,3750,64 ,0
+5 ,24,6000,79 ,0
+2 ,6,1500,22 ,1
+4 ,5,1250,16 ,1
+2 ,4,1000,14 ,1
+4 ,8,2000,28 ,0
+2 ,4,1000,14 ,0
+2 ,6,1500,26 ,0
+4 ,5,1250,16 ,1
+2 ,7,1750,32 ,1
+2 ,6,1500,26 ,1
+2 ,8,2000,38 ,1
+2 ,2,500,4 ,1
+2 ,6,1500,28 ,1
+2 ,10,2500,52 ,0
+4 ,16,4000,70 ,1
+4 ,2,500,4 ,1
+1 ,14,3500,95 ,0
+4 ,2,500,4 ,1
+7 ,14,3500,48 ,0
+2 ,3,750,11 ,0
+2 ,12,3000,70 ,1
+4 ,7,1750,32 ,1
+4 ,4,1000,16 ,0
+2 ,6,1500,35 ,1
+4 ,6,1500,28 ,1
+2 ,3,750,14 ,0
+2 ,4,1000,23 ,0
+4 ,4,1000,18 ,0
+5 ,6,1500,28 ,0
+4 ,6,1500,30 ,0
+14 ,5,1250,14 ,0
+3 ,8,2000,50 ,0
+4 ,11,2750,64 ,1
+4 ,9,2250,52 ,0
+4 ,16,4000,98 ,1
+7 ,10,2500,47 ,0
+4 ,14,3500,86 ,0
+2 ,9,2250,75 ,0
+4 ,6,1500,35 ,0
+4 ,9,2250,55 ,0
+4 ,6,1500,35 ,1
+2 ,6,1500,45 ,0
+2 ,6,1500,47 ,0
+4 ,2,500,9 ,0
+2 ,2,500,11 ,1
+2 ,2,500,11 ,0
+2 ,2,500,11 ,1
+4 ,6,1500,38 ,1
+3 ,4,1000,29 ,1
+9 ,9,2250,38 ,0
+11 ,5,1250,18 ,0
+2 ,3,750,21 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,1
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,1
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+11 ,11,2750,38 ,0
+2 ,3,750,22 ,0
+9 ,11,2750,49 ,1
+5 ,11,2750,75 ,0
+3 ,5,1250,38 ,0
+3 ,1,250,3 ,1
+4 ,6,1500,43 ,0
+2 ,3,750,24 ,0
+12 ,11,2750,39 ,0
+2 ,2,500,14 ,0
+4 ,6,1500,46 ,0
+9 ,3,750,14 ,0
+14 ,8,2000,26 ,0
+4 ,2,500,13 ,0
+4 ,11,2750,95 ,0
+2 ,7,1750,77 ,0
+2 ,7,1750,77 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,1
+4 ,1,250,4 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,1
+4 ,1,250,4 ,0
+4 ,7,1750,62 ,0
+4 ,1,250,4 ,0
+4 ,4,1000,34 ,1
+11 ,6,1500,28 ,0
+13 ,3,750,14 ,1
+7 ,5,1250,35 ,0
+9 ,9,2250,54 ,0
+11 ,2,500,11 ,0
+2 ,5,1250,63 ,0
+7 ,11,2750,89 ,0
+8 ,9,2250,64 ,0
+2 ,2,500,22 ,0
+6 ,3,750,26 ,0
+12 ,15,3750,71 ,0
+13 ,3,750,16 ,0
+11 ,16,4000,89 ,0
+4 ,5,1250,58 ,0
+14 ,7,1750,35 ,0
+11 ,4,1000,27 ,0
+7 ,9,2250,89 ,1
+11 ,8,2000,52 ,1
+7 ,5,1250,52 ,0
+11 ,6,1500,41 ,0
+10 ,5,1250,38 ,0
+14 ,2,500,14 ,1
+14 ,2,500,14 ,0
+14 ,2,500,14 ,0
+2 ,2,500,33 ,0
+11 ,3,750,23 ,0
+14 ,8,2000,46 ,0
+9 ,1,250,9 ,0
+16 ,5,1250,27 ,0
+14 ,4,1000,26 ,0
+4 ,2,500,30 ,0
+14 ,3,750,21 ,0
+16 ,16,4000,77 ,0
+4 ,2,500,31 ,0
+14 ,8,2000,50 ,0
+11 ,3,750,26 ,0
+14 ,7,1750,45 ,0
+15 ,5,1250,33 ,0
+16 ,2,500,16 ,0
+16 ,3,750,21 ,0
+11 ,8,2000,72 ,0
+11 ,1,250,11 ,0
+11 ,1,250,11 ,0
+11 ,1,250,11 ,0
+11 ,1,250,11 ,1
+11 ,1,250,11 ,0
+2 ,3,750,75 ,1
+2 ,3,750,77 ,0
+16 ,4,1000,28 ,0
+16 ,15,3750,87 ,0
+16 ,14,3500,83 ,0
+16 ,10,2500,62 ,0
+16 ,3,750,23 ,0
+14 ,3,750,26 ,0
+23 ,19,4750,62 ,0
+11 ,7,1750,75 ,0
+14 ,3,750,28 ,0
+20 ,14,3500,69 ,1
+4 ,2,500,46 ,0
+11 ,2,500,25 ,0
+11 ,3,750,37 ,0
+16 ,4,1000,33 ,0
+21 ,7,1750,38 ,0
+13 ,7,1750,76 ,0
+16 ,6,1500,50 ,0
+14 ,3,750,33 ,0
 14 ,1,250,14 ,0
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/io/csv/in/transfusion_3.data/part-1
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/io/csv/in/transfusion_3.data/part-1 b/src/test/scripts/functions/io/csv/in/transfusion_3.data/part-1
index e955660..13e9668 100644
--- a/src/test/scripts/functions/io/csv/in/transfusion_3.data/part-1
+++ b/src/test/scripts/functions/io/csv/in/transfusion_3.data/part-1
@@ -1,58 +1,58 @@
-14 ,1,250,14 ,0
-14 ,1,250,14 ,0
-14 ,1,250,14 ,0
-14 ,1,250,14 ,0
-14 ,1,250,14 ,0
-17 ,7,1750,58 ,1
-14 ,3,750,35 ,0
-14 ,3,750,35 ,0
-16 ,7,1750,64 ,0
-21 ,2,500,21 ,0
-16 ,3,750,35 ,0
-16 ,1,250,16 ,0
-16 ,1,250,16 ,0
-16 ,1,250,16 ,0
-16 ,1,250,16 ,0
-16 ,1,250,16 ,0
-14 ,2,500,29 ,0
-11 ,4,1000,74 ,0
-11 ,2,500,38 ,1
-21 ,6,1500,48 ,0
-23 ,2,500,23 ,0
-23 ,6,1500,45 ,0
-14 ,2,500,35 ,1
-16 ,6,1500,81 ,0
-16 ,4,1000,58 ,0
-16 ,5,1250,71 ,0
-21 ,2,500,26 ,0
-21 ,3,750,35 ,0
-21 ,3,750,35 ,0
-23 ,8,2000,69 ,0
-21 ,3,750,38 ,0
-23 ,3,750,35 ,0
-21 ,3,750,40 ,0
-23 ,2,500,28 ,0
-21 ,1,250,21 ,0
-21 ,1,250,21 ,0
-25 ,6,1500,50 ,0
-21 ,1,250,21 ,0
-21 ,1,250,21 ,0
-23 ,3,750,39 ,0
-21 ,2,500,33 ,0
-14 ,3,750,79 ,0
-23 ,1,250,23 ,1
-23 ,1,250,23 ,0
-23 ,1,250,23 ,0
-23 ,1,250,23 ,0
-23 ,1,250,23 ,0
-23 ,1,250,23 ,0
-23 ,1,250,23 ,0
-23 ,4,1000,52 ,0
-23 ,1,250,23 ,0
-23 ,7,1750,88 ,0
-16 ,3,750,86 ,0
-23 ,2,500,38 ,0
-21 ,2,500,52 ,0
-23 ,3,750,62 ,0
-39 ,1,250,39 ,0
+14 ,1,250,14 ,0
+14 ,1,250,14 ,0
+14 ,1,250,14 ,0
+14 ,1,250,14 ,0
+14 ,1,250,14 ,0
+17 ,7,1750,58 ,1
+14 ,3,750,35 ,0
+14 ,3,750,35 ,0
+16 ,7,1750,64 ,0
+21 ,2,500,21 ,0
+16 ,3,750,35 ,0
+16 ,1,250,16 ,0
+16 ,1,250,16 ,0
+16 ,1,250,16 ,0
+16 ,1,250,16 ,0
+16 ,1,250,16 ,0
+14 ,2,500,29 ,0
+11 ,4,1000,74 ,0
+11 ,2,500,38 ,1
+21 ,6,1500,48 ,0
+23 ,2,500,23 ,0
+23 ,6,1500,45 ,0
+14 ,2,500,35 ,1
+16 ,6,1500,81 ,0
+16 ,4,1000,58 ,0
+16 ,5,1250,71 ,0
+21 ,2,500,26 ,0
+21 ,3,750,35 ,0
+21 ,3,750,35 ,0
+23 ,8,2000,69 ,0
+21 ,3,750,38 ,0
+23 ,3,750,35 ,0
+21 ,3,750,40 ,0
+23 ,2,500,28 ,0
+21 ,1,250,21 ,0
+21 ,1,250,21 ,0
+25 ,6,1500,50 ,0
+21 ,1,250,21 ,0
+21 ,1,250,21 ,0
+23 ,3,750,39 ,0
+21 ,2,500,33 ,0
+14 ,3,750,79 ,0
+23 ,1,250,23 ,1
+23 ,1,250,23 ,0
+23 ,1,250,23 ,0
+23 ,1,250,23 ,0
+23 ,1,250,23 ,0
+23 ,1,250,23 ,0
+23 ,1,250,23 ,0
+23 ,4,1000,52 ,0
+23 ,1,250,23 ,0
+23 ,7,1750,88 ,0
+16 ,3,750,86 ,0
+23 ,2,500,38 ,0
+21 ,2,500,52 ,0
+23 ,3,750,62 ,0
+39 ,1,250,39 ,0
 72 ,1,250,72 ,0
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/io/csv/writecsv_verify.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/io/csv/writecsv_verify.R b/src/test/scripts/functions/io/csv/writecsv_verify.R
index 28299e7..5560c29 100644
--- a/src/test/scripts/functions/io/csv/writecsv_verify.R
+++ b/src/test/scripts/functions/io/csv/writecsv_verify.R
@@ -19,15 +19,15 @@
 #
 #-------------------------------------------------------------
 
-
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library(Matrix);
-
-A = read.csv(args[1], header=as.logical(args[2]), sep=args[3]);
-A[is.na(A)] = 0;
-x =  sum(A);
-write(x, args[4]);
-
+
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library(Matrix);
+
+A = read.csv(args[1], header=as.logical(args[2]), sep=args[3]);
+A[is.na(A)] = 0;
+x =  sum(A);
+write(x, args[4]);
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/io/matrixmarket/ReadMMTest_1.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/io/matrixmarket/ReadMMTest_1.dml b/src/test/scripts/functions/io/matrixmarket/ReadMMTest_1.dml
index b413e15..6255014 100644
--- a/src/test/scripts/functions/io/matrixmarket/ReadMMTest_1.dml
+++ b/src/test/scripts/functions/io/matrixmarket/ReadMMTest_1.dml
@@ -19,13 +19,13 @@
 #
 #-------------------------------------------------------------
 
-
-# Script that checks reading of MatrixMarket file
-# format="mm" is not provided in read() statement to test the 
-# code that infers the MM format
-
-A = read($1);
-x = sum(A);
-write(x, $2);
-
-
+
+# Script that checks reading of MatrixMarket file
+# format="mm" is not provided in read() statement to test the 
+# code that infers the MM format
+
+A = read($1);
+x = sum(A);
+write(x, $2);
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/io/matrixmarket/ReadMMTest_2.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/io/matrixmarket/ReadMMTest_2.dml b/src/test/scripts/functions/io/matrixmarket/ReadMMTest_2.dml
index 1c969d5..9b293ee 100644
--- a/src/test/scripts/functions/io/matrixmarket/ReadMMTest_2.dml
+++ b/src/test/scripts/functions/io/matrixmarket/ReadMMTest_2.dml
@@ -19,11 +19,11 @@
 #
 #-------------------------------------------------------------
 
-
-# Script that checks reading of MatrixMarket file
-
-A = read($1, format="mm");
-x = sum(A);
-write(x, $2);
-
-
+
+# Script that checks reading of MatrixMarket file
+
+A = read($1, format="mm");
+x = sum(A);
+write(x, $2);
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/io/matrixmarket/ReadMMTest_3.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/io/matrixmarket/ReadMMTest_3.dml b/src/test/scripts/functions/io/matrixmarket/ReadMMTest_3.dml
index 792e246..48b143d 100644
--- a/src/test/scripts/functions/io/matrixmarket/ReadMMTest_3.dml
+++ b/src/test/scripts/functions/io/matrixmarket/ReadMMTest_3.dml
@@ -19,11 +19,11 @@
 #
 #-------------------------------------------------------------
 
-
-# Script that checks reading of MatrixMarket file
-
-A = readMM($1);
-x = sum(A);
-write(x, $2);
-
-
+
+# Script that checks reading of MatrixMarket file
+
+A = readMM($1);
+x = sum(A);
+write(x, $2);
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/io/matrixmarket/in/ReadMMTest.mtx
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/io/matrixmarket/in/ReadMMTest.mtx b/src/test/scripts/functions/io/matrixmarket/in/ReadMMTest.mtx
index a9c2f96..75340fd 100644
--- a/src/test/scripts/functions/io/matrixmarket/in/ReadMMTest.mtx
+++ b/src/test/scripts/functions/io/matrixmarket/in/ReadMMTest.mtx
@@ -1,31 +1,31 @@
-%%MatrixMarket matrix coordinate real general
-%=================================================================================
-%
-% This ASCII file represents a sparse MxN matrix with L 
-% nonzeros in the following Matrix Market format:
-%
-% +----------------------------------------------+
-% |%%MatrixMarket matrix coordinate real general | <--- header line
-% |%                                             | <--+
-% |% comments                                    |    |-- 0 or more comment lines
-% |%                                             | <--+         
-% |    M  N  L                                   | <--- rows, columns, entries
-% |    I1  J1  A(I1, J1)                         | <--+
-% |    I2  J2  A(I2, J2)                         |    |
-% |    I3  J3  A(I3, J3)                         |    |-- L lines
-% |        . . .                                 |    |
-% |    IL JL  A(IL, JL)                          | <--+
-% +----------------------------------------------+   
-%
-% Indices are 1-based, i.e. A(1,1) is the first element.
-%
-%=================================================================================
-  5  5  8
-    1     1   1.000e+00
-    2     2   1.050e+01
-    3     3   1.500e-02
-    1     4   6.000e+00
-    4     2   2.505e+02
-    4     4  -2.800e+02
-    4     5   3.332e+01
+%%MatrixMarket matrix coordinate real general
+%=================================================================================
+%
+% This ASCII file represents a sparse MxN matrix with L 
+% nonzeros in the following Matrix Market format:
+%
+% +----------------------------------------------+
+% |%%MatrixMarket matrix coordinate real general | <--- header line
+% |%                                             | <--+
+% |% comments                                    |    |-- 0 or more comment lines
+% |%                                             | <--+         
+% |    M  N  L                                   | <--- rows, columns, entries
+% |    I1  J1  A(I1, J1)                         | <--+
+% |    I2  J2  A(I2, J2)                         |    |
+% |    I3  J3  A(I3, J3)                         |    |-- L lines
+% |        . . .                                 |    |
+% |    IL JL  A(IL, JL)                          | <--+
+% +----------------------------------------------+   
+%
+% Indices are 1-based, i.e. A(1,1) is the first element.
+%
+%=================================================================================
+  5  5  8
+    1     1   1.000e+00
+    2     2   1.050e+01
+    3     3   1.500e-02
+    1     4   6.000e+00
+    4     2   2.505e+02
+    4     4  -2.800e+02
+    4     5   3.332e+01
     5     5   1.200e+01
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/io/matrixmarket/mm_test1.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/io/matrixmarket/mm_test1.dml b/src/test/scripts/functions/io/matrixmarket/mm_test1.dml
index 5e04c8e..9e87a71 100644
--- a/src/test/scripts/functions/io/matrixmarket/mm_test1.dml
+++ b/src/test/scripts/functions/io/matrixmarket/mm_test1.dml
@@ -19,10 +19,10 @@
 #
 #-------------------------------------------------------------
 
-
-
-# test for reading and writing in MM format
-
-A = read($1, format=$2);
-write(A, $3, format=$4);
-
+
+
+# test for reading and writing in MM format
+
+A = read($1, format=$2);
+write(A, $3, format=$4);
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/io/matrixmarket/mm_verify.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/io/matrixmarket/mm_verify.R b/src/test/scripts/functions/io/matrixmarket/mm_verify.R
index 6a6c5f1..3c80552 100644
--- a/src/test/scripts/functions/io/matrixmarket/mm_verify.R
+++ b/src/test/scripts/functions/io/matrixmarket/mm_verify.R
@@ -19,14 +19,14 @@
 #
 #-------------------------------------------------------------
 
-
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library(Matrix);
-
-A = readMM(args[1]);
-x =  sum(A);
-write(x, args[2]);
-
+
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library(Matrix);
+
+A = readMM(args[1]);
+x =  sum(A);
+write(x, args[2]);
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/io/matrixmarket/mm_verify.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/io/matrixmarket/mm_verify.dml b/src/test/scripts/functions/io/matrixmarket/mm_verify.dml
index 3526fbc..190d359 100644
--- a/src/test/scripts/functions/io/matrixmarket/mm_verify.dml
+++ b/src/test/scripts/functions/io/matrixmarket/mm_verify.dml
@@ -19,10 +19,10 @@
 #
 #-------------------------------------------------------------
 
-
-
-A = read($1, rows=$2, cols=$3, format=$4);
-x = sum(A);
-write(x, $5);
-
-
+
+
+A = read($1, rows=$2, cols=$3, format=$4);
+x = sum(A);
+write(x, $5);
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/jmlc/m-svm-score.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/jmlc/m-svm-score.R b/src/test/scripts/functions/jmlc/m-svm-score.R
index 74a04a7..f69e5e8 100644
--- a/src/test/scripts/functions/jmlc/m-svm-score.R
+++ b/src/test/scripts/functions/jmlc/m-svm-score.R
@@ -19,25 +19,25 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-X <- as.matrix(readMM(paste(args[1], "X.mtx", sep="")))
-W <- as.matrix(readMM(paste(args[1], "W.mtx", sep="")))
-
-Nt = nrow(X);
-num_classes = ncol(W)
-n = ncol(X);
-
-b = W[n+1,]
-ones = matrix(1, Nt, 1)
-scores = X %*% W[1:n,] + ones %*% b;
-
-predicted_y = max.col(scores,ties.method="last")
-
-writeMM(as(predicted_y, "CsparseMatrix"), paste(args[2], "predicted_y", sep="")); 
-
-
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+X <- as.matrix(readMM(paste(args[1], "X.mtx", sep="")))
+W <- as.matrix(readMM(paste(args[1], "W.mtx", sep="")))
+
+Nt = nrow(X);
+num_classes = ncol(W)
+n = ncol(X);
+
+b = W[n+1,]
+ones = matrix(1, Nt, 1)
+scores = X %*% W[1:n,] + ones %*% b;
+
+predicted_y = max.col(scores,ties.method="last")
+
+writeMM(as(predicted_y, "CsparseMatrix"), paste(args[2], "predicted_y", sep="")); 
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/jmlc/m-svm-score.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/jmlc/m-svm-score.dml b/src/test/scripts/functions/jmlc/m-svm-score.dml
index 3b70100..a371e53 100644
--- a/src/test/scripts/functions/jmlc/m-svm-score.dml
+++ b/src/test/scripts/functions/jmlc/m-svm-score.dml
@@ -1,35 +1,35 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-X = read("./tmp/X", rows=-1, cols=-1);
-W = read("./tmp/W", rows=-1, cols=-1);
-
-Nt = nrow(X);
-num_classes = ncol(W)
-n = ncol(X);
-
-b = W[n+1,]
-ones = matrix(1, rows=Nt, cols=1)
-scores = X %*% W[1:n,] + ones %*% b;
-
-predicted_y = rowIndexMax(scores);
-write(predicted_y, "./tmp", format="text");
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+X = read("./tmp/X", rows=-1, cols=-1);
+W = read("./tmp/W", rows=-1, cols=-1);
+
+Nt = nrow(X);
+num_classes = ncol(W)
+n = ncol(X);
+
+b = W[n+1,]
+ones = matrix(1, rows=Nt, cols=1)
+scores = X %*% W[1:n,] + ones %*% b;
+
+predicted_y = rowIndexMax(scores);
+write(predicted_y, "./tmp", format="text");
   
\ No newline at end of file


[40/55] [partial] incubator-systemml git commit: [SYSTEMML-482] [SYSTEMML-480] Adding a Git attributes file to enforce Unix-styled line endings, and normalizing all of the line endings.

Posted by du...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/scripts/algorithms/stratstats.dml
----------------------------------------------------------------------
diff --git a/scripts/algorithms/stratstats.dml b/scripts/algorithms/stratstats.dml
index fc846f3..2b7425d 100644
--- a/scripts/algorithms/stratstats.dml
+++ b/scripts/algorithms/stratstats.dml
@@ -1,396 +1,396 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-#
-# STRATIFIED BIVARIATE STATISTICS, VERSION 4
-# 
-# INPUT PARAMETERS:
-# -----------------------------------------------------------------------------
-# NAME  TYPE   DEFAULT  MEANING
-# -----------------------------------------------------------------------------
-# X     String  ---     Location to read matrix X that has all 1-st covariates
-# Y     String  " "     Location to read matrix Y that has all 2-nd covariates
-#                       the default value " " means "use X in place of Y"
-# S     String  " "     Location to read matrix S that has the stratum column
-#                       the default value " " means "use X in place of S"
-# Xcid  String  " "     Location to read the 1-st covariate X-column indices
-#                       the default value " " means "use columns 1 : ncol(X)"
-# Ycid  String  " "     Location to read the 2-nd covariate Y-column indices
-#                       the default value " " means "use columns 1 : ncol(Y)"
-# Scid  Int      1      Column index of the stratum column in S
-# O     String  ---     Location to store the output matrix (see below)
-# fmt   String "text"   Matrix output format, usually "text" or "csv"
-# -----------------------------------------------------------------------------
-# Note: the stratum column must contain small positive integers; all fractional
-# values are rounded; strata with ID <= 0 or NaN are treated as missing.
-#
-# OUTPUT MATRIX:
-# One row per each distinct pair (1st covariate, 2nd covariate)
-# 40 columns containing the following information:
-#     Col 01: 1st covariate X-column number
-#     Col 02: 1st covariate global presence count
-#     Col 03: 1st covariate global mean
-#     Col 04: 1st covariate global standard deviation
-#     Col 05: 1st covariate stratified standard deviation
-#     Col 06: R-squared, 1st covariate vs. strata
-#     Col 07: adjusted R-squared, 1st covariate vs. strata
-#     Col 08: P-value, 1st covariate vs. strata
-#     Col 09-10: Reserved
-#     Col 11: 2nd covariate Y-column number
-#     Col 12: 2nd covariate global presence count
-#     Col 13: 2nd covariate global mean
-#     Col 14: 2nd covariate global standard deviation
-#     Col 15: 2nd covariate stratified standard deviation
-#     Col 16: R-squared, 2nd covariate vs. strata
-#     Col 17: adjusted R-squared, 2nd covariate vs. strata
-#     Col 18: P-value, 2nd covariate vs. strata
-#     Col 19-20: Reserved
-#     Col 21: Global 1st & 2nd covariate presence count
-#     Col 22: Global regression slope (2nd vs. 1st covariate)
-#     Col 23: Global regression slope standard deviation
-#     Col 24: Global correlation = +/- sqrt(R-squared)
-#     Col 25: Global residual standard deviation
-#     Col 26: Global R-squared
-#     Col 27: Global adjusted R-squared
-#     Col 28: Global P-value for hypothesis "slope = 0"
-#     Col 29-30: Reserved
-#     Col 31: Stratified 1st & 2nd covariate presence count
-#     Col 32: Stratified regression slope (2nd vs. 1st covariate)
-#     Col 33: Stratified regression slope standard deviation
-#     Col 34: Stratified correlation = +/- sqrt(R-squared)
-#     Col 35: Stratified residual standard deviation
-#     Col 36: Stratified R-squared
-#     Col 37: Stratified adjusted R-squared
-#     Col 38: Stratified P-value for hypothesis "slope = 0"
-#     Col 39: Number of strata with at least two counted points
-#     Col 40: Reserved
-#
-# EXAMPLES:
-#
-# hadoop jar SystemML.jar -f stratstats.dml -nvargs X=INPUT_DIR/X.mtx Xcid=INPUT_DIR/Xcid.mtx
-#     Y=INPUT_DIR/Y.mtx Ycid=INPUT_DIR/Ycid.mtx S=INPUT_DIR/S.mtx Scid=1 O=OUTPUT_DIR/Out.mtx fmt=csv
-#
-# hadoop jar SystemML.jar -f stratstats.dml -nvargs X=INPUT_DIR/Data.mtx Xcid=INPUT_DIR/Xcid.mtx
-#     Ycid=INPUT_DIR/Ycid.mtx Scid=1 O=OUTPUT_DIR/Out.mtx
-
-fileX = $X;
-fileY = ifdef ($Y, " ");
-fileS = ifdef ($S, " ");
-fileO = $O;
-fmtO  = ifdef ($fmt, "text");
-
-fileXcid = ifdef ($Xcid, " ");
-fileYcid = ifdef ($Ycid, " ");
-stratum_column_id = ifdef ($Scid, 1);
-
-print ("BEGIN STRATIFIED STATISTICS SCRIPT");
-
-print ("Reading the input matrices...");
-
-XwithNaNs = read (fileX);
-if (fileY != " ") {
-    YwithNaNs = read (fileY);
-} else {
-    YwithNaNs = XwithNaNs;
-}
-if (fileS != " ") {
-    SwithNaNsFull = read (fileS);
-    SwithNaNs = SwithNaNsFull [, stratum_column_id];
-} else {
-    SwithNaNs = XwithNaNs [, stratum_column_id];
-}
-if (fileXcid != " ") {
-    Xcols = read (fileXcid);
-} else {
-    Xcols = t(seq (1, ncol (XwithNaNs), 1));
-}
-if (fileYcid != " ") {
-    Ycols = read (fileYcid);
-} else {
-    Ycols = t(seq (1, ncol (YwithNaNs), 1));
-}
-tXcols = t(Xcols);
-tYcols = t(Ycols);
-
-num_records  = nrow (XwithNaNs);
-num_attrs    = ncol (XwithNaNs);
-num_attrs_X  = ncol (Xcols);
-num_attrs_Y  = ncol (Ycols);
-num_attrs_XY = num_attrs_X * num_attrs_Y;
-
-print ("Preparing the covariates...");
-
-XnoNaNs = replace (target = XwithNaNs, pattern = 0.0/0.0, replacement = 0);
-YnoNaNs = replace (target = YwithNaNs, pattern = 0.0/0.0, replacement = 0);
-XNaNmask = ppred (XwithNaNs, XwithNaNs, "==");
-YNaNmask = ppred (YwithNaNs, YwithNaNs, "==");
-one_to_num_attrs_X = seq (1, num_attrs_X, 1);
-one_to_num_attrs_Y = seq (1, num_attrs_Y, 1);
-ProjX = matrix (0, rows = num_attrs, cols = num_attrs_X);
-ProjY = matrix (0, rows = num_attrs, cols = num_attrs_Y);
-
-ProjX_ctable = table (tXcols, one_to_num_attrs_X);
-ProjX [1 : nrow (ProjX_ctable), ] = ProjX_ctable;
-
-ProjY_ctable = table (tYcols, one_to_num_attrs_Y);
-ProjY [1 : nrow (ProjY_ctable), ] = ProjY_ctable;
-
-X = XnoNaNs %*% ProjX;
-Y = YnoNaNs %*% ProjY;
-X_mask = XNaNmask %*% ProjX;
-Y_mask = YNaNmask %*% ProjY;
-
-print ("Preparing the strata...");
-
-SnoNaNs = replace (target = SwithNaNs, pattern = 0.0/0.0, replacement = 0);
-S = round (SnoNaNs) * ppred (SnoNaNs, 0.0, ">");
-Proj_good_stratumID = diag (ppred (S, 0.0, ">"));
-Proj_good_stratumID = removeEmpty (target = Proj_good_stratumID, margin = "rows");
-vector_of_good_stratumIDs = Proj_good_stratumID %*% S;
-vector_of_good_stratumIDs = vector_of_good_stratumIDs + (1 - min (vector_of_good_stratumIDs));
-num_records_with_good_stratumID = nrow (Proj_good_stratumID);
-one_to_num_records_with_good_stratumID = seq (1, num_records_with_good_stratumID, 1);
-
-# Create a group-by summation matrix for records over stratum IDs
-# "with_empty" means with stratum IDs that never occur in records
-
-num_strata_with_empty = max (vector_of_good_stratumIDs);
-StrataSummator_with_empty = table (vector_of_good_stratumIDs, one_to_num_records_with_good_stratumID);
-StrataSummator = removeEmpty (target = StrataSummator_with_empty, margin = "rows");
-StrataSummator = StrataSummator %*% Proj_good_stratumID;
-num_strata = nrow (StrataSummator);
-num_empty_strata = num_strata_with_empty - num_strata;
-print ("There are " + num_strata + " nonempty strata and " + num_empty_strata + " empty but positive-ID strata.");
-
-print ("Computing the global single-variate statistics...");
-
-cnt_X_global = colSums (X_mask);
-cnt_Y_global = colSums (Y_mask);
-avg_X_global = colSums (X) / cnt_X_global;
-avg_Y_global = colSums (Y) / cnt_Y_global;
-var_sumX_global = colSums (X * X) - cnt_X_global * (avg_X_global * avg_X_global);
-var_sumY_global = colSums (Y * Y) - cnt_Y_global * (avg_Y_global * avg_Y_global);
-                 sqrt_failsafe_input_1 = var_sumX_global / (cnt_X_global - 1);
-stdev_X_global = sqrt_failsafe (sqrt_failsafe_input_1);
-                 sqrt_failsafe_input_2 = var_sumY_global / (cnt_Y_global - 1);
-stdev_Y_global = sqrt_failsafe (sqrt_failsafe_input_2);
-
-print ("Computing the stratified single-variate statistics...");
-
-# Compute per-stratum statistics, prevent div-0 for locally empty (due to NaNs in X or Y) strata
-
-Cnt_X_per_stratum = StrataSummator %*% X_mask;
-Cnt_Y_per_stratum = StrataSummator %*% Y_mask;
-Is_none_X_per_stratum = ppred (Cnt_X_per_stratum, 0, "==");
-Is_none_Y_per_stratum = ppred (Cnt_Y_per_stratum, 0, "==");
-One_over_cnt_X_per_stratum = (1 - Is_none_X_per_stratum) / (Cnt_X_per_stratum + Is_none_X_per_stratum);
-One_over_cnt_Y_per_stratum = (1 - Is_none_Y_per_stratum) / (Cnt_Y_per_stratum + Is_none_Y_per_stratum);
-num_X_nonempty_strata = num_strata - colSums (Is_none_X_per_stratum);
-num_Y_nonempty_strata = num_strata - colSums (Is_none_Y_per_stratum);
-
-Sum_X_per_stratum  = StrataSummator %*% X;
-Sum_Y_per_stratum  = StrataSummator %*% Y;
-
-# Recompute some global statistics to exclude bad stratum-ID records
-
-cnt_X_with_good_stratumID = colSums (Cnt_X_per_stratum);
-cnt_Y_with_good_stratumID = colSums (Cnt_Y_per_stratum);
-sum_X_with_good_stratumID = colSums (Sum_X_per_stratum);
-sum_Y_with_good_stratumID = colSums (Sum_Y_per_stratum);
-var_sumX_with_good_stratumID = colSums (StrataSummator %*% (X * X)) - (sum_X_with_good_stratumID * sum_X_with_good_stratumID) / cnt_X_with_good_stratumID;
-var_sumY_with_good_stratumID = colSums (StrataSummator %*% (Y * Y)) - (sum_Y_with_good_stratumID * sum_Y_with_good_stratumID) / cnt_Y_with_good_stratumID;
-
-# Compute the stratified statistics
-
-var_sumX_stratified   = colSums (StrataSummator %*% (X * X)) - colSums (One_over_cnt_X_per_stratum * Sum_X_per_stratum * Sum_X_per_stratum);
-var_sumY_stratified   = colSums (StrataSummator %*% (Y * Y)) - colSums (One_over_cnt_Y_per_stratum * Sum_Y_per_stratum * Sum_Y_per_stratum);
-                        sqrt_failsafe_input_3 = var_sumX_stratified / (cnt_X_with_good_stratumID - num_X_nonempty_strata);
-stdev_X_stratified    = sqrt_failsafe (sqrt_failsafe_input_3);
-                        sqrt_failsafe_input_4 = var_sumY_stratified / (cnt_Y_with_good_stratumID - num_Y_nonempty_strata);
-stdev_Y_stratified    = sqrt_failsafe (sqrt_failsafe_input_4);
-r_sqr_X_vs_strata     = 1 - var_sumX_stratified / var_sumX_with_good_stratumID;
-r_sqr_Y_vs_strata     = 1 - var_sumY_stratified / var_sumY_with_good_stratumID;
-adj_r_sqr_X_vs_strata = 1 - (var_sumX_stratified / (cnt_X_with_good_stratumID - num_X_nonempty_strata)) / (var_sumX_with_good_stratumID / (cnt_X_with_good_stratumID - 1));
-adj_r_sqr_Y_vs_strata = 1 - (var_sumY_stratified / (cnt_Y_with_good_stratumID - num_Y_nonempty_strata)) / (var_sumY_with_good_stratumID / (cnt_Y_with_good_stratumID - 1));
-fStat_X_vs_strata     = ((var_sumX_with_good_stratumID - var_sumX_stratified) / (num_X_nonempty_strata - 1)) / (var_sumX_stratified / (cnt_X_with_good_stratumID - num_X_nonempty_strata));
-fStat_Y_vs_strata     = ((var_sumY_with_good_stratumID - var_sumY_stratified) / (num_Y_nonempty_strata - 1)) / (var_sumY_stratified / (cnt_Y_with_good_stratumID - num_Y_nonempty_strata));
-p_val_X_vs_strata     = fStat_tailprob (fStat_X_vs_strata, num_X_nonempty_strata - 1, cnt_X_with_good_stratumID - num_X_nonempty_strata);
-p_val_Y_vs_strata     = fStat_tailprob (fStat_Y_vs_strata, num_Y_nonempty_strata - 1, cnt_Y_with_good_stratumID - num_Y_nonempty_strata);
-
-print ("Computing the global bivariate statistics...");
-
-# Compute the aggregate X vs. Y statistics and map them into proper positions
-
-cnt_XY_rectangle       = t(X_mask) %*% Y_mask;
-sum_X_forXY_rectangle  = t(X)      %*% Y_mask;
-sum_XX_forXY_rectangle = t(X * X)  %*% Y_mask;
-sum_Y_forXY_rectangle  = t(X_mask) %*% Y;
-sum_YY_forXY_rectangle = t(X_mask) %*% (Y * Y);
-sum_XY_rectangle       = t(X)      %*% Y;
-cnt_XY_global       = matrix (cnt_XY_rectangle,       rows = 1, cols = num_attrs_XY, byrow = TRUE);
-sum_X_forXY_global  = matrix (sum_X_forXY_rectangle,  rows = 1, cols = num_attrs_XY, byrow = TRUE);
-sum_XX_forXY_global = matrix (sum_XX_forXY_rectangle, rows = 1, cols = num_attrs_XY, byrow = TRUE);
-sum_Y_forXY_global  = matrix (sum_Y_forXY_rectangle,  rows = 1, cols = num_attrs_XY, byrow = TRUE);
-sum_YY_forXY_global = matrix (sum_YY_forXY_rectangle, rows = 1, cols = num_attrs_XY, byrow = TRUE);
-sum_XY_global       = matrix (sum_XY_rectangle,       rows = 1, cols = num_attrs_XY, byrow = TRUE);
-ones_XY = matrix (1.0, rows = 1, cols = num_attrs_XY);
-
-# Compute the global bivariate statistics for output
-
-cov_sumX_sumY_global    = sum_XY_global - sum_X_forXY_global * sum_Y_forXY_global / cnt_XY_global;
-var_sumX_forXY_global   = sum_XX_forXY_global - sum_X_forXY_global * sum_X_forXY_global / cnt_XY_global;
-var_sumY_forXY_global   = sum_YY_forXY_global - sum_Y_forXY_global * sum_Y_forXY_global / cnt_XY_global;
-slope_XY_global         = cov_sumX_sumY_global / var_sumX_forXY_global;
-                                                 sqrt_failsafe_input_5 = var_sumX_forXY_global * var_sumY_forXY_global;
-                                                 sqrt_failsafe_output_5 = sqrt_failsafe (sqrt_failsafe_input_5);
-corr_XY_global          = cov_sumX_sumY_global / sqrt_failsafe_output_5;
-r_sqr_X_vs_Y_global     = cov_sumX_sumY_global * cov_sumX_sumY_global / (var_sumX_forXY_global * var_sumY_forXY_global);
-adj_r_sqr_X_vs_Y_global = 1 - (1 - r_sqr_X_vs_Y_global) * (cnt_XY_global - 1) / (cnt_XY_global - 2);
-                          sqrt_failsafe_input_6 = (1 - r_sqr_X_vs_Y_global) * var_sumY_forXY_global / var_sumX_forXY_global / (cnt_XY_global - 2)
-stdev_slope_XY_global   = sqrt_failsafe (sqrt_failsafe_input_6);
-                          sqrt_failsafe_input_7 = (1 - r_sqr_X_vs_Y_global) * var_sumY_forXY_global / (cnt_XY_global - 2)
-stdev_errY_vs_X_global  = sqrt_failsafe (sqrt_failsafe_input_7);
-fStat_Y_vs_X_global     = (cnt_XY_global - 2) * r_sqr_X_vs_Y_global / (1 - r_sqr_X_vs_Y_global);
-p_val_Y_vs_X_global     = fStat_tailprob (fStat_Y_vs_X_global, ones_XY, cnt_XY_global - 2);
-
-print ("Computing the stratified bivariate statistics...");
-
-# Create projections to "intermingle" X and Y into attribute pairs
-
-Proj_X_to_XY = matrix (0.0, rows = num_attrs_X, cols = num_attrs_XY);
-Proj_Y_to_XY = matrix (0.0, rows = num_attrs_Y, cols = num_attrs_XY);
-ones_Y_col   = matrix (1.0, rows = num_attrs_Y, cols = 1);
-for (i in 1:num_attrs_X) {
-    start_cid = (i - 1) * num_attrs_Y + 1;
-    end_cid = i * num_attrs_Y;
-    Proj_X_to_XY [i, start_cid:end_cid] = t(ones_Y_col);
-    Proj_Y_to_XY [ , start_cid:end_cid] = diag (ones_Y_col);
-}
-
-# Compute per-stratum statistics, prevent div-0 for locally empty (due to NaNs in X or Y) strata
-
-Cnt_XY_per_stratum       = StrataSummator %*% (( X_mask %*% Proj_X_to_XY) * ( Y_mask %*% Proj_Y_to_XY));
-Sum_X_forXY_per_stratum  = StrataSummator %*% (( X      %*% Proj_X_to_XY) * ( Y_mask %*% Proj_Y_to_XY));
-Sum_XX_forXY_per_stratum = StrataSummator %*% (((X * X) %*% Proj_X_to_XY) * ( Y_mask %*% Proj_Y_to_XY));
-Sum_Y_forXY_per_stratum  = StrataSummator %*% (( X_mask %*% Proj_X_to_XY) * ( Y      %*% Proj_Y_to_XY));
-Sum_YY_forXY_per_stratum = StrataSummator %*% (( X_mask %*% Proj_X_to_XY) * ((Y * Y) %*% Proj_Y_to_XY));
-Sum_XY_per_stratum       = StrataSummator %*% (( X      %*% Proj_X_to_XY) * ( Y      %*% Proj_Y_to_XY));
-
-Is_none_XY_per_stratum = ppred (Cnt_XY_per_stratum, 0, "==");
-One_over_cnt_XY_per_stratum = (1 - Is_none_XY_per_stratum) / (Cnt_XY_per_stratum + Is_none_XY_per_stratum);
-num_XY_nonempty_strata = num_strata - colSums (Is_none_XY_per_stratum);
-
-# Recompute some global aggregate X vs. Y statistics to exclude bad stratum-ID records
-
-cnt_XY_with_good_stratumID = colSums (Cnt_XY_per_stratum);
-sum_XX_forXY_with_good_stratumID = colSums (Sum_XX_forXY_per_stratum);
-sum_YY_forXY_with_good_stratumID = colSums (Sum_YY_forXY_per_stratum);
-sum_XY_with_good_stratumID = colSums (Sum_XY_per_stratum);
-
-# Compute the stratified bivariate statistics
-
-var_sumX_forXY_stratified = sum_XX_forXY_with_good_stratumID - colSums (Sum_X_forXY_per_stratum * Sum_X_forXY_per_stratum * One_over_cnt_XY_per_stratum);
-var_sumY_forXY_stratified = sum_YY_forXY_with_good_stratumID - colSums (Sum_Y_forXY_per_stratum * Sum_Y_forXY_per_stratum * One_over_cnt_XY_per_stratum);
-cov_sumX_sumY_stratified  = sum_XY_with_good_stratumID       - colSums (Sum_X_forXY_per_stratum * Sum_Y_forXY_per_stratum * One_over_cnt_XY_per_stratum);
-
-slope_XY_stratified     = cov_sumX_sumY_stratified / var_sumX_forXY_stratified;
-                                                     sqrt_failsafe_input_8 = var_sumX_forXY_stratified * var_sumY_forXY_stratified;
-                                                     sqrt_failsafe_output_8 = sqrt_failsafe (sqrt_failsafe_input_8);
-corr_XY_stratified      = cov_sumX_sumY_stratified / sqrt_failsafe_output_8;
-r_sqr_X_vs_Y_stratified = (cov_sumX_sumY_stratified ^ 2) / (var_sumX_forXY_stratified * var_sumY_forXY_stratified);
-temp_X_vs_Y_stratified  = (1 - r_sqr_X_vs_Y_stratified) / (cnt_XY_with_good_stratumID - num_XY_nonempty_strata - 1)
-adj_r_sqr_X_vs_Y_stratified = 1 - temp_X_vs_Y_stratified * (cnt_XY_with_good_stratumID - num_XY_nonempty_strata);
-                              sqrt_failsafe_input_9  = temp_X_vs_Y_stratified * var_sumY_forXY_stratified;
-stdev_errY_vs_X_stratified  = sqrt_failsafe (sqrt_failsafe_input_9);
-                              sqrt_failsafe_input_10 = sqrt_failsafe_input_9  / var_sumX_forXY_stratified;
-stdev_slope_XY_stratified   = sqrt_failsafe (sqrt_failsafe_input_10);
-fStat_Y_vs_X_stratified = (cnt_XY_with_good_stratumID - num_XY_nonempty_strata - 1) * r_sqr_X_vs_Y_stratified / (1 - r_sqr_X_vs_Y_stratified);
-p_val_Y_vs_X_stratified = fStat_tailprob (fStat_Y_vs_X_stratified, ones_XY, cnt_XY_with_good_stratumID - num_XY_nonempty_strata - 1);
-
-print ("Preparing the output matrix...");
-OutMtx = matrix (0.0, rows = 40, cols = num_attrs_XY);
-
-OutMtx [ 1, ] = Xcols                 %*% Proj_X_to_XY;  # 1st covariate column number
-OutMtx [ 2, ] = cnt_X_global          %*% Proj_X_to_XY;  # 1st covariate global presence count
-OutMtx [ 3, ] = avg_X_global          %*% Proj_X_to_XY;  # 1st covariate global mean
-OutMtx [ 4, ] = stdev_X_global        %*% Proj_X_to_XY;  # 1st covariate global standard deviation
-OutMtx [ 5, ] = stdev_X_stratified    %*% Proj_X_to_XY;  # 1st covariate stratified standard deviation
-OutMtx [ 6, ] = r_sqr_X_vs_strata     %*% Proj_X_to_XY;  # R-squared, 1st covariate vs. strata
-OutMtx [ 7, ] = adj_r_sqr_X_vs_strata %*% Proj_X_to_XY;  # adjusted R-squared, 1st covariate vs. strata
-OutMtx [ 8, ] = p_val_X_vs_strata     %*% Proj_X_to_XY;  # P-value, 1st covariate vs. strata
-OutMtx [11, ] = Ycols                 %*% Proj_Y_to_XY;  # 2nd covariate column number
-OutMtx [12, ] = cnt_Y_global          %*% Proj_Y_to_XY;  # 2nd covariate global presence count
-OutMtx [13, ] = avg_Y_global          %*% Proj_Y_to_XY;  # 2nd covariate global mean
-OutMtx [14, ] = stdev_Y_global        %*% Proj_Y_to_XY;  # 2nd covariate global standard deviation
-OutMtx [15, ] = stdev_Y_stratified    %*% Proj_Y_to_XY;  # 2nd covariate stratified standard deviation
-OutMtx [16, ] = r_sqr_Y_vs_strata     %*% Proj_Y_to_XY;  # R-squared, 2nd covariate vs. strata
-OutMtx [17, ] = adj_r_sqr_Y_vs_strata %*% Proj_Y_to_XY;  # adjusted R-squared, 2nd covariate vs. strata
-OutMtx [18, ] = p_val_Y_vs_strata     %*% Proj_Y_to_XY;  # P-value, 2nd covariate vs. strata
-
-OutMtx [21, ] = cnt_XY_global;               # Global 1st & 2nd covariate presence count
-OutMtx [22, ] = slope_XY_global;             # Global regression slope (2nd vs. 1st covariate)
-OutMtx [23, ] = stdev_slope_XY_global;       # Global regression slope standard deviation
-OutMtx [24, ] = corr_XY_global;              # Global correlation = +/- sqrt(R-squared)
-OutMtx [25, ] = stdev_errY_vs_X_global;      # Global residual standard deviation
-OutMtx [26, ] = r_sqr_X_vs_Y_global;         # Global R-squared
-OutMtx [27, ] = adj_r_sqr_X_vs_Y_global;     # Global adjusted R-squared
-OutMtx [28, ] = p_val_Y_vs_X_global;         # Global P-value for hypothesis "slope = 0"
-OutMtx [31, ] = cnt_XY_with_good_stratumID;  # Stratified 1st & 2nd covariate presence count
-OutMtx [32, ] = slope_XY_stratified;         # Stratified regression slope (2nd vs. 1st covariate)
-OutMtx [33, ] = stdev_slope_XY_stratified;   # Stratified regression slope standard deviation
-OutMtx [34, ] = corr_XY_stratified;          # Stratified correlation = +/- sqrt(R-squared)
-OutMtx [35, ] = stdev_errY_vs_X_stratified;  # Stratified residual standard deviation
-OutMtx [36, ] = r_sqr_X_vs_Y_stratified;     # Stratified R-squared
-OutMtx [37, ] = adj_r_sqr_X_vs_Y_stratified; # Stratified adjusted R-squared
-OutMtx [38, ] = p_val_Y_vs_X_stratified;     # Stratified P-value for hypothesis "slope = 0"
-OutMtx [39, ] = colSums (ppred (Cnt_XY_per_stratum, 2, ">="));  # Number of strata with at least two counted points
-
-OutMtx = t(OutMtx);
-
-print ("Writing the output matrix...");
-write (OutMtx, fileO, format=fmtO);
-print ("END STRATIFIED STATISTICS SCRIPT");
-
-
-fStat_tailprob = function (Matrix[double] fStat, Matrix[double] df_1, Matrix[double] df_2) return (Matrix[double] tailprob)
-{ # TEMPORARY IMPLEMENTATION
-    tailprob = fStat;
-    for (i in 1:nrow(fStat)) {
-      for (j in 1:ncol(fStat)) {
-        q = castAsScalar (fStat [i, j]);
-        d1 = castAsScalar (df_1 [i, j]);
-        d2 = castAsScalar (df_2 [i, j]);
-        if (d1 >= 1 & d2 >= 1 & q >= 0.0) {
-            tailprob  [i, j] = pf(target = q, df1 = d1, df2 = d2, lower.tail=FALSE);
-        } else {
-            tailprob  [i, j] = 0/0;
-        }
-    } }
-}
-
-sqrt_failsafe = function (Matrix[double] input_A) return (Matrix[double] output_A)
-{
-    mask_A = ppred (input_A, 0.0, ">=");
-    prep_A = input_A * mask_A;
-    mask_A = mask_A * ppred (prep_A, prep_A, "==");
-    prep_A = replace (target = prep_A, pattern = 0.0/0.0, replacement = 0);
-    output_A = sqrt (prep_A) / mask_A;
-}
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+#
+# STRATIFIED BIVARIATE STATISTICS, VERSION 4
+# 
+# INPUT PARAMETERS:
+# -----------------------------------------------------------------------------
+# NAME  TYPE   DEFAULT  MEANING
+# -----------------------------------------------------------------------------
+# X     String  ---     Location to read matrix X that has all 1-st covariates
+# Y     String  " "     Location to read matrix Y that has all 2-nd covariates
+#                       the default value " " means "use X in place of Y"
+# S     String  " "     Location to read matrix S that has the stratum column
+#                       the default value " " means "use X in place of S"
+# Xcid  String  " "     Location to read the 1-st covariate X-column indices
+#                       the default value " " means "use columns 1 : ncol(X)"
+# Ycid  String  " "     Location to read the 2-nd covariate Y-column indices
+#                       the default value " " means "use columns 1 : ncol(Y)"
+# Scid  Int      1      Column index of the stratum column in S
+# O     String  ---     Location to store the output matrix (see below)
+# fmt   String "text"   Matrix output format, usually "text" or "csv"
+# -----------------------------------------------------------------------------
+# Note: the stratum column must contain small positive integers; all fractional
+# values are rounded; strata with ID <= 0 or NaN are treated as missing.
+#
+# OUTPUT MATRIX:
+# One row per each distinct pair (1st covariate, 2nd covariate)
+# 40 columns containing the following information:
+#     Col 01: 1st covariate X-column number
+#     Col 02: 1st covariate global presence count
+#     Col 03: 1st covariate global mean
+#     Col 04: 1st covariate global standard deviation
+#     Col 05: 1st covariate stratified standard deviation
+#     Col 06: R-squared, 1st covariate vs. strata
+#     Col 07: adjusted R-squared, 1st covariate vs. strata
+#     Col 08: P-value, 1st covariate vs. strata
+#     Col 09-10: Reserved
+#     Col 11: 2nd covariate Y-column number
+#     Col 12: 2nd covariate global presence count
+#     Col 13: 2nd covariate global mean
+#     Col 14: 2nd covariate global standard deviation
+#     Col 15: 2nd covariate stratified standard deviation
+#     Col 16: R-squared, 2nd covariate vs. strata
+#     Col 17: adjusted R-squared, 2nd covariate vs. strata
+#     Col 18: P-value, 2nd covariate vs. strata
+#     Col 19-20: Reserved
+#     Col 21: Global 1st & 2nd covariate presence count
+#     Col 22: Global regression slope (2nd vs. 1st covariate)
+#     Col 23: Global regression slope standard deviation
+#     Col 24: Global correlation = +/- sqrt(R-squared)
+#     Col 25: Global residual standard deviation
+#     Col 26: Global R-squared
+#     Col 27: Global adjusted R-squared
+#     Col 28: Global P-value for hypothesis "slope = 0"
+#     Col 29-30: Reserved
+#     Col 31: Stratified 1st & 2nd covariate presence count
+#     Col 32: Stratified regression slope (2nd vs. 1st covariate)
+#     Col 33: Stratified regression slope standard deviation
+#     Col 34: Stratified correlation = +/- sqrt(R-squared)
+#     Col 35: Stratified residual standard deviation
+#     Col 36: Stratified R-squared
+#     Col 37: Stratified adjusted R-squared
+#     Col 38: Stratified P-value for hypothesis "slope = 0"
+#     Col 39: Number of strata with at least two counted points
+#     Col 40: Reserved
+#
+# EXAMPLES:
+#
+# hadoop jar SystemML.jar -f stratstats.dml -nvargs X=INPUT_DIR/X.mtx Xcid=INPUT_DIR/Xcid.mtx
+#     Y=INPUT_DIR/Y.mtx Ycid=INPUT_DIR/Ycid.mtx S=INPUT_DIR/S.mtx Scid=1 O=OUTPUT_DIR/Out.mtx fmt=csv
+#
+# hadoop jar SystemML.jar -f stratstats.dml -nvargs X=INPUT_DIR/Data.mtx Xcid=INPUT_DIR/Xcid.mtx
+#     Ycid=INPUT_DIR/Ycid.mtx Scid=1 O=OUTPUT_DIR/Out.mtx
+
+fileX = $X;
+fileY = ifdef ($Y, " ");
+fileS = ifdef ($S, " ");
+fileO = $O;
+fmtO  = ifdef ($fmt, "text");
+
+fileXcid = ifdef ($Xcid, " ");
+fileYcid = ifdef ($Ycid, " ");
+stratum_column_id = ifdef ($Scid, 1);
+
+print ("BEGIN STRATIFIED STATISTICS SCRIPT");
+
+print ("Reading the input matrices...");
+
+XwithNaNs = read (fileX);
+if (fileY != " ") {
+    YwithNaNs = read (fileY);
+} else {
+    YwithNaNs = XwithNaNs;
+}
+if (fileS != " ") {
+    SwithNaNsFull = read (fileS);
+    SwithNaNs = SwithNaNsFull [, stratum_column_id];
+} else {
+    SwithNaNs = XwithNaNs [, stratum_column_id];
+}
+if (fileXcid != " ") {
+    Xcols = read (fileXcid);
+} else {
+    Xcols = t(seq (1, ncol (XwithNaNs), 1));
+}
+if (fileYcid != " ") {
+    Ycols = read (fileYcid);
+} else {
+    Ycols = t(seq (1, ncol (YwithNaNs), 1));
+}
+tXcols = t(Xcols);
+tYcols = t(Ycols);
+
+num_records  = nrow (XwithNaNs);
+num_attrs    = ncol (XwithNaNs);
+num_attrs_X  = ncol (Xcols);
+num_attrs_Y  = ncol (Ycols);
+num_attrs_XY = num_attrs_X * num_attrs_Y;
+
+print ("Preparing the covariates...");
+# Zero out NaNs for the arithmetic; the masks are 1 where a value is present (NaN == NaN is false)
+XnoNaNs = replace (target = XwithNaNs, pattern = 0.0/0.0, replacement = 0);
+YnoNaNs = replace (target = YwithNaNs, pattern = 0.0/0.0, replacement = 0);
+XNaNmask = ppred (XwithNaNs, XwithNaNs, "==");
+YNaNmask = ppred (YwithNaNs, YwithNaNs, "==");
+one_to_num_attrs_X = seq (1, num_attrs_X, 1);
+one_to_num_attrs_Y = seq (1, num_attrs_Y, 1);
+ProjX = matrix (0, rows = ncol (XwithNaNs), cols = num_attrs_X);  # one row per column of the X input
+ProjY = matrix (0, rows = ncol (YwithNaNs), cols = num_attrs_Y);  # sized from Y itself: Y may be wider or narrower than X
+# ProjX [c, j] = 1 iff the j-th selected 1st covariate is column c of the X input
+ProjX_ctable = table (tXcols, one_to_num_attrs_X);
+ProjX [1 : nrow (ProjX_ctable), ] = ProjX_ctable;
+# ProjY [c, j] = 1 iff the j-th selected 2nd covariate is column c of the Y input
+ProjY_ctable = table (tYcols, one_to_num_attrs_Y);
+ProjY [1 : nrow (ProjY_ctable), ] = ProjY_ctable;
+# Project the data and the presence masks onto the selected covariate columns
+X = XnoNaNs %*% ProjX;
+Y = YnoNaNs %*% ProjY;
+X_mask = XNaNmask %*% ProjX;
+Y_mask = YNaNmask %*% ProjY;
+
+print ("Preparing the strata...");
+
+SnoNaNs = replace (target = SwithNaNs, pattern = 0.0/0.0, replacement = 0);
+S = round (SnoNaNs) * ppred (SnoNaNs, 0.0, ">");  # rounded IDs; non-positive entries (and NaNs, replaced by 0 above) become 0
+Proj_good_stratumID = diag (ppred (S, 0.0, ">"));  # diagonal 0/1 selector of records whose rounded ID is positive
+Proj_good_stratumID = removeEmpty (target = Proj_good_stratumID, margin = "rows");  # drop the all-zero rows of rejected records
+vector_of_good_stratumIDs = Proj_good_stratumID %*% S;
+vector_of_good_stratumIDs = vector_of_good_stratumIDs + (1 - min (vector_of_good_stratumIDs));  # shift so the smallest ID becomes 1
+num_records_with_good_stratumID = nrow (Proj_good_stratumID);
+one_to_num_records_with_good_stratumID = seq (1, num_records_with_good_stratumID, 1);
+
+# Create a group-by summation matrix for records over stratum IDs
+# "with_empty" means with stratum IDs that never occur in records
+
+num_strata_with_empty = max (vector_of_good_stratumIDs);
+StrataSummator_with_empty = table (vector_of_good_stratumIDs, one_to_num_records_with_good_stratumID);  # [s, r] = 1 iff good record r has (shifted) stratum ID s
+StrataSummator = removeEmpty (target = StrataSummator_with_empty, margin = "rows");  # keep only strata that contain a record
+StrataSummator = StrataSummator %*% Proj_good_stratumID;  # widen columns back to all records; rejected records get zero columns
+num_strata = nrow (StrataSummator);
+num_empty_strata = num_strata_with_empty - num_strata;
+print ("There are " + num_strata + " nonempty strata and " + num_empty_strata + " empty but positive-ID strata.");
+
+print ("Computing the global single-variate statistics...");
+
+cnt_X_global = colSums (X_mask);
+cnt_Y_global = colSums (Y_mask);
+avg_X_global = colSums (X) / cnt_X_global;
+avg_Y_global = colSums (Y) / cnt_Y_global;
+var_sumX_global = colSums (X * X) - cnt_X_global * (avg_X_global * avg_X_global);
+var_sumY_global = colSums (Y * Y) - cnt_Y_global * (avg_Y_global * avg_Y_global);
+                 sqrt_failsafe_input_1 = var_sumX_global / (cnt_X_global - 1);
+stdev_X_global = sqrt_failsafe (sqrt_failsafe_input_1);
+                 sqrt_failsafe_input_2 = var_sumY_global / (cnt_Y_global - 1);
+stdev_Y_global = sqrt_failsafe (sqrt_failsafe_input_2);
+
+print ("Computing the stratified single-variate statistics...");
+
+# Compute per-stratum statistics, prevent div-0 for locally empty (due to NaNs in X or Y) strata
+
+Cnt_X_per_stratum = StrataSummator %*% X_mask;
+Cnt_Y_per_stratum = StrataSummator %*% Y_mask;
+Is_none_X_per_stratum = ppred (Cnt_X_per_stratum, 0, "==");
+Is_none_Y_per_stratum = ppred (Cnt_Y_per_stratum, 0, "==");
+One_over_cnt_X_per_stratum = (1 - Is_none_X_per_stratum) / (Cnt_X_per_stratum + Is_none_X_per_stratum);
+One_over_cnt_Y_per_stratum = (1 - Is_none_Y_per_stratum) / (Cnt_Y_per_stratum + Is_none_Y_per_stratum);
+num_X_nonempty_strata = num_strata - colSums (Is_none_X_per_stratum);
+num_Y_nonempty_strata = num_strata - colSums (Is_none_Y_per_stratum);
+
+Sum_X_per_stratum  = StrataSummator %*% X;
+Sum_Y_per_stratum  = StrataSummator %*% Y;
+
+# Recompute some global statistics to exclude bad stratum-ID records
+
+cnt_X_with_good_stratumID = colSums (Cnt_X_per_stratum);
+cnt_Y_with_good_stratumID = colSums (Cnt_Y_per_stratum);
+sum_X_with_good_stratumID = colSums (Sum_X_per_stratum);
+sum_Y_with_good_stratumID = colSums (Sum_Y_per_stratum);
+var_sumX_with_good_stratumID = colSums (StrataSummator %*% (X * X)) - (sum_X_with_good_stratumID * sum_X_with_good_stratumID) / cnt_X_with_good_stratumID;
+var_sumY_with_good_stratumID = colSums (StrataSummator %*% (Y * Y)) - (sum_Y_with_good_stratumID * sum_Y_with_good_stratumID) / cnt_Y_with_good_stratumID;
+
+# Compute the stratified statistics
+
+var_sumX_stratified   = colSums (StrataSummator %*% (X * X)) - colSums (One_over_cnt_X_per_stratum * Sum_X_per_stratum * Sum_X_per_stratum);
+var_sumY_stratified   = colSums (StrataSummator %*% (Y * Y)) - colSums (One_over_cnt_Y_per_stratum * Sum_Y_per_stratum * Sum_Y_per_stratum);
+                        sqrt_failsafe_input_3 = var_sumX_stratified / (cnt_X_with_good_stratumID - num_X_nonempty_strata);
+stdev_X_stratified    = sqrt_failsafe (sqrt_failsafe_input_3);
+                        sqrt_failsafe_input_4 = var_sumY_stratified / (cnt_Y_with_good_stratumID - num_Y_nonempty_strata);
+stdev_Y_stratified    = sqrt_failsafe (sqrt_failsafe_input_4);
+r_sqr_X_vs_strata     = 1 - var_sumX_stratified / var_sumX_with_good_stratumID;
+r_sqr_Y_vs_strata     = 1 - var_sumY_stratified / var_sumY_with_good_stratumID;
+adj_r_sqr_X_vs_strata = 1 - (var_sumX_stratified / (cnt_X_with_good_stratumID - num_X_nonempty_strata)) / (var_sumX_with_good_stratumID / (cnt_X_with_good_stratumID - 1));
+adj_r_sqr_Y_vs_strata = 1 - (var_sumY_stratified / (cnt_Y_with_good_stratumID - num_Y_nonempty_strata)) / (var_sumY_with_good_stratumID / (cnt_Y_with_good_stratumID - 1));
+fStat_X_vs_strata     = ((var_sumX_with_good_stratumID - var_sumX_stratified) / (num_X_nonempty_strata - 1)) / (var_sumX_stratified / (cnt_X_with_good_stratumID - num_X_nonempty_strata));
+fStat_Y_vs_strata     = ((var_sumY_with_good_stratumID - var_sumY_stratified) / (num_Y_nonempty_strata - 1)) / (var_sumY_stratified / (cnt_Y_with_good_stratumID - num_Y_nonempty_strata));
+p_val_X_vs_strata     = fStat_tailprob (fStat_X_vs_strata, num_X_nonempty_strata - 1, cnt_X_with_good_stratumID - num_X_nonempty_strata);
+p_val_Y_vs_strata     = fStat_tailprob (fStat_Y_vs_strata, num_Y_nonempty_strata - 1, cnt_Y_with_good_stratumID - num_Y_nonempty_strata);
+
+print ("Computing the global bivariate statistics...");
+
+# Compute the aggregate X vs. Y statistics and map them into proper positions
+
+cnt_XY_rectangle       = t(X_mask) %*% Y_mask;
+sum_X_forXY_rectangle  = t(X)      %*% Y_mask;
+sum_XX_forXY_rectangle = t(X * X)  %*% Y_mask;
+sum_Y_forXY_rectangle  = t(X_mask) %*% Y;
+sum_YY_forXY_rectangle = t(X_mask) %*% (Y * Y);
+sum_XY_rectangle       = t(X)      %*% Y;
+cnt_XY_global       = matrix (cnt_XY_rectangle,       rows = 1, cols = num_attrs_XY, byrow = TRUE);
+sum_X_forXY_global  = matrix (sum_X_forXY_rectangle,  rows = 1, cols = num_attrs_XY, byrow = TRUE);
+sum_XX_forXY_global = matrix (sum_XX_forXY_rectangle, rows = 1, cols = num_attrs_XY, byrow = TRUE);
+sum_Y_forXY_global  = matrix (sum_Y_forXY_rectangle,  rows = 1, cols = num_attrs_XY, byrow = TRUE);
+sum_YY_forXY_global = matrix (sum_YY_forXY_rectangle, rows = 1, cols = num_attrs_XY, byrow = TRUE);
+sum_XY_global       = matrix (sum_XY_rectangle,       rows = 1, cols = num_attrs_XY, byrow = TRUE);
+ones_XY = matrix (1.0, rows = 1, cols = num_attrs_XY);
+
+# Compute the global bivariate statistics for output
+
+cov_sumX_sumY_global    = sum_XY_global - sum_X_forXY_global * sum_Y_forXY_global / cnt_XY_global;
+var_sumX_forXY_global   = sum_XX_forXY_global - sum_X_forXY_global * sum_X_forXY_global / cnt_XY_global;
+var_sumY_forXY_global   = sum_YY_forXY_global - sum_Y_forXY_global * sum_Y_forXY_global / cnt_XY_global;
+slope_XY_global         = cov_sumX_sumY_global / var_sumX_forXY_global;
+                                                 sqrt_failsafe_input_5 = var_sumX_forXY_global * var_sumY_forXY_global;
+                                                 sqrt_failsafe_output_5 = sqrt_failsafe (sqrt_failsafe_input_5);
+corr_XY_global          = cov_sumX_sumY_global / sqrt_failsafe_output_5;
+r_sqr_X_vs_Y_global     = cov_sumX_sumY_global * cov_sumX_sumY_global / (var_sumX_forXY_global * var_sumY_forXY_global);
+adj_r_sqr_X_vs_Y_global = 1 - (1 - r_sqr_X_vs_Y_global) * (cnt_XY_global - 1) / (cnt_XY_global - 2);
+                          sqrt_failsafe_input_6 = (1 - r_sqr_X_vs_Y_global) * var_sumY_forXY_global / var_sumX_forXY_global / (cnt_XY_global - 2)
+stdev_slope_XY_global   = sqrt_failsafe (sqrt_failsafe_input_6);
+                          sqrt_failsafe_input_7 = (1 - r_sqr_X_vs_Y_global) * var_sumY_forXY_global / (cnt_XY_global - 2)
+stdev_errY_vs_X_global  = sqrt_failsafe (sqrt_failsafe_input_7);
+fStat_Y_vs_X_global     = (cnt_XY_global - 2) * r_sqr_X_vs_Y_global / (1 - r_sqr_X_vs_Y_global);
+p_val_Y_vs_X_global     = fStat_tailprob (fStat_Y_vs_X_global, ones_XY, cnt_XY_global - 2);
+
+print ("Computing the stratified bivariate statistics...");
+
+# Create projections to "intermingle" X and Y into attribute pairs
+
+Proj_X_to_XY = matrix (0.0, rows = num_attrs_X, cols = num_attrs_XY);
+Proj_Y_to_XY = matrix (0.0, rows = num_attrs_Y, cols = num_attrs_XY);
+ones_Y_col   = matrix (1.0, rows = num_attrs_Y, cols = 1);
+for (i in 1:num_attrs_X) {
+    start_cid = (i - 1) * num_attrs_Y + 1;
+    end_cid = i * num_attrs_Y;
+    Proj_X_to_XY [i, start_cid:end_cid] = t(ones_Y_col);
+    Proj_Y_to_XY [ , start_cid:end_cid] = diag (ones_Y_col);
+}
+
+# Compute per-stratum statistics, prevent div-0 for locally empty (due to NaNs in X or Y) strata
+
+Cnt_XY_per_stratum       = StrataSummator %*% (( X_mask %*% Proj_X_to_XY) * ( Y_mask %*% Proj_Y_to_XY));
+Sum_X_forXY_per_stratum  = StrataSummator %*% (( X      %*% Proj_X_to_XY) * ( Y_mask %*% Proj_Y_to_XY));
+Sum_XX_forXY_per_stratum = StrataSummator %*% (((X * X) %*% Proj_X_to_XY) * ( Y_mask %*% Proj_Y_to_XY));
+Sum_Y_forXY_per_stratum  = StrataSummator %*% (( X_mask %*% Proj_X_to_XY) * ( Y      %*% Proj_Y_to_XY));
+Sum_YY_forXY_per_stratum = StrataSummator %*% (( X_mask %*% Proj_X_to_XY) * ((Y * Y) %*% Proj_Y_to_XY));
+Sum_XY_per_stratum       = StrataSummator %*% (( X      %*% Proj_X_to_XY) * ( Y      %*% Proj_Y_to_XY));
+
+Is_none_XY_per_stratum = ppred (Cnt_XY_per_stratum, 0, "==");
+One_over_cnt_XY_per_stratum = (1 - Is_none_XY_per_stratum) / (Cnt_XY_per_stratum + Is_none_XY_per_stratum);
+num_XY_nonempty_strata = num_strata - colSums (Is_none_XY_per_stratum);
+
+# Recompute some global aggregate X vs. Y statistics to exclude bad stratum-ID records
+
+cnt_XY_with_good_stratumID = colSums (Cnt_XY_per_stratum);
+sum_XX_forXY_with_good_stratumID = colSums (Sum_XX_forXY_per_stratum);
+sum_YY_forXY_with_good_stratumID = colSums (Sum_YY_forXY_per_stratum);
+sum_XY_with_good_stratumID = colSums (Sum_XY_per_stratum);
+
+# Compute the stratified bivariate statistics
+
+var_sumX_forXY_stratified = sum_XX_forXY_with_good_stratumID - colSums (Sum_X_forXY_per_stratum * Sum_X_forXY_per_stratum * One_over_cnt_XY_per_stratum);
+var_sumY_forXY_stratified = sum_YY_forXY_with_good_stratumID - colSums (Sum_Y_forXY_per_stratum * Sum_Y_forXY_per_stratum * One_over_cnt_XY_per_stratum);
+cov_sumX_sumY_stratified  = sum_XY_with_good_stratumID       - colSums (Sum_X_forXY_per_stratum * Sum_Y_forXY_per_stratum * One_over_cnt_XY_per_stratum);
+
+slope_XY_stratified     = cov_sumX_sumY_stratified / var_sumX_forXY_stratified;
+                                                     sqrt_failsafe_input_8 = var_sumX_forXY_stratified * var_sumY_forXY_stratified;
+                                                     sqrt_failsafe_output_8 = sqrt_failsafe (sqrt_failsafe_input_8);
+corr_XY_stratified      = cov_sumX_sumY_stratified / sqrt_failsafe_output_8;
+r_sqr_X_vs_Y_stratified = (cov_sumX_sumY_stratified ^ 2) / (var_sumX_forXY_stratified * var_sumY_forXY_stratified);
+temp_X_vs_Y_stratified  = (1 - r_sqr_X_vs_Y_stratified) / (cnt_XY_with_good_stratumID - num_XY_nonempty_strata - 1)
+adj_r_sqr_X_vs_Y_stratified = 1 - temp_X_vs_Y_stratified * (cnt_XY_with_good_stratumID - num_XY_nonempty_strata);
+                              sqrt_failsafe_input_9  = temp_X_vs_Y_stratified * var_sumY_forXY_stratified;
+stdev_errY_vs_X_stratified  = sqrt_failsafe (sqrt_failsafe_input_9);
+                              sqrt_failsafe_input_10 = sqrt_failsafe_input_9  / var_sumX_forXY_stratified;
+stdev_slope_XY_stratified   = sqrt_failsafe (sqrt_failsafe_input_10);
+fStat_Y_vs_X_stratified = (cnt_XY_with_good_stratumID - num_XY_nonempty_strata - 1) * r_sqr_X_vs_Y_stratified / (1 - r_sqr_X_vs_Y_stratified);
+p_val_Y_vs_X_stratified = fStat_tailprob (fStat_Y_vs_X_stratified, ones_XY, cnt_XY_with_good_stratumID - num_XY_nonempty_strata - 1);
+
+print ("Preparing the output matrix...");
+OutMtx = matrix (0.0, rows = 40, cols = num_attrs_XY);
+
+OutMtx [ 1, ] = Xcols                 %*% Proj_X_to_XY;  # 1st covariate column number
+OutMtx [ 2, ] = cnt_X_global          %*% Proj_X_to_XY;  # 1st covariate global presence count
+OutMtx [ 3, ] = avg_X_global          %*% Proj_X_to_XY;  # 1st covariate global mean
+OutMtx [ 4, ] = stdev_X_global        %*% Proj_X_to_XY;  # 1st covariate global standard deviation
+OutMtx [ 5, ] = stdev_X_stratified    %*% Proj_X_to_XY;  # 1st covariate stratified standard deviation
+OutMtx [ 6, ] = r_sqr_X_vs_strata     %*% Proj_X_to_XY;  # R-squared, 1st covariate vs. strata
+OutMtx [ 7, ] = adj_r_sqr_X_vs_strata %*% Proj_X_to_XY;  # adjusted R-squared, 1st covariate vs. strata
+OutMtx [ 8, ] = p_val_X_vs_strata     %*% Proj_X_to_XY;  # P-value, 1st covariate vs. strata
+OutMtx [11, ] = Ycols                 %*% Proj_Y_to_XY;  # 2nd covariate column number
+OutMtx [12, ] = cnt_Y_global          %*% Proj_Y_to_XY;  # 2nd covariate global presence count
+OutMtx [13, ] = avg_Y_global          %*% Proj_Y_to_XY;  # 2nd covariate global mean
+OutMtx [14, ] = stdev_Y_global        %*% Proj_Y_to_XY;  # 2nd covariate global standard deviation
+OutMtx [15, ] = stdev_Y_stratified    %*% Proj_Y_to_XY;  # 2nd covariate stratified standard deviation
+OutMtx [16, ] = r_sqr_Y_vs_strata     %*% Proj_Y_to_XY;  # R-squared, 2nd covariate vs. strata
+OutMtx [17, ] = adj_r_sqr_Y_vs_strata %*% Proj_Y_to_XY;  # adjusted R-squared, 2nd covariate vs. strata
+OutMtx [18, ] = p_val_Y_vs_strata     %*% Proj_Y_to_XY;  # P-value, 2nd covariate vs. strata
+
+OutMtx [21, ] = cnt_XY_global;               # Global 1st & 2nd covariate presence count
+OutMtx [22, ] = slope_XY_global;             # Global regression slope (2nd vs. 1st covariate)
+OutMtx [23, ] = stdev_slope_XY_global;       # Global regression slope standard deviation
+OutMtx [24, ] = corr_XY_global;              # Global correlation = +/- sqrt(R-squared)
+OutMtx [25, ] = stdev_errY_vs_X_global;      # Global residual standard deviation
+OutMtx [26, ] = r_sqr_X_vs_Y_global;         # Global R-squared
+OutMtx [27, ] = adj_r_sqr_X_vs_Y_global;     # Global adjusted R-squared
+OutMtx [28, ] = p_val_Y_vs_X_global;         # Global P-value for hypothesis "slope = 0"
+OutMtx [31, ] = cnt_XY_with_good_stratumID;  # Stratified 1st & 2nd covariate presence count
+OutMtx [32, ] = slope_XY_stratified;         # Stratified regression slope (2nd vs. 1st covariate)
+OutMtx [33, ] = stdev_slope_XY_stratified;   # Stratified regression slope standard deviation
+OutMtx [34, ] = corr_XY_stratified;          # Stratified correlation = +/- sqrt(R-squared)
+OutMtx [35, ] = stdev_errY_vs_X_stratified;  # Stratified residual standard deviation
+OutMtx [36, ] = r_sqr_X_vs_Y_stratified;     # Stratified R-squared
+OutMtx [37, ] = adj_r_sqr_X_vs_Y_stratified; # Stratified adjusted R-squared
+OutMtx [38, ] = p_val_Y_vs_X_stratified;     # Stratified P-value for hypothesis "slope = 0"
+OutMtx [39, ] = colSums (ppred (Cnt_XY_per_stratum, 2, ">="));  # Number of strata with at least two counted points
+
+OutMtx = t(OutMtx);
+
+print ("Writing the output matrix...");
+write (OutMtx, fileO, format=fmtO);
+print ("END STRATIFIED STATISTICS SCRIPT");
+
+
+fStat_tailprob = function (Matrix[double] fStat, Matrix[double] df_1, Matrix[double] df_2) return (Matrix[double] tailprob)
+{ # TEMPORARY IMPLEMENTATION: cell-by-cell upper-tail F-distribution probability; df_1 and df_2 are read at the same [i, j] as fStat
+    tailprob = fStat;  # start from a copy so the result has the dimensions of fStat; every cell is overwritten below
+    for (i in 1:nrow(fStat)) {
+      for (j in 1:ncol(fStat)) {
+        q = castAsScalar (fStat [i, j]);
+        d1 = castAsScalar (df_1 [i, j]);
+        d2 = castAsScalar (df_2 [i, j]);
+        if (d1 >= 1 & d2 >= 1 & q >= 0.0) {  # any NaN operand makes its comparison false and falls through to the else branch
+            tailprob  [i, j] = pf(target = q, df1 = d1, df2 = d2, lower.tail=FALSE);
+        } else {
+            tailprob  [i, j] = 0/0;  # NaN marks cells with degrees of freedom below 1 or a negative statistic
+        }
+    } }
+}
+
+sqrt_failsafe = function (Matrix[double] input_A) return (Matrix[double] output_A)
+{ # Cell-wise square root that never passes a negative or NaN value to sqrt(); such cells come out as NaN via 0/0
+    mask_A = ppred (input_A, 0.0, ">=");  # 1 where the input is >= 0; 0 for negative and NaN cells
+    prep_A = input_A * mask_A;  # negative cells become zero; NaN cells stay NaN (NaN * 0)
+    mask_A = mask_A * ppred (prep_A, prep_A, "==");  # additionally clear the mask wherever prep_A is still NaN
+    prep_A = replace (target = prep_A, pattern = 0.0/0.0, replacement = 0);  # remove remaining NaNs before taking the root
+    output_A = sqrt (prep_A) / mask_A;  # divide by 1 to keep valid roots, by 0 to turn masked cells into NaN
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/scripts/datagen/genCorrelatedData.dml
----------------------------------------------------------------------
diff --git a/scripts/datagen/genCorrelatedData.dml b/scripts/datagen/genCorrelatedData.dml
index e81583b..d3289ce 100644
--- a/scripts/datagen/genCorrelatedData.dml
+++ b/scripts/datagen/genCorrelatedData.dml
@@ -1,46 +1,46 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# generates random correlated data
-# can generate any number of variables/columns
-# used to test univariate stats computation
-# by systemml
-
-# $1 is number of variables/columns
-# $2 is number of samples to create
-# $3 is the location to write out the covariance mat
-# $4 is the location to write out the generated data
-dims = $1
-numSamples = $2
-
-U = Rand(rows=dims, cols=dims, min=-1.0, max=1.0, pdf="uniform", seed=0)
-denoms = sqrt(colSums(U*U))
-parfor(i in 1:dims){
-	U[i,] = U[i,] / denoms
-}
-
-C = t(U)%*%U
-write(C, $3, format="binary")
-
-R = Rand(rows=numSamples, cols=dims, pdf="normal", seed=0)
-Rc = R%*%U
-write(Rc, $4, format="binary")
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# generates random correlated data
+# can generate any number of variables/columns
+# used to test univariate stats computation
+# by systemml
+
+# $1 is number of variables/columns
+# $2 is number of samples to create
+# $3 is the location to write out the covariance mat
+# $4 is the location to write out the generated data
+dims = $1
+numSamples = $2
+
+U = Rand(rows=dims, cols=dims, min=-1.0, max=1.0, pdf="uniform", seed=0)
+denoms = sqrt(colSums(U*U))
+parfor(i in 1:dims){
+	U[i,] = U[i,] / denoms
+}
+
+C = t(U)%*%U
+write(C, $3, format="binary")
+
+R = Rand(rows=numSamples, cols=dims, pdf="normal", seed=0)
+Rc = R%*%U
+write(Rc, $4, format="binary")
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/scripts/datagen/genRandData4ChisquaredTest.dml
----------------------------------------------------------------------
diff --git a/scripts/datagen/genRandData4ChisquaredTest.dml b/scripts/datagen/genRandData4ChisquaredTest.dml
index 42db9dd..e25adf2 100644
--- a/scripts/datagen/genRandData4ChisquaredTest.dml
+++ b/scripts/datagen/genRandData4ChisquaredTest.dml
@@ -1,87 +1,87 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# generates a two column matrix of categorical
-# variables
-# used to test systemml's chi-squared bivariate stat
-# computation
-
-# $1 is number of samples to generate
-# $2 is number of categories for 1st categorical variable
-# $3 is number of categories for 2nd categorical variable
-# $4 is the file to write out the chi-squared statistic to
-# $5 is the file to write out the generated data to
-
-numSamples = $1
-numCategories1 = $2
-numCategories2 = $3
-
-o = Rand(rows=numCategories1, cols=numCategories2, min=0.0, max=1.0, pdf="uniform", seed=0)
-o = o / sum(o)
-
-probs1 = rowSums(o)
-probs1 = probs1 / sum(probs1)
-probs2 = colSums(o)
-probs2 = probs2 / sum(probs2)
-e = probs1 %*% probs2
-
-chisquared = sum((o-e)^2/e)
-write(chisquared, $4, format="binary")
-
-oCDF = Rand(rows=numCategories1, cols=numCategories2, min=0.0, max=0.0, pdf="uniform", seed=0)
-for(i in 1:numCategories1){
-	for(j in 1:numCategories2){
-		if(i==1 & j==1){
-			oCDF[i,j] = o[1,1]
-		}
-		if(i != 1 & j == 1){
-			oCDF[i,j] = oCDF[i-1,numCategories2] + o[i,j]
-		}
-		if(j > 1){
-			oCDF[i,j] = oCDF[i,j-1] + o[i,j]
-		}
-	}
-}
-
-one = Rand(rows=1, cols=1, min=1.0, max=1.0, pdf="uniform", seed=0)
-data = Rand(rows=numSamples, cols=2, min=0.0, max=0.0, pdf="uniform", seed=0)
-parfor(s in 1:numSamples){
-	r_mat = Rand(rows=1, cols=1, min=0.0, max=1.0, pdf="uniform", seed=0)
-	r = castAsScalar(r_mat)
-
-	cat1 = -1
-	cat2 = -1
-	continue = 1
-	for(i in 1:numCategories1){
-		for(j in 1:numCategories2){
-			cdf = castAsScalar(oCDF[i,j])
-			if(continue == 1 & r <= cdf){
-				cat1 = i
-				cat2 = j
-				continue = 0
-			}
-		}
-	}
-	
-	data[s,1] = cat1*one
-	data[s,2] = cat2*one
-}
-write(data, $5, format="binary")
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# generates a two column matrix of categorical
+# variables
+# used to test systemml's chi-squared bivariate stat
+# computation
+
+# $1 is number of samples to generate
+# $2 is number of categories for 1st categorical variable
+# $3 is number of categories for 2nd categorical variable
+# $4 is the file to write out the chi-squared statistic to
+# $5 is the file to write out the generated data to
+
+numSamples = $1
+numCategories1 = $2
+numCategories2 = $3
+
+o = Rand(rows=numCategories1, cols=numCategories2, min=0.0, max=1.0, pdf="uniform", seed=0)
+o = o / sum(o)
+
+probs1 = rowSums(o)
+probs1 = probs1 / sum(probs1)
+probs2 = colSums(o)
+probs2 = probs2 / sum(probs2)
+e = probs1 %*% probs2
+
+chisquared = sum((o-e)^2/e)
+write(chisquared, $4, format="binary")
+
+oCDF = Rand(rows=numCategories1, cols=numCategories2, min=0.0, max=0.0, pdf="uniform", seed=0)
+for(i in 1:numCategories1){
+	for(j in 1:numCategories2){
+		if(i==1 & j==1){
+			oCDF[i,j] = o[1,1]
+		}
+		if(i != 1 & j == 1){
+			oCDF[i,j] = oCDF[i-1,numCategories2] + o[i,j]
+		}
+		if(j > 1){
+			oCDF[i,j] = oCDF[i,j-1] + o[i,j]
+		}
+	}
+}
+
+one = Rand(rows=1, cols=1, min=1.0, max=1.0, pdf="uniform", seed=0)
+data = Rand(rows=numSamples, cols=2, min=0.0, max=0.0, pdf="uniform", seed=0)
+parfor(s in 1:numSamples){
+	r_mat = Rand(rows=1, cols=1, min=0.0, max=1.0, pdf="uniform", seed=0)  # NOTE(review): the same seed=0 is passed on every iteration; if an explicit seed makes Rand deterministic, r (and thus every sample) is identical - confirm
+	r = castAsScalar(r_mat)
+
+	cat1 = -1  # -1 remains in the output if r exceeds every oCDF entry
+	cat2 = -1
+	continue = 1  # flag emulating a loop break: cleared once the first matching cell is found
+	for(i in 1:numCategories1){
+		for(j in 1:numCategories2){
+			cdf = castAsScalar(oCDF[i,j])
+			if(continue == 1 & r <= cdf){  # first cell, in row-major order, whose cumulative value reaches r
+				cat1 = i
+				cat2 = j
+				continue = 0
+			}
+		}
+	}
+	
+	data[s,1] = cat1*one  # multiplying by the 1x1 matrix "one" turns the scalar into a matrix for the indexed assignment
+	data[s,2] = cat2*one
+}
+write(data, $5, format="binary")

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/scripts/datagen/genRandData4DecisionTree1.dml
----------------------------------------------------------------------
diff --git a/scripts/datagen/genRandData4DecisionTree1.dml b/scripts/datagen/genRandData4DecisionTree1.dml
index 3ef9067..b679783 100644
--- a/scripts/datagen/genRandData4DecisionTree1.dml
+++ b/scripts/datagen/genRandData4DecisionTree1.dml
@@ -1,39 +1,39 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-
-XCatFile = $XCat;
-YFile = $Y;
-num_records = $num_records;
-num_cat_features = $num_cat;
-num_class = $num_class;
-num_distinct = $num_distinct;
-sparsity = $sp;
-
-# generate class labels
-Y = floor (rand (rows = num_records, cols = 1, min = 1, max = num_class + 0.99999999999999)); 
-Y_bin = table (seq (1, num_records), Y); 
-write (Y_bin, YFile);
-
-# generate categorical features
-X_cat = floor (rand (rows = num_records, cols = num_cat_features, min = 1, max = num_distinct + 0.99999999999999, sparsity = sparsity));
-write (X_cat, XCatFile, format = "csv");
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+XCatFile = $XCat;
+YFile = $Y;
+num_records = $num_records;
+num_cat_features = $num_cat;
+num_class = $num_class;
+num_distinct = $num_distinct;
+sparsity = $sp;
+
+# generate class labels
+Y = floor (rand (rows = num_records, cols = 1, min = 1, max = num_class + 0.99999999999999)); 
+Y_bin = table (seq (1, num_records), Y); 
+write (Y_bin, YFile);
+
+# generate categorical features
+X_cat = floor (rand (rows = num_records, cols = num_cat_features, min = 1, max = num_distinct + 0.99999999999999, sparsity = sparsity));
+write (X_cat, XCatFile, format = "csv");
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/scripts/datagen/genRandData4DecisionTree2.dml
----------------------------------------------------------------------
diff --git a/scripts/datagen/genRandData4DecisionTree2.dml b/scripts/datagen/genRandData4DecisionTree2.dml
index 85d3ad0..cc8341c 100644
--- a/scripts/datagen/genRandData4DecisionTree2.dml
+++ b/scripts/datagen/genRandData4DecisionTree2.dml
@@ -1,40 +1,40 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-
-transformPath = $tPath;
-transformSpec = $tSpec;
-XCatFile = $XCat;
-XFile = $X;
-num_records = $num_records;
-num_scale_features = $num_scale;
-sparsity = $sp;
-fmt = $fmt;
-
-# generate scale features
-X_scale = rand (rows = num_records, cols = num_scale_features, min = 0, max = 10, sparsity = sparsity); 
-
-# transform categorical features
-XCF = read (XCatFile);
-X_cat_transformed = transform (target = XCF, transformSpec = transformSpec, transformPath = transformPath);
-
-X = append (X_scale, X_cat_transformed);
-write (X, XFile, format = fmt);
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+transformPath = $tPath;
+transformSpec = $tSpec;
+XCatFile = $XCat;
+XFile = $X;
+num_records = $num_records;
+num_scale_features = $num_scale;
+sparsity = $sp;
+fmt = $fmt;
+
+# generate scale features
+X_scale = rand (rows = num_records, cols = num_scale_features, min = 0, max = 10, sparsity = sparsity); 
+
+# transform categorical features
+XCF = read (XCatFile);
+X_cat_transformed = transform (target = XCF, transformSpec = transformSpec, transformPath = transformPath);
+
+X = append (X_scale, X_cat_transformed);
+write (X, XFile, format = fmt);

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/scripts/datagen/genRandData4FTest.dml
----------------------------------------------------------------------
diff --git a/scripts/datagen/genRandData4FTest.dml b/scripts/datagen/genRandData4FTest.dml
index 91e7c50..bdd33b9 100644
--- a/scripts/datagen/genRandData4FTest.dml
+++ b/scripts/datagen/genRandData4FTest.dml
@@ -1,95 +1,95 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# generates random data for F-test
-#
-# $1 is number of groups (some of 
-#		which may share a gaussian)
-# $2 is number of actual groups 
-# $3 is number of points
-# $4 is mean of the gaussian means
-# $5 is mean of the gaussian std. deviations
-# $6 is file to store computed f-statistic
-# $7 is file to store generated data
-
-numGroups = $1
-numActualGroups = $2
-numSamples = $3
-meanOfMeans = $4
-meanOfStddevs = $5
-
-cntProbs = Rand(rows=numGroups, cols=1, min=0.0, max=1.0, pdf="uniform", seed=0)
-cntProbs = cntProbs/sum(cntProbs)
-cntArr = round(cntProbs * numSamples)
-last_cnt = cntArr[numGroups,1]
-cntArr[numGroups,1] = numSamples - (sum(cntArr) - last_cnt)
-
-permut = Rand(rows=numActualGroups, cols=numGroups, min=0.0, max=0.0, pdf="uniform")
-ones = Rand(rows=numActualGroups, cols=1, min=1.0, max=1.0, pdf="uniform")
-permut[,1:numActualGroups] = diag(ones)
-
-one = Rand(rows=1, cols=1, min=1.0, max=1.0, pdf="uniform")
-copy_start_index = numActualGroups+1
-parfor(i in copy_start_index:numGroups){
-	r = Rand(rows=1, cols=1, min=1.0, max=numActualGroups, pdf="uniform", seed=0)
-	j = castAsScalar(round(r))
-	permut[j,i] = one
-}
-
-means_std = Rand(rows=numActualGroups, cols=1, pdf="normal", seed=0)
-abs_means = means_std + meanOfMeans
-means = t(t(abs_means) %*% permut)
-
-stddevs_std = Rand(rows=numActualGroups, cols=1, pdf="normal", seed=0)
-abs_stddevs = stddevs_std + meanOfStddevs
-stddevs = t(t(abs_stddevs) %*% permut)
-
-overall_mean = sum(means*cntArr)/numSamples
-
-explained_variance = sum(cntArr * (means - overall_mean)^2) / (numGroups-1.0)
-unexplained_variance = sum(cntArr * stddevs^2) / (numSamples - numGroups)
-f = explained_variance / unexplained_variance
-write(f, $6, format="binary")
-
-cntCDFs = cntProbs
-for(i in 2:numGroups){
-	cntCDFs[i,1] = cntCDFs[i-1,1] + cntProbs[i,1]
-}
-
-data = Rand(rows=numSamples, cols=1, min=0.0, max=0.0, pdf="uniform")
-parfor(i in 1:numSamples){
-	r_mat = Rand(rows=1, cols=1, min=0.0, max=1.0, pdf="uniform", seed=0)
-	r1 = castAsScalar(r_mat)
-
-	g = -1
-	continue = 1
-	for(k in 1:numGroups){
-		cdf = castAsScalar(cntCDFs[k,1])
-		if(continue==1 & r1<=cdf){
-			g = k
-			continue=0
-		}	
-	}
-	
-	point = Rand(rows=1, cols=1, pdf="normal", seed=0)
-	data[i,1] = point*stddevs[g,1] + means[g,1]
-}
-write(data, $7, format="binary")
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# generates random data for F-test
+#
+# $1 is number of groups (some of 
+#		which may share a gaussian)
+# $2 is number of actual groups 
+# $3 is number of points
+# $4 is mean of the gaussian means
+# $5 is mean of the gaussian std. deviations
+# $6 is file to store computed f-statistic
+# $7 is file to store generated data
+
+numGroups = $1
+numActualGroups = $2
+numSamples = $3
+meanOfMeans = $4
+meanOfStddevs = $5
+
+cntProbs = Rand(rows=numGroups, cols=1, min=0.0, max=1.0, pdf="uniform", seed=0)
+cntProbs = cntProbs/sum(cntProbs)
+cntArr = round(cntProbs * numSamples)
+last_cnt = cntArr[numGroups,1]
+cntArr[numGroups,1] = numSamples - (sum(cntArr) - last_cnt)
+
+permut = Rand(rows=numActualGroups, cols=numGroups, min=0.0, max=0.0, pdf="uniform")
+ones = Rand(rows=numActualGroups, cols=1, min=1.0, max=1.0, pdf="uniform")
+permut[,1:numActualGroups] = diag(ones)
+
+one = Rand(rows=1, cols=1, min=1.0, max=1.0, pdf="uniform")
+copy_start_index = numActualGroups+1
+parfor(i in copy_start_index:numGroups){
+	r = Rand(rows=1, cols=1, min=1.0, max=numActualGroups, pdf="uniform", seed=0)
+	j = castAsScalar(round(r))
+	permut[j,i] = one
+}
+
+means_std = Rand(rows=numActualGroups, cols=1, pdf="normal", seed=0)
+abs_means = means_std + meanOfMeans
+means = t(t(abs_means) %*% permut)
+
+stddevs_std = Rand(rows=numActualGroups, cols=1, pdf="normal", seed=0)
+abs_stddevs = stddevs_std + meanOfStddevs
+stddevs = t(t(abs_stddevs) %*% permut)
+
+overall_mean = sum(means*cntArr)/numSamples
+
+explained_variance = sum(cntArr * (means - overall_mean)^2) / (numGroups-1.0)
+unexplained_variance = sum(cntArr * stddevs^2) / (numSamples - numGroups)
+f = explained_variance / unexplained_variance
+write(f, $6, format="binary")
+
+cntCDFs = cntProbs
+for(i in 2:numGroups){
+	cntCDFs[i,1] = cntCDFs[i-1,1] + cntProbs[i,1]
+}
+
+data = Rand(rows=numSamples, cols=1, min=0.0, max=0.0, pdf="uniform")
+parfor(i in 1:numSamples){
+	r_mat = Rand(rows=1, cols=1, min=0.0, max=1.0, pdf="uniform", seed=0)
+	r1 = castAsScalar(r_mat)
+
+	g = -1
+	continue = 1
+	for(k in 1:numGroups){
+		cdf = castAsScalar(cntCDFs[k,1])
+		if(continue==1 & r1<=cdf){
+			g = k
+			continue=0
+		}	
+	}
+	
+	point = Rand(rows=1, cols=1, pdf="normal", seed=0)
+	data[i,1] = point*stddevs[g,1] + means[g,1]
+}
+write(data, $7, format="binary")

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/scripts/datagen/genRandData4Kmeans.dml
----------------------------------------------------------------------
diff --git a/scripts/datagen/genRandData4Kmeans.dml b/scripts/datagen/genRandData4Kmeans.dml
index 7abcee4..fe50ac5 100644
--- a/scripts/datagen/genRandData4Kmeans.dml
+++ b/scripts/datagen/genRandData4Kmeans.dml
@@ -1,120 +1,120 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-#
-# Generates random Gaussian-mixture data to test k-Means clustering algorithms
-#
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------
-# NAME  TYPE   DEFAULT  MEANING
-# ----------------------------------------------------------------------------
-# nr    Int     ---     Number of records
-# nf    Int     ---     Number of features
-# nc    Int     ---     Number of clusters
-# dc    Double  ---     St.dev. of cluster "centroid" features from zero mean
-# dr    Double  ---     St.dev. of the 1-st feature in a record within cluster
-# fbf   Double  ---     Feature bias factor: Stdev(last) / Stdev(1-st) feature
-# cbf   Double  ---     Cluster bias factor: Prob[1-st clus] / Prob[k-th clus]
-# X     String  ---     Location to write matrix X with generated data records
-# C     String  ---     Location to write cluster "centroids" (Gaussian means)
-# Y     String  ---     Location to write assignment of records to cluster ids
-# YbyC  String  ---     Location to write rec-cluster assigns by min-dist to C
-# ----------------------------------------------------------------------------
-#
-# Example:
-# hadoop jar SystemML.jar -f genRandData4Kmeans.dml -nvargs nr=100000 nf=100
-#     nc=10 dc=10.0 dr=1.0 fbf=100.0 cbf=100.0 X=X.mtx C=C.mtx Y=Y.mtx YbyC=YbyC.mtx
-
-print ("BEGIN K-MEANS GENERATOR SCRIPT");
-
-num_records   = $nr;
-num_features  = $nf;
-num_centroids = $nc;
-dist_per_feature_centroids = $dc;
-dist_per_feature_first_record = $dr;
-feature_bias_factor = $fbf;
-cluster_bias_factor = $cbf;
-
-fileX    = ifdef ($X, "X");
-fileC    = ifdef ($C, "C");
-fileY    = ifdef ($Y, "Y");
-fileYbyC = ifdef ($YbyC, "YbyC");
-fmt      = ifdef ($fmt, "text");
-
-print ("Generating cluster distribution (mixture) centroids...");
-
-C = Rand (rows = num_centroids, cols = num_features, pdf = "normal");
-C = C * dist_per_feature_centroids;
-
-print ("Generating record-to-cluster assignments...");
-
-# Y is a multinomial in {1, ..., num_centroids} with 1 being more likely
-# than "num_centroids" by the factor of "cluster_bias_factor"
-
-rnd = Rand (rows = num_records, cols = 1, min = 0.0, max = 1.0, pdf = "uniform");
-if (cluster_bias_factor == 1.0) {
-    Y = round (0.5 + rnd * num_centroids);
-} else {
-    rnd_scaled = rnd * (1 - cluster_bias_factor ^ (- num_centroids / (num_centroids - 1)));
-    Y = round (0.5 - (num_centroids - 1) * log (1 - rnd_scaled) / log (cluster_bias_factor));
-}
-
-print ("Generating within-cluster random shifts...");
-
-X_shift = Rand (rows = num_records, cols = num_features, pdf = "normal");
-feature_factors = dist_per_feature_first_record * 
-    exp ((seq (1, num_features) - 1) / (num_features - 1) * log (feature_bias_factor));
-X_shift = X_shift %*% diag (feature_factors);
-
-print ("Generating records by shifting from centroids..."); 
-
-Y_bitmap_raw = table (seq (1, num_records), Y);
-Y_bitmap = matrix (0, rows = num_records, cols = num_centroids);
-Y_bitmap [, 1 : ncol (Y_bitmap_raw)] = Y_bitmap_raw;
-X = Y_bitmap %*% C + X_shift;
-
-print ("Computing record-to-cluster assignments by minimum centroid distance...");
-
-D = t(t(-2 * (X %*% t(C))) + rowSums (C ^ 2));
-P = ppred (D, rowMins (D), "<=");
-aggr_P = t(cumsum (t(P)));
-Y_by_C = rowSums (ppred (aggr_P, 0, "==")) + 1;
-
-print ("Computing useful statistics...");
-
-sumXsq = sum (X ^ 2);
-default_wcss  = sumXsq - sum (colSums (X) ^ 2) / num_records;
-attained_wcss = sumXsq + sum (rowMins (D));
-
-print ("Default (single-cluster) WCSS = " + default_wcss);
-print (num_centroids + "-cluster WCSS attained by the mixture centroids = " + attained_wcss);
-
-print ("Writing out the resulting dataset...");
-
-write (X, fileX, format = fmt);
-write (C, fileC, format = fmt);
-write (Y, fileY, format = fmt);
-write (Y_by_C, fileYbyC, format = fmt);
-
-print ("Please run the scoring script to compare " + fileY + " with " + fileYbyC); 
-
-print ("DONE: K-MEANS GENERATOR SCRIPT");
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+#
+# Generates random Gaussian-mixture data to test k-Means clustering algorithms
+#
+# INPUT PARAMETERS:
+# ----------------------------------------------------------------------------
+# NAME  TYPE   DEFAULT  MEANING
+# ----------------------------------------------------------------------------
+# nr    Int     ---     Number of records
+# nf    Int     ---     Number of features
+# nc    Int     ---     Number of clusters
+# dc    Double  ---     St.dev. of cluster "centroid" features from zero mean
+# dr    Double  ---     St.dev. of the 1-st feature in a record within cluster
+# fbf   Double  ---     Feature bias factor: Stdev(last) / Stdev(1-st) feature
+# cbf   Double  ---     Cluster bias factor: Prob[1-st clus] / Prob[k-th clus]
+# X     String  ---     Location to write matrix X with generated data records
+# C     String  ---     Location to write cluster "centroids" (Gaussian means)
+# Y     String  ---     Location to write assignment of records to cluster ids
+# YbyC  String  ---     Location to write rec-cluster assigns by min-dist to C
+# ----------------------------------------------------------------------------
+#
+# Example:
+# hadoop jar SystemML.jar -f genRandData4Kmeans.dml -nvargs nr=100000 nf=100
+#     nc=10 dc=10.0 dr=1.0 fbf=100.0 cbf=100.0 X=X.mtx C=C.mtx Y=Y.mtx YbyC=YbyC.mtx
+
+print ("BEGIN K-MEANS GENERATOR SCRIPT");
+
+num_records   = $nr;
+num_features  = $nf;
+num_centroids = $nc;
+dist_per_feature_centroids = $dc;
+dist_per_feature_first_record = $dr;
+feature_bias_factor = $fbf;
+cluster_bias_factor = $cbf;
+
+fileX    = ifdef ($X, "X");
+fileC    = ifdef ($C, "C");
+fileY    = ifdef ($Y, "Y");
+fileYbyC = ifdef ($YbyC, "YbyC");
+fmt      = ifdef ($fmt, "text");
+
+print ("Generating cluster distribution (mixture) centroids...");
+
+C = Rand (rows = num_centroids, cols = num_features, pdf = "normal");
+C = C * dist_per_feature_centroids;
+
+print ("Generating record-to-cluster assignments...");
+
+# Y is a multinomial in {1, ..., num_centroids} with 1 being more likely
+# than "num_centroids" by the factor of "cluster_bias_factor"
+
+rnd = Rand (rows = num_records, cols = 1, min = 0.0, max = 1.0, pdf = "uniform");
+if (cluster_bias_factor == 1.0) {
+    Y = round (0.5 + rnd * num_centroids);
+} else {
+    rnd_scaled = rnd * (1 - cluster_bias_factor ^ (- num_centroids / (num_centroids - 1)));
+    Y = round (0.5 - (num_centroids - 1) * log (1 - rnd_scaled) / log (cluster_bias_factor));
+}
+
+print ("Generating within-cluster random shifts...");
+
+X_shift = Rand (rows = num_records, cols = num_features, pdf = "normal");
+feature_factors = dist_per_feature_first_record * 
+    exp ((seq (1, num_features) - 1) / (num_features - 1) * log (feature_bias_factor));
+X_shift = X_shift %*% diag (feature_factors);
+
+print ("Generating records by shifting from centroids..."); 
+
+Y_bitmap_raw = table (seq (1, num_records), Y);
+Y_bitmap = matrix (0, rows = num_records, cols = num_centroids);
+Y_bitmap [, 1 : ncol (Y_bitmap_raw)] = Y_bitmap_raw;
+X = Y_bitmap %*% C + X_shift;
+
+print ("Computing record-to-cluster assignments by minimum centroid distance...");
+
+D = t(t(-2 * (X %*% t(C))) + rowSums (C ^ 2));
+P = ppred (D, rowMins (D), "<=");
+aggr_P = t(cumsum (t(P)));
+Y_by_C = rowSums (ppred (aggr_P, 0, "==")) + 1;
+
+print ("Computing useful statistics...");
+
+sumXsq = sum (X ^ 2);
+default_wcss  = sumXsq - sum (colSums (X) ^ 2) / num_records;
+attained_wcss = sumXsq + sum (rowMins (D));
+
+print ("Default (single-cluster) WCSS = " + default_wcss);
+print (num_centroids + "-cluster WCSS attained by the mixture centroids = " + attained_wcss);
+
+print ("Writing out the resulting dataset...");
+
+write (X, fileX, format = fmt);
+write (C, fileC, format = fmt);
+write (Y, fileY, format = fmt);
+write (Y_by_C, fileYbyC, format = fmt);
+
+print ("Please run the scoring script to compare " + fileY + " with " + fileYbyC); 
+
+print ("DONE: K-MEANS GENERATOR SCRIPT");
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/scripts/datagen/genRandData4LinearRegression.dml
----------------------------------------------------------------------
diff --git a/scripts/datagen/genRandData4LinearRegression.dml b/scripts/datagen/genRandData4LinearRegression.dml
index f0d214e..b257804 100644
--- a/scripts/datagen/genRandData4LinearRegression.dml
+++ b/scripts/datagen/genRandData4LinearRegression.dml
@@ -1,61 +1,61 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# generates data to test linear regression
-
-# $1 is number of samples
-# $2 is number of features (independent variables)
-# $3 is maximum feature value (absolute value)
-# $4 is maximum weight (absolute value)
-# $5 is location to store generated weights
-# $6 is location to store generated data
-# $7 is location to store generated labels
-# $8 is 0/1. 0 suppresses noise, 1 will add noise to Y
-# $9 is b, 0 disables intercept
-# $10 controls sparsity in the generated data
-# $11 output format
-
-numSamples = $1
-numFeatures = $2
-maxFeatureValue = $3
-maxWeight = $4
-addNoise = $8
-b = $9
-fmt = $11
-
-X = Rand(rows=numSamples, cols=numFeatures, min=-1, max=1, pdf="uniform", seed=0, sparsity=$10)
-w = Rand(rows=numFeatures, cols=1, min=-1, max=1, pdf="uniform", seed=0)
-X = X * maxFeatureValue
-w = w * maxWeight
-Y = X %*% w
-
-if(b!=0) {
-	b_mat = Rand(rows=1, cols=1, min=b, max=b, pdf="uniform")
-	w =  t(append(t(w), b_mat))
-	Y = Y + b
-}
-
-noise = Rand(rows=numSamples, cols=1, pdf="normal", seed=0)
-Y = Y + addNoise*noise
-
-write(w, $5, format=fmt)
-write(X, $6, format=fmt)
-write(Y, $7, format=fmt)
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# generates data to test linear regression
+
+# $1 is number of samples
+# $2 is number of features (independent variables)
+# $3 is maximum feature value (absolute value)
+# $4 is maximum weight (absolute value)
+# $5 is location to store generated weights
+# $6 is location to store generated data
+# $7 is location to store generated labels
+# $8 is 0/1. 0 suppresses noise, 1 will add noise to Y
+# $9 is b, 0 disables intercept
+# $10 controls sparsity in the generated data
+# $11 output format
+
+numSamples = $1
+numFeatures = $2
+maxFeatureValue = $3
+maxWeight = $4
+addNoise = $8
+b = $9
+fmt = $11
+
+X = Rand(rows=numSamples, cols=numFeatures, min=-1, max=1, pdf="uniform", seed=0, sparsity=$10)
+w = Rand(rows=numFeatures, cols=1, min=-1, max=1, pdf="uniform", seed=0)
+X = X * maxFeatureValue
+w = w * maxWeight
+Y = X %*% w
+
+if(b!=0) {
+	b_mat = Rand(rows=1, cols=1, min=b, max=b, pdf="uniform")
+	w =  t(append(t(w), b_mat))
+	Y = Y + b
+}
+
+noise = Rand(rows=numSamples, cols=1, pdf="normal", seed=0)
+Y = Y + addNoise*noise
+
+write(w, $5, format=fmt)
+write(X, $6, format=fmt)
+write(Y, $7, format=fmt)


[05/55] [partial] incubator-systemml git commit: [SYSTEMML-482] [SYSTEMML-480] Adding a Git attributes file to enfore Unix-styled line endings, and normalizing all of the line endings.

Posted by du...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor_threadid_recompile1.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor_threadid_recompile1.dml b/src/test/scripts/functions/parfor/parfor_threadid_recompile1.dml
index 8c19b2a..1995f9d 100644
--- a/src/test/scripts/functions/parfor/parfor_threadid_recompile1.dml
+++ b/src/test/scripts/functions/parfor/parfor_threadid_recompile1.dml
@@ -19,32 +19,32 @@
 #
 #-------------------------------------------------------------
 
-
-
-A = read($1)
-
-num_bin_defns = sum(A)
-bin_defns = matrix(0, rows=num_bin_defns, cols=2)
-
-attr2pos = matrix(0, rows=nrow(A), cols=2)
-pos = 1
-for(i in 1:nrow(A)){
-	number_of_bins = castAsScalar(A[i,1])
-	attr2pos[i,1] = pos
-	attr2pos[i,2] = pos + number_of_bins - 1
-	pos = pos + number_of_bins
-}
-
-for(i in 1:nrow(A), check=0){
-	num_bins = castAsScalar(A[i,1])
-	
-	start_position = castAsScalar(attr2pos[i,1])
-	end_position = castAsScalar(attr2pos[i,2])
-	
-	#SEQ CALL 1
-	bin_defns[start_position:end_position,1] = seq(1, num_bins, 1)
-	
-	#SEQ CALL 2
-	bin_defns[start_position:end_position,2] = seq(1, 1+num_bins-1, 1)
-}
-write(bin_defns, $2)
+
+
+A = read($1)
+
+num_bin_defns = sum(A)
+bin_defns = matrix(0, rows=num_bin_defns, cols=2)
+
+attr2pos = matrix(0, rows=nrow(A), cols=2)
+pos = 1
+for(i in 1:nrow(A)){
+	number_of_bins = castAsScalar(A[i,1])
+	attr2pos[i,1] = pos
+	attr2pos[i,2] = pos + number_of_bins - 1
+	pos = pos + number_of_bins
+}
+
+for(i in 1:nrow(A), check=0){
+	num_bins = castAsScalar(A[i,1])
+	
+	start_position = castAsScalar(attr2pos[i,1])
+	end_position = castAsScalar(attr2pos[i,2])
+	
+	#SEQ CALL 1
+	bin_defns[start_position:end_position,1] = seq(1, num_bins, 1)
+	
+	#SEQ CALL 2
+	bin_defns[start_position:end_position,2] = seq(1, 1+num_bins-1, 1)
+}
+write(bin_defns, $2)

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor_threadid_recompile2.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor_threadid_recompile2.dml b/src/test/scripts/functions/parfor/parfor_threadid_recompile2.dml
index 85e1a6f..ab89580 100644
--- a/src/test/scripts/functions/parfor/parfor_threadid_recompile2.dml
+++ b/src/test/scripts/functions/parfor/parfor_threadid_recompile2.dml
@@ -19,32 +19,32 @@
 #
 #-------------------------------------------------------------
 
-
-
-A = read($1)
-
-num_bin_defns = sum(A)
-bin_defns = matrix(0, rows=num_bin_defns, cols=2)
-
-attr2pos = matrix(0, rows=nrow(A), cols=2)
-pos = 1
-for(i in 1:nrow(A)){
-	number_of_bins = castAsScalar(A[i,1])
-	attr2pos[i,1] = pos
-	attr2pos[i,2] = pos + number_of_bins - 1
-	pos = pos + number_of_bins
-}
-
-parfor(i in 1:nrow(A), check=0){
-	num_bins = castAsScalar(A[i,1])
-	
-	start_position = castAsScalar(attr2pos[i,1])
-	end_position = castAsScalar(attr2pos[i,2])
-	
-	#SEQ CALL 1
-	bin_defns[start_position:end_position,1] = seq(1, num_bins, 1)
-	
-	#SEQ CALL 2
-	bin_defns[start_position:end_position,2] = seq(1, 1+num_bins-1, 1)
-}
-write(bin_defns, $2)
+
+
+A = read($1)
+
+num_bin_defns = sum(A)
+bin_defns = matrix(0, rows=num_bin_defns, cols=2)
+
+attr2pos = matrix(0, rows=nrow(A), cols=2)
+pos = 1
+for(i in 1:nrow(A)){
+	number_of_bins = castAsScalar(A[i,1])
+	attr2pos[i,1] = pos
+	attr2pos[i,2] = pos + number_of_bins - 1
+	pos = pos + number_of_bins
+}
+
+parfor(i in 1:nrow(A), check=0){
+	num_bins = castAsScalar(A[i,1])
+	
+	start_position = castAsScalar(attr2pos[i,1])
+	end_position = castAsScalar(attr2pos[i,2])
+	
+	#SEQ CALL 1
+	bin_defns[start_position:end_position,1] = seq(1, num_bins, 1)
+	
+	#SEQ CALL 2
+	bin_defns[start_position:end_position,2] = seq(1, 1+num_bins-1, 1)
+}
+write(bin_defns, $2)

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/piggybacking/Piggybacking1_append.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/piggybacking/Piggybacking1_append.dml b/src/test/scripts/functions/piggybacking/Piggybacking1_append.dml
index 99acd13..9e35d60 100644
--- a/src/test/scripts/functions/piggybacking/Piggybacking1_append.dml
+++ b/src/test/scripts/functions/piggybacking/Piggybacking1_append.dml
@@ -1,31 +1,31 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-A = matrix(1,10,10);
-v = matrix(2,10,1);
-
-v = v+sum(A);
-B = append(A,v);
-
-s = sum(B);
-
-write(s, $1);
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+A = matrix(1,10,10);
+v = matrix(2,10,1);
+
+v = v+sum(A);
+B = append(A,v);
+
+s = sum(B);
+
+write(s, $1);
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/piggybacking/Piggybacking1_mvmult.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/piggybacking/Piggybacking1_mvmult.R b/src/test/scripts/functions/piggybacking/Piggybacking1_mvmult.R
index e4352f5..011853c 100644
--- a/src/test/scripts/functions/piggybacking/Piggybacking1_mvmult.R
+++ b/src/test/scripts/functions/piggybacking/Piggybacking1_mvmult.R
@@ -1,32 +1,32 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-args <- commandArgs(TRUE)
-library(Matrix)
-
-A = readMM(args[1]);
-x = readMM(args[2]);
-
-y = A %*% x;
-z = A %*% y;
-
-writeMM(as(z, "CsparseMatrix"), args[3]); 
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+args <- commandArgs(TRUE)
+library(Matrix)
+
+A = readMM(args[1]);
+x = readMM(args[2]);
+
+y = A %*% x;
+z = A %*% y;
+
+writeMM(as(z, "CsparseMatrix"), args[3]); 
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/piggybacking/Piggybacking1_mvmult.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/piggybacking/Piggybacking1_mvmult.dml b/src/test/scripts/functions/piggybacking/Piggybacking1_mvmult.dml
index 1b3be46..227e29b 100644
--- a/src/test/scripts/functions/piggybacking/Piggybacking1_mvmult.dml
+++ b/src/test/scripts/functions/piggybacking/Piggybacking1_mvmult.dml
@@ -1,28 +1,28 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-A = read($1); #Rand(rows=100, cols=100, seed=10);
-x = read($2); #Rand(rows=100, cols=1, seed=10);
-
-y = A %*% x;
-z = A %*% y;
-
-write(z, $3);
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+A = read($1); #Rand(rows=100, cols=100, seed=10);
+x = read($2); #Rand(rows=100, cols=1, seed=10);
+
+y = A %*% x;
+z = A %*% y;
+
+write(z, $3);

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/piggybacking/Piggybacking_eliminate.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/piggybacking/Piggybacking_eliminate.dml b/src/test/scripts/functions/piggybacking/Piggybacking_eliminate.dml
index cae4097..55b18a3 100644
--- a/src/test/scripts/functions/piggybacking/Piggybacking_eliminate.dml
+++ b/src/test/scripts/functions/piggybacking/Piggybacking_eliminate.dml
@@ -1,36 +1,36 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# Script to reproduce a piggybacking bug related to "eliminated" group lops.
-# The bug appears only when MMultMethod=CPMM was chosen (or forced) for the 
-# operation t(A)%*%E.
-
-A = read("DML/data/A"); #matrix(1,rows=10,cols=10) #
-B = read("DML/data/B"); #matrix(1,rows=10,cols=10) #
-E = read("DML/data/E"); #matrix(1,rows=10,cols=10) #
-
-F = t(A) + B 
-G = t(A) %*% E
-
-H = F + G
-
-write(H, "DML/data/H")
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# Script to reproduce a piggybacking bug related to "eliminated" group lops.
+# The bug appears only when MMultMethod=CPMM was chosen (or forced) for the 
+# operation t(A)%*%E.
+
+A = read("DML/data/A"); #matrix(1,rows=10,cols=10) #
+B = read("DML/data/B"); #matrix(1,rows=10,cols=10) #
+E = read("DML/data/E"); #matrix(1,rows=10,cols=10) #
+
+F = t(A) + B 
+G = t(A) %*% E
+
+H = F + G
+
+write(H, "DML/data/H")
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/piggybacking/Piggybacking_iqm.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/piggybacking/Piggybacking_iqm.dml b/src/test/scripts/functions/piggybacking/Piggybacking_iqm.dml
index 9a21258..4e4d95a 100644
--- a/src/test/scripts/functions/piggybacking/Piggybacking_iqm.dml
+++ b/src/test/scripts/functions/piggybacking/Piggybacking_iqm.dml
@@ -1,32 +1,32 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# DML script to reproduce a piggybacking bug that caused
-# combineunary lop to (eagerly) piggbacked into GMR that performs A[,4].
-# However, combineunary can only be piggybacked into SortMR.
-
-A = matrix(1,rows=10,cols=10);
-
-F = A[,4];
-x = interQuartileMean(F);
-
-#print("IQM = " + x );
-write(x, $1);
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# DML script to reproduce a piggybacking bug that caused
+# combineunary lop to (eagerly) piggbacked into GMR that performs A[,4].
+# However, combineunary can only be piggybacked into SortMR.
+
+A = matrix(1,rows=10,cols=10);
+
+F = A[,4];
+x = interQuartileMean(F);
+
+#print("IQM = " + x );
+write(x, $1);

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/quaternary/RewriteWeightedSigmoid.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/quaternary/RewriteWeightedSigmoid.R b/src/test/scripts/functions/quaternary/RewriteWeightedSigmoid.R
index d2edb9a..78ad502 100644
--- a/src/test/scripts/functions/quaternary/RewriteWeightedSigmoid.R
+++ b/src/test/scripts/functions/quaternary/RewriteWeightedSigmoid.R
@@ -1,35 +1,35 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-A = as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
-B = as.matrix(readMM(paste(args[1], "B.mtx", sep="")))
-
-C =  B * (1/(1+exp(rowSums(B*(matrix(1,nrow(B),1)%*%A)))));
-
-writeMM(as(C, "CsparseMatrix"), paste(args[2], "C", sep="")); 
-
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+A = as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
+B = as.matrix(readMM(paste(args[1], "B.mtx", sep="")))
+
+C =  B * (1/(1+exp(rowSums(B*(matrix(1,nrow(B),1)%*%A)))));
+
+writeMM(as(C, "CsparseMatrix"), paste(args[2], "C", sep="")); 
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/quaternary/WeightedCeMM.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/quaternary/WeightedCeMM.R b/src/test/scripts/functions/quaternary/WeightedCeMM.R
index 0d87d72..9ac0eb4 100644
--- a/src/test/scripts/functions/quaternary/WeightedCeMM.R
+++ b/src/test/scripts/functions/quaternary/WeightedCeMM.R
@@ -1,36 +1,36 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-X = as.matrix(readMM(paste(args[1], "X.mtx", sep="")))
-U = as.matrix(readMM(paste(args[1], "U.mtx", sep="")))
-V = as.matrix(readMM(paste(args[1], "V.mtx", sep="")))
-
-R = as.matrix(sum(X * log(U%*%t(V))));
-
-writeMM(as(R, "CsparseMatrix"), paste(args[2], "R", sep="")); 
-
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+X = as.matrix(readMM(paste(args[1], "X.mtx", sep="")))
+U = as.matrix(readMM(paste(args[1], "U.mtx", sep="")))
+V = as.matrix(readMM(paste(args[1], "V.mtx", sep="")))
+
+R = as.matrix(sum(X * log(U%*%t(V))));
+
+writeMM(as(R, "CsparseMatrix"), paste(args[2], "R", sep="")); 
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/quaternary/WeightedDivMMLeft.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/quaternary/WeightedDivMMLeft.R b/src/test/scripts/functions/quaternary/WeightedDivMMLeft.R
index e627761..44f6ea3 100644
--- a/src/test/scripts/functions/quaternary/WeightedDivMMLeft.R
+++ b/src/test/scripts/functions/quaternary/WeightedDivMMLeft.R
@@ -1,36 +1,36 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-W = as.matrix(readMM(paste(args[1], "W.mtx", sep="")))
-U = as.matrix(readMM(paste(args[1], "U.mtx", sep="")))
-V = as.matrix(readMM(paste(args[1], "V.mtx", sep="")))
-
-R = t(t(U) %*% (W/(U%*%t(V))));
-
-writeMM(as(R, "CsparseMatrix"), paste(args[2], "R", sep="")); 
-
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+W = as.matrix(readMM(paste(args[1], "W.mtx", sep="")))
+U = as.matrix(readMM(paste(args[1], "U.mtx", sep="")))
+V = as.matrix(readMM(paste(args[1], "V.mtx", sep="")))
+
+R = t(t(U) %*% (W/(U%*%t(V))));
+
+writeMM(as(R, "CsparseMatrix"), paste(args[2], "R", sep="")); 
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/quaternary/WeightedDivMMMultBasic.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/quaternary/WeightedDivMMMultBasic.R b/src/test/scripts/functions/quaternary/WeightedDivMMMultBasic.R
index 3b1a75b..5ac190e 100644
--- a/src/test/scripts/functions/quaternary/WeightedDivMMMultBasic.R
+++ b/src/test/scripts/functions/quaternary/WeightedDivMMMultBasic.R
@@ -1,36 +1,36 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-W = as.matrix(readMM(paste(args[1], "W.mtx", sep="")))
-U = as.matrix(readMM(paste(args[1], "U.mtx", sep="")))
-V = as.matrix(readMM(paste(args[1], "V.mtx", sep="")))
-
-R = W*(U%*%t(V));
-
-writeMM(as(R, "CsparseMatrix"), paste(args[2], "R", sep="")); 
-
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+W = as.matrix(readMM(paste(args[1], "W.mtx", sep="")))
+U = as.matrix(readMM(paste(args[1], "U.mtx", sep="")))
+V = as.matrix(readMM(paste(args[1], "V.mtx", sep="")))
+
+R = W*(U%*%t(V));
+
+writeMM(as(R, "CsparseMatrix"), paste(args[2], "R", sep="")); 
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/quaternary/WeightedDivMMMultLeft.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/quaternary/WeightedDivMMMultLeft.R b/src/test/scripts/functions/quaternary/WeightedDivMMMultLeft.R
index 1e2d06c..b915b42 100644
--- a/src/test/scripts/functions/quaternary/WeightedDivMMMultLeft.R
+++ b/src/test/scripts/functions/quaternary/WeightedDivMMMultLeft.R
@@ -1,36 +1,36 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-W = as.matrix(readMM(paste(args[1], "W.mtx", sep="")))
-U = as.matrix(readMM(paste(args[1], "U.mtx", sep="")))
-V = as.matrix(readMM(paste(args[1], "V.mtx", sep="")))
-
-R = t(t(U) %*% (W*(U%*%t(V))));
-
-writeMM(as(R, "CsparseMatrix"), paste(args[2], "R", sep="")); 
-
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+W = as.matrix(readMM(paste(args[1], "W.mtx", sep="")))
+U = as.matrix(readMM(paste(args[1], "U.mtx", sep="")))
+V = as.matrix(readMM(paste(args[1], "V.mtx", sep="")))
+
+R = t(t(U) %*% (W*(U%*%t(V))));
+
+writeMM(as(R, "CsparseMatrix"), paste(args[2], "R", sep="")); 
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/quaternary/WeightedDivMMMultMinusLeft.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/quaternary/WeightedDivMMMultMinusLeft.R b/src/test/scripts/functions/quaternary/WeightedDivMMMultMinusLeft.R
index 84a3044..fd656f1 100644
--- a/src/test/scripts/functions/quaternary/WeightedDivMMMultMinusLeft.R
+++ b/src/test/scripts/functions/quaternary/WeightedDivMMMultMinusLeft.R
@@ -1,36 +1,36 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-W = as.matrix(readMM(paste(args[1], "W.mtx", sep="")))
-U = as.matrix(readMM(paste(args[1], "U.mtx", sep="")))
-V = as.matrix(readMM(paste(args[1], "V.mtx", sep="")))
-
-R = t(t(U) %*% ((W!=0)*(U%*%t(V)-W)));
-
-writeMM(as(R, "CsparseMatrix"), paste(args[2], "R", sep="")); 
-
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+W = as.matrix(readMM(paste(args[1], "W.mtx", sep="")))
+U = as.matrix(readMM(paste(args[1], "U.mtx", sep="")))
+V = as.matrix(readMM(paste(args[1], "V.mtx", sep="")))
+
+R = t(t(U) %*% ((W!=0)*(U%*%t(V)-W)));
+
+writeMM(as(R, "CsparseMatrix"), paste(args[2], "R", sep="")); 
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/quaternary/WeightedDivMMMultMinusRight.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/quaternary/WeightedDivMMMultMinusRight.R b/src/test/scripts/functions/quaternary/WeightedDivMMMultMinusRight.R
index 33d59c7..f3ba1e9 100644
--- a/src/test/scripts/functions/quaternary/WeightedDivMMMultMinusRight.R
+++ b/src/test/scripts/functions/quaternary/WeightedDivMMMultMinusRight.R
@@ -1,36 +1,36 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-W = as.matrix(readMM(paste(args[1], "W.mtx", sep="")))
-U = as.matrix(readMM(paste(args[1], "U.mtx", sep="")))
-V = as.matrix(readMM(paste(args[1], "V.mtx", sep="")))
-
-R = ((W!=0)*(U%*%t(V)-W)) %*% V;
-
-writeMM(as(R, "CsparseMatrix"), paste(args[2], "R", sep="")); 
-
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+W = as.matrix(readMM(paste(args[1], "W.mtx", sep="")))
+U = as.matrix(readMM(paste(args[1], "U.mtx", sep="")))
+V = as.matrix(readMM(paste(args[1], "V.mtx", sep="")))
+
+R = ((W!=0)*(U%*%t(V)-W)) %*% V;
+
+writeMM(as(R, "CsparseMatrix"), paste(args[2], "R", sep="")); 
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/quaternary/WeightedDivMMMultRight.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/quaternary/WeightedDivMMMultRight.R b/src/test/scripts/functions/quaternary/WeightedDivMMMultRight.R
index dd95d6c..7f34ae5 100644
--- a/src/test/scripts/functions/quaternary/WeightedDivMMMultRight.R
+++ b/src/test/scripts/functions/quaternary/WeightedDivMMMultRight.R
@@ -1,36 +1,36 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-W = as.matrix(readMM(paste(args[1], "W.mtx", sep="")))
-U = as.matrix(readMM(paste(args[1], "U.mtx", sep="")))
-V = as.matrix(readMM(paste(args[1], "V.mtx", sep="")))
-
-R = (W*(U%*%t(V))) %*% V;
-
-writeMM(as(R, "CsparseMatrix"), paste(args[2], "R", sep="")); 
-
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+W = as.matrix(readMM(paste(args[1], "W.mtx", sep="")))
+U = as.matrix(readMM(paste(args[1], "U.mtx", sep="")))
+V = as.matrix(readMM(paste(args[1], "V.mtx", sep="")))
+
+R = (W*(U%*%t(V))) %*% V;
+
+writeMM(as(R, "CsparseMatrix"), paste(args[2], "R", sep="")); 
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/quaternary/WeightedDivMMRight.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/quaternary/WeightedDivMMRight.R b/src/test/scripts/functions/quaternary/WeightedDivMMRight.R
index 5b05d54..e279ff1 100644
--- a/src/test/scripts/functions/quaternary/WeightedDivMMRight.R
+++ b/src/test/scripts/functions/quaternary/WeightedDivMMRight.R
@@ -1,36 +1,36 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-W = as.matrix(readMM(paste(args[1], "W.mtx", sep="")))
-U = as.matrix(readMM(paste(args[1], "U.mtx", sep="")))
-V = as.matrix(readMM(paste(args[1], "V.mtx", sep="")))
-
-R = (W/(U%*%t(V))) %*% V;
-
-writeMM(as(R, "CsparseMatrix"), paste(args[2], "R", sep="")); 
-
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+W = as.matrix(readMM(paste(args[1], "W.mtx", sep="")))
+U = as.matrix(readMM(paste(args[1], "U.mtx", sep="")))
+V = as.matrix(readMM(paste(args[1], "V.mtx", sep="")))
+
+R = (W/(U%*%t(V))) %*% V;
+
+writeMM(as(R, "CsparseMatrix"), paste(args[2], "R", sep="")); 
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/quaternary/WeightedSigmoidP1.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/quaternary/WeightedSigmoidP1.R b/src/test/scripts/functions/quaternary/WeightedSigmoidP1.R
index db193cb..50f57ce 100644
--- a/src/test/scripts/functions/quaternary/WeightedSigmoidP1.R
+++ b/src/test/scripts/functions/quaternary/WeightedSigmoidP1.R
@@ -19,19 +19,19 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-X = as.matrix(readMM(paste(args[1], "X.mtx", sep="")))
-U = as.matrix(readMM(paste(args[1], "U.mtx", sep="")))
-V = as.matrix(readMM(paste(args[1], "V.mtx", sep="")))
-
-UV = U%*%t(V);
-R = X * (1/(1 + exp(-UV)));
-
-writeMM(as(R, "CsparseMatrix"), paste(args[2], "R", sep="")); 
-
-
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+X = as.matrix(readMM(paste(args[1], "X.mtx", sep="")))
+U = as.matrix(readMM(paste(args[1], "U.mtx", sep="")))
+V = as.matrix(readMM(paste(args[1], "V.mtx", sep="")))
+
+UV = U%*%t(V);
+R = X * (1/(1 + exp(-UV)));
+
+writeMM(as(R, "CsparseMatrix"), paste(args[2], "R", sep="")); 
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/quaternary/WeightedSigmoidP2.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/quaternary/WeightedSigmoidP2.R b/src/test/scripts/functions/quaternary/WeightedSigmoidP2.R
index d1b1a52..58a0ced 100644
--- a/src/test/scripts/functions/quaternary/WeightedSigmoidP2.R
+++ b/src/test/scripts/functions/quaternary/WeightedSigmoidP2.R
@@ -19,19 +19,19 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-X = as.matrix(readMM(paste(args[1], "X.mtx", sep="")))
-U = as.matrix(readMM(paste(args[1], "U.mtx", sep="")))
-V = as.matrix(readMM(paste(args[1], "V.mtx", sep="")))
-
-UV = U%*%t(V);
-R = X * log(1/(1 + exp(-UV)));
-
-writeMM(as(R, "CsparseMatrix"), paste(args[2], "R", sep="")); 
-
-
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+X = as.matrix(readMM(paste(args[1], "X.mtx", sep="")))
+U = as.matrix(readMM(paste(args[1], "U.mtx", sep="")))
+V = as.matrix(readMM(paste(args[1], "V.mtx", sep="")))
+
+UV = U%*%t(V);
+R = X * log(1/(1 + exp(-UV)));
+
+writeMM(as(R, "CsparseMatrix"), paste(args[2], "R", sep="")); 
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/quaternary/WeightedSigmoidP3.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/quaternary/WeightedSigmoidP3.R b/src/test/scripts/functions/quaternary/WeightedSigmoidP3.R
index 34d2e54..3eaa422 100644
--- a/src/test/scripts/functions/quaternary/WeightedSigmoidP3.R
+++ b/src/test/scripts/functions/quaternary/WeightedSigmoidP3.R
@@ -19,19 +19,19 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-X = as.matrix(readMM(paste(args[1], "X.mtx", sep="")))
-U = as.matrix(readMM(paste(args[1], "U.mtx", sep="")))
-V = as.matrix(readMM(paste(args[1], "V.mtx", sep="")))
-
-UV = -(U%*%t(V));
-R = X * (1/(1 + exp(-UV)));
-
-writeMM(as(R, "CsparseMatrix"), paste(args[2], "R", sep="")); 
-
-
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+X = as.matrix(readMM(paste(args[1], "X.mtx", sep="")))
+U = as.matrix(readMM(paste(args[1], "U.mtx", sep="")))
+V = as.matrix(readMM(paste(args[1], "V.mtx", sep="")))
+
+UV = -(U%*%t(V));
+R = X * (1/(1 + exp(-UV)));
+
+writeMM(as(R, "CsparseMatrix"), paste(args[2], "R", sep="")); 
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/quaternary/WeightedSigmoidP4.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/quaternary/WeightedSigmoidP4.R b/src/test/scripts/functions/quaternary/WeightedSigmoidP4.R
index 495f80f..10c7691 100644
--- a/src/test/scripts/functions/quaternary/WeightedSigmoidP4.R
+++ b/src/test/scripts/functions/quaternary/WeightedSigmoidP4.R
@@ -19,19 +19,19 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-X = as.matrix(readMM(paste(args[1], "X.mtx", sep="")))
-U = as.matrix(readMM(paste(args[1], "U.mtx", sep="")))
-V = as.matrix(readMM(paste(args[1], "V.mtx", sep="")))
-
-UV = -(U%*%t(V));
-R = X * log(1/(1 + exp(-UV)));
-
-writeMM(as(R, "CsparseMatrix"), paste(args[2], "R", sep="")); 
-
-
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+X = as.matrix(readMM(paste(args[1], "X.mtx", sep="")))
+U = as.matrix(readMM(paste(args[1], "U.mtx", sep="")))
+V = as.matrix(readMM(paste(args[1], "V.mtx", sep="")))
+
+UV = -(U%*%t(V));
+R = X * log(1/(1 + exp(-UV)));
+
+writeMM(as(R, "CsparseMatrix"), paste(args[2], "R", sep="")); 
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/quaternary/WeightedSquaredLossNo.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/quaternary/WeightedSquaredLossNo.R b/src/test/scripts/functions/quaternary/WeightedSquaredLossNo.R
index 0fcd0f0..4acdd8b 100644
--- a/src/test/scripts/functions/quaternary/WeightedSquaredLossNo.R
+++ b/src/test/scripts/functions/quaternary/WeightedSquaredLossNo.R
@@ -19,19 +19,19 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-X = as.matrix(readMM(paste(args[1], "X.mtx", sep="")))
-U = as.matrix(readMM(paste(args[1], "U.mtx", sep="")))
-V = as.matrix(readMM(paste(args[1], "V.mtx", sep="")))
-
-sl = sum( (X - U %*% t(V)) ^ 2 );
-R = as.matrix(sl);
-
-writeMM(as(R, "CsparseMatrix"), paste(args[2], "R", sep="")); 
-
-
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+X = as.matrix(readMM(paste(args[1], "X.mtx", sep="")))
+U = as.matrix(readMM(paste(args[1], "U.mtx", sep="")))
+V = as.matrix(readMM(paste(args[1], "V.mtx", sep="")))
+
+sl = sum( (X - U %*% t(V)) ^ 2 );
+R = as.matrix(sl);
+
+writeMM(as(R, "CsparseMatrix"), paste(args[2], "R", sep="")); 
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/quaternary/WeightedSquaredLossNo2.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/quaternary/WeightedSquaredLossNo2.R b/src/test/scripts/functions/quaternary/WeightedSquaredLossNo2.R
index 36b9d01..80ed1e2 100644
--- a/src/test/scripts/functions/quaternary/WeightedSquaredLossNo2.R
+++ b/src/test/scripts/functions/quaternary/WeightedSquaredLossNo2.R
@@ -1,37 +1,37 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-X = as.matrix(readMM(paste(args[1], "X.mtx", sep="")))
-U = as.matrix(readMM(paste(args[1], "U.mtx", sep="")))
-V = as.matrix(readMM(paste(args[1], "V.mtx", sep="")))
-
-sl = sum( (U %*% t(V) - X) ^ 2 );
-R = as.matrix(sl);
-
-writeMM(as(R, "CsparseMatrix"), paste(args[2], "R", sep="")); 
-
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+X = as.matrix(readMM(paste(args[1], "X.mtx", sep="")))
+U = as.matrix(readMM(paste(args[1], "U.mtx", sep="")))
+V = as.matrix(readMM(paste(args[1], "V.mtx", sep="")))
+
+sl = sum( (U %*% t(V) - X) ^ 2 );
+R = as.matrix(sl);
+
+writeMM(as(R, "CsparseMatrix"), paste(args[2], "R", sep="")); 
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/quaternary/WeightedSquaredLossPost.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/quaternary/WeightedSquaredLossPost.R b/src/test/scripts/functions/quaternary/WeightedSquaredLossPost.R
index 9704b25..81cd582 100644
--- a/src/test/scripts/functions/quaternary/WeightedSquaredLossPost.R
+++ b/src/test/scripts/functions/quaternary/WeightedSquaredLossPost.R
@@ -19,20 +19,20 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-X = as.matrix(readMM(paste(args[1], "X.mtx", sep="")))
-U = as.matrix(readMM(paste(args[1], "U.mtx", sep="")))
-V = as.matrix(readMM(paste(args[1], "V.mtx", sep="")))
-W = as.matrix(readMM(paste(args[1], "W.mtx", sep="")))
-
-sl = sum( W * (X - U %*% t(V)) ^ 2 );
-R = as.matrix(sl);
-
-writeMM(as(R, "CsparseMatrix"), paste(args[2], "R", sep="")); 
-
-
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+X = as.matrix(readMM(paste(args[1], "X.mtx", sep="")))
+U = as.matrix(readMM(paste(args[1], "U.mtx", sep="")))
+V = as.matrix(readMM(paste(args[1], "V.mtx", sep="")))
+W = as.matrix(readMM(paste(args[1], "W.mtx", sep="")))
+
+sl = sum( W * (X - U %*% t(V)) ^ 2 );
+R = as.matrix(sl);
+
+writeMM(as(R, "CsparseMatrix"), paste(args[2], "R", sep="")); 
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/quaternary/WeightedSquaredLossPost2.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/quaternary/WeightedSquaredLossPost2.R b/src/test/scripts/functions/quaternary/WeightedSquaredLossPost2.R
index 6b07556..0d26a71 100644
--- a/src/test/scripts/functions/quaternary/WeightedSquaredLossPost2.R
+++ b/src/test/scripts/functions/quaternary/WeightedSquaredLossPost2.R
@@ -1,38 +1,38 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-X = as.matrix(readMM(paste(args[1], "X.mtx", sep="")))
-U = as.matrix(readMM(paste(args[1], "U.mtx", sep="")))
-V = as.matrix(readMM(paste(args[1], "V.mtx", sep="")))
-W = as.matrix(readMM(paste(args[1], "W.mtx", sep="")))
-
-sl = sum( W * (U %*% t(V) - X) ^ 2 );
-R = as.matrix(sl);
-
-writeMM(as(R, "CsparseMatrix"), paste(args[2], "R", sep="")); 
-
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+X = as.matrix(readMM(paste(args[1], "X.mtx", sep="")))
+U = as.matrix(readMM(paste(args[1], "U.mtx", sep="")))
+V = as.matrix(readMM(paste(args[1], "V.mtx", sep="")))
+W = as.matrix(readMM(paste(args[1], "W.mtx", sep="")))
+
+sl = sum( W * (U %*% t(V) - X) ^ 2 );
+R = as.matrix(sl);
+
+writeMM(as(R, "CsparseMatrix"), paste(args[2], "R", sep="")); 
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/quaternary/WeightedSquaredLossPostNz.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/quaternary/WeightedSquaredLossPostNz.R b/src/test/scripts/functions/quaternary/WeightedSquaredLossPostNz.R
index 1e33024..fed3686 100644
--- a/src/test/scripts/functions/quaternary/WeightedSquaredLossPostNz.R
+++ b/src/test/scripts/functions/quaternary/WeightedSquaredLossPostNz.R
@@ -1,38 +1,38 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-X = as.matrix(readMM(paste(args[1], "X.mtx", sep="")))
-U = as.matrix(readMM(paste(args[1], "U.mtx", sep="")))
-V = as.matrix(readMM(paste(args[1], "V.mtx", sep="")))
-W = as.matrix(readMM(paste(args[1], "W.mtx", sep="")))
-
-sl = sum( (X!=0) * (X - U %*% t(V)) ^ 2 );
-R = as.matrix(sl);
-
-writeMM(as(R, "CsparseMatrix"), paste(args[2], "R", sep="")); 
-
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+X = as.matrix(readMM(paste(args[1], "X.mtx", sep="")))
+U = as.matrix(readMM(paste(args[1], "U.mtx", sep="")))
+V = as.matrix(readMM(paste(args[1], "V.mtx", sep="")))
+W = as.matrix(readMM(paste(args[1], "W.mtx", sep="")))
+
+sl = sum( (X!=0) * (X - U %*% t(V)) ^ 2 );
+R = as.matrix(sl);
+
+writeMM(as(R, "CsparseMatrix"), paste(args[2], "R", sep="")); 
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/quaternary/WeightedSquaredLossPre.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/quaternary/WeightedSquaredLossPre.R b/src/test/scripts/functions/quaternary/WeightedSquaredLossPre.R
index ca89ba7..3b4b2fb 100644
--- a/src/test/scripts/functions/quaternary/WeightedSquaredLossPre.R
+++ b/src/test/scripts/functions/quaternary/WeightedSquaredLossPre.R
@@ -19,20 +19,20 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-X = as.matrix(readMM(paste(args[1], "X.mtx", sep="")))
-U = as.matrix(readMM(paste(args[1], "U.mtx", sep="")))
-V = as.matrix(readMM(paste(args[1], "V.mtx", sep="")))
-W = as.matrix(readMM(paste(args[1], "W.mtx", sep="")))
-
-sl = sum( (X - W * (U %*% t(V))) ^ 2 );
-R = as.matrix(sl);
-
-writeMM(as(R, "CsparseMatrix"), paste(args[2], "R", sep="")); 
-
-
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+X = as.matrix(readMM(paste(args[1], "X.mtx", sep="")))
+U = as.matrix(readMM(paste(args[1], "U.mtx", sep="")))
+V = as.matrix(readMM(paste(args[1], "V.mtx", sep="")))
+W = as.matrix(readMM(paste(args[1], "W.mtx", sep="")))
+
+sl = sum( (X - W * (U %*% t(V))) ^ 2 );
+R = as.matrix(sl);
+
+writeMM(as(R, "CsparseMatrix"), paste(args[2], "R", sep="")); 
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/quaternary/WeightedSquaredLossPre2.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/quaternary/WeightedSquaredLossPre2.R b/src/test/scripts/functions/quaternary/WeightedSquaredLossPre2.R
index ba09e87..c194a09 100644
--- a/src/test/scripts/functions/quaternary/WeightedSquaredLossPre2.R
+++ b/src/test/scripts/functions/quaternary/WeightedSquaredLossPre2.R
@@ -1,38 +1,38 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-X = as.matrix(readMM(paste(args[1], "X.mtx", sep="")))
-U = as.matrix(readMM(paste(args[1], "U.mtx", sep="")))
-V = as.matrix(readMM(paste(args[1], "V.mtx", sep="")))
-W = as.matrix(readMM(paste(args[1], "W.mtx", sep="")))
-
-sl = sum( (W * (U %*% t(V)) - X) ^ 2 );
-R = as.matrix(sl);
-
-writeMM(as(R, "CsparseMatrix"), paste(args[2], "R", sep="")); 
-
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+X = as.matrix(readMM(paste(args[1], "X.mtx", sep="")))
+U = as.matrix(readMM(paste(args[1], "U.mtx", sep="")))
+V = as.matrix(readMM(paste(args[1], "V.mtx", sep="")))
+W = as.matrix(readMM(paste(args[1], "W.mtx", sep="")))
+
+sl = sum( (W * (U %*% t(V)) - X) ^ 2 );
+R = as.matrix(sl);
+
+writeMM(as(R, "CsparseMatrix"), paste(args[2], "R", sep="")); 
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/quaternary/WeightedUnaryMMExpDiv.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/quaternary/WeightedUnaryMMExpDiv.R b/src/test/scripts/functions/quaternary/WeightedUnaryMMExpDiv.R
index 0c8f7f7..aa00272 100644
--- a/src/test/scripts/functions/quaternary/WeightedUnaryMMExpDiv.R
+++ b/src/test/scripts/functions/quaternary/WeightedUnaryMMExpDiv.R
@@ -1,36 +1,36 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-W = as.matrix(readMM(paste(args[1], "W.mtx", sep="")))
-U = as.matrix(readMM(paste(args[1], "U.mtx", sep="")))
-V = as.matrix(readMM(paste(args[1], "V.mtx", sep="")))
-
-R = W/exp(U%*%t(V));
-
-writeMM(as(R, "CsparseMatrix"), paste(args[2], "R", sep="")); 
-
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+W = as.matrix(readMM(paste(args[1], "W.mtx", sep="")))
+U = as.matrix(readMM(paste(args[1], "U.mtx", sep="")))
+V = as.matrix(readMM(paste(args[1], "V.mtx", sep="")))
+
+R = W/exp(U%*%t(V));
+
+writeMM(as(R, "CsparseMatrix"), paste(args[2], "R", sep="")); 
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/quaternary/WeightedUnaryMMExpMult.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/quaternary/WeightedUnaryMMExpMult.R b/src/test/scripts/functions/quaternary/WeightedUnaryMMExpMult.R
index ab2c60e..c2284ce 100644
--- a/src/test/scripts/functions/quaternary/WeightedUnaryMMExpMult.R
+++ b/src/test/scripts/functions/quaternary/WeightedUnaryMMExpMult.R
@@ -1,36 +1,36 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-W = as.matrix(readMM(paste(args[1], "W.mtx", sep="")))
-U = as.matrix(readMM(paste(args[1], "U.mtx", sep="")))
-V = as.matrix(readMM(paste(args[1], "V.mtx", sep="")))
-
-R = W*exp(U%*%t(V));
-
-writeMM(as(R, "CsparseMatrix"), paste(args[2], "R", sep="")); 
-
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+W = as.matrix(readMM(paste(args[1], "W.mtx", sep="")))
+U = as.matrix(readMM(paste(args[1], "U.mtx", sep="")))
+V = as.matrix(readMM(paste(args[1], "V.mtx", sep="")))
+
+R = W*exp(U%*%t(V));
+
+writeMM(as(R, "CsparseMatrix"), paste(args[2], "R", sep="")); 
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/quaternary/WeightedUnaryMMMult2.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/quaternary/WeightedUnaryMMMult2.R b/src/test/scripts/functions/quaternary/WeightedUnaryMMMult2.R
index 3f868fa..26b25ce 100644
--- a/src/test/scripts/functions/quaternary/WeightedUnaryMMMult2.R
+++ b/src/test/scripts/functions/quaternary/WeightedUnaryMMMult2.R
@@ -1,36 +1,36 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-W = as.matrix(readMM(paste(args[1], "W.mtx", sep="")))
-U = as.matrix(readMM(paste(args[1], "U.mtx", sep="")))
-V = as.matrix(readMM(paste(args[1], "V.mtx", sep="")))
-
-R = W*(2*(U%*%t(V)));
-
-writeMM(as(R, "CsparseMatrix"), paste(args[2], "R", sep="")); 
-
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+W = as.matrix(readMM(paste(args[1], "W.mtx", sep="")))
+U = as.matrix(readMM(paste(args[1], "U.mtx", sep="")))
+V = as.matrix(readMM(paste(args[1], "V.mtx", sep="")))
+
+R = W*(2*(U%*%t(V)));
+
+writeMM(as(R, "CsparseMatrix"), paste(args[2], "R", sep="")); 
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/quaternary/WeightedUnaryMMPow2.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/quaternary/WeightedUnaryMMPow2.R b/src/test/scripts/functions/quaternary/WeightedUnaryMMPow2.R
index 62f2af0..cfc04fe 100644
--- a/src/test/scripts/functions/quaternary/WeightedUnaryMMPow2.R
+++ b/src/test/scripts/functions/quaternary/WeightedUnaryMMPow2.R
@@ -1,36 +1,36 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-W = as.matrix(readMM(paste(args[1], "W.mtx", sep="")))
-U = as.matrix(readMM(paste(args[1], "U.mtx", sep="")))
-V = as.matrix(readMM(paste(args[1], "V.mtx", sep="")))
-
-R = W/(U%*%t(V))^2;
-
-writeMM(as(R, "CsparseMatrix"), paste(args[2], "R", sep="")); 
-
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+W = as.matrix(readMM(paste(args[1], "W.mtx", sep="")))
+U = as.matrix(readMM(paste(args[1], "U.mtx", sep="")))
+V = as.matrix(readMM(paste(args[1], "V.mtx", sep="")))
+
+R = W/(U%*%t(V))^2;
+
+writeMM(as(R, "CsparseMatrix"), paste(args[2], "R", sep="")); 
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/recompile/LiteralReplaceCastScalar.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/recompile/LiteralReplaceCastScalar.dml b/src/test/scripts/functions/recompile/LiteralReplaceCastScalar.dml
index 55655c5..41971bf 100644
--- a/src/test/scripts/functions/recompile/LiteralReplaceCastScalar.dml
+++ b/src/test/scripts/functions/recompile/LiteralReplaceCastScalar.dml
@@ -19,25 +19,25 @@
 #
 #-------------------------------------------------------------
 
-
-scalar1 = $1;
-
-if( 1==1 ){
-   A = Rand(rows=10, cols=10, min=1, max=2);
-   B = Rand(rows=10, cols=10, min=3, max=4);
-   C = Rand(rows=as.scalar(A[1,1]), cols=as.scalar(B[1,1]));
-}  
-
-ret1 = sum(C) * as.double(scalar1);
-ret2 = sum(C) * as.integer(scalar1);
-
-if( sum(C)>0 & as.logical(scalar1) ){
-   ret3 = sum(C) * as.double(scalar1);
-}   
-else {
-   ret3 = sum(C);
-}   
-
-print("Ret1 = "+ret1);
-print("Ret2 = "+ret2);
+
+scalar1 = $1;
+
+if( 1==1 ){
+   A = Rand(rows=10, cols=10, min=1, max=2);
+   B = Rand(rows=10, cols=10, min=3, max=4);
+   C = Rand(rows=as.scalar(A[1,1]), cols=as.scalar(B[1,1]));
+}  
+
+ret1 = sum(C) * as.double(scalar1);
+ret2 = sum(C) * as.integer(scalar1);
+
+if( sum(C)>0 & as.logical(scalar1) ){
+   ret3 = sum(C) * as.double(scalar1);
+}   
+else {
+   ret3 = sum(C);
+}   
+
+print("Ret1 = "+ret1);
+print("Ret2 = "+ret2);
 print("Ret3 = "+ret3);
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/recompile/append_nnz.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/recompile/append_nnz.dml b/src/test/scripts/functions/recompile/append_nnz.dml
index 4c7b928..9f5b5ea 100644
--- a/src/test/scripts/functions/recompile/append_nnz.dml
+++ b/src/test/scripts/functions/recompile/append_nnz.dml
@@ -19,25 +19,25 @@
 #
 #-------------------------------------------------------------
 
-
-intercept_status = 2;
-
-X = read ($1);
-
-n = nrow (X);
-m = ncol (X);
-
-ones_n = matrix (1, rows = n, cols = 1);
-zero_cell = matrix (0, rows = 1, cols = 1);
-
-shift_X_cols = matrix (0, rows = 1, cols = m);
-
-if (intercept_status == 2) {
-    X = (X + ones_n %*% shift_X_cols);
-}
-
-X = append (X, ones_n);
-
-if(1==1){ }
-
-print("sum="+sum(X)) 
+
+intercept_status = 2;
+
+X = read ($1);
+
+n = nrow (X);
+m = ncol (X);
+
+ones_n = matrix (1, rows = n, cols = 1);
+zero_cell = matrix (0, rows = 1, cols = 1);
+
+shift_X_cols = matrix (0, rows = 1, cols = m);
+
+if (intercept_status == 2) {
+    X = (X + ones_n %*% shift_X_cols);
+}
+
+X = append (X, ones_n);
+
+if(1==1){ }
+
+print("sum="+sum(X)) 

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/recompile/constant_propagation_if.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/recompile/constant_propagation_if.R b/src/test/scripts/functions/recompile/constant_propagation_if.R
index 057438f..1b86630 100644
--- a/src/test/scripts/functions/recompile/constant_propagation_if.R
+++ b/src/test/scripts/functions/recompile/constant_propagation_if.R
@@ -19,23 +19,23 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-numrows = as.integer(args[1]);
-numcols = as.integer(args[2]);
-
-if( 1==1 )
-{
-   numrows = numrows + 1;
-   numcols = numcols + 2;
-}  
-
-X = matrix(1, numrows, numcols);
-
-writeMM(as(X, "CsparseMatrix"), paste(args[3], "X", sep="")); 
-
-
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+numrows = as.integer(args[1]);
+numcols = as.integer(args[2]);
+
+if( 1==1 )
+{
+   numrows = numrows + 1;
+   numcols = numcols + 2;
+}  
+
+X = matrix(1, numrows, numcols);
+
+writeMM(as(X, "CsparseMatrix"), paste(args[3], "X", sep="")); 
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/recompile/constant_propagation_if.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/recompile/constant_propagation_if.dml b/src/test/scripts/functions/recompile/constant_propagation_if.dml
index 742fe3e..e3c4178 100644
--- a/src/test/scripts/functions/recompile/constant_propagation_if.dml
+++ b/src/test/scripts/functions/recompile/constant_propagation_if.dml
@@ -19,16 +19,16 @@
 #
 #-------------------------------------------------------------
 
-
-numrows = $1;
-numcols = $2;
-
-if( 1==1 )
-{
-   numrows = numrows + 1;
-   numcols = numcols + 2;
-}  
-
-X = matrix(1, rows=numrows, cols=numcols);
-
-write(X, $3);       
+
+numrows = $1;
+numcols = $2;
+
+if( 1==1 )
+{
+   numrows = numrows + 1;
+   numcols = numcols + 2;
+}  
+
+X = matrix(1, rows=numrows, cols=numcols);
+
+write(X, $3);       

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/recompile/constant_propagation_sb.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/recompile/constant_propagation_sb.R b/src/test/scripts/functions/recompile/constant_propagation_sb.R
index a5dd31a..cfa6a00 100644
--- a/src/test/scripts/functions/recompile/constant_propagation_sb.R
+++ b/src/test/scripts/functions/recompile/constant_propagation_sb.R
@@ -19,29 +19,29 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-numrows = as.integer(args[1]);
-numcols = as.integer(args[2]);
-
-if( 1==1 ){}
-
-numrows2 = numrows;
-numcols2 = numcols;
-
-if( 1!=1 )
-{
-   numrows2 = numrows + 1;
-   numcols2 = numcols + 2;
-}  
-
-
-X = matrix(1, numrows2, numcols2);
-
-writeMM(as(X, "CsparseMatrix"), paste(args[3], "X", sep="")); 
-
-
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+numrows = as.integer(args[1]);
+numcols = as.integer(args[2]);
+
+if( 1==1 ){}
+
+numrows2 = numrows;
+numcols2 = numcols;
+
+if( 1!=1 )
+{
+   numrows2 = numrows + 1;
+   numcols2 = numcols + 2;
+}  
+
+
+X = matrix(1, numrows2, numcols2);
+
+writeMM(as(X, "CsparseMatrix"), paste(args[3], "X", sep="")); 
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/recompile/constant_propagation_sb.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/recompile/constant_propagation_sb.dml b/src/test/scripts/functions/recompile/constant_propagation_sb.dml
index 2038b95..add79ed 100644
--- a/src/test/scripts/functions/recompile/constant_propagation_sb.dml
+++ b/src/test/scripts/functions/recompile/constant_propagation_sb.dml
@@ -19,21 +19,21 @@
 #
 #-------------------------------------------------------------
 
-
-numrows = $1;
-numcols = $2;
-
-if( 1==1 ){}
-
-numrows2 = numrows;
-numcols2 = numcols;
-
-if( 1!=1 )
-{
-   numrows2 = numrows + 1;
-   numcols2 = numcols + 2;
-}  
-
-X = matrix(1, rows=numrows2, cols=numcols2);
-
-write(X, $3);       
+
+numrows = $1;
+numcols = $2;
+
+if( 1==1 ){}
+
+numrows2 = numrows;
+numcols2 = numcols;
+
+if( 1!=1 )
+{
+   numrows2 = numrows + 1;
+   numcols2 = numcols + 2;
+}  
+
+X = matrix(1, rows=numrows2, cols=numcols2);
+
+write(X, $3);       



[36/55] [partial] incubator-systemml git commit: [SYSTEMML-482] [SYSTEMML-480] Adding a Git attributes file to enfore Unix-styled line endings, and normalizing all of the line endings.

Posted by du...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/main/java/org/apache/sysml/runtime/matrix/sort/IndexSortComparableDesc.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/sort/IndexSortComparableDesc.java b/src/main/java/org/apache/sysml/runtime/matrix/sort/IndexSortComparableDesc.java
index 19c35f1..3c97c9b 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/sort/IndexSortComparableDesc.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/sort/IndexSortComparableDesc.java
@@ -1,49 +1,49 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.sysml.runtime.matrix.sort;
-
-import org.apache.hadoop.io.DoubleWritable;
-
-public class IndexSortComparableDesc extends IndexSortComparable
-{
-	
-	@Override
-	public int compareTo(Object o) 
-	{
-		//descending order (note: we cannot just inverted the ascending order)
-		if( o instanceof DoubleWritable ) {
-			int tmp = _dval.compareTo((DoubleWritable) o);
-			return (( tmp!=0 ) ? -1*tmp : tmp); //prevent -0
-		}
-		//compare double value and index (e.g., for stable sort)
-		else if( o instanceof IndexSortComparable) {
-			IndexSortComparable that = (IndexSortComparable)o;
-			int tmp = _dval.compareTo(that._dval);
-			tmp = (( tmp!=0 ) ? -1*tmp : tmp); //prevent -0
-			if( tmp==0 ) //secondary sort
-				tmp = _lval.compareTo(that._lval);
-			return tmp;
-		}	
-		else {
-			throw new RuntimeException("Unsupported comparison involving class: "+o.getClass().getName());
-		}
-		
-	}
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.runtime.matrix.sort;
+
+import org.apache.hadoop.io.DoubleWritable;
+
+public class IndexSortComparableDesc extends IndexSortComparable
+{
+	
+	@Override
+	public int compareTo(Object o) 
+	{
+		//descending order (note: we cannot just inverted the ascending order)
+		if( o instanceof DoubleWritable ) {
+			int tmp = _dval.compareTo((DoubleWritable) o);
+			return (( tmp!=0 ) ? -1*tmp : tmp); //prevent -0
+		}
+		//compare double value and index (e.g., for stable sort)
+		else if( o instanceof IndexSortComparable) {
+			IndexSortComparable that = (IndexSortComparable)o;
+			int tmp = _dval.compareTo(that._dval);
+			tmp = (( tmp!=0 ) ? -1*tmp : tmp); //prevent -0
+			if( tmp==0 ) //secondary sort
+				tmp = _lval.compareTo(that._lval);
+			return tmp;
+		}	
+		else {
+			throw new RuntimeException("Unsupported comparison involving class: "+o.getClass().getName());
+		}
+		
+	}
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/main/java/org/apache/sysml/runtime/matrix/sort/IndexSortMapper.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/sort/IndexSortMapper.java b/src/main/java/org/apache/sysml/runtime/matrix/sort/IndexSortMapper.java
index 686f79a..ee0af1e 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/sort/IndexSortMapper.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/sort/IndexSortMapper.java
@@ -1,75 +1,75 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.sysml.runtime.matrix.sort;
-
-import java.io.IOException;
-
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.MapReduceBase;
-import org.apache.hadoop.mapred.Mapper;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reporter;
-
-import org.apache.sysml.runtime.matrix.SortMR;
-import org.apache.sysml.runtime.matrix.data.MatrixBlock;
-import org.apache.sysml.runtime.matrix.data.MatrixIndexes;
-import org.apache.sysml.runtime.matrix.mapred.MRJobConfiguration;
-
-public class IndexSortMapper extends MapReduceBase 
-   implements Mapper<MatrixIndexes, MatrixBlock, IndexSortComparable, LongWritable>
-{
-		
-	  private int _brlen = -1;
-	  
-	  //reuse writables
-	  private LongWritable   _tmpLong = new LongWritable();
-	  private IndexSortComparable _tmpSortKey = null;
-		
-	  @Override
-	  public void map(MatrixIndexes key, MatrixBlock value, OutputCollector<IndexSortComparable, LongWritable> out, Reporter reporter) 
-        throws IOException 
-	  {
-		  if( value.getNumColumns()>1 )
-			  throw new IOException("IndexSort only supports column vectors, but found matrix block with clen="+value.getNumColumns());
-		  
-		  long row_offset = (key.getRowIndex()-1)*_brlen+1;
-		  for( int i=0; i<value.getNumRows(); i++ )
-		  {
-			  double dval = value.quickGetValue(i, 0);
-			  long lix = row_offset+i;
-			  _tmpSortKey.set( dval, lix );
-			  _tmpLong.set(lix);
-			  out.collect(_tmpSortKey, _tmpLong);  
-		  }
-	  }
-	
-	  @Override
-	  public void configure(JobConf job)
-	  {
-		 super.configure(job);
-		 _brlen = MRJobConfiguration.getNumRowsPerBlock(job, (byte) 0);
-		 boolean desc = job.getBoolean(SortMR.SORT_DECREASING, false);
-		 if( !desc )
-			 _tmpSortKey = new IndexSortComparable();
-		 else
-			 _tmpSortKey = new IndexSortComparableDesc();
-	  }
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.runtime.matrix.sort;
+
+import java.io.IOException;
+
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.Mapper;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reporter;
+
+import org.apache.sysml.runtime.matrix.SortMR;
+import org.apache.sysml.runtime.matrix.data.MatrixBlock;
+import org.apache.sysml.runtime.matrix.data.MatrixIndexes;
+import org.apache.sysml.runtime.matrix.mapred.MRJobConfiguration;
+
+public class IndexSortMapper extends MapReduceBase 
+   implements Mapper<MatrixIndexes, MatrixBlock, IndexSortComparable, LongWritable>
+{
+		
+	  private int _brlen = -1;
+	  
+	  //reuse writables
+	  private LongWritable   _tmpLong = new LongWritable();
+	  private IndexSortComparable _tmpSortKey = null;
+		
+	  @Override
+	  public void map(MatrixIndexes key, MatrixBlock value, OutputCollector<IndexSortComparable, LongWritable> out, Reporter reporter) 
+        throws IOException 
+	  {
+		  if( value.getNumColumns()>1 )
+			  throw new IOException("IndexSort only supports column vectors, but found matrix block with clen="+value.getNumColumns());
+		  
+		  long row_offset = (key.getRowIndex()-1)*_brlen+1;
+		  for( int i=0; i<value.getNumRows(); i++ )
+		  {
+			  double dval = value.quickGetValue(i, 0);
+			  long lix = row_offset+i;
+			  _tmpSortKey.set( dval, lix );
+			  _tmpLong.set(lix);
+			  out.collect(_tmpSortKey, _tmpLong);  
+		  }
+	  }
+	
+	  @Override
+	  public void configure(JobConf job)
+	  {
+		 super.configure(job);
+		 _brlen = MRJobConfiguration.getNumRowsPerBlock(job, (byte) 0);
+		 boolean desc = job.getBoolean(SortMR.SORT_DECREASING, false);
+		 if( !desc )
+			 _tmpSortKey = new IndexSortComparable();
+		 else
+			 _tmpSortKey = new IndexSortComparableDesc();
+	  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/main/java/org/apache/sysml/runtime/matrix/sort/IndexSortReducer.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/sort/IndexSortReducer.java b/src/main/java/org/apache/sysml/runtime/matrix/sort/IndexSortReducer.java
index 30c0a61..51a8a97 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/sort/IndexSortReducer.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/sort/IndexSortReducer.java
@@ -1,103 +1,103 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.sysml.runtime.matrix.sort;
-
-import java.io.IOException;
-import java.util.Iterator;
-
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.MapReduceBase;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reducer;
-import org.apache.hadoop.mapred.Reporter;
-
-import org.apache.sysml.runtime.matrix.SortMR;
-import org.apache.sysml.runtime.matrix.data.MatrixBlock;
-import org.apache.sysml.runtime.matrix.data.MatrixIndexes;
-import org.apache.sysml.runtime.matrix.mapred.MRJobConfiguration;
-import org.apache.sysml.runtime.util.MapReduceTool;
-
-public class IndexSortReducer extends MapReduceBase 
-    implements Reducer<IndexSortComparable, LongWritable, MatrixIndexes, MatrixBlock>
-{
-	
-	
-	  private String _taskID = null;
-	  private int _brlen = -1;
-	  private MatrixIndexes _indexes = null;
-	  private MatrixBlock _data = null;
-	  private int _pos = 0;
-	  
-	  private OutputCollector<MatrixIndexes, MatrixBlock> _out = null;
-	  
-	  @Override
-	  public void reduce(IndexSortComparable key, Iterator<LongWritable> values, OutputCollector<MatrixIndexes, MatrixBlock> out, Reporter report) 
-		 throws IOException 
-	  {
-		  //cache output collector
-		  _out = out;
-		  
-		  //output binary block
-		  int count = 0;
-		  while( values.hasNext() )
-		  {
-			  //flush full matrix block
-			  if( _pos >= _brlen ) {
-				  _indexes.setIndexes(_indexes.getRowIndex()+1, _indexes.getColumnIndex());
-				  out.collect(_indexes, _data);
-				  _pos = 0;
-				  _data.reset(_brlen,1,false);
-			  }
-				  
-			  _data.quickSetValue(_pos, 0, values.next().get());
-			  _pos++;
-			  count++;  
-		  }
-		  
-		  report.incrCounter(SortMR.NUM_VALUES_PREFIX, _taskID, count);	
-	  }  
-		
-	  @Override
-	  public void configure(JobConf job) 
-	  {
-		  _taskID = MapReduceTool.getUniqueKeyPerTask(job, false);
-		  _brlen = MRJobConfiguration.getNumRowsPerBlock(job, (byte) 0);
-		  _pos = 0;
-		  _data = new MatrixBlock(_brlen, 1, false);
-		  //note the output indexes are a sequence for rows and the taskID for columns
-		  //this is useful because the counts are collected over taskIDs as well, which
-		  //later on makes the task of reshifting self contained 
-		  _indexes = new MatrixIndexes(0, Long.parseLong(_taskID));
-	  }
-	  
-	  @Override
-	  public void close() 
-		  throws IOException
-	  {  
-		  //flush final matrix block
-		  if( _pos > 0 ){
-			  _indexes.setIndexes(_indexes.getRowIndex()+1, _indexes.getColumnIndex());
-			  _data.setNumRows(_pos);
-			  _out.collect(_indexes, _data);
-		  }
-	  }
-}
-
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.runtime.matrix.sort;
+
+import java.io.IOException;
+import java.util.Iterator;
+
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reducer;
+import org.apache.hadoop.mapred.Reporter;
+
+import org.apache.sysml.runtime.matrix.SortMR;
+import org.apache.sysml.runtime.matrix.data.MatrixBlock;
+import org.apache.sysml.runtime.matrix.data.MatrixIndexes;
+import org.apache.sysml.runtime.matrix.mapred.MRJobConfiguration;
+import org.apache.sysml.runtime.util.MapReduceTool;
+
+public class IndexSortReducer extends MapReduceBase 
+    implements Reducer<IndexSortComparable, LongWritable, MatrixIndexes, MatrixBlock>
+{
+	
+	
+	  private String _taskID = null;
+	  private int _brlen = -1;
+	  private MatrixIndexes _indexes = null;
+	  private MatrixBlock _data = null;
+	  private int _pos = 0;
+	  
+	  private OutputCollector<MatrixIndexes, MatrixBlock> _out = null;
+	  
+	  @Override
+	  public void reduce(IndexSortComparable key, Iterator<LongWritable> values, OutputCollector<MatrixIndexes, MatrixBlock> out, Reporter report) 
+		 throws IOException 
+	  {
+		  //cache output collector
+		  _out = out;
+		  
+		  //output binary block
+		  int count = 0;
+		  while( values.hasNext() )
+		  {
+			  //flush full matrix block
+			  if( _pos >= _brlen ) {
+				  _indexes.setIndexes(_indexes.getRowIndex()+1, _indexes.getColumnIndex());
+				  out.collect(_indexes, _data);
+				  _pos = 0;
+				  _data.reset(_brlen,1,false);
+			  }
+				  
+			  _data.quickSetValue(_pos, 0, values.next().get());
+			  _pos++;
+			  count++;  
+		  }
+		  
+		  report.incrCounter(SortMR.NUM_VALUES_PREFIX, _taskID, count);	
+	  }  
+		
+	  @Override
+	  public void configure(JobConf job) 
+	  {
+		  _taskID = MapReduceTool.getUniqueKeyPerTask(job, false);
+		  _brlen = MRJobConfiguration.getNumRowsPerBlock(job, (byte) 0);
+		  _pos = 0;
+		  _data = new MatrixBlock(_brlen, 1, false);
+		  //note the output indexes are a sequence for rows and the taskID for columns
+		  //this is useful because the counts are collected over taskIDs as well, which
+		  //later on makes the task of reshifting self contained 
+		  _indexes = new MatrixIndexes(0, Long.parseLong(_taskID));
+	  }
+	  
+	  @Override
+	  public void close() 
+		  throws IOException
+	  {  
+		  //flush final matrix block
+		  if( _pos > 0 ){
+			  _indexes.setIndexes(_indexes.getRowIndex()+1, _indexes.getColumnIndex());
+			  _data.setNumRows(_pos);
+			  _out.collect(_indexes, _data);
+		  }
+	  }
+}
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/main/java/org/apache/sysml/runtime/matrix/sort/IndexSortStitchupMapper.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/sort/IndexSortStitchupMapper.java b/src/main/java/org/apache/sysml/runtime/matrix/sort/IndexSortStitchupMapper.java
index 6405d1a..306fbc2 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/sort/IndexSortStitchupMapper.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/sort/IndexSortStitchupMapper.java
@@ -1,141 +1,141 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.sysml.runtime.matrix.sort;
-
-import java.io.IOException;
-import java.util.StringTokenizer;
-
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.MapReduceBase;
-import org.apache.hadoop.mapred.Mapper;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reporter;
-
-import org.apache.sysml.runtime.matrix.SortMR;
-import org.apache.sysml.runtime.matrix.data.MatrixBlock;
-import org.apache.sysml.runtime.matrix.data.MatrixIndexes;
-import org.apache.sysml.runtime.matrix.mapred.MRJobConfiguration;
-
-public class IndexSortStitchupMapper extends MapReduceBase 
- 	  implements Mapper<MatrixIndexes, MatrixBlock, MatrixIndexes, MatrixBlock>
-{
-	
-  	private long[] _offsets = null;
-  	private long _rlen = -1;
-  	private long _brlen = -1;
-  	
-  	private MatrixBlock _tmpBlk = null;
-  	private MatrixIndexes _tmpIx = null;
-  	
-	@Override
-	public void map(MatrixIndexes key, MatrixBlock value, OutputCollector<MatrixIndexes, MatrixBlock> out, Reporter reporter) 
-		throws IOException 
-	{
-		//compute starting cell offset
-		int id = (int)key.getColumnIndex();
-		long offset = _offsets[id];
-		offset += (key.getRowIndex()-1)*_brlen;
-		
-		//SPECIAL CASE: block aligned
-		int blksize = computeOutputBlocksize(_rlen, _brlen, offset);
-		if( offset%_brlen==0 && value.getNumRows()==blksize ) 
-		{ 
-			_tmpIx.setIndexes(offset/_brlen+1, 1);
-			out.collect(_tmpIx, value);
-		}
-		//GENERAL CASE: not block aligned
-		else 
-		{
-			int loffset = (int) (offset%_brlen);
-			//multiple output blocks
-			if( value.getNumRows()+loffset>_brlen ) 
-			{
-				long tmpnnz = 0;
-				//output first part
-				_tmpBlk.reset( (int)_brlen, 1 );
-				for( int i=0; i<_brlen-loffset; i++ )
-					_tmpBlk.quickSetValue(loffset+i, 0, value.quickGetValue(i, 0));
-				tmpnnz += _tmpBlk.getNonZeros();
-				_tmpIx.setIndexes(offset/_brlen+1, 1);
-				out.collect(_tmpIx, _tmpBlk);		
-			
-				//output second block
-				blksize = computeOutputBlocksize(_rlen, _brlen, offset+(_brlen-loffset));
-				_tmpBlk.reset( blksize, 1 );
-				for( int i=(int)_brlen-loffset; i<value.getNumRows(); i++ )
-					_tmpBlk.quickSetValue(i-((int)_brlen-loffset), 0, value.quickGetValue(i, 0));
-				tmpnnz += _tmpBlk.getNonZeros();
-				_tmpIx.setIndexes(offset/_brlen+2, 1);
-				out.collect(_tmpIx, _tmpBlk);	
-				
-				//sanity check for correctly redistributed non-zeros
-				if( tmpnnz != value.getNonZeros() )
-					throw new IOException("Number of split non-zeros does not match non-zeros of original block ("+tmpnnz+" vs "+value.getNonZeros()+")");
-			}
-			//single output block
-			else 
-			{	
-				_tmpBlk.reset( blksize, 1 );
-				for( int i=0; i<value.getNumRows(); i++ )
-					_tmpBlk.quickSetValue(loffset+i, 0, value.quickGetValue(i, 0));
-				_tmpIx.setIndexes(offset/_brlen+1, 1);
-				out.collect(_tmpIx, _tmpBlk);		
-			}
-		}
-	}
-	
-	@Override
-	public void configure(JobConf job)
-	{
-		super.configure(job);
-		_offsets = parseOffsets(job.get(SortMR.SORT_INDEXES_OFFSETS));
-		_rlen = MRJobConfiguration.getNumRows(job, (byte) 0);
-		_brlen = MRJobConfiguration.getNumRowsPerBlock(job, (byte) 0);
-		
-		_tmpIx = new MatrixIndexes();
-		_tmpBlk = new MatrixBlock((int)_brlen, 1, false);
-	}
-	
-	
-	/**
-	 * 
-	 * @param str
-	 * @return
-	 */
-	private static long[] parseOffsets(String str)
-	{
-		String counts = str.substring(1, str.length() - 1);
-		StringTokenizer st = new StringTokenizer(counts, ",");
-		int len = st.countTokens();
-		long[] ret = new long[len];
-		for( int i=0; i<len; i++ )
-			ret[i] = Long.parseLong(st.nextToken().trim());
-		
-		return ret;
-	}
-	
-	private static int computeOutputBlocksize( long rlen, long brlen, long offset )
-	{
-		long rix = offset/brlen+1;
-		int blksize = (int) Math.min(brlen, rlen-(rix-1)*brlen);
-
-		return blksize;
-	}
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.runtime.matrix.sort;
+
+import java.io.IOException;
+import java.util.StringTokenizer;
+
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.Mapper;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reporter;
+
+import org.apache.sysml.runtime.matrix.SortMR;
+import org.apache.sysml.runtime.matrix.data.MatrixBlock;
+import org.apache.sysml.runtime.matrix.data.MatrixIndexes;
+import org.apache.sysml.runtime.matrix.mapred.MRJobConfiguration;
+
+public class IndexSortStitchupMapper extends MapReduceBase 
+ 	  implements Mapper<MatrixIndexes, MatrixBlock, MatrixIndexes, MatrixBlock>
+{
+	
+  	private long[] _offsets = null;
+  	private long _rlen = -1;
+  	private long _brlen = -1;
+  	
+  	private MatrixBlock _tmpBlk = null;
+  	private MatrixIndexes _tmpIx = null;
+  	
+	@Override
+	public void map(MatrixIndexes key, MatrixBlock value, OutputCollector<MatrixIndexes, MatrixBlock> out, Reporter reporter) 
+		throws IOException 
+	{
+		//compute starting cell offset
+		int id = (int)key.getColumnIndex();
+		long offset = _offsets[id];
+		offset += (key.getRowIndex()-1)*_brlen;
+		
+		//SPECIAL CASE: block aligned
+		int blksize = computeOutputBlocksize(_rlen, _brlen, offset);
+		if( offset%_brlen==0 && value.getNumRows()==blksize ) 
+		{ 
+			_tmpIx.setIndexes(offset/_brlen+1, 1);
+			out.collect(_tmpIx, value);
+		}
+		//GENERAL CASE: not block aligned
+		else 
+		{
+			int loffset = (int) (offset%_brlen);
+			//multiple output blocks
+			if( value.getNumRows()+loffset>_brlen ) 
+			{
+				long tmpnnz = 0;
+				//output first part
+				_tmpBlk.reset( (int)_brlen, 1 );
+				for( int i=0; i<_brlen-loffset; i++ )
+					_tmpBlk.quickSetValue(loffset+i, 0, value.quickGetValue(i, 0));
+				tmpnnz += _tmpBlk.getNonZeros();
+				_tmpIx.setIndexes(offset/_brlen+1, 1);
+				out.collect(_tmpIx, _tmpBlk);		
+			
+				//output second block
+				blksize = computeOutputBlocksize(_rlen, _brlen, offset+(_brlen-loffset));
+				_tmpBlk.reset( blksize, 1 );
+				for( int i=(int)_brlen-loffset; i<value.getNumRows(); i++ )
+					_tmpBlk.quickSetValue(i-((int)_brlen-loffset), 0, value.quickGetValue(i, 0));
+				tmpnnz += _tmpBlk.getNonZeros();
+				_tmpIx.setIndexes(offset/_brlen+2, 1);
+				out.collect(_tmpIx, _tmpBlk);	
+				
+				//sanity check for correctly redistributed non-zeros
+				if( tmpnnz != value.getNonZeros() )
+					throw new IOException("Number of split non-zeros does not match non-zeros of original block ("+tmpnnz+" vs "+value.getNonZeros()+")");
+			}
+			//single output block
+			else 
+			{	
+				_tmpBlk.reset( blksize, 1 );
+				for( int i=0; i<value.getNumRows(); i++ )
+					_tmpBlk.quickSetValue(loffset+i, 0, value.quickGetValue(i, 0));
+				_tmpIx.setIndexes(offset/_brlen+1, 1);
+				out.collect(_tmpIx, _tmpBlk);		
+			}
+		}
+	}
+	
+	@Override
+	public void configure(JobConf job)
+	{
+		super.configure(job);
+		_offsets = parseOffsets(job.get(SortMR.SORT_INDEXES_OFFSETS));
+		_rlen = MRJobConfiguration.getNumRows(job, (byte) 0);
+		_brlen = MRJobConfiguration.getNumRowsPerBlock(job, (byte) 0);
+		
+		_tmpIx = new MatrixIndexes();
+		_tmpBlk = new MatrixBlock((int)_brlen, 1, false);
+	}
+	
+	
+	/**
+	 * 
+	 * @param str
+	 * @return
+	 */
+	private static long[] parseOffsets(String str)
+	{
+		String counts = str.substring(1, str.length() - 1);
+		StringTokenizer st = new StringTokenizer(counts, ",");
+		int len = st.countTokens();
+		long[] ret = new long[len];
+		for( int i=0; i<len; i++ )
+			ret[i] = Long.parseLong(st.nextToken().trim());
+		
+		return ret;
+	}
+	
+	private static int computeOutputBlocksize( long rlen, long brlen, long offset )
+	{
+		long rix = offset/brlen+1;
+		int blksize = (int) Math.min(brlen, rlen-(rix-1)*brlen);
+
+		return blksize;
+	}
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/main/java/org/apache/sysml/runtime/matrix/sort/IndexSortStitchupReducer.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/sort/IndexSortStitchupReducer.java b/src/main/java/org/apache/sysml/runtime/matrix/sort/IndexSortStitchupReducer.java
index f0f3c1e..fb9ec62 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/sort/IndexSortStitchupReducer.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/sort/IndexSortStitchupReducer.java
@@ -1,74 +1,74 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.sysml.runtime.matrix.sort;
-
-import java.io.IOException;
-import java.util.Iterator;
-
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.MapReduceBase;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reducer;
-import org.apache.hadoop.mapred.Reporter;
-
-import org.apache.sysml.runtime.DMLRuntimeException;
-import org.apache.sysml.runtime.matrix.data.MatrixBlock;
-import org.apache.sysml.runtime.matrix.data.MatrixIndexes;
-import org.apache.sysml.runtime.matrix.mapred.MRJobConfiguration;
-
-public class IndexSortStitchupReducer extends MapReduceBase 
-		implements Reducer<MatrixIndexes, MatrixBlock, MatrixIndexes, MatrixBlock>
-{
-	
-	private MatrixBlock _tmpBlk = null;
-	
-	@Override
-	public void reduce(MatrixIndexes key, Iterator<MatrixBlock> values, OutputCollector<MatrixIndexes, MatrixBlock> out, Reporter report) 
-		 throws IOException 
-	{
-		try
-		{
-			//handle first block (to handle dimensions)
-			MatrixBlock tmp = values.next();
-			_tmpBlk.reset(tmp.getNumRows(), tmp.getNumColumns());
-			_tmpBlk.merge(tmp, false);		
-			
-			//handle remaining blocks
-			while( values.hasNext() )
-			{
-				tmp = values.next();
-				_tmpBlk.merge(tmp, false);
-			}
-		}
-		catch(DMLRuntimeException ex)
-		{
-			throw new IOException(ex);
-		}
-		
-		out.collect(key, _tmpBlk);
-	}  
-	
-	@Override
-	public void configure(JobConf job)
-	{
-		int brlen = MRJobConfiguration.getNumRowsPerBlock(job, (byte) 0);
-		_tmpBlk = new MatrixBlock(brlen, 1, false);
-	}
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.runtime.matrix.sort;
+
+import java.io.IOException;
+import java.util.Iterator;
+
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reducer;
+import org.apache.hadoop.mapred.Reporter;
+
+import org.apache.sysml.runtime.DMLRuntimeException;
+import org.apache.sysml.runtime.matrix.data.MatrixBlock;
+import org.apache.sysml.runtime.matrix.data.MatrixIndexes;
+import org.apache.sysml.runtime.matrix.mapred.MRJobConfiguration;
+
+public class IndexSortStitchupReducer extends MapReduceBase 
+		implements Reducer<MatrixIndexes, MatrixBlock, MatrixIndexes, MatrixBlock>
+{
+	
+	private MatrixBlock _tmpBlk = null;
+	
+	@Override
+	public void reduce(MatrixIndexes key, Iterator<MatrixBlock> values, OutputCollector<MatrixIndexes, MatrixBlock> out, Reporter report) 
+		 throws IOException 
+	{
+		try
+		{
+			//handle first block (to handle dimensions)
+			MatrixBlock tmp = values.next();
+			_tmpBlk.reset(tmp.getNumRows(), tmp.getNumColumns());
+			_tmpBlk.merge(tmp, false);		
+			
+			//handle remaining blocks
+			while( values.hasNext() )
+			{
+				tmp = values.next();
+				_tmpBlk.merge(tmp, false);
+			}
+		}
+		catch(DMLRuntimeException ex)
+		{
+			throw new IOException(ex);
+		}
+		
+		out.collect(key, _tmpBlk);
+	}  
+	
+	@Override
+	public void configure(JobConf job)
+	{
+		int brlen = MRJobConfiguration.getNumRowsPerBlock(job, (byte) 0);
+		_tmpBlk = new MatrixBlock(brlen, 1, false);
+	}
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/main/java/org/apache/sysml/runtime/matrix/sort/ValueSortMapper.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/sort/ValueSortMapper.java b/src/main/java/org/apache/sysml/runtime/matrix/sort/ValueSortMapper.java
index 126539c..e4477e2 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/sort/ValueSortMapper.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/sort/ValueSortMapper.java
@@ -1,98 +1,98 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.sysml.runtime.matrix.sort;
-
-import java.io.IOException;
-
-import org.apache.hadoop.io.DoubleWritable;
-import org.apache.hadoop.io.IntWritable;
-import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.io.WritableComparable;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.MapReduceBase;
-import org.apache.hadoop.mapred.Mapper;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reporter;
-
-import org.apache.sysml.runtime.DMLRuntimeException;
-import org.apache.sysml.runtime.instructions.mr.CombineUnaryInstruction;
-import org.apache.sysml.runtime.matrix.SortMR;
-import org.apache.sysml.runtime.matrix.data.Converter;
-import org.apache.sysml.runtime.matrix.data.MatrixCell;
-import org.apache.sysml.runtime.matrix.data.Pair;
-import org.apache.sysml.runtime.matrix.mapred.MRJobConfiguration;
-
-@SuppressWarnings("rawtypes")
-public class ValueSortMapper<KIN extends WritableComparable, VIN extends Writable, KOUT extends WritableComparable, VOUT extends Writable> extends MapReduceBase 
-      implements Mapper<KIN, VIN, KOUT, VOUT>
-{
-	
-	private int brlen;
-	private int bclen;
-	private CombineUnaryInstruction combineInstruction=null;
-	private Converter<KIN, VIN, KOUT, VOUT> inputConverter;
-	private IntWritable one=new IntWritable(1);
-	private DoubleWritable combinedKey=new DoubleWritable();
-	
-	@SuppressWarnings("unchecked")
-	public void map(KIN key, VIN value, OutputCollector<KOUT, VOUT> out,
-			Reporter reporter) throws IOException {
-		inputConverter.convert(key, value);
-		while(inputConverter.hasNext())
-		{
-			Pair pair=inputConverter.next();
-			if(combineInstruction==null)
-			{
-				//System.out.println("output: "+pair.getKey()+": "+pair.getValue());
-				out.collect((KOUT) pair.getKey(), (VOUT)pair.getValue());
-			}else
-			{
-				processCombineUnaryInstruction(pair, out);
-			}
-		}
-	} 
-	
-	@SuppressWarnings("unchecked")
-	private void processCombineUnaryInstruction(Pair pair, OutputCollector<KOUT, VOUT> out) 
-		throws IOException
-	{
-		combinedKey.set(((MatrixCell)pair.getValue()).getValue());
-		out.collect((KOUT)combinedKey, (VOUT)one);
-	}
-	
-	@Override
-	@SuppressWarnings("unchecked")
-	public void configure(JobConf job)
-	{
-		try 
-		{
-			brlen = MRJobConfiguration.getNumRowsPerBlock(job, (byte) 0);
-			bclen = MRJobConfiguration.getNumColumnsPerBlock(job, (byte) 0);
-			String str=job.get(SortMR.COMBINE_INSTRUCTION, null);
-			if(str!=null && !str.isEmpty() && !"null".equals(str))
-					combineInstruction=(CombineUnaryInstruction) CombineUnaryInstruction.parseInstruction(str);
-			inputConverter = MRJobConfiguration.getInputConverter(job, (byte) 0);
-			inputConverter.setBlockSize(brlen, bclen);
-		} 
-		catch (DMLRuntimeException e) {
-			throw new RuntimeException(e);
-		}
-	}
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.runtime.matrix.sort;
+
+import java.io.IOException;
+
+import org.apache.hadoop.io.DoubleWritable;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.Mapper;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reporter;
+
+import org.apache.sysml.runtime.DMLRuntimeException;
+import org.apache.sysml.runtime.instructions.mr.CombineUnaryInstruction;
+import org.apache.sysml.runtime.matrix.SortMR;
+import org.apache.sysml.runtime.matrix.data.Converter;
+import org.apache.sysml.runtime.matrix.data.MatrixCell;
+import org.apache.sysml.runtime.matrix.data.Pair;
+import org.apache.sysml.runtime.matrix.mapred.MRJobConfiguration;
+
+@SuppressWarnings("rawtypes")
+public class ValueSortMapper<KIN extends WritableComparable, VIN extends Writable, KOUT extends WritableComparable, VOUT extends Writable> extends MapReduceBase 
+      implements Mapper<KIN, VIN, KOUT, VOUT>
+{
+	
+	private int brlen;
+	private int bclen;
+	private CombineUnaryInstruction combineInstruction=null;
+	private Converter<KIN, VIN, KOUT, VOUT> inputConverter;
+	private IntWritable one=new IntWritable(1);
+	private DoubleWritable combinedKey=new DoubleWritable();
+	
+	@SuppressWarnings("unchecked")
+	public void map(KIN key, VIN value, OutputCollector<KOUT, VOUT> out,
+			Reporter reporter) throws IOException {
+		inputConverter.convert(key, value);
+		while(inputConverter.hasNext())
+		{
+			Pair pair=inputConverter.next();
+			if(combineInstruction==null)
+			{
+				//System.out.println("output: "+pair.getKey()+": "+pair.getValue());
+				out.collect((KOUT) pair.getKey(), (VOUT)pair.getValue());
+			}else
+			{
+				processCombineUnaryInstruction(pair, out);
+			}
+		}
+	} 
+	
+	@SuppressWarnings("unchecked")
+	private void processCombineUnaryInstruction(Pair pair, OutputCollector<KOUT, VOUT> out) 
+		throws IOException
+	{
+		combinedKey.set(((MatrixCell)pair.getValue()).getValue());
+		out.collect((KOUT)combinedKey, (VOUT)one);
+	}
+	
+	@Override
+	@SuppressWarnings("unchecked")
+	public void configure(JobConf job)
+	{
+		try 
+		{
+			brlen = MRJobConfiguration.getNumRowsPerBlock(job, (byte) 0);
+			bclen = MRJobConfiguration.getNumColumnsPerBlock(job, (byte) 0);
+			String str=job.get(SortMR.COMBINE_INSTRUCTION, null);
+			if(str!=null && !str.isEmpty() && !"null".equals(str))
+					combineInstruction=(CombineUnaryInstruction) CombineUnaryInstruction.parseInstruction(str);
+			inputConverter = MRJobConfiguration.getInputConverter(job, (byte) 0);
+			inputConverter.setBlockSize(brlen, bclen);
+		} 
+		catch (DMLRuntimeException e) {
+			throw new RuntimeException(e);
+		}
+	}
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/main/java/org/apache/sysml/runtime/matrix/sort/ValueSortReducer.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/sort/ValueSortReducer.java b/src/main/java/org/apache/sysml/runtime/matrix/sort/ValueSortReducer.java
index 23bdbc8..13751c5 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/sort/ValueSortReducer.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/sort/ValueSortReducer.java
@@ -1,68 +1,68 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.sysml.runtime.matrix.sort;
-
-import java.io.IOException;
-import java.util.Iterator;
-
-import org.apache.hadoop.io.IntWritable;
-import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.io.WritableComparable;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.MapReduceBase;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reducer;
-import org.apache.hadoop.mapred.Reporter;
-
-import org.apache.sysml.runtime.matrix.SortMR;
-import org.apache.sysml.runtime.util.MapReduceTool;
-
-@SuppressWarnings("rawtypes")
-public class ValueSortReducer<K extends WritableComparable, V extends Writable> extends MapReduceBase 
-      implements Reducer<K, V, K, V>
-{	
-	
-	private String taskID=null;
-	private boolean valueIsWeight=false;
-	private long count=0;
-	
-	public void configure(JobConf job)
-	{
-		taskID=MapReduceTool.getUniqueKeyPerTask(job, false);
-		valueIsWeight=job.getBoolean(SortMR.VALUE_IS_WEIGHT, false);
-	}
-
-	@Override
-	public void reduce(K key, Iterator<V> values, OutputCollector<K, V> out,
-			Reporter report) throws IOException {
-		int sum=0;
-		while(values.hasNext())
-		{
-			V value=values.next();
-			out.collect(key, value);
-			if(valueIsWeight)
-				sum+=((IntWritable)value).get();
-			else
-				sum++;
-		}
-		count+=sum;
-		report.incrCounter(SortMR.NUM_VALUES_PREFIX, taskID, sum);
-	}
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.runtime.matrix.sort;
+
+import java.io.IOException;
+import java.util.Iterator;
+
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reducer;
+import org.apache.hadoop.mapred.Reporter;
+
+import org.apache.sysml.runtime.matrix.SortMR;
+import org.apache.sysml.runtime.util.MapReduceTool;
+
+@SuppressWarnings("rawtypes")
+public class ValueSortReducer<K extends WritableComparable, V extends Writable> extends MapReduceBase 
+      implements Reducer<K, V, K, V>
+{	
+	
+	private String taskID=null;
+	private boolean valueIsWeight=false;
+	private long count=0;
+	
+	public void configure(JobConf job)
+	{
+		taskID=MapReduceTool.getUniqueKeyPerTask(job, false);
+		valueIsWeight=job.getBoolean(SortMR.VALUE_IS_WEIGHT, false);
+	}
+
+	@Override
+	public void reduce(K key, Iterator<V> values, OutputCollector<K, V> out,
+			Reporter report) throws IOException {
+		int sum=0;
+		while(values.hasNext())
+		{
+			V value=values.next();
+			out.collect(key, value);
+			if(valueIsWeight)
+				sum+=((IntWritable)value).get();
+			else
+				sum++;
+		}
+		count+=sum;
+		report.incrCounter(SortMR.NUM_VALUES_PREFIX, taskID, sum);
+	}
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/main/java/org/apache/sysml/runtime/transform/ApplyTfBBMR.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/transform/ApplyTfBBMR.java b/src/main/java/org/apache/sysml/runtime/transform/ApplyTfBBMR.java
index a94b371..c51652f 100644
--- a/src/main/java/org/apache/sysml/runtime/transform/ApplyTfBBMR.java
+++ b/src/main/java/org/apache/sysml/runtime/transform/ApplyTfBBMR.java
@@ -1,156 +1,156 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.sysml.runtime.transform;
-import java.util.HashSet;
-
-import org.apache.hadoop.filecache.DistributedCache;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.mapred.Counters.Group;
-import org.apache.hadoop.mapred.JobClient;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.RunningJob;
-
-import org.apache.sysml.conf.ConfigurationManager;
-import org.apache.sysml.conf.DMLConfig;
-import org.apache.sysml.runtime.instructions.InstructionParser;
-import org.apache.sysml.runtime.instructions.mr.CSVReblockInstruction;
-import org.apache.sysml.runtime.matrix.CSVReblockMR;
-import org.apache.sysml.runtime.matrix.CSVReblockMR.BlockRow;
-import org.apache.sysml.runtime.matrix.JobReturn;
-import org.apache.sysml.runtime.matrix.WriteCSVMR;
-import org.apache.sysml.runtime.matrix.data.CSVFileFormatProperties;
-import org.apache.sysml.runtime.matrix.data.InputInfo;
-import org.apache.sysml.runtime.matrix.data.OutputInfo;
-import org.apache.sysml.runtime.matrix.data.TaggedFirstSecondIndexes;
-import org.apache.sysml.runtime.matrix.mapred.CSVReblockReducer;
-import org.apache.sysml.runtime.matrix.mapred.MRJobConfiguration;
-import org.apache.sysml.runtime.matrix.mapred.MRJobConfiguration.ConvertTarget;
-import org.apache.sysml.runtime.matrix.mapred.MRJobConfiguration.MatrixChar_N_ReducerGroups;
-import org.apache.sysml.runtime.util.MapReduceTool;
-
-/**
- * MapReduce job that performs the actual data transformations, such as recoding
- * and binning. In contrast to ApplyTxCSVMR, this job generates the output in
- * BinaryBlock format. This job takes a data set as well as the transformation
- * metadata (which, for example, computed from GenTxMtdMR) as inputs.
- * 
- */
-
-@SuppressWarnings("deprecation")
-public class ApplyTfBBMR {
-	
-	public static JobReturn runJob(String inputPath, String rblkInst, String otherInst, String specPath, String mapsPath, String tmpPath, String outputPath, String partOffsetsFile, CSVFileFormatProperties inputDataProperties, long numRows, long numColsBefore, long numColsAfter, int replication, String headerLine) throws Exception {
-		
-		CSVReblockInstruction rblk = (CSVReblockInstruction) InstructionParser.parseSingleInstruction(rblkInst);
-		
-		long[] rlens = new long[]{numRows};
-		long[] clens = new long[]{numColsAfter};
-		int[] brlens = new int[]{rblk.brlen};
-		int[] bclens = new int[]{rblk.bclen};
-		byte[] realIndexes = new byte[]{rblk.input};
-		byte[] resultIndexes = new byte[]{rblk.output};
-
-		JobConf job = new JobConf(ApplyTfBBMR.class);
-		job.setJobName("ApplyTfBB");
-
-		/* Setup MapReduce Job */
-		job.setJarByClass(ApplyTfBBMR.class);
-		
-		// set relevant classes
-		job.setMapperClass(ApplyTfBBMapper.class);
-	
-		MRJobConfiguration.setUpMultipleInputs(job, realIndexes, new String[]{inputPath}, new InputInfo[]{InputInfo.CSVInputInfo}, brlens, bclens, false, ConvertTarget.CELL);
-
-		MRJobConfiguration.setMatricesDimensions(job, realIndexes, rlens, clens);
-		MRJobConfiguration.setBlocksSizes(job, realIndexes, brlens, bclens);
-
-		MRJobConfiguration.setCSVReblockInstructions(job, rblkInst);
-		
-		//set up the instructions that will happen in the reducer, after the aggregation instrucions
-		MRJobConfiguration.setInstructionsInReducer(job, otherInst);
-
-		job.setInt("dfs.replication", replication);
-		
-		//set up preferred custom serialization framework for binary block format
-		if( MRJobConfiguration.USE_BINARYBLOCK_SERIALIZATION )
-			MRJobConfiguration.addBinaryBlockSerializationFramework( job );
-
-		//set up what matrices are needed to pass from the mapper to reducer
-		HashSet<Byte> mapoutputIndexes=MRJobConfiguration.setUpOutputIndexesForMapper(job, realIndexes,  null, 
-				rblkInst, null, otherInst, resultIndexes);
-
-		MatrixChar_N_ReducerGroups ret=MRJobConfiguration.computeMatrixCharacteristics(job, realIndexes, 
-				null, rblkInst, null, null, null, resultIndexes, mapoutputIndexes, false);
-
-		//set up the number of reducers
-		int numRed = WriteCSVMR.determineNumReducers(rlens, clens, ConfigurationManager.getConfig().getIntValue(DMLConfig.NUM_REDUCERS), ret.numReducerGroups);
-		job.setNumReduceTasks( numRed );
-
-		//set up the multiple output files, and their format information
-		MRJobConfiguration.setUpMultipleOutputs(job, new byte[]{rblk.output}, new byte[]{0}, new String[]{outputPath}, new OutputInfo[]{OutputInfo.BinaryBlockOutputInfo}, true, false);
-		
-		// configure mapper and the mapper output key value pairs
-		job.setMapperClass(ApplyTfBBMapper.class);
-		job.setMapOutputKeyClass(TaggedFirstSecondIndexes.class);
-		job.setMapOutputValueClass(BlockRow.class);
-		
-		//configure reducer
-		job.setReducerClass(CSVReblockReducer.class);
-	
-		//turn off adaptivemr
-		job.setBoolean("adaptivemr.map.enable", false);
-
-		//set unique working dir
-		MRJobConfiguration.setUniqueWorkingDir(job);
-		
-		// Add transformation metadata file as well as partOffsetsFile to Distributed cache
-		DistributedCache.addCacheFile((new Path(mapsPath)).toUri(), job);
-		DistributedCache.createSymlink(job);
-		
-		Path cachefile=new Path(new Path(partOffsetsFile), "part-00000");
-		DistributedCache.addCacheFile(cachefile.toUri(), job);
-		DistributedCache.createSymlink(job);
-		
-		job.set(MRJobConfiguration.TF_HAS_HEADER, 	Boolean.toString(inputDataProperties.hasHeader()));
-		job.set(MRJobConfiguration.TF_DELIM, 		inputDataProperties.getDelim());
-		if ( inputDataProperties.getNAStrings() != null)
-			// Adding "dummy" string to handle the case of na_strings = ""
-			job.set(MRJobConfiguration.TF_NA_STRINGS, TfUtils.prepNAStrings(inputDataProperties.getNAStrings()) );
-		job.set(MRJobConfiguration.TF_SPEC_FILE, 	specPath);
-		job.set(MRJobConfiguration.TF_SMALLEST_FILE, CSVReblockMR.findSmallestFile(job, inputPath));
-		job.set(MRJobConfiguration.OUTPUT_MATRICES_DIRS_CONFIG, outputPath);
-		job.setLong(MRJobConfiguration.TF_NUM_COLS, numColsBefore);
-		job.set(MRJobConfiguration.TF_TXMTD_PATH, mapsPath);
-		job.set(MRJobConfiguration.TF_HEADER, headerLine);
-		job.set(CSVReblockMR.ROWID_FILE_NAME, cachefile.toString());
-		job.set(MRJobConfiguration.TF_TMP_LOC, tmpPath);
-
-		RunningJob runjob=JobClient.runJob(job);
-		
-		MapReduceTool.deleteFileIfExistOnHDFS(cachefile, job);
-		
-		Group group=runjob.getCounters().getGroup(MRJobConfiguration.NUM_NONZERO_CELLS);
-		for(int i=0; i<resultIndexes.length; i++) {
-			ret.stats[i].setNonZeros(group.getCounter(Integer.toString(i)));
-		}
-		return new JobReturn(ret.stats, runjob.isSuccessful());
-	}
-	
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.runtime.transform;
+import java.util.HashSet;
+
+import org.apache.hadoop.filecache.DistributedCache;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapred.Counters.Group;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.RunningJob;
+
+import org.apache.sysml.conf.ConfigurationManager;
+import org.apache.sysml.conf.DMLConfig;
+import org.apache.sysml.runtime.instructions.InstructionParser;
+import org.apache.sysml.runtime.instructions.mr.CSVReblockInstruction;
+import org.apache.sysml.runtime.matrix.CSVReblockMR;
+import org.apache.sysml.runtime.matrix.CSVReblockMR.BlockRow;
+import org.apache.sysml.runtime.matrix.JobReturn;
+import org.apache.sysml.runtime.matrix.WriteCSVMR;
+import org.apache.sysml.runtime.matrix.data.CSVFileFormatProperties;
+import org.apache.sysml.runtime.matrix.data.InputInfo;
+import org.apache.sysml.runtime.matrix.data.OutputInfo;
+import org.apache.sysml.runtime.matrix.data.TaggedFirstSecondIndexes;
+import org.apache.sysml.runtime.matrix.mapred.CSVReblockReducer;
+import org.apache.sysml.runtime.matrix.mapred.MRJobConfiguration;
+import org.apache.sysml.runtime.matrix.mapred.MRJobConfiguration.ConvertTarget;
+import org.apache.sysml.runtime.matrix.mapred.MRJobConfiguration.MatrixChar_N_ReducerGroups;
+import org.apache.sysml.runtime.util.MapReduceTool;
+
+/**
+ * MapReduce job that performs the actual data transformations, such as recoding
+ * and binning. In contrast to ApplyTfCSVMR, this job generates the output in
+ * BinaryBlock format. This job takes a data set as well as the transformation
+ * metadata (which is, for example, computed by GenTfMtdMR) as inputs.
+ * 
+ */
+
+@SuppressWarnings("deprecation")
+public class ApplyTfBBMR {
+	
+	/**
+	 * Configures and runs the ApplyTfBB job: the mapper (ApplyTfBBMapper) reads
+	 * the CSV input and the reducer (CSVReblockReducer) writes binary blocks.
+	 *
+	 * @param inputPath path of the CSV input data
+	 * @param rblkInst CSV reblock instruction string; parsed here to obtain block sizes and the input/output indexes
+	 * @param otherInst instructions registered for execution in the reducer
+	 * @param specPath transformation specification file (stored under TF_SPEC_FILE)
+	 * @param mapsPath transformation metadata path; added to the distributed cache and stored under TF_TXMTD_PATH
+	 * @param tmpPath temporary location (stored under TF_TMP_LOC)
+	 * @param outputPath path of the binary block output
+	 * @param partOffsetsFile directory whose file "part-00000" is added to the distributed cache and deleted after the job has run
+	 * @param inputDataProperties CSV properties of the input (header flag, delimiter, NA strings)
+	 * @param numRows number of rows of the matrix
+	 * @param numColsBefore number of columns stored under TF_NUM_COLS
+	 * @param numColsAfter number of columns used as the matrix column dimension
+	 * @param replication value set for "dfs.replication"
+	 * @param headerLine header line (stored under TF_HEADER)
+	 * @return job return holding the matrix statistics (with non-zero counts taken from the job counters) and the success flag
+	 * @throws Exception if parsing the instruction, configuring or running the job fails
+	 */
+	public static JobReturn runJob(String inputPath, String rblkInst, String otherInst, String specPath, String mapsPath, String tmpPath, String outputPath, String partOffsetsFile, CSVFileFormatProperties inputDataProperties, long numRows, long numColsBefore, long numColsAfter, int replication, String headerLine) throws Exception {
+		
+		CSVReblockInstruction rblk = (CSVReblockInstruction) InstructionParser.parseSingleInstruction(rblkInst);
+		
+		// single input / single output: all per-matrix arrays have length one
+		long[] rlens = new long[]{numRows};
+		long[] clens = new long[]{numColsAfter};
+		int[] brlens = new int[]{rblk.brlen};
+		int[] bclens = new int[]{rblk.bclen};
+		byte[] realIndexes = new byte[]{rblk.input};
+		byte[] resultIndexes = new byte[]{rblk.output};
+
+		JobConf job = new JobConf(ApplyTfBBMR.class);
+		job.setJobName("ApplyTfBB");
+
+		/* Setup MapReduce Job */
+		job.setJarByClass(ApplyTfBBMR.class);
+		
+		// set relevant classes
+		job.setMapperClass(ApplyTfBBMapper.class);
+	
+		MRJobConfiguration.setUpMultipleInputs(job, realIndexes, new String[]{inputPath}, new InputInfo[]{InputInfo.CSVInputInfo}, brlens, bclens, false, ConvertTarget.CELL);
+
+		MRJobConfiguration.setMatricesDimensions(job, realIndexes, rlens, clens);
+		MRJobConfiguration.setBlocksSizes(job, realIndexes, brlens, bclens);
+
+		MRJobConfiguration.setCSVReblockInstructions(job, rblkInst);
+		
+		//set up the instructions that will happen in the reducer, after the aggregation instructions
+		MRJobConfiguration.setInstructionsInReducer(job, otherInst);
+
+		job.setInt("dfs.replication", replication);
+		
+		//set up preferred custom serialization framework for binary block format
+		if( MRJobConfiguration.USE_BINARYBLOCK_SERIALIZATION )
+			MRJobConfiguration.addBinaryBlockSerializationFramework( job );
+
+		//set up what matrices are needed to pass from the mapper to reducer
+		HashSet<Byte> mapoutputIndexes=MRJobConfiguration.setUpOutputIndexesForMapper(job, realIndexes,  null, 
+				rblkInst, null, otherInst, resultIndexes);
+
+		MatrixChar_N_ReducerGroups ret=MRJobConfiguration.computeMatrixCharacteristics(job, realIndexes, 
+				null, rblkInst, null, null, null, resultIndexes, mapoutputIndexes, false);
+
+		//set up the number of reducers
+		int numRed = WriteCSVMR.determineNumReducers(rlens, clens, ConfigurationManager.getConfig().getIntValue(DMLConfig.NUM_REDUCERS), ret.numReducerGroups);
+		job.setNumReduceTasks( numRed );
+
+		//set up the multiple output files, and their format information
+		MRJobConfiguration.setUpMultipleOutputs(job, new byte[]{rblk.output}, new byte[]{0}, new String[]{outputPath}, new OutputInfo[]{OutputInfo.BinaryBlockOutputInfo}, true, false);
+		
+		// configure mapper and the mapper output key value pairs
+		// NOTE(review): the mapper class was already set above; this second call repeats it
+		job.setMapperClass(ApplyTfBBMapper.class);
+		job.setMapOutputKeyClass(TaggedFirstSecondIndexes.class);
+		job.setMapOutputValueClass(BlockRow.class);
+		
+		//configure reducer
+		job.setReducerClass(CSVReblockReducer.class);
+	
+		//turn off adaptivemr
+		job.setBoolean("adaptivemr.map.enable", false);
+
+		//set unique working dir
+		MRJobConfiguration.setUniqueWorkingDir(job);
+		
+		// Add transformation metadata file as well as partOffsetsFile to Distributed cache
+		DistributedCache.addCacheFile((new Path(mapsPath)).toUri(), job);
+		DistributedCache.createSymlink(job);
+		
+		Path cachefile=new Path(new Path(partOffsetsFile), "part-00000");
+		DistributedCache.addCacheFile(cachefile.toUri(), job);
+		// NOTE(review): createSymlink was already called on this job a few lines above
+		DistributedCache.createSymlink(job);
+		
+		// pass the transformation settings to the tasks via the job configuration
+		job.set(MRJobConfiguration.TF_HAS_HEADER, 	Boolean.toString(inputDataProperties.hasHeader()));
+		job.set(MRJobConfiguration.TF_DELIM, 		inputDataProperties.getDelim());
+		if ( inputDataProperties.getNAStrings() != null)
+			// Adding "dummy" string to handle the case of na_strings = ""
+			job.set(MRJobConfiguration.TF_NA_STRINGS, TfUtils.prepNAStrings(inputDataProperties.getNAStrings()) );
+		job.set(MRJobConfiguration.TF_SPEC_FILE, 	specPath);
+		job.set(MRJobConfiguration.TF_SMALLEST_FILE, CSVReblockMR.findSmallestFile(job, inputPath));
+		job.set(MRJobConfiguration.OUTPUT_MATRICES_DIRS_CONFIG, outputPath);
+		job.setLong(MRJobConfiguration.TF_NUM_COLS, numColsBefore);
+		job.set(MRJobConfiguration.TF_TXMTD_PATH, mapsPath);
+		job.set(MRJobConfiguration.TF_HEADER, headerLine);
+		job.set(CSVReblockMR.ROWID_FILE_NAME, cachefile.toString());
+		job.set(MRJobConfiguration.TF_TMP_LOC, tmpPath);
+
+		RunningJob runjob=JobClient.runJob(job);
+		
+		// the offsets file is removed once runJob has returned
+		// NOTE(review): not reached if runJob throws - confirm whether the file should be cleaned up in that case too
+		MapReduceTool.deleteFileIfExistOnHDFS(cachefile, job);
+		
+		// copy the non-zero counts reported through the job counters into the result statistics
+		Group group=runjob.getCounters().getGroup(MRJobConfiguration.NUM_NONZERO_CELLS);
+		for(int i=0; i<resultIndexes.length; i++) {
+			ret.stats[i].setNonZeros(group.getCounter(Integer.toString(i)));
+		}
+		return new JobReturn(ret.stats, runjob.isSuccessful());
+	}
+	
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/main/java/org/apache/sysml/runtime/transform/ApplyTfBBMapper.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/transform/ApplyTfBBMapper.java b/src/main/java/org/apache/sysml/runtime/transform/ApplyTfBBMapper.java
index f2639f3..b91ab60 100644
--- a/src/main/java/org/apache/sysml/runtime/transform/ApplyTfBBMapper.java
+++ b/src/main/java/org/apache/sysml/runtime/transform/ApplyTfBBMapper.java
@@ -1,151 +1,151 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.sysml.runtime.transform;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.HashMap;
-
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.ByteWritable;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.SequenceFile;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.Mapper;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reporter;
-import org.apache.wink.json4j.JSONException;
-
-import org.apache.sysml.runtime.DMLRuntimeException;
-import org.apache.sysml.runtime.instructions.mr.CSVReblockInstruction;
-import org.apache.sysml.runtime.matrix.CSVReblockMR;
-import org.apache.sysml.runtime.matrix.CSVReblockMR.OffsetCount;
-import org.apache.sysml.runtime.matrix.data.TaggedFirstSecondIndexes;
-import org.apache.sysml.runtime.matrix.mapred.CSVReblockMapper;
-import org.apache.sysml.runtime.matrix.mapred.CSVReblockMapper.IndexedBlockRow;
-import org.apache.sysml.runtime.matrix.mapred.MapperBase;
-
-@SuppressWarnings("deprecation")
-public class ApplyTfBBMapper extends MapperBase implements Mapper<LongWritable, Text, TaggedFirstSecondIndexes, CSVReblockMR.BlockRow>{
-	
-	boolean _partFileWithHeader = false;
-	TfUtils tfmapper = null;
-	Reporter _reporter = null;
-	
-	// variables relevant to CSV Reblock
-	private IndexedBlockRow idxRow = null;
-	private long rowOffset=0;
-	private HashMap<Long, Long> offsetMap=new HashMap<Long, Long>();
-	private boolean _first = true;
-	private long num=0;
-	
-	@Override
-	public void configure(JobConf job) {
-		super.configure(job);
-		try {
-			_partFileWithHeader = TfUtils.isPartFileWithHeader(job);
-			tfmapper = new TfUtils(job);
-			tfmapper.loadTfMetadata(job, true);
-			
-			// Load relevant information for CSV Reblock
-			ByteWritable key=new ByteWritable();
-			OffsetCount value=new OffsetCount();
-			Path p=new Path(job.get(CSVReblockMR.ROWID_FILE_NAME));
-			
-			FileSystem fs = FileSystem.get(job);
-			Path thisPath=new Path(job.get("map.input.file")).makeQualified(fs);
-			String thisfile=thisPath.toString();
-
-			SequenceFile.Reader reader = new SequenceFile.Reader(fs, p, job);
-			while (reader.next(key, value)) {
-				// "key" needn't be checked since the offset file has information about a single CSV input (the raw data file)
-				if(thisfile.equals(value.filename))
-					offsetMap.put(value.fileOffset, value.count);
-			}
-			reader.close();
-
-			idxRow = new CSVReblockMapper.IndexedBlockRow();
-			int maxBclen=0;
-		
-			for(ArrayList<CSVReblockInstruction> insv: csv_reblock_instructions)
-				for(CSVReblockInstruction in: insv)
-				{	
-					if(maxBclen<in.bclen)
-						maxBclen=in.bclen;
-				}
-			
-			//always dense since common csv usecase
-			idxRow.getRow().data.reset(1, maxBclen, false);		
-
-		} catch (IOException e) { throw new RuntimeException(e); }
- 		 catch(JSONException e)  { throw new RuntimeException(e); }
-
-	}
-	
-	@Override
-	public void map(LongWritable rawKey, Text rawValue, OutputCollector<TaggedFirstSecondIndexes,CSVReblockMR.BlockRow> out, Reporter reporter) throws IOException  {
-		
-		if(_first) {
-			rowOffset=offsetMap.get(rawKey.get());
-			_reporter = reporter;
-			_first=false;
-		}
-		
-		// output the header line
-		if ( rawKey.get() == 0 && _partFileWithHeader ) 
-		{
-			tfmapper.processHeaderLine();
-			if ( tfmapper.hasHeader() )
-				return;
-		}
-		
-		// parse the input line and apply transformation
-		String[] words = tfmapper.getWords(rawValue);
-		
-		if(!tfmapper.omit(words))
-		{
-			words = tfmapper.apply(words);
-			try {
-				tfmapper.check(words);
-				
-				// Perform CSV Reblock
-				CSVReblockInstruction ins = csv_reblock_instructions.get(0).get(0);
-				idxRow = CSVReblockMapper.processRow(idxRow, words, rowOffset, num, ins.output, ins.brlen, ins.bclen, ins.fill, ins.fillValue, out);
-			}
-			catch(DMLRuntimeException e) {
-				throw new RuntimeException(e.getMessage() + ":" + rawValue.toString());
-			}
-			num++;
-		}
-	}
-
-	@Override
-	public void close() throws IOException {
-	}
-
-	@Override
-	protected void specialOperationsForActualMap(int index,
-			OutputCollector<Writable, Writable> out, Reporter reporter)
-			throws IOException {
-	}
-
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.runtime.transform;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.ByteWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.Mapper;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.wink.json4j.JSONException;
+
+import org.apache.sysml.runtime.DMLRuntimeException;
+import org.apache.sysml.runtime.instructions.mr.CSVReblockInstruction;
+import org.apache.sysml.runtime.matrix.CSVReblockMR;
+import org.apache.sysml.runtime.matrix.CSVReblockMR.OffsetCount;
+import org.apache.sysml.runtime.matrix.data.TaggedFirstSecondIndexes;
+import org.apache.sysml.runtime.matrix.mapred.CSVReblockMapper;
+import org.apache.sysml.runtime.matrix.mapred.CSVReblockMapper.IndexedBlockRow;
+import org.apache.sysml.runtime.matrix.mapred.MapperBase;
+
+/**
+ * Mapper of the ApplyTfBB job. Every input line is split into fields, the
+ * transformations held by {@link TfUtils} are applied, and the resulting row
+ * is handed to {@code CSVReblockMapper.processRow} for reblocking.
+ * <p>
+ * The row offset of the processed split is looked up in the offsets file
+ * named by {@code CSVReblockMR.ROWID_FILE_NAME}, which is read once in
+ * {@link #configure(JobConf)}.
+ */
+@SuppressWarnings("deprecation")
+public class ApplyTfBBMapper extends MapperBase implements Mapper<LongWritable, Text, TaggedFirstSecondIndexes, CSVReblockMR.BlockRow>{
+	
+	boolean _partFileWithHeader = false;
+	TfUtils tfmapper = null;
+	Reporter _reporter = null;
+	
+	// variables relevant to CSV Reblock
+	private IndexedBlockRow idxRow = null;
+	// row offset of this split, resolved on the first call of map()
+	private long rowOffset=0;
+	// file offset -> count, restricted to entries of the file processed by this task
+	private HashMap<Long, Long> offsetMap=new HashMap<Long, Long>();
+	private boolean _first = true;
+	// number of rows passed to the reblock so far
+	private long num=0;
+	
+	/**
+	 * Loads the transformation metadata and the offsets of the current input
+	 * file, and allocates the reusable row buffer.
+	 *
+	 * @param job configuration of the running job
+	 * @throws RuntimeException wrapping any IOException or JSONException raised during setup
+	 */
+	@Override
+	public void configure(JobConf job) {
+		super.configure(job);
+		try {
+			_partFileWithHeader = TfUtils.isPartFileWithHeader(job);
+			tfmapper = new TfUtils(job);
+			tfmapper.loadTfMetadata(job, true);
+			
+			// Load relevant information for CSV Reblock
+			ByteWritable key=new ByteWritable();
+			OffsetCount value=new OffsetCount();
+			Path p=new Path(job.get(CSVReblockMR.ROWID_FILE_NAME));
+			
+			FileSystem fs = FileSystem.get(job);
+			Path thisPath=new Path(job.get("map.input.file")).makeQualified(fs);
+			String thisfile=thisPath.toString();
+
+			SequenceFile.Reader reader = new SequenceFile.Reader(fs, p, job);
+			try {
+				while (reader.next(key, value)) {
+					// "key" needn't be checked since the offset file has information about a single CSV input (the raw data file)
+					if(thisfile.equals(value.filename))
+						offsetMap.put(value.fileOffset, value.count);
+				}
+			}
+			finally {
+				// release the reader even if reading the offsets fails
+				reader.close();
+			}
+
+			idxRow = new CSVReblockMapper.IndexedBlockRow();
+			int maxBclen=0;
+		
+			// the row buffer must be able to hold the widest block of all reblock instructions
+			for(ArrayList<CSVReblockInstruction> insv: csv_reblock_instructions)
+				for(CSVReblockInstruction in: insv)
+				{	
+					if(maxBclen<in.bclen)
+						maxBclen=in.bclen;
+				}
+			
+			//always dense since common csv usecase
+			idxRow.getRow().data.reset(1, maxBclen, false);		
+
+		} 
+		catch(IOException e) { 
+			throw new RuntimeException(e); 
+		}
+		catch(JSONException e) { 
+			throw new RuntimeException(e); 
+		}
+	}
+	
+	/**
+	 * Transforms a single input line and passes it on to the CSV reblock.
+	 *
+	 * @param rawKey file offset of the line
+	 * @param rawValue content of the line
+	 * @param out collector receiving the reblocked rows
+	 * @param reporter reporter of the running task
+	 * @throws IOException if no row offset is known for the first line of the split
+	 * @throws RuntimeException if the transformed row fails the check or cannot be reblocked
+	 */
+	@Override
+	public void map(LongWritable rawKey, Text rawValue, OutputCollector<TaggedFirstSecondIndexes,CSVReblockMR.BlockRow> out, Reporter reporter) throws IOException  {
+		
+		if(_first) {
+			// fail with a descriptive error instead of a NullPointerException on unboxing
+			Long offset = offsetMap.get(rawKey.get());
+			if( offset == null )
+				throw new IOException("Unable to find row offset for file offset " + rawKey.get() + ".");
+			rowOffset = offset;
+			_reporter = reporter;
+			_first=false;
+		}
+		
+		// output the header line
+		if ( rawKey.get() == 0 && _partFileWithHeader ) 
+		{
+			tfmapper.processHeaderLine();
+			if ( tfmapper.hasHeader() )
+				return;
+		}
+		
+		// parse the input line and apply transformation
+		String[] words = tfmapper.getWords(rawValue);
+		
+		if(!tfmapper.omit(words))
+		{
+			words = tfmapper.apply(words);
+			try {
+				tfmapper.check(words);
+				
+				// Perform CSV Reblock
+				CSVReblockInstruction ins = csv_reblock_instructions.get(0).get(0);
+				idxRow = CSVReblockMapper.processRow(idxRow, words, rowOffset, num, ins.output, ins.brlen, ins.bclen, ins.fill, ins.fillValue, out);
+			}
+			catch(DMLRuntimeException e) {
+				// keep the original exception as cause so that its stack trace is not lost
+				throw new RuntimeException(e.getMessage() + ":" + rawValue.toString(), e);
+			}
+			num++;
+		}
+	}
+
+	/** Intentionally empty override. */
+	@Override
+	public void close() throws IOException {
+	}
+
+	/** Intentionally empty override. */
+	@Override
+	protected void specialOperationsForActualMap(int index,
+			OutputCollector<Writable, Writable> out, Reporter reporter)
+			throws IOException {
+	}
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/main/java/org/apache/sysml/runtime/transform/ApplyTfCSVMR.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/transform/ApplyTfCSVMR.java b/src/main/java/org/apache/sysml/runtime/transform/ApplyTfCSVMR.java
index ce2cf48..7c323f4 100644
--- a/src/main/java/org/apache/sysml/runtime/transform/ApplyTfCSVMR.java
+++ b/src/main/java/org/apache/sysml/runtime/transform/ApplyTfCSVMR.java
@@ -1,128 +1,128 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.sysml.runtime.transform;
-import java.io.FileNotFoundException;
-import java.io.IOException;
-
-import org.apache.hadoop.filecache.DistributedCache;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.PathFilter;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapred.FileInputFormat;
-import org.apache.hadoop.mapred.FileOutputFormat;
-import org.apache.hadoop.mapred.JobClient;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.RunningJob;
-import org.apache.hadoop.mapred.TextInputFormat;
-import org.apache.hadoop.mapred.TextOutputFormat;
-
-import org.apache.sysml.runtime.matrix.CSVReblockMR;
-import org.apache.sysml.runtime.matrix.JobReturn;
-import org.apache.sysml.runtime.matrix.MatrixCharacteristics;
-import org.apache.sysml.runtime.matrix.data.CSVFileFormatProperties;
-import org.apache.sysml.runtime.matrix.mapred.MRJobConfiguration;
-
-
-@SuppressWarnings("deprecation")
-public class ApplyTfCSVMR {
-	
-	public static JobReturn runJob(String inputPath, String specPath, String mapsPath, String tmpPath, String outputPath, String partOffsetsFile, CSVFileFormatProperties inputDataProperties, long numCols, int replication, String headerLine) throws IOException, ClassNotFoundException, InterruptedException {
-		JobConf job = new JobConf(ApplyTfCSVMR.class);
-		job.setJobName("ApplyTfCSV");
-
-		/* Setup MapReduce Job */
-		job.setJarByClass(ApplyTfCSVMR.class);
-		
-		// set relevant classes
-		job.setMapperClass(ApplyTfCSVMapper.class);
-		job.setNumReduceTasks(0);
-	
-		// Add transformation metadata file as well as partOffsetsFile to Distributed cache
-		DistributedCache.addCacheFile((new Path(mapsPath)).toUri(), job);
-		DistributedCache.createSymlink(job);
-		
-		Path cachefile=new Path(partOffsetsFile);
-		DistributedCache.addCacheFile(cachefile.toUri(), job);
-		DistributedCache.createSymlink(job);
-		
-		// set input and output properties
-		job.setInputFormat(TextInputFormat.class);
-		job.setOutputFormat(TextOutputFormat.class);
-		
-		job.setMapOutputKeyClass(NullWritable.class);
-		job.setMapOutputValueClass(Text.class);
-		
-		job.setOutputKeyClass(NullWritable.class);
-		job.setOutputValueClass(Text.class);
-		
-		job.setInt("dfs.replication", replication);
-		
-		FileInputFormat.addInputPath(job, new Path(inputPath));
-		// delete outputPath, if exists already.
-		Path outPath = new Path(outputPath);
-		FileSystem fs = FileSystem.get(job);
-		fs.delete(outPath, true);
-		FileOutputFormat.setOutputPath(job, outPath);
-
-		job.set(MRJobConfiguration.TF_HAS_HEADER, 	Boolean.toString(inputDataProperties.hasHeader()));
-		job.set(MRJobConfiguration.TF_DELIM, 		inputDataProperties.getDelim());
-		if ( inputDataProperties.getNAStrings() != null)
-			// Adding "dummy" string to handle the case of na_strings = ""
-			job.set(MRJobConfiguration.TF_NA_STRINGS, TfUtils.prepNAStrings(inputDataProperties.getNAStrings()) );
-		job.set(MRJobConfiguration.TF_SPEC_FILE, 	specPath);
-		job.set(MRJobConfiguration.TF_SMALLEST_FILE, CSVReblockMR.findSmallestFile(job, inputPath));
-		job.set(MRJobConfiguration.OUTPUT_MATRICES_DIRS_CONFIG, outputPath);
-		job.setLong(MRJobConfiguration.TF_NUM_COLS, numCols);
-		job.set(MRJobConfiguration.TF_TXMTD_PATH, mapsPath);
-		job.set(MRJobConfiguration.TF_HEADER, headerLine);
-		job.set(CSVReblockMR.ROWID_FILE_NAME, cachefile.toString());
-		job.set(MRJobConfiguration.TF_TMP_LOC, tmpPath);
-		
-		//turn off adaptivemr
-		job.setBoolean("adaptivemr.map.enable", false);
-
-		// Run the job
-		RunningJob runjob = JobClient.runJob(job);
-		
-		// Since transform CSV produces part files w/ prefix transform-part-*,
-		// delete all the "default" part-..... files
-		deletePartFiles(fs, outPath);
-		
-		MatrixCharacteristics mc = new MatrixCharacteristics();
-		return new JobReturn(new MatrixCharacteristics[]{mc}, runjob.isSuccessful());
-	}
-	
-	private static void deletePartFiles(FileSystem fs, Path path) throws FileNotFoundException, IOException
-	{
-		PathFilter filter=new PathFilter(){
-			public boolean accept(Path file) {
-				return file.getName().startsWith("part-");
-	        }
-		};
-		FileStatus[] list = fs.listStatus(path, filter);
-		for(FileStatus stat : list) {
-			fs.delete(stat.getPath(), false);
-		}
-	}
-	
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.runtime.transform;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+
+import org.apache.hadoop.filecache.DistributedCache;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.PathFilter;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.FileInputFormat;
+import org.apache.hadoop.mapred.FileOutputFormat;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.RunningJob;
+import org.apache.hadoop.mapred.TextInputFormat;
+import org.apache.hadoop.mapred.TextOutputFormat;
+
+import org.apache.sysml.runtime.matrix.CSVReblockMR;
+import org.apache.sysml.runtime.matrix.JobReturn;
+import org.apache.sysml.runtime.matrix.MatrixCharacteristics;
+import org.apache.sysml.runtime.matrix.data.CSVFileFormatProperties;
+import org.apache.sysml.runtime.matrix.mapred.MRJobConfiguration;
+
+
+@SuppressWarnings("deprecation")
+public class ApplyTfCSVMR { // driver class: configures and launches the "ApplyTfCSV" job
+	/** Configures a JobConf (ApplyTfCSVMapper, zero reduce tasks, text input/output), deletes any existing outputPath, runs the job via JobClient.runJob, removes the "part-" files under outputPath, and returns a JobReturn holding a default MatrixCharacteristics plus the job's success flag. */
+	public static JobReturn runJob(String inputPath, String specPath, String mapsPath, String tmpPath, String outputPath, String partOffsetsFile, CSVFileFormatProperties inputDataProperties, long numCols, int replication, String headerLine) throws IOException, ClassNotFoundException, InterruptedException {
+		JobConf job = new JobConf(ApplyTfCSVMR.class);
+		job.setJobName("ApplyTfCSV");
+
+		/* Setup MapReduce Job */
+		job.setJarByClass(ApplyTfCSVMR.class);
+		
+		// set relevant classes
+		job.setMapperClass(ApplyTfCSVMapper.class);
+		job.setNumReduceTasks(0); // no reduce tasks are scheduled for this job
+	
+		// Add transformation metadata file as well as partOffsetsFile to Distributed cache
+		DistributedCache.addCacheFile((new Path(mapsPath)).toUri(), job);
+		DistributedCache.createSymlink(job);
+		
+		Path cachefile=new Path(partOffsetsFile);
+		DistributedCache.addCacheFile(cachefile.toUri(), job);
+		DistributedCache.createSymlink(job); // NOTE(review): createSymlink(job) was already called a few lines above; this second call looks redundant - confirm
+		
+		// set input and output properties
+		job.setInputFormat(TextInputFormat.class);
+		job.setOutputFormat(TextOutputFormat.class);
+		
+		job.setMapOutputKeyClass(NullWritable.class);
+		job.setMapOutputValueClass(Text.class);
+		
+		job.setOutputKeyClass(NullWritable.class);
+		job.setOutputValueClass(Text.class);
+		
+		job.setInt("dfs.replication", replication);
+		
+		FileInputFormat.addInputPath(job, new Path(inputPath));
+		// delete outputPath, if exists already.
+		Path outPath = new Path(outputPath);
+		FileSystem fs = FileSystem.get(job);
+		fs.delete(outPath, true); // second argument requests a recursive delete; the boolean result is not checked
+		FileOutputFormat.setOutputPath(job, outPath);
+
+		job.set(MRJobConfiguration.TF_HAS_HEADER, 	Boolean.toString(inputDataProperties.hasHeader()));
+		job.set(MRJobConfiguration.TF_DELIM, 		inputDataProperties.getDelim());
+		if ( inputDataProperties.getNAStrings() != null) // no braces: only the single job.set(...) statement below is conditional
+			// Adding "dummy" string to handle the case of na_strings = ""
+			job.set(MRJobConfiguration.TF_NA_STRINGS, TfUtils.prepNAStrings(inputDataProperties.getNAStrings()) );
+		job.set(MRJobConfiguration.TF_SPEC_FILE, 	specPath);
+		job.set(MRJobConfiguration.TF_SMALLEST_FILE, CSVReblockMR.findSmallestFile(job, inputPath));
+		job.set(MRJobConfiguration.OUTPUT_MATRICES_DIRS_CONFIG, outputPath);
+		job.setLong(MRJobConfiguration.TF_NUM_COLS, numCols);
+		job.set(MRJobConfiguration.TF_TXMTD_PATH, mapsPath);
+		job.set(MRJobConfiguration.TF_HEADER, headerLine);
+		job.set(CSVReblockMR.ROWID_FILE_NAME, cachefile.toString()); // same partOffsetsFile path that was added to the distributed cache above
+		job.set(MRJobConfiguration.TF_TMP_LOC, tmpPath);
+		
+		//turn off adaptivemr
+		job.setBoolean("adaptivemr.map.enable", false);
+
+		// Run the job
+		RunningJob runjob = JobClient.runJob(job);
+		
+		// Since transform CSV produces part files w/ prefix transform-part-*,
+		// delete all the "default" part-..... files
+		deletePartFiles(fs, outPath);
+		
+		MatrixCharacteristics mc = new MatrixCharacteristics(); // created with the no-arg constructor; nothing in this method sets its fields
+		return new JobReturn(new MatrixCharacteristics[]{mc}, runjob.isSuccessful());
+	}
+	/** Lists the entries directly under path whose file name starts with "part-" and deletes each one with the recursive flag set to false. */
+	private static void deletePartFiles(FileSystem fs, Path path) throws FileNotFoundException, IOException
+	{
+		PathFilter filter=new PathFilter(){
+			public boolean accept(Path file) {
+				return file.getName().startsWith("part-"); // matches on the last path component only
+	        }
+		};
+		FileStatus[] list = fs.listStatus(path, filter);
+		for(FileStatus stat : list) {
+			fs.delete(stat.getPath(), false);
+		}
+	}
+	
+}


[47/55] [partial] incubator-systemml git commit: [SYSTEMML-482] [SYSTEMML-480] Adding a Git attributes file to enfore Unix-styled line endings, and normalizing all of the line endings.

Posted by du...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/scripts/algorithms/KM.dml
----------------------------------------------------------------------
diff --git a/scripts/algorithms/KM.dml b/scripts/algorithms/KM.dml
index ae5d5dd..fbfa917 100644
--- a/scripts/algorithms/KM.dml
+++ b/scripts/algorithms/KM.dml
@@ -1,619 +1,619 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-#  
-# THIS SCRIPT ANALIZES SURVIVAL DATA USING KAPLAN-MEIER ESTIMATES 
-#
-# INPUT   PARAMETERS:
-# ---------------------------------------------------------------------------------------------
-# NAME    TYPE     DEFAULT      MEANING
-# ---------------------------------------------------------------------------------------------
-# X       String   ---          Location to read the input matrix X containing the survival data: 
-#								timestamps, whether event occurred (1) or data is censored (0), and a number of factors (categorical features) 
-#								for grouping and/or stratifying 
-# TE	  String   ---          Column indices of X which contain timestamps (first entry) and event information (second entry) 
-# GI	  String   ---          Column indices of X corresponding to the factors to be used for grouping
-# SI	  String   ---          Column indices of X corresponding to the factors to be used for stratifying				
-# O       String   ---          Location to write the matrix containing the results of the Kaplan-Meier analysis; see below for the description
-# M       String   ---          Location to write Matrix M containing the following statistic: total number of events, median and its confidence intervals; 
-#								if survival data for multiple groups and strata are provided each row of M contains the above statistics per group and stratum
-# T 	  String   " "			If survival data from multiple groups available and ttype=log-rank or wilcoxon, 
-#								location to write the matrix containing result of the (stratified) test for comparing multiple groups
-# alpha   Double   0.05         Parameter to compute 100*(1-alpha)% confidence intervals for the survivor function and its median 
-# etype   String   "greenwood"  Parameter to specify the error type according to "greenwood" (the default) or "peto"
-# ctype   String   "log"        Parameter to modify the confidence interval; "plain" keeps the lower and upper bound of 
-#								the confidence interval unmodified,	"log" (the default) corresponds to logistic transformation and 
-#								"log-log" corresponds to the complementary log-log transformation 
-# ttype   String   "none"   	If survival data for multiple groups is available specifies which test to perform for comparing 
-#								survival data across multiple groups: "none" (the default) "log-rank" or "wilcoxon" test   
-# fmt     String   "text"       The output format of results of the Kaplan-Meier analysis, such as "text" or "csv"
-# ---------------------------------------------------------------------------------------------
-# OUTPUT: 
-# 1- Matrix KM whose dimension depends on the number of groups (denoted by g) and strata (denoted by s) in the data: 
-#	each collection of 7 consecutive columns in KM corresponds to a unique combination of groups and strata in the data with the following schema
-# 	1. col: timestamp
-# 	2. col: no. at risk
-# 	3. col: no. of events
-# 	4. col: Kaplan-Meier estimate of survivor function surv
-# 	5. col: standard error of surv
-# 	6. col: lower 100*(1-alpha)% confidence interval for surv
-# 	7. col: upper 100*(1-alpha)% confidence interval for surv
-# 2- Matrix M whose dimension depends on the number of groups (g) and strata (s) in the data (k denotes the number of factors used for grouping 
-#	,i.e., ncol(GI) and l denotes the number of factors used for stratifying, i.e., ncol(SI))
-#	M[,1:k]: unique combination of values in the k factors used for grouping 
-#	M[,(k+1):(k+l)]: unique combination of values in the l factors used for stratifying
-#	M[,k+l+1]: total number of records
-#	M[,k+l+2]: total number of events
-#	M[,k+l+3]: median of surv
-#	M[,k+l+4]: lower 100*(1-alpha)% confidence interval of the median of surv 
-#	M[,k+l+5]: upper 100*(1-alpha)% confidence interval of the median of surv
-#	If the number of groups and strata is equal to 1, M will have 4 columns with 
-#	M[,1]: total number of events
-#	M[,2]: median of surv
-#	M[,3]: lower 100*(1-alpha)% confidence interval of the median of surv 
-#	M[,4]: upper 100*(1-alpha)% confidence interval of the median of surv
-# 3- If survival data from multiple groups available and ttype=log-rank or wilcoxon, a 1 x 4 matrix T and an g x 5 matrix T_GROUPS_OE with
-#	T_GROUPS_OE[,1] = no. of events	
-#	T_GROUPS_OE[,2] = observed value (O)
-#	T_GROUPS_OE[,3] = expected value (E)
-#	T_GROUPS_OE[,4] = (O-E)^2/E
-#	T_GROUPS_OE[,5] = (O-E)^2/V 	 
-#	T[1,1] = no. of groups
-#	T[1,2] = degree of freedom for Chi-squared distributed test statistic
-#	T[1,3] = test statistic 
-#	T[1,4] = P-value
-# -------------------------------------------------------------------------------------------
-# HOW TO INVOKE THIS SCRIPT - EXAMPLE:
-# hadoop jar SystemML.jar -f KM.dml -nvargs X=INPUT_DIR/X TE=INPUT_DIR/TE GI=INPUT_DIR/GI SI=INPUT_DIR/SI O=OUTPUT_DIR/O 
-#											M=OUTPUT_DIR/M T=OUTPUT_DIR/T alpha=0.05 etype=greenwood ctype=log fmt=csv
-
-fileX = $X;
-fileTE = $TE;
-fileGI = ifdef ($GI, " ");
-fileSI = ifdef ($SI, " ");
-fileO = $O;
-fileM = $M;
-
-# Default values of some parameters
-fileT = ifdef ($T, " ");                  # $T=" "
-
-fileG = ifdef ($G, " ");                 # $G=" "
-fileS = ifdef ($S, " ");                 # $S=" "
-fmtO = ifdef ($fmt, "text");             # $fmt="text"
-alpha = ifdef ($alpha, 0.05);            # $alpha=0.05
-err_type  = ifdef ($etype, "greenwood"); # $etype="greenwood"
-conf_type = ifdef ($ctype, "log");       # $ctype="log"
-test_type = ifdef ($ttype, "none");      # $ttype="none"
-
-X = read (fileX);
-TE = read (fileTE);
-if (fileGI != " ") {
-	GI = read (fileGI);
-} else {
-    GI = matrix (0, rows = 1, cols = 1);
-}
-
-if (fileSI != " "){
-	SI = read (fileSI);
-} else {
-    SI = matrix (0, rows = 1, cols = 1);
-}
-
-TE = t(TE);
-GI = t(GI);
-SI = t(SI);
-		
-# check arguments for validity
-if (err_type != "greenwood" & err_type != "peto") { 
-	stop (err_type + " is not a valid error type!");
-}
-
-if (conf_type != "plain" & conf_type != "log" & conf_type != "log-log") { 
-	stop (conf_type + " is not a valid confidence type!");
-}
-
-if (test_type != "log-rank" & test_type != "wilcoxon" & test_type != "none") {
-	stop (test_type + " is not a valid test type!");
-}
-
-n_group_cols = ncol (GI);
-n_stratum_cols = ncol (SI);
-
-# check GI and SI for validity
-GI_1_1 = as.scalar (GI[1,1]);
-SI_1_1 = as.scalar (SI[1,1]);	
-if (n_group_cols == 1) {
-	if (GI_1_1 == 0) { # no factors for grouping
-		n_group_cols = 0;
-	}
-} else if (GI_1_1 == 0) {
-	stop ("Matrix GI contains zero entries!");
-}
-if (n_stratum_cols == 1) {
-	if (SI_1_1 == 0) { # no factors for stratifying
-		n_stratum_cols = 0;
-	}
-} else if (SI_1_1 == 0) {
-	stop ("Matrix SI contains zero entries!");
-}
-
-if (2 + n_group_cols + n_stratum_cols > ncol (X)) {
-	stop ("X has an incorrect number of columns!");
-}
-
-# reorder cols of X 
-if (GI_1_1 == 0 & SI_1_1 == 0) {
-	Is = TE;
-} else if (GI_1_1 == 0) {
-	Is = append (TE, SI);
-} else if (SI_1_1 == 0) {
-	Is = append (TE, GI);
-} else {
-	Is = append (TE, append (GI, SI));
-}
-X = X %*% table (Is, seq (1, 2 + n_group_cols + n_stratum_cols), ncol (X), 2 + n_group_cols + n_stratum_cols);	
-
-num_records = nrow (X);
-num_groups = 1;
-num_strata = 1;
-
-### compute group id for each record
-print ("Perform grouping...");
-if (n_group_cols > 0) {
-	for (g in 1:n_group_cols) { # sort columns corresponding to groups sequentially
-		X = order (target = X, by = 2 + g);
-	}
-	XG = X[,3:(3 + n_group_cols - 1)];
-	Idx = matrix (1, rows = num_records, cols = 1);
-	Idx[2:num_records,] = rowMaxs (ppred (X[1:(num_records - 1),3:(2 + n_group_cols)], X[2:num_records,3:(2 + n_group_cols)], "!="));
-	num_groups = sum (Idx);
-
-	XG = replace (target = XG, pattern = 0, replacement = "Infinity");
-	XG = XG * Idx;
-	XG = replace (target = XG, pattern = "NaN", replacement = 0);	
-	G_cols = removeEmpty (target = XG, margin = "rows"); 
-	G_cols = replace (target = G_cols, pattern = "Infinity", replacement = 0);	
-
-	A = removeEmpty (target = diag (Idx), margin = "cols");
-	if (ncol (A) > 1) {
-		A[,1:(ncol (A) - 1)] = A[,1:(ncol (A) - 1)] - A[,2:ncol (A)];
-		B = cumsum (A);
-		Gi = B %*% seq(1, ncol(B)); # group ids
-	} else { # there is only one group
-		Gi = matrix (1, rows = num_records, cols = 1);
-	}
-	if (n_stratum_cols > 0) {
-		X = append (append (X[,1:2],Gi), X[,(3 + g):ncol (X)]);
-	} else { # no strata
-		X = append (X[,1:2],Gi);
-	}
-}
-
-### compute stratum id for each record
-print ("Perform stratifying...");
-if (n_stratum_cols > 0) {
-	s_offset = 2;
-	if (n_group_cols > 0) {
-		s_offset = 3;
-	}
-	for (s in 1:n_stratum_cols) { # sort columns corresponding to strata sequentially
-		X = order (target = X, by = s_offset + s);		
-	}
-	XS = X[,(s_offset + 1):(s_offset + n_stratum_cols)];
-	Idx = matrix (1, rows = num_records, cols = 1);
-	Idx[2:num_records,] = rowMaxs (ppred (X[1:(num_records - 1),(s_offset + 1):(s_offset + n_stratum_cols)], X[2:num_records,(s_offset + 1):(s_offset + n_stratum_cols)], "!="));
-	num_strata = sum (Idx);
-
-	XS = replace (target = XS, pattern = 0, replacement = "Infinity");
-	XS = XS * Idx;
-	XS = replace (target = XS, pattern = "NaN", replacement = 0);	
-	S_cols = removeEmpty (target = XS, margin = "rows"); 
-	S_cols = replace (target = S_cols, pattern = "Infinity", replacement = 0);	
-
-	SB = removeEmpty (target = seq (1,num_records), margin = "rows", select = Idx); # indices of stratum boundaries 
-	A = removeEmpty (target = diag (Idx), margin = "cols");
-	if (ncol (A) > 1) {
-		A[,1:(ncol (A) - 1)] = A[,1:(ncol (A) - 1)] - A[,2:ncol (A)];
-		B = cumsum (A);
-		Si = B %*% seq(1, ncol(B)); # stratum ids
-	} else { # there is only one stratum
-		Si = matrix (1, rows = num_records, cols = 1);
-	}
-	X = append (X[,1:3],Si);
-}
-
-if (n_group_cols == 0 & n_stratum_cols == 0) {
-	X = append (X, matrix (1, rows = num_records, cols = 2));
-	SB = matrix (1, rows = 1, cols = 1);	
-} else if (n_group_cols == 0) {	
-	X = append (X[,1:2], append (matrix (1, rows = num_records, cols = 1), X[,3]));
-} else if (n_stratum_cols == 0) {
-	X = append (X, matrix (1, rows = num_records, cols = 1));
-	SB = matrix (1, rows = 1, cols = 1);
-}
-
-######## BEGIN KAPLAN-MEIER ANALYSIS
-print ("BEGIN KAPLAN-MEIER SURVIVAL FIT SCRIPT");
-
-KM = matrix (0, rows = num_records, cols = num_groups * num_strata * 7);
-KM_cols_select = matrix (1, rows = num_groups * num_strata * 7, cols = 1);
-GSI = matrix (0, rows = num_groups * num_strata, cols = 2);
-a = 1/0;
-M = matrix (a, rows = num_groups * num_strata, cols = 5);
-M_cols = seq (1, num_groups * num_strata);
-z_alpha_2 = icdf (target = 1 - alpha / 2, dist = "normal");
-
-if (num_groups > 1 & test_type != "none") { 
-	str = "";
-	TEST = matrix (0, rows = num_groups, cols = 5);
-	TEST_GROUPS_OE = matrix (0, rows = 1, cols = 4);
-	U = matrix (0, rows = num_groups, cols = num_strata);
-	U_OE = matrix (0, rows = num_groups, cols = num_strata);
-	OBS = matrix (0, rows = num_groups, cols = num_strata);
-	EXP = matrix (0, rows = num_groups, cols = num_strata);
-	V_sum_total = matrix (0, rows = num_groups-1, cols = (num_groups-1) * num_strata);
-	n_event_all_global = matrix(1, rows=num_groups, cols=num_strata);
-} else if (num_groups == 1 & test_type != "none") {
-	stop ("Data contains only one group or no groups, at least two groups are required for test!");
-}
-
-parfor (s in 1:num_strata, check = 0) {
-	
-	start_ind = as.scalar (SB[s,]);
-	end_ind = num_records;
-	if (s != num_strata) {
-		end_ind = as.scalar (SB[s + 1,]) - 1;
-	} 
-	
-	######## RECODING TIMESTAMPS PRESERVING THE ORDER
-	
-	X_cur = X[start_ind:end_ind,];
-	range = end_ind - start_ind + 1;
-	X_cur = order (target = X_cur, by = 1);
-	Idx1 = matrix (1, rows = range, cols = 1);
-	
-	num_timestamps = 1;
-	if (range == 1) {
-		RT = matrix (1, rows = 1, cols = 1);
-	} else {
-		Idx1[2:range,1] = ppred (X_cur[1:(range - 1),1], X_cur[2:range,1], "!=");
-		num_timestamps = sum (Idx1);
-		A1 = removeEmpty (target = diag (Idx1), margin = "cols");
-		if (ncol (A1) > 1) {
-			A1[,1:(ncol (A1) - 1)] = A1[,1:(ncol (A1) - 1)] - A1[,2:ncol (A1)];
-			B1 = cumsum (A1);
-			RT = B1 %*% seq(1, ncol(B1)); 	
-		} else { # there is only one group
-			RT = matrix (1, rows = range, cols = 1);
-		}
-	}
-	
-	T = X_cur[,1];
-	E = X_cur[,2];
-	G = X_cur[,3];
-	S = X_cur[,4];
-	
-	n_event_stratum = aggregate (target = E, groups = RT, fn = "sum"); # no. of uncensored events per stratum 
-	n_event_all_stratum = aggregate (target = E, groups = RT, fn = "count"); # no. both censored and uncensored of events per stratum 
-	Idx1 = cumsum (n_event_all_stratum); 
-	time_stratum = table (seq (1, nrow (Idx1), 1), Idx1) %*% T; # distinct timestamps both censored and uncensored per stratum 
-	time_stratum_has_zero = sum (ppred (time_stratum, 0, "==")) > 0;
-	if (time_stratum_has_zero) {
-		time_stratum = 	replace (target = time_stratum, pattern = 0, replacement = "Infinity");
-	}
-	n_time_all1 = nrow (n_event_stratum);  # no. of distinct timestamps both censored and uncensored per stratum
-	n_event_all_stratum_agg = matrix (0, rows = n_time_all1, cols = 1); 
-	if (n_time_all1 > 1) {
-		n_event_all_stratum_agg[2:n_time_all1,] = Idx1[1:(n_time_all1 - 1),]; 
-	}
-	n_risk_stratum = range - n_event_all_stratum_agg; # no. at risk per stratum
-
-	if (num_groups > 1 & test_type != "none") {	# needed for log-rank or wilcoxon test	
-		n_risk_n_event_stratum = matrix (0, rows = n_time_all1, cols = num_groups * 2);
-	}
-
-	parfor (g in 1:num_groups, check = 0) {
-	
-		group_ind = ppred (G, g, "==");
-		KM_offset = (s - 1) * num_groups * 7 + (g - 1) * 7;
-		M_offset = (s - 1) * num_groups + g;
-		if (sum (group_ind) != 0) { # group g is present in the stratum s
-
-			GSI_offset = (s - 1) * num_groups + g; 
-			GSI[GSI_offset,1] = g;
-			GSI[GSI_offset,2] = s;		
-			E_cur = E * group_ind;
-
-			######## COMPUTE NO. AT RISK AND NO.OF EVENTS FOR EACH TIMESTAMP
-			
-			n_event = aggregate (target = E_cur, groups = RT, fn = "sum"); # no. of uncensored events per stratum per group
-			n_event_all = aggregate (target = group_ind, groups = RT, fn = "sum"); # no. of both censored and uncensored events per stratum per group
-			Idx1 = cumsum (n_event_all); 
-			event_occurred = ppred (n_event, 0, ">");
-			if (time_stratum_has_zero) {
-				time = replace (target = time_stratum * event_occurred, pattern = "NaN", replacement = 0);
-				time = removeEmpty (target = time, margin = "rows");
-				time = replace (target = time, pattern = "Infinity", replacement = 0);
-			} else {
-				time = removeEmpty (target = time_stratum * event_occurred, margin = "rows");
-			}
-			n_time_all2 = nrow (n_event);  # no. of distinct timestamps both censored and uncensored per stratum per group
-			n_event_all_agg = matrix (0, rows = n_time_all2, cols = 1); 
-			if (n_time_all2 > 1) {
-				n_event_all_agg[2:n_time_all2,] = Idx1[1:(n_time_all2 - 1),]; 
-			}
-				
-			n_risk = sum (group_ind) - n_event_all_agg; # no. at risk per stratum per group
-			
-			if (num_groups > 1 & test_type != "none") {
-				n_risk_n_event_stratum[,(g - 1) * 2 + 1] = n_risk;
-				n_risk_n_event_stratum[,(g - 1) * 2 + 2] = n_event;					
-			}
-			
-			# Extract only rows corresponding to events, i.e., for which n_event is nonzero 
-			Idx1 = ppred (n_event, 0, "!=");
-			KM_1 = matrix (0, rows = n_time_all2, cols = 2);
-			KM_1[,1] = n_risk;
-			KM_1[,2] = n_event;
-			KM_1 = removeEmpty (target = KM_1, margin = "rows", select = Idx1);
-			n_risk = KM_1[,1];
-			n_event = KM_1[,2];
-			n_time = nrow (time);
-			
-			######## ESTIMATE SERVIVOR FUNCTION SURV, ITS STANDARD ERROR SE_SURV, AND ITS 100(1-ALPHA)% CONFIDENCE INTERVAL	
-			surv = cumprod ((n_risk - n_event) / n_risk);
-			tmp = n_event / (n_risk * (n_risk - n_event));
-			se_surv = sqrt (cumsum (tmp)) * surv; 
-			if (err_type == "peto") {
-				se_surv = (surv * sqrt(1 - surv) / sqrt(n_risk));		
-			}
-		
-			if (conf_type == "plain") { 
-				# True survivor function is in [surv +- z_alpha_2 * se_surv], 
-				# values less than 0 are replaced by 0, values larger than 1are replaced by 1!
-				CI_l = max (surv - (z_alpha_2 * se_surv), 0);  
-				CI_r = min (surv + (z_alpha_2 * se_surv), 1); 
-			} else if (conf_type == "log") {
-				# True survivor function is in [surv * exp(+- z_alpha_2 * se_surv / surv)]
-				CI_l = max (surv * exp (- z_alpha_2 * se_surv / surv), 0); 
-				CI_r = min (surv * exp ( z_alpha_2 * se_surv / surv), 1); 
-			} else { # conf_type == "log-log"
-				# True survivor function is in [surv ^ exp(+- z_alpha_2 * se(log(-log(surv))))]
-				CI_l = max (surv ^ exp (- z_alpha_2 * se_surv / log(surv)), 0); 
-				CI_r = min (surv ^ exp ( z_alpha_2 * se_surv / log(surv)), 1);  
-			}	 
-			#
-			if (as.scalar (n_risk[n_time,]) == as.scalar (n_event[n_time,])) {
-				CI_l[n_time,] = 0/0;
-				CI_r[n_time,] = 0/0;
-			}	
-		
-			n_event_sum = sum (n_event);
-			n_event_sum_all = sum(n_event_all);
-			if (n_event_sum > 0) {
-				# KM_offset = (s - 1) * num_groups * 7 + (g - 1) * 7;
-				KM[1:n_time,KM_offset + 1] = time;
-				KM[1:n_time,KM_offset + 2] = n_risk;			
-				KM[1:n_time,KM_offset + 3] = n_event;
-				KM[1:n_time,KM_offset + 4] = surv;
-				KM[1:n_time,KM_offset + 5] = se_surv;
-				KM[1:n_time,KM_offset + 6] = CI_l;
-				KM[1:n_time,KM_offset + 7] = CI_r;				
-			}			
-						
-			######## ESTIMATE MEDIAN OF SERVIVAL TIMES AND ITS 100(1-ALPHA)% CONFIDENCE INTERVAL
-		
-			p_5 = ppred (surv, 0.5, "<="); 
-			pn_5 = sum (p_5);
-			#M_offset = (s - 1) * num_groups + g;
-			# if the estimated survivor function is larger than 0.5 for all timestamps median does not exist! 
-			p_5_exists = (pn_5 != 0);
-			M[M_offset,2] = n_event_sum;
-			M[M_offset,1] = n_event_sum_all; 
-			if (p_5_exists) {
-				if ( as.scalar (surv[n_time - pn_5 + 1,1]) == 0.5 ) { # if the estimated survivor function is exactly equal to 0.5
-					if (pn_5 > 1) {
-						t_5 = as.scalar ((time[n_time - pn_5 + 1,1] + time[n_time - pn_5 + 2,1])/2);
-					} else {
-						t_5 = as.scalar (time[n_time - pn_5 + 1,1]);
-					}
-				} else {
-					t_5 = as.scalar (time[n_time - pn_5 + 1,1]);
-				}
-		
-				l_ind = ppred (CI_l, 0.5, "<=");
-				r_ind = ppred (CI_r, 0.5, "<=");
-				l_ind_sum = sum (l_ind);
-				r_ind_sum = sum (r_ind);
-				l_min_ind = as.scalar (rowIndexMin (t(l_ind)));
-				r_min_ind = as.scalar (rowIndexMin (t(r_ind)));		
-				if (l_min_ind == n_time) {
-					if (l_ind_sum > 0) {
-						if (as.scalar (l_ind[n_time,1]) == 0) { # NA at last position
-							M[M_offset,4] = time[n_time - l_ind_sum,1];
-						} else {
-							M[M_offset,4] = time[1,1];
-						}
-					}
-				} else {
-					M[M_offset,4] = time[l_min_ind + 1,1];
-				}
-				#
-				if (r_min_ind == n_time) {
-					if (r_ind_sum > 0) {
-						if (as.scalar (r_ind[n_time,1]) == 0) { # NA at last position
-							M[M_offset,5] = time[n_time - r_ind_sum,1];
-						} else {
-							M[M_offset,5] = time[1,1];
-						}
-					}
-				} else {
-					M[M_offset,5] = time[r_min_ind + 1,1];
-				}
-				M[M_offset,3] = t_5;
-				if (test_type != "none"){
-					n_event_all_global[g,s] = n_event_sum_all; 
-				}
-			}
-		} else {
-			print ("group " + g + " is not present in the stratum " + s);
-			KM_cols_select[(KM_offset + 1):(KM_offset + 7),1] = matrix (0, rows = 7, cols = 1);
-			M_cols[M_offset,1] = 0;
-		}		
-	}
-	
-			
-	######## COMPARISON BETWEEN DIFFERENT GROUPS USING LOG-RANK OR WILCOXON TEST
-		
-	if (num_groups > 1 & test_type != "none") {
-
-		V = matrix (0, rows = num_groups-1, cols = num_groups-1);
-		parfor (g in 0:(num_groups-1), check = 0) {
-		
-			n_risk = n_risk_n_event_stratum[,g * 2 + 1];			
-			n_event = n_risk_n_event_stratum[,g * 2 + 2];
-		
-			if (test_type == "log-rank") {
-				O = n_event;
-				E = n_risk * n_event_stratum / n_risk_stratum;		
-			} else { ### test_type == "wilcoxon"
-				O = n_risk_stratum * n_event / range;
-				E = n_risk * n_event_stratum / range;
-			}			
-			U[(g + 1),s] = sum (O - E);
-			U_OE[g + 1, s] = (sum (O - E)*sum (O - E))/sum(E);
-			OBS[g + 1, s] = sum(O);
-			EXP[g + 1, s] = sum(E);
-		}
-		
-		# parfor (i1 in 0:(num_groups - 2), check = 0) {
-		for (i1 in 0:(num_groups - 2), check = 0) {
-		
-			n_risk = n_risk_n_event_stratum[,1 + i1 * 2]; 
-			n_event = n_risk_n_event_stratum[,2 + i1 * 2]; 
-			for (i2 in 0:(num_groups - 2)) {
-
-				n_risk_i2j = n_risk_n_event_stratum[,1 + i2 * 2]; 
-				I_i1i2 = 0;
-				if (i1 == i2) { 
-					I_i1i2 = 1;
-				}
-				if (test_type == "log-rank") {
-					V1 = n_risk * n_event_stratum * (n_risk_stratum - n_event_stratum) / (n_risk_stratum * (n_risk_stratum - 1));
-					V1 = replace (target = V1, pattern = "NaN", replacement = 0);
-					V2 = I_i1i2 - (n_risk_i2j / n_risk_stratum);
-					V[(i1 + 1),(i2 + 1)] = sum (V1 * V2);
-				} else { ### test_type == "wilcoxon"
-					V1 = (n_risk_stratum ^ 2) * (n_risk * n_event_stratum) * (n_risk_stratum - n_event_stratum) / (n_risk_stratum * (n_risk_stratum - 1));
-					V1 = replace (target = V1, pattern = "NaN", replacement = 0);
-					V2 = I_i1i2 - (n_risk_i2j / n_risk_stratum);
-					V[(i1 + 1),(i2 + 1)] = sum (V1 * V2) / (range ^ 2);
-				}
-			}
-		}
-		V_start_ind = (s - 1) * (num_groups - 1) + 1;
-		V_sum_total[,V_start_ind:(V_start_ind + num_groups - 2)] = V;
-	}
-}
-
-if (num_groups > 1 & test_type != "none") {
-	V_sum = matrix (0, rows = num_groups-1, cols = num_groups-1);
-	for (s in 1:num_strata) {
-		V_start_ind = (s - 1) * (num_groups - 1) + 1;
-	    V_sum_total_part = V_sum_total[,V_start_ind:(V_start_ind + num_groups - 2)];
-		V_sum = V_sum + V_sum_total_part;
-	}
-		
-	U_sum = rowSums (U);
-
-	test_st = as.scalar (t(U_sum[1:(num_groups-1),1]) %*% inv(V_sum) %*% U_sum[1:(num_groups-1),1]);
-	p_val = 1 - cdf (target = test_st, dist = "chisq", df = num_groups-1 );
-	if (test_type != "none") {
-		U_OE_sum = rowSums(U_OE);
-		V_OE =rowSums((U*U) /sum(V_sum));
-		TEST_GROUPS_OE[1,1] = num_groups;
-		TEST_GROUPS_OE[1,2] = num_groups - 1;
-		TEST_GROUPS_OE[1,3] = test_st;
-		TEST_GROUPS_OE[1,4] = p_val;
-		TEST[,1] = rowSums(n_event_all_global);
-		TEST[,2] = rowSums(OBS);
-		TEST[,3] = rowSums(EXP);
-		TEST[,4] = rowSums(U_OE_sum);
-		TEST[,5] = rowSums(V_OE);
-		str = append (str, test_type + " test for " + num_groups + " groups: Chi-squared = " + test_st + " on " + (num_groups - 1) + " df, p = " + p_val + " ");	
-	} 
-}
-
-GSI = removeEmpty (target = GSI, margin = "rows");
-if (n_group_cols > 0) {
-	# making a copy of unique groups before adding new rows depending on strata
-	G_cols_original = G_cols;
-
-	GSI_1 = GSI[,1];
-	tab_G = table (seq (1, nrow (GSI_1)), GSI_1, nrow (GSI_1), nrow (G_cols));
-	G_cols = tab_G %*% G_cols;
-}
-
-if (n_stratum_cols > 0) {
-	GSI_2 = GSI[,2];
-	tab_S = table (seq (1, nrow (GSI_2)), GSI_2, nrow (GSI_2), nrow (S_cols));
-	S_cols = tab_S %*% S_cols;
-}
-
-# pull out non-empty rows from M 
-M_cols = removeEmpty (target = M_cols, margin = "rows");
-tab_M = table (seq (1, nrow (M_cols)), M_cols, nrow (M_cols), nrow (M));
-M = tab_M %*% M;
-M = replace (target = M, pattern = "Infinity", replacement = "NaN");
-
-# pull out non-empty rows from TEST
-if (n_group_cols > 0 & n_stratum_cols > 0) {
-	M = append (append (G_cols, S_cols), M);
-	if (test_type != "none") {
-		TEST = append (G_cols_original, TEST);
-	}
-} else if (n_group_cols > 0) {
-	M = append (G_cols, M);
-	if (test_type != "none") {	
-		TEST = append (G_cols_original, TEST);
-	}
-} else if (n_stratum_cols > 0) {
-	M = append (S_cols, M);
-}
-
-# pull out non-empty columns from KM
-KM = t (append (t (KM), KM_cols_select) * KM_cols_select);
-KM = removeEmpty (target = KM, margin = "cols");
-KM = removeEmpty (target = KM, margin = "rows");
-KM = KM[1:(nrow (KM) - 1),];
-
-# write output matrices
-write (M, fileM, format=fmtO);
-write (KM, fileO, format=fmtO);
-
-if (test_type != "none") {
-	if (num_groups > 1 & fileT != " ") { 
-		write (TEST, fileT, format=fmtO);
-		write (TEST_GROUPS_OE, fileT+".groups.oe", format=fmtO);
-	} else {
-		print (str);
-	}	
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+#  
+# THIS SCRIPT ANALYZES SURVIVAL DATA USING KAPLAN-MEIER ESTIMATES 
+#
+# INPUT   PARAMETERS:
+# ---------------------------------------------------------------------------------------------
+# NAME    TYPE     DEFAULT      MEANING
+# ---------------------------------------------------------------------------------------------
+# X       String   ---          Location to read the input matrix X containing the survival data: 
+#								timestamps, whether event occurred (1) or data is censored (0), and a number of factors (categorical features) 
+#								for grouping and/or stratifying 
+# TE	  String   ---          Column indices of X which contain timestamps (first entry) and event information (second entry) 
+# GI	  String   ---          Column indices of X corresponding to the factors to be used for grouping
+# SI	  String   ---          Column indices of X corresponding to the factors to be used for stratifying				
+# O       String   ---          Location to write the matrix containing the results of the Kaplan-Meier analysis; see below for the description
+# M       String   ---          Location to write Matrix M containing the following statistics: total number of events, median and its confidence intervals; 
+#								if survival data for multiple groups and strata are provided each row of M contains the above statistics per group and stratum
+# T 	  String   " "			If survival data from multiple groups is available and ttype=log-rank or wilcoxon, 
+#								location to write the matrix containing the result of the (stratified) test for comparing multiple groups
+# alpha   Double   0.05         Parameter to compute 100*(1-alpha)% confidence intervals for the survivor function and its median 
+# etype   String   "greenwood"  Parameter to specify the error type according to "greenwood" (the default) or "peto"
+# ctype   String   "log"        Parameter to modify the confidence interval; "plain" keeps the lower and upper bound of 
+#								the confidence interval unmodified,	"log" (the default) corresponds to the log transformation and 
+#								"log-log" corresponds to the complementary log-log transformation 
+# ttype   String   "none"   	If survival data for multiple groups is available specifies which test to perform for comparing 
+#								survival data across multiple groups: "none" (the default) "log-rank" or "wilcoxon" test   
+# fmt     String   "text"       The output format of results of the Kaplan-Meier analysis, such as "text" or "csv"
+# ---------------------------------------------------------------------------------------------
+# OUTPUT: 
+# 1- Matrix KM whose dimension depends on the number of groups (denoted by g) and strata (denoted by s) in the data: 
+#	each collection of 7 consecutive columns in KM corresponds to a unique combination of groups and strata in the data with the following schema
+# 	1. col: timestamp
+# 	2. col: no. at risk
+# 	3. col: no. of events
+# 	4. col: Kaplan-Meier estimate of survivor function surv
+# 	5. col: standard error of surv
+# 	6. col: lower 100*(1-alpha)% confidence interval for surv
+# 	7. col: upper 100*(1-alpha)% confidence interval for surv
+# 2- Matrix M whose dimension depends on the number of groups (g) and strata (s) in the data (k denotes the number of factors used for grouping 
+#	,i.e., ncol(GI) and l denotes the number of factors used for stratifying, i.e., ncol(SI))
+#	M[,1:k]: unique combination of values in the k factors used for grouping 
+#	M[,(k+1):(k+l)]: unique combination of values in the l factors used for stratifying
+#	M[,k+l+1]: total number of records
+#	M[,k+l+2]: total number of events
+#	M[,k+l+3]: median of surv
+#	M[,k+l+4]: lower 100*(1-alpha)% confidence interval of the median of surv 
+#	M[,k+l+5]: upper 100*(1-alpha)% confidence interval of the median of surv
+#	If no factors are used for grouping or stratifying (k = l = 0), M will have only the 5 statistic columns with 
+#	M[,1]: total number of records
+#	M[,2]: total number of events
+#	M[,3]: median of surv
+#	M[,4]: lower 100*(1-alpha)% confidence interval of the median of surv 
+#	M[,5]: upper 100*(1-alpha)% confidence interval of the median of surv
+# 3- If survival data from multiple groups is available and ttype=log-rank or wilcoxon, a 1 x 4 matrix T and a g x 5 matrix T_GROUPS_OE with
+#	T_GROUPS_OE[,1] = no. of events	
+#	T_GROUPS_OE[,2] = observed value (O)
+#	T_GROUPS_OE[,3] = expected value (E)
+#	T_GROUPS_OE[,4] = (O-E)^2/E
+#	T_GROUPS_OE[,5] = (O-E)^2/V 	 
+#	T[1,1] = no. of groups
+#	T[1,2] = degree of freedom for Chi-squared distributed test statistic
+#	T[1,3] = test statistic 
+#	T[1,4] = P-value
+# -------------------------------------------------------------------------------------------
+# HOW TO INVOKE THIS SCRIPT - EXAMPLE:
+# hadoop jar SystemML.jar -f KM.dml -nvargs X=INPUT_DIR/X TE=INPUT_DIR/TE GI=INPUT_DIR/GI SI=INPUT_DIR/SI O=OUTPUT_DIR/O 
+#											M=OUTPUT_DIR/M T=OUTPUT_DIR/T alpha=0.05 etype=greenwood ctype=log fmt=csv
+
+fileX = $X;
+fileTE = $TE;
+fileGI = ifdef ($GI, " ");
+fileSI = ifdef ($SI, " ");
+fileO = $O;
+fileM = $M;
+
+# Default values of some parameters
+fileT = ifdef ($T, " ");                  # $T=" "
+
+fileG = ifdef ($G, " ");                 # $G=" "
+fileS = ifdef ($S, " ");                 # $S=" "
+fmtO = ifdef ($fmt, "text");             # $fmt="text"
+alpha = ifdef ($alpha, 0.05);            # $alpha=0.05
+err_type  = ifdef ($etype, "greenwood"); # $etype="greenwood"
+conf_type = ifdef ($ctype, "log");       # $ctype="log"
+test_type = ifdef ($ttype, "none");      # $ttype="none"
+
+X = read (fileX);
+TE = read (fileTE);
+if (fileGI != " ") {
+	GI = read (fileGI);
+} else {
+    GI = matrix (0, rows = 1, cols = 1);
+}
+
+if (fileSI != " "){
+	SI = read (fileSI);
+} else {
+    SI = matrix (0, rows = 1, cols = 1);
+}
+
+TE = t(TE);
+GI = t(GI);
+SI = t(SI);
+		
+# check arguments for validity
+if (err_type != "greenwood" & err_type != "peto") { 
+	stop (err_type + " is not a valid error type!");
+}
+
+if (conf_type != "plain" & conf_type != "log" & conf_type != "log-log") { 
+	stop (conf_type + " is not a valid confidence type!");
+}
+
+if (test_type != "log-rank" & test_type != "wilcoxon" & test_type != "none") {
+	stop (test_type + " is not a valid test type!");
+}
+
+n_group_cols = ncol (GI);
+n_stratum_cols = ncol (SI);
+
+# check GI and SI for validity
+GI_1_1 = as.scalar (GI[1,1]);
+SI_1_1 = as.scalar (SI[1,1]);	
+if (n_group_cols == 1) {
+	if (GI_1_1 == 0) { # no factors for grouping
+		n_group_cols = 0;
+	}
+} else if (GI_1_1 == 0) {
+	stop ("Matrix GI contains zero entries!");
+}
+if (n_stratum_cols == 1) {
+	if (SI_1_1 == 0) { # no factors for stratifying
+		n_stratum_cols = 0;
+	}
+} else if (SI_1_1 == 0) {
+	stop ("Matrix SI contains zero entries!");
+}
+
+if (2 + n_group_cols + n_stratum_cols > ncol (X)) {
+	stop ("X has an incorrect number of columns!");
+}
+
+# reorder cols of X 
+if (GI_1_1 == 0 & SI_1_1 == 0) {
+	Is = TE;
+} else if (GI_1_1 == 0) {
+	Is = append (TE, SI);
+} else if (SI_1_1 == 0) {
+	Is = append (TE, GI);
+} else {
+	Is = append (TE, append (GI, SI));
+}
+X = X %*% table (Is, seq (1, 2 + n_group_cols + n_stratum_cols), ncol (X), 2 + n_group_cols + n_stratum_cols);	
+
+num_records = nrow (X);
+num_groups = 1;
+num_strata = 1;
+
+### compute group id for each record
+print ("Perform grouping...");
+if (n_group_cols > 0) {
+	for (g in 1:n_group_cols) { # sort columns corresponding to groups sequentially
+		X = order (target = X, by = 2 + g);
+	}
+	XG = X[,3:(3 + n_group_cols - 1)];
+	Idx = matrix (1, rows = num_records, cols = 1);
+	Idx[2:num_records,] = rowMaxs (ppred (X[1:(num_records - 1),3:(2 + n_group_cols)], X[2:num_records,3:(2 + n_group_cols)], "!="));
+	num_groups = sum (Idx);
+
+	XG = replace (target = XG, pattern = 0, replacement = "Infinity");
+	XG = XG * Idx;
+	XG = replace (target = XG, pattern = "NaN", replacement = 0);	
+	G_cols = removeEmpty (target = XG, margin = "rows"); 
+	G_cols = replace (target = G_cols, pattern = "Infinity", replacement = 0);	
+
+	A = removeEmpty (target = diag (Idx), margin = "cols");
+	if (ncol (A) > 1) {
+		A[,1:(ncol (A) - 1)] = A[,1:(ncol (A) - 1)] - A[,2:ncol (A)];
+		B = cumsum (A);
+		Gi = B %*% seq(1, ncol(B)); # group ids
+	} else { # there is only one group
+		Gi = matrix (1, rows = num_records, cols = 1);
+	}
+	if (n_stratum_cols > 0) {
+		X = append (append (X[,1:2],Gi), X[,(3 + g):ncol (X)]);
+	} else { # no strata
+		X = append (X[,1:2],Gi);
+	}
+}
+
+### compute stratum id for each record
+print ("Perform stratifying...");
+if (n_stratum_cols > 0) {
+	s_offset = 2;
+	if (n_group_cols > 0) {
+		s_offset = 3;
+	}
+	for (s in 1:n_stratum_cols) { # sort columns corresponding to strata sequentially
+		X = order (target = X, by = s_offset + s);		
+	}
+	XS = X[,(s_offset + 1):(s_offset + n_stratum_cols)];
+	Idx = matrix (1, rows = num_records, cols = 1);
+	Idx[2:num_records,] = rowMaxs (ppred (X[1:(num_records - 1),(s_offset + 1):(s_offset + n_stratum_cols)], X[2:num_records,(s_offset + 1):(s_offset + n_stratum_cols)], "!="));
+	num_strata = sum (Idx);
+
+	XS = replace (target = XS, pattern = 0, replacement = "Infinity");
+	XS = XS * Idx;
+	XS = replace (target = XS, pattern = "NaN", replacement = 0);	
+	S_cols = removeEmpty (target = XS, margin = "rows"); 
+	S_cols = replace (target = S_cols, pattern = "Infinity", replacement = 0);	
+
+	SB = removeEmpty (target = seq (1,num_records), margin = "rows", select = Idx); # indices of stratum boundaries 
+	A = removeEmpty (target = diag (Idx), margin = "cols");
+	if (ncol (A) > 1) {
+		A[,1:(ncol (A) - 1)] = A[,1:(ncol (A) - 1)] - A[,2:ncol (A)];
+		B = cumsum (A);
+		Si = B %*% seq(1, ncol(B)); # stratum ids
+	} else { # there is only one stratum
+		Si = matrix (1, rows = num_records, cols = 1);
+	}
+	X = append (X[,1:3],Si);
+}
+
+if (n_group_cols == 0 & n_stratum_cols == 0) {
+	X = append (X, matrix (1, rows = num_records, cols = 2));
+	SB = matrix (1, rows = 1, cols = 1);	
+} else if (n_group_cols == 0) {	
+	X = append (X[,1:2], append (matrix (1, rows = num_records, cols = 1), X[,3]));
+} else if (n_stratum_cols == 0) {
+	X = append (X, matrix (1, rows = num_records, cols = 1));
+	SB = matrix (1, rows = 1, cols = 1);
+}
+
+######## BEGIN KAPLAN-MEIER ANALYSIS
+print ("BEGIN KAPLAN-MEIER SURVIVAL FIT SCRIPT");
+
+KM = matrix (0, rows = num_records, cols = num_groups * num_strata * 7);
+KM_cols_select = matrix (1, rows = num_groups * num_strata * 7, cols = 1);
+GSI = matrix (0, rows = num_groups * num_strata, cols = 2);
+a = 1/0;
+M = matrix (a, rows = num_groups * num_strata, cols = 5);
+M_cols = seq (1, num_groups * num_strata);
+z_alpha_2 = icdf (target = 1 - alpha / 2, dist = "normal");
+
+if (num_groups > 1 & test_type != "none") { 
+	str = "";
+	TEST = matrix (0, rows = num_groups, cols = 5);
+	TEST_GROUPS_OE = matrix (0, rows = 1, cols = 4);
+	U = matrix (0, rows = num_groups, cols = num_strata);
+	U_OE = matrix (0, rows = num_groups, cols = num_strata);
+	OBS = matrix (0, rows = num_groups, cols = num_strata);
+	EXP = matrix (0, rows = num_groups, cols = num_strata);
+	V_sum_total = matrix (0, rows = num_groups-1, cols = (num_groups-1) * num_strata);
+	n_event_all_global = matrix(1, rows=num_groups, cols=num_strata);
+} else if (num_groups == 1 & test_type != "none") {
+	stop ("Data contains only one group or no groups, at least two groups are required for test!");
+}
+
+parfor (s in 1:num_strata, check = 0) {
+	
+	start_ind = as.scalar (SB[s,]);
+	end_ind = num_records;
+	if (s != num_strata) {
+		end_ind = as.scalar (SB[s + 1,]) - 1;
+	} 
+	
+	######## RECODING TIMESTAMPS PRESERVING THE ORDER
+	
+	X_cur = X[start_ind:end_ind,];
+	range = end_ind - start_ind + 1;
+	X_cur = order (target = X_cur, by = 1);
+	Idx1 = matrix (1, rows = range, cols = 1);
+	
+	num_timestamps = 1;
+	if (range == 1) {
+		RT = matrix (1, rows = 1, cols = 1);
+	} else {
+		Idx1[2:range,1] = ppred (X_cur[1:(range - 1),1], X_cur[2:range,1], "!=");
+		num_timestamps = sum (Idx1);
+		A1 = removeEmpty (target = diag (Idx1), margin = "cols");
+		if (ncol (A1) > 1) {
+			A1[,1:(ncol (A1) - 1)] = A1[,1:(ncol (A1) - 1)] - A1[,2:ncol (A1)];
+			B1 = cumsum (A1);
+			RT = B1 %*% seq(1, ncol(B1)); 	
+		} else { # there is only one group
+			RT = matrix (1, rows = range, cols = 1);
+		}
+	}
+	
+	T = X_cur[,1];
+	E = X_cur[,2];
+	G = X_cur[,3];
+	S = X_cur[,4];
+	
+	n_event_stratum = aggregate (target = E, groups = RT, fn = "sum"); # no. of uncensored events per stratum 
+	n_event_all_stratum = aggregate (target = E, groups = RT, fn = "count"); # no. both censored and uncensored of events per stratum 
+	Idx1 = cumsum (n_event_all_stratum); 
+	time_stratum = table (seq (1, nrow (Idx1), 1), Idx1) %*% T; # distinct timestamps both censored and uncensored per stratum 
+	time_stratum_has_zero = sum (ppred (time_stratum, 0, "==")) > 0;
+	if (time_stratum_has_zero) {
+		time_stratum = 	replace (target = time_stratum, pattern = 0, replacement = "Infinity");
+	}
+	n_time_all1 = nrow (n_event_stratum);  # no. of distinct timestamps both censored and uncensored per stratum
+	n_event_all_stratum_agg = matrix (0, rows = n_time_all1, cols = 1); 
+	if (n_time_all1 > 1) {
+		n_event_all_stratum_agg[2:n_time_all1,] = Idx1[1:(n_time_all1 - 1),]; 
+	}
+	n_risk_stratum = range - n_event_all_stratum_agg; # no. at risk per stratum
+
+	if (num_groups > 1 & test_type != "none") {	# needed for log-rank or wilcoxon test	
+		n_risk_n_event_stratum = matrix (0, rows = n_time_all1, cols = num_groups * 2);
+	}
+
+	parfor (g in 1:num_groups, check = 0) {
+	
+		group_ind = ppred (G, g, "==");
+		KM_offset = (s - 1) * num_groups * 7 + (g - 1) * 7;
+		M_offset = (s - 1) * num_groups + g;
+		if (sum (group_ind) != 0) { # group g is present in the stratum s
+
+			GSI_offset = (s - 1) * num_groups + g; 
+			GSI[GSI_offset,1] = g;
+			GSI[GSI_offset,2] = s;		
+			E_cur = E * group_ind;
+
+			######## COMPUTE NO. AT RISK AND NO.OF EVENTS FOR EACH TIMESTAMP
+			
+			n_event = aggregate (target = E_cur, groups = RT, fn = "sum"); # no. of uncensored events per stratum per group
+			n_event_all = aggregate (target = group_ind, groups = RT, fn = "sum"); # no. of both censored and uncensored events per stratum per group
+			Idx1 = cumsum (n_event_all); 
+			event_occurred = ppred (n_event, 0, ">");
+			if (time_stratum_has_zero) {
+				time = replace (target = time_stratum * event_occurred, pattern = "NaN", replacement = 0);
+				time = removeEmpty (target = time, margin = "rows");
+				time = replace (target = time, pattern = "Infinity", replacement = 0);
+			} else {
+				time = removeEmpty (target = time_stratum * event_occurred, margin = "rows");
+			}
+			n_time_all2 = nrow (n_event);  # no. of distinct timestamps both censored and uncensored per stratum per group
+			n_event_all_agg = matrix (0, rows = n_time_all2, cols = 1); 
+			if (n_time_all2 > 1) {
+				n_event_all_agg[2:n_time_all2,] = Idx1[1:(n_time_all2 - 1),]; 
+			}
+				
+			n_risk = sum (group_ind) - n_event_all_agg; # no. at risk per stratum per group
+			
+			if (num_groups > 1 & test_type != "none") {
+				n_risk_n_event_stratum[,(g - 1) * 2 + 1] = n_risk;
+				n_risk_n_event_stratum[,(g - 1) * 2 + 2] = n_event;					
+			}
+			
+			# Extract only rows corresponding to events, i.e., for which n_event is nonzero 
+			Idx1 = ppred (n_event, 0, "!=");
+			KM_1 = matrix (0, rows = n_time_all2, cols = 2);
+			KM_1[,1] = n_risk;
+			KM_1[,2] = n_event;
+			KM_1 = removeEmpty (target = KM_1, margin = "rows", select = Idx1);
+			n_risk = KM_1[,1];
+			n_event = KM_1[,2];
+			n_time = nrow (time);
+			
+			######## ESTIMATE SURVIVOR FUNCTION SURV, ITS STANDARD ERROR SE_SURV, AND ITS 100(1-ALPHA)% CONFIDENCE INTERVAL	
+			surv = cumprod ((n_risk - n_event) / n_risk);
+			tmp = n_event / (n_risk * (n_risk - n_event));
+			se_surv = sqrt (cumsum (tmp)) * surv; 
+			if (err_type == "peto") {
+				se_surv = (surv * sqrt(1 - surv) / sqrt(n_risk));		
+			}
+		
+			if (conf_type == "plain") { 
+				# True survivor function is in [surv +- z_alpha_2 * se_surv], 
+				# values less than 0 are replaced by 0, values larger than 1 are replaced by 1!
+				CI_l = max (surv - (z_alpha_2 * se_surv), 0);  
+				CI_r = min (surv + (z_alpha_2 * se_surv), 1); 
+			} else if (conf_type == "log") {
+				# True survivor function is in [surv * exp(+- z_alpha_2 * se_surv / surv)]
+				CI_l = max (surv * exp (- z_alpha_2 * se_surv / surv), 0); 
+				CI_r = min (surv * exp ( z_alpha_2 * se_surv / surv), 1); 
+			} else { # conf_type == "log-log"
+				# True survivor function is in [surv ^ exp(+- z_alpha_2 * se(log(-log(surv))))]
+				CI_l = max (surv ^ exp (- z_alpha_2 * se_surv / log(surv)), 0); 
+				CI_r = min (surv ^ exp ( z_alpha_2 * se_surv / log(surv)), 1);  
+			}	 
+			#
+			if (as.scalar (n_risk[n_time,]) == as.scalar (n_event[n_time,])) {
+				CI_l[n_time,] = 0/0;
+				CI_r[n_time,] = 0/0;
+			}	
+		
+			n_event_sum = sum (n_event);
+			n_event_sum_all = sum(n_event_all);
+			if (n_event_sum > 0) {
+				# KM_offset = (s - 1) * num_groups * 7 + (g - 1) * 7;
+				KM[1:n_time,KM_offset + 1] = time;
+				KM[1:n_time,KM_offset + 2] = n_risk;			
+				KM[1:n_time,KM_offset + 3] = n_event;
+				KM[1:n_time,KM_offset + 4] = surv;
+				KM[1:n_time,KM_offset + 5] = se_surv;
+				KM[1:n_time,KM_offset + 6] = CI_l;
+				KM[1:n_time,KM_offset + 7] = CI_r;				
+			}			
+						
+			######## ESTIMATE MEDIAN OF SURVIVAL TIMES AND ITS 100(1-ALPHA)% CONFIDENCE INTERVAL
+		
+			p_5 = ppred (surv, 0.5, "<="); 
+			pn_5 = sum (p_5);
+			#M_offset = (s - 1) * num_groups + g;
+			# if the estimated survivor function is larger than 0.5 for all timestamps median does not exist! 
+			p_5_exists = (pn_5 != 0);
+			M[M_offset,2] = n_event_sum;
+			M[M_offset,1] = n_event_sum_all; 
+			if (p_5_exists) {
+				if ( as.scalar (surv[n_time - pn_5 + 1,1]) == 0.5 ) { # if the estimated survivor function is exactly equal to 0.5
+					if (pn_5 > 1) {
+						t_5 = as.scalar ((time[n_time - pn_5 + 1,1] + time[n_time - pn_5 + 2,1])/2);
+					} else {
+						t_5 = as.scalar (time[n_time - pn_5 + 1,1]);
+					}
+				} else {
+					t_5 = as.scalar (time[n_time - pn_5 + 1,1]);
+				}
+		
+				l_ind = ppred (CI_l, 0.5, "<=");
+				r_ind = ppred (CI_r, 0.5, "<=");
+				l_ind_sum = sum (l_ind);
+				r_ind_sum = sum (r_ind);
+				l_min_ind = as.scalar (rowIndexMin (t(l_ind)));
+				r_min_ind = as.scalar (rowIndexMin (t(r_ind)));		
+				if (l_min_ind == n_time) {
+					if (l_ind_sum > 0) {
+						if (as.scalar (l_ind[n_time,1]) == 0) { # NA at last position
+							M[M_offset,4] = time[n_time - l_ind_sum,1];
+						} else {
+							M[M_offset,4] = time[1,1];
+						}
+					}
+				} else {
+					M[M_offset,4] = time[l_min_ind + 1,1];
+				}
+				#
+				if (r_min_ind == n_time) {
+					if (r_ind_sum > 0) {
+						if (as.scalar (r_ind[n_time,1]) == 0) { # NA at last position
+							M[M_offset,5] = time[n_time - r_ind_sum,1];
+						} else {
+							M[M_offset,5] = time[1,1];
+						}
+					}
+				} else {
+					M[M_offset,5] = time[r_min_ind + 1,1];
+				}
+				M[M_offset,3] = t_5;
+				if (test_type != "none"){
+					n_event_all_global[g,s] = n_event_sum_all; 
+				}
+			}
+		} else {
+			print ("group " + g + " is not present in the stratum " + s);
+			KM_cols_select[(KM_offset + 1):(KM_offset + 7),1] = matrix (0, rows = 7, cols = 1);
+			M_cols[M_offset,1] = 0;
+		}		
+	}
+	
+			
+	######## COMPARISON BETWEEN DIFFERENT GROUPS USING LOG-RANK OR WILCOXON TEST
+		
+	if (num_groups > 1 & test_type != "none") {
+
+		V = matrix (0, rows = num_groups-1, cols = num_groups-1);
+		parfor (g in 0:(num_groups-1), check = 0) {
+		
+			n_risk = n_risk_n_event_stratum[,g * 2 + 1];			
+			n_event = n_risk_n_event_stratum[,g * 2 + 2];
+		
+			if (test_type == "log-rank") {
+				O = n_event;
+				E = n_risk * n_event_stratum / n_risk_stratum;		
+			} else { ### test_type == "wilcoxon"
+				O = n_risk_stratum * n_event / range;
+				E = n_risk * n_event_stratum / range;
+			}			
+			U[(g + 1),s] = sum (O - E);
+			U_OE[g + 1, s] = (sum (O - E)*sum (O - E))/sum(E);
+			OBS[g + 1, s] = sum(O);
+			EXP[g + 1, s] = sum(E);
+		}
+		
+		# parfor (i1 in 0:(num_groups - 2), check = 0) {
+		for (i1 in 0:(num_groups - 2), check = 0) {
+		
+			n_risk = n_risk_n_event_stratum[,1 + i1 * 2]; 
+			n_event = n_risk_n_event_stratum[,2 + i1 * 2]; 
+			for (i2 in 0:(num_groups - 2)) {
+
+				n_risk_i2j = n_risk_n_event_stratum[,1 + i2 * 2]; 
+				I_i1i2 = 0;
+				if (i1 == i2) { 
+					I_i1i2 = 1;
+				}
+				if (test_type == "log-rank") {
+					V1 = n_risk * n_event_stratum * (n_risk_stratum - n_event_stratum) / (n_risk_stratum * (n_risk_stratum - 1));
+					V1 = replace (target = V1, pattern = "NaN", replacement = 0);
+					V2 = I_i1i2 - (n_risk_i2j / n_risk_stratum);
+					V[(i1 + 1),(i2 + 1)] = sum (V1 * V2);
+				} else { ### test_type == "wilcoxon"
+					V1 = (n_risk_stratum ^ 2) * (n_risk * n_event_stratum) * (n_risk_stratum - n_event_stratum) / (n_risk_stratum * (n_risk_stratum - 1));
+					V1 = replace (target = V1, pattern = "NaN", replacement = 0);
+					V2 = I_i1i2 - (n_risk_i2j / n_risk_stratum);
+					V[(i1 + 1),(i2 + 1)] = sum (V1 * V2) / (range ^ 2);
+				}
+			}
+		}
+		V_start_ind = (s - 1) * (num_groups - 1) + 1;
+		V_sum_total[,V_start_ind:(V_start_ind + num_groups - 2)] = V;
+	}
+}
+
+if (num_groups > 1 & test_type != "none") {
+	V_sum = matrix (0, rows = num_groups-1, cols = num_groups-1);
+	for (s in 1:num_strata) {
+		V_start_ind = (s - 1) * (num_groups - 1) + 1;
+	    V_sum_total_part = V_sum_total[,V_start_ind:(V_start_ind + num_groups - 2)];
+		V_sum = V_sum + V_sum_total_part;
+	}
+		
+	U_sum = rowSums (U);
+
+	test_st = as.scalar (t(U_sum[1:(num_groups-1),1]) %*% inv(V_sum) %*% U_sum[1:(num_groups-1),1]);
+	p_val = 1 - cdf (target = test_st, dist = "chisq", df = num_groups-1 );
+	if (test_type != "none") {
+		U_OE_sum = rowSums(U_OE);
+		V_OE =rowSums((U*U) /sum(V_sum));
+		TEST_GROUPS_OE[1,1] = num_groups;
+		TEST_GROUPS_OE[1,2] = num_groups - 1;
+		TEST_GROUPS_OE[1,3] = test_st;
+		TEST_GROUPS_OE[1,4] = p_val;
+		TEST[,1] = rowSums(n_event_all_global);
+		TEST[,2] = rowSums(OBS);
+		TEST[,3] = rowSums(EXP);
+		TEST[,4] = rowSums(U_OE_sum);
+		TEST[,5] = rowSums(V_OE);
+		str = append (str, test_type + " test for " + num_groups + " groups: Chi-squared = " + test_st + " on " + (num_groups - 1) + " df, p = " + p_val + " ");	
+	} 
+}
+
+GSI = removeEmpty (target = GSI, margin = "rows");
+if (n_group_cols > 0) {
+	# making a copy of unique groups before adding new rows depending on strata
+	G_cols_original = G_cols;
+
+	GSI_1 = GSI[,1];
+	tab_G = table (seq (1, nrow (GSI_1)), GSI_1, nrow (GSI_1), nrow (G_cols));
+	G_cols = tab_G %*% G_cols;
+}
+
+if (n_stratum_cols > 0) {
+	GSI_2 = GSI[,2];
+	tab_S = table (seq (1, nrow (GSI_2)), GSI_2, nrow (GSI_2), nrow (S_cols));
+	S_cols = tab_S %*% S_cols;
+}
+
+# pull out non-empty rows from M 
+M_cols = removeEmpty (target = M_cols, margin = "rows");
+tab_M = table (seq (1, nrow (M_cols)), M_cols, nrow (M_cols), nrow (M));
+M = tab_M %*% M;
+M = replace (target = M, pattern = "Infinity", replacement = "NaN");
+
+# prepend the group/stratum factor columns to M, and the unique group columns to TEST when a test was requested
+if (n_group_cols > 0 & n_stratum_cols > 0) {
+	M = append (append (G_cols, S_cols), M);
+	if (test_type != "none") {
+		TEST = append (G_cols_original, TEST);
+	}
+} else if (n_group_cols > 0) {
+	M = append (G_cols, M);
+	if (test_type != "none") {	
+		TEST = append (G_cols_original, TEST);
+	}
+} else if (n_stratum_cols > 0) {
+	M = append (S_cols, M);
+}
+
+# pull out non-empty columns from KM
+KM = t (append (t (KM), KM_cols_select) * KM_cols_select);
+KM = removeEmpty (target = KM, margin = "cols");
+KM = removeEmpty (target = KM, margin = "rows");
+KM = KM[1:(nrow (KM) - 1),];
+
+# write output matrices
+write (M, fileM, format=fmtO);
+write (KM, fileO, format=fmtO);
+
+if (test_type != "none") {
+	if (num_groups > 1 & fileT != " ") { 
+		write (TEST, fileT, format=fmtO);
+		write (TEST_GROUPS_OE, fileT+".groups.oe", format=fmtO);
+	} else {
+		print (str);
+	}	
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/scripts/algorithms/Kmeans-predict.dml
----------------------------------------------------------------------
diff --git a/scripts/algorithms/Kmeans-predict.dml b/scripts/algorithms/Kmeans-predict.dml
index 8496585..5bd78bd 100644
--- a/scripts/algorithms/Kmeans-predict.dml
+++ b/scripts/algorithms/Kmeans-predict.dml
@@ -1,339 +1,339 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-#
-# Compares two categorical data vectors (presumed to be clusterings) and
-# counts matching/nonmatching same-cluster/different-cluster pairs of rows
-#
-# INPUT PARAMETERS:
-# ---------------------------------------------------------------------------
-# NAME  TYPE   DEFAULT  MEANING
-# ---------------------------------------------------------------------------
-# spY   String  " "     Location to read a column-vector with the "specified"
-#                       assignment of records (rows) to categories (clusters)
-# prY   String  " "     Location to read (or write, if X and C are present) a
-#                       column-vector with the "predicted" assignment of rows
-#                       to clusters.  NOTE: The same category may be labeled
-#                       differently in each of the two vectors, spY and prY.
-# fmt   String "text"   Matrix output format for prY, usually "text" or "csv"
-# X     String  " "     Location to read matrix X with the input data records
-# C     String  " "     Location to read matrix C with the cluster centroids
-#                       NOTE: If X and C are present, prY is an output file.
-# O     String  " "     Location to write the printed output statistics
-# ---------------------------------------------------------------------------
-#
-# The "O" file provides the output statistics in CSV format, one per line, in
-# the following format: NAME, [CID], VALUE.  Note:
-#   - The 1st group statistics are given if X input is available;
-#   - The 2nd group statistics are given if X and C inputs are available;
-#   - The 3rd and 4th group statistics are given if spY input is available;
-#   - Only the 4th group statistics contain a nonempty CID value;
-#   - When present, CID contains either the specified category label or the
-#     predicted cluster label.
-#
-# NAME            CID   MEANING
-# ---------------------------------------------------------------------------
-# TSS                   Total Sum of Squares (from the total mean)
-# WCSS_M                Within-Cluster  Sum of Squares (means as centers)
-# WCSS_M_PC             Within-Cluster  Sum of Squares (means), in % of TSS
-# BCSS_M                Between-Cluster Sum of Squares (means as centers)
-# BCSS_M_PC             Between-Cluster Sum of Squares (means), in % of TSS
-#
-# WCSS_C                Within-Cluster  Sum of Squares (centroids as centers)
-# WCSS_C_PC             Within-Cluster  Sum of Squares (centroids), % of TSS
-# BCSS_C                Between-Cluster Sum of Squares (centroids as centers)
-# BCSS_C_PC             Between-Cluster Sum of Squares (centroids), % of TSS
-# 
-# TRUE_SAME_CT          Same-category pairs predicted as Same-cluster, count
-# TRUE_SAME_PC          Same-category pairs predicted as Same-cluster, %
-# TRUE_DIFF_CT          Diff-category pairs predicted as Diff-cluster, count
-# TRUE_DIFF_PC          Diff-category pairs predicted as Diff-cluster, %
-# FALSE_SAME_CT         Diff-category pairs predicted as Same-cluster, count
-# FALSE_SAME_PC         Diff-category pairs predicted as Same-cluster, %
-# FALSE_DIFF_CT         Same-category pairs predicted as Diff-cluster, count
-# FALSE_DIFF_PC         Same-category pairs predicted as Diff-cluster, %
-# 
-# SPEC_TO_PRED     +    For specified category, the best predicted cluster id
-# SPEC_FULL_CT     +    For specified category, its full count
-# SPEC_MATCH_CT    +    For specified category, best-cluster matching count
-# SPEC_MATCH_PC    +    For specified category, % of matching to full count
-# PRED_TO_SPEC     +    For predicted cluster, the best specified category id
-# PRED_FULL_CT     +    For predicted cluster, its full count
-# PRED_MATCH_CT    +    For predicted cluster, best-category matching count
-# PRED_MATCH_PC    +    For predicted cluster, % of matching to full count
-# ---------------------------------------------------------------------------
-#
-# Examples:
-# 1. To predict Y given X and C:
-#     hadoop jar SystemML.jar -f Kmeans-predict.dml -nvargs X=INPUT_DIR/X
-#         C=INPUT_DIR/C prY=OUTPUT_DIR/PredY O=OUTPUT_DIR/stats
-# 2. To compare "actual" labels spY with "predicted" labels given X and C:
-#     hadoop jar SystemML.jar -f Kmeans-predict.dml -nvargs X=INPUT_DIR/X
-#         C=INPUT_DIR/C spY=INPUT_DIR/Y O=OUTPUT_DIR/stats
-# 3. To compare "actual" labels spY with given "predicted" labels prY:
-#     hadoop jar SystemML.jar -f Kmeans-predict.dml -nvargs spY=INPUT_DIR/Y
-#         prY=INPUT_DIR/PredY O=OUTPUT_DIR/stats
-
-
-fmt_prY = ifdef ($fmt, "text");
-filePrY = ifdef ($prY, " ");
-fileSpY = ifdef ($spY, " ");
-fileX   = ifdef ($X, " ");
-fileC   = ifdef ($C, " ");
-fileO   = ifdef ($O, " ");
-
-is_str_empty = TRUE;
-str = " ";
-
-print ("BEGIN K-MEANS SCORING SCRIPT");
-
-if (fileX != " ") {
-    print ("Reading X...");
-    X = read (fileX);
-    total_mean = colSums (X) / nrow (X);
-    total_ss = sum( (X - total_mean)^2 );
-}
-
-if ((fileC != " ") & (fileX == " ")) {
-    print ("ERROR: Cannot provide C without providing X.");
-} else {
-
-
-if (fileC != " ") {
-    print ("Reading C...");
-    C = read (fileC);
-    num_clusters = nrow (C);
-    ones_C = matrix (1, rows = num_clusters, cols = 1);
-    print ("Computing the predicted Y...");
-    D =  -2 * (X %*% t(C)) + t(rowSums (C ^ 2));
-    prY = rowIndexMin (D);
-    if (filePrY != " ") {
-        print ("Writing the predicted Y...");
-        write (prY, filePrY, format=fmt_prY);
-    }
-} else {
-    print ("Reading the predicted Y...");
-    prY = read (filePrY);
-    num_clusters = max (prY);
-    ones_C = matrix (1, rows = num_clusters, cols = 1);
-}
-
-if (fileX != " ") {
-    print ("Computing the WCSS...");
-    # Compute projection matrix from clusters to records
-    P = matrix (0, rows = nrow (X), cols = num_clusters);
-    P [, 1 : max (prY)] = table (seq (1, nrow (X), 1), prY);
-    # Compute the means, as opposed to the centroids
-    cluster_sizes = t(colSums (P));
-    record_of_ones = matrix (1, rows = 1, cols = ncol (X));
-    M = (t(P) %*% X) / ((cluster_sizes + ppred (cluster_sizes, 0, "==")) %*% record_of_ones);
-    # Compute the WCSS for the means
-    wcss_means = sum ((X - P %*% M) ^ 2);
-    wcss_means_pc = 100.0 * wcss_means / total_ss;
-    bcss_means = sum (cluster_sizes * rowSums ((M - ones_C %*% total_mean) ^ 2));
-    bcss_means_pc = 100.0 * bcss_means / total_ss;
-    # Output results
-    print ("Total Sum of Squares (TSS) = " + total_ss);
-    print ("WCSS for cluster means: " + (round (10000.0 * wcss_means_pc) / 10000.0) + "% of TSS = " + wcss_means);
-    print ("BCSS for cluster means: " + (round (10000.0 * bcss_means_pc) / 10000.0) + "% of TSS = " + bcss_means);
-    str = "TSS,," + total_ss;
-    str = append (str, "WCSS_M,," + wcss_means);
-    str = append (str, "WCSS_M_PC,," + wcss_means_pc);
-    str = append (str, "BCSS_M,," + bcss_means);
-    str = append (str, "BCSS_M_PC,," + bcss_means_pc);
-    is_str_empty = FALSE;
-}
-
-if (fileC != " ") {        
-    # Compute the WCSS for the centroids
-    wcss_centroids = sum ((X - P %*% C) ^ 2);
-    wcss_centroids_pc = 100.0 * wcss_centroids / total_ss;
-    bcss_centroids = sum (cluster_sizes * rowSums ((C - ones_C %*% total_mean) ^ 2));
-    bcss_centroids_pc = 100.0 * bcss_centroids / total_ss;
-    # Output results
-    print ("WCSS for centroids: " + (round (10000.0 * wcss_centroids_pc) / 10000.0) + "% of TSS = " + wcss_centroids);
-    print ("BCSS for centroids: " + (round (10000.0 * bcss_centroids_pc) / 10000.0) + "% of TSS = " + bcss_centroids);
-    str = append (str, "WCSS_C,," + wcss_centroids);
-    str = append (str, "WCSS_C_PC,," + wcss_centroids_pc);
-    str = append (str, "BCSS_C,," + bcss_centroids);
-    str = append (str, "BCSS_C_PC,," + bcss_centroids_pc);
-}
-
-
-
-if (fileSpY != " ") {
-
-print ("Reading the specified Y...");
-spY = read (fileSpY);
-num_records = nrow (spY);
-    
-if (num_records != nrow (prY) | ncol (spY) != 1 | ncol (prY) != 1) {
-    print ("ERROR: spY and/or prY size mismatch");
-    print ("nrow (spY) = " + nrow (spY) + ";  ncol (spY) = " + ncol (spY)
-      + ";  nrow (prY) = " + nrow (prY) + ";  ncol (prY) = " + ncol (prY));
-} else {
-
-    print ("Computing the pairs counts...");
-
-    orig_min_spY = min (spY);
-    orig_min_prY = min (prY);
-    spY = spY + (1 - orig_min_spY);
-    prY = prY + (1 - orig_min_prY);
-    
-    spYprY_row_counts = table (spY, prY);
-    spY_row_counts = rowSums (spYprY_row_counts);
-    prY_row_counts = t(colSums (spYprY_row_counts));
-
-    # Count all pairs of rows having the same (spY, prY)-values
-    spYprY_pair_counts = spYprY_row_counts * (spYprY_row_counts - 1) / 2;
-
-    # Count all pairs of rows having the same spY-values
-    spY_pair_counts = spY_row_counts * (spY_row_counts - 1) / 2;
-    # Count all pairs of rows having the same prY-values
-    prY_pair_counts = prY_row_counts * (prY_row_counts - 1) / 2;
-
-    num_pairs = num_records * (num_records - 1.0) / 2.0;
-
-    num_TP_pairs = sum (spYprY_pair_counts);
-    num_FP_pairs = sum (prY_pair_counts) - num_TP_pairs;
-    num_FN_pairs = sum (spY_pair_counts) - num_TP_pairs;
-    num_TN_pairs = num_pairs - num_TP_pairs - num_FP_pairs - num_FN_pairs;
-    
-    pct_TP_pairs = num_TP_pairs / num_pairs * 100.0;
-    pct_TN_pairs = num_TN_pairs / num_pairs * 100.0;
-    pct_FP_pairs = num_FP_pairs / num_pairs * 100.0;
-    pct_FN_pairs = num_FN_pairs / num_pairs * 100.0;
-    
-    if (is_str_empty) {
-        str = "TRUE_SAME_CT,," + num_TP_pairs;
-        is_str_empty = FALSE;
-    } else {
-        str = append (str, "TRUE_SAME_CT,," + num_TP_pairs);
-    } 
-    str = append (str, "TRUE_SAME_PC,,"  + pct_TP_pairs);
-    str = append (str, "TRUE_DIFF_CT,,"  + num_TN_pairs);
-    str = append (str, "TRUE_DIFF_PC,,"  + pct_TN_pairs);
-    str = append (str, "FALSE_SAME_CT,," + num_FP_pairs);
-    str = append (str, "FALSE_SAME_PC,," + pct_FP_pairs);
-    str = append (str, "FALSE_DIFF_CT,," + num_FN_pairs);
-    str = append (str, "FALSE_DIFF_PC,," + pct_FN_pairs);
-    
-    pct_TP_pairs = round (pct_TP_pairs * 10000.0) / 10000.0;
-    pct_TN_pairs = round (pct_TN_pairs * 10000.0) / 10000.0;
-    pct_FP_pairs = round (pct_FP_pairs * 10000.0) / 10000.0;
-    pct_FN_pairs = round (pct_FN_pairs * 10000.0) / 10000.0;
-    
-    space_TP = "";  if (pct_TP_pairs < 100) {space_TP = " ";}  if (pct_TP_pairs < 10) {space_TP = "  ";}
-    space_TN = "";  if (pct_TN_pairs < 100) {space_TN = " ";}  if (pct_TN_pairs < 10) {space_TN = "  ";}
-    space_FP = "";  if (pct_FP_pairs < 100) {space_FP = " ";}  if (pct_FP_pairs < 10) {space_FP = "  ";}
-    space_FN = "";  if (pct_FN_pairs < 100) {space_FN = " ";}  if (pct_FN_pairs < 10) {space_FN = "  ";}
-
-    print ("Same-cluster pairs predicted as Same-cluster ( True Pos): " + space_TP
-        + pct_TP_pairs + "% of all pairs" + " (" + num_TP_pairs + ")");
-    print ("Diff-cluster pairs predicted as Diff-cluster ( True Neg): " + space_TN
-        + pct_TN_pairs + "% of all pairs" + " (" + num_TN_pairs + ")");
-    print ("Diff-cluster pairs predicted as Same-cluster (False Pos): " + space_FP
-        + pct_FP_pairs + "% of all pairs" + " (" + num_FP_pairs + ")");
-    print ("Same-cluster pairs predicted as Diff-cluster (False Neg): " + space_FN
-        + pct_FN_pairs + "% of all pairs" + " (" + num_FN_pairs + ")");
-        
-    [spY_cids, prY_cids, full_counts, matching_counts, rounded_percentages] =
-        get_best_assignments (spYprY_row_counts);
-        
-    print (" ");
-    print ("Specified Categories versus Predicted Clusters:");
-    
-    spY_cids = spY_cids + orig_min_spY - 1;
-    prY_cids = prY_cids + orig_min_prY - 1;
-    
-    for (i in 1 : nrow (spY_cids))
-    {
-        cid = as.integer (castAsScalar (spY_cids [i, 1]));
-        pct = castAsScalar (rounded_percentages [i, 1]);
-        space_pct = "";  if (pct < 100) {space_pct = " ";}  if (pct < 10) {space_pct = "  ";}
-        print ("Category " + cid + 
-            ":  best pred. cluster is " + as.integer (castAsScalar (prY_cids [i, 1])) + 
-            ";  full count = " + as.integer (castAsScalar (full_counts [i, 1])) + 
-            ",  matching count = " + space_pct + pct + "% (" +
-            as.integer (castAsScalar (matching_counts [i, 1])) + ")");
-            
-        str = append (str, "SPEC_TO_PRED,"  + cid + "," + castAsScalar (prY_cids [i, 1]));
-        str = append (str, "SPEC_FULL_CT,"  + cid + "," + castAsScalar (full_counts [i, 1]));
-        str = append (str, "SPEC_MATCH_CT," + cid + "," + castAsScalar (matching_counts [i, 1]));
-        str = append (str, "SPEC_MATCH_PC," + cid + "," + castAsScalar (rounded_percentages [i, 1]));
-    }
-
-    [prY_cids, spY_cids, full_counts, matching_counts, rounded_percentages] =
-        get_best_assignments (t(spYprY_row_counts));
-        
-    print (" ");
-    print ("Predicted Clusters versus Specified Categories:");
-    
-    prY_cids = prY_cids + orig_min_prY - 1;
-    spY_cids = spY_cids + orig_min_spY - 1;
-    
-    for (i in 1 : nrow (prY_cids))
-    {
-        cid = as.integer (castAsScalar (prY_cids [i, 1]));
-        pct = castAsScalar (rounded_percentages [i, 1]);
-        space_pct = "";  if (pct < 100) {space_pct = " ";}  if (pct < 10) {space_pct = "  ";}
-        print ("Cluster " + cid + 
-            ":  best spec. categ. is " + as.integer (castAsScalar (spY_cids [i, 1])) + 
-            ";  full count = " + as.integer (castAsScalar (full_counts [i, 1])) + 
-            ",  matching count = " + space_pct + pct + "% (" +
-            as.integer (castAsScalar (matching_counts [i, 1])) + ")");
-
-        str = append (str, "PRED_TO_SPEC,"  + cid + "," + castAsScalar (spY_cids [i, 1]));
-        str = append (str, "PRED_FULL_CT,"  + cid + "," + castAsScalar (full_counts [i, 1]));
-        str = append (str, "PRED_MATCH_CT," + cid + "," + castAsScalar (matching_counts [i, 1]));
-        str = append (str, "PRED_MATCH_PC," + cid + "," + castAsScalar (rounded_percentages [i, 1]));
-    }
-
-    print (" ");
-}}}
-
-if ((fileO != " ") & (! is_str_empty)) {
-    write (str, fileO);
-}
-
-print ("DONE: K-MEANS SCORING SCRIPT");
-
-
-
-get_best_assignments = function (Matrix[double] counts)
-return (Matrix[double] row_ids, Matrix[double] col_ids, Matrix[double] margins, 
-        Matrix[double] max_counts, Matrix[double] rounded_percentages)
-{
-    margins = rowSums (counts);
-    select_positive = diag (ppred (margins, 0, ">"));
-    select_positive = removeEmpty (target = select_positive, margin = "rows");
-    row_ids = select_positive %*% seq (1, nrow (margins), 1);
-    pos_counts = select_positive %*% counts;
-    pos_margins = select_positive %*% margins;
-    max_counts = rowMaxs (pos_counts);
-    one_per_column = matrix (1, rows = 1, cols = ncol (pos_counts));
-    max_counts_ppred = max_counts %*% one_per_column;
-    is_max_count = ppred (pos_counts, max_counts_ppred, "==");
-    aggr_is_max_count = t(cumsum (t(is_max_count)));
-    col_ids = rowSums (ppred (aggr_is_max_count, 0, "==")) + 1;
-    rounded_percentages = round (1000000.0 * max_counts / pos_margins) / 10000.0;
-}
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+#
+# Compares two categorical data vectors (presumed to be clusterings) and
+# counts matching/nonmatching same-cluster/different-cluster pairs of rows
+#
+# INPUT PARAMETERS:
+# ---------------------------------------------------------------------------
+# NAME  TYPE   DEFAULT  MEANING
+# ---------------------------------------------------------------------------
+# spY   String  " "     Location to read a column-vector with the "specified"
+#                       assignment of records (rows) to categories (clusters)
+# prY   String  " "     Location to read (or write, if X and C are present) a
+#                       column-vector with the "predicted" assignment of rows
+#                       to clusters.  NOTE: The same category may be labeled
+#                       differently in each of the two vectors, spY and prY.
+# fmt   String "text"   Matrix output format for prY, usually "text" or "csv"
+# X     String  " "     Location to read matrix X with the input data records
+# C     String  " "     Location to read matrix C with the cluster centroids
+#                       NOTE: If X and C are present, prY is an output file.
+# O     String  " "     Location to write the printed output statistics
+# ---------------------------------------------------------------------------
+#
+# The "O" file provides the output statistics in CSV format, one per line, in
+# the following format: NAME, [CID], VALUE.  Note:
+#   - The 1st group statistics are given if X input is available;
+#   - The 2nd group statistics are given if X and C inputs are available;
+#   - The 3rd and 4th group statistics are given if spY input is available;
+#   - Only the 4th group statistics contain a nonempty CID value;
+#   - When present, CID contains either the specified category label or the
+#     predicted cluster label.
+#
+# NAME            CID   MEANING
+# ---------------------------------------------------------------------------
+# TSS                   Total Sum of Squares (from the total mean)
+# WCSS_M                Within-Cluster  Sum of Squares (means as centers)
+# WCSS_M_PC             Within-Cluster  Sum of Squares (means), in % of TSS
+# BCSS_M                Between-Cluster Sum of Squares (means as centers)
+# BCSS_M_PC             Between-Cluster Sum of Squares (means), in % of TSS
+#
+# WCSS_C                Within-Cluster  Sum of Squares (centroids as centers)
+# WCSS_C_PC             Within-Cluster  Sum of Squares (centroids), % of TSS
+# BCSS_C                Between-Cluster Sum of Squares (centroids as centers)
+# BCSS_C_PC             Between-Cluster Sum of Squares (centroids), % of TSS
+# 
+# TRUE_SAME_CT          Same-category pairs predicted as Same-cluster, count
+# TRUE_SAME_PC          Same-category pairs predicted as Same-cluster, %
+# TRUE_DIFF_CT          Diff-category pairs predicted as Diff-cluster, count
+# TRUE_DIFF_PC          Diff-category pairs predicted as Diff-cluster, %
+# FALSE_SAME_CT         Diff-category pairs predicted as Same-cluster, count
+# FALSE_SAME_PC         Diff-category pairs predicted as Same-cluster, %
+# FALSE_DIFF_CT         Same-category pairs predicted as Diff-cluster, count
+# FALSE_DIFF_PC         Same-category pairs predicted as Diff-cluster, %
+# 
+# SPEC_TO_PRED     +    For specified category, the best predicted cluster id
+# SPEC_FULL_CT     +    For specified category, its full count
+# SPEC_MATCH_CT    +    For specified category, best-cluster matching count
+# SPEC_MATCH_PC    +    For specified category, % of matching to full count
+# PRED_TO_SPEC     +    For predicted cluster, the best specified category id
+# PRED_FULL_CT     +    For predicted cluster, its full count
+# PRED_MATCH_CT    +    For predicted cluster, best-category matching count
+# PRED_MATCH_PC    +    For predicted cluster, % of matching to full count
+# ---------------------------------------------------------------------------
+#
+# Examples:
+# 1. To predict Y given X and C:
+#     hadoop jar SystemML.jar -f Kmeans-predict.dml -nvargs X=INPUT_DIR/X
+#         C=INPUT_DIR/C prY=OUTPUT_DIR/PredY O=OUTPUT_DIR/stats
+# 2. To compare "actual" labels spY with "predicted" labels given X and C:
+#     hadoop jar SystemML.jar -f Kmeans-predict.dml -nvargs X=INPUT_DIR/X
+#         C=INPUT_DIR/C spY=INPUT_DIR/Y O=OUTPUT_DIR/stats
+# 3. To compare "actual" labels spY with given "predicted" labels prY:
+#     hadoop jar SystemML.jar -f Kmeans-predict.dml -nvargs spY=INPUT_DIR/Y
+#         prY=INPUT_DIR/PredY O=OUTPUT_DIR/stats
+
+
+fmt_prY = ifdef ($fmt, "text");
+filePrY = ifdef ($prY, " ");
+fileSpY = ifdef ($spY, " ");
+fileX   = ifdef ($X, " ");
+fileC   = ifdef ($C, " ");
+fileO   = ifdef ($O, " ");
+
+is_str_empty = TRUE;
+str = " ";
+
+print ("BEGIN K-MEANS SCORING SCRIPT");
+
+if (fileX != " ") {
+    print ("Reading X...");
+    X = read (fileX);
+    total_mean = colSums (X) / nrow (X);
+    total_ss = sum( (X - total_mean)^2 );
+}
+
+if ((fileC != " ") & (fileX == " ")) {
+    print ("ERROR: Cannot provide C without providing X.");
+} else {
+
+
+if (fileC != " ") {
+    print ("Reading C...");
+    C = read (fileC);
+    num_clusters = nrow (C);
+    ones_C = matrix (1, rows = num_clusters, cols = 1);
+    print ("Computing the predicted Y...");
+    D =  -2 * (X %*% t(C)) + t(rowSums (C ^ 2));
+    prY = rowIndexMin (D);
+    if (filePrY != " ") {
+        print ("Writing the predicted Y...");
+        write (prY, filePrY, format=fmt_prY);
+    }
+} else {
+    print ("Reading the predicted Y...");
+    prY = read (filePrY);
+    num_clusters = max (prY);
+    ones_C = matrix (1, rows = num_clusters, cols = 1);
+}
+
+if (fileX != " ") {
+    print ("Computing the WCSS...");
+    # Compute projection matrix from clusters to records
+    P = matrix (0, rows = nrow (X), cols = num_clusters);
+    P [, 1 : max (prY)] = table (seq (1, nrow (X), 1), prY);
+    # Compute the means, as opposed to the centroids
+    cluster_sizes = t(colSums (P));
+    record_of_ones = matrix (1, rows = 1, cols = ncol (X));
+    M = (t(P) %*% X) / ((cluster_sizes + ppred (cluster_sizes, 0, "==")) %*% record_of_ones);
+    # Compute the WCSS for the means
+    wcss_means = sum ((X - P %*% M) ^ 2);
+    wcss_means_pc = 100.0 * wcss_means / total_ss;
+    bcss_means = sum (cluster_sizes * rowSums ((M - ones_C %*% total_mean) ^ 2));
+    bcss_means_pc = 100.0 * bcss_means / total_ss;
+    # Output results
+    print ("Total Sum of Squares (TSS) = " + total_ss);
+    print ("WCSS for cluster means: " + (round (10000.0 * wcss_means_pc) / 10000.0) + "% of TSS = " + wcss_means);
+    print ("BCSS for cluster means: " + (round (10000.0 * bcss_means_pc) / 10000.0) + "% of TSS = " + bcss_means);
+    str = "TSS,," + total_ss;
+    str = append (str, "WCSS_M,," + wcss_means);
+    str = append (str, "WCSS_M_PC,," + wcss_means_pc);
+    str = append (str, "BCSS_M,," + bcss_means);
+    str = append (str, "BCSS_M_PC,," + bcss_means_pc);
+    is_str_empty = FALSE;
+}
+
+if (fileC != " ") {        
+    # Compute the WCSS for the centroids
+    wcss_centroids = sum ((X - P %*% C) ^ 2);
+    wcss_centroids_pc = 100.0 * wcss_centroids / total_ss;
+    bcss_centroids = sum (cluster_sizes * rowSums ((C - ones_C %*% total_mean) ^ 2));
+    bcss_centroids_pc = 100.0 * bcss_centroids / total_ss;
+    # Output results
+    print ("WCSS for centroids: " + (round (10000.0 * wcss_centroids_pc) / 10000.0) + "% of TSS = " + wcss_centroids);
+    print ("BCSS for centroids: " + (round (10000.0 * bcss_centroids_pc) / 10000.0) + "% of TSS = " + bcss_centroids);
+    str = append (str, "WCSS_C,," + wcss_centroids);
+    str = append (str, "WCSS_C_PC,," + wcss_centroids_pc);
+    str = append (str, "BCSS_C,," + bcss_centroids);
+    str = append (str, "BCSS_C_PC,," + bcss_centroids_pc);
+}
+
+
+
+if (fileSpY != " ") {
+
+print ("Reading the specified Y...");
+spY = read (fileSpY);
+num_records = nrow (spY);
+    
+if (num_records != nrow (prY) | ncol (spY) != 1 | ncol (prY) != 1) {
+    print ("ERROR: spY and/or prY size mismatch");
+    print ("nrow (spY) = " + nrow (spY) + ";  ncol (spY) = " + ncol (spY)
+      + ";  nrow (prY) = " + nrow (prY) + ";  ncol (prY) = " + ncol (prY));
+} else {
+
+    print ("Computing the pairs counts...");
+
+    orig_min_spY = min (spY);
+    orig_min_prY = min (prY);
+    spY = spY + (1 - orig_min_spY);
+    prY = prY + (1 - orig_min_prY);
+    
+    spYprY_row_counts = table (spY, prY);
+    spY_row_counts = rowSums (spYprY_row_counts);
+    prY_row_counts = t(colSums (spYprY_row_counts));
+
+    # Count all pairs of rows having the same (spY, prY)-values
+    spYprY_pair_counts = spYprY_row_counts * (spYprY_row_counts - 1) / 2;
+
+    # Count all pairs of rows having the same spY-values
+    spY_pair_counts = spY_row_counts * (spY_row_counts - 1) / 2;
+    # Count all pairs of rows having the same prY-values
+    prY_pair_counts = prY_row_counts * (prY_row_counts - 1) / 2;
+
+    num_pairs = num_records * (num_records - 1.0) / 2.0;
+
+    num_TP_pairs = sum (spYprY_pair_counts);
+    num_FP_pairs = sum (prY_pair_counts) - num_TP_pairs;
+    num_FN_pairs = sum (spY_pair_counts) - num_TP_pairs;
+    num_TN_pairs = num_pairs - num_TP_pairs - num_FP_pairs - num_FN_pairs;
+    
+    pct_TP_pairs = num_TP_pairs / num_pairs * 100.0;
+    pct_TN_pairs = num_TN_pairs / num_pairs * 100.0;
+    pct_FP_pairs = num_FP_pairs / num_pairs * 100.0;
+    pct_FN_pairs = num_FN_pairs / num_pairs * 100.0;
+    
+    if (is_str_empty) {
+        str = "TRUE_SAME_CT,," + num_TP_pairs;
+        is_str_empty = FALSE;
+    } else {
+        str = append (str, "TRUE_SAME_CT,," + num_TP_pairs);
+    } 
+    str = append (str, "TRUE_SAME_PC,,"  + pct_TP_pairs);
+    str = append (str, "TRUE_DIFF_CT,,"  + num_TN_pairs);
+    str = append (str, "TRUE_DIFF_PC,,"  + pct_TN_pairs);
+    str = append (str, "FALSE_SAME_CT,," + num_FP_pairs);
+    str = append (str, "FALSE_SAME_PC,," + pct_FP_pairs);
+    str = append (str, "FALSE_DIFF_CT,," + num_FN_pairs);
+    str = append (str, "FALSE_DIFF_PC,," + pct_FN_pairs);
+    
+    pct_TP_pairs = round (pct_TP_pairs * 10000.0) / 10000.0;
+    pct_TN_pairs = round (pct_TN_pairs * 10000.0) / 10000.0;
+    pct_FP_pairs = round (pct_FP_pairs * 10000.0) / 10000.0;
+    pct_FN_pairs = round (pct_FN_pairs * 10000.0) / 10000.0;
+    
+    space_TP = "";  if (pct_TP_pairs < 100) {space_TP = " ";}  if (pct_TP_pairs < 10) {space_TP = "  ";}
+    space_TN = "";  if (pct_TN_pairs < 100) {space_TN = " ";}  if (pct_TN_pairs < 10) {space_TN = "  ";}
+    space_FP = "";  if (pct_FP_pairs < 100) {space_FP = " ";}  if (pct_FP_pairs < 10) {space_FP = "  ";}
+    space_FN = "";  if (pct_FN_pairs < 100) {space_FN = " ";}  if (pct_FN_pairs < 10) {space_FN = "  ";}
+
+    print ("Same-cluster pairs predicted as Same-cluster ( True Pos): " + space_TP
+        + pct_TP_pairs + "% of all pairs" + " (" + num_TP_pairs + ")");
+    print ("Diff-cluster pairs predicted as Diff-cluster ( True Neg): " + space_TN
+        + pct_TN_pairs + "% of all pairs" + " (" + num_TN_pairs + ")");
+    print ("Diff-cluster pairs predicted as Same-cluster (False Pos): " + space_FP
+        + pct_FP_pairs + "% of all pairs" + " (" + num_FP_pairs + ")");
+    print ("Same-cluster pairs predicted as Diff-cluster (False Neg): " + space_FN
+        + pct_FN_pairs + "% of all pairs" + " (" + num_FN_pairs + ")");
+        
+    [spY_cids, prY_cids, full_counts, matching_counts, rounded_percentages] =
+        get_best_assignments (spYprY_row_counts);
+        
+    print (" ");
+    print ("Specified Categories versus Predicted Clusters:");
+    
+    spY_cids = spY_cids + orig_min_spY - 1;
+    prY_cids = prY_cids + orig_min_prY - 1;
+    
+    for (i in 1 : nrow (spY_cids))
+    {
+        cid = as.integer (castAsScalar (spY_cids [i, 1]));
+        pct = castAsScalar (rounded_percentages [i, 1]);
+        space_pct = "";  if (pct < 100) {space_pct = " ";}  if (pct < 10) {space_pct = "  ";}
+        print ("Category " + cid + 
+            ":  best pred. cluster is " + as.integer (castAsScalar (prY_cids [i, 1])) + 
+            ";  full count = " + as.integer (castAsScalar (full_counts [i, 1])) + 
+            ",  matching count = " + space_pct + pct + "% (" +
+            as.integer (castAsScalar (matching_counts [i, 1])) + ")");
+            
+        str = append (str, "SPEC_TO_PRED,"  + cid + "," + castAsScalar (prY_cids [i, 1]));
+        str = append (str, "SPEC_FULL_CT,"  + cid + "," + castAsScalar (full_counts [i, 1]));
+        str = append (str, "SPEC_MATCH_CT," + cid + "," + castAsScalar (matching_counts [i, 1]));
+        str = append (str, "SPEC_MATCH_PC," + cid + "," + castAsScalar (rounded_percentages [i, 1]));
+    }
+
+    [prY_cids, spY_cids, full_counts, matching_counts, rounded_percentages] =
+        get_best_assignments (t(spYprY_row_counts));
+        
+    print (" ");
+    print ("Predicted Clusters versus Specified Categories:");
+    
+    prY_cids = prY_cids + orig_min_prY - 1;
+    spY_cids = spY_cids + orig_min_spY - 1;
+    
+    for (i in 1 : nrow (prY_cids))
+    {
+        cid = as.integer (castAsScalar (prY_cids [i, 1]));
+        pct = castAsScalar (rounded_percentages [i, 1]);
+        space_pct = "";  if (pct < 100) {space_pct = " ";}  if (pct < 10) {space_pct = "  ";}
+        print ("Cluster " + cid + 
+            ":  best spec. categ. is " + as.integer (castAsScalar (spY_cids [i, 1])) + 
+            ";  full count = " + as.integer (castAsScalar (full_counts [i, 1])) + 
+            ",  matching count = " + space_pct + pct + "% (" +
+            as.integer (castAsScalar (matching_counts [i, 1])) + ")");
+
+        str = append (str, "PRED_TO_SPEC,"  + cid + "," + castAsScalar (spY_cids [i, 1]));
+        str = append (str, "PRED_FULL_CT,"  + cid + "," + castAsScalar (full_counts [i, 1]));
+        str = append (str, "PRED_MATCH_CT," + cid + "," + castAsScalar (matching_counts [i, 1]));
+        str = append (str, "PRED_MATCH_PC," + cid + "," + castAsScalar (rounded_percentages [i, 1]));
+    }
+
+    print (" ");
+}}}
+
+if ((fileO != " ") & (! is_str_empty)) {
+    write (str, fileO);
+}
+
+print ("DONE: K-MEANS SCORING SCRIPT");
+
+
+
+get_best_assignments = function (Matrix[double] counts)
+return (Matrix[double] row_ids, Matrix[double] col_ids, Matrix[double] margins, 
+        Matrix[double] max_counts, Matrix[double] rounded_percentages)
+{
+    margins = rowSums (counts);
+    select_positive = diag (ppred (margins, 0, ">"));
+    select_positive = removeEmpty (target = select_positive, margin = "rows");
+    row_ids = select_positive %*% seq (1, nrow (margins), 1);
+    pos_counts = select_positive %*% counts;
+    pos_margins = select_positive %*% margins;
+    max_counts = rowMaxs (pos_counts);
+    one_per_column = matrix (1, rows = 1, cols = ncol (pos_counts));
+    max_counts_ppred = max_counts %*% one_per_column;
+    is_max_count = ppred (pos_counts, max_counts_ppred, "==");
+    aggr_is_max_count = t(cumsum (t(is_max_count)));
+    col_ids = rowSums (ppred (aggr_is_max_count, 0, "==")) + 1;
+    rounded_percentages = round (1000000.0 * max_counts / pos_margins) / 10000.0;
+}
+


[08/55] [partial] incubator-systemml git commit: [SYSTEMML-482] [SYSTEMML-480] Adding a Git attributes file to enfore Unix-styled line endings, and normalizing all of the line endings.

Posted by du...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/misc/RewriteColSumsMVMult.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/misc/RewriteColSumsMVMult.dml b/src/test/scripts/functions/misc/RewriteColSumsMVMult.dml
index 8dd9dc3..8c5eda3 100644
--- a/src/test/scripts/functions/misc/RewriteColSumsMVMult.dml
+++ b/src/test/scripts/functions/misc/RewriteColSumsMVMult.dml
@@ -19,8 +19,8 @@
 #
 #-------------------------------------------------------------
 
-
-X = read($1);
-R = colSums( X * seq(1,nrow(X)) );
-
+
+X = read($1);
+R = colSums( X * seq(1,nrow(X)) );
+
 write(R, $2);
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/misc/RewriteRowSumsMVMult.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/misc/RewriteRowSumsMVMult.R b/src/test/scripts/functions/misc/RewriteRowSumsMVMult.R
index c40c8da..d06adc6 100644
--- a/src/test/scripts/functions/misc/RewriteRowSumsMVMult.R
+++ b/src/test/scripts/functions/misc/RewriteRowSumsMVMult.R
@@ -19,12 +19,12 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-library("Matrix")
-
-X = as.matrix(readMM(paste(args[1], "X.mtx", sep="")))
-R = rowSums( X * matrix(1,nrow(X),1) %*% t(seq(1,ncol(X))) );
-
-writeMM(as(R, "CsparseMatrix"), paste(args[2], "R", sep="")); 
+
+args <- commandArgs(TRUE)
+options(digits=22)
+library("Matrix")
+
+X = as.matrix(readMM(paste(args[1], "X.mtx", sep="")))
+R = rowSums( X * matrix(1,nrow(X),1) %*% t(seq(1,ncol(X))) );
+
+writeMM(as(R, "CsparseMatrix"), paste(args[2], "R", sep="")); 

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/misc/RewriteRowSumsMVMult.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/misc/RewriteRowSumsMVMult.dml b/src/test/scripts/functions/misc/RewriteRowSumsMVMult.dml
index c7fac95..8be42ac 100644
--- a/src/test/scripts/functions/misc/RewriteRowSumsMVMult.dml
+++ b/src/test/scripts/functions/misc/RewriteRowSumsMVMult.dml
@@ -19,8 +19,8 @@
 #
 #-------------------------------------------------------------
 
-
-X = read($1);
-R = rowSums( X * t(seq(1,ncol(X))) );
-
+
+X = read($1);
+R = rowSums( X * t(seq(1,ncol(X))) );
+
 write(R, $2);
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/misc/RewriteSlicedMatrixMult.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/misc/RewriteSlicedMatrixMult.R b/src/test/scripts/functions/misc/RewriteSlicedMatrixMult.R
index ac92edc..70ec017 100644
--- a/src/test/scripts/functions/misc/RewriteSlicedMatrixMult.R
+++ b/src/test/scripts/functions/misc/RewriteSlicedMatrixMult.R
@@ -1,33 +1,33 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-
-args <- commandArgs(TRUE)
-options(digits=22)
-library("Matrix")
-
-A = as.matrix(readMM(paste(args[1], "A.mtx", sep="")));
-B = as.matrix(readMM(paste(args[1], "B.mtx", sep="")));
-
-C = A %*% B;
-R = C[1,1];
-
-writeMM(as(R, "CsparseMatrix"), paste(args[2], "R", sep="")); 
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+args <- commandArgs(TRUE)
+options(digits=22)
+library("Matrix")
+
+A = as.matrix(readMM(paste(args[1], "A.mtx", sep="")));
+B = as.matrix(readMM(paste(args[1], "B.mtx", sep="")));
+
+C = A %*% B;
+R = C[1,1];
+
+writeMM(as(R, "CsparseMatrix"), paste(args[2], "R", sep="")); 

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/misc/RewriteSlicedMatrixMult.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/misc/RewriteSlicedMatrixMult.dml b/src/test/scripts/functions/misc/RewriteSlicedMatrixMult.dml
index 8cec002..130d5ff 100644
--- a/src/test/scripts/functions/misc/RewriteSlicedMatrixMult.dml
+++ b/src/test/scripts/functions/misc/RewriteSlicedMatrixMult.dml
@@ -1,29 +1,29 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-
-A = read($1);
-B = read($2);
-
-C = A %*% B;
-R = C[1,1];
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+A = read($1);
+B = read($2);
+
+C = A %*% B;
+R = C[1,1];
+
 write(R, $3);
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/misc/ScalarFunctionTest1.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/misc/ScalarFunctionTest1.R b/src/test/scripts/functions/misc/ScalarFunctionTest1.R
index 50a10b0..5d48d0b 100644
--- a/src/test/scripts/functions/misc/ScalarFunctionTest1.R
+++ b/src/test/scripts/functions/misc/ScalarFunctionTest1.R
@@ -1,35 +1,35 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-
-args <- commandArgs(TRUE)
-options(digits=22)
-library("Matrix")
-
-square <- function(a) {
-   b = a*a;   
-   return(b);
-}
-
-y = square(1.9/2.9);
-R = as.matrix(y);
-
-writeMM(as(R, "CsparseMatrix"), paste(args[1], "R", sep="")); 
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+args <- commandArgs(TRUE)
+options(digits=22)
+library("Matrix")
+
+square <- function(a) {
+   b = a*a;   
+   return(b);
+}
+
+y = square(1.9/2.9);
+R = as.matrix(y);
+
+writeMM(as(R, "CsparseMatrix"), paste(args[1], "R", sep="")); 

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/misc/ScalarFunctionTest1.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/misc/ScalarFunctionTest1.dml b/src/test/scripts/functions/misc/ScalarFunctionTest1.dml
index a11fff8..75c5db2 100644
--- a/src/test/scripts/functions/misc/ScalarFunctionTest1.dml
+++ b/src/test/scripts/functions/misc/ScalarFunctionTest1.dml
@@ -1,29 +1,29 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-square = function (double a) return (double b) {
-  b = a*a;
-}
-
-y = square(1.9/2.9);
-R = as.matrix(y);
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+square = function (double a) return (double b) {
+  b = a*a;
+}
+
+y = square(1.9/2.9);
+R = as.matrix(y);
+
 write(R, $1);
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/misc/ScalarFunctionTest2.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/misc/ScalarFunctionTest2.R b/src/test/scripts/functions/misc/ScalarFunctionTest2.R
index dd3f323..ac3c5f2 100644
--- a/src/test/scripts/functions/misc/ScalarFunctionTest2.R
+++ b/src/test/scripts/functions/misc/ScalarFunctionTest2.R
@@ -1,36 +1,36 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-
-args <- commandArgs(TRUE)
-options(digits=22)
-library("Matrix")
-
-square <- function(a) {
-   b = a*a;   
-   return(b);
-}
-
-x = 1.9/2.9;
-y = square(x);
-R = as.matrix(y);
-
-writeMM(as(R, "CsparseMatrix"), paste(args[1], "R", sep="")); 
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+args <- commandArgs(TRUE)
+options(digits=22)
+library("Matrix")
+
+square <- function(a) {
+   b = a*a;   
+   return(b);
+}
+
+x = 1.9/2.9;
+y = square(x);
+R = as.matrix(y);
+
+writeMM(as(R, "CsparseMatrix"), paste(args[1], "R", sep="")); 

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/misc/ScalarFunctionTest2.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/misc/ScalarFunctionTest2.dml b/src/test/scripts/functions/misc/ScalarFunctionTest2.dml
index e51ef18..bfe6208 100644
--- a/src/test/scripts/functions/misc/ScalarFunctionTest2.dml
+++ b/src/test/scripts/functions/misc/ScalarFunctionTest2.dml
@@ -1,30 +1,30 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-square = function (double a) return (double b) {
-  b = a*a;
-}
-
-x = 1.9/2.9;
-y = square(x);
-R = as.matrix(y);
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+square = function (double a) return (double b) {
+  b = a*a;
+}
+
+x = 1.9/2.9;
+y = square(x);
+R = as.matrix(y);
+
 write(R, $1);
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/misc/TableExpandTest.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/misc/TableExpandTest.R b/src/test/scripts/functions/misc/TableExpandTest.R
index 5197d3b..b6d74b8 100644
--- a/src/test/scripts/functions/misc/TableExpandTest.R
+++ b/src/test/scripts/functions/misc/TableExpandTest.R
@@ -1,41 +1,41 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-
-args <- commandArgs(TRUE)
-options(digits=22)
-library("Matrix")
-
-A = as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
-m = as.integer(args[2]);
-left = as.logical(args[3]);
-
-#note: this outer formulation is equivalent to table but does not 
-#  require padding and convertion from R's factor-based table
-
-if( left ){
-   C = outer(as.vector(A), seq(1,m), "==");
-} else {
-   C = outer(seq(1,m), as.vector(A), "==");
-}
-
-
-writeMM(as(as.matrix(C), "CsparseMatrix"), paste(args[4], "C", sep="")); 
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+args <- commandArgs(TRUE)
+options(digits=22)
+library("Matrix")
+
+A = as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
+m = as.integer(args[2]);
+left = as.logical(args[3]);
+
+#note: this outer formulation is equivalent to table but does not 
+#  require padding and convertion from R's factor-based table
+
+if( left ){
+   C = outer(as.vector(A), seq(1,m), "==");
+} else {
+   C = outer(seq(1,m), as.vector(A), "==");
+}
+
+
+writeMM(as(as.matrix(C), "CsparseMatrix"), paste(args[4], "C", sep="")); 

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/misc/TableExpandTest.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/misc/TableExpandTest.dml b/src/test/scripts/functions/misc/TableExpandTest.dml
index 9df7188..4b7299b 100644
--- a/src/test/scripts/functions/misc/TableExpandTest.dml
+++ b/src/test/scripts/functions/misc/TableExpandTest.dml
@@ -1,34 +1,34 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-
-A = read($1);
-m = $2;
-left = $3;
-
-if( left ){
-   C = table(seq(1,nrow(A)), A, nrow(A), m);
-}
-else{
-   C = table(A, seq(1,nrow(A)), m, nrow(A));
-}
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+A = read($1);
+m = $2;
+left = $3;
+
+if( left ){
+   C = table(seq(1,nrow(A)), A, nrow(A), m);
+}
+else{
+   C = table(A, seq(1,nrow(A)), m, nrow(A));
+}
+
 write(C, $4);
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/misc/WhileScalarAssignmentTest.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/misc/WhileScalarAssignmentTest.dml b/src/test/scripts/functions/misc/WhileScalarAssignmentTest.dml
index 6e0977b..858f6c0 100644
--- a/src/test/scripts/functions/misc/WhileScalarAssignmentTest.dml
+++ b/src/test/scripts/functions/misc/WhileScalarAssignmentTest.dml
@@ -19,12 +19,12 @@
 #
 #-------------------------------------------------------------
 
-
-val = $1;
-i = 1;
-while( i<=10 ){
-   val = $1;
-   i = i+1;
-}
-
+
+val = $1;
+i = 1;
+while( i<=10 ){
+   val = $1;
+   i = i+1;
+}
+
 print(val);
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/misc/castBoolean.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/misc/castBoolean.dml b/src/test/scripts/functions/misc/castBoolean.dml
index 39bb9e8..9c5bdd2 100644
--- a/src/test/scripts/functions/misc/castBoolean.dml
+++ b/src/test/scripts/functions/misc/castBoolean.dml
@@ -19,7 +19,7 @@
 #
 #-------------------------------------------------------------
 
-
-X = read($1);
-Y = as.logical(X);
-write(Y, $2);
+
+X = read($1);
+Y = as.logical(X);
+write(Y, $2);

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/misc/castDouble.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/misc/castDouble.dml b/src/test/scripts/functions/misc/castDouble.dml
index 7986bdc..d4d92af 100644
--- a/src/test/scripts/functions/misc/castDouble.dml
+++ b/src/test/scripts/functions/misc/castDouble.dml
@@ -19,7 +19,7 @@
 #
 #-------------------------------------------------------------
 
-
-X = read($1);
-Y = as.double(X);
-write(Y, $2);
+
+X = read($1);
+Y = as.double(X);
+write(Y, $2);

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/misc/castInteger.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/misc/castInteger.dml b/src/test/scripts/functions/misc/castInteger.dml
index c0deb89..d68baa3 100644
--- a/src/test/scripts/functions/misc/castInteger.dml
+++ b/src/test/scripts/functions/misc/castInteger.dml
@@ -19,7 +19,7 @@
 #
 #-------------------------------------------------------------
 
-
-X = read($1);
-Y = as.integer(X);
-write(Y, $2);
+
+X = read($1);
+Y = as.integer(X);
+write(Y, $2);

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/misc/castMatrixScalar.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/misc/castMatrixScalar.dml b/src/test/scripts/functions/misc/castMatrixScalar.dml
index 06c82c9..cc031a5 100644
--- a/src/test/scripts/functions/misc/castMatrixScalar.dml
+++ b/src/test/scripts/functions/misc/castMatrixScalar.dml
@@ -19,7 +19,7 @@
 #
 #-------------------------------------------------------------
 
-
-X = read($1, rows=$2, cols=$3);
-x = as.scalar(X);
-write(x, $4);
+
+X = read($1, rows=$2, cols=$3);
+x = as.scalar(X);
+write(x, $4);

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/misc/castScalarMatrix.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/misc/castScalarMatrix.dml b/src/test/scripts/functions/misc/castScalarMatrix.dml
index bc60cca..2e5e4c2 100644
--- a/src/test/scripts/functions/misc/castScalarMatrix.dml
+++ b/src/test/scripts/functions/misc/castScalarMatrix.dml
@@ -19,7 +19,7 @@
 #
 #-------------------------------------------------------------
 
-
-x = read($1, rows=$2, cols=$3);
-X = as.matrix(x);
-write(X, $4);
+
+x = read($1, rows=$2, cols=$3);
+X = as.matrix(x);
+write(X, $4);

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/misc/conditionalPredicateIf.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/misc/conditionalPredicateIf.dml b/src/test/scripts/functions/misc/conditionalPredicateIf.dml
index ee607b7..a736aee 100644
--- a/src/test/scripts/functions/misc/conditionalPredicateIf.dml
+++ b/src/test/scripts/functions/misc/conditionalPredicateIf.dml
@@ -19,16 +19,16 @@
 #
 #-------------------------------------------------------------
 
-
-val1 = $1;
-
-# test auto casting constant propagation
-if( val1 ) {
-   val1 = 1;
-}
-val1 = val1 + 7.0; 
-
-# test auto casting with variables
-if( val1 ) {
-   val1 = 1;
+
+val1 = $1;
+
+# test auto casting constant propagation
+if( val1 ) {
+   val1 = 1;
+}
+val1 = val1 + 7.0; 
+
+# test auto casting with variables
+if( val1 ) {
+   val1 = 1;
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/misc/conditionalPredicateWhile.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/misc/conditionalPredicateWhile.dml b/src/test/scripts/functions/misc/conditionalPredicateWhile.dml
index 380fbb4..d0b21c5 100644
--- a/src/test/scripts/functions/misc/conditionalPredicateWhile.dml
+++ b/src/test/scripts/functions/misc/conditionalPredicateWhile.dml
@@ -19,10 +19,10 @@
 #
 #-------------------------------------------------------------
 
-
-val1 = $1;
-
-# test auto casting with variables
-while( val1 ) {
-   val1 = FALSE;
-}
+
+val1 = $1;
+
+# test auto casting with variables
+while( val1 ) {
+   val1 = FALSE;
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/misc/conditionalValidate1.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/misc/conditionalValidate1.dml b/src/test/scripts/functions/misc/conditionalValidate1.dml
index 9e7946b..e6e03bf 100644
--- a/src/test/scripts/functions/misc/conditionalValidate1.dml
+++ b/src/test/scripts/functions/misc/conditionalValidate1.dml
@@ -19,7 +19,7 @@
 #
 #-------------------------------------------------------------
 
-
-Y = read($1);
-
-print("Result: "+sum(Y));
+
+Y = read($1);
+
+print("Result: "+sum(Y));

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/misc/conditionalValidate2.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/misc/conditionalValidate2.dml b/src/test/scripts/functions/misc/conditionalValidate2.dml
index 12e7845..5f65769 100644
--- a/src/test/scripts/functions/misc/conditionalValidate2.dml
+++ b/src/test/scripts/functions/misc/conditionalValidate2.dml
@@ -19,14 +19,14 @@
 #
 #-------------------------------------------------------------
 
-
-if( 1==0 )
-{
-   Y = read($1);
-}
-else
-{
-   Y = matrix(1, rows=10, cols=10);
-}
-
-print("Result: "+sum(Y));
+
+if( 1==0 )
+{
+   Y = read($1);
+}
+else
+{
+   Y = matrix(1, rows=10, cols=10);
+}
+
+print("Result: "+sum(Y));

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/misc/conditionalValidate3.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/misc/conditionalValidate3.dml b/src/test/scripts/functions/misc/conditionalValidate3.dml
index e912035..94d8cdc 100644
--- a/src/test/scripts/functions/misc/conditionalValidate3.dml
+++ b/src/test/scripts/functions/misc/conditionalValidate3.dml
@@ -19,12 +19,12 @@
 #
 #-------------------------------------------------------------
 
-
-Y = matrix(1, rows=10, cols=10);
-
-
-for( i in 1:0 ) {
-   Y = read($1);
-}   
-   
-print("Result: "+sum(Y));
+
+Y = matrix(1, rows=10, cols=10);
+
+
+for( i in 1:0 ) {
+   Y = read($1);
+}   
+   
+print("Result: "+sum(Y));

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/misc/conditionalValidate4.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/misc/conditionalValidate4.dml b/src/test/scripts/functions/misc/conditionalValidate4.dml
index 1aba752..aff86ec 100644
--- a/src/test/scripts/functions/misc/conditionalValidate4.dml
+++ b/src/test/scripts/functions/misc/conditionalValidate4.dml
@@ -19,12 +19,12 @@
 #
 #-------------------------------------------------------------
 
-
-Y = matrix(1, rows=10, cols=10);
-
-i = 1;
-while( i<1 ) {
-   Y = read($1);
-}   
-   
-print("Result: "+sum(Y));
+
+Y = matrix(1, rows=10, cols=10);
+
+i = 1;
+while( i<1 ) {
+   Y = read($1);
+}   
+   
+print("Result: "+sum(Y));

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/misc/dt_change_1a.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/misc/dt_change_1a.dml b/src/test/scripts/functions/misc/dt_change_1a.dml
index aa3ef32..e98f2c6 100644
--- a/src/test/scripts/functions/misc/dt_change_1a.dml
+++ b/src/test/scripts/functions/misc/dt_change_1a.dml
@@ -19,15 +19,15 @@
 #
 #-------------------------------------------------------------
 
-
-Y = matrix(1, rows=10, cols=10);
-
-if( 1==1 ) {
-   X = 7;
-}
-else {
-   X = 7;
-}
-
-print("Result: "+sum(X + Y));
-#expected: "Result: 800.0"
+
+Y = matrix(1, rows=10, cols=10);
+
+if( 1==1 ) {
+   X = 7;
+}
+else {
+   X = 7;
+}
+
+print("Result: "+sum(X + Y));
+#expected: "Result: 800.0"

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/misc/dt_change_1b.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/misc/dt_change_1b.dml b/src/test/scripts/functions/misc/dt_change_1b.dml
index 0c05a84..cb3923a 100644
--- a/src/test/scripts/functions/misc/dt_change_1b.dml
+++ b/src/test/scripts/functions/misc/dt_change_1b.dml
@@ -19,15 +19,15 @@
 #
 #-------------------------------------------------------------
 
-
-Y = matrix(1, rows=10, cols=10);
-
-if( 1==1 ) {
-   X = matrix(7, rows=10, cols=10);
-}
-else {
-   X = matrix(7, rows=10, cols=10);
-}
-
-print("Result: "+sum(X + Y));
-#expected: "Result: 800.0"
+
+Y = matrix(1, rows=10, cols=10);
+
+if( 1==1 ) {
+   X = matrix(7, rows=10, cols=10);
+}
+else {
+   X = matrix(7, rows=10, cols=10);
+}
+
+print("Result: "+sum(X + Y));
+#expected: "Result: 800.0"

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/misc/dt_change_1c.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/misc/dt_change_1c.dml b/src/test/scripts/functions/misc/dt_change_1c.dml
index 73ccc67..9279e81 100644
--- a/src/test/scripts/functions/misc/dt_change_1c.dml
+++ b/src/test/scripts/functions/misc/dt_change_1c.dml
@@ -19,15 +19,15 @@
 #
 #-------------------------------------------------------------
 
-
-Y = matrix(1, rows=10, cols=10);
-
-if( 1==1 ) {
-   X = matrix(7, rows=10, cols=10);
-}
-else {
-   X = 7;
-}
-
-print("Result: "+sum(X + Y));
-#expected: "Result: 800.0"
+
+Y = matrix(1, rows=10, cols=10);
+
+if( 1==1 ) {
+   X = matrix(7, rows=10, cols=10);
+}
+else {
+   X = 7;
+}
+
+print("Result: "+sum(X + Y));
+#expected: "Result: 800.0"

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/misc/dt_change_1d.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/misc/dt_change_1d.dml b/src/test/scripts/functions/misc/dt_change_1d.dml
index 6992a6f..bf05714 100644
--- a/src/test/scripts/functions/misc/dt_change_1d.dml
+++ b/src/test/scripts/functions/misc/dt_change_1d.dml
@@ -19,15 +19,15 @@
 #
 #-------------------------------------------------------------
 
-
-Y = matrix(1, rows=10, cols=10);
-
-if( 1==1 ) {
-   X = 7;
-}
-else {
-   X = matrix(7, rows=10, cols=10);
-}
-
-print("Result: "+sum(X + Y));
-#expected: "Result: 800.0"
+
+Y = matrix(1, rows=10, cols=10);
+
+if( 1==1 ) {
+   X = 7;
+}
+else {
+   X = matrix(7, rows=10, cols=10);
+}
+
+print("Result: "+sum(X + Y));
+#expected: "Result: 800.0"

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/misc/dt_change_1e.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/misc/dt_change_1e.dml b/src/test/scripts/functions/misc/dt_change_1e.dml
index 4e39aeb..c2a69fa 100644
--- a/src/test/scripts/functions/misc/dt_change_1e.dml
+++ b/src/test/scripts/functions/misc/dt_change_1e.dml
@@ -19,15 +19,15 @@
 #
 #-------------------------------------------------------------
 
-
-Y = matrix(1, rows=10, cols=10);
-
-if( 1!=1 ) {
-   X = matrix(7, rows=10, cols=10);
-}
-else {
-   X = 7;
-}
-
-print("Result: "+sum(X + Y));
-#expected: "Result: 800.0"
+
+Y = matrix(1, rows=10, cols=10);
+
+if( 1!=1 ) {
+   X = matrix(7, rows=10, cols=10);
+}
+else {
+   X = 7;
+}
+
+print("Result: "+sum(X + Y));
+#expected: "Result: 800.0"

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/misc/dt_change_1f.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/misc/dt_change_1f.dml b/src/test/scripts/functions/misc/dt_change_1f.dml
index 712f504..bfdfc61 100644
--- a/src/test/scripts/functions/misc/dt_change_1f.dml
+++ b/src/test/scripts/functions/misc/dt_change_1f.dml
@@ -19,15 +19,15 @@
 #
 #-------------------------------------------------------------
 
-
-Y = matrix(1, rows=10, cols=10);
-
-if( 1!=1 ) {
-   X = 7;
-}
-else {
-   X = matrix(7, rows=10, cols=10);
-}
-
-print("Result: "+sum(X + Y));
-#expected: "Result: 800.0"
+
+Y = matrix(1, rows=10, cols=10);
+
+if( 1!=1 ) {
+   X = 7;
+}
+else {
+   X = matrix(7, rows=10, cols=10);
+}
+
+print("Result: "+sum(X + Y));
+#expected: "Result: 800.0"

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/misc/dt_change_1g.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/misc/dt_change_1g.dml b/src/test/scripts/functions/misc/dt_change_1g.dml
index 67acdd7..f0b640a 100644
--- a/src/test/scripts/functions/misc/dt_change_1g.dml
+++ b/src/test/scripts/functions/misc/dt_change_1g.dml
@@ -19,13 +19,13 @@
 #
 #-------------------------------------------------------------
 
-
-Y = matrix(1, rows=10, cols=10);
-X = matrix(7, rows=10, cols=10);
-
-if( 1==1 ) {
-   X = 7;
-}
-
-print("Result: "+sum(X + Y));
-#expected: "Result: 800.0"
+
+Y = matrix(1, rows=10, cols=10);
+X = matrix(7, rows=10, cols=10);
+
+if( 1==1 ) {
+   X = 7;
+}
+
+print("Result: "+sum(X + Y));
+#expected: "Result: 800.0"

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/misc/dt_change_1h.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/misc/dt_change_1h.dml b/src/test/scripts/functions/misc/dt_change_1h.dml
index 2230427..cb8403d 100644
--- a/src/test/scripts/functions/misc/dt_change_1h.dml
+++ b/src/test/scripts/functions/misc/dt_change_1h.dml
@@ -19,13 +19,13 @@
 #
 #-------------------------------------------------------------
 
-
-Y = matrix(1, rows=10, cols=10);
-X = 7;
-
-if( 1==1 ) {
-   X = matrix(7, rows=10, cols=10);
-}
-
-print("Result: "+sum(X + Y));
-#expected: "Result: 800.0"
+
+Y = matrix(1, rows=10, cols=10);
+X = 7;
+
+if( 1==1 ) {
+   X = matrix(7, rows=10, cols=10);
+}
+
+print("Result: "+sum(X + Y));
+#expected: "Result: 800.0"

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/misc/dt_change_2a.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/misc/dt_change_2a.dml b/src/test/scripts/functions/misc/dt_change_2a.dml
index 0cfa128..86966aa 100644
--- a/src/test/scripts/functions/misc/dt_change_2a.dml
+++ b/src/test/scripts/functions/misc/dt_change_2a.dml
@@ -19,13 +19,13 @@
 #
 #-------------------------------------------------------------
 
-
-Y = matrix(1, rows=10, cols=10);
-X = 7;
-
-for( i in 1:1 ) {
-   X = 7;
-}
-
-print("Result: "+sum(X + Y));
-#expected: "Result: 800.0"
+
+Y = matrix(1, rows=10, cols=10);
+X = 7;
+
+for( i in 1:1 ) {
+   X = 7;
+}
+
+print("Result: "+sum(X + Y));
+#expected: "Result: 800.0"

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/misc/dt_change_2b.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/misc/dt_change_2b.dml b/src/test/scripts/functions/misc/dt_change_2b.dml
index 7737c3e..5a775a3 100644
--- a/src/test/scripts/functions/misc/dt_change_2b.dml
+++ b/src/test/scripts/functions/misc/dt_change_2b.dml
@@ -19,13 +19,13 @@
 #
 #-------------------------------------------------------------
 
-
-Y = matrix(1, rows=10, cols=10);
-X = matrix(7, rows=10, cols=10);
-
-for( i in 1:1 ) {
-   X = matrix(7, rows=10, cols=10);
-}
-
-print("Result: "+sum(X + Y));
-#expected: "Result: 800.0"
+
+Y = matrix(1, rows=10, cols=10);
+X = matrix(7, rows=10, cols=10);
+
+for( i in 1:1 ) {
+   X = matrix(7, rows=10, cols=10);
+}
+
+print("Result: "+sum(X + Y));
+#expected: "Result: 800.0"

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/misc/dt_change_2c.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/misc/dt_change_2c.dml b/src/test/scripts/functions/misc/dt_change_2c.dml
index dadda6e..ebc6079 100644
--- a/src/test/scripts/functions/misc/dt_change_2c.dml
+++ b/src/test/scripts/functions/misc/dt_change_2c.dml
@@ -19,13 +19,13 @@
 #
 #-------------------------------------------------------------
 
-
-Y = matrix(1, rows=10, cols=10);
-X = matrix(7, rows=10, cols=10);
-
-for( i in 1:1 ) {
-   X = 7;
-}
-
-print("Result: "+sum(X + Y));
-#expected: "Result: 800.0"
+
+Y = matrix(1, rows=10, cols=10);
+X = matrix(7, rows=10, cols=10);
+
+for( i in 1:1 ) {
+   X = 7;
+}
+
+print("Result: "+sum(X + Y));
+#expected: "Result: 800.0"

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/misc/dt_change_2d.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/misc/dt_change_2d.dml b/src/test/scripts/functions/misc/dt_change_2d.dml
index 03f3ae3..ba0d50b 100644
--- a/src/test/scripts/functions/misc/dt_change_2d.dml
+++ b/src/test/scripts/functions/misc/dt_change_2d.dml
@@ -19,13 +19,13 @@
 #
 #-------------------------------------------------------------
 
-
-Y = matrix(1, rows=10, cols=10);
-X = 7;
-
-for( i in 1:1 ) {
-   X = matrix(7, rows=10, cols=10);
-}
-
-print("Result: "+sum(X + Y));
-#expected: "Result: 800.0"
+
+Y = matrix(1, rows=10, cols=10);
+X = 7;
+
+for( i in 1:1 ) {
+   X = matrix(7, rows=10, cols=10);
+}
+
+print("Result: "+sum(X + Y));
+#expected: "Result: 800.0"

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/misc/dt_change_2e.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/misc/dt_change_2e.dml b/src/test/scripts/functions/misc/dt_change_2e.dml
index 7d7e6a0..3efba8f 100644
--- a/src/test/scripts/functions/misc/dt_change_2e.dml
+++ b/src/test/scripts/functions/misc/dt_change_2e.dml
@@ -19,13 +19,13 @@
 #
 #-------------------------------------------------------------
 
-
-Y = matrix(1, rows=10, cols=10);
-X = matrix(7, rows=10, cols=10);
-
-for( i in 1:0 ) {
-   X = 7;
-}
-
-print("Result: "+sum(X + Y));
-#expected: "Result: 800.0"
+
+Y = matrix(1, rows=10, cols=10);
+X = matrix(7, rows=10, cols=10);
+
+for( i in 1:0 ) {
+   X = 7;
+}
+
+print("Result: "+sum(X + Y));
+#expected: "Result: 800.0"

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/misc/dt_change_2f.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/misc/dt_change_2f.dml b/src/test/scripts/functions/misc/dt_change_2f.dml
index 0826d62..3a71f05 100644
--- a/src/test/scripts/functions/misc/dt_change_2f.dml
+++ b/src/test/scripts/functions/misc/dt_change_2f.dml
@@ -19,13 +19,13 @@
 #
 #-------------------------------------------------------------
 
-
-Y = matrix(1, rows=10, cols=10);
-X = 7;
-
-for( i in 1:0 ) {
-   X = matrix(7, rows=10, cols=10);
-}
-
-print("Result: "+sum(X + Y));
-#expected: "Result: 800.0"
+
+Y = matrix(1, rows=10, cols=10);
+X = 7;
+
+for( i in 1:0 ) {
+   X = matrix(7, rows=10, cols=10);
+}
+
+print("Result: "+sum(X + Y));
+#expected: "Result: 800.0"

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/misc/dt_change_3a.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/misc/dt_change_3a.dml b/src/test/scripts/functions/misc/dt_change_3a.dml
index 0f35e4b..334404c 100644
--- a/src/test/scripts/functions/misc/dt_change_3a.dml
+++ b/src/test/scripts/functions/misc/dt_change_3a.dml
@@ -19,15 +19,15 @@
 #
 #-------------------------------------------------------------
 
-
-Y = matrix(1, rows=10, cols=10);
-X = 7;
-i = 1;
-
-while( i<=1 ) {
-   X = 7;
-   i = i+1;
-}
-
-print("Result: "+sum(X + Y));
-#expected: "Result: 800.0"
+
+Y = matrix(1, rows=10, cols=10);
+X = 7;
+i = 1;
+
+while( i<=1 ) {
+   X = 7;
+   i = i+1;
+}
+
+print("Result: "+sum(X + Y));
+#expected: "Result: 800.0"

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/misc/dt_change_3b.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/misc/dt_change_3b.dml b/src/test/scripts/functions/misc/dt_change_3b.dml
index d729675..652c7d4 100644
--- a/src/test/scripts/functions/misc/dt_change_3b.dml
+++ b/src/test/scripts/functions/misc/dt_change_3b.dml
@@ -19,16 +19,16 @@
 #
 #-------------------------------------------------------------
 
-
-Y = matrix(1, rows=10, cols=10);
-X = matrix(7, rows=10, cols=10);
-
-i = 1;
-
-while( i<=1 ) {
-   X = matrix(7, rows=10, cols=10);
-   i = i+1;
-}
-
-print("Result: "+sum(X + Y));
-#expected: "Result: 800.0"
+
+Y = matrix(1, rows=10, cols=10);
+X = matrix(7, rows=10, cols=10);
+
+i = 1;
+
+while( i<=1 ) {
+   X = matrix(7, rows=10, cols=10);
+   i = i+1;
+}
+
+print("Result: "+sum(X + Y));
+#expected: "Result: 800.0"

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/misc/dt_change_3c.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/misc/dt_change_3c.dml b/src/test/scripts/functions/misc/dt_change_3c.dml
index b31b616..ab17a3f 100644
--- a/src/test/scripts/functions/misc/dt_change_3c.dml
+++ b/src/test/scripts/functions/misc/dt_change_3c.dml
@@ -19,15 +19,15 @@
 #
 #-------------------------------------------------------------
 
-
-Y = matrix(1, rows=10, cols=10);
-X = matrix(7, rows=10, cols=10);
-i = 1;
-
-while( i<=1 ) {
-   X = 7;
-   i = i+1;
-}
-
-print("Result: "+sum(X + Y));
-#expected: "Result: 800.0"
+
+Y = matrix(1, rows=10, cols=10);
+X = matrix(7, rows=10, cols=10);
+i = 1;
+
+while( i<=1 ) {
+   X = 7;
+   i = i+1;
+}
+
+print("Result: "+sum(X + Y));
+#expected: "Result: 800.0"

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/misc/dt_change_3d.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/misc/dt_change_3d.dml b/src/test/scripts/functions/misc/dt_change_3d.dml
index 099b107..405184c 100644
--- a/src/test/scripts/functions/misc/dt_change_3d.dml
+++ b/src/test/scripts/functions/misc/dt_change_3d.dml
@@ -19,15 +19,15 @@
 #
 #-------------------------------------------------------------
 
-
-Y = matrix(1, rows=10, cols=10);
-X = 7;
-i = 1;
-
-while( i<=1 ) {
-   X = matrix(7, rows=10, cols=10);
-   i = i+1;
-}
-
-print("Result: "+sum(X + Y));
-#expected: "Result: 800.0"
+
+Y = matrix(1, rows=10, cols=10);
+X = 7;
+i = 1;
+
+while( i<=1 ) {
+   X = matrix(7, rows=10, cols=10);
+   i = i+1;
+}
+
+print("Result: "+sum(X + Y));
+#expected: "Result: 800.0"

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/misc/dt_change_3e.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/misc/dt_change_3e.dml b/src/test/scripts/functions/misc/dt_change_3e.dml
index 4278af9..0e7a749 100644
--- a/src/test/scripts/functions/misc/dt_change_3e.dml
+++ b/src/test/scripts/functions/misc/dt_change_3e.dml
@@ -19,15 +19,15 @@
 #
 #-------------------------------------------------------------
 
-
-Y = matrix(1, rows=10, cols=10);
-X = matrix(7, rows=10, cols=10);
-i = 1;
-
-while( i<=0 ) {
-   X = 7;
-   i = i+1;
-}
-
-print("Result: "+sum(X + Y));
-#expected: "Result: 800.0"
+
+Y = matrix(1, rows=10, cols=10);
+X = matrix(7, rows=10, cols=10);
+i = 1;
+
+while( i<=0 ) {
+   X = 7;
+   i = i+1;
+}
+
+print("Result: "+sum(X + Y));
+#expected: "Result: 800.0"

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/misc/dt_change_3f.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/misc/dt_change_3f.dml b/src/test/scripts/functions/misc/dt_change_3f.dml
index 3096e71..3655aa6 100644
--- a/src/test/scripts/functions/misc/dt_change_3f.dml
+++ b/src/test/scripts/functions/misc/dt_change_3f.dml
@@ -19,15 +19,15 @@
 #
 #-------------------------------------------------------------
 
-
-Y = matrix(1, rows=10, cols=10);
-X = 7;
-i = 1;
-
-while( i<=0 ) {
-   X = matrix(7, rows=10, cols=10);
-   i = i+1;
-}
-
-print("Result: "+sum(X + Y));
-#expected: "Result: 800.0"
+
+Y = matrix(1, rows=10, cols=10);
+X = 7;
+i = 1;
+
+while( i<=0 ) {
+   X = matrix(7, rows=10, cols=10);
+   i = i+1;
+}
+
+print("Result: "+sum(X + Y));
+#expected: "Result: 800.0"

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/misc/dt_change_4a.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/misc/dt_change_4a.dml b/src/test/scripts/functions/misc/dt_change_4a.dml
index 7702122..6d0fff5 100644
--- a/src/test/scripts/functions/misc/dt_change_4a.dml
+++ b/src/test/scripts/functions/misc/dt_change_4a.dml
@@ -19,10 +19,10 @@
 #
 #-------------------------------------------------------------
 
-
-Y = matrix(1, rows=10, cols=10);
-X = 7;
-X = matrix(X, rows=10, cols=10);
-
-print("Result: "+sum(X + Y));
-#expected: "Result: 800.0"
+
+Y = matrix(1, rows=10, cols=10);
+X = 7;
+X = matrix(X, rows=10, cols=10);
+
+print("Result: "+sum(X + Y));
+#expected: "Result: 800.0"

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/misc/dt_change_4b.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/misc/dt_change_4b.dml b/src/test/scripts/functions/misc/dt_change_4b.dml
index 4381ad4..c5d8c7e 100644
--- a/src/test/scripts/functions/misc/dt_change_4b.dml
+++ b/src/test/scripts/functions/misc/dt_change_4b.dml
@@ -19,10 +19,10 @@
 #
 #-------------------------------------------------------------
 
-
-Y = matrix(1, rows=10, cols=10);
-X = matrix(7, rows=10, cols=10);
-X = castAsScalar(X[1,1]);
-
-print("Result: "+sum(X + Y));
-#expected: "Result: 800.0"
+
+Y = matrix(1, rows=10, cols=10);
+X = matrix(7, rows=10, cols=10);
+X = castAsScalar(X[1,1]);
+
+print("Result: "+sum(X + Y));
+#expected: "Result: 800.0"

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/misc/dt_change_4c.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/misc/dt_change_4c.dml b/src/test/scripts/functions/misc/dt_change_4c.dml
index 6067ede..ba561bf 100644
--- a/src/test/scripts/functions/misc/dt_change_4c.dml
+++ b/src/test/scripts/functions/misc/dt_change_4c.dml
@@ -19,17 +19,17 @@
 #
 #-------------------------------------------------------------
 
-
-foo = function(Matrix[Double] input) return (Double out) 
-{
-   if( 1==1 ){} #prevent inlining
-  
-   out = castAsScalar(input[1,1]);
-}
-
-Y = matrix(1, rows=10, cols=10);
-X = matrix(7, rows=10, cols=10);
-X = foo(X);
-
-print("Result: "+sum(X + Y));
-#expected: "Result: 800.0"
+
+foo = function(Matrix[Double] input) return (Double out) 
+{
+   if( 1==1 ){} #prevent inlining
+  
+   out = castAsScalar(input[1,1]);
+}
+
+Y = matrix(1, rows=10, cols=10);
+X = matrix(7, rows=10, cols=10);
+X = foo(X);
+
+print("Result: "+sum(X + Y));
+#expected: "Result: 800.0"

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/misc/dt_change_4d.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/misc/dt_change_4d.dml b/src/test/scripts/functions/misc/dt_change_4d.dml
index feb1a47..e04683d 100644
--- a/src/test/scripts/functions/misc/dt_change_4d.dml
+++ b/src/test/scripts/functions/misc/dt_change_4d.dml
@@ -19,18 +19,18 @@
 #
 #-------------------------------------------------------------
 
-
-foo = function(Integer input) return (Matrix[Double] out) 
-{
-   if( 1==1 ){} #prevent inlining
-   out = matrix(1, rows=10, cols=10);
-   out = out*input;
-   #out = matrix(input, rows=10, cols=10); unsupported expression in rand
-}
-
-Y = matrix(1, rows=10, cols=10);
-X = 7;
-X = foo(X);
-
-print("Result: "+sum(X + Y));
-#expected: "Result: 800.0"
+
+foo = function(Integer input) return (Matrix[Double] out) 
+{
+   if( 1==1 ){} #prevent inlining
+   out = matrix(1, rows=10, cols=10);
+   out = out*input;
+   #out = matrix(input, rows=10, cols=10); unsupported expression in rand
+}
+
+Y = matrix(1, rows=10, cols=10);
+X = 7;
+X = foo(X);
+
+print("Result: "+sum(X + Y));
+#expected: "Result: 800.0"

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/misc/dt_change_4e.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/misc/dt_change_4e.dml b/src/test/scripts/functions/misc/dt_change_4e.dml
index 68c29af..413bc12 100644
--- a/src/test/scripts/functions/misc/dt_change_4e.dml
+++ b/src/test/scripts/functions/misc/dt_change_4e.dml
@@ -19,11 +19,11 @@
 #
 #-------------------------------------------------------------
 
-
-Y = matrix(1, rows=10, cols=10);
-X = matrix(7, rows=10, cols=10);
-if(1==1){}
-X = 7;
-
-print("Result: "+sum(X + Y));
-#expected: "Result: 800.0"
+
+Y = matrix(1, rows=10, cols=10);
+X = matrix(7, rows=10, cols=10);
+if(1==1){}
+X = 7;
+
+print("Result: "+sum(X + Y));
+#expected: "Result: 800.0"

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/misc/dt_change_4f.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/misc/dt_change_4f.dml b/src/test/scripts/functions/misc/dt_change_4f.dml
index 04138c5..71f083c 100644
--- a/src/test/scripts/functions/misc/dt_change_4f.dml
+++ b/src/test/scripts/functions/misc/dt_change_4f.dml
@@ -19,11 +19,11 @@
 #
 #-------------------------------------------------------------
 
-
-Y = matrix(1, rows=10, cols=10);
-X = matrix(7, rows=10, cols=10);
-if(1==1){}
-X = castAsScalar(X[1,1]);
-
-print("Result: "+sum(X + Y));
-#expected: "Result: 800.0"
+
+Y = matrix(1, rows=10, cols=10);
+X = matrix(7, rows=10, cols=10);
+if(1==1){}
+X = castAsScalar(X[1,1]);
+
+print("Result: "+sum(X + Y));
+#expected: "Result: 800.0"

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/misc/functionInlining.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/misc/functionInlining.dml b/src/test/scripts/functions/misc/functionInlining.dml
index 81bfc7d..fe7dd73 100644
--- a/src/test/scripts/functions/misc/functionInlining.dml
+++ b/src/test/scripts/functions/misc/functionInlining.dml
@@ -19,25 +19,25 @@
 #
 #-------------------------------------------------------------
 
-
-foo = function(Double input) return (Double out)
-{
-   out = input + input; #will be inlined
-}
-
-foo2 = function(Integer input) return (Double out)
-{ 
-   out = input + input; #will be inlined
-}
-
-x = $1;
-
-if( 1!=1 ){
-  x = x;
-}
-
-ret1 = foo(x);
-ret2 = foo2(x);
- 
-print("Result1: "+ret1);
-print("Result2: "+ret2);
+
+foo = function(Double input) return (Double out)
+{
+   out = input + input; #will be inlined
+}
+
+foo2 = function(Integer input) return (Double out)
+{ 
+   out = input + input; #will be inlined
+}
+
+x = $1;
+
+if( 1!=1 ){
+  x = x;
+}
+
+ret1 = foo(x);
+ret2 = foo2(x);
+ 
+print("Result1: "+ret1);
+print("Result2: "+ret2);

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/misc/functionNoInlining.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/misc/functionNoInlining.dml b/src/test/scripts/functions/misc/functionNoInlining.dml
index 97ff1e8..cdcc67b 100644
--- a/src/test/scripts/functions/misc/functionNoInlining.dml
+++ b/src/test/scripts/functions/misc/functionNoInlining.dml
@@ -19,29 +19,29 @@
 #
 #-------------------------------------------------------------
 
-
-foo = function(Double input) return (Double out)
-{
-   if( 1==1 ){ #prevent inlining
-      out = input + input;
-   }
-}
-
-foo2 = function(Integer input) return (Double out)
-{ 
-   if( 1==1 ){ #prevent inlining
-      out = input + input;
-   }
-}
-
-x = $1;
-
-if( 1!=1 ){
-  x = 7;
-}
-
-ret1 = foo(x);
-ret2 = foo2(x);
- 
-print("Result1: "+ret1);
-print("Result2: "+ret2);
+
+foo = function(Double input) return (Double out)
+{
+   if( 1==1 ){ #prevent inlining
+      out = input + input;
+   }
+}
+
+foo2 = function(Integer input) return (Double out)
+{ 
+   if( 1==1 ){ #prevent inlining
+      out = input + input;
+   }
+}
+
+x = $1;
+
+if( 1!=1 ){
+  x = 7;
+}
+
+ret1 = foo(x);
+ret2 = foo2(x);
+ 
+print("Result1: "+ret1);
+print("Result2: "+ret2);

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/misc/function_chain_inlining.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/misc/function_chain_inlining.dml b/src/test/scripts/functions/misc/function_chain_inlining.dml
index b051599..10a084c 100644
--- a/src/test/scripts/functions/misc/function_chain_inlining.dml
+++ b/src/test/scripts/functions/misc/function_chain_inlining.dml
@@ -19,16 +19,16 @@
 #
 #-------------------------------------------------------------
 
-
-foo1 = function( Matrix[Double] B ) return (Matrix[Double] V) {
-   V = foo2(B+2);
-}
-foo2 = function( Matrix[Double] B ) return (Matrix[Double] V) {
-   V = B+B;
-}
-
-X = matrix($3, rows=$1, cols=$2);
-Y = foo1(X);
-z = sum(Y);
-
-write(z, $4);
+
+foo1 = function( Matrix[Double] B ) return (Matrix[Double] V) {
+   V = foo2(B+2);
+}
+foo2 = function( Matrix[Double] B ) return (Matrix[Double] V) {
+   V = B+B;
+}
+
+X = matrix($3, rows=$1, cols=$2);
+Y = foo1(X);
+z = sum(Y);
+
+write(z, $4);

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/misc/function_chain_non_inlining.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/misc/function_chain_non_inlining.dml b/src/test/scripts/functions/misc/function_chain_non_inlining.dml
index fac2e7b..896513f 100644
--- a/src/test/scripts/functions/misc/function_chain_non_inlining.dml
+++ b/src/test/scripts/functions/misc/function_chain_non_inlining.dml
@@ -19,21 +19,21 @@
 #
 #-------------------------------------------------------------
 
-
-foo1 = function( Matrix[Double] B ) return (Matrix[Double] V) {
-   V = foo2(B+2);
-}
-foo2 = function( Matrix[Double] B ) return (Matrix[Double] V) {
-   if(sum(B)>0){
-      V = B+B;
-   }
-   else{
-      V = B
-   }
-}
-
-X = matrix($3, rows=$1, cols=$2);
-Y = foo1(X);
-z = sum(Y);
-
-write(z, $4);
+
+foo1 = function( Matrix[Double] B ) return (Matrix[Double] V) {
+   V = foo2(B+2);
+}
+foo2 = function( Matrix[Double] B ) return (Matrix[Double] V) {
+   if(sum(B)>0){
+      V = B+B;
+   }
+   else{
+      V = B
+   }
+}
+
+X = matrix($3, rows=$1, cols=$2);
+Y = foo1(X);
+z = sum(Y);
+
+write(z, $4);

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/misc/function_recursive_inlining.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/misc/function_recursive_inlining.dml b/src/test/scripts/functions/misc/function_recursive_inlining.dml
index 6f5db7a..b01a14b 100644
--- a/src/test/scripts/functions/misc/function_recursive_inlining.dml
+++ b/src/test/scripts/functions/misc/function_recursive_inlining.dml
@@ -19,29 +19,29 @@
 #
 #-------------------------------------------------------------
 
-
-foo1 = function( Matrix[Double] B ) return (Matrix[Double] V) {
-   V = foo2(B+1);
-}
-
-foo2 = function( Matrix[Double] B ) return (Matrix[Double] V) {
-   V = foo3(B+1);
-}
-
-foo3 = function( Matrix[Double] B ) return (Matrix[Double] V) {
-   V = foo4(B+1);
-}
-
-foo4 = function( Matrix[Double] B ) return (Matrix[Double] V) {
-   V = foo5(B+1);
-}
-
-foo5 = function( Matrix[Double] B ) return (Matrix[Double] V) {
-   V = B+1;
-}
-
-X = matrix($3, rows=$1, cols=$2);
-Y = foo1(X);
-z = sum(Y);
-
-write(z, $4);
+
+foo1 = function( Matrix[Double] B ) return (Matrix[Double] V) {
+   V = foo2(B+1);
+}
+
+foo2 = function( Matrix[Double] B ) return (Matrix[Double] V) {
+   V = foo3(B+1);
+}
+
+foo3 = function( Matrix[Double] B ) return (Matrix[Double] V) {
+   V = foo4(B+1);
+}
+
+foo4 = function( Matrix[Double] B ) return (Matrix[Double] V) {
+   V = foo5(B+1);
+}
+
+foo5 = function( Matrix[Double] B ) return (Matrix[Double] V) {
+   V = B+1;
+}
+
+X = matrix($3, rows=$1, cols=$2);
+Y = foo1(X);
+z = sum(Y);
+
+write(z, $4);

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/misc/iterablePredicate.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/misc/iterablePredicate.dml b/src/test/scripts/functions/misc/iterablePredicate.dml
index 6b1aef8..5e579fd 100644
--- a/src/test/scripts/functions/misc/iterablePredicate.dml
+++ b/src/test/scripts/functions/misc/iterablePredicate.dml
@@ -19,20 +19,20 @@
 #
 #-------------------------------------------------------------
 
-
-from = $1;
-to = $2;
-
-# test auto casting constant propagation
-for( i in from:to ) {
-   print( "Result: "+i );
-}
-
-# test auto casting with variables
-if( 1==1 ){
-   from = 1 + from;
-   to = 10 + to;
-}
-for( i in from:to ) {
-   print( "Result: "+i );
-}
+
+from = $1;
+to = $2;
+
+# test auto casting constant propagation
+for( i in from:to ) {
+   print( "Result: "+i );
+}
+
+# test auto casting with variables
+if( 1==1 ){
+   from = 1 + from;
+   to = 10 + to;
+}
+for( i in from:to ) {
+   print( "Result: "+i );
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/for_pred1a.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/for_pred1a.dml b/src/test/scripts/functions/parfor/for_pred1a.dml
index 0602846..f7d0b4b 100644
--- a/src/test/scripts/functions/parfor/for_pred1a.dml
+++ b/src/test/scripts/functions/parfor/for_pred1a.dml
@@ -19,13 +19,13 @@
 #
 #-------------------------------------------------------------
 
-
-sum = 0;
-for( i in $1:$2 ) 
-{
-   sum = sum + 1; 
-}  
-
-R = matrix(1, rows=1, cols=1);
-R = R * sum;
+
+sum = 0;
+for( i in $1:$2 ) 
+{
+   sum = sum + 1; 
+}  
+
+R = matrix(1, rows=1, cols=1);
+R = R * sum;
 write(R, $4);       
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/for_pred1b.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/for_pred1b.dml b/src/test/scripts/functions/parfor/for_pred1b.dml
index c64bf43..b2f0837 100644
--- a/src/test/scripts/functions/parfor/for_pred1b.dml
+++ b/src/test/scripts/functions/parfor/for_pred1b.dml
@@ -19,13 +19,13 @@
 #
 #-------------------------------------------------------------
 
-
-sum = 0;
-for( i in seq($1,$2,$3) ) 
-{
-   sum = sum + 1; 
-}  
-
-R = matrix(1, rows=1, cols=1);
-R = R * sum;
+
+sum = 0;
+for( i in seq($1,$2,$3) ) 
+{
+   sum = sum + 1; 
+}  
+
+R = matrix(1, rows=1, cols=1);
+R = R * sum;
 write(R, $4);       
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/for_pred2a.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/for_pred2a.dml b/src/test/scripts/functions/parfor/for_pred2a.dml
index 08d2bd0..4f936bd 100644
--- a/src/test/scripts/functions/parfor/for_pred2a.dml
+++ b/src/test/scripts/functions/parfor/for_pred2a.dml
@@ -19,16 +19,16 @@
 #
 #-------------------------------------------------------------
 
-
-a = $1+1-1;
-b = $2+1-1;
-
-sum = 0;
-for( i in a:b ) 
-{
-   sum = sum + 1; 
-}  
-
-R = matrix(1, rows=1, cols=1);
-R = R * sum;
+
+a = $1+1-1;
+b = $2+1-1;
+
+sum = 0;
+for( i in a:b ) 
+{
+   sum = sum + 1; 
+}  
+
+R = matrix(1, rows=1, cols=1);
+R = R * sum;
 write(R, $4);       
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/for_pred2b.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/for_pred2b.dml b/src/test/scripts/functions/parfor/for_pred2b.dml
index 4c02550..067b43c 100644
--- a/src/test/scripts/functions/parfor/for_pred2b.dml
+++ b/src/test/scripts/functions/parfor/for_pred2b.dml
@@ -19,17 +19,17 @@
 #
 #-------------------------------------------------------------
 
-
-a = $1+1-1;
-b = $2+1-1;
-c = $3+1-1;
-
-sum = 0;
-for( i in seq(a,b,c) ) 
-{
-   sum = sum + 1; 
-}  
-
-R = matrix(1, rows=1, cols=1);
-R = R * sum;
+
+a = $1+1-1;
+b = $2+1-1;
+c = $3+1-1;
+
+sum = 0;
+for( i in seq(a,b,c) ) 
+{
+   sum = sum + 1; 
+}  
+
+R = matrix(1, rows=1, cols=1);
+R = R * sum;
 write(R, $4);       
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/for_pred3a.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/for_pred3a.dml b/src/test/scripts/functions/parfor/for_pred3a.dml
index 59ef382..cac9e44 100644
--- a/src/test/scripts/functions/parfor/for_pred3a.dml
+++ b/src/test/scripts/functions/parfor/for_pred3a.dml
@@ -19,13 +19,13 @@
 #
 #-------------------------------------------------------------
 
-
-sum = 0;
-for( i in ($1+1-1):($2+1-1) ) 
-{
-   sum = sum + 1; 
-}  
-
-R = matrix(1, rows=1, cols=1);
-R = R * sum;
+
+sum = 0;
+for( i in ($1+1-1):($2+1-1) ) 
+{
+   sum = sum + 1; 
+}  
+
+R = matrix(1, rows=1, cols=1);
+R = R * sum;
 write(R, $4);       
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/for_pred3b.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/for_pred3b.dml b/src/test/scripts/functions/parfor/for_pred3b.dml
index 1c615b8..86843ff 100644
--- a/src/test/scripts/functions/parfor/for_pred3b.dml
+++ b/src/test/scripts/functions/parfor/for_pred3b.dml
@@ -19,13 +19,13 @@
 #
 #-------------------------------------------------------------
 
-
-sum = 0;
-for( i in seq(($1+1-1),($2+1-1),($3+1-1)) ) 
-{
-   sum = sum + 1; 
-}  
-
-R = matrix(1, rows=1, cols=1);
-R = R * sum;
+
+sum = 0;
+for( i in seq(($1+1-1),($2+1-1),($3+1-1)) ) 
+{
+   sum = sum + 1; 
+}  
+
+R = matrix(1, rows=1, cols=1);
+R = R * sum;
 write(R, $4);       
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor1.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor1.dml b/src/test/scripts/functions/parfor/parfor1.dml
index d4dab06..311d1c0 100644
--- a/src/test/scripts/functions/parfor/parfor1.dml
+++ b/src/test/scripts/functions/parfor/parfor1.dml
@@ -19,12 +19,12 @@
 #
 #-------------------------------------------------------------
 
-
-a = 1;
-
-parfor( i in 1:10 )
-{
-   b = i + a;
-   #print(b);
-}
+
+a = 1;
+
+parfor( i in 1:10 )
+{
+   b = i + a;
+   #print(b);
+}
  
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor10.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor10.dml b/src/test/scripts/functions/parfor/parfor10.dml
index b4a7861..c4d77da 100644
--- a/src/test/scripts/functions/parfor/parfor10.dml
+++ b/src/test/scripts/functions/parfor/parfor10.dml
@@ -19,14 +19,14 @@
 #
 #-------------------------------------------------------------
 
-
-A = matrix(0,rows=10,cols=1);
-dummy = matrix(1, rows=1,cols=1);
-
-parfor( i in 1:10 )
-{ 
-   a = i;
-   A[i,1] = dummy*a;
-}
-
+
+A = matrix(0,rows=10,cols=1);
+dummy = matrix(1, rows=1,cols=1);
+
+parfor( i in 1:10 )
+{ 
+   a = i;
+   A[i,1] = dummy*a;
+}
+
 #print(A);
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor11.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor11.dml b/src/test/scripts/functions/parfor/parfor11.dml
index b1cffe2..afd88c5 100644
--- a/src/test/scripts/functions/parfor/parfor11.dml
+++ b/src/test/scripts/functions/parfor/parfor11.dml
@@ -19,13 +19,13 @@
 #
 #-------------------------------------------------------------
 
-
-A = matrix(0, rows=10,cols=1);
-B = Rand(rows=10,cols=1);
-
-parfor( i in 1:10 )
-{ 
-   A[i,1] = B[i,1];
-}
-
+
+A = matrix(0, rows=10,cols=1);
+B = Rand(rows=10,cols=1);
+
+parfor( i in 1:10 )
+{ 
+   A[i,1] = B[i,1];
+}
+
 #print(A);
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor12.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor12.dml b/src/test/scripts/functions/parfor/parfor12.dml
index 8dd500f..8c257dd 100644
--- a/src/test/scripts/functions/parfor/parfor12.dml
+++ b/src/test/scripts/functions/parfor/parfor12.dml
@@ -19,13 +19,13 @@
 #
 #-------------------------------------------------------------
 
-
-A = matrix(0,rows=10,cols=1);
-B = Rand(rows=10,cols=1);
-
-parfor( i in 1:9 )
-{ 
-   A[i,1] = B[i+1,1];
-}
-
+
+A = matrix(0,rows=10,cols=1);
+B = Rand(rows=10,cols=1);
+
+parfor( i in 1:9 )
+{ 
+   A[i,1] = B[i+1,1];
+}
+
 #print(A);
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor13.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor13.dml b/src/test/scripts/functions/parfor/parfor13.dml
index af2b2cb..d0dcadf 100644
--- a/src/test/scripts/functions/parfor/parfor13.dml
+++ b/src/test/scripts/functions/parfor/parfor13.dml
@@ -19,13 +19,13 @@
 #
 #-------------------------------------------------------------
 
-
-A = matrix(0,rows=10,cols=1);
-B = Rand(rows=10,cols=1);
-
-parfor( i in 1:9 )
-{ 
-   A[i,1] = B[i,1] + B[i+1,1];
-}
-
+
+A = matrix(0,rows=10,cols=1);
+B = Rand(rows=10,cols=1);
+
+parfor( i in 1:9 )
+{ 
+   A[i,1] = B[i,1] + B[i+1,1];
+}
+
 #print(A);
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor14.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor14.dml b/src/test/scripts/functions/parfor/parfor14.dml
index bb4db22..bbc1a7c 100644
--- a/src/test/scripts/functions/parfor/parfor14.dml
+++ b/src/test/scripts/functions/parfor/parfor14.dml
@@ -19,13 +19,13 @@
 #
 #-------------------------------------------------------------
 
-
-A = matrix(0,rows=10,cols=1);
-B = Rand(rows=10,cols=1);
-
-parfor( i in 2:10 )
-{ 
-   A[i,1] = B[i,1] + A[i-1,1];
-}
-
+
+A = matrix(0,rows=10,cols=1);
+B = Rand(rows=10,cols=1);
+
+parfor( i in 2:10 )
+{ 
+   A[i,1] = B[i,1] + A[i-1,1];
+}
+
 #print(A);
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor15.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor15.dml b/src/test/scripts/functions/parfor/parfor15.dml
index a8bc2a6..5c11ed1 100644
--- a/src/test/scripts/functions/parfor/parfor15.dml
+++ b/src/test/scripts/functions/parfor/parfor15.dml
@@ -19,14 +19,14 @@
 #
 #-------------------------------------------------------------
 
-
-A = matrix(0,rows=20,cols=1);
-B = Rand(rows=20,cols=1);
-
-parfor( i in 1:10 )
-{ 
-   A[i,1] = B[i,1];
-   A[i+10,1] = B[i+10,1];
-}
-
+
+A = matrix(0,rows=20,cols=1);
+B = Rand(rows=20,cols=1);
+
+parfor( i in 1:10 )
+{ 
+   A[i,1] = B[i,1];
+   A[i+10,1] = B[i+10,1];
+}
+
 #print(A);
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor16.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor16.dml b/src/test/scripts/functions/parfor/parfor16.dml
index db55b28..ce29503 100644
--- a/src/test/scripts/functions/parfor/parfor16.dml
+++ b/src/test/scripts/functions/parfor/parfor16.dml
@@ -19,14 +19,14 @@
 #
 #-------------------------------------------------------------
 
-
-A = matrix(0,rows=20,cols=1);
-B = Rand(rows=20,cols=1);
-
-parfor( i in 1:10 )
-{ 
-   A[i,1] = B[i,1];
-   A[i*2,1] = B[i*2,1];
-}
-
+
+A = matrix(0,rows=20,cols=1);
+B = Rand(rows=20,cols=1);
+
+parfor( i in 1:10 )
+{ 
+   A[i,1] = B[i,1];
+   A[i*2,1] = B[i*2,1];
+}
+
 #print(A);
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor17.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor17.dml b/src/test/scripts/functions/parfor/parfor17.dml
index e1d7704..a635271 100644
--- a/src/test/scripts/functions/parfor/parfor17.dml
+++ b/src/test/scripts/functions/parfor/parfor17.dml
@@ -19,15 +19,15 @@
 #
 #-------------------------------------------------------------
 
-
-A = matrix(0, rows=20,cols=1);
-B = Rand(rows=20,cols=1);
-
-#GCD true, Banerjee true
-parfor( i in 1:10 )
-{ 
-   A[2*i+10,1] = B[i,1];
-   A[5*i,1] = B[i,1];
-}
-
+
+A = matrix(0, rows=20,cols=1);
+B = Rand(rows=20,cols=1);
+
+#GCD true, Banerjee true
+parfor( i in 1:10 )
+{ 
+   A[2*i+10,1] = B[i,1];
+   A[5*i,1] = B[i,1];
+}
+
 #print(A);
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor18.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor18.dml b/src/test/scripts/functions/parfor/parfor18.dml
index ae6711e..d04c6b1 100644
--- a/src/test/scripts/functions/parfor/parfor18.dml
+++ b/src/test/scripts/functions/parfor/parfor18.dml
@@ -19,15 +19,15 @@
 #
 #-------------------------------------------------------------
 
-
-A = matrix(0,rows=20,cols=1);
-B = Rand(rows=20,cols=1);
-
-#GCD true, Banerjee false
-parfor( i in 1:2 )
-{ 
-   A[2*i+10,1] = B[i,1];
-   A[5*i,1] = B[i,1];
-}
-
+
+A = matrix(0,rows=20,cols=1);
+B = Rand(rows=20,cols=1);
+
+#GCD true, Banerjee false
+parfor( i in 1:2 )
+{ 
+   A[2*i+10,1] = B[i,1];
+   A[5*i,1] = B[i,1];
+}
+
 #print(A);
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor19.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor19.dml b/src/test/scripts/functions/parfor/parfor19.dml
index df20243..7f63218 100644
--- a/src/test/scripts/functions/parfor/parfor19.dml
+++ b/src/test/scripts/functions/parfor/parfor19.dml
@@ -19,15 +19,15 @@
 #
 #-------------------------------------------------------------
 
-
-A = matrix(0,rows=20,cols=1);
-B = Rand(rows=20,cols=1);
-
-#GCD false
-parfor( i in 1:4 )
-{ 
-   A[7*i+1,1] = B[i,1];
-   A[13*i+1,1] = B[i,1];
-}
-
+
+A = matrix(0,rows=20,cols=1);
+B = Rand(rows=20,cols=1);
+
+#GCD false
+parfor( i in 1:4 )
+{ 
+   A[7*i+1,1] = B[i,1];
+   A[13*i+1,1] = B[i,1];
+}
+
 #print(A);
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor2.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor2.dml b/src/test/scripts/functions/parfor/parfor2.dml
index da4e83c..62bc86b 100644
--- a/src/test/scripts/functions/parfor/parfor2.dml
+++ b/src/test/scripts/functions/parfor/parfor2.dml
@@ -19,16 +19,16 @@
 #
 #-------------------------------------------------------------
 
-
-a = 1;
-
-parfor( i in 1:10 )
-{
-   b = i + a;
-   a = b;
-   #print(a);
-   #print(b);
-}
-
-#print(a);
+
+a = 1;
+
+parfor( i in 1:10 )
+{
+   b = i + a;
+   a = b;
+   #print(a);
+   #print(b);
+}
+
+#print(a);
  
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor20.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor20.dml b/src/test/scripts/functions/parfor/parfor20.dml
index bc987da..c7eb9af 100644
--- a/src/test/scripts/functions/parfor/parfor20.dml
+++ b/src/test/scripts/functions/parfor/parfor20.dml
@@ -19,16 +19,16 @@
 #
 #-------------------------------------------------------------
 
-
-A = matrix(0,rows=20,cols=1);
-B = Rand(rows=20,cols=1);
-
-#GCD false
-parfor( i in 10:15 )
-{ 
-
-   B[i,1] = A[i-10,1];
-   A[i,1] = B[i,1];
-}
-
+
+A = matrix(0,rows=20,cols=1);
+B = Rand(rows=20,cols=1);
+
+#GCD false
+parfor( i in 10:15 )
+{ 
+
+   B[i,1] = A[i-10,1];
+   A[i,1] = B[i,1];
+}
+
 #print(A);
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor21.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor21.dml b/src/test/scripts/functions/parfor/parfor21.dml
index d087d2b..5038caa 100644
--- a/src/test/scripts/functions/parfor/parfor21.dml
+++ b/src/test/scripts/functions/parfor/parfor21.dml
@@ -19,16 +19,16 @@
 #
 #-------------------------------------------------------------
 
-
-A = matrix(0,rows=20,cols=1);
-B = Rand(rows=20,cols=1);
-
-#GCD false
-parfor( i in 10:15 )
-{ 
-
-   B[i,1] = A[i-5,1];
-   A[i,1] = B[i,1];
-}
-
+
+A = matrix(0,rows=20,cols=1);
+B = Rand(rows=20,cols=1);
+
+#GCD false
+parfor( i in 10:15 )
+{ 
+
+   B[i,1] = A[i-5,1];
+   A[i,1] = B[i,1];
+}
+
 #print(A);
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor22.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor22.dml b/src/test/scripts/functions/parfor/parfor22.dml
index e3425b0..83d6cf9 100644
--- a/src/test/scripts/functions/parfor/parfor22.dml
+++ b/src/test/scripts/functions/parfor/parfor22.dml
@@ -19,18 +19,18 @@
 #
 #-------------------------------------------------------------
 
-
-A = matrix(0,rows=20,cols=1);
-B = Rand(rows=20,cols=1);
-
-#GCD false
-parfor( i in 10:15 )
-{ 
-
-   B[i,1] = A[i-10,1];
-   #print(B[i,1]);
-   B[i,1] = A[i-10,1]+1;
-   A[i,1] = B[i,1];
-}
-
+
+A = matrix(0,rows=20,cols=1);
+B = Rand(rows=20,cols=1);
+
+#GCD false
+parfor( i in 10:15 )
+{ 
+
+   B[i,1] = A[i-10,1];
+   #print(B[i,1]);
+   B[i,1] = A[i-10,1]+1;
+   A[i,1] = B[i,1];
+}
+
 #print(A);
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor23.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor23.dml b/src/test/scripts/functions/parfor/parfor23.dml
index 15305a0..0761afb 100644
--- a/src/test/scripts/functions/parfor/parfor23.dml
+++ b/src/test/scripts/functions/parfor/parfor23.dml
@@ -19,15 +19,15 @@
 #
 #-------------------------------------------------------------
 
-
-A = matrix(0,rows=20,cols=1);
-B = Rand(rows=20,cols=1);
-dummy = matrix(1,rows=1,cols=1);
-
-parfor( i in seq(2,10,2) )
-{ 
-   B[i,1] = dummy*i;
-   A[i,1] = B[i,1];
-}
-
+
+A = matrix(0,rows=20,cols=1);
+B = Rand(rows=20,cols=1);
+dummy = matrix(1,rows=1,cols=1);
+
+parfor( i in seq(2,10,2) )
+{ 
+   B[i,1] = dummy*i;
+   A[i,1] = B[i,1];
+}
+
 #print(A);
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor24.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor24.dml b/src/test/scripts/functions/parfor/parfor24.dml
index 506ede8..50d1cd2 100644
--- a/src/test/scripts/functions/parfor/parfor24.dml
+++ b/src/test/scripts/functions/parfor/parfor24.dml
@@ -19,15 +19,15 @@
 #
 #-------------------------------------------------------------
 
-
-A = matrix(0, rows=20,cols=1);
-B = Rand(rows=20,cols=1);
-dummy = matrix(1, rows=1,cols=1);
-
-parfor( i in seq(1,10,1) )
-{ 
-   B[i,1] = dummy*i;
-   A[i,1] = B[i,1];
-}
-
+
+A = matrix(0, rows=20,cols=1);
+B = Rand(rows=20,cols=1);
+dummy = matrix(1, rows=1,cols=1);
+
+parfor( i in seq(1,10,1) )
+{ 
+   B[i,1] = dummy*i;
+   A[i,1] = B[i,1];
+}
+
 #print(A);
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor25.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor25.dml b/src/test/scripts/functions/parfor/parfor25.dml
index 7d15541..0617606 100644
--- a/src/test/scripts/functions/parfor/parfor25.dml
+++ b/src/test/scripts/functions/parfor/parfor25.dml
@@ -19,14 +19,14 @@
 #
 #-------------------------------------------------------------
 
-
-A = matrix(0,rows=2,cols=20);
-dummy = matrix(1, rows=1,cols=1);
-
-parfor( i in 1:20 )
-{ 
-   A[1,i] = dummy*i;
-   A[2,i] = dummy*i; 
-}
-
+
+A = matrix(0,rows=2,cols=20);
+dummy = matrix(1, rows=1,cols=1);
+
+parfor( i in 1:20 )
+{ 
+   A[1,i] = dummy*i;
+   A[2,i] = dummy*i; 
+}
+
 #print(A);
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor26.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor26.dml b/src/test/scripts/functions/parfor/parfor26.dml
index 214283b..c5c07fb 100644
--- a/src/test/scripts/functions/parfor/parfor26.dml
+++ b/src/test/scripts/functions/parfor/parfor26.dml
@@ -19,14 +19,14 @@
 #
 #-------------------------------------------------------------
 
-
-A = matrix(0,rows=20,cols=2);
-dummy = matrix(1,rows=1,cols=1);
-
-parfor( i in 1:20 )
-{ 
-   A[i,1] = dummy*i;
-   A[i,2] = dummy*i; 
-}
-
+
+A = matrix(0,rows=20,cols=2);
+dummy = matrix(1,rows=1,cols=1);
+
+parfor( i in 1:20 )
+{ 
+   A[i,1] = dummy*i;
+   A[i,2] = dummy*i; 
+}
+
 #print(A);
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor26b.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor26b.dml b/src/test/scripts/functions/parfor/parfor26b.dml
index 802a412..d40dde2 100644
--- a/src/test/scripts/functions/parfor/parfor26b.dml
+++ b/src/test/scripts/functions/parfor/parfor26b.dml
@@ -19,13 +19,13 @@
 #
 #-------------------------------------------------------------
 
-
-A = matrix(0,rows=10,cols=10);
-
-parfor( i in 2:10 )
-{ 
-   B = A[1,i-1];
-   A[1,i] = B*i; 
-}
-
+
+A = matrix(0,rows=10,cols=10);
+
+parfor( i in 2:10 )
+{ 
+   B = A[1,i-1];
+   A[1,i] = B*i; 
+}
+
 #print(A);
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor26c.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor26c.dml b/src/test/scripts/functions/parfor/parfor26c.dml
index 1125fde..23d3e44 100644
--- a/src/test/scripts/functions/parfor/parfor26c.dml
+++ b/src/test/scripts/functions/parfor/parfor26c.dml
@@ -19,13 +19,13 @@
 #
 #-------------------------------------------------------------
 
-
-A = matrix(0, rows=10,cols=10);
-
-parfor( i in 1:4 )
-{ 
-   B = A[2*i+1,1];
-   A[2*i,1] = B*i; 
-}
-
+
+A = matrix(0, rows=10,cols=10);
+
+parfor( i in 1:4 )
+{ 
+   B = A[2*i+1,1];
+   A[2*i,1] = B*i; 
+}
+
 #print(A);
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor26c2.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor26c2.dml b/src/test/scripts/functions/parfor/parfor26c2.dml
index 9832da5..f10155e 100644
--- a/src/test/scripts/functions/parfor/parfor26c2.dml
+++ b/src/test/scripts/functions/parfor/parfor26c2.dml
@@ -19,13 +19,13 @@
 #
 #-------------------------------------------------------------
 
-
-A = matrix(0,rows=10,cols=10);
-
-parfor( i in 1:4 )
-{ 
-   B = A[2*i+1, ];
-   A[2*i, ] = B*i; 
-}
-
+
+A = matrix(0,rows=10,cols=10);
+
+parfor( i in 1:4 )
+{ 
+   B = A[2*i+1, ];
+   A[2*i, ] = B*i; 
+}
+
 #print(A);
\ No newline at end of file



[23/55] [partial] incubator-systemml git commit: [SYSTEMML-482] [SYSTEMML-480] Adding a Git attributes file to enfore Unix-styled line endings, and normalizing all of the line endings.

Posted by du...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/impute/wfundInputGenerator.The0thReportAttempt.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/impute/wfundInputGenerator.The0thReportAttempt.dml b/src/test/scripts/applications/impute/wfundInputGenerator.The0thReportAttempt.dml
index 18c8eb9..6514b63 100644
--- a/src/test/scripts/applications/impute/wfundInputGenerator.The0thReportAttempt.dml
+++ b/src/test/scripts/applications/impute/wfundInputGenerator.The0thReportAttempt.dml
@@ -1,501 +1,501 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# hadoop jar SystemML.jar -f test/scripts/applications/impute/wfundInputGenerator.dml -exec singlenode
-#    -args
-#        test/scripts/applications/impute/initial_reports
-#        test/scripts/applications/impute/initial_reports_preprocessed
-#        test/scripts/applications/impute/CReps 
-#        test/scripts/applications/impute/RegresValueMap
-#        test/scripts/applications/impute/RegresFactorDefault
-#        test/scripts/applications/impute/RegresParamMap
-#        test/scripts/applications/impute/RegresCoeffDefault
-#        test/scripts/applications/impute/RegresScaleMult
-
-initial_reports = read ($1);
-
-is_GROUP_4_ENABLED = 0;        #   = 1 or 0 ("0" if Group-4 = all 0s)
-num_EXTRA_MISSING_FREES = 0;   #   = 0 ("3" or "6" for Uganda)
-
-num_known_terms = 5;      # The number of known term reports
-num_predicted_terms = 1;  # The number of predicted (future) term reports
-
-num_terms = num_known_terms + num_predicted_terms + 1;  #  We predict the "0-th" report, too
-num_attrs = 19;  
-
-num_frees_per_term = 13;
-if (is_GROUP_4_ENABLED == 1) {
-    num_frees_per_term = 15;
-}
-num_regular_frees = (num_predicted_terms + 1) * num_frees_per_term;
-num_frees = num_regular_frees + num_EXTRA_MISSING_FREES;
-
-zero = matrix (0.0, rows = 1, cols = 1);
-
-# ---------------------------------------------------------
-# GENERATE AN AFFINE MAP FROM FREE VARIABLES TO THE REPORTS
-# AFFINE MAP = LINEAR MAP + INITIAL (DEFAULT) REPORTS
-# ---------------------------------------------------------
-
-CReps = matrix (0.0, rows = (num_terms * num_attrs), cols = num_frees);
-
-for (dt in 0:num_predicted_terms)
-{
-    ta_shift = 0;
-    if (dt > 0) {
-        ta_shift = (num_known_terms + dt) * num_attrs;
-    }
-    fv_shift = dt * num_frees_per_term;
-    
-# constraint that          row1 =  row2 +  row3 +  row4 +  row5 +  row6 + row7
-# translated to free vars: row1 = free1 + free2 + free3 + free4 + free5 + free6
-    for (i in 1:6) {
-        CReps [ta_shift + 1,     fv_shift + i] = 1.0 + zero;
-        CReps [ta_shift + 1 + i, fv_shift + i] = 1.0 + zero;
-    }
-# row 8 is free variable not appearing in any non-free variable
-    CReps [ta_shift + 8, fv_shift + 7] = 1.0 + zero;
-
-# constraint that          row9 = row10 + row11 +  row12 +  row13 +  row14 +  row15
-# translated to free vars: row9 = free8 + free9 + free10 + free11 + free12 + free13
-    for (i in 1:6) {
-        CReps [ta_shift + 9,     fv_shift + 7 + i] = 1.0 + zero;
-        CReps [ta_shift + 9 + i, fv_shift + 7 + i] = 1.0 + zero;
-    }
-# constraint that          row16 =  row14 +  row15
-# translated to free vars: row16 = free14 + free15
-if (is_GROUP_4_ENABLED == 1) {
-    for (i in 1:2) {
-        CReps [ta_shift + 16,     fv_shift + 13 + i] = 1.0 + zero;
-        CReps [ta_shift + 16 + i, fv_shift + 13 + i] = 1.0 + zero;
-    }
-}
-# constraint that           row19 = total cost (all free variables)
-# translated to free vars:  row19 = all free variables
-    for (i in 1:num_frees_per_term) {
-        CReps [ta_shift + 19, fv_shift + i] = 1.0 + zero;
-    }
-}
-
-# ---------------------------------------------------------
-# SPECIAL FREE VARIABLES TO HANDLE UGANDA'S MISSING VALUES
-# ---------------------------------------------------------
-
-if (num_EXTRA_MISSING_FREES == 3 | num_EXTRA_MISSING_FREES == 6)
-{
-    ta_shift = 3 * num_attrs;
-    CReps [ta_shift +  4,  num_regular_frees + 1] =  1.0 + zero;
-    CReps [ta_shift +  5,  num_regular_frees + 2] =  1.0 + zero;
-    CReps [ta_shift +  6,  num_regular_frees + 3] =  1.0 + zero;
-    CReps [ta_shift +  7,  num_regular_frees + 1] = -1.0 + zero;
-    CReps [ta_shift +  7,  num_regular_frees + 2] = -1.0 + zero;
-    CReps [ta_shift +  7,  num_regular_frees + 3] = -1.0 + zero;
-}
-
-if (num_EXTRA_MISSING_FREES == 6)
-{
-    ta_shift = 7 * num_attrs;
-    CReps [ta_shift +  4,  num_regular_frees + 4] =  1.0 + zero;
-    CReps [ta_shift +  5,  num_regular_frees + 5] =  1.0 + zero;
-    CReps [ta_shift +  6,  num_regular_frees + 6] =  1.0 + zero;
-    CReps [ta_shift +  7,  num_regular_frees + 4] = -1.0 + zero;
-    CReps [ta_shift +  7,  num_regular_frees + 5] = -1.0 + zero;
-    CReps [ta_shift +  7,  num_regular_frees + 6] = -1.0 + zero;
-}
-
-
-# ---------------------------------------------------------------------------------------
-#
-# In all regressions, except the last few "special" ones, there are 4 factors:
-# x[t]  ~  aggregate[t], x[t-1],  (x[t-1] - x[t-2])
-# The last regressions are for regularization, but they also follow the 4-factor pattern.
-
-num_factors = 4; 
-
-# We have one regression equation per time-term for each attribute,
-# plus a few "special" regularization regression equations:
-
-num_special_regs = 12;
-if (is_GROUP_4_ENABLED == 1) {
-    num_special_regs = 16;
-}
-
-num_reg_eqs = num_terms * num_attrs + num_special_regs;
-
-RegresValueMap = matrix (0.0, rows = (num_reg_eqs * num_factors), cols = (num_terms * num_attrs));
-RegresFactorDefault = matrix (0.0, rows = (num_reg_eqs * num_factors), cols = 1);
-
-# All regression equations for the same attribute share the same parameters, regardless
-# of the term; some parameters are shared across multiple attributes, (those attributes
-# whose behavior is believed to be similar) as specified in the table below:
-
-num_params = 28;
-if (is_GROUP_4_ENABLED == 1) {
-    num_params = 35;
-}
-
-# Factors: -self[t]  total[t]  self[t-1]  self[t-1]-
-#                                          self[t-2]
-# PARAMS:
-# Group 1:   1.0     prm#01     prm#08     prm#09    Row #01 = free#01 + ... + free#06
-# Group 1:    "      prm#02     prm#10     prm#11    Row #02 = free#01
-# Group 1:    "      prm#03       "          "       Row #03 = free#02
-# Group 1:    "      prm#04       "          "       Row #04 = free#03
-# Group 1:    "      prm#05       "          "       Row #05 = free#04
-# Group 1:    "      prm#06       "          "       Row #06 = free#05
-# Group 1:    "      prm#07       "          "       Row #07 = free#06
-# --------------------------------------------------------------------
-# Group 2:   1.0     prm#12     prm#13     prm#14    Row #08 = free#07
-# --------------------------------------------------------------------
-# Group 3:   1.0     prm#15     prm#22     prm#23    Row #09 = free#08 + ... + free#13
-# Group 3:    "      prm#16     prm#24     prm#25    Row #10 = free#08
-# Group 3:    "      prm#17       "          "       Row #11 = free#09
-# Group 3:    "      prm#18       "          "       Row #12 = free#10
-# Group 3:    "      prm#19       "          "       Row #13 = free#11
-# Group 3:    "      prm#20       "          "       Row #14 = free#12
-# Group 3:    "      prm#21       "          "       Row #15 = free#13
-# --------------------------------------------------------------------
-# GROUP-4 ZEROS: FIVE PARAMETERS REVOKED
-# Group 4:   1.0     prm#29     prm#32     prm#33    Row #16 = free#14 + free#15
-# Group 4:    "      prm#30     prm#34     prm#35    Row #17 = free#14
-# Group 4:    "      prm#31       "          "       Row #18 = free#15
-# --------------------------------------------------------------------
-# Group 5:   1.0     prm#26     prm#27     prm#28    Row #19 = free#01 + ... + free#15
-# 
-# (The aggregates in Groups 1..4 regress on the total cost in Group 5;
-#  the total cost in Group 5 regresses on the intercept.)
-
-# THE LAST FEW "SPECIAL" REGULARIZATION EQUATIONS:
-# Factors:   1.0      -1.0       0.0        0.0
-# PARAMS:
-#          prm#27      1.0       0.0        0.0  # self[t-1]
-#          prm#28      0.0       0.0        0.0  # trend
-#          prm#08      0.0       0.0        0.0  # self[t-1]
-#          prm#09      0.0       0.0        0.0  # trend
-#          prm#10      0.0       0.0        0.0  # self[t-1]
-#          prm#11      0.0       0.0        0.0  # trend
-#          prm#13      0.0       0.0        0.0  # self[t-1]
-#          prm#14      0.0       0.0        0.0  # trend
-#          prm#22      0.0       0.0        0.0  # self[t-1]
-#          prm#23      0.0       0.0        0.0  # trend
-#          prm#24      0.0       0.0        0.0  # self[t-1]
-#          prm#25      0.0       0.0        0.0  # trend
-### GROUP-4 ZEROS: THESE EQUATIONS USE REVOKED PARAMETERS AND DO NOT APPEAR
-#          prm#32      0.0       0.0        0.0  # self[t-1]
-#          prm#33      0.0       0.0        0.0  # trend
-#          prm#34      0.0       0.0        0.0  # self[t-1]
-#          prm#35      0.0       0.0        0.0  # trend
-#
-# ---------------------------------------------------------------------------------------
-
-
-
-# ---------------------------------------------------------
-# GENERATE AN AFFINE MAP FROM REPORTS TO REGRESSION FACTORS
-# AFFINE MAP = LINEAR MAP + A VECTOR OF DEFAULTS
-# ---------------------------------------------------------
-
-
-for (t in 1 : num_terms) {
-    for (i in 1 : num_attrs) {
-
-reg_index = ((t-1) * num_attrs + i - 1) * num_factors;
-
-# -------------------------------
-# SETTING FACTORS #1, #3, and #4:
-# -------------------------------
-
-if (t == 1 & i != 19) { # THESE "REGRESSIONS" ARE DIFFERENT (MORE LIKE REGULARIZATIONS):
-    RegresValueMap [reg_index + 1, (t-1) * num_attrs + i ] = -1.0 + zero; # 1st factor: -x[t]
-    RegresValueMap [reg_index + 3, (t-1) * num_attrs + i ] =  1.5 + zero; # 3rd factor is approximated as: 
-    RegresValueMap [reg_index + 3,  t    * num_attrs + i ] = -0.3 + zero; #   1.5 x[t] - 0.3 x[t+1] - 0.2 x[t+2] =
-    RegresValueMap [reg_index + 3, (t+1) * num_attrs + i ] = -0.2 + zero; #   x[t] - 0.5 (x[t+1] - x[t]) - 0.2 (x[t+2] - x[t+1])
-}
-if (t == 2) {
-    RegresValueMap [reg_index + 1, (t-1) * num_attrs + i ] = -1.0 + zero; # 1st factor: -x[t]
-    RegresValueMap [reg_index + 3, (t-2) * num_attrs + i ] =  1.0 + zero; # 3rd factor: x[t-1]
-    w = 0.5;
-    RegresValueMap [reg_index + 4, (t-2) * num_attrs + i ] = (- 1 - w) + zero; # 4th factor is approximated as:
-    RegresValueMap [reg_index + 4, (t-1) * num_attrs + i ] = (1 + 2*w) + zero; #   - (1+w)x[t-1] + (1+2w)x[t] - w x[t+1] =
-    RegresValueMap [reg_index + 4,  t    * num_attrs + i ] =     (- w) + zero; #   (x[t]-x[t-1]) - w * ((x[t+1]-x[t]) - (x[t]-x[t-1]))
-}
-if (t >= 3) {
-    RegresValueMap [reg_index + 1, (t-1) * num_attrs + i ] = -1.0 + zero; # 1st factor: -x[t]
-    RegresValueMap [reg_index + 3, (t-2) * num_attrs + i ] =  1.0 + zero; # 3rd factor: x[t-1]
-    RegresValueMap [reg_index + 4, (t-2) * num_attrs + i ] =  1.0 + zero; # 4th factor is
-    RegresValueMap [reg_index + 4, (t-3) * num_attrs + i ] = -1.0 + zero; #   x[t-1] - x[t-2]
-}
-
-# -------------------------------------------
-# SETTING FACTOR #2 DEPENDS ON THE ATTRIBUTE:
-# -------------------------------------------
-
-if (i == 1) { # GROUP 1 SUBTOTAL
-    RegresValueMap [reg_index + 2, (t-1) * num_attrs + 19] =  1.0 + zero; # 2nd factor: Row#19[t]
-}
-if (2 <= i & i <= 7) { # GROUP 1 ATTRIBUTES
-    RegresValueMap [reg_index + 2, (t-1) * num_attrs +  1] =  1.0 + zero; # 2nd factor: Row#01[t]
-}
-
-if (i == 8) { # GROUP 2 SUBTOTAL
-    RegresValueMap [reg_index + 2, (t-1) * num_attrs + 19] =  1.0 + zero; # 2nd factor: Row#19[t]
-}
-
-if (i == 9) { # GROUP 3 SUBTOTAL
-    RegresValueMap [reg_index + 2, (t-1) * num_attrs + 19] =  1.0 + zero; # 2nd factor: Row#19[t]
-}
-if (10 <= i & i <= 15) { # GROUP 3 ATTRIBUTES:
-    RegresValueMap [reg_index + 2, (t-1) * num_attrs +  9] =  1.0 + zero; # 2nd factor: Row#09[t]
-}
-
-if (i == 16) { # GROUP 4 SUBTOTAL
-    RegresValueMap [reg_index + 2, (t-1) * num_attrs + 19] =  1.0 + zero; # 2nd factor: Row#19[t]
-}
-if (17 <= i & i <= 18) { # GROUP 4 ATTRIBUTES:
-    RegresValueMap [reg_index + 2, (t-1) * num_attrs + 16] =  1.0 + zero; # 2nd factor: Row#16[t]
-}
-
-if (i == 19 & t >= 2) { # THE TOTAL, ONLY FOR t >= 2
-    RegresFactorDefault [reg_index + 2, 1]                 =  1.0 + zero; # 2nd factor: Intercept
-}
-
-###
-###  SPECIAL REGULARIZATION EQUATIONS FOR PARAMETERS ARE HANDLED SEPARATELY!
-###
-
-}}
-
-
-# ----------------------------------------------------------
-# GENERATE AN AFFINE MAP FROM PARAMETERS TO THE COEFFICIENTS
-# AT REGRESSION FACTORS: A LINEAR MAP + A VECTOR OF DEFAULTS
-# ----------------------------------------------------------
-
-RegresParamMap = matrix (0.0, rows = (num_reg_eqs * num_factors), cols = num_params);
-RegresCoeffDefault = matrix (0.0, rows = (num_reg_eqs * num_factors), cols = 1);
-
-for (t in 1 : num_terms) {
-    ta_shift = (t-1) * num_attrs - 1;
-
-# Group 1 attributes:
-    reg_index = (ta_shift + 1) * num_factors;
-    RegresCoeffDefault [reg_index + 1, 1] = 1.0 + zero;  # Default coefficient = 1.0
-    RegresParamMap [reg_index + 2,  1]    = 1.0 + zero;  # Param #01
-    RegresParamMap [reg_index + 3,  8]    = 1.0 + zero;  # Param #08
-    RegresParamMap [reg_index + 4,  9]    = 1.0 + zero;  # Param #09
-    for (i in 2 : 7) {
-        reg_index = (ta_shift + i) * num_factors;
-        RegresCoeffDefault [reg_index + 1, 1]  = 1.0 + zero;  # Default coefficient = 1.0
-        RegresParamMap [reg_index + 2,  i]     = 1.0 + zero;  # Param #02-#07
-        RegresParamMap [reg_index + 3, 10]     = 1.0 + zero;  # Param #10
-        RegresParamMap [reg_index + 4, 11]     = 1.0 + zero;  # Param #11
-    }
-
-# Group 2 attribute:
-    reg_index = (ta_shift + 8) * num_factors;
-    RegresCoeffDefault [reg_index + 1, 1] = 1.0 + zero;  # Default coefficient = 1.0
-    RegresParamMap [reg_index + 2, 12] = 1.0 + zero;  # Param #12
-    RegresParamMap [reg_index + 3, 13] = 1.0 + zero;  # Param #13
-    RegresParamMap [reg_index + 4, 14] = 1.0 + zero;  # Param #14
-
-# Group 3 attributes:
-    reg_index = (ta_shift + 9) * num_factors;
-    RegresCoeffDefault [reg_index + 1, 1]  = 1.0 + zero;  # Default coefficient = 1.0
-    RegresParamMap [reg_index + 2, 15]     = 1.0 + zero;  # Param #17
-    RegresParamMap [reg_index + 3, 22]     = 1.0 + zero;  # Param #22
-    RegresParamMap [reg_index + 4, 23]     = 1.0 + zero;  # Param #23
-    for (i in 10 : 15) {
-        reg_index = (ta_shift + i) * num_factors;
-        RegresCoeffDefault [reg_index + 1, 1]  = 1.0 + zero;  # Default coefficient = 1.0
-        RegresParamMap [reg_index + 2,  6 + i] = 1.0 + zero;  # Param #16-#21
-        RegresParamMap [reg_index + 3, 24]     = 1.0 + zero;  # Param #24
-        RegresParamMap [reg_index + 4, 25]     = 1.0 + zero;  # Param #25
-    }
-    
-# Group 4 attributes:
-if (is_GROUP_4_ENABLED == 1) {
-    reg_index = (ta_shift + 16) * num_factors;
-    RegresCoeffDefault [reg_index + 1, 1]  = 1.0 + zero;  # Default coefficient = 1.0
-    RegresParamMap [reg_index + 2, 29]     = 1.0 + zero;  # Param #29
-    RegresParamMap [reg_index + 3, 32]     = 1.0 + zero;  # Param #32
-    RegresParamMap [reg_index + 4, 33]     = 1.0 + zero;  # Param #33
-    for (i in 17 : 18) {
-        reg_index = (ta_shift + i) * num_factors;
-        RegresCoeffDefault [reg_index + 1, 1]  = 1.0 + zero;  # Default coefficient = 1.0
-        RegresParamMap [reg_index + 2, 13 + i] = 1.0 + zero;  # Param #30-#31
-        RegresParamMap [reg_index + 3, 34]     = 1.0 + zero;  # Param #34
-        RegresParamMap [reg_index + 4, 35]     = 1.0 + zero;  # Param #35
-    }
-}
-
-# Group 5 attribute:
-    reg_index = (ta_shift + 19) * num_factors;
-    RegresCoeffDefault [reg_index + 1, 1] = 1.0 + zero;  # Default coefficient = 1.0
-    RegresParamMap [reg_index + 2, 26] = 1.0 + zero;  # Param #26
-    RegresParamMap [reg_index + 3, 27] = 1.0 + zero;  # Param #27
-    RegresParamMap [reg_index + 4, 28] = 1.0 + zero;  # Param #28
-}
-
-
-# ----------------------------------------------------------------------
-# GENERATE A VECTOR OF SCALE MULTIPLIERS ("WEIGHTS"), ONE PER REGRESSION
-# ----------------------------------------------------------------------
-
-RegresScaleMult = matrix (1.0, rows = num_reg_eqs, cols = 1);
-
-global_weight = 0.5 + zero;
-
-attribute_size = rowMeans (abs (initial_reports [, 1:num_known_terms]));
-max_attr_size = max (attribute_size);
-
-for (t in 1 : num_terms) {
-    for (i in 1 : num_attrs) {
-    
-    scale_down = sqrt (attribute_size [i, 1] / max_attr_size) * 0.999 + 0.001;
-    acceptable_drift = scale_down * max_attr_size * 0.002;
-    if (t == 1) {
-        acceptable_drift = acceptable_drift * 10;
-    }
-
-    regeqn = (t-1) * num_attrs + i;
-    RegresScaleMult [regeqn, 1] = global_weight / (acceptable_drift ^ 2);
-
-}}
-
-
-
-
-# ----------------------------------------------------------------
-#         SPECIAL REGULARIZATION EQUATIONS FOR PARAMETERS        
-# GENERATE ALL THEIR AFFINE MAPS AND SCALE MULTIPLIERS ("WEIGHTS")
-# ----------------------------------------------------------------
-
-acceptable_drift = 0.02;
-
-# DO WHAT (ALMOST) ALL REGULARIZATIONS NEED
-for (i in 1:num_special_regs) {
-    reg_index = (num_reg_eqs - num_special_regs + i - 1) * num_factors;
-    RegresFactorDefault [reg_index + 1, 1] =  1.0 + zero;
-    RegresFactorDefault [reg_index + 2, 1] = -1.0 + zero;
-    regeqn = num_reg_eqs - num_special_regs + i;
-    RegresScaleMult [regeqn, 1] = global_weight / (acceptable_drift ^ 2);
-}
-
-reg_index = (num_reg_eqs - num_special_regs) * num_factors;
-
-# PARAMETER #27, TOTAL's "self[t-1]"
-    RegresParamMap [reg_index + 1, 27] = 1.0 + zero;
-    RegresCoeffDefault [reg_index + 2, 1] = 1.0 + zero;
-    
-    regeqn = num_reg_eqs - num_special_regs + 1;
-    drift_acceptable_here = acceptable_drift / 4;
-    RegresScaleMult [regeqn, 1] = global_weight / (drift_acceptable_here ^ 2);
-    
-reg_index = reg_index + num_factors;
-
-# PARAMETER #28, TOTAL's "trend"
-    RegresParamMap [reg_index + 1, 28] = 1.0 + zero;
-    RegresCoeffDefault [reg_index + 2, 1] = 0.7 + zero;
-### RegresParamMap [reg_index + 2, 27] = 1.0 + zero;
-reg_index = reg_index + num_factors;
-
-# PARAMETER #08, GROUP-1 SUBTOTAL's "self[t-1]"
-    RegresParamMap [reg_index + 1, 08] = 1.0 + zero;
-reg_index = reg_index + num_factors;
-
-# PARAMETER #09, GROUP-1 SUBTOTAL's "trend"
-    RegresParamMap [reg_index + 1, 09] = 1.0 + zero;
-    RegresParamMap [reg_index + 2, 08] = 1.0 + zero;
-reg_index = reg_index + num_factors;
-
-# PARAMETER #10, GROUP-1 VALUE's "self[t-1]"
-    RegresParamMap [reg_index + 1, 10] = 1.0 + zero;
-reg_index = reg_index + num_factors;
-
-# PARAMETER #11, GROUP-1 VALUE's "trend"
-    RegresParamMap [reg_index + 1, 11] = 1.0 + zero;
-    RegresParamMap [reg_index + 2, 10] = 1.0 + zero;
-reg_index = reg_index + num_factors;
-
-# PARAMETER #13, GROUP-2 SUBTOTAL's "self[t-1]"
-    RegresParamMap [reg_index + 1, 13] = 1.0 + zero;
-reg_index = reg_index + num_factors;
-
-# PARAMETER #14, GROUP-2 SUBTOTAL's "trend"
-    RegresParamMap [reg_index + 1, 14] = 1.0 + zero;
-    RegresParamMap [reg_index + 2, 13] = 1.0 + zero;
-reg_index = reg_index + num_factors;
-
-# PARAMETER #22, GROUP-3 SUBTOTAL's "self[t-1]"
-    RegresParamMap [reg_index + 1, 22] = 1.0 + zero; 
-reg_index = reg_index + num_factors;
-
-# PARAMETER #23, GROUP-3 SUBTOTAL's "trend"
-    RegresParamMap [reg_index + 1, 23] = 1.0 + zero;
-    RegresParamMap [reg_index + 2, 22] = 1.0 + zero; 
-reg_index = reg_index + num_factors;
-
-# PARAMETER #24, GROUP-3 VALUE's "self[t-1]"
-    RegresParamMap [reg_index + 1, 24] = 1.0 + zero;
-reg_index = reg_index + num_factors;
-
-# PARAMETER #25, GROUP-3 VALUE's "trend"
-    RegresParamMap [reg_index + 1, 25] = 1.0 + zero;
-    RegresParamMap [reg_index + 2, 24] = 1.0 + zero;
-reg_index = reg_index + num_factors;
-
-if (is_GROUP_4_ENABLED == 1) {
-
-# PARAMETER #32, GROUP-4 SUBTOTAL's "self[t-1]"
-    RegresParamMap [reg_index + 1, 32] = 1.0 + zero;
-reg_index = reg_index + num_factors;
-    
-# PARAMETER #33, GROUP-4 SUBTOTAL's "trend"
-    RegresParamMap [reg_index + 1, 33] = 1.0 + zero;
-    RegresParamMap [reg_index + 2, 32] = 1.0 + zero;
-reg_index = reg_index + num_factors;
-
-# PARAMETER #34, GROUP-4 VALUE's "self[t-1]"
-    RegresParamMap [reg_index + 1, 34] = 1.0 + zero;
-reg_index = reg_index + num_factors;
-
-# PARAMETER #35, GROUP-4 VALUE's "trend"
-    RegresParamMap [reg_index + 1, 35] = 1.0 + zero;
-    RegresParamMap [reg_index + 2, 34] = 1.0 + zero;
-reg_index = reg_index + num_factors;
-}
-
-
-
-# --------------------------------
-# WRITE OUT ALL GENERATED MATRICES
-# --------------------------------
-
-initial_reports_preprocessed = matrix (0.0, rows = num_attrs, cols = num_terms);
-initial_reports_preprocessed [, 2:(num_known_terms+1)] = initial_reports [, 1:num_known_terms];
-
-write (initial_reports_preprocessed, $2, format="text");
-write (CReps,              $3, format="text");
-write (RegresValueMap,     $4, format="text");
-write (RegresFactorDefault,$5, format="text");
-write (RegresParamMap,     $6, format="text");
-write (RegresCoeffDefault, $7, format="text");
-write (RegresScaleMult,    $8, format="text");
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# hadoop jar SystemML.jar -f test/scripts/applications/impute/wfundInputGenerator.dml -exec singlenode
+#    -args
+#        test/scripts/applications/impute/initial_reports
+#        test/scripts/applications/impute/initial_reports_preprocessed
+#        test/scripts/applications/impute/CReps 
+#        test/scripts/applications/impute/RegresValueMap
+#        test/scripts/applications/impute/RegresFactorDefault
+#        test/scripts/applications/impute/RegresParamMap
+#        test/scripts/applications/impute/RegresCoeffDefault
+#        test/scripts/applications/impute/RegresScaleMult
+
+initial_reports = read ($1);
+
+is_GROUP_4_ENABLED = 0;        #   = 1 or 0 ("0" if Group-4 = all 0s)
+num_EXTRA_MISSING_FREES = 0;   #   = 0 ("3" or "6" for Uganda)
+
+num_known_terms = 5;      # The number of known term reports
+num_predicted_terms = 1;  # The number of predicted (future) term reports
+
+num_terms = num_known_terms + num_predicted_terms + 1;  #  We predict the "0-th" report, too
+num_attrs = 19;  
+
+num_frees_per_term = 13;
+if (is_GROUP_4_ENABLED == 1) {
+    num_frees_per_term = 15;
+}
+num_regular_frees = (num_predicted_terms + 1) * num_frees_per_term;
+num_frees = num_regular_frees + num_EXTRA_MISSING_FREES;
+
+zero = matrix (0.0, rows = 1, cols = 1);
+
+# ---------------------------------------------------------
+# GENERATE AN AFFINE MAP FROM FREE VARIABLES TO THE REPORTS
+# AFFINE MAP = LINEAR MAP + INITIAL (DEFAULT) REPORTS
+# ---------------------------------------------------------
+
+CReps = matrix (0.0, rows = (num_terms * num_attrs), cols = num_frees);
+
+for (dt in 0:num_predicted_terms)
+{
+    ta_shift = 0;
+    if (dt > 0) {
+        ta_shift = (num_known_terms + dt) * num_attrs;
+    }
+    fv_shift = dt * num_frees_per_term;
+    
+# constraint that          row1 =  row2 +  row3 +  row4 +  row5 +  row6 + row7
+# translated to free vars: row1 = free1 + free2 + free3 + free4 + free5 + free6
+    for (i in 1:6) {
+        CReps [ta_shift + 1,     fv_shift + i] = 1.0 + zero;
+        CReps [ta_shift + 1 + i, fv_shift + i] = 1.0 + zero;
+    }
+# row 8 is free variable not appearing in any non-free variable
+    CReps [ta_shift + 8, fv_shift + 7] = 1.0 + zero;
+
+# constraint that          row9 = row10 + row11 +  row12 +  row13 +  row14 +  row15
+# translated to free vars: row9 = free8 + free9 + free10 + free11 + free12 + free13
+    for (i in 1:6) {
+        CReps [ta_shift + 9,     fv_shift + 7 + i] = 1.0 + zero;
+        CReps [ta_shift + 9 + i, fv_shift + 7 + i] = 1.0 + zero;
+    }
+# constraint that          row16 =  row14 +  row15
+# translated to free vars: row16 = free14 + free15
+if (is_GROUP_4_ENABLED == 1) {
+    for (i in 1:2) {
+        CReps [ta_shift + 16,     fv_shift + 13 + i] = 1.0 + zero;
+        CReps [ta_shift + 16 + i, fv_shift + 13 + i] = 1.0 + zero;
+    }
+}
+# constraint that           row19 = total cost (all free variables)
+# translated to free vars:  row19 = all free variables
+    for (i in 1:num_frees_per_term) {
+        CReps [ta_shift + 19, fv_shift + i] = 1.0 + zero;
+    }
+}
+
+# ---------------------------------------------------------
+# SPECIAL FREE VARIABLES TO HANDLE UGANDA'S MISSING VALUES
+# ---------------------------------------------------------
+
+if (num_EXTRA_MISSING_FREES == 3 | num_EXTRA_MISSING_FREES == 6)
+{
+    ta_shift = 3 * num_attrs;
+    CReps [ta_shift +  4,  num_regular_frees + 1] =  1.0 + zero;
+    CReps [ta_shift +  5,  num_regular_frees + 2] =  1.0 + zero;
+    CReps [ta_shift +  6,  num_regular_frees + 3] =  1.0 + zero;
+    CReps [ta_shift +  7,  num_regular_frees + 1] = -1.0 + zero;
+    CReps [ta_shift +  7,  num_regular_frees + 2] = -1.0 + zero;
+    CReps [ta_shift +  7,  num_regular_frees + 3] = -1.0 + zero;
+}
+
+if (num_EXTRA_MISSING_FREES == 6)
+{
+    ta_shift = 7 * num_attrs;
+    CReps [ta_shift +  4,  num_regular_frees + 4] =  1.0 + zero;
+    CReps [ta_shift +  5,  num_regular_frees + 5] =  1.0 + zero;
+    CReps [ta_shift +  6,  num_regular_frees + 6] =  1.0 + zero;
+    CReps [ta_shift +  7,  num_regular_frees + 4] = -1.0 + zero;
+    CReps [ta_shift +  7,  num_regular_frees + 5] = -1.0 + zero;
+    CReps [ta_shift +  7,  num_regular_frees + 6] = -1.0 + zero;
+}
+
+
+# ---------------------------------------------------------------------------------------
+#
+# In all regressions, except the last few "special" ones, there are 4 factors:
+# x[t]  ~  aggregate[t], x[t-1],  (x[t-1] - x[t-2])
+# The last regressions are for regularization, but they also follow the 4-factor pattern.
+
+num_factors = 4; 
+
+# We have one regression equation per time-term for each attribute,
+# plus a few "special" regularization regression equations:
+
+num_special_regs = 12;
+if (is_GROUP_4_ENABLED == 1) {
+    num_special_regs = 16;
+}
+
+num_reg_eqs = num_terms * num_attrs + num_special_regs;
+
+RegresValueMap = matrix (0.0, rows = (num_reg_eqs * num_factors), cols = (num_terms * num_attrs));
+RegresFactorDefault = matrix (0.0, rows = (num_reg_eqs * num_factors), cols = 1);
+
+# All regression equations for the same attribute share the same parameters, regardless
+# of the term; some parameters are shared across multiple attributes, (those attributes
+# whose behavior is believed to be similar) as specified in the table below:
+
+num_params = 28;
+if (is_GROUP_4_ENABLED == 1) {
+    num_params = 35;
+}
+
+# Factors: -self[t]  total[t]  self[t-1]  self[t-1]-
+#                                          self[t-2]
+# PARAMS:
+# Group 1:   1.0     prm#01     prm#08     prm#09    Row #01 = free#01 + ... + free#06
+# Group 1:    "      prm#02     prm#10     prm#11    Row #02 = free#01
+# Group 1:    "      prm#03       "          "       Row #03 = free#02
+# Group 1:    "      prm#04       "          "       Row #04 = free#03
+# Group 1:    "      prm#05       "          "       Row #05 = free#04
+# Group 1:    "      prm#06       "          "       Row #06 = free#05
+# Group 1:    "      prm#07       "          "       Row #07 = free#06
+# --------------------------------------------------------------------
+# Group 2:   1.0     prm#12     prm#13     prm#14    Row #08 = free#07
+# --------------------------------------------------------------------
+# Group 3:   1.0     prm#15     prm#22     prm#23    Row #09 = free#08 + ... + free#13
+# Group 3:    "      prm#16     prm#24     prm#25    Row #10 = free#08
+# Group 3:    "      prm#17       "          "       Row #11 = free#09
+# Group 3:    "      prm#18       "          "       Row #12 = free#10
+# Group 3:    "      prm#19       "          "       Row #13 = free#11
+# Group 3:    "      prm#20       "          "       Row #14 = free#12
+# Group 3:    "      prm#21       "          "       Row #15 = free#13
+# --------------------------------------------------------------------
+# GROUP-4 ZEROS: FIVE PARAMETERS REVOKED
+# Group 4:   1.0     prm#29     prm#32     prm#33    Row #16 = free#14 + free#15
+# Group 4:    "      prm#30     prm#34     prm#35    Row #17 = free#14
+# Group 4:    "      prm#31       "          "       Row #18 = free#15
+# --------------------------------------------------------------------
+# Group 5:   1.0     prm#26     prm#27     prm#28    Row #19 = free#01 + ... + free#15
+# 
+# (The aggregates in Groups 1..4 regress on the total cost in Group 5;
+#  the total cost in Group 5 regresses on the intercept.)
+
+# THE LAST FEW "SPECIAL" REGULARIZATION EQUATIONS:
+# Factors:   1.0      -1.0       0.0        0.0
+# PARAMS:
+#          prm#27      1.0       0.0        0.0  # self[t-1]
+#          prm#28      0.0       0.0        0.0  # trend
+#          prm#08      0.0       0.0        0.0  # self[t-1]
+#          prm#09      0.0       0.0        0.0  # trend
+#          prm#10      0.0       0.0        0.0  # self[t-1]
+#          prm#11      0.0       0.0        0.0  # trend
+#          prm#13      0.0       0.0        0.0  # self[t-1]
+#          prm#14      0.0       0.0        0.0  # trend
+#          prm#22      0.0       0.0        0.0  # self[t-1]
+#          prm#23      0.0       0.0        0.0  # trend
+#          prm#24      0.0       0.0        0.0  # self[t-1]
+#          prm#25      0.0       0.0        0.0  # trend
+### GROUP-4 ZEROS: THESE EQUATIONS USE REVOKED PARAMETERS AND DO NOT APPEAR
+#          prm#32      0.0       0.0        0.0  # self[t-1]
+#          prm#33      0.0       0.0        0.0  # trend
+#          prm#34      0.0       0.0        0.0  # self[t-1]
+#          prm#35      0.0       0.0        0.0  # trend
+#
+# ---------------------------------------------------------------------------------------
+
+
+
+# ---------------------------------------------------------
+# GENERATE AN AFFINE MAP FROM REPORTS TO REGRESSION FACTORS
+# AFFINE MAP = LINEAR MAP + A VECTOR OF DEFAULTS
+# ---------------------------------------------------------
+
+
+for (t in 1 : num_terms) {
+    for (i in 1 : num_attrs) {
+
+reg_index = ((t-1) * num_attrs + i - 1) * num_factors;
+
+# -------------------------------
+# SETTING FACTORS #1, #3, and #4:
+# -------------------------------
+
+if (t == 1 & i != 19) { # THESE "REGRESSIONS" ARE DIFFERENT (MORE LIKE REGULARIZATIONS):
+    RegresValueMap [reg_index + 1, (t-1) * num_attrs + i ] = -1.0 + zero; # 1st factor: -x[t]
+    RegresValueMap [reg_index + 3, (t-1) * num_attrs + i ] =  1.5 + zero; # 3rd factor is approximated as: 
+    RegresValueMap [reg_index + 3,  t    * num_attrs + i ] = -0.3 + zero; #   1.5 x[t] - 0.3 x[t+1] - 0.2 x[t+2] =
+    RegresValueMap [reg_index + 3, (t+1) * num_attrs + i ] = -0.2 + zero; #   x[t] - 0.5 (x[t+1] - x[t]) - 0.2 (x[t+2] - x[t+1])
+}
+if (t == 2) {
+    RegresValueMap [reg_index + 1, (t-1) * num_attrs + i ] = -1.0 + zero; # 1st factor: -x[t]
+    RegresValueMap [reg_index + 3, (t-2) * num_attrs + i ] =  1.0 + zero; # 3rd factor: x[t-1]
+    w = 0.5;
+    RegresValueMap [reg_index + 4, (t-2) * num_attrs + i ] = (- 1 - w) + zero; # 4th factor is approximated as:
+    RegresValueMap [reg_index + 4, (t-1) * num_attrs + i ] = (1 + 2*w) + zero; #   - (1+w)x[t-1] + (1+2w)x[t] - w x[t+1] =
+    RegresValueMap [reg_index + 4,  t    * num_attrs + i ] =     (- w) + zero; #   (x[t]-x[t-1]) - w * ((x[t+1]-x[t]) - (x[t]-x[t-1]))
+}
+if (t >= 3) {
+    RegresValueMap [reg_index + 1, (t-1) * num_attrs + i ] = -1.0 + zero; # 1st factor: -x[t]
+    RegresValueMap [reg_index + 3, (t-2) * num_attrs + i ] =  1.0 + zero; # 3rd factor: x[t-1]
+    RegresValueMap [reg_index + 4, (t-2) * num_attrs + i ] =  1.0 + zero; # 4th factor is
+    RegresValueMap [reg_index + 4, (t-3) * num_attrs + i ] = -1.0 + zero; #   x[t-1] - x[t-2]
+}
+
+# -------------------------------------------
+# SETTING FACTOR #2 DEPENDS ON THE ATTRIBUTE:
+# -------------------------------------------
+
+if (i == 1) { # GROUP 1 SUBTOTAL
+    RegresValueMap [reg_index + 2, (t-1) * num_attrs + 19] =  1.0 + zero; # 2nd factor: Row#19[t]
+}
+if (2 <= i & i <= 7) { # GROUP 1 ATTRIBUTES
+    RegresValueMap [reg_index + 2, (t-1) * num_attrs +  1] =  1.0 + zero; # 2nd factor: Row#01[t]
+}
+
+if (i == 8) { # GROUP 2 SUBTOTAL
+    RegresValueMap [reg_index + 2, (t-1) * num_attrs + 19] =  1.0 + zero; # 2nd factor: Row#19[t]
+}
+
+if (i == 9) { # GROUP 3 SUBTOTAL
+    RegresValueMap [reg_index + 2, (t-1) * num_attrs + 19] =  1.0 + zero; # 2nd factor: Row#19[t]
+}
+if (10 <= i & i <= 15) { # GROUP 3 ATTRIBUTES:
+    RegresValueMap [reg_index + 2, (t-1) * num_attrs +  9] =  1.0 + zero; # 2nd factor: Row#09[t]
+}
+
+if (i == 16) { # GROUP 4 SUBTOTAL
+    RegresValueMap [reg_index + 2, (t-1) * num_attrs + 19] =  1.0 + zero; # 2nd factor: Row#19[t]
+}
+if (17 <= i & i <= 18) { # GROUP 4 ATTRIBUTES:
+    RegresValueMap [reg_index + 2, (t-1) * num_attrs + 16] =  1.0 + zero; # 2nd factor: Row#16[t]
+}
+
+if (i == 19 & t >= 2) { # THE TOTAL, ONLY FOR t >= 2
+    RegresFactorDefault [reg_index + 2, 1]                 =  1.0 + zero; # 2nd factor: Intercept
+}
+
+###
+###  SPECIAL REGULARIZATION EQUATIONS FOR PARAMETERS ARE HANDLED SEPARATELY!
+###
+
+}}
+
+
+# ----------------------------------------------------------
+# GENERATE AN AFFINE MAP FROM PARAMETERS TO THE COEFFICIENTS
+# AT REGRESSION FACTORS: A LINEAR MAP + A VECTOR OF DEFAULTS
+# ----------------------------------------------------------
+
+RegresParamMap = matrix (0.0, rows = (num_reg_eqs * num_factors), cols = num_params);
+RegresCoeffDefault = matrix (0.0, rows = (num_reg_eqs * num_factors), cols = 1);
+
+for (t in 1 : num_terms) {
+    ta_shift = (t-1) * num_attrs - 1;
+
+# Group 1 attributes:
+    reg_index = (ta_shift + 1) * num_factors;
+    RegresCoeffDefault [reg_index + 1, 1] = 1.0 + zero;  # Default coefficient = 1.0
+    RegresParamMap [reg_index + 2,  1]    = 1.0 + zero;  # Param #01
+    RegresParamMap [reg_index + 3,  8]    = 1.0 + zero;  # Param #08
+    RegresParamMap [reg_index + 4,  9]    = 1.0 + zero;  # Param #09
+    for (i in 2 : 7) {
+        reg_index = (ta_shift + i) * num_factors;
+        RegresCoeffDefault [reg_index + 1, 1]  = 1.0 + zero;  # Default coefficient = 1.0
+        RegresParamMap [reg_index + 2,  i]     = 1.0 + zero;  # Param #02-#07
+        RegresParamMap [reg_index + 3, 10]     = 1.0 + zero;  # Param #10
+        RegresParamMap [reg_index + 4, 11]     = 1.0 + zero;  # Param #11
+    }
+
+# Group 2 attribute:
+    reg_index = (ta_shift + 8) * num_factors;
+    RegresCoeffDefault [reg_index + 1, 1] = 1.0 + zero;  # Default coefficient = 1.0
+    RegresParamMap [reg_index + 2, 12] = 1.0 + zero;  # Param #12
+    RegresParamMap [reg_index + 3, 13] = 1.0 + zero;  # Param #13
+    RegresParamMap [reg_index + 4, 14] = 1.0 + zero;  # Param #14
+
+# Group 3 attributes:
+    reg_index = (ta_shift + 9) * num_factors;
+    RegresCoeffDefault [reg_index + 1, 1]  = 1.0 + zero;  # Default coefficient = 1.0
+    RegresParamMap [reg_index + 2, 15]     = 1.0 + zero;  # Param #15
+    RegresParamMap [reg_index + 3, 22]     = 1.0 + zero;  # Param #22
+    RegresParamMap [reg_index + 4, 23]     = 1.0 + zero;  # Param #23
+    for (i in 10 : 15) {
+        reg_index = (ta_shift + i) * num_factors;
+        RegresCoeffDefault [reg_index + 1, 1]  = 1.0 + zero;  # Default coefficient = 1.0
+        RegresParamMap [reg_index + 2,  6 + i] = 1.0 + zero;  # Param #16-#21
+        RegresParamMap [reg_index + 3, 24]     = 1.0 + zero;  # Param #24
+        RegresParamMap [reg_index + 4, 25]     = 1.0 + zero;  # Param #25
+    }
+    
+# Group 4 attributes:
+if (is_GROUP_4_ENABLED == 1) {
+    reg_index = (ta_shift + 16) * num_factors;
+    RegresCoeffDefault [reg_index + 1, 1]  = 1.0 + zero;  # Default coefficient = 1.0
+    RegresParamMap [reg_index + 2, 29]     = 1.0 + zero;  # Param #29
+    RegresParamMap [reg_index + 3, 32]     = 1.0 + zero;  # Param #32
+    RegresParamMap [reg_index + 4, 33]     = 1.0 + zero;  # Param #33
+    for (i in 17 : 18) {
+        reg_index = (ta_shift + i) * num_factors;
+        RegresCoeffDefault [reg_index + 1, 1]  = 1.0 + zero;  # Default coefficient = 1.0
+        RegresParamMap [reg_index + 2, 13 + i] = 1.0 + zero;  # Param #30-#31
+        RegresParamMap [reg_index + 3, 34]     = 1.0 + zero;  # Param #34
+        RegresParamMap [reg_index + 4, 35]     = 1.0 + zero;  # Param #35
+    }
+}
+
+# Group 5 attribute:
+    reg_index = (ta_shift + 19) * num_factors;
+    RegresCoeffDefault [reg_index + 1, 1] = 1.0 + zero;  # Default coefficient = 1.0
+    RegresParamMap [reg_index + 2, 26] = 1.0 + zero;  # Param #26
+    RegresParamMap [reg_index + 3, 27] = 1.0 + zero;  # Param #27
+    RegresParamMap [reg_index + 4, 28] = 1.0 + zero;  # Param #28
+}
+
+
+# ----------------------------------------------------------------------
+# GENERATE A VECTOR OF SCALE MULTIPLIERS ("WEIGHTS"), ONE PER REGRESSION
+# ----------------------------------------------------------------------
+
+RegresScaleMult = matrix (1.0, rows = num_reg_eqs, cols = 1);
+
+global_weight = 0.5 + zero;
+
+attribute_size = rowMeans (abs (initial_reports [, 1:num_known_terms]));
+max_attr_size = max (attribute_size);
+
+for (t in 1 : num_terms) {
+    for (i in 1 : num_attrs) {
+    
+    scale_down = sqrt (attribute_size [i, 1] / max_attr_size) * 0.999 + 0.001;
+    acceptable_drift = scale_down * max_attr_size * 0.002;
+    if (t == 1) {
+        acceptable_drift = acceptable_drift * 10;
+    }
+
+    regeqn = (t-1) * num_attrs + i;
+    RegresScaleMult [regeqn, 1] = global_weight / (acceptable_drift ^ 2);
+
+}}
+
+
+
+
+# ----------------------------------------------------------------
+#         SPECIAL REGULARIZATION EQUATIONS FOR PARAMETERS        
+# GENERATE ALL THEIR AFFINE MAPS AND SCALE MULTIPLIERS ("WEIGHTS")
+# ----------------------------------------------------------------
+
+acceptable_drift = 0.02;
+
+# DO WHAT (ALMOST) ALL REGULARIZATIONS NEED
+for (i in 1:num_special_regs) {
+    reg_index = (num_reg_eqs - num_special_regs + i - 1) * num_factors;
+    RegresFactorDefault [reg_index + 1, 1] =  1.0 + zero;
+    RegresFactorDefault [reg_index + 2, 1] = -1.0 + zero;
+    regeqn = num_reg_eqs - num_special_regs + i;
+    RegresScaleMult [regeqn, 1] = global_weight / (acceptable_drift ^ 2);
+}
+
+reg_index = (num_reg_eqs - num_special_regs) * num_factors;
+
+# PARAMETER #27, TOTAL's "self[t-1]"
+    RegresParamMap [reg_index + 1, 27] = 1.0 + zero;
+    RegresCoeffDefault [reg_index + 2, 1] = 1.0 + zero;
+    
+    regeqn = num_reg_eqs - num_special_regs + 1;
+    drift_acceptable_here = acceptable_drift / 4;
+    RegresScaleMult [regeqn, 1] = global_weight / (drift_acceptable_here ^ 2);
+    
+reg_index = reg_index + num_factors;
+
+# PARAMETER #28, TOTAL's "trend"
+    RegresParamMap [reg_index + 1, 28] = 1.0 + zero;
+    RegresCoeffDefault [reg_index + 2, 1] = 0.7 + zero;
+### RegresParamMap [reg_index + 2, 27] = 1.0 + zero;
+reg_index = reg_index + num_factors;
+
+# PARAMETER #08, GROUP-1 SUBTOTAL's "self[t-1]"
+    RegresParamMap [reg_index + 1, 08] = 1.0 + zero;
+reg_index = reg_index + num_factors;
+
+# PARAMETER #09, GROUP-1 SUBTOTAL's "trend"
+    RegresParamMap [reg_index + 1, 09] = 1.0 + zero;
+    RegresParamMap [reg_index + 2, 08] = 1.0 + zero;
+reg_index = reg_index + num_factors;
+
+# PARAMETER #10, GROUP-1 VALUE's "self[t-1]"
+    RegresParamMap [reg_index + 1, 10] = 1.0 + zero;
+reg_index = reg_index + num_factors;
+
+# PARAMETER #11, GROUP-1 VALUE's "trend"
+    RegresParamMap [reg_index + 1, 11] = 1.0 + zero;
+    RegresParamMap [reg_index + 2, 10] = 1.0 + zero;
+reg_index = reg_index + num_factors;
+
+# PARAMETER #13, GROUP-2 SUBTOTAL's "self[t-1]"
+    RegresParamMap [reg_index + 1, 13] = 1.0 + zero;
+reg_index = reg_index + num_factors;
+
+# PARAMETER #14, GROUP-2 SUBTOTAL's "trend"
+    RegresParamMap [reg_index + 1, 14] = 1.0 + zero;
+    RegresParamMap [reg_index + 2, 13] = 1.0 + zero;
+reg_index = reg_index + num_factors;
+
+# PARAMETER #22, GROUP-3 SUBTOTAL's "self[t-1]"
+    RegresParamMap [reg_index + 1, 22] = 1.0 + zero; 
+reg_index = reg_index + num_factors;
+
+# PARAMETER #23, GROUP-3 SUBTOTAL's "trend"
+    RegresParamMap [reg_index + 1, 23] = 1.0 + zero;
+    RegresParamMap [reg_index + 2, 22] = 1.0 + zero; 
+reg_index = reg_index + num_factors;
+
+# PARAMETER #24, GROUP-3 VALUE's "self[t-1]"
+    RegresParamMap [reg_index + 1, 24] = 1.0 + zero;
+reg_index = reg_index + num_factors;
+
+# PARAMETER #25, GROUP-3 VALUE's "trend"
+    RegresParamMap [reg_index + 1, 25] = 1.0 + zero;
+    RegresParamMap [reg_index + 2, 24] = 1.0 + zero;
+reg_index = reg_index + num_factors;
+
+if (is_GROUP_4_ENABLED == 1) {
+
+# PARAMETER #32, GROUP-4 SUBTOTAL's "self[t-1]"
+    RegresParamMap [reg_index + 1, 32] = 1.0 + zero;
+reg_index = reg_index + num_factors;
+    
+# PARAMETER #33, GROUP-4 SUBTOTAL's "trend"
+    RegresParamMap [reg_index + 1, 33] = 1.0 + zero;
+    RegresParamMap [reg_index + 2, 32] = 1.0 + zero;
+reg_index = reg_index + num_factors;
+
+# PARAMETER #34, GROUP-4 VALUE's "self[t-1]"
+    RegresParamMap [reg_index + 1, 34] = 1.0 + zero;
+reg_index = reg_index + num_factors;
+
+# PARAMETER #35, GROUP-4 VALUE's "trend"
+    RegresParamMap [reg_index + 1, 35] = 1.0 + zero;
+    RegresParamMap [reg_index + 2, 34] = 1.0 + zero;
+reg_index = reg_index + num_factors;
+}
+
+
+
+# --------------------------------
+# WRITE OUT ALL GENERATED MATRICES
+# --------------------------------
+
+initial_reports_preprocessed = matrix (0.0, rows = num_attrs, cols = num_terms);
+initial_reports_preprocessed [, 2:(num_known_terms+1)] = initial_reports [, 1:num_known_terms];
+
+write (initial_reports_preprocessed, $2, format="text");
+write (CReps,              $3, format="text");
+write (RegresValueMap,     $4, format="text");
+write (RegresFactorDefault,$5, format="text");
+write (RegresParamMap,     $6, format="text");
+write (RegresCoeffDefault, $7, format="text");
+write (RegresScaleMult,    $8, format="text");

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/impute/wfundInputGenerator.pre2013-08-26.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/impute/wfundInputGenerator.pre2013-08-26.dml b/src/test/scripts/applications/impute/wfundInputGenerator.pre2013-08-26.dml
index 735efe7..82bf551 100644
--- a/src/test/scripts/applications/impute/wfundInputGenerator.pre2013-08-26.dml
+++ b/src/test/scripts/applications/impute/wfundInputGenerator.pre2013-08-26.dml
@@ -1,442 +1,442 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# hadoop jar SystemML.jar -f test/scripts/applications/impute/wfundInputGenerator.dml -exec singlenode
-#    -args
-#        test/scripts/applications/impute/initial_reports
-#        test/scripts/applications/impute/CReps 
-#        test/scripts/applications/impute/RegresValueMap
-#        test/scripts/applications/impute/RegresFactorDefault
-#        test/scripts/applications/impute/RegresParamMap
-#        test/scripts/applications/impute/RegresCoeffDefault
-#        test/scripts/applications/impute/RegresScaleMult
-
-is_GROUP_4_ENABLED = 1;   #   = 1 or 0
-num_known_terms = 6;      # The number of   known   term reports, feel free to change
-num_predicted_terms = 1;  # The number of predicted term reports, feel free to change
-
-num_terms = num_known_terms + num_predicted_terms;
-num_attrs = 19;  
-
-num_frees_per_term = 13;
-if (is_GROUP_4_ENABLED == 1) {
-    num_frees_per_term = 15;
-}
-num_frees = num_predicted_terms * num_frees_per_term;
-
-zero = matrix (0.0, rows = 1, cols = 1);
-
-# ---------------------------------------------------------
-# GENERATE AN AFFINE MAP FROM FREE VARIABLES TO THE REPORTS
-# AFFINE MAP = LINEAR MAP + INITIAL (DEFAULT) REPORTS
-# ---------------------------------------------------------
-
-CReps = matrix (0.0, rows = (num_terms * num_attrs), cols = num_frees);
-
-for (dt in 1:num_predicted_terms)
-{
-    ta_shift = (num_known_terms + dt - 1) * num_attrs;
-    fv_shift = (dt - 1) * num_frees_per_term;
-# constraint that          row1 =  row2 +  row3 +  row4 +  row5 +  row6 + row7
-# translated to free vars: row1 = free1 + free2 + free3 + free4 + free5 + free6
-    CReps [ta_shift +  1, fv_shift +  1] = 1.0 + zero;
-    CReps [ta_shift +  1, fv_shift +  2] = 1.0 + zero;
-    CReps [ta_shift +  1, fv_shift +  3] = 1.0 + zero;
-    CReps [ta_shift +  1, fv_shift +  4] = 1.0 + zero;
-    CReps [ta_shift +  1, fv_shift +  5] = 1.0 + zero;
-    CReps [ta_shift +  1, fv_shift +  6] = 1.0 + zero;
-    CReps [ta_shift +  2, fv_shift +  1] = 1.0 + zero;
-    CReps [ta_shift +  3, fv_shift +  2] = 1.0 + zero;
-    CReps [ta_shift +  4, fv_shift +  3] = 1.0 + zero;
-    CReps [ta_shift +  5, fv_shift +  4] = 1.0 + zero;
-    CReps [ta_shift +  6, fv_shift +  5] = 1.0 + zero;
-    CReps [ta_shift +  7, fv_shift +  6] = 1.0 + zero;
-
-# row 8 is free variable not appearing in any non-free variable
-    CReps [ta_shift +  8, fv_shift +  7] = 1.0 + zero;
-
-# constraint that          row9 = row10 + row11 +  row12 +  row13 +  row14 +  row15
-# translated to free vars: row9 = free8 + free9 + free10 + free11 + free12 + free13
-    CReps [ta_shift +  9, fv_shift +  8] = 1.0 + zero;
-    CReps [ta_shift +  9, fv_shift +  9] = 1.0 + zero;
-    CReps [ta_shift +  9, fv_shift + 10] = 1.0 + zero;
-    CReps [ta_shift +  9, fv_shift + 11] = 1.0 + zero;
-    CReps [ta_shift +  9, fv_shift + 12] = 1.0 + zero;
-    CReps [ta_shift +  9, fv_shift + 13] = 1.0 + zero;
-    CReps [ta_shift + 10, fv_shift +  8] = 1.0 + zero;
-    CReps [ta_shift + 11, fv_shift +  9] = 1.0 + zero;
-    CReps [ta_shift + 12, fv_shift + 10] = 1.0 + zero;
-    CReps [ta_shift + 13, fv_shift + 11] = 1.0 + zero;
-    CReps [ta_shift + 14, fv_shift + 12] = 1.0 + zero;
-    CReps [ta_shift + 15, fv_shift + 13] = 1.0 + zero;
-
-# constraint that          row16 =  row17 +  row18
-# translated to free vars: row16 = free14 + free15
-    if (is_GROUP_4_ENABLED == 1) {
-        CReps [ta_shift + 16, fv_shift + 14] = 1.0 + zero;
-        CReps [ta_shift + 16, fv_shift + 15] = 1.0 + zero;
-        CReps [ta_shift + 17, fv_shift + 14] = 1.0 + zero;
-        CReps [ta_shift + 18, fv_shift + 15] = 1.0 + zero;
-    }
-
-# constraint that           row19 = total cost (all free variables)
-# translated to free vars:  row19 = all free variables
-    CReps [ta_shift + 19, fv_shift +  1] = 1.0 + zero;
-    CReps [ta_shift + 19, fv_shift +  2] = 1.0 + zero;
-    CReps [ta_shift + 19, fv_shift +  3] = 1.0 + zero;
-    CReps [ta_shift + 19, fv_shift +  4] = 1.0 + zero;
-    CReps [ta_shift + 19, fv_shift +  5] = 1.0 + zero;
-    CReps [ta_shift + 19, fv_shift +  6] = 1.0 + zero;
-    CReps [ta_shift + 19, fv_shift +  7] = 1.0 + zero;
-    CReps [ta_shift + 19, fv_shift +  8] = 1.0 + zero;
-    CReps [ta_shift + 19, fv_shift +  9] = 1.0 + zero;
-    CReps [ta_shift + 19, fv_shift + 10] = 1.0 + zero;
-    CReps [ta_shift + 19, fv_shift + 11] = 1.0 + zero;
-    CReps [ta_shift + 19, fv_shift + 12] = 1.0 + zero;
-    CReps [ta_shift + 19, fv_shift + 13] = 1.0 + zero;
-    if (is_GROUP_4_ENABLED == 1) {
-        CReps [ta_shift + 19, fv_shift + 14] = 1.0 + zero;
-        CReps [ta_shift + 19, fv_shift + 15] = 1.0 + zero;
-    }
-}
-
-# ---------------------------------------------------------
-# GENERATE AN AFFINE MAP FROM REPORTS TO REGRESSION FACTORS
-# AFFINE MAP = LINEAR MAP + A VECTOR OF DEFAULTS
-# ---------------------------------------------------------
-
-# In all regressions, except the last few "special" ones, there are 4 factors:
-# x[t]  ~  aggregate[t], x[t-1],  (x[t-1] - x[t-2])
-# The last regressions are for regularization, but they also follow the 4-factor pattern.
-num_factors = 4; 
-
-# We have one regression equation per time-term for each attribute, 
-# plus a few "special" regularization regression equations:
-num_special_regs = 12;
-if (is_GROUP_4_ENABLED == 1) {
-    num_special_regs = 16;
-}
-
-num_reg_eqs = num_terms * num_attrs + num_special_regs;
-
-RegresValueMap = matrix (0.0, rows = (num_reg_eqs * num_factors), cols = (num_terms * num_attrs));
-RegresFactorDefault = matrix (0.0, rows = (num_reg_eqs * num_factors), cols = 1);
-
-# All regression equations for the same attribute share the same parameters, regardless
-# of the term; some parameters are shared across multiple attributes, (those attributes
-# whose behavior is believed to be similar) as specified in the table below:
-
-num_params = 28;
-if (is_GROUP_4_ENABLED == 1) {
-    num_params = 35;
-}
-
-# Factors: -self[t]  total[t]  self[t-1]  self[t-1]-
-#                                          self[t-2]
-# PARAMS:
-# Group 1:   1.0     prm#01     prm#08     prm#09    Row #01 = free#01 + ... + free#06
-# Group 1:    "      prm#02     prm#10     prm#11    Row #02 = free#01
-# Group 1:    "      prm#03       "          "       Row #03 = free#02
-# Group 1:    "      prm#04       "          "       Row #04 = free#03
-# Group 1:    "      prm#05       "          "       Row #05 = free#04
-# Group 1:    "      prm#06       "          "       Row #06 = free#05
-# Group 1:    "      prm#07       "          "       Row #07 = free#06
-# --------------------------------------------------------------------
-# Group 2:   1.0     prm#12     prm#13     prm#14    Row #08 = free#07
-# --------------------------------------------------------------------
-# Group 3:   1.0     prm#15     prm#22     prm#23    Row #09 = free#08 + ... + free#13
-# Group 3:    "      prm#16     prm#24     prm#25    Row #10 = free#08
-# Group 3:    "      prm#17       "          "       Row #11 = free#09
-# Group 3:    "      prm#18       "          "       Row #12 = free#10
-# Group 3:    "      prm#19       "          "       Row #13 = free#11
-# Group 3:    "      prm#20       "          "       Row #14 = free#12
-# Group 3:    "      prm#21       "          "       Row #15 = free#13
-# --------------------------------------------------------------------
-# GROUP-4 ZEROS: FIVE PARAMETERS REVOKED
-# Group 4:   1.0     prm#29     prm#32     prm#33    Row #16 = free#14 + free#15
-# Group 4:    "      prm#30     prm#34     prm#35    Row #17 = free#14
-# Group 4:    "      prm#31       "          "       Row #18 = free#15
-# --------------------------------------------------------------------
-# Group 5:   1.0     prm#26     prm#27     prm#28    Row #19 = free#01 + ... + free#15
-# 
-# (The aggregates in Groups 1..4 regress on the total cost in Group 5;
-#  the total cost in Group 5 regresses on the intercept.)
-
-# THE LAST FEW "SPECIAL" REGULARIZATION EQUATIONS:
-# Factors:   1.0      -1.0       0.0        0.0
-# PARAMS:
-#          prm#27      1.0       0.0        0.0
-#          prm#28      0.0       0.0        0.0
-#          prm#08      0.0       0.0        0.0
-#          prm#09      0.0       0.0        0.0
-#          prm#10      0.0       0.0        0.0
-#          prm#11      0.0       0.0        0.0
-#          prm#13      0.0       0.0        0.0
-#          prm#14      0.0       0.0        0.0
-#          prm#22      0.0       0.0        0.0
-#          prm#23      0.0       0.0        0.0
-#          prm#24      0.0       0.0        0.0
-#          prm#25      0.0       0.0        0.0
-#          prm#32      0.0       0.0        0.0  # GROUP-4 ZEROS:
-#          prm#33      0.0       0.0        0.0  #   THESE EQUATIONS
-#          prm#34      0.0       0.0        0.0  #   USE REVOKED PARAMETERS
-#          prm#35      0.0       0.0        0.0  #   AND DO NOT APPEAR
-
-
-
-for (t in 1 : num_terms)
-{
-# Group 1 attributes:
-    for (i in 1 : 7) {
-        reg_index = ((t-1) * num_attrs - 1 + i) * num_factors;
-        RegresValueMap [reg_index + 1, (t-1) * num_attrs + i] = -1.0 + zero;  # 1st factor is -x[t]
-        if (i == 1) {
-            RegresValueMap [reg_index + 2, (t-1) * num_attrs + 19] =  1.0 + zero; # 2nd factor: Row#19[t]
-        } else {
-            RegresValueMap [reg_index + 2, (t-1) * num_attrs +  1] =  1.0 + zero; # 2nd factor: Row#01[t]
-        }
-        if (t == 1) {
-            RegresValueMap [reg_index + 3, i] = 1.0 + zero; # For t = 1 the 3rd factor is x[t] = x[1]
-        } else {
-            RegresValueMap [reg_index + 3, (t-2) * num_attrs +  i] =  1.0 + zero; # 3rd factor: x[t-1]
-        }
-        if (t >= 3) {
-            RegresValueMap [reg_index + 4, (t-2) * num_attrs +  i] =  1.0 + zero; # 4th factor is
-            RegresValueMap [reg_index + 4, (t-3) * num_attrs +  i] = -1.0 + zero; #   x[t-1] - x[t-2]
-        }
-    }
-
-# Group 2 attribute:
-    reg_index = ((t-1) * num_attrs - 1 + 8) * num_factors;
-    RegresValueMap [reg_index + 1, (t-1) * num_attrs +  8]     = -1.0 + zero;  # 1st factor is -x[t]
-    RegresValueMap [reg_index + 2, (t-1) * num_attrs + 19]     =  1.0 + zero;  # 2nd factor: Row#19[t]
-    if (t == 1) {
-        RegresValueMap [reg_index + 3, 8] = 1.0 + zero; # For t = 1 the 3rd factor is x[t] = x[1]
-    } else {
-        RegresValueMap [reg_index + 3, (t-2) * num_attrs +  8] =  1.0 + zero;  # 3rd factor: x[t-1]
-    }
-    if (t >= 3) {
-        RegresValueMap [reg_index + 4, (t-2) * num_attrs +  8] =  1.0 + zero;  # 4th factor is
-        RegresValueMap [reg_index + 4, (t-3) * num_attrs +  8] = -1.0 + zero;  #   x[t-1] - x[t-2]
-    }
-
-# Group 3 attributes:
-    for (i in 9 : 15) {
-        reg_index = ((t-1) * num_attrs - 1 + i) * num_factors;
-        RegresValueMap [reg_index + 1, (t-1) * num_attrs + i] = -1.0 + zero;  # 1st factor is -x[t]
-        if (i == 9) {
-            RegresValueMap [reg_index + 2, (t-1) * num_attrs + 19] = 1.0 + zero; # 2nd factor: Row#19[t]
-        } else {
-            RegresValueMap [reg_index + 2, (t-1) * num_attrs +  9] = 1.0 + zero; # 2nd factor: Row#09[t]
-        }
-        if (t == 1) {
-            RegresValueMap [reg_index + 3, i] = 1.0 + zero; # For t = 1 the 3rd factor is x[t] = x[1]
-        } else {
-            RegresValueMap [reg_index + 3, (t-2) * num_attrs + i] =  1.0 + zero; # 3rd factor: x[t-1]
-        }
-        if (t >= 3) {
-            RegresValueMap [reg_index + 4, (t-2) * num_attrs + i] =  1.0 + zero; # 4th factor is
-            RegresValueMap [reg_index + 4, (t-3) * num_attrs + i] = -1.0 + zero; #   x[t-1] - x[t-2]
-        }
-    }
-
-# Group 4 attributes:
-    for (i in 16 : 18) {
-        reg_index = ((t-1) * num_attrs - 1 + i) * num_factors;
-        RegresValueMap [reg_index + 1, (t-1) * num_attrs + i] = -1.0 + zero;  # 1st factor is -x[t]
-        if (i == 16) {
-            RegresValueMap [reg_index + 2, (t-1) * num_attrs + 19] = 1.0 + zero; # 2nd factor: Row#19[t]
-        } else {
-            RegresValueMap [reg_index + 2, (t-1) * num_attrs + 16] = 1.0 + zero; # 2nd factor: Row#16[t]
-        }
-        if (t == 1) {
-            RegresValueMap [reg_index + 3, i] = 1.0 + zero; # For t = 1 the 3rd factor is x[t] = x[1]
-        } else {
-            RegresValueMap [reg_index + 3, (t-2) * num_attrs + i] =  1.0 + zero; # 3rd factor: x[t-1]
-        }
-        if (t >= 3) {
-            RegresValueMap [reg_index + 4, (t-2) * num_attrs + i] =  1.0 + zero; # 4th factor is
-            RegresValueMap [reg_index + 4, (t-3) * num_attrs + i] = -1.0 + zero; #   x[t-1] - x[t-2]
-        }
-    }
-
-# Group 5 attribute:
-    reg_index = ((t-1) * num_attrs - 1 + 19) * num_factors;
-    if (t >= 2) {
-        RegresValueMap [reg_index + 1, (t-1) * num_attrs + 19] = -1.0 + zero; # 1st factor: -x[t]
-        RegresFactorDefault [reg_index + 2, 1]                 =  1.0 + zero; # 2nd factor: Intercept
-        RegresValueMap [reg_index + 3, (t-2) * num_attrs + 19] =  1.0 + zero; # 3rd factor: x[t-1]
-    }
-    if (t >= 3) {
-        RegresValueMap [reg_index + 4, (t-2) * num_attrs + 19] =  1.0 + zero; # 4th factor is
-        RegresValueMap [reg_index + 4, (t-3) * num_attrs + 19] = -1.0 + zero; #   x[t-1] - x[t-2]
-    }
-}
-
-reg_index = num_terms * num_attrs * num_factors;
-for (i in 1:num_special_regs)
-{
-    RegresFactorDefault [reg_index + 1, 1] =  1.0 + zero;
-    RegresFactorDefault [reg_index + 2, 1] = -1.0 + zero;
-    reg_index = reg_index + num_factors;
-}
-
-# ----------------------------------------------------------
-# GENERATE AN AFFINE MAP FROM PARAMETERS TO THE COEFFICIENTS
-# AT REGRESSION FACTORS: A LINEAR MAP + A VECTOR OF DEFAULTS
-# ----------------------------------------------------------
-
-RegresParamMap = matrix (0.0, rows = (num_reg_eqs * num_factors), cols = num_params);
-RegresCoeffDefault = matrix (0.0, rows = (num_reg_eqs * num_factors), cols = 1);
-
-for (t in 1 : num_terms) {
-# Group 1 attributes:
-    reg_index = ((t-1) * num_attrs - 1 + 1) * num_factors;
-    RegresCoeffDefault [reg_index + 1, 1] = 1.0 + zero;  # Default coefficient = 1.0
-    RegresParamMap [reg_index + 2,  1]    = 1.0 + zero;  # Param #01
-    RegresParamMap [reg_index + 3,  8]    = 1.0 + zero;  # Param #08
-    RegresParamMap [reg_index + 4,  9]    = 1.0 + zero;  # Param #09
-    for (i in 2 : 7) {
-        reg_index = ((t-1) * num_attrs - 1 + i) * num_factors;
-        RegresCoeffDefault [reg_index + 1, 1]  = 1.0 + zero;  # Default coefficient = 1.0
-        RegresParamMap [reg_index + 2,  i]     = 1.0 + zero;  # Param #02-#07
-        RegresParamMap [reg_index + 3, 10]     = 1.0 + zero;  # Param #10
-        RegresParamMap [reg_index + 4, 11]     = 1.0 + zero;  # Param #11
-    }
-# Group 2 attribute:
-    reg_index = ((t-1) * num_attrs - 1 + 8) * num_factors;
-    RegresCoeffDefault [reg_index + 1, 1] = 1.0 + zero;  # Default coefficient = 1.0
-    RegresParamMap [reg_index + 2, 12] = 1.0 + zero;  # Param #12
-    RegresParamMap [reg_index + 3, 13] = 1.0 + zero;  # Param #13
-    RegresParamMap [reg_index + 4, 14] = 1.0 + zero;  # Param #14
-# Group 3 attributes:
-    reg_index = ((t-1) * num_attrs - 1 + 9) * num_factors;
-    RegresCoeffDefault [reg_index + 1, 1]  = 1.0 + zero;  # Default coefficient = 1.0
-    RegresParamMap [reg_index + 2, 15]     = 1.0 + zero;  # Param #15
-    RegresParamMap [reg_index + 3, 22]     = 1.0 + zero;  # Param #22
-    RegresParamMap [reg_index + 4, 23]     = 1.0 + zero;  # Param #23
-    for (i in 10 : 15) {
-        reg_index = ((t-1) * num_attrs - 1 + i) * num_factors;
-        RegresCoeffDefault [reg_index + 1, 1]  = 1.0 + zero;  # Default coefficient = 1.0
-        RegresParamMap [reg_index + 2,  6 + i] = 1.0 + zero;  # Param #16-#21
-        RegresParamMap [reg_index + 3, 24]     = 1.0 + zero;  # Param #24
-        RegresParamMap [reg_index + 4, 25]     = 1.0 + zero;  # Param #25
-    }
-    
-# Group 4 attributes:
-if (is_GROUP_4_ENABLED == 1) {
-    reg_index = ((t-1) * num_attrs - 1 + 16) * num_factors;
-    RegresCoeffDefault [reg_index + 1, 1]  = 1.0 + zero;  # Default coefficient = 1.0
-    RegresParamMap [reg_index + 2, 29]     = 1.0 + zero;  # Param #29
-    RegresParamMap [reg_index + 3, 32]     = 1.0 + zero;  # Param #32
-    RegresParamMap [reg_index + 4, 33]     = 1.0 + zero;  # Param #33
-    for (i in 17 : 18) {
-        reg_index = ((t-1) * num_attrs - 1 + i) * num_factors;
-        RegresCoeffDefault [reg_index + 1, 1]  = 1.0 + zero;  # Default coefficient = 1.0
-        RegresParamMap [reg_index + 2, 13 + i] = 1.0 + zero;  # Param #30-#31
-        RegresParamMap [reg_index + 3, 34]     = 1.0 + zero;  # Param #34
-        RegresParamMap [reg_index + 4, 35]     = 1.0 + zero;  # Param #35
-    }
-}
-
-# Group 5 attribute:
-    reg_index = ((t-1) * num_attrs - 1 + 19) * num_factors;
-    RegresCoeffDefault [reg_index + 1, 1] = 1.0 + zero;  # Default coefficient = 1.0
-    RegresParamMap [reg_index + 2, 26] = 1.0 + zero;  # Param #26
-    RegresParamMap [reg_index + 3, 27] = 1.0 + zero;  # Param #27
-    RegresParamMap [reg_index + 4, 28] = 1.0 + zero;  # Param #28
-}
-
-reg_index = num_terms * num_attrs * num_factors;
-    RegresParamMap [reg_index + 1, 27] = 1.0 + zero;  # Param #27
-    RegresCoeffDefault [reg_index + 2, 1] = 1.0 + zero;
-reg_index = reg_index + num_factors;
-    RegresParamMap [reg_index + 1, 28] = 1.0 + zero;  # Param #28
-reg_index = reg_index + num_factors;
-    RegresParamMap [reg_index + 1, 08] = 1.0 + zero;  # Param #08
-reg_index = reg_index + num_factors;
-    RegresParamMap [reg_index + 1, 09] = 1.0 + zero;  # Param #09
-reg_index = reg_index + num_factors;
-    RegresParamMap [reg_index + 1, 10] = 1.0 + zero;  # Param #10
-reg_index = reg_index + num_factors;
-    RegresParamMap [reg_index + 1, 11] = 1.0 + zero;  # Param #11
-reg_index = reg_index + num_factors;
-    RegresParamMap [reg_index + 1, 13] = 1.0 + zero;  # Param #13
-reg_index = reg_index + num_factors;
-    RegresParamMap [reg_index + 1, 14] = 1.0 + zero;  # Param #14
-reg_index = reg_index + num_factors;
-    RegresParamMap [reg_index + 1, 22] = 1.0 + zero;  # Param #22
-reg_index = reg_index + num_factors;
-    RegresParamMap [reg_index + 1, 23] = 1.0 + zero;  # Param #23
-reg_index = reg_index + num_factors;
-    RegresParamMap [reg_index + 1, 24] = 1.0 + zero;  # Param #24
-reg_index = reg_index + num_factors;
-    RegresParamMap [reg_index + 1, 25] = 1.0 + zero;  # Param #25
-
-if (is_GROUP_4_ENABLED == 1) {
-    reg_index = reg_index + num_factors;
-        RegresParamMap [reg_index + 1, 32] = 1.0 + zero;  # Param #32
-    reg_index = reg_index + num_factors;
-        RegresParamMap [reg_index + 1, 33] = 1.0 + zero;  # Param #33
-    reg_index = reg_index + num_factors;
-        RegresParamMap [reg_index + 1, 34] = 1.0 + zero;  # Param #34
-    reg_index = reg_index + num_factors;
-        RegresParamMap [reg_index + 1, 35] = 1.0 + zero;  # Param #35
-}
-
-# ----------------------------------------------------------
-# GENERATE A VECTOR OF SCALE MULTIPLIERS, ONE PER REGRESSION
-# ----------------------------------------------------------
-
-RegresScaleMult = matrix (1.0, rows = num_reg_eqs, cols = 1);
-initial_reports = read ($1);
-
-global_weight = 0.5 + zero;
-
-attribute_size = rowMeans (abs (initial_reports [, 1:num_known_terms]));
-max_attr_size = max (attribute_size);
-
-for (t in 1 : num_terms) {
-    for (i in 1 : num_attrs) {
-        regeqn = (t-1) * num_attrs + i;
-        scale_down = sqrt (attribute_size [i, 1] / max_attr_size) * 0.999 + 0.001;
-        acceptable_drift = scale_down * max_attr_size * 0.001;
-        RegresScaleMult [regeqn, 1] = global_weight / (acceptable_drift^2);
-    }
-}
-
-for (i in 1 : num_special_regs) {
-    regeqn = num_terms * num_attrs + i;
-    acceptable_drift = 0.01;
-    RegresScaleMult [regeqn, 1] = global_weight / (acceptable_drift^2);
-}
-
-# --------------------------------
-# WRITE OUT ALL GENERATED MATRICES
-# --------------------------------
-
-# write (initial_reports,    $1, format="text");
-write (CReps,              $2, format="text");
-write (RegresValueMap,     $3, format="text");
-write (RegresFactorDefault,$4, format="text");
-write (RegresParamMap,     $5, format="text");
-write (RegresCoeffDefault, $6, format="text");
-write (RegresScaleMult,    $7, format="text");
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# hadoop jar SystemML.jar -f test/scripts/applications/impute/wfundInputGenerator.dml -exec singlenode
+#    -args
+#        test/scripts/applications/impute/initial_reports
+#        test/scripts/applications/impute/CReps 
+#        test/scripts/applications/impute/RegresValueMap
+#        test/scripts/applications/impute/RegresFactorDefault
+#        test/scripts/applications/impute/RegresParamMap
+#        test/scripts/applications/impute/RegresCoeffDefault
+#        test/scripts/applications/impute/RegresScaleMult
+
+is_GROUP_4_ENABLED = 1;   #   = 1 or 0
+num_known_terms = 6;      # The number of   known   term reports, feel free to change
+num_predicted_terms = 1;  # The number of predicted term reports, feel free to change
+
+num_terms = num_known_terms + num_predicted_terms;
+num_attrs = 19;  
+
+num_frees_per_term = 13;
+if (is_GROUP_4_ENABLED == 1) {
+    num_frees_per_term = 15;
+}
+num_frees = num_predicted_terms * num_frees_per_term;
+
+zero = matrix (0.0, rows = 1, cols = 1);
+
+# ---------------------------------------------------------
+# GENERATE AN AFFINE MAP FROM FREE VARIABLES TO THE REPORTS
+# AFFINE MAP = LINEAR MAP + INITIAL (DEFAULT) REPORTS
+# ---------------------------------------------------------
+
+CReps = matrix (0.0, rows = (num_terms * num_attrs), cols = num_frees);
+
+for (dt in 1:num_predicted_terms)
+{
+    ta_shift = (num_known_terms + dt - 1) * num_attrs;
+    fv_shift = (dt - 1) * num_frees_per_term;
+# constraint that          row1 =  row2 +  row3 +  row4 +  row5 +  row6 + row7
+# translated to free vars: row1 = free1 + free2 + free3 + free4 + free5 + free6
+    CReps [ta_shift +  1, fv_shift +  1] = 1.0 + zero;
+    CReps [ta_shift +  1, fv_shift +  2] = 1.0 + zero;
+    CReps [ta_shift +  1, fv_shift +  3] = 1.0 + zero;
+    CReps [ta_shift +  1, fv_shift +  4] = 1.0 + zero;
+    CReps [ta_shift +  1, fv_shift +  5] = 1.0 + zero;
+    CReps [ta_shift +  1, fv_shift +  6] = 1.0 + zero;
+    CReps [ta_shift +  2, fv_shift +  1] = 1.0 + zero;
+    CReps [ta_shift +  3, fv_shift +  2] = 1.0 + zero;
+    CReps [ta_shift +  4, fv_shift +  3] = 1.0 + zero;
+    CReps [ta_shift +  5, fv_shift +  4] = 1.0 + zero;
+    CReps [ta_shift +  6, fv_shift +  5] = 1.0 + zero;
+    CReps [ta_shift +  7, fv_shift +  6] = 1.0 + zero;
+
+# row 8 is free variable not appearing in any non-free variable
+    CReps [ta_shift +  8, fv_shift +  7] = 1.0 + zero;
+
+# constraint that          row9 = row10 + row11 +  row12 +  row13 +  row14 +  row15
+# translated to free vars: row9 = free8 + free9 + free10 + free11 + free12 + free13
+    CReps [ta_shift +  9, fv_shift +  8] = 1.0 + zero;
+    CReps [ta_shift +  9, fv_shift +  9] = 1.0 + zero;
+    CReps [ta_shift +  9, fv_shift + 10] = 1.0 + zero;
+    CReps [ta_shift +  9, fv_shift + 11] = 1.0 + zero;
+    CReps [ta_shift +  9, fv_shift + 12] = 1.0 + zero;
+    CReps [ta_shift +  9, fv_shift + 13] = 1.0 + zero;
+    CReps [ta_shift + 10, fv_shift +  8] = 1.0 + zero;
+    CReps [ta_shift + 11, fv_shift +  9] = 1.0 + zero;
+    CReps [ta_shift + 12, fv_shift + 10] = 1.0 + zero;
+    CReps [ta_shift + 13, fv_shift + 11] = 1.0 + zero;
+    CReps [ta_shift + 14, fv_shift + 12] = 1.0 + zero;
+    CReps [ta_shift + 15, fv_shift + 13] = 1.0 + zero;
+
+# constraint that          row16 =  row17 +  row18
+# translated to free vars: row16 = free14 + free15
+    if (is_GROUP_4_ENABLED == 1) {
+        CReps [ta_shift + 16, fv_shift + 14] = 1.0 + zero;
+        CReps [ta_shift + 16, fv_shift + 15] = 1.0 + zero;
+        CReps [ta_shift + 17, fv_shift + 14] = 1.0 + zero;
+        CReps [ta_shift + 18, fv_shift + 15] = 1.0 + zero;
+    }
+
+# constraint that           row19 = total cost (all free variables)
+# translated to free vars:  row19 = all free variables
+    CReps [ta_shift + 19, fv_shift +  1] = 1.0 + zero;
+    CReps [ta_shift + 19, fv_shift +  2] = 1.0 + zero;
+    CReps [ta_shift + 19, fv_shift +  3] = 1.0 + zero;
+    CReps [ta_shift + 19, fv_shift +  4] = 1.0 + zero;
+    CReps [ta_shift + 19, fv_shift +  5] = 1.0 + zero;
+    CReps [ta_shift + 19, fv_shift +  6] = 1.0 + zero;
+    CReps [ta_shift + 19, fv_shift +  7] = 1.0 + zero;
+    CReps [ta_shift + 19, fv_shift +  8] = 1.0 + zero;
+    CReps [ta_shift + 19, fv_shift +  9] = 1.0 + zero;
+    CReps [ta_shift + 19, fv_shift + 10] = 1.0 + zero;
+    CReps [ta_shift + 19, fv_shift + 11] = 1.0 + zero;
+    CReps [ta_shift + 19, fv_shift + 12] = 1.0 + zero;
+    CReps [ta_shift + 19, fv_shift + 13] = 1.0 + zero;
+    if (is_GROUP_4_ENABLED == 1) {
+        CReps [ta_shift + 19, fv_shift + 14] = 1.0 + zero;
+        CReps [ta_shift + 19, fv_shift + 15] = 1.0 + zero;
+    }
+}
+
+# ---------------------------------------------------------
+# GENERATE AN AFFINE MAP FROM REPORTS TO REGRESSION FACTORS
+# AFFINE MAP = LINEAR MAP + A VECTOR OF DEFAULTS
+# ---------------------------------------------------------
+
+# In all regressions, except the last few "special" ones, there are 4 factors:
+# x[t]  ~  aggregate[t], x[t-1],  (x[t-1] - x[t-2])
+# The last regressions are for regularization, but they also follow the 4-factor pattern.
+num_factors = 4; 
+
+# We have one regression equation per time-term for each attribute, 
+# plus a few "special" regularization regression equations:
+num_special_regs = 12;
+if (is_GROUP_4_ENABLED == 1) {
+    num_special_regs = 16;
+}
+
+num_reg_eqs = num_terms * num_attrs + num_special_regs;
+
+RegresValueMap = matrix (0.0, rows = (num_reg_eqs * num_factors), cols = (num_terms * num_attrs));
+RegresFactorDefault = matrix (0.0, rows = (num_reg_eqs * num_factors), cols = 1);
+
+# All regression equations for the same attribute share the same parameters, regardless
+# of the term; some parameters are shared across multiple attributes, (those attributes
+# whose behavior is believed to be similar) as specified in the table below:
+
+num_params = 28;
+if (is_GROUP_4_ENABLED == 1) {
+    num_params = 35;
+}
+
+# Factors: -self[t]  total[t]  self[t-1]  self[t-1]-
+#                                          self[t-2]
+# PARAMS:
+# Group 1:   1.0     prm#01     prm#08     prm#09    Row #01 = free#01 + ... + free#06
+# Group 1:    "      prm#02     prm#10     prm#11    Row #02 = free#01
+# Group 1:    "      prm#03       "          "       Row #03 = free#02
+# Group 1:    "      prm#04       "          "       Row #04 = free#03
+# Group 1:    "      prm#05       "          "       Row #05 = free#04
+# Group 1:    "      prm#06       "          "       Row #06 = free#05
+# Group 1:    "      prm#07       "          "       Row #07 = free#06
+# --------------------------------------------------------------------
+# Group 2:   1.0     prm#12     prm#13     prm#14    Row #08 = free#07
+# --------------------------------------------------------------------
+# Group 3:   1.0     prm#15     prm#22     prm#23    Row #09 = free#08 + ... + free#13
+# Group 3:    "      prm#16     prm#24     prm#25    Row #10 = free#08
+# Group 3:    "      prm#17       "          "       Row #11 = free#09
+# Group 3:    "      prm#18       "          "       Row #12 = free#10
+# Group 3:    "      prm#19       "          "       Row #13 = free#11
+# Group 3:    "      prm#20       "          "       Row #14 = free#12
+# Group 3:    "      prm#21       "          "       Row #15 = free#13
+# --------------------------------------------------------------------
+# GROUP-4 ZEROS: FIVE PARAMETERS REVOKED
+# Group 4:   1.0     prm#29     prm#32     prm#33    Row #16 = free#14 + free#15
+# Group 4:    "      prm#30     prm#34     prm#35    Row #17 = free#14
+# Group 4:    "      prm#31       "          "       Row #18 = free#15
+# --------------------------------------------------------------------
+# Group 5:   1.0     prm#26     prm#27     prm#28    Row #19 = free#01 + ... + free#15
+# 
+# (The aggregates in Groups 1..4 regress on the total cost in Group 5;
+#  the total cost in Group 5 regresses on the intercept.)
+
+# THE LAST FEW "SPECIAL" REGULARIZATION EQUATIONS:
+# Factors:   1.0      -1.0       0.0        0.0
+# PARAMS:
+#          prm#27      1.0       0.0        0.0
+#          prm#28      0.0       0.0        0.0
+#          prm#08      0.0       0.0        0.0
+#          prm#09      0.0       0.0        0.0
+#          prm#10      0.0       0.0        0.0
+#          prm#11      0.0       0.0        0.0
+#          prm#13      0.0       0.0        0.0
+#          prm#14      0.0       0.0        0.0
+#          prm#22      0.0       0.0        0.0
+#          prm#23      0.0       0.0        0.0
+#          prm#24      0.0       0.0        0.0
+#          prm#25      0.0       0.0        0.0
+#          prm#32      0.0       0.0        0.0  # GROUP-4 ZEROS:
+#          prm#33      0.0       0.0        0.0  #   THESE EQUATIONS
+#          prm#34      0.0       0.0        0.0  #   USE REVOKED PARAMETERS
+#          prm#35      0.0       0.0        0.0  #   AND DO NOT APPEAR
+
+
+
+for (t in 1 : num_terms)
+{
+# Group 1 attributes:
+    for (i in 1 : 7) {
+        reg_index = ((t-1) * num_attrs - 1 + i) * num_factors;
+        RegresValueMap [reg_index + 1, (t-1) * num_attrs + i] = -1.0 + zero;  # 1st factor is -x[t]
+        if (i == 1) {
+            RegresValueMap [reg_index + 2, (t-1) * num_attrs + 19] =  1.0 + zero; # 2nd factor: Row#19[t]
+        } else {
+            RegresValueMap [reg_index + 2, (t-1) * num_attrs +  1] =  1.0 + zero; # 2nd factor: Row#01[t]
+        }
+        if (t == 1) {
+            RegresValueMap [reg_index + 3, i] = 1.0 + zero; # For t = 1 the 3rd factor is x[t] = x[1]
+        } else {
+            RegresValueMap [reg_index + 3, (t-2) * num_attrs +  i] =  1.0 + zero; # 3rd factor: x[t-1]
+        }
+        if (t >= 3) {
+            RegresValueMap [reg_index + 4, (t-2) * num_attrs +  i] =  1.0 + zero; # 4th factor is
+            RegresValueMap [reg_index + 4, (t-3) * num_attrs +  i] = -1.0 + zero; #   x[t-1] - x[t-2]
+        }
+    }
+
+# Group 2 attribute:
+    reg_index = ((t-1) * num_attrs - 1 + 8) * num_factors;
+    RegresValueMap [reg_index + 1, (t-1) * num_attrs +  8]     = -1.0 + zero;  # 1st factor is -x[t]
+    RegresValueMap [reg_index + 2, (t-1) * num_attrs + 19]     =  1.0 + zero;  # 2nd factor: Row#19[t]
+    if (t == 1) {
+        RegresValueMap [reg_index + 3, 8] = 1.0 + zero; # For t = 1 the 3rd factor is x[t] = x[1]
+    } else {
+        RegresValueMap [reg_index + 3, (t-2) * num_attrs +  8] =  1.0 + zero;  # 3rd factor: x[t-1]
+    }
+    if (t >= 3) {
+        RegresValueMap [reg_index + 4, (t-2) * num_attrs +  8] =  1.0 + zero;  # 4th factor is
+        RegresValueMap [reg_index + 4, (t-3) * num_attrs +  8] = -1.0 + zero;  #   x[t-1] - x[t-2]
+    }
+
+# Group 3 attributes:
+    for (i in 9 : 15) {
+        reg_index = ((t-1) * num_attrs - 1 + i) * num_factors;
+        RegresValueMap [reg_index + 1, (t-1) * num_attrs + i] = -1.0 + zero;  # 1st factor is -x[t]
+        if (i == 9) {
+            RegresValueMap [reg_index + 2, (t-1) * num_attrs + 19] = 1.0 + zero; # 2nd factor: Row#19[t]
+        } else {
+            RegresValueMap [reg_index + 2, (t-1) * num_attrs +  9] = 1.0 + zero; # 2nd factor: Row#09[t]
+        }
+        if (t == 1) {
+            RegresValueMap [reg_index + 3, i] = 1.0 + zero; # For t = 1 the 3rd factor is x[t] = x[1]
+        } else {
+            RegresValueMap [reg_index + 3, (t-2) * num_attrs + i] =  1.0 + zero; # 3rd factor: x[t-1]
+        }
+        if (t >= 3) {
+            RegresValueMap [reg_index + 4, (t-2) * num_attrs + i] =  1.0 + zero; # 4th factor is
+            RegresValueMap [reg_index + 4, (t-3) * num_attrs + i] = -1.0 + zero; #   x[t-1] - x[t-2]
+        }
+    }
+
+# Group 4 attributes:
+    for (i in 16 : 18) {
+        reg_index = ((t-1) * num_attrs - 1 + i) * num_factors;
+        RegresValueMap [reg_index + 1, (t-1) * num_attrs + i] = -1.0 + zero;  # 1st factor is -x[t]
+        if (i == 16) {
+            RegresValueMap [reg_index + 2, (t-1) * num_attrs + 19] = 1.0 + zero; # 2nd factor: Row#19[t]
+        } else {
+            RegresValueMap [reg_index + 2, (t-1) * num_attrs + 16] = 1.0 + zero; # 2nd factor: Row#16[t]
+        }
+        if (t == 1) {
+            RegresValueMap [reg_index + 3, i] = 1.0 + zero; # For t = 1 the 3rd factor is x[t] = x[1]
+        } else {
+            RegresValueMap [reg_index + 3, (t-2) * num_attrs + i] =  1.0 + zero; # 3rd factor: x[t-1]
+        }
+        if (t >= 3) {
+            RegresValueMap [reg_index + 4, (t-2) * num_attrs + i] =  1.0 + zero; # 4th factor is
+            RegresValueMap [reg_index + 4, (t-3) * num_attrs + i] = -1.0 + zero; #   x[t-1] - x[t-2]
+        }
+    }
+
+# Group 5 attribute:
+    reg_index = ((t-1) * num_attrs - 1 + 19) * num_factors;
+    if (t >= 2) {
+        RegresValueMap [reg_index + 1, (t-1) * num_attrs + 19] = -1.0 + zero; # 1st factor: -x[t]
+        RegresFactorDefault [reg_index + 2, 1]                 =  1.0 + zero; # 2nd factor: Intercept
+        RegresValueMap [reg_index + 3, (t-2) * num_attrs + 19] =  1.0 + zero; # 3rd factor: x[t-1]
+    }
+    if (t >= 3) {
+        RegresValueMap [reg_index + 4, (t-2) * num_attrs + 19] =  1.0 + zero; # 4th factor is
+        RegresValueMap [reg_index + 4, (t-3) * num_attrs + 19] = -1.0 + zero; #   x[t-1] - x[t-2]
+    }
+}
+
+reg_index = num_terms * num_attrs * num_factors;
+for (i in 1:num_special_regs)
+{
+    RegresFactorDefault [reg_index + 1, 1] =  1.0 + zero;
+    RegresFactorDefault [reg_index + 2, 1] = -1.0 + zero;
+    reg_index = reg_index + num_factors;
+}
+
+# ----------------------------------------------------------
+# GENERATE AN AFFINE MAP FROM PARAMETERS TO THE COEFFICIENTS
+# AT REGRESSION FACTORS: A LINEAR MAP + A VECTOR OF DEFAULTS
+# ----------------------------------------------------------
+
+RegresParamMap = matrix (0.0, rows = (num_reg_eqs * num_factors), cols = num_params);
+RegresCoeffDefault = matrix (0.0, rows = (num_reg_eqs * num_factors), cols = 1);
+
+for (t in 1 : num_terms) {
+# Group 1 attributes:
+    reg_index = ((t-1) * num_attrs - 1 + 1) * num_factors;
+    RegresCoeffDefault [reg_index + 1, 1] = 1.0 + zero;  # Default coefficient = 1.0
+    RegresParamMap [reg_index + 2,  1]    = 1.0 + zero;  # Param #01
+    RegresParamMap [reg_index + 3,  8]    = 1.0 + zero;  # Param #08
+    RegresParamMap [reg_index + 4,  9]    = 1.0 + zero;  # Param #09
+    for (i in 2 : 7) {
+        reg_index = ((t-1) * num_attrs - 1 + i) * num_factors;
+        RegresCoeffDefault [reg_index + 1, 1]  = 1.0 + zero;  # Default coefficient = 1.0
+        RegresParamMap [reg_index + 2,  i]     = 1.0 + zero;  # Param #02-#07
+        RegresParamMap [reg_index + 3, 10]     = 1.0 + zero;  # Param #10
+        RegresParamMap [reg_index + 4, 11]     = 1.0 + zero;  # Param #11
+    }
+# Group 2 attribute:
+    reg_index = ((t-1) * num_attrs - 1 + 8) * num_factors;
+    RegresCoeffDefault [reg_index + 1, 1] = 1.0 + zero;  # Default coefficient = 1.0
+    RegresParamMap [reg_index + 2, 12] = 1.0 + zero;  # Param #12
+    RegresParamMap [reg_index + 3, 13] = 1.0 + zero;  # Param #13
+    RegresParamMap [reg_index + 4, 14] = 1.0 + zero;  # Param #14
+# Group 3 attributes:
+    reg_index = ((t-1) * num_attrs - 1 + 9) * num_factors;
+    RegresCoeffDefault [reg_index + 1, 1]  = 1.0 + zero;  # Default coefficient = 1.0
+    RegresParamMap [reg_index + 2, 15]     = 1.0 + zero;  # Param #15
+    RegresParamMap [reg_index + 3, 22]     = 1.0 + zero;  # Param #22
+    RegresParamMap [reg_index + 4, 23]     = 1.0 + zero;  # Param #23
+    for (i in 10 : 15) {
+        reg_index = ((t-1) * num_attrs - 1 + i) * num_factors;
+        RegresCoeffDefault [reg_index + 1, 1]  = 1.0 + zero;  # Default coefficient = 1.0
+        RegresParamMap [reg_index + 2,  6 + i] = 1.0 + zero;  # Param #16-#21
+        RegresParamMap [reg_index + 3, 24]     = 1.0 + zero;  # Param #24
+        RegresParamMap [reg_index + 4, 25]     = 1.0 + zero;  # Param #25
+    }
+    
+# Group 4 attributes:
+if (is_GROUP_4_ENABLED == 1) {
+    reg_index = ((t-1) * num_attrs - 1 + 16) * num_factors;
+    RegresCoeffDefault [reg_index + 1, 1]  = 1.0 + zero;  # Default coefficient = 1.0
+    RegresParamMap [reg_index + 2, 29]     = 1.0 + zero;  # Param #29
+    RegresParamMap [reg_index + 3, 32]     = 1.0 + zero;  # Param #32
+    RegresParamMap [reg_index + 4, 33]     = 1.0 + zero;  # Param #33
+    for (i in 17 : 18) {
+        reg_index = ((t-1) * num_attrs - 1 + i) * num_factors;
+        RegresCoeffDefault [reg_index + 1, 1]  = 1.0 + zero;  # Default coefficient = 1.0
+        RegresParamMap [reg_index + 2, 13 + i] = 1.0 + zero;  # Param #30-#31
+        RegresParamMap [reg_index + 3, 34]     = 1.0 + zero;  # Param #34
+        RegresParamMap [reg_index + 4, 35]     = 1.0 + zero;  # Param #35
+    }
+}
+
+# Group 5 attribute:
+    reg_index = ((t-1) * num_attrs - 1 + 19) * num_factors;
+    RegresCoeffDefault [reg_index + 1, 1] = 1.0 + zero;  # Default coefficient = 1.0
+    RegresParamMap [reg_index + 2, 26] = 1.0 + zero;  # Param #26
+    RegresParamMap [reg_index + 3, 27] = 1.0 + zero;  # Param #27
+    RegresParamMap [reg_index + 4, 28] = 1.0 + zero;  # Param #28
+}
+
+reg_index = num_terms * num_attrs * num_factors;
+    RegresParamMap [reg_index + 1, 27] = 1.0 + zero;  # Param #27
+    RegresCoeffDefault [reg_index + 2, 1] = 1.0 + zero;
+reg_index = reg_index + num_factors;
+    RegresParamMap [reg_index + 1, 28] = 1.0 + zero;  # Param #28
+reg_index = reg_index + num_factors;
+    RegresParamMap [reg_index + 1, 08] = 1.0 + zero;  # Param #08
+reg_index = reg_index + num_factors;
+    RegresParamMap [reg_index + 1, 09] = 1.0 + zero;  # Param #09
+reg_index = reg_index + num_factors;
+    RegresParamMap [reg_index + 1, 10] = 1.0 + zero;  # Param #10
+reg_index = reg_index + num_factors;
+    RegresParamMap [reg_index + 1, 11] = 1.0 + zero;  # Param #11
+reg_index = reg_index + num_factors;
+    RegresParamMap [reg_index + 1, 13] = 1.0 + zero;  # Param #13
+reg_index = reg_index + num_factors;
+    RegresParamMap [reg_index + 1, 14] = 1.0 + zero;  # Param #14
+reg_index = reg_index + num_factors;
+    RegresParamMap [reg_index + 1, 22] = 1.0 + zero;  # Param #22
+reg_index = reg_index + num_factors;
+    RegresParamMap [reg_index + 1, 23] = 1.0 + zero;  # Param #23
+reg_index = reg_index + num_factors;
+    RegresParamMap [reg_index + 1, 24] = 1.0 + zero;  # Param #24
+reg_index = reg_index + num_factors;
+    RegresParamMap [reg_index + 1, 25] = 1.0 + zero;  # Param #25
+
+if (is_GROUP_4_ENABLED == 1) {
+    reg_index = reg_index + num_factors;
+        RegresParamMap [reg_index + 1, 32] = 1.0 + zero;  # Param #32
+    reg_index = reg_index + num_factors;
+        RegresParamMap [reg_index + 1, 33] = 1.0 + zero;  # Param #33
+    reg_index = reg_index + num_factors;
+        RegresParamMap [reg_index + 1, 34] = 1.0 + zero;  # Param #34
+    reg_index = reg_index + num_factors;
+        RegresParamMap [reg_index + 1, 35] = 1.0 + zero;  # Param #35
+}
+
+# ----------------------------------------------------------
+# GENERATE A VECTOR OF SCALE MULTIPLIERS, ONE PER REGRESSION
+# ----------------------------------------------------------
+
+RegresScaleMult = matrix (1.0, rows = num_reg_eqs, cols = 1);
+initial_reports = read ($1);
+
+global_weight = 0.5 + zero;
+
+attribute_size = rowMeans (abs (initial_reports [, 1:num_known_terms]));
+max_attr_size = max (attribute_size);
+
+for (t in 1 : num_terms) {
+    for (i in 1 : num_attrs) {
+        regeqn = (t-1) * num_attrs + i;
+        scale_down = sqrt (attribute_size [i, 1] / max_attr_size) * 0.999 + 0.001;
+        acceptable_drift = scale_down * max_attr_size * 0.001;
+        RegresScaleMult [regeqn, 1] = global_weight / (acceptable_drift^2);
+    }
+}
+
+for (i in 1 : num_special_regs) {
+    regeqn = num_terms * num_attrs + i;
+    acceptable_drift = 0.01;
+    RegresScaleMult [regeqn, 1] = global_weight / (acceptable_drift^2);
+}
+
+# --------------------------------
+# WRITE OUT ALL GENERATED MATRICES
+# --------------------------------
+
+# write (initial_reports,    $1, format="text");
+write (CReps,              $2, format="text");
+write (RegresValueMap,     $3, format="text");
+write (RegresFactorDefault,$4, format="text");
+write (RegresParamMap,     $5, format="text");
+write (RegresCoeffDefault, $6, format="text");
+write (RegresScaleMult,    $7, format="text");


[04/55] [partial] incubator-systemml git commit: [SYSTEMML-482] [SYSTEMML-480] Adding a Git attributes file to enforce Unix-styled line endings, and normalizing all of the line endings.

Posted by du...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/recompile/constant_propagation_while.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/recompile/constant_propagation_while.R b/src/test/scripts/functions/recompile/constant_propagation_while.R
index d764d8f..a6bfa9a 100644
--- a/src/test/scripts/functions/recompile/constant_propagation_while.R
+++ b/src/test/scripts/functions/recompile/constant_propagation_while.R
@@ -19,25 +19,25 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-numrows = as.integer(args[1]);
-numcols = as.integer(args[2]);
-
-i = 1;
-while( i<3 )
-{
-   numrows = numrows + 1;
-   numcols = numcols + 2;
-   i = i + 1;
-} 
-
-X = matrix(1, numrows, numcols);
-
-writeMM(as(X, "CsparseMatrix"), paste(args[3], "X", sep="")); 
-
-
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+numrows = as.integer(args[1]);
+numcols = as.integer(args[2]);
+
+i = 1;
+while( i<3 )
+{
+   numrows = numrows + 1;
+   numcols = numcols + 2;
+   i = i + 1;
+} 
+
+X = matrix(1, numrows, numcols);
+
+writeMM(as(X, "CsparseMatrix"), paste(args[3], "X", sep="")); 
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/recompile/constant_propagation_while.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/recompile/constant_propagation_while.dml b/src/test/scripts/functions/recompile/constant_propagation_while.dml
index 98012a0..d6ae610 100644
--- a/src/test/scripts/functions/recompile/constant_propagation_while.dml
+++ b/src/test/scripts/functions/recompile/constant_propagation_while.dml
@@ -19,18 +19,18 @@
 #
 #-------------------------------------------------------------
 
-
-numrows = $1;
-numcols = $2;
-
-i = 1;
-while( i<3 )
-{
-   numrows = numrows + 1;
-   numcols = numcols + 2;
-   i = i + 1;
-}  
-
-X = matrix(1, rows=numrows, cols=numcols);
-
-write(X, $3);       
+
+numrows = $1;
+numcols = $2;
+
+i = 1;
+while( i<3 )
+{
+   numrows = numrows + 1;
+   numcols = numcols + 2;
+   i = i + 1;
+}  
+
+X = matrix(1, rows=numrows, cols=numcols);
+
+write(X, $3);       

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/recompile/csv_read_unknown.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/recompile/csv_read_unknown.dml b/src/test/scripts/functions/recompile/csv_read_unknown.dml
index 08de77b..bcac3ae 100644
--- a/src/test/scripts/functions/recompile/csv_read_unknown.dml
+++ b/src/test/scripts/functions/recompile/csv_read_unknown.dml
@@ -19,16 +19,16 @@
 #
 #-------------------------------------------------------------
 
-
-X = read($1);
-
-#test for multiple parents
-R = X + X;
-R = R - X;
-
-write(R, $2);    
-
-
-
-
+
+X = read($1);
+
+#test for multiple parents
+R = X + X;
+R = R - X;
+
+write(R, $2);    
+
+
+
+
    
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/recompile/for_recompile.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/recompile/for_recompile.dml b/src/test/scripts/functions/recompile/for_recompile.dml
index 04a4f10..96d7dac 100644
--- a/src/test/scripts/functions/recompile/for_recompile.dml
+++ b/src/test/scripts/functions/recompile/for_recompile.dml
@@ -19,11 +19,11 @@
 #
 #-------------------------------------------------------------
 
-
-V = Rand(rows=$1+1, cols=$2+1, min=$3, max=$3);
-Z = Rand(rows=1,cols=1,min=0,max=0);
-for( i in $3:castAsScalar(V[1,1]) )
-{
-   Z[1,1] = V[1,1]; 
-}  
+
+V = Rand(rows=$1+1, cols=$2+1, min=$3, max=$3);
+Z = Rand(rows=1,cols=1,min=0,max=0);
+for( i in $3:castAsScalar(V[1,1]) )
+{
+   Z[1,1] = V[1,1]; 
+}  
 write(Z, $4);       
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/recompile/for_recompile_func_sparse.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/recompile/for_recompile_func_sparse.dml b/src/test/scripts/functions/recompile/for_recompile_func_sparse.dml
index 2eb38ad..3ab662b 100644
--- a/src/test/scripts/functions/recompile/for_recompile_func_sparse.dml
+++ b/src/test/scripts/functions/recompile/for_recompile_func_sparse.dml
@@ -19,36 +19,36 @@
 #
 #-------------------------------------------------------------
 
-
-foo1 = function (Matrix[Double] X)
-    return (Matrix[Double] Y)
-{  
-   V = X;
-   print(sum(V)); 
-   for( i in 1:1 )
-   {
-      print(sum(V));      
-      V = foo2(V,i);
-   }   
-   Y = V;  
-}
-
-foo2 = function (Matrix[Double] X, Integer i)
-    return (Matrix[Double] Y)
-{  
-   V = X;     
-   for( j in 1:1 )
-   {
-      V[i,j] = $2; 
-   } 
-   Y = V;   
-}
-
-V = read($1);
-V = foo1(V); 
-write(V, $3);    
-
-
-
-
+
+foo1 = function (Matrix[Double] X)
+    return (Matrix[Double] Y)
+{  
+   V = X;
+   print(sum(V)); 
+   for( i in 1:1 )
+   {
+      print(sum(V));      
+      V = foo2(V,i);
+   }   
+   Y = V;  
+}
+
+foo2 = function (Matrix[Double] X, Integer i)
+    return (Matrix[Double] Y)
+{  
+   V = X;     
+   for( j in 1:1 )
+   {
+      V[i,j] = $2; 
+   } 
+   Y = V;   
+}
+
+V = read($1);
+V = foo1(V); 
+write(V, $3);    
+
+
+
+
    
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/recompile/for_recompile_sparse.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/recompile/for_recompile_sparse.dml b/src/test/scripts/functions/recompile/for_recompile_sparse.dml
index 7ce9550..05bdb74 100644
--- a/src/test/scripts/functions/recompile/for_recompile_sparse.dml
+++ b/src/test/scripts/functions/recompile/for_recompile_sparse.dml
@@ -19,21 +19,21 @@
 #
 #-------------------------------------------------------------
 
-
-V = read($1);
-print(sum(V)); 
-for( i in 1:1 )
-{
-   print(sum(V)); 
-   
-   for( j in 1:1 )
-   {
-      V[i,j] = $2; 
-   }
-}  
-write(V, $3);    
-
-
-
-
+
+V = read($1);
+print(sum(V)); 
+for( i in 1:1 )
+{
+   print(sum(V)); 
+   
+   for( j in 1:1 )
+   {
+      V[i,j] = $2; 
+   }
+}  
+write(V, $3);    
+
+
+
+
    
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/recompile/funct_recompile.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/recompile/funct_recompile.R b/src/test/scripts/functions/recompile/funct_recompile.R
index 830fb96..0f1e010 100644
--- a/src/test/scripts/functions/recompile/funct_recompile.R
+++ b/src/test/scripts/functions/recompile/funct_recompile.R
@@ -19,21 +19,21 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-V <- readMM(paste(args[1], "V.mtx", sep=""))
-n <- ncol(V); 
-
-R <- array(0,dim=c(n,1))
-
-for( i in 1:n )
-{
-   X <- V[2:nrow(V),i];                 
-   R[i,1] <- sum(X %*% t(X));
-}   
-
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+V <- readMM(paste(args[1], "V.mtx", sep=""))
+n <- ncol(V); 
+
+R <- array(0,dim=c(n,1))
+
+for( i in 1:n )
+{
+   X <- V[2:nrow(V),i];                 
+   R[i,1] <- sum(X %*% t(X));
+}   
+
 writeMM(as(R, "CsparseMatrix"), paste(args[2], "Rout", sep="")); 
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/recompile/funct_recompile.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/recompile/funct_recompile.dml b/src/test/scripts/functions/recompile/funct_recompile.dml
index 2059181..90f757f 100644
--- a/src/test/scripts/functions/recompile/funct_recompile.dml
+++ b/src/test/scripts/functions/recompile/funct_recompile.dml
@@ -19,33 +19,33 @@
 #
 #-------------------------------------------------------------
 
-
-execFun = function(Matrix[Double] Xin) return (Double sx) 
-{
-   X = Xin[2:nrow(Xin),]
-
-   if( nrow(X)>1 )
-   {
-      Y = X %*% t(X);
-      sx = sum(Y);
-   }
-   else
-   {
-      sx = 0;
-   }
-}
-
-V = read($1, rows=$2, cols=$3);
-
-R = Rand(rows=$3,cols=1,min=0,max=0); 
-dummy = Rand(rows=1, cols=1, min=1, max=1);
-
-for( i in 1:$3 ) 
-{
-   X = V[,i];
-   sumx = execFun(X);
-   
-   R[i,1] = dummy * sumx; 
-}  
-
+
+execFun = function(Matrix[Double] Xin) return (Double sx) 
+{
+   X = Xin[2:nrow(Xin),]
+
+   if( nrow(X)>1 )
+   {
+      Y = X %*% t(X);
+      sx = sum(Y);
+   }
+   else
+   {
+      sx = 0;
+   }
+}
+
+V = read($1, rows=$2, cols=$3);
+
+R = Rand(rows=$3,cols=1,min=0,max=0); 
+dummy = Rand(rows=1, cols=1, min=1, max=1);
+
+for( i in 1:$3 ) 
+{
+   X = V[,i];
+   sumx = execFun(X);
+   
+   R[i,1] = dummy * sumx; 
+}  
+
 write(R, $4);       
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/recompile/grpagg_rand_recompile.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/recompile/grpagg_rand_recompile.R b/src/test/scripts/functions/recompile/grpagg_rand_recompile.R
index 70c20d7..0f06ea4 100644
--- a/src/test/scripts/functions/recompile/grpagg_rand_recompile.R
+++ b/src/test/scripts/functions/recompile/grpagg_rand_recompile.R
@@ -19,15 +19,15 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-X <- as.matrix(readMM(paste(args[1], "X.mtx", sep="")))
-
-Y = as.matrix(aggregate(X[,1] ~ X[,2], data=X, length)[,1]);
-Z = matrix(7, nrow(Y)+ncol(Y), nrow(Y)+ncol(Y)+1);
-
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+X <- as.matrix(readMM(paste(args[1], "X.mtx", sep="")))
+
+Y = as.matrix(aggregate(X[,1] ~ X[,2], data=X, length)[,1]);
+Z = matrix(7, nrow(Y)+ncol(Y), nrow(Y)+ncol(Y)+1);
+
 writeMM(as(Z, "CsparseMatrix"), paste(args[2], "Z", sep="")); 
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/recompile/grpagg_rand_recompile.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/recompile/grpagg_rand_recompile.dml b/src/test/scripts/functions/recompile/grpagg_rand_recompile.dml
index 69a5a90..0dce630 100644
--- a/src/test/scripts/functions/recompile/grpagg_rand_recompile.dml
+++ b/src/test/scripts/functions/recompile/grpagg_rand_recompile.dml
@@ -19,10 +19,10 @@
 #
 #-------------------------------------------------------------
 
-
-X = read($1, rows=$2, cols=2);
-
-Y = aggregate(target=X[,1],groups=X[,2], fn="count");
-Z = matrix(7, rows=nrow(Y)+ncol(Y), cols=nrow(Y)+ncol(Y)+1);
-
+
+X = read($1, rows=$2, cols=2);
+
+Y = aggregate(target=X[,1],groups=X[,2], fn="count");
+Z = matrix(7, rows=nrow(Y)+ncol(Y), cols=nrow(Y)+ncol(Y)+1);
+
 write(Z, $3);       
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/recompile/if_branch_removal.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/recompile/if_branch_removal.R b/src/test/scripts/functions/recompile/if_branch_removal.R
index 2eefbff..5a771d1 100644
--- a/src/test/scripts/functions/recompile/if_branch_removal.R
+++ b/src/test/scripts/functions/recompile/if_branch_removal.R
@@ -19,30 +19,30 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-X <- as.matrix(readMM(paste(args[1], "X.mtx", sep="")))
-
-if( as.integer(args[2])==1 )
-{
-   v = matrix(1,nrow(X),1);
-   X = as.matrix(cbind(X, v));
-}
-
-if( as.integer(args[2])!=1 )
-{
-   v = matrix(1,nrow(X),1);
-   X = as.matrix(cbind(X, v));
-} else
-{
-   v1 = matrix(1,nrow(X),1);
-   X = as.matrix(cbind(X, v1));
-   v2 = matrix(1,nrow(X),1);
-   X = as.matrix(cbind(X, v2));
-}
-
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+X <- as.matrix(readMM(paste(args[1], "X.mtx", sep="")))
+
+if( as.integer(args[2])==1 )
+{
+   v = matrix(1,nrow(X),1);
+   X = as.matrix(cbind(X, v));
+}
+
+if( as.integer(args[2])!=1 )
+{
+   v = matrix(1,nrow(X),1);
+   X = as.matrix(cbind(X, v));
+} else
+{
+   v1 = matrix(1,nrow(X),1);
+   X = as.matrix(cbind(X, v1));
+   v2 = matrix(1,nrow(X),1);
+   X = as.matrix(cbind(X, v2));
+}
+
 writeMM(as(X, "CsparseMatrix"), paste(args[3], "X", sep="")); 
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/recompile/if_branch_removal.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/recompile/if_branch_removal.dml b/src/test/scripts/functions/recompile/if_branch_removal.dml
index e84b57e..68f1eae 100644
--- a/src/test/scripts/functions/recompile/if_branch_removal.dml
+++ b/src/test/scripts/functions/recompile/if_branch_removal.dml
@@ -19,28 +19,28 @@
 #
 #-------------------------------------------------------------
 
-
-X = read($1, rows=$2, cols=$3);
-
-# test if-only 
-if( $4==1 )
-{
-   v = matrix(1,rows=nrow(X),cols=1);
-   X = append(X, v);
-}  
-
-# test if-else branches
-if( $4!=1 )
-{
-   v = matrix(1,rows=nrow(X),cols=1);
-   X = append(X, v);
-} 
-else
-{
-   v1 = matrix(1,rows=nrow(X),cols=1);
-   X = append(X, v1);
-   v2 = matrix(1,rows=nrow(X),cols=1);
-   X = append(X, v2);
-}
-
+
+X = read($1, rows=$2, cols=$3);
+
+# test if-only 
+if( $4==1 )
+{
+   v = matrix(1,rows=nrow(X),cols=1);
+   X = append(X, v);
+}  
+
+# test if-else branches
+if( $4!=1 )
+{
+   v = matrix(1,rows=nrow(X),cols=1);
+   X = append(X, v);
+} 
+else
+{
+   v1 = matrix(1,rows=nrow(X),cols=1);
+   X = append(X, v1);
+   v2 = matrix(1,rows=nrow(X),cols=1);
+   X = append(X, v2);
+}
+
 write(X, $5);       
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/recompile/if_recompile.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/recompile/if_recompile.dml b/src/test/scripts/functions/recompile/if_recompile.dml
index f8a4555..2d02e01 100644
--- a/src/test/scripts/functions/recompile/if_recompile.dml
+++ b/src/test/scripts/functions/recompile/if_recompile.dml
@@ -19,11 +19,11 @@
 #
 #-------------------------------------------------------------
 
-
-V = Rand(rows=$1+1, cols=$2+1, min=$3, max=$3);
-Z = Rand(rows=1,cols=1,min=0,max=0);
-if( castAsScalar(V[1,1])>castAsScalar(Z[1,1]) )
-{
-   Z[1,1] = V[1,1]; 
-}  
+
+V = Rand(rows=$1+1, cols=$2+1, min=$3, max=$3);
+Z = Rand(rows=1,cols=1,min=0,max=0);
+if( castAsScalar(V[1,1])>castAsScalar(Z[1,1]) )
+{
+   Z[1,1] = V[1,1]; 
+}  
 write(Z, $4);       
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/recompile/if_recompile_func_sparse.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/recompile/if_recompile_func_sparse.dml b/src/test/scripts/functions/recompile/if_recompile_func_sparse.dml
index 985e5a2..424c062 100644
--- a/src/test/scripts/functions/recompile/if_recompile_func_sparse.dml
+++ b/src/test/scripts/functions/recompile/if_recompile_func_sparse.dml
@@ -19,32 +19,32 @@
 #
 #-------------------------------------------------------------
 
-
-foo1 = function (Matrix[Double] X)
-    return (Matrix[Double] Y)
-{  
-   V = X;
-   print(sum(V)); 
-   if( 1==1 ){
-      V[1,1] = $2;
-   }
-   else {
-      print(sum(V));
-   }   
-   Y = V;  
-}
-
-foo2 = function (Matrix[Double] X)
-    return (Matrix[Double] Y)
-{  
-   V = X;     
-   if( 1==1 ){
-      print(sum(V));
-   } 
-   Y = V;   
-}
-
-V = read($1);
-V = foo1(V);
-V = foo2(V);
+
+foo1 = function (Matrix[Double] X)
+    return (Matrix[Double] Y)
+{  
+   V = X;
+   print(sum(V)); 
+   if( 1==1 ){
+      V[1,1] = $2;
+   }
+   else {
+      print(sum(V));
+   }   
+   Y = V;  
+}
+
+foo2 = function (Matrix[Double] X)
+    return (Matrix[Double] Y)
+{  
+   V = X;     
+   if( 1==1 ){
+      print(sum(V));
+   } 
+   Y = V;   
+}
+
+V = read($1);
+V = foo1(V);
+V = foo2(V);
 write(V, $3);         
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/recompile/if_recompile_sparse.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/recompile/if_recompile_sparse.dml b/src/test/scripts/functions/recompile/if_recompile_sparse.dml
index cf7fcc7..9b878cc 100644
--- a/src/test/scripts/functions/recompile/if_recompile_sparse.dml
+++ b/src/test/scripts/functions/recompile/if_recompile_sparse.dml
@@ -19,17 +19,17 @@
 #
 #-------------------------------------------------------------
 
-
-V = read($1);
-print(sum(V)); 
-if( 1==1 ){
-   V[1,1] = $2;
-}
-else {
-   print(sum(V));
-}
-if( 1==1 ){
-   print(sum(V));
-}
-
+
+V = read($1);
+print(sum(V)); 
+if( 1==1 ){
+   V[1,1] = $2;
+}
+else {
+   print(sum(V));
+}
+if( 1==1 ){
+   print(sum(V));
+}
+
 write(V, $3);         
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/recompile/multiple_function_calls1.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/recompile/multiple_function_calls1.R b/src/test/scripts/functions/recompile/multiple_function_calls1.R
index 68af751..7c16f72 100644
--- a/src/test/scripts/functions/recompile/multiple_function_calls1.R
+++ b/src/test/scripts/functions/recompile/multiple_function_calls1.R
@@ -19,22 +19,22 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-V <- readMM(paste(args[1], "V.mtx", sep=""))
-V1 = V;
-V2 = V;
-
-if( nrow(V)>5 ) {
-   V1 = V1 + 5; 
-}
-if( nrow(V)>5 ) {
-   V2 = V2 + 5; 
-} 
-
-R = V1+V2;
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+V <- readMM(paste(args[1], "V.mtx", sep=""))
+V1 = V;
+V2 = V;
+
+if( nrow(V)>5 ) {
+   V1 = V1 + 5; 
+}
+if( nrow(V)>5 ) {
+   V2 = V2 + 5; 
+} 
+
+R = V1+V2;
 writeMM(as(R, "CsparseMatrix"), paste(args[2], "R", sep="")); 
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/recompile/multiple_function_calls1.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/recompile/multiple_function_calls1.dml b/src/test/scripts/functions/recompile/multiple_function_calls1.dml
index 592f490..fa2a97d 100644
--- a/src/test/scripts/functions/recompile/multiple_function_calls1.dml
+++ b/src/test/scripts/functions/recompile/multiple_function_calls1.dml
@@ -19,31 +19,31 @@
 #
 #-------------------------------------------------------------
 
-
-# conditional propagate size, because called multiple times
-foo1 = function(Matrix[Double] Xin) return (Matrix[Double] Xout) 
-{
-   X = Xin;
-   if( nrow(Xin)>5 )
-      X = X + 5; 
-   
-   Xout = X;
-}
-
-# to be removed because never called
-foo2 = function(Matrix[Double] Xin) return (Matrix[Double] Xout) 
-{
-   X = Xin;
-   if( nrow(Xin)<5 )
-      X = X - 5; 
-   
-   Xout = X;
-}
-
-V = read($1);
-
-R1 = foo1(V);
-R2 = foo1(V);
-R = R1+R2;
-
-write(R, $2);   
+
+# conditional propagate size, because called multiple times
+foo1 = function(Matrix[Double] Xin) return (Matrix[Double] Xout) 
+{
+   X = Xin;
+   if( nrow(Xin)>5 )
+      X = X + 5; 
+   
+   Xout = X;
+}
+
+# to be removed because never called
+foo2 = function(Matrix[Double] Xin) return (Matrix[Double] Xout) 
+{
+   X = Xin;
+   if( nrow(Xin)<5 )
+      X = X - 5; 
+   
+   Xout = X;
+}
+
+V = read($1);
+
+R1 = foo1(V);
+R2 = foo1(V);
+R = R1+R2;
+
+write(R, $2);   

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/recompile/multiple_function_calls2.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/recompile/multiple_function_calls2.R b/src/test/scripts/functions/recompile/multiple_function_calls2.R
index 0512553..d82d33c 100644
--- a/src/test/scripts/functions/recompile/multiple_function_calls2.R
+++ b/src/test/scripts/functions/recompile/multiple_function_calls2.R
@@ -19,22 +19,22 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-V <- readMM(paste(args[1], "V.mtx", sep=""))
-V1 = V-0.5;
-V2 = V;
-
-if( nrow(V)>5 ) {
-   V1 = V1 + 5; 
-}
-if( nrow(V)>5 ) {
-   V2 = V2 + 5; 
-} 
-
-R = V1+V2;
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+V <- readMM(paste(args[1], "V.mtx", sep=""))
+V1 = V-0.5;
+V2 = V;
+
+if( nrow(V)>5 ) {
+   V1 = V1 + 5; 
+}
+if( nrow(V)>5 ) {
+   V2 = V2 + 5; 
+} 
+
+R = V1+V2;
 writeMM(as(R, "CsparseMatrix"), paste(args[2], "R", sep="")); 
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/recompile/multiple_function_calls2.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/recompile/multiple_function_calls2.dml b/src/test/scripts/functions/recompile/multiple_function_calls2.dml
index 421deec..ea0e322 100644
--- a/src/test/scripts/functions/recompile/multiple_function_calls2.dml
+++ b/src/test/scripts/functions/recompile/multiple_function_calls2.dml
@@ -19,31 +19,31 @@
 #
 #-------------------------------------------------------------
 
-
-# conditional propagate size, because called multiple times
-foo1 = function(Matrix[Double] Xin) return (Matrix[Double] Xout) 
-{
-   X = Xin;
-   if( nrow(Xin)>5 )
-      X = X + 5; 
-   
-   Xout = X;
-}
-
-# to be removed because never called
-foo2 = function(Matrix[Double] Xin) return (Matrix[Double] Xout) 
-{
-   X = Xin;
-   if( nrow(Xin)<5 )
-      X = X - 5; 
-   
-   Xout = X;
-}
-
-V = read($1);
-
-R1 = foo1(V-0.5);
-R2 = foo1(V);
-R = R1+R2;
-
-write(R, $2);   
+
+# conditional propagate size, because called multiple times
+foo1 = function(Matrix[Double] Xin) return (Matrix[Double] Xout) 
+{
+   X = Xin;
+   if( nrow(Xin)>5 )
+      X = X + 5; 
+   
+   Xout = X;
+}
+
+# to be removed because never called
+foo2 = function(Matrix[Double] Xin) return (Matrix[Double] Xout) 
+{
+   X = Xin;
+   if( nrow(Xin)<5 )
+      X = X - 5; 
+   
+   Xout = X;
+}
+
+V = read($1);
+
+R1 = foo1(V-0.5);
+R2 = foo1(V);
+R = R1+R2;
+
+write(R, $2);   

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/recompile/multiple_function_calls3.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/recompile/multiple_function_calls3.R b/src/test/scripts/functions/recompile/multiple_function_calls3.R
index 14001d6..eb8ea57 100644
--- a/src/test/scripts/functions/recompile/multiple_function_calls3.R
+++ b/src/test/scripts/functions/recompile/multiple_function_calls3.R
@@ -19,22 +19,22 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-V <- readMM(paste(args[1], "V.mtx", sep=""))
-V1 = V;
-V2 = V-0.5;
-
-if( nrow(V)>5 ) {
-   V1 = V1 + 5; 
-}
-if( nrow(V)>5 ) {
-   V2 = V2 + 5; 
-} 
-
-R = V1+V2;
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+V <- readMM(paste(args[1], "V.mtx", sep=""))
+V1 = V;
+V2 = V-0.5;
+
+if( nrow(V)>5 ) {
+   V1 = V1 + 5; 
+}
+if( nrow(V)>5 ) {
+   V2 = V2 + 5; 
+} 
+
+R = V1+V2;
 writeMM(as(R, "CsparseMatrix"), paste(args[2], "R", sep="")); 
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/recompile/multiple_function_calls3.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/recompile/multiple_function_calls3.dml b/src/test/scripts/functions/recompile/multiple_function_calls3.dml
index 1962304..73ea192 100644
--- a/src/test/scripts/functions/recompile/multiple_function_calls3.dml
+++ b/src/test/scripts/functions/recompile/multiple_function_calls3.dml
@@ -19,31 +19,31 @@
 #
 #-------------------------------------------------------------
 
-
-# conditional propagate size, because called multiple times
-foo1 = function(Matrix[Double] Xin) return (Matrix[Double] Xout) 
-{
-   X = Xin;
-   if( nrow(Xin)>5 )
-      X = X + 5; 
-   
-   Xout = X;
-}
-
-# to be removed because never called
-foo2 = function(Matrix[Double] Xin) return (Matrix[Double] Xout) 
-{
-   X = Xin;
-   if( nrow(Xin)<5 )
-      X = X - 5; 
-   
-   Xout = X;
-}
-
-V = read($1);
-
-R1 = foo1(V);
-R2 = foo1(V-0.5);
-R = R1+R2;
-
-write(R, $2);   
+
+# conditional propagate size, because called multiple times
+foo1 = function(Matrix[Double] Xin) return (Matrix[Double] Xout) 
+{
+   X = Xin;
+   if( nrow(Xin)>5 )
+      X = X + 5; 
+   
+   Xout = X;
+}
+
+# to be removed because never called
+foo2 = function(Matrix[Double] Xin) return (Matrix[Double] Xout) 
+{
+   X = Xin;
+   if( nrow(Xin)<5 )
+      X = X - 5; 
+   
+   Xout = X;
+}
+
+V = read($1);
+
+R1 = foo1(V);
+R2 = foo1(V-0.5);
+R = R1+R2;
+
+write(R, $2);   

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/recompile/multiple_function_calls4.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/recompile/multiple_function_calls4.R b/src/test/scripts/functions/recompile/multiple_function_calls4.R
index 07b33ea..f910736 100644
--- a/src/test/scripts/functions/recompile/multiple_function_calls4.R
+++ b/src/test/scripts/functions/recompile/multiple_function_calls4.R
@@ -19,22 +19,22 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-V <- readMM(paste(args[1], "V.mtx", sep=""))
-V1 = V-0.5;
-V2 = V-0.5;
-
-if( nrow(V)>5 ) {
-   V1 = V1 + 5; 
-}
-if( nrow(V)>5 ) {
-   V2 = V2 + 5; 
-} 
-
-R = V1+V2;
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+V <- readMM(paste(args[1], "V.mtx", sep=""))
+V1 = V-0.5;
+V2 = V-0.5;
+
+if( nrow(V)>5 ) {
+   V1 = V1 + 5; 
+}
+if( nrow(V)>5 ) {
+   V2 = V2 + 5; 
+} 
+
+R = V1+V2;
 writeMM(as(R, "CsparseMatrix"), paste(args[2], "R", sep="")); 
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/recompile/multiple_function_calls4.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/recompile/multiple_function_calls4.dml b/src/test/scripts/functions/recompile/multiple_function_calls4.dml
index fec658c..da6d34a 100644
--- a/src/test/scripts/functions/recompile/multiple_function_calls4.dml
+++ b/src/test/scripts/functions/recompile/multiple_function_calls4.dml
@@ -19,31 +19,31 @@
 #
 #-------------------------------------------------------------
 
-
-# conditional propagate size, because called multiple times
-foo1 = function(Matrix[Double] Xin) return (Matrix[Double] Xout) 
-{
-   X = Xin;
-   if( nrow(Xin)>5 )
-      X = X + 5; 
-   
-   Xout = X;
-}
-
-# to be removed because never called
-foo2 = function(Matrix[Double] Xin) return (Matrix[Double] Xout) 
-{
-   X = Xin;
-   if( nrow(Xin)<5 )
-      X = X - 5; 
-   
-   Xout = X;
-}
-
-V = read($1);
-
-R1 = foo1(V-0.5);
-R2 = foo1(V-0.5);
-R = R1+R2;
-
-write(R, $2);   
+
+# conditional propagate size, because called multiple times
+foo1 = function(Matrix[Double] Xin) return (Matrix[Double] Xout) 
+{
+   X = Xin;
+   if( nrow(Xin)>5 )
+      X = X + 5; 
+   
+   Xout = X;
+}
+
+# to be removed because never called
+foo2 = function(Matrix[Double] Xin) return (Matrix[Double] Xout) 
+{
+   X = Xin;
+   if( nrow(Xin)<5 )
+      X = X - 5; 
+   
+   Xout = X;
+}
+
+V = read($1);
+
+R1 = foo1(V-0.5);
+R2 = foo1(V-0.5);
+R = R1+R2;
+
+write(R, $2);   

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/recompile/multiple_function_calls5.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/recompile/multiple_function_calls5.R b/src/test/scripts/functions/recompile/multiple_function_calls5.R
index 68af751..7c16f72 100644
--- a/src/test/scripts/functions/recompile/multiple_function_calls5.R
+++ b/src/test/scripts/functions/recompile/multiple_function_calls5.R
@@ -19,22 +19,22 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-V <- readMM(paste(args[1], "V.mtx", sep=""))
-V1 = V;
-V2 = V;
-
-if( nrow(V)>5 ) {
-   V1 = V1 + 5; 
-}
-if( nrow(V)>5 ) {
-   V2 = V2 + 5; 
-} 
-
-R = V1+V2;
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+V <- readMM(paste(args[1], "V.mtx", sep=""))
+V1 = V;
+V2 = V;
+
+if( nrow(V)>5 ) {
+   V1 = V1 + 5; 
+}
+if( nrow(V)>5 ) {
+   V2 = V2 + 5; 
+} 
+
+R = V1+V2;
 writeMM(as(R, "CsparseMatrix"), paste(args[2], "R", sep="")); 
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/recompile/multiple_function_calls5.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/recompile/multiple_function_calls5.dml b/src/test/scripts/functions/recompile/multiple_function_calls5.dml
index 8f71c5f..dc1b420 100644
--- a/src/test/scripts/functions/recompile/multiple_function_calls5.dml
+++ b/src/test/scripts/functions/recompile/multiple_function_calls5.dml
@@ -19,32 +19,32 @@
 #
 #-------------------------------------------------------------
 
-
-# conditional propagate size, because called multiple times
-foo1 = function(Matrix[Double] Xin) return (Matrix[Double] Xout) 
-{
-   X = Xin;
-   if( nrow(Xin)>5 )
-      X = X + 5; 
-   
-   Xout = X;
-}
-
-# to be removed because never called
-foo2 = function(Matrix[Double] Xin) return (Matrix[Double] Xout) 
-{
-   X = Xin;
-   if( nrow(Xin)<5 )
-      X = X - 5; 
-   
-   Xout = X;
-}
-
-V = read($1);
-
-R1 = foo1(V);
-Vp = append(V,matrix(1,rows=nrow(V),cols=1))
-R2 = foo1(Vp);
-R = R1+R2[,1:ncol(V)];
-
-write(R, $2);   
+
+# conditional propagate size, because called multiple times
+foo1 = function(Matrix[Double] Xin) return (Matrix[Double] Xout) 
+{
+   X = Xin;
+   if( nrow(Xin)>5 )
+      X = X + 5; 
+   
+   Xout = X;
+}
+
+# to be removed because never called
+foo2 = function(Matrix[Double] Xin) return (Matrix[Double] Xout) 
+{
+   X = Xin;
+   if( nrow(Xin)<5 )
+      X = X - 5; 
+   
+   Xout = X;
+}
+
+V = read($1);
+
+R1 = foo1(V);
+Vp = append(V,matrix(1,rows=nrow(V),cols=1))
+R2 = foo1(Vp);
+R = R1+R2[,1:ncol(V)];
+
+write(R, $2);   

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/recompile/multiple_reads.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/recompile/multiple_reads.R b/src/test/scripts/functions/recompile/multiple_reads.R
index ecb2cad..9f86072 100644
--- a/src/test/scripts/functions/recompile/multiple_reads.R
+++ b/src/test/scripts/functions/recompile/multiple_reads.R
@@ -19,17 +19,17 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-X <- readMM(paste(args[1], "X1.mtx", sep=""))
-
-if( 1==1 )
-{
-   X <- readMM(paste(args[1], "X2.mtx", sep=""))
-}
-
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+X <- readMM(paste(args[1], "X1.mtx", sep=""))
+
+if( 1==1 )
+{
+   X <- readMM(paste(args[1], "X2.mtx", sep=""))
+}
+
 writeMM(as(X, "CsparseMatrix"), paste(args[2], "X", sep="")); 
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/recompile/multiple_reads.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/recompile/multiple_reads.dml b/src/test/scripts/functions/recompile/multiple_reads.dml
index 31efdd3..021b11d 100644
--- a/src/test/scripts/functions/recompile/multiple_reads.dml
+++ b/src/test/scripts/functions/recompile/multiple_reads.dml
@@ -19,12 +19,12 @@
 #
 #-------------------------------------------------------------
 
-
-X = read($1, rows=$2, cols=$3);
-
-if( 1==1 )
-{
-   X = read($4, rows=$5, cols=$6);
-}
-
+
+X = read($1, rows=$2, cols=$3);
+
+if( 1==1 )
+{
+   X = read($4, rows=$5, cols=$6);
+}
+
 write(X, $7);      
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/recompile/parfor_recompile.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/recompile/parfor_recompile.dml b/src/test/scripts/functions/recompile/parfor_recompile.dml
index fe74001..5e14440 100644
--- a/src/test/scripts/functions/recompile/parfor_recompile.dml
+++ b/src/test/scripts/functions/recompile/parfor_recompile.dml
@@ -19,11 +19,11 @@
 #
 #-------------------------------------------------------------
 
-
-V = Rand(rows=$1+1, cols=$2+1, min=$3, max=$3);
-Z = Rand(rows=1,cols=1,min=0,max=0);
-parfor( i in $3:castAsScalar(V[1,1]), check=0 )
-{
-   Z[1,1] = V[1,1]; 
-}  
+
+V = Rand(rows=$1+1, cols=$2+1, min=$3, max=$3);
+Z = Rand(rows=1,cols=1,min=0,max=0);
+parfor( i in $3:castAsScalar(V[1,1]), check=0 )
+{
+   Z[1,1] = V[1,1]; 
+}  
 write(Z, $4);       
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/recompile/parfor_recompile_func_sparse.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/recompile/parfor_recompile_func_sparse.dml b/src/test/scripts/functions/recompile/parfor_recompile_func_sparse.dml
index 5f80b44..78dcff0 100644
--- a/src/test/scripts/functions/recompile/parfor_recompile_func_sparse.dml
+++ b/src/test/scripts/functions/recompile/parfor_recompile_func_sparse.dml
@@ -19,32 +19,32 @@
 #
 #-------------------------------------------------------------
 
-
-foo1 = function (Matrix[Double] X)
-    return (Matrix[Double] Y)
-{  
-   V = X;
-   print(sum(V)); 
-   parfor( i in 1:1, check=0 )
-   {
-      print(sum(V)); 
-      V = foo2(V,i)
-   }  
-   Y = V;  
-}
-
-foo2 = function (Matrix[Double] X, Integer i)
-    return (Matrix[Double] Y)
-{  
-   V = X;     
-   parfor( j in 1:1 )
-   {
-      V[i,j] = $2; 
-   }  
-   Y = V;   
-}
-
-
-V = read($1);
-V = foo1(V);
+
+foo1 = function (Matrix[Double] X)
+    return (Matrix[Double] Y)
+{  
+   V = X;
+   print(sum(V)); 
+   parfor( i in 1:1, check=0 )
+   {
+      print(sum(V)); 
+      V = foo2(V,i)
+   }  
+   Y = V;  
+}
+
+foo2 = function (Matrix[Double] X, Integer i)
+    return (Matrix[Double] Y)
+{  
+   V = X;     
+   parfor( j in 1:1 )
+   {
+      V[i,j] = $2; 
+   }  
+   Y = V;   
+}
+
+
+V = read($1);
+V = foo1(V);
 write(V, $3);       
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/recompile/parfor_recompile_sparse.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/recompile/parfor_recompile_sparse.dml b/src/test/scripts/functions/recompile/parfor_recompile_sparse.dml
index e251115..ae66c57 100644
--- a/src/test/scripts/functions/recompile/parfor_recompile_sparse.dml
+++ b/src/test/scripts/functions/recompile/parfor_recompile_sparse.dml
@@ -19,16 +19,16 @@
 #
 #-------------------------------------------------------------
 
-
-V = read($1);
-print(sum(V)); 
-parfor( i in 1:1 )
-{
-   print(sum(V[i,])); 
-   
-   parfor( j in 1:1 )
-   {
-      V[i,j] = $2; 
-   }
-}  
+
+V = read($1);
+print(sum(V)); 
+parfor( i in 1:1 )
+{
+   print(sum(V[i,])); 
+   
+   parfor( j in 1:1 )
+   {
+      V[i,j] = $2; 
+   }
+}  
 write(V, $3);       
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/recompile/rand_recompile.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/recompile/rand_recompile.dml b/src/test/scripts/functions/recompile/rand_recompile.dml
index 3f842b7..c3ec6f1 100644
--- a/src/test/scripts/functions/recompile/rand_recompile.dml
+++ b/src/test/scripts/functions/recompile/rand_recompile.dml
@@ -19,17 +19,17 @@
 #
 #-------------------------------------------------------------
 
-
-n = $1;
-s = 0;
-
-x1= n+5;
-x2= n+7;
-
-if( 1==1 )
-{
-  D = Rand(rows=x1, cols=x2);
-  s = s + sum(D);
-}
-
+
+n = $1;
+s = 0;
+
+x1= n+5;
+x2= n+7;
+
+if( 1==1 )
+{
+  D = Rand(rows=x1, cols=x2);
+  s = s + sum(D);
+}
+
 print(s);    
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/recompile/rand_recompile2.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/recompile/rand_recompile2.dml b/src/test/scripts/functions/recompile/rand_recompile2.dml
index 0a0bfca..281939b 100644
--- a/src/test/scripts/functions/recompile/rand_recompile2.dml
+++ b/src/test/scripts/functions/recompile/rand_recompile2.dml
@@ -19,20 +19,20 @@
 #
 #-------------------------------------------------------------
 
-
-testFun = function (Matrix[double] X)
-    return (Double s)
-{  
-   if(0==0){} #prevent inlining
-   
-   m = nrow(X);
-   D = Rand(rows=m,cols=1);
-   s = sum(D);
-}
-
-m = $1;
-n = $2;
-
-X = Rand(rows=10,cols=10);
-s = testFun(X);
+
+testFun = function (Matrix[double] X)
+    return (Double s)
+{  
+   if(0==0){} #prevent inlining
+   
+   m = nrow(X);
+   D = Rand(rows=m,cols=1);
+   s = sum(D);
+}
+
+m = $1;
+n = $2;
+
+X = Rand(rows=10,cols=10);
+s = testFun(X);
 print(s);   
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/recompile/rand_recompile3.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/recompile/rand_recompile3.dml b/src/test/scripts/functions/recompile/rand_recompile3.dml
index 3cf1300..1faacfa 100644
--- a/src/test/scripts/functions/recompile/rand_recompile3.dml
+++ b/src/test/scripts/functions/recompile/rand_recompile3.dml
@@ -19,20 +19,20 @@
 #
 #-------------------------------------------------------------
 
-
-testFun = function (Matrix[double] X)
-    return (Double s)
-{  
-   if(0==0){} #prevent inlining
-   
-   n = ncol(X);
-   D = Rand(rows=1,cols=n);
-   s = sum(D);
-}
-
-m = $1;
-n = $2;
-
-X = Rand(rows=10,cols=10);
-s = testFun(X);
+
+testFun = function (Matrix[double] X)
+    return (Double s)
+{  
+   if(0==0){} #prevent inlining
+   
+   n = ncol(X);
+   D = Rand(rows=1,cols=n);
+   s = sum(D);
+}
+
+m = $1;
+n = $2;
+
+X = Rand(rows=10,cols=10);
+s = testFun(X);
 print(s);   
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/recompile/rand_size_expr_eval.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/recompile/rand_size_expr_eval.dml b/src/test/scripts/functions/recompile/rand_size_expr_eval.dml
index 300f9b1..eddb4a5 100644
--- a/src/test/scripts/functions/recompile/rand_size_expr_eval.dml
+++ b/src/test/scripts/functions/recompile/rand_size_expr_eval.dml
@@ -19,18 +19,18 @@
 #
 #-------------------------------------------------------------
 
-
-m = $1;
-n = $2;
-
-A = matrix(1, rows=m, cols=n);
-B = A %*% A;
-
-C1 = matrix(1, rows=nrow(B)*ncol(B), cols=1);
-C2 = matrix(1, rows=1, cols=nrow(B)*ncol(B));        
-C3 = matrix(1, rows=(0+1*ncol(B)+1-1/1), cols=(0+1*nrow(B)+1-1/1) );
-
-R = matrix(1, rows=1, cols=1);
-R[1,1] = sum(C1)+sum(C2)+sum(C3);
-
+
+m = $1;
+n = $2;
+
+A = matrix(1, rows=m, cols=n);
+B = A %*% A;
+
+C1 = matrix(1, rows=nrow(B)*ncol(B), cols=1);
+C2 = matrix(1, rows=1, cols=nrow(B)*ncol(B));        
+C3 = matrix(1, rows=(0+1*ncol(B)+1-1/1), cols=(0+1*nrow(B)+1-1/1) );
+
+R = matrix(1, rows=1, cols=1);
+R[1,1] = sum(C1)+sum(C2)+sum(C3);
+
 write(R, $3, format="text");  
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/recompile/rblk_recompile1.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/recompile/rblk_recompile1.R b/src/test/scripts/functions/recompile/rblk_recompile1.R
index eb679df..8b29dfe 100644
--- a/src/test/scripts/functions/recompile/rblk_recompile1.R
+++ b/src/test/scripts/functions/recompile/rblk_recompile1.R
@@ -19,11 +19,11 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-V <- readMM(paste(args[1], "V.mtx", sep=""))
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+V <- readMM(paste(args[1], "V.mtx", sep=""))
 writeMM(as(V, "CsparseMatrix"), paste(args[2], "Rout", sep="")); 
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/recompile/rblk_recompile1.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/recompile/rblk_recompile1.dml b/src/test/scripts/functions/recompile/rblk_recompile1.dml
index ffafa2b..cdef589 100644
--- a/src/test/scripts/functions/recompile/rblk_recompile1.dml
+++ b/src/test/scripts/functions/recompile/rblk_recompile1.dml
@@ -19,6 +19,6 @@
 #
 #-------------------------------------------------------------
 
-
-V = read($1, rows=$2, cols=1, format="text");
+
+V = read($1, rows=$2, cols=1, format="text");
 write(V, $3, format="binary");       
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/recompile/rblk_recompile2.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/recompile/rblk_recompile2.R b/src/test/scripts/functions/recompile/rblk_recompile2.R
index 4ea8531..3898b44 100644
--- a/src/test/scripts/functions/recompile/rblk_recompile2.R
+++ b/src/test/scripts/functions/recompile/rblk_recompile2.R
@@ -19,13 +19,13 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-V <- readMM(paste(args[1], "V.mtx", sep=""))
-V <- floor(V)
-W <- table(as.vector(V))
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+V <- readMM(paste(args[1], "V.mtx", sep=""))
+V <- floor(V)
+W <- table(as.vector(V))
 writeMM(as(as.matrix(W), "CsparseMatrix"), paste(args[2], "Rout", sep="")); 
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/recompile/rblk_recompile2.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/recompile/rblk_recompile2.dml b/src/test/scripts/functions/recompile/rblk_recompile2.dml
index 44e5761..081d2d3 100644
--- a/src/test/scripts/functions/recompile/rblk_recompile2.dml
+++ b/src/test/scripts/functions/recompile/rblk_recompile2.dml
@@ -19,7 +19,7 @@
 #
 #-------------------------------------------------------------
 
-
-V = read($1, rows=$2, cols=1, format="text");
-W = table(V,1);
+
+V = read($1, rows=$2, cols=1, format="text");
+W = table(V,1);
 write(W, $3, format="binary");     
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/recompile/rblk_recompile3.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/recompile/rblk_recompile3.R b/src/test/scripts/functions/recompile/rblk_recompile3.R
index 4ea8531..3898b44 100644
--- a/src/test/scripts/functions/recompile/rblk_recompile3.R
+++ b/src/test/scripts/functions/recompile/rblk_recompile3.R
@@ -19,13 +19,13 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-V <- readMM(paste(args[1], "V.mtx", sep=""))
-V <- floor(V)
-W <- table(as.vector(V))
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+V <- readMM(paste(args[1], "V.mtx", sep=""))
+V <- floor(V)
+W <- table(as.vector(V))
 writeMM(as(as.matrix(W), "CsparseMatrix"), paste(args[2], "Rout", sep="")); 
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/recompile/rblk_recompile3.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/recompile/rblk_recompile3.dml b/src/test/scripts/functions/recompile/rblk_recompile3.dml
index a2afc25..1c21579 100644
--- a/src/test/scripts/functions/recompile/rblk_recompile3.dml
+++ b/src/test/scripts/functions/recompile/rblk_recompile3.dml
@@ -19,8 +19,8 @@
 #
 #-------------------------------------------------------------
 
-
-V = read($1, rows=$2, cols=1, format="text");
-W = V*1;
-X = aggregate(target=V, groups=W, fn="count");
+
+V = read($1, rows=$2, cols=1, format="text");
+W = V*1;
+X = aggregate(target=V, groups=W, fn="count");
 write(X, $3, format="binary");     
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/recompile/remove_empty_potpourri1.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/recompile/remove_empty_potpourri1.R b/src/test/scripts/functions/recompile/remove_empty_potpourri1.R
index 622a1a8..eb16ff8 100644
--- a/src/test/scripts/functions/recompile/remove_empty_potpourri1.R
+++ b/src/test/scripts/functions/recompile/remove_empty_potpourri1.R
@@ -19,13 +19,13 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-M = seq (1, 100, 1)
-R = matrix (M, 20, 5, byrow=TRUE)
-
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+M = seq (1, 100, 1)
+R = matrix (M, 20, 5, byrow=TRUE)
+
 writeMM(as(R, "CsparseMatrix"), paste(args[1], "R", sep="")); 
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/recompile/remove_empty_potpourri1.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/recompile/remove_empty_potpourri1.dml b/src/test/scripts/functions/recompile/remove_empty_potpourri1.dml
index 7ffc903..37eb904 100644
--- a/src/test/scripts/functions/recompile/remove_empty_potpourri1.dml
+++ b/src/test/scripts/functions/recompile/remove_empty_potpourri1.dml
@@ -19,7 +19,7 @@
 #
 #-------------------------------------------------------------
 
-
-M = seq (1, 100, 1)
-R = matrix (M, rows=20, cols=5)
+
+M = seq (1, 100, 1)
+R = matrix (M, rows=20, cols=5)
 write(R, $1);       
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/recompile/remove_empty_potpourri2.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/recompile/remove_empty_potpourri2.R b/src/test/scripts/functions/recompile/remove_empty_potpourri2.R
index 945aace..39ef00c 100644
--- a/src/test/scripts/functions/recompile/remove_empty_potpourri2.R
+++ b/src/test/scripts/functions/recompile/remove_empty_potpourri2.R
@@ -19,12 +19,12 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-A = matrix(0, 100, 1);
-R = colSums(A)
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+A = matrix(0, 100, 1);
+R = colSums(A)
 writeMM(as(R, "CsparseMatrix"), paste(args[1], "R", sep="")); 
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/recompile/remove_empty_potpourri2.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/recompile/remove_empty_potpourri2.dml b/src/test/scripts/functions/recompile/remove_empty_potpourri2.dml
index 1c72f17..70027dd 100644
--- a/src/test/scripts/functions/recompile/remove_empty_potpourri2.dml
+++ b/src/test/scripts/functions/recompile/remove_empty_potpourri2.dml
@@ -19,11 +19,11 @@
 #
 #-------------------------------------------------------------
 
-
-A = matrix(0, rows=100,cols=1);
-R = colSums(A)
-
-#force original error on cpvar
-if(1==1){} 
-
+
+A = matrix(0, rows=100,cols=1);
+R = colSums(A)
+
+#force original error on cpvar
+if(1==1){} 
+
 write(R, $1);       
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/recompile/remove_empty_potpourri3.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/recompile/remove_empty_potpourri3.R b/src/test/scripts/functions/recompile/remove_empty_potpourri3.R
index 5f8ea6d..b2fde40 100644
--- a/src/test/scripts/functions/recompile/remove_empty_potpourri3.R
+++ b/src/test/scripts/functions/recompile/remove_empty_potpourri3.R
@@ -19,17 +19,17 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-S = matrix(1,100,1);
-A = diag(as.vector(S));
-B = A %*% S;
-C = table (B, seq (1, nrow(A), 1));
-R = colSums(C);
-R = R %*% A;
-
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+S = matrix(1,100,1);
+A = diag(as.vector(S));
+B = A %*% S;
+C = table (B, seq (1, nrow(A), 1));
+R = colSums(C);
+R = R %*% A;
+
 writeMM(as(R, "CsparseMatrix"), paste(args[1], "R", sep="")); 
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/recompile/remove_empty_potpourri3.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/recompile/remove_empty_potpourri3.dml b/src/test/scripts/functions/recompile/remove_empty_potpourri3.dml
index c5e8288..a29acd6 100644
--- a/src/test/scripts/functions/recompile/remove_empty_potpourri3.dml
+++ b/src/test/scripts/functions/recompile/remove_empty_potpourri3.dml
@@ -19,18 +19,18 @@
 #
 #-------------------------------------------------------------
 
-
-S = matrix(1,rows=100,cols=1);
-if(1==1){}
-
-A = diag(S);
-A = removeEmpty (target = A, margin = "rows");
-B = A %*% S;
-C = table (B, seq (1, nrow(A), 1));
-R = removeEmpty (target = C, margin = "rows");
-R = R %*% A;
-
-if(1==1){} 
-
-print(sum(R));
+
+S = matrix(1,rows=100,cols=1);
+if(1==1){}
+
+A = diag(S);
+A = removeEmpty (target = A, margin = "rows");
+B = A %*% S;
+C = table (B, seq (1, nrow(A), 1));
+R = removeEmpty (target = C, margin = "rows");
+R = R %*% A;
+
+if(1==1){} 
+
+print(sum(R));
 write(R, $1);       
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/recompile/remove_empty_potpourri4.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/recompile/remove_empty_potpourri4.R b/src/test/scripts/functions/recompile/remove_empty_potpourri4.R
index 36bc892..e07f18f 100644
--- a/src/test/scripts/functions/recompile/remove_empty_potpourri4.R
+++ b/src/test/scripts/functions/recompile/remove_empty_potpourri4.R
@@ -1,41 +1,41 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-X = matrix(1, 1000, 3);
-B = matrix(1, 1000, 2);
-C = matrix(7, 1000, 1);
-D = matrix(3, 1000, 1);
-
-E = cbind(X [, 1 : 2], B) * ((C * (1 - D))%*%matrix(1,1,4));
-X = X * C%*%matrix(1,1,3);
-n = nrow (X);
-
-R = X + sum(E) + n;
-
-cat(sum(R))
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+X = matrix(1, 1000, 3);
+B = matrix(1, 1000, 2);
+C = matrix(7, 1000, 1);
+D = matrix(3, 1000, 1);
+
+E = cbind(X [, 1 : 2], B) * ((C * (1 - D))%*%matrix(1,1,4));
+X = X * C%*%matrix(1,1,3);
+n = nrow (X);
+
+R = X + sum(E) + n;
+
+cat(sum(R))
+
 writeMM(as(R, "CsparseMatrix"), paste(args[1], "R", sep="")); 
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/recompile/remove_empty_potpourri4.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/recompile/remove_empty_potpourri4.dml b/src/test/scripts/functions/recompile/remove_empty_potpourri4.dml
index 54f890c..d651ed6 100644
--- a/src/test/scripts/functions/recompile/remove_empty_potpourri4.dml
+++ b/src/test/scripts/functions/recompile/remove_empty_potpourri4.dml
@@ -1,42 +1,42 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-
-X = matrix(1, rows=1000, cols=3);
-B = matrix(1, rows=1000, cols=2);
-C = matrix(7, rows=1000, cols=1);
-D = matrix(3, rows=1000, cols=1);
-
-if(1==1){}
-
-tmp = append(X [, 1 : 2], B) * (C * (1 - D));
-E = removeEmpty (target = tmp, margin = "rows");
-
-X = removeEmpty (target = X * C, margin = "rows");
-n = nrow (X);
-
-if(1==1){} 
-
-R = X + sum(E) + n;
-
-print(sum(R))
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+X = matrix(1, rows=1000, cols=3);
+B = matrix(1, rows=1000, cols=2);
+C = matrix(7, rows=1000, cols=1);
+D = matrix(3, rows=1000, cols=1);
+
+if(1==1){}
+
+tmp = append(X [, 1 : 2], B) * (C * (1 - D));
+E = removeEmpty (target = tmp, margin = "rows");
+
+X = removeEmpty (target = X * C, margin = "rows");
+n = nrow (X);
+
+if(1==1){} 
+
+R = X + sum(E) + n;
+
+print(sum(R))
+
 write(R, $1);       
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/recompile/remove_empty_recompile.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/recompile/remove_empty_recompile.R b/src/test/scripts/functions/recompile/remove_empty_recompile.R
index 41a62df..da11c49 100644
--- a/src/test/scripts/functions/recompile/remove_empty_recompile.R
+++ b/src/test/scripts/functions/recompile/remove_empty_recompile.R
@@ -19,57 +19,57 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-X <- readMM(paste(args[1], "X.mtx", sep=""))
-
-type = as.integer(args[2]);
-
-R = X;
-
-if( type==0 ){
-  R = as.matrix( sum(X) );
-}
-if( type==1 ){
-  R = round(X);
-}
-if( type==2 ){
-  R = t(X); 
-}
-if( type==3 ){
-  R = X*(X-1);
-}
-if( type==4 ){
-  R = (X-1)*X;
-}
-if( type==5 ){
-  R = X+(X-1);
-}
-if( type==6 ){
-  R = (X-1)+X;
-}
-if( type==7 ){
-  R = X-(X+2);
-}
-if( type==8 ){
-  R = (X+2)-X;
-}
-if( type==9 ){
-  R = X%*%(X-1);
-}
-if( type==10 ){
-  R = (X-1)%*%X;
-}
-if( type==11 ){
-  R = X[1:(nrow(X)-1), 1:(ncol(X)-1)];
-}
-if( type==12 ){
-  X[1,] = X[2,];
-  R = X;
-}
-
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+X <- readMM(paste(args[1], "X.mtx", sep=""))
+
+type = as.integer(args[2]);
+
+R = X;
+
+if( type==0 ){
+  R = as.matrix( sum(X) );
+}
+if( type==1 ){
+  R = round(X);
+}
+if( type==2 ){
+  R = t(X); 
+}
+if( type==3 ){
+  R = X*(X-1);
+}
+if( type==4 ){
+  R = (X-1)*X;
+}
+if( type==5 ){
+  R = X+(X-1);
+}
+if( type==6 ){
+  R = (X-1)+X;
+}
+if( type==7 ){
+  R = X-(X+2);
+}
+if( type==8 ){
+  R = (X+2)-X;
+}
+if( type==9 ){
+  R = X%*%(X-1);
+}
+if( type==10 ){
+  R = (X-1)%*%X;
+}
+if( type==11 ){
+  R = X[1:(nrow(X)-1), 1:(ncol(X)-1)];
+}
+if( type==12 ){
+  X[1,] = X[2,];
+  R = X;
+}
+
 writeMM(as(R, "CsparseMatrix"), paste(args[3], "R", sep="")); 
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/recompile/remove_empty_recompile.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/recompile/remove_empty_recompile.dml b/src/test/scripts/functions/recompile/remove_empty_recompile.dml
index 35682bc..a4ee8be 100644
--- a/src/test/scripts/functions/recompile/remove_empty_recompile.dml
+++ b/src/test/scripts/functions/recompile/remove_empty_recompile.dml
@@ -19,54 +19,54 @@
 #
 #-------------------------------------------------------------
 
-
-execFun = function(Matrix[Double] X, Integer type) 
-  return (Matrix[Double] R) 
-{
-   R = X;
-
-   if( type==0 ){
-      R = as.matrix( sum(X) );
-   }
-   if( type==1 ){
-      R = round(X);
-   }
-   if( type==2 ){
-      R = t(X); 
-   }
-   if( type==3 ){
-      R = X*(X-1);
-   }
-   if( type==4 ){
-      R = (X-1)*X;
-   }
-   if( type==5 ){
-      R = X+(X-1);
-   }
-   if( type==6 ){
-      R = (X-1)+X;
-   }
-   if( type==7 ){
-      R = X-(X+2);
-   }
-   if( type==8 ){    
-      R = (X+2)-X;
-   }
-   if( type==9 ){
-      R = X%*%(X-1);
-   }
-   if( type==10 ){
-      R = (X-1)%*%X;
-   }
-   if( type==11 ){
-      R = X[1:(nrow(X)-1), 1:(ncol(X)-1)];
-   }
-   if( type==12 ){
-      X[1,] = X[2,];
-      R = X;
-   }
-}
-
-X = read($1);
-R = execFun( X, $2 )  
+
+execFun = function(Matrix[Double] X, Integer type) 
+  return (Matrix[Double] R) 
+{
+   R = X;
+
+   if( type==0 ){
+      R = as.matrix( sum(X) );
+   }
+   if( type==1 ){
+      R = round(X);
+   }
+   if( type==2 ){
+      R = t(X); 
+   }
+   if( type==3 ){
+      R = X*(X-1);
+   }
+   if( type==4 ){
+      R = (X-1)*X;
+   }
+   if( type==5 ){
+      R = X+(X-1);
+   }
+   if( type==6 ){
+      R = (X-1)+X;
+   }
+   if( type==7 ){
+      R = X-(X+2);
+   }
+   if( type==8 ){    
+      R = (X+2)-X;
+   }
+   if( type==9 ){
+      R = X%*%(X-1);
+   }
+   if( type==10 ){
+      R = (X-1)%*%X;
+   }
+   if( type==11 ){
+      R = X[1:(nrow(X)-1), 1:(ncol(X)-1)];
+   }
+   if( type==12 ){
+      X[1,] = X[2,];
+      R = X;
+   }
+}
+
+X = read($1);
+R = execFun( X, $2 )  
 write(R, $3);       
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/recompile/rewrite_mapmultchain1.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/recompile/rewrite_mapmultchain1.R b/src/test/scripts/functions/recompile/rewrite_mapmultchain1.R
index 23d66e4..72e9b1d 100644
--- a/src/test/scripts/functions/recompile/rewrite_mapmultchain1.R
+++ b/src/test/scripts/functions/recompile/rewrite_mapmultchain1.R
@@ -19,18 +19,18 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-X <- readMM(paste(args[1], "X.mtx", sep=""))
-P <- readMM(paste(args[1], "P.mtx", sep=""))
-v <- readMM(paste(args[1], "v.mtx", sep=""))
-k = ncol(P);
-
-Q = P * (X %*% v);
-HV = t(X) %*% (Q - P * (rowSums (Q) %*% matrix(1, 1, k)));
-
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+X <- readMM(paste(args[1], "X.mtx", sep=""))
+P <- readMM(paste(args[1], "P.mtx", sep=""))
+v <- readMM(paste(args[1], "v.mtx", sep=""))
+k = ncol(P);
+
+Q = P * (X %*% v);
+HV = t(X) %*% (Q - P * (rowSums (Q) %*% matrix(1, 1, k)));
+
 writeMM(as(HV, "CsparseMatrix"), paste(args[2], "HV", sep="")); 
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/recompile/rewrite_mapmultchain1.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/recompile/rewrite_mapmultchain1.dml b/src/test/scripts/functions/recompile/rewrite_mapmultchain1.dml
index ac3c7ed..ab4eef2 100644
--- a/src/test/scripts/functions/recompile/rewrite_mapmultchain1.dml
+++ b/src/test/scripts/functions/recompile/rewrite_mapmultchain1.dml
@@ -19,17 +19,17 @@
 #
 #-------------------------------------------------------------
 
-
-X = read($1);
-P = read($2);
-v = read($3);
-k = ncol(P);
-
-if(1==1){}
-
-Q = P * (X %*% v);
-HV = t(X) %*% (Q - P * (rowSums (Q) %*% matrix(1, rows=1, cols=k)));
-
-if(1==1){}
-
-write(HV, $4);       
+
+X = read($1);
+P = read($2);
+v = read($3);
+k = ncol(P);
+
+if(1==1){}
+
+Q = P * (X %*% v);
+HV = t(X) %*% (Q - P * (rowSums (Q) %*% matrix(1, rows=1, cols=k)));
+
+if(1==1){}
+
+write(HV, $4);       

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/recompile/rewrite_mapmultchain2.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/recompile/rewrite_mapmultchain2.R b/src/test/scripts/functions/recompile/rewrite_mapmultchain2.R
index 21baaab..1d2ff59 100644
--- a/src/test/scripts/functions/recompile/rewrite_mapmultchain2.R
+++ b/src/test/scripts/functions/recompile/rewrite_mapmultchain2.R
@@ -19,18 +19,18 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-X <- readMM(paste(args[1], "X.mtx", sep=""))
-P <- readMM(paste(args[1], "P.mtx", sep=""))
-v <- readMM(paste(args[1], "v.mtx", sep=""))
-k = ncol(P);
-
-Q = P[, 1:k] * (X %*% v);
-HV = t(X) %*% (Q - P[, 1:k] * (rowSums (Q) %*% matrix(1, 1, k)));
-
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+X <- readMM(paste(args[1], "X.mtx", sep=""))
+P <- readMM(paste(args[1], "P.mtx", sep=""))
+v <- readMM(paste(args[1], "v.mtx", sep=""))
+k = ncol(P);
+
+Q = P[, 1:k] * (X %*% v);
+HV = t(X) %*% (Q - P[, 1:k] * (rowSums (Q) %*% matrix(1, 1, k)));
+
 writeMM(as(HV, "CsparseMatrix"), paste(args[2], "HV", sep="")); 
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/recompile/rewrite_mapmultchain2.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/recompile/rewrite_mapmultchain2.dml b/src/test/scripts/functions/recompile/rewrite_mapmultchain2.dml
index 0e7a83e..fa4db5f 100644
--- a/src/test/scripts/functions/recompile/rewrite_mapmultchain2.dml
+++ b/src/test/scripts/functions/recompile/rewrite_mapmultchain2.dml
@@ -19,17 +19,17 @@
 #
 #-------------------------------------------------------------
 
-
-X = read($1);
-P = read($2);
-v = read($3);
-k = ncol(P);
-
-if(1==1){}
-
-Q = P[, 1:k] * (X %*% v);
-HV = t(X) %*% (Q - P[, 1:k] * (rowSums (Q) %*% matrix(1, rows=1, cols=k)));
-        
-if(1==1){}
-
-write(HV, $4);       
+
+X = read($1);
+P = read($2);
+v = read($3);
+k = ncol(P);
+
+if(1==1){}
+
+Q = P[, 1:k] * (X %*% v);
+HV = t(X) %*% (Q - P[, 1:k] * (rowSums (Q) %*% matrix(1, rows=1, cols=k)));
+        
+if(1==1){}
+
+write(HV, $4);       

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/recompile/while_recompile.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/recompile/while_recompile.dml b/src/test/scripts/functions/recompile/while_recompile.dml
index ba30c92..05dd424 100644
--- a/src/test/scripts/functions/recompile/while_recompile.dml
+++ b/src/test/scripts/functions/recompile/while_recompile.dml
@@ -19,11 +19,11 @@
 #
 #-------------------------------------------------------------
 
-
-V = Rand(rows=$1+1, cols=$2+1, min=$3, max=$3);
-Z = Rand(rows=1,cols=1,min=0,max=0);
-while( castAsScalar(V[1,1])>castAsScalar(Z[1,1]) )
-{
-   Z[1,1] = V[1,1]; 
-}  
+
+V = Rand(rows=$1+1, cols=$2+1, min=$3, max=$3);
+Z = Rand(rows=1,cols=1,min=0,max=0);
+while( castAsScalar(V[1,1])>castAsScalar(Z[1,1]) )
+{
+   Z[1,1] = V[1,1]; 
+}  
 write(Z, $4);       
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/recompile/while_recompile_func_sparse.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/recompile/while_recompile_func_sparse.dml b/src/test/scripts/functions/recompile/while_recompile_func_sparse.dml
index 1870afb..882fd10 100644
--- a/src/test/scripts/functions/recompile/while_recompile_func_sparse.dml
+++ b/src/test/scripts/functions/recompile/while_recompile_func_sparse.dml
@@ -19,35 +19,35 @@
 #
 #-------------------------------------------------------------
 
-
-foo1 = function (Matrix[Double] X)
-    return (Matrix[Double] Y)
-{  
-   V = X;
-   print(sum(V)); 
-   i = 1;
-   while( i <= 1 )
-   {
-      print(sum(V)); 
-      V = foo2(V,i);
-      i = i+1;
-   }
-   Y = V;  
-}
-
-foo2 = function (Matrix[Double] X, Integer i)
-    return (Matrix[Double] Y)
-{  
-   V = X;
-   j = 1; 
-   while( j <= 1 )
-   {
-      V[i,j] = $2;
-      j = j+1; 
-   }    
-   Y = V;   
-}
-
-V = read($1);
-V = foo1( V );
+
+foo1 = function (Matrix[Double] X)
+    return (Matrix[Double] Y)
+{  
+   V = X;
+   print(sum(V)); 
+   i = 1;
+   while( i <= 1 )
+   {
+      print(sum(V)); 
+      V = foo2(V,i);
+      i = i+1;
+   }
+   Y = V;  
+}
+
+foo2 = function (Matrix[Double] X, Integer i)
+    return (Matrix[Double] Y)
+{  
+   V = X;
+   j = 1; 
+   while( j <= 1 )
+   {
+      V[i,j] = $2;
+      j = j+1; 
+   }    
+   Y = V;   
+}
+
+V = read($1);
+V = foo1( V );
 write(V, $3);        
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/recompile/while_recompile_sparse.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/recompile/while_recompile_sparse.dml b/src/test/scripts/functions/recompile/while_recompile_sparse.dml
index 73fbf0f..c0a9858 100644
--- a/src/test/scripts/functions/recompile/while_recompile_sparse.dml
+++ b/src/test/scripts/functions/recompile/while_recompile_sparse.dml
@@ -19,20 +19,20 @@
 #
 #-------------------------------------------------------------
 
-
-V = read($1);
-print(sum(V)); 
-i = 1;
-while( i <= 1 )
-{
-   print(sum(V)); 
-   
-   j = 1; 
-   while( j <= 1 )
-   {
-      V[i,j] = $2;
-      j = j+1; 
-   }
-   i = i+1;
-}  
+
+V = read($1);
+print(sum(V)); 
+i = 1;
+while( i <= 1 )
+{
+   print(sum(V)); 
+   
+   j = 1; 
+   while( j <= 1 )
+   {
+      V[i,j] = $2;
+      j = j+1; 
+   }
+   i = i+1;
+}  
 write(V, $3);        
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/reorg/DiagV2MTest.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/reorg/DiagV2MTest.R b/src/test/scripts/functions/reorg/DiagV2MTest.R
index d6e0159..7627d42 100644
--- a/src/test/scripts/functions/reorg/DiagV2MTest.R
+++ b/src/test/scripts/functions/reorg/DiagV2MTest.R
@@ -19,15 +19,15 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-library("Matrix")
-A1=readMM(paste(args[1], "A.mtx", sep=""))
-A = as.vector(A1);
-B=diag(A)
-C=matrix(1, nrow(B), ncol(B));
-D=B%*%C
-C=B+D
-writeMM(as(C,"CsparseMatrix"), paste(args[2], "C", sep=""), format="text")
-
+
+args <- commandArgs(TRUE)
+options(digits=22)
+library("Matrix")
+A1=readMM(paste(args[1], "A.mtx", sep=""))
+A = as.vector(A1);
+B=diag(A)
+C=matrix(1, nrow(B), ncol(B));
+D=B%*%C
+C=B+D
+writeMM(as(C,"CsparseMatrix"), paste(args[2], "C", sep=""), format="text")
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/reorg/DiagV2MTest.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/reorg/DiagV2MTest.dml b/src/test/scripts/functions/reorg/DiagV2MTest.dml
index 90341a7..2fdf5c9 100644
--- a/src/test/scripts/functions/reorg/DiagV2MTest.dml
+++ b/src/test/scripts/functions/reorg/DiagV2MTest.dml
@@ -19,10 +19,10 @@
 #
 #-------------------------------------------------------------
 
-
-A=read($1, rows=$2, cols=1, format="text")
-B=diag(A)
-C=matrix(1, rows=nrow(B), cols=ncol(B));
-D=B%*%C
-C=B+D
+
+A=read($1, rows=$2, cols=1, format="text")
+B=diag(A)
+C=matrix(1, rows=nrow(B), cols=ncol(B));
+D=B%*%C
+C=B+D
 write(C, $3, format="text")
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/reorg/MatrixReshape1.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/reorg/MatrixReshape1.R b/src/test/scripts/functions/reorg/MatrixReshape1.R
index 5344dff..9532686 100644
--- a/src/test/scripts/functions/reorg/MatrixReshape1.R
+++ b/src/test/scripts/functions/reorg/MatrixReshape1.R
@@ -19,15 +19,15 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-library("Matrix")
-
-X=readMM(paste(args[1], "X.mtx", sep=""))
-Y=matrix(t(X),nrow=as.numeric(args[2]),ncol=as.numeric(args[3]),byrow=TRUE)
-writeMM(as(Y,"CsparseMatrix"), paste(args[4], "Y", sep=""), format="text")
-
-
-
-
+
+args <- commandArgs(TRUE)
+options(digits=22)
+library("Matrix")
+
+X=readMM(paste(args[1], "X.mtx", sep=""))
+Y=matrix(t(X),nrow=as.numeric(args[2]),ncol=as.numeric(args[3]),byrow=TRUE)
+writeMM(as(Y,"CsparseMatrix"), paste(args[4], "Y", sep=""), format="text")
+
+
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/reorg/MatrixReshape1.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/reorg/MatrixReshape1.dml b/src/test/scripts/functions/reorg/MatrixReshape1.dml
index e8ad560..6931287 100644
--- a/src/test/scripts/functions/reorg/MatrixReshape1.dml
+++ b/src/test/scripts/functions/reorg/MatrixReshape1.dml
@@ -19,8 +19,8 @@
 #
 #-------------------------------------------------------------
 
-
-X = read($1, rows=$2, cols=$3, format="text");
-
-Y = matrix(X, rows=$4, cols=$5, byrow=TRUE);
+
+X = read($1, rows=$2, cols=$3, format="text");
+
+Y = matrix(X, rows=$4, cols=$5, byrow=TRUE);
 write(Y, $6, format="text");  
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/reorg/MatrixReshape2.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/reorg/MatrixReshape2.R b/src/test/scripts/functions/reorg/MatrixReshape2.R
index 6963a06..5d59269 100644
--- a/src/test/scripts/functions/reorg/MatrixReshape2.R
+++ b/src/test/scripts/functions/reorg/MatrixReshape2.R
@@ -19,15 +19,15 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-library("Matrix")
-
-X=readMM(paste(args[1], "X.mtx", sep=""))
-Y=matrix(X,nrow=as.numeric(args[2]),ncol=as.numeric(args[3]),byrow=FALSE)
-writeMM(as(Y,"CsparseMatrix"), paste(args[4], "Y", sep=""), format="text")
-
-
-
-
+
+args <- commandArgs(TRUE)
+options(digits=22)
+library("Matrix")
+
+X=readMM(paste(args[1], "X.mtx", sep=""))
+Y=matrix(X,nrow=as.numeric(args[2]),ncol=as.numeric(args[3]),byrow=FALSE)
+writeMM(as(Y,"CsparseMatrix"), paste(args[4], "Y", sep=""), format="text")
+
+
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/reorg/MatrixReshape2.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/reorg/MatrixReshape2.dml b/src/test/scripts/functions/reorg/MatrixReshape2.dml
index 9160028..3f6fc7f 100644
--- a/src/test/scripts/functions/reorg/MatrixReshape2.dml
+++ b/src/test/scripts/functions/reorg/MatrixReshape2.dml
@@ -19,8 +19,8 @@
 #
 #-------------------------------------------------------------
 
-
-X = read($1, rows=$2, cols=$3, format="text");
-
-Y = matrix(X, rows=$4, cols=$5, byrow=FALSE);
+
+X = read($1, rows=$2, cols=$3, format="text");
+
+Y = matrix(X, rows=$4, cols=$5, byrow=FALSE);
 write(Y, $6, format="text");  
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/reorg/Order.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/reorg/Order.R b/src/test/scripts/functions/reorg/Order.R
index e3c1f9d..db5d7f5 100644
--- a/src/test/scripts/functions/reorg/Order.R
+++ b/src/test/scripts/functions/reorg/Order.R
@@ -19,26 +19,26 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-library("Matrix")
-
-A = readMM(paste(args[1], "A.mtx", sep=""))
-by = as.integer(args[2]);
-desc = as.logical(args[3]);
-ixret = as.logical(args[4]);
-
-col = A[,by];
-
-if( ixret ) {
-  B = order(col, decreasing=desc);
-} else {
-  B = A[order(col, decreasing=desc),];
-}
-
-writeMM(as(B,"CsparseMatrix"), paste(args[5], "B", sep=""))
-
-
-
-
+
+args <- commandArgs(TRUE)
+options(digits=22)
+library("Matrix")
+
+A = readMM(paste(args[1], "A.mtx", sep=""))
+by = as.integer(args[2]);
+desc = as.logical(args[3]);
+ixret = as.logical(args[4]);
+
+col = A[,by];
+
+if( ixret ) {
+  B = order(col, decreasing=desc);
+} else {
+  B = A[order(col, decreasing=desc),];
+}
+
+writeMM(as(B,"CsparseMatrix"), paste(args[5], "B", sep=""))
+
+
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/reorg/Order.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/reorg/Order.dml b/src/test/scripts/functions/reorg/Order.dml
index aa45aad..3ac2fce 100644
--- a/src/test/scripts/functions/reorg/Order.dml
+++ b/src/test/scripts/functions/reorg/Order.dml
@@ -19,11 +19,11 @@
 #
 #-------------------------------------------------------------
 
-
-A = read($1);
-
-if(1==1){} #for recompilation rewrites
-
-B = order(target=A, by=$2, decreasing=$3, index.return=$4);
-
+
+A = read($1);
+
+if(1==1){} #for recompilation rewrites
+
+B = order(target=A, by=$2, decreasing=$3, index.return=$4);
+
 write(B, $5, format="text");  
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/reorg/OrderDyn.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/reorg/OrderDyn.R b/src/test/scripts/functions/reorg/OrderDyn.R
index 74bc47b..5eb3518 100644
--- a/src/test/scripts/functions/reorg/OrderDyn.R
+++ b/src/test/scripts/functions/reorg/OrderDyn.R
@@ -19,25 +19,25 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-library("Matrix")
-
-A = readMM(paste(args[1], "A.mtx", sep=""))
-by = as.integer(args[2]);
-desc = (sum(A)>100)
-ixret = (sum(A)>200)
-col = A[,by];
-
-if( ixret ) {
-  B = order(col, decreasing=desc);
-} else {
-  B = A[order(col, decreasing=desc),];
-}
-
-writeMM(as(B,"CsparseMatrix"), paste(args[5], "B", sep=""))
-
-
-
-
+
+args <- commandArgs(TRUE)
+options(digits=22)
+library("Matrix")
+
+A = readMM(paste(args[1], "A.mtx", sep=""))
+by = as.integer(args[2]);
+desc = (sum(A)>100)
+ixret = (sum(A)>200)
+col = A[,by];
+
+if( ixret ) {
+  B = order(col, decreasing=desc);
+} else {
+  B = A[order(col, decreasing=desc),];
+}
+
+writeMM(as(B,"CsparseMatrix"), paste(args[5], "B", sep=""))
+
+
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/reorg/OrderDyn.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/reorg/OrderDyn.dml b/src/test/scripts/functions/reorg/OrderDyn.dml
index f8c6558..35f4d0a 100644
--- a/src/test/scripts/functions/reorg/OrderDyn.dml
+++ b/src/test/scripts/functions/reorg/OrderDyn.dml
@@ -19,13 +19,13 @@
 #
 #-------------------------------------------------------------
 
-
-A = read($1);
-
-if(1==1){} #for recompilation rewrites
-
-desc = (sum(A)>100)
-ixret = (sum(A)>200)
-B = order(target=A, by=$2, decreasing=desc, index.return=ixret);
-
+
+A = read($1);
+
+if(1==1){} #for recompilation rewrites
+
+desc = (sum(A)>100)
+ixret = (sum(A)>200)
+B = order(target=A, by=$2, decreasing=desc, index.return=ixret);
+
 write(B, $5, format="text");  
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/reorg/Reverse1.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/reorg/Reverse1.R b/src/test/scripts/functions/reorg/Reverse1.R
index 7537fe9..1599d60 100644
--- a/src/test/scripts/functions/reorg/Reverse1.R
+++ b/src/test/scripts/functions/reorg/Reverse1.R
@@ -1,41 +1,41 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-
-args <- commandArgs(TRUE)
-options(digits=22)
-library("Matrix")
-
-A = as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
-
-B = matrix(0, nrow(A), ncol(A));
-for( i in 1:ncol(A) ) 
-{
-   col = as.vector(A[,i])
-   col = rev(col);
-   B[,i] = col;
-}
-
-writeMM(as(B,"CsparseMatrix"), paste(args[2], "B", sep=""))
-
-
-
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+args <- commandArgs(TRUE)
+options(digits=22)
+library("Matrix")
+
+A = as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
+
+B = matrix(0, nrow(A), ncol(A));
+for( i in 1:ncol(A) ) 
+{
+   col = as.vector(A[,i])
+   col = rev(col);
+   B[,i] = col;
+}
+
+writeMM(as(B,"CsparseMatrix"), paste(args[2], "B", sep=""))
+
+
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/reorg/Reverse1.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/reorg/Reverse1.dml b/src/test/scripts/functions/reorg/Reverse1.dml
index 586d05a..7a7abe6 100644
--- a/src/test/scripts/functions/reorg/Reverse1.dml
+++ b/src/test/scripts/functions/reorg/Reverse1.dml
@@ -1,25 +1,25 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-
-A = read($1);
-B = rev(A);
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+A = read($1);
+B = rev(A);
 write(B, $2);  
\ No newline at end of file


[37/55] [partial] incubator-systemml git commit: [SYSTEMML-482] [SYSTEMML-480] Adding a Git attributes file to enfore Unix-styled line endings, and normalizing all of the line endings.

Posted by du...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/RemoteDPParWorkerReducer.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/RemoteDPParWorkerReducer.java b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/RemoteDPParWorkerReducer.java
index d54ce97..cb9fd06 100644
--- a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/RemoteDPParWorkerReducer.java
+++ b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/RemoteDPParWorkerReducer.java
@@ -1,372 +1,372 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.sysml.runtime.controlprogram.parfor;
-
-import java.io.IOException;
-import java.util.HashMap;
-import java.util.Iterator;
-
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reducer;
-import org.apache.hadoop.mapred.Reporter;
-
-import org.apache.sysml.api.DMLScript;
-import org.apache.sysml.conf.ConfigurationManager;
-import org.apache.sysml.runtime.DMLRuntimeException;
-import org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PDataPartitionFormat;
-import org.apache.sysml.runtime.controlprogram.caching.CacheStatistics;
-import org.apache.sysml.runtime.controlprogram.caching.CacheableData;
-import org.apache.sysml.runtime.controlprogram.caching.MatrixObject;
-import org.apache.sysml.runtime.controlprogram.parfor.Task.TaskType;
-import org.apache.sysml.runtime.controlprogram.parfor.stat.InfrastructureAnalyzer;
-import org.apache.sysml.runtime.controlprogram.parfor.stat.StatisticMonitor;
-import org.apache.sysml.runtime.controlprogram.parfor.util.IDHandler;
-import org.apache.sysml.runtime.controlprogram.parfor.util.PairWritableBlock;
-import org.apache.sysml.runtime.controlprogram.parfor.util.PairWritableCell;
-import org.apache.sysml.runtime.instructions.cp.IntObject;
-import org.apache.sysml.runtime.matrix.data.MatrixBlock;
-import org.apache.sysml.runtime.matrix.data.OutputInfo;
-import org.apache.sysml.runtime.matrix.mapred.MRJobConfiguration;
-import org.apache.sysml.runtime.util.LocalFileUtils;
-import org.apache.sysml.utils.Statistics;
-
-/**
- *
- */
-public class RemoteDPParWorkerReducer extends ParWorker
-	implements Reducer<LongWritable, Writable, Writable, Writable>
-{
-
-	//MR data partitioning attributes
-	private String _inputVar = null;
-	private String _iterVar = null;
-	private PDataPartitionFormat _dpf = null;
-	private OutputInfo _info = null;
-	private int _rlen = -1;
-	private int _clen = -1;
-	private int _brlen = -1;
-	private int _bclen = -1;
-	
-	//reuse matrix partition
-	private MatrixBlock _partition = null; 
-	private boolean _tSparseCol = false;
-		
-	//MR ParWorker attributes  
-	protected String  _stringID       = null; 
-	protected HashMap<String, String> _rvarFnames = null; 
-
-	//cached collector/reporter
-	protected OutputCollector<Writable, Writable> _out = null;
-	protected Reporter _report = null;
-	
-	/**
-	 * 
-	 */
-	public RemoteDPParWorkerReducer() 
-	{
-		
-	}
-	
-	@Override
-	public void reduce(LongWritable key, Iterator<Writable> valueList, OutputCollector<Writable, Writable> out, Reporter reporter)
-		throws IOException 
-	{
-		//cache collector/reporter (for write in close)
-		_out = out;
-		_report = reporter;
-		
-		//collect input partition
-		if( _info == OutputInfo.BinaryBlockOutputInfo )
-			_partition = collectBinaryBlock( valueList );
-		else
-			_partition = collectBinaryCellInput( valueList );
-			
-		//update in-memory matrix partition
-		MatrixObject mo = (MatrixObject)_ec.getVariable( _inputVar );
-		mo.setInMemoryPartition( _partition );
-		
-		//execute program
-		LOG.trace("execute RemoteDPParWorkerReducer "+_stringID+" ("+_workerID+")");
-		try {
-			//create tasks for input data
-			Task lTask = new Task(TaskType.SET);
-			lTask.addIteration( new IntObject(_iterVar,key.get()) );
-			
-			//execute program
-			executeTask( lTask );
-		}
-		catch(Exception ex)
-		{
-			throw new IOException("ParFOR: Failed to execute task.",ex);
-		}
-		
-		//statistic maintenance (after final export)
-		RemoteParForUtils.incrementParForMRCounters(_report, 1, 1);
-	}
-
-	/**
-	 * 
-	 */
-	@Override
-	public void configure(JobConf job)
-	{
-		//Step 1: configure data partitioning information
-		_rlen = (int)MRJobConfiguration.getPartitioningNumRows( job );
-		_clen = (int)MRJobConfiguration.getPartitioningNumCols( job );
-		_brlen = MRJobConfiguration.getPartitioningBlockNumRows( job );
-		_bclen = MRJobConfiguration.getPartitioningBlockNumCols( job );
-		_iterVar = MRJobConfiguration.getPartitioningItervar( job );
-		_inputVar = MRJobConfiguration.getPartitioningMatrixvar( job );
-		_dpf = MRJobConfiguration.getPartitioningFormat( job );		
-		switch( _dpf ) { //create matrix partition for reuse
-			case ROW_WISE:    _rlen = 1; break;
-			case COLUMN_WISE: _clen = 1; break;
-			default:  throw new RuntimeException("Partition format not yet supported in fused partition-execute: "+_dpf);
-		}
-		_info = MRJobConfiguration.getPartitioningOutputInfo( job );
-		_tSparseCol = MRJobConfiguration.getPartitioningTransposedCol( job ); 
-		if( _tSparseCol )
-			_partition = new MatrixBlock((int)_clen, _rlen, true);
-		else
-			_partition = new MatrixBlock((int)_rlen, _clen, false);
-
-		//Step 1: configure parworker
-		String taskID = job.get("mapred.tip.id");		
-		LOG.trace("configure RemoteDPParWorkerReducer "+taskID);
-			
-		try
-		{
-			_stringID = taskID;
-			_workerID = IDHandler.extractIntID(_stringID); //int task ID
-
-			//use the given job configuration as source for all new job confs 
-			//NOTE: this is required because on HDP 2.3, the classpath of mr tasks contained hadoop-common.jar 
-			//which includes a core-default.xml configuration which hides the actual default cluster configuration
-			//in the context of mr jobs (for example this config points to local fs instead of hdfs by default). 
-			if( !InfrastructureAnalyzer.isLocalMode(job) ) {
-				ConfigurationManager.setCachedJobConf(job);
-			}
-			
-			//create local runtime program
-			String in = MRJobConfiguration.getProgramBlocks(job);
-			ParForBody body = ProgramConverter.parseParForBody(in, (int)_workerID);
-			_childBlocks = body.getChildBlocks();
-			_ec          = body.getEc();				
-			_resultVars  = body.getResultVarNames();
-	
-			//init local cache manager 
-			if( !CacheableData.isCachingActive() ) {
-				String uuid = IDHandler.createDistributedUniqueID();
-				LocalFileUtils.createWorkingDirectoryWithUUID( uuid );
-				CacheableData.initCaching( uuid ); //incl activation, cache dir creation (each map task gets its own dir for simplified cleanup)
-			}
-			if( !CacheableData.cacheEvictionLocalFilePrefix.contains("_") ){ //account for local mode
-				CacheableData.cacheEvictionLocalFilePrefix = CacheableData.cacheEvictionLocalFilePrefix +"_" + _workerID; 
-			}
-			
-			//ensure that resultvar files are not removed
-			super.pinResultVariables();
-		
-			//enable/disable caching (if required)
-			boolean cpCaching = MRJobConfiguration.getParforCachingConfig( job );
-			if( !cpCaching )
-				CacheableData.disableCaching();
-
-			_numTasks    = 0;
-			_numIters    = 0;			
-		}
-		catch(Exception ex)
-		{
-			throw new RuntimeException(ex);
-		}
-		
-		//disable parfor stat monitoring, reporting execution times via counters not useful 
-		StatisticMonitor.disableStatMonitoring();
-		
-		//always reset stats because counters per map task (for case of JVM reuse)
-		if( DMLScript.STATISTICS && !InfrastructureAnalyzer.isLocalMode(job) )
-		{
-			CacheStatistics.reset();
-			Statistics.reset();
-		}
-	}
-	
-	/**
-	 * 
-	 */
-	@Override
-	public void close() 
-	    throws IOException 
-	{
-		try
-		{
-			//write output if required (matrix indexed write)
-			RemoteParForUtils.exportResultVariables( _workerID, _ec.getVariables(), _resultVars, _out );
-		
-			//statistic maintenance (after final export)
-			RemoteParForUtils.incrementParForMRCounters(_report, 0, 0);
-			
-			//print heaver hitter per task
-			JobConf job = ConfigurationManager.getCachedJobConf();
-			if( DMLScript.STATISTICS && !InfrastructureAnalyzer.isLocalMode(job) )
-				LOG.info("\nSystemML Statistics:\nHeavy hitter instructions (name, time, count):\n" + Statistics.getHeavyHitters(10));		
-		}
-		catch(Exception ex)
-		{
-			throw new IOException( ex );
-		}
-		
-		//cleanup cache and local tmp dir
-		RemoteParForUtils.cleanupWorkingDirectories();
-		
-		//ensure caching is not disabled for CP in local mode
-		CacheableData.enableCaching();
-	}
-	
-	/**
-	 * Collects a matrixblock partition from a given input iterator over 
-	 * binary blocks.
-	 * 
-	 * Note it reuses the instance attribute _partition - multiple calls
-	 * will overwrite the result.
-	 * 
-	 * @param valueList
-	 * @return
-	 * @throws IOException 
-	 */
-	private MatrixBlock collectBinaryBlock( Iterator<Writable> valueList ) 
-		throws IOException 
-	{
-		try
-		{
-			//reset reuse block, keep configured representation
-			_partition.reset(_rlen, _clen);	
-
-			while( valueList.hasNext() )
-			{
-				PairWritableBlock pairValue = (PairWritableBlock)valueList.next();
-				int row_offset = (int)(pairValue.indexes.getRowIndex()-1)*_brlen;
-				int col_offset = (int)(pairValue.indexes.getColumnIndex()-1)*_bclen;
-				MatrixBlock block = pairValue.block;
-				if( !_partition.isInSparseFormat() ) //DENSE
-				{
-					_partition.copy( row_offset, row_offset+block.getNumRows()-1, 
-							   col_offset, col_offset+block.getNumColumns()-1,
-							   pairValue.block, false ); 
-				}
-				else //SPARSE 
-				{
-					_partition.appendToSparse(pairValue.block, row_offset, col_offset);
-				}
-			}
-
-			//final partition cleanup
-			cleanupCollectedMatrixPartition( _partition.isInSparseFormat() );
-		}
-		catch(DMLRuntimeException ex)
-		{
-			throw new IOException(ex);
-		}
-		
-		return _partition;
-	}
-	
-	
-	/**
-	 * Collects a matrixblock partition from a given input iterator over 
-	 * binary cells.
-	 * 
-	 * Note it reuses the instance attribute _partition - multiple calls
-	 * will overwrite the result.
-	 * 
-	 * @param valueList
-	 * @return
-	 * @throws IOException 
-	 */
-	private MatrixBlock collectBinaryCellInput( Iterator<Writable> valueList ) 
-		throws IOException 
-	{
-		//reset reuse block, keep configured representation
-		if( _tSparseCol )
-			_partition.reset(_clen, _rlen);	
-		else
-			_partition.reset(_rlen, _clen);
-		
-		switch( _dpf )
-		{
-			case ROW_WISE:
-				while( valueList.hasNext() )
-				{
-					PairWritableCell pairValue = (PairWritableCell)valueList.next();
-					if( pairValue.indexes.getColumnIndex()<0 )
-						continue; //cells used to ensure empty partitions
-					_partition.quickSetValue(0, (int)pairValue.indexes.getColumnIndex()-1, pairValue.cell.getValue());
-				}
-				break;
-			case COLUMN_WISE:
-				while( valueList.hasNext() )
-				{
-					PairWritableCell pairValue = (PairWritableCell)valueList.next();
-					if( pairValue.indexes.getRowIndex()<0 )
-						continue; //cells used to ensure empty partitions
-					if( _tSparseCol )
-						_partition.appendValue(0,(int)pairValue.indexes.getRowIndex()-1, pairValue.cell.getValue());
-					else
-						_partition.quickSetValue((int)pairValue.indexes.getRowIndex()-1, 0, pairValue.cell.getValue());
-				}
-				break;
-			default: 
-				throw new IOException("Partition format not yet supported in fused partition-execute: "+_dpf);
-		}
-		
-		//final partition cleanup
-		cleanupCollectedMatrixPartition(_tSparseCol);
-		
-		return _partition;
-	}
-	
-	/**
-	 * 
-	 * @param sort
-	 * @throws IOException
-	 */
-	private void cleanupCollectedMatrixPartition(boolean sort) 
-		throws IOException
-	{
-		//sort sparse row contents if required
-		if( _partition.isInSparseFormat() && sort )
-			_partition.sortSparseRows();
-
-		//ensure right number of nnz
-		if( !_partition.isInSparseFormat() )
-			_partition.recomputeNonZeros();
-			
-		//exam and switch dense/sparse representation
-		try {
-			_partition.examSparsity();
-		}
-		catch(Exception ex){
-			throw new IOException(ex);
-		}
-	}
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.runtime.controlprogram.parfor;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Iterator;
+
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reducer;
+import org.apache.hadoop.mapred.Reporter;
+
+import org.apache.sysml.api.DMLScript;
+import org.apache.sysml.conf.ConfigurationManager;
+import org.apache.sysml.runtime.DMLRuntimeException;
+import org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PDataPartitionFormat;
+import org.apache.sysml.runtime.controlprogram.caching.CacheStatistics;
+import org.apache.sysml.runtime.controlprogram.caching.CacheableData;
+import org.apache.sysml.runtime.controlprogram.caching.MatrixObject;
+import org.apache.sysml.runtime.controlprogram.parfor.Task.TaskType;
+import org.apache.sysml.runtime.controlprogram.parfor.stat.InfrastructureAnalyzer;
+import org.apache.sysml.runtime.controlprogram.parfor.stat.StatisticMonitor;
+import org.apache.sysml.runtime.controlprogram.parfor.util.IDHandler;
+import org.apache.sysml.runtime.controlprogram.parfor.util.PairWritableBlock;
+import org.apache.sysml.runtime.controlprogram.parfor.util.PairWritableCell;
+import org.apache.sysml.runtime.instructions.cp.IntObject;
+import org.apache.sysml.runtime.matrix.data.MatrixBlock;
+import org.apache.sysml.runtime.matrix.data.OutputInfo;
+import org.apache.sysml.runtime.matrix.mapred.MRJobConfiguration;
+import org.apache.sysml.runtime.util.LocalFileUtils;
+import org.apache.sysml.utils.Statistics;
+
+/**
+ *
+ */
+public class RemoteDPParWorkerReducer extends ParWorker
+	implements Reducer<LongWritable, Writable, Writable, Writable>
+{
+
+	//MR data partitioning attributes
+	private String _inputVar = null;
+	private String _iterVar = null;
+	private PDataPartitionFormat _dpf = null;
+	private OutputInfo _info = null;
+	private int _rlen = -1;
+	private int _clen = -1;
+	private int _brlen = -1;
+	private int _bclen = -1;
+	
+	//reuse matrix partition
+	private MatrixBlock _partition = null; 
+	private boolean _tSparseCol = false;
+		
+	//MR ParWorker attributes  
+	protected String  _stringID       = null; 
+	protected HashMap<String, String> _rvarFnames = null; 
+
+	//cached collector/reporter
+	protected OutputCollector<Writable, Writable> _out = null;
+	protected Reporter _report = null;
+	
+	/**
+	 * 
+	 */
+	public RemoteDPParWorkerReducer() 
+	{
+		
+	}
+	
+	@Override
+	public void reduce(LongWritable key, Iterator<Writable> valueList, OutputCollector<Writable, Writable> out, Reporter reporter)
+		throws IOException 
+	{
+		//cache collector/reporter (for write in close)
+		_out = out;
+		_report = reporter;
+		
+		//collect input partition
+		if( _info == OutputInfo.BinaryBlockOutputInfo )
+			_partition = collectBinaryBlock( valueList );
+		else
+			_partition = collectBinaryCellInput( valueList );
+			
+		//update in-memory matrix partition
+		MatrixObject mo = (MatrixObject)_ec.getVariable( _inputVar );
+		mo.setInMemoryPartition( _partition );
+		
+		//execute program
+		LOG.trace("execute RemoteDPParWorkerReducer "+_stringID+" ("+_workerID+")");
+		try {
+			//create tasks for input data
+			Task lTask = new Task(TaskType.SET);
+			lTask.addIteration( new IntObject(_iterVar,key.get()) );
+			
+			//execute program
+			executeTask( lTask );
+		}
+		catch(Exception ex)
+		{
+			throw new IOException("ParFOR: Failed to execute task.",ex);
+		}
+		
+		//statistic maintenance (after final export)
+		RemoteParForUtils.incrementParForMRCounters(_report, 1, 1);
+	}
+
+	/**
+	 * Reads the data partitioning settings from the job, parses the parfor body, and sets up caching and statistics.
+	 */
+	@Override
+	public void configure(JobConf job)
+	{
+		//Step 1: configure data partitioning information
+		_rlen = (int)MRJobConfiguration.getPartitioningNumRows( job );
+		_clen = (int)MRJobConfiguration.getPartitioningNumCols( job );
+		_brlen = MRJobConfiguration.getPartitioningBlockNumRows( job );
+		_bclen = MRJobConfiguration.getPartitioningBlockNumCols( job );
+		_iterVar = MRJobConfiguration.getPartitioningItervar( job );
+		_inputVar = MRJobConfiguration.getPartitioningMatrixvar( job );
+		_dpf = MRJobConfiguration.getPartitioningFormat( job );		
+		switch( _dpf ) { //dims of the reusable matrix partition: a single row (ROW_WISE) or a single column (COLUMN_WISE)
+			case ROW_WISE:    _rlen = 1; break;
+			case COLUMN_WISE: _clen = 1; break;
+			default:  throw new RuntimeException("Partition format not yet supported in fused partition-execute: "+_dpf);
+		}
+		_info = MRJobConfiguration.getPartitioningOutputInfo( job );
+		_tSparseCol = MRJobConfiguration.getPartitioningTransposedCol( job ); 
+		if( _tSparseCol )
+			_partition = new MatrixBlock((int)_clen, _rlen, true);
+		else
+			_partition = new MatrixBlock((int)_rlen, _clen, false);
+
+		//Step 2: configure parworker
+		String taskID = job.get("mapred.tip.id");		
+		LOG.trace("configure RemoteDPParWorkerReducer "+taskID);
+			
+		try
+		{
+			_stringID = taskID;
+			_workerID = IDHandler.extractIntID(_stringID); //int task ID
+
+			//use the given job configuration as source for all new job confs 
+			//NOTE: this is required because on HDP 2.3, the classpath of mr tasks contained hadoop-common.jar 
+			//which includes a core-default.xml configuration which hides the actual default cluster configuration
+			//in the context of mr jobs (for example this config points to local fs instead of hdfs by default). 
+			if( !InfrastructureAnalyzer.isLocalMode(job) ) {
+				ConfigurationManager.setCachedJobConf(job);
+			}
+			
+			//create local runtime program
+			String in = MRJobConfiguration.getProgramBlocks(job);
+			ParForBody body = ProgramConverter.parseParForBody(in, (int)_workerID);
+			_childBlocks = body.getChildBlocks();
+			_ec          = body.getEc();				
+			_resultVars  = body.getResultVarNames();
+	
+			//init local cache manager (only if caching is not already active)
+			if( !CacheableData.isCachingActive() ) {
+				String uuid = IDHandler.createDistributedUniqueID();
+				LocalFileUtils.createWorkingDirectoryWithUUID( uuid );
+				CacheableData.initCaching( uuid ); //incl activation, cache dir creation (each map task gets its own dir for simplified cleanup)
+			}
+			if( !CacheableData.cacheEvictionLocalFilePrefix.contains("_") ){ //account for local mode
+				CacheableData.cacheEvictionLocalFilePrefix = CacheableData.cacheEvictionLocalFilePrefix +"_" + _workerID; 
+			}
+			
+			//ensure that resultvar files are not removed
+			super.pinResultVariables();
+		
+			//enable/disable caching (if required)
+			boolean cpCaching = MRJobConfiguration.getParforCachingConfig( job );
+			if( !cpCaching )
+				CacheableData.disableCaching();
+
+			_numTasks    = 0;
+			_numIters    = 0;			
+		}
+		catch(Exception ex)
+		{
+			throw new RuntimeException(ex);
+		}
+		
+		//disable parfor stat monitoring, reporting execution times via counters not useful 
+		StatisticMonitor.disableStatMonitoring();
+		
+		//always reset stats because counters per map task (for case of JVM reuse)
+		if( DMLScript.STATISTICS && !InfrastructureAnalyzer.isLocalMode(job) )
+		{
+			CacheStatistics.reset();
+			Statistics.reset();
+		}
+	}
+	
+	/**
+	 * Exports the result variables, reports counters, and then cleans up working directories and re-enables caching.
+	 */
+	@Override
+	public void close() 
+	    throws IOException 
+	{
+		try
+		{
+			//write output if required (matrix indexed write)
+			RemoteParForUtils.exportResultVariables( _workerID, _ec.getVariables(), _resultVars, _out );
+		
+			//statistic maintenance (after final export)
+			RemoteParForUtils.incrementParForMRCounters(_report, 0, 0);
+			
+			//print heavy hitters per task (only with statistics enabled and not in local mode)
+			JobConf job = ConfigurationManager.getCachedJobConf();
+			if( DMLScript.STATISTICS && !InfrastructureAnalyzer.isLocalMode(job) )
+				LOG.info("\nSystemML Statistics:\nHeavy hitter instructions (name, time, count):\n" + Statistics.getHeavyHitters(10));		
+		}
+		catch(Exception ex)
+		{
+			throw new IOException( ex );
+		}
+		
+		//cleanup cache and local tmp dir
+		RemoteParForUtils.cleanupWorkingDirectories();
+		
+		//ensure caching is not disabled for CP in local mode
+		CacheableData.enableCaching();
+	}
+	
+	/**
+	 * Collects a matrixblock partition from a given input iterator over 
+	 * binary blocks.
+	 * 
+	 * Note it reuses the instance attribute _partition - multiple calls
+	 * will overwrite the result.
+	 * 
+	 * @param valueList iterator over the PairWritableBlock values of one partition
+	 * @return the reused instance attribute _partition, filled with the collected blocks
+	 * @throws IOException if a DMLRuntimeException occurs while collecting, or if the final cleanup fails
+	 */
+	private MatrixBlock collectBinaryBlock( Iterator<Writable> valueList ) 
+		throws IOException 
+	{
+		try
+		{
+			//reset reuse block, keep configured representation
+			_partition.reset(_rlen, _clen);	
+
+			while( valueList.hasNext() )
+			{
+				PairWritableBlock pairValue = (PairWritableBlock)valueList.next();
+				int row_offset = (int)(pairValue.indexes.getRowIndex()-1)*_brlen;
+				int col_offset = (int)(pairValue.indexes.getColumnIndex()-1)*_bclen;
+				MatrixBlock block = pairValue.block;
+				if( !_partition.isInSparseFormat() ) //DENSE
+				{
+					_partition.copy( row_offset, row_offset+block.getNumRows()-1, 
+							   col_offset, col_offset+block.getNumColumns()-1,
+							   pairValue.block, false ); 
+				}
+				else //SPARSE 
+				{
+					_partition.appendToSparse(pairValue.block, row_offset, col_offset);
+				}
+			}
+
+			//final partition cleanup
+			cleanupCollectedMatrixPartition( _partition.isInSparseFormat() );
+		}
+		catch(DMLRuntimeException ex)
+		{
+			throw new IOException(ex);
+		}
+		
+		return _partition;
+	}
+	
+	
+	/**
+	 * Collects a matrixblock partition from a given input iterator over 
+	 * binary cells.
+	 * 
+	 * Note it reuses the instance attribute _partition - multiple calls
+	 * will overwrite the result.
+	 * 
+	 * @param valueList iterator over the PairWritableCell values of one partition
+	 * @return the reused instance attribute _partition, filled with the collected cells
+	 * @throws IOException if the partition format is neither ROW_WISE nor COLUMN_WISE, or if the final cleanup fails
+	 */
+	private MatrixBlock collectBinaryCellInput( Iterator<Writable> valueList ) 
+		throws IOException 
+	{
+		//reset reuse block, keep configured representation
+		if( _tSparseCol )
+			_partition.reset(_clen, _rlen);	
+		else
+			_partition.reset(_rlen, _clen);
+		
+		switch( _dpf )
+		{
+			case ROW_WISE:
+				while( valueList.hasNext() )
+				{
+					PairWritableCell pairValue = (PairWritableCell)valueList.next();
+					if( pairValue.indexes.getColumnIndex()<0 )
+						continue; //skip marker cells (negative column index) used to ensure empty partitions
+					_partition.quickSetValue(0, (int)pairValue.indexes.getColumnIndex()-1, pairValue.cell.getValue());
+				}
+				break;
+			case COLUMN_WISE:
+				while( valueList.hasNext() )
+				{
+					PairWritableCell pairValue = (PairWritableCell)valueList.next();
+					if( pairValue.indexes.getRowIndex()<0 )
+						continue; //skip marker cells (negative row index) used to ensure empty partitions
+					if( _tSparseCol )
+						_partition.appendValue(0,(int)pairValue.indexes.getRowIndex()-1, pairValue.cell.getValue());
+					else
+						_partition.quickSetValue((int)pairValue.indexes.getRowIndex()-1, 0, pairValue.cell.getValue());
+				}
+				break;
+			default: 
+				throw new IOException("Partition format not yet supported in fused partition-execute: "+_dpf);
+		}
+		
+		//final partition cleanup
+		cleanupCollectedMatrixPartition(_tSparseCol);
+		
+		return _partition;
+	}
+	
+	/**
+	 * Finalizes _partition: optional sort of sparse rows, nnz recomputation if dense, and a sparsity examination.
+	 * @param sort if true and the partition is in sparse format, its sparse rows are sorted
+	 * @throws IOException if the sparsity examination fails
+	 */
+	private void cleanupCollectedMatrixPartition(boolean sort) 
+		throws IOException
+	{
+		//sort sparse row contents if required
+		if( _partition.isInSparseFormat() && sort )
+			_partition.sortSparseRows();
+
+		//ensure right number of nnz (dense format only)
+		if( !_partition.isInSparseFormat() )
+			_partition.recomputeNonZeros();
+			
+		//examine sparsity and switch dense/sparse representation
+		try {
+			_partition.examSparsity();
+		}
+		catch(Exception ex){
+			throw new IOException(ex);
+		}
+	}
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/RemoteParForUtils.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/RemoteParForUtils.java b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/RemoteParForUtils.java
index 2426bc2..67beff6 100644
--- a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/RemoteParForUtils.java
+++ b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/RemoteParForUtils.java
@@ -1,266 +1,266 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.sysml.runtime.controlprogram.parfor;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-
-import org.apache.commons.logging.Log;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reporter;
-
-import scala.Tuple2;
-
-import org.apache.sysml.api.DMLScript;
-import org.apache.sysml.conf.ConfigurationManager;
-import org.apache.sysml.parser.Expression.DataType;
-import org.apache.sysml.runtime.DMLRuntimeException;
-import org.apache.sysml.runtime.controlprogram.LocalVariableMap;
-import org.apache.sysml.runtime.controlprogram.ParForProgramBlock;
-import org.apache.sysml.runtime.controlprogram.caching.CacheStatistics;
-import org.apache.sysml.runtime.controlprogram.caching.CacheableData;
-import org.apache.sysml.runtime.controlprogram.caching.MatrixObject;
-import org.apache.sysml.runtime.controlprogram.parfor.stat.InfrastructureAnalyzer;
-import org.apache.sysml.runtime.controlprogram.parfor.stat.Stat;
-import org.apache.sysml.runtime.instructions.cp.Data;
-import org.apache.sysml.runtime.util.LocalFileUtils;
-import org.apache.sysml.utils.Statistics;
-
-/**
- * Common functionalities for parfor workers in MR jobs. Used by worker wrappers in
- * mappers (base RemoteParFor) and reducers (fused data partitioning and parfor)
- * 
- */
-public class RemoteParForUtils 
-{
-	
-	/**
-	 * 
-	 * @param reporter
-	 * @param deltaTasks
-	 * @param deltaIterations
-	 */
-	public static void incrementParForMRCounters(Reporter reporter, long deltaTasks, long deltaIterations)
-	{
-		//report parfor counters
-		if( deltaTasks>0 )
-			reporter.incrCounter(ParForProgramBlock.PARFOR_COUNTER_GROUP_NAME, Stat.PARFOR_NUMTASKS.toString(), deltaTasks);
-		if( deltaIterations>0 )
-			reporter.incrCounter(ParForProgramBlock.PARFOR_COUNTER_GROUP_NAME, Stat.PARFOR_NUMITERS.toString(), deltaIterations);
-		
-		JobConf job = ConfigurationManager.getCachedJobConf();
-		if( DMLScript.STATISTICS  && !InfrastructureAnalyzer.isLocalMode(job) ) 
-		{
-			//report cache statistics
-			reporter.incrCounter( ParForProgramBlock.PARFOR_COUNTER_GROUP_NAME, Stat.PARFOR_JITCOMPILE.toString(), Statistics.getJITCompileTime());
-			reporter.incrCounter( ParForProgramBlock.PARFOR_COUNTER_GROUP_NAME, Stat.PARFOR_JVMGC_COUNT.toString(), Statistics.getJVMgcCount());
-			reporter.incrCounter( ParForProgramBlock.PARFOR_COUNTER_GROUP_NAME, Stat.PARFOR_JVMGC_TIME.toString(), Statistics.getJVMgcTime());
-			reporter.incrCounter( CacheableData.CACHING_COUNTER_GROUP_NAME, CacheStatistics.Stat.CACHE_HITS_MEM.toString(), CacheStatistics.getMemHits());
-			reporter.incrCounter( CacheableData.CACHING_COUNTER_GROUP_NAME, CacheStatistics.Stat.CACHE_HITS_FSBUFF.toString(), CacheStatistics.getFSBuffHits());
-			reporter.incrCounter( CacheableData.CACHING_COUNTER_GROUP_NAME, CacheStatistics.Stat.CACHE_HITS_FS.toString(), CacheStatistics.getFSHits());
-			reporter.incrCounter( CacheableData.CACHING_COUNTER_GROUP_NAME, CacheStatistics.Stat.CACHE_HITS_HDFS.toString(), CacheStatistics.getHDFSHits());
-			reporter.incrCounter( CacheableData.CACHING_COUNTER_GROUP_NAME, CacheStatistics.Stat.CACHE_WRITES_FSBUFF.toString(), CacheStatistics.getFSBuffWrites());
-			reporter.incrCounter( CacheableData.CACHING_COUNTER_GROUP_NAME, CacheStatistics.Stat.CACHE_WRITES_FS.toString(), CacheStatistics.getFSWrites());
-			reporter.incrCounter( CacheableData.CACHING_COUNTER_GROUP_NAME, CacheStatistics.Stat.CACHE_WRITES_HDFS.toString(), CacheStatistics.getHDFSWrites());
-			reporter.incrCounter( CacheableData.CACHING_COUNTER_GROUP_NAME, CacheStatistics.Stat.CACHE_TIME_ACQR.toString(), CacheStatistics.getAcquireRTime());
-			reporter.incrCounter( CacheableData.CACHING_COUNTER_GROUP_NAME, CacheStatistics.Stat.CACHE_TIME_ACQM.toString(), CacheStatistics.getAcquireMTime());
-			reporter.incrCounter( CacheableData.CACHING_COUNTER_GROUP_NAME, CacheStatistics.Stat.CACHE_TIME_RLS.toString(), CacheStatistics.getReleaseTime());
-			reporter.incrCounter( CacheableData.CACHING_COUNTER_GROUP_NAME, CacheStatistics.Stat.CACHE_TIME_EXP.toString(), CacheStatistics.getExportTime());
-		
-			//reset cache statistics to prevent overlapping reporting
-			CacheStatistics.reset();
-		}
-	}
-	
-	/**
-	 * 
-	 * @param workerID
-	 * @param vars
-	 * @param resultVars
-	 * @param out
-	 * @throws DMLRuntimeException
-	 * @throws IOException
-	 */
-	public static void exportResultVariables( long workerID, LocalVariableMap vars, ArrayList<String> resultVars, OutputCollector<Writable, Writable> out ) 
-			throws DMLRuntimeException, IOException
-	{
-		exportResultVariables(workerID, vars, resultVars, null, out);
-	}	
-	
-	/**
-	 * For remote MR parfor workers.
-	 * 
-	 * @param workerID
-	 * @param vars
-	 * @param resultVars
-	 * @param rvarFnames
-	 * @param out
-	 * @throws DMLRuntimeException
-	 * @throws IOException
-	 */
-	public static void exportResultVariables( long workerID, LocalVariableMap vars, ArrayList<String> resultVars, 
-			                                  HashMap<String,String> rvarFnames, OutputCollector<Writable, Writable> out ) 
-		throws DMLRuntimeException, IOException
-	{
-		//create key and value for reuse
-		LongWritable okey = new LongWritable( workerID ); 
-		Text ovalue = new Text();
-		
-		//foreach result variables probe if export necessary
-		for( String rvar : resultVars )
-		{
-			Data dat = vars.get( rvar );
-			
-			//export output variable to HDFS (see RunMRJobs)
-			if ( dat != null && dat.getDataType() == DataType.MATRIX ) 
-			{
-				MatrixObject mo = (MatrixObject) dat;
-				if( mo.isDirty() )
-				{
-					if( ParForProgramBlock.ALLOW_REUSE_MR_PAR_WORKER && rvarFnames!=null )
-					{
-						String fname = rvarFnames.get( rvar );
-						if( fname!=null )
-							mo.setFileName( fname );
-							
-						//export result var (iff actually modified in parfor)
-						mo.exportData(); //note: this is equivalent to doing it in close (currently not required because 1 Task=1Map tasks, hence only one map invocation)		
-						rvarFnames.put(rvar, mo.getFileName());	
-					}
-					else
-					{
-						//export result var (iff actually modified in parfor)
-						mo.exportData(); //note: this is equivalent to doing it in close (currently not required because 1 Task=1Map tasks, hence only one map invocation)
-					}
-					
-					//pass output vars (scalars by value, matrix by ref) to result
-					//(only if actually exported, hence in check for dirty, otherwise potential problems in result merge)
-					String datStr = ProgramConverter.serializeDataObject(rvar, mo);
-					ovalue.set( datStr );
-					out.collect( okey, ovalue );
-				}
-			}	
-		}
-	}
-	
-	/**
-	 * For remote Spark parfor workers. This is a simplified version compared to MR.
-	 * 
-	 * @param workerID
-	 * @param vars
-	 * @param resultVars
-	 * @param rvarFnames
-	 * @throws DMLRuntimeException
-	 * @throws IOException
-	 */
-	public static ArrayList<String> exportResultVariables( long workerID, LocalVariableMap vars, ArrayList<String> resultVars) 
-		throws DMLRuntimeException, IOException
-	{
-		ArrayList<String> ret = new ArrayList<String>();
-		
-		//foreach result variables probe if export necessary
-		for( String rvar : resultVars )
-		{
-			Data dat = vars.get( rvar );
-			
-			//export output variable to HDFS (see RunMRJobs)
-			if ( dat != null && dat.getDataType() == DataType.MATRIX ) 
-			{
-				MatrixObject mo = (MatrixObject) dat;
-				if( mo.isDirty() )
-				{
-					//export result var (iff actually modified in parfor)
-					mo.exportData(); 
-					
-					
-					//pass output vars (scalars by value, matrix by ref) to result
-					//(only if actually exported, hence in check for dirty, otherwise potential problems in result merge)
-					ret.add( ProgramConverter.serializeDataObject(rvar, mo) );
-				}
-			}	
-		}
-		
-		return ret;
-	}
-		
-	
-	/**
-	 * Cleanup all temporary files created by this SystemML process
-	 * instance.
-	 * 
-	 */
-	public static void cleanupWorkingDirectories()
-	{
-		//use the given job configuration for infrastructure analysis (see configure);
-		//this is important for robustness w/ misconfigured classpath which also contains
-		//core-default.xml and hence hides the actual cluster configuration; otherwise
-		//there is missing cleanup of working directories 
-		JobConf job = ConfigurationManager.getCachedJobConf();
-		
-		if( !InfrastructureAnalyzer.isLocalMode(job) )
-		{
-			//delete cache files
-			CacheableData.cleanupCacheDir();
-			//disable caching (prevent dynamic eviction)
-			CacheableData.disableCaching();
-			//cleanup working dir (e.g., of CP_FILE instructions)
-			LocalFileUtils.cleanupWorkingDirectory();
-		}
-	}
-	
-	/**
-	 * 
-	 * @param out
-	 * @return
-	 * @throws DMLRuntimeException
-	 * @throws IOException
-	 */
-	public static LocalVariableMap[] getResults( List<Tuple2<Long,String>> out, Log LOG ) 
-		throws DMLRuntimeException
-	{
-		HashMap<Long,LocalVariableMap> tmp = new HashMap<Long,LocalVariableMap>();
-
-		int countAll = 0;
-		for( Tuple2<Long,String> entry : out )
-		{
-			Long key = entry._1();
-			String val = entry._2();
-			if( !tmp.containsKey( key ) )
-        		tmp.put(key, new LocalVariableMap ());	   
-			Object[] dat = ProgramConverter.parseDataObject( val );
-        	tmp.get(key).put((String)dat[0], (Data)dat[1]);
-        	countAll++;
-		}
-
-		if( LOG != null ) {
-			LOG.debug("Num remote worker results (before deduplication): "+countAll);
-			LOG.debug("Num remote worker results: "+tmp.size());
-		}
-		
-		//create return array
-		return tmp.values().toArray(new LocalVariableMap[0]);	
-	}
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.runtime.controlprogram.parfor;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+
+import org.apache.commons.logging.Log;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reporter;
+
+import scala.Tuple2;
+
+import org.apache.sysml.api.DMLScript;
+import org.apache.sysml.conf.ConfigurationManager;
+import org.apache.sysml.parser.Expression.DataType;
+import org.apache.sysml.runtime.DMLRuntimeException;
+import org.apache.sysml.runtime.controlprogram.LocalVariableMap;
+import org.apache.sysml.runtime.controlprogram.ParForProgramBlock;
+import org.apache.sysml.runtime.controlprogram.caching.CacheStatistics;
+import org.apache.sysml.runtime.controlprogram.caching.CacheableData;
+import org.apache.sysml.runtime.controlprogram.caching.MatrixObject;
+import org.apache.sysml.runtime.controlprogram.parfor.stat.InfrastructureAnalyzer;
+import org.apache.sysml.runtime.controlprogram.parfor.stat.Stat;
+import org.apache.sysml.runtime.instructions.cp.Data;
+import org.apache.sysml.runtime.util.LocalFileUtils;
+import org.apache.sysml.utils.Statistics;
+
+/**
+ * Common functionality for remote parfor workers in MR jobs and Spark. Used by worker wrappers in
+ * mappers (base RemoteParFor) and reducers (fused data partitioning and parfor), and for Spark result export/collection.
+ * 
+ */
+public class RemoteParForUtils 
+{
+	
+	/**
+	 * Reports task/iteration deltas and, with statistics enabled in non-local mode, JVM and cache statistics as MR counters.
+	 * @param reporter MR reporter whose counters are incremented
+	 * @param deltaTasks number of tasks to add (only reported if greater than 0)
+	 * @param deltaIterations number of iterations to add (only reported if greater than 0)
+	 */
+	public static void incrementParForMRCounters(Reporter reporter, long deltaTasks, long deltaIterations)
+	{
+		//report parfor counters
+		if( deltaTasks>0 )
+			reporter.incrCounter(ParForProgramBlock.PARFOR_COUNTER_GROUP_NAME, Stat.PARFOR_NUMTASKS.toString(), deltaTasks);
+		if( deltaIterations>0 )
+			reporter.incrCounter(ParForProgramBlock.PARFOR_COUNTER_GROUP_NAME, Stat.PARFOR_NUMITERS.toString(), deltaIterations);
+		
+		JobConf job = ConfigurationManager.getCachedJobConf();
+		if( DMLScript.STATISTICS  && !InfrastructureAnalyzer.isLocalMode(job) ) 
+		{
+			//report JVM and cache statistics
+			reporter.incrCounter( ParForProgramBlock.PARFOR_COUNTER_GROUP_NAME, Stat.PARFOR_JITCOMPILE.toString(), Statistics.getJITCompileTime());
+			reporter.incrCounter( ParForProgramBlock.PARFOR_COUNTER_GROUP_NAME, Stat.PARFOR_JVMGC_COUNT.toString(), Statistics.getJVMgcCount());
+			reporter.incrCounter( ParForProgramBlock.PARFOR_COUNTER_GROUP_NAME, Stat.PARFOR_JVMGC_TIME.toString(), Statistics.getJVMgcTime());
+			reporter.incrCounter( CacheableData.CACHING_COUNTER_GROUP_NAME, CacheStatistics.Stat.CACHE_HITS_MEM.toString(), CacheStatistics.getMemHits());
+			reporter.incrCounter( CacheableData.CACHING_COUNTER_GROUP_NAME, CacheStatistics.Stat.CACHE_HITS_FSBUFF.toString(), CacheStatistics.getFSBuffHits());
+			reporter.incrCounter( CacheableData.CACHING_COUNTER_GROUP_NAME, CacheStatistics.Stat.CACHE_HITS_FS.toString(), CacheStatistics.getFSHits());
+			reporter.incrCounter( CacheableData.CACHING_COUNTER_GROUP_NAME, CacheStatistics.Stat.CACHE_HITS_HDFS.toString(), CacheStatistics.getHDFSHits());
+			reporter.incrCounter( CacheableData.CACHING_COUNTER_GROUP_NAME, CacheStatistics.Stat.CACHE_WRITES_FSBUFF.toString(), CacheStatistics.getFSBuffWrites());
+			reporter.incrCounter( CacheableData.CACHING_COUNTER_GROUP_NAME, CacheStatistics.Stat.CACHE_WRITES_FS.toString(), CacheStatistics.getFSWrites());
+			reporter.incrCounter( CacheableData.CACHING_COUNTER_GROUP_NAME, CacheStatistics.Stat.CACHE_WRITES_HDFS.toString(), CacheStatistics.getHDFSWrites());
+			reporter.incrCounter( CacheableData.CACHING_COUNTER_GROUP_NAME, CacheStatistics.Stat.CACHE_TIME_ACQR.toString(), CacheStatistics.getAcquireRTime());
+			reporter.incrCounter( CacheableData.CACHING_COUNTER_GROUP_NAME, CacheStatistics.Stat.CACHE_TIME_ACQM.toString(), CacheStatistics.getAcquireMTime());
+			reporter.incrCounter( CacheableData.CACHING_COUNTER_GROUP_NAME, CacheStatistics.Stat.CACHE_TIME_RLS.toString(), CacheStatistics.getReleaseTime());
+			reporter.incrCounter( CacheableData.CACHING_COUNTER_GROUP_NAME, CacheStatistics.Stat.CACHE_TIME_EXP.toString(), CacheStatistics.getExportTime());
+		
+			//reset cache statistics to prevent overlapping reporting
+			CacheStatistics.reset();
+		}
+	}
+	
+	/**
+	 * Convenience overload for remote MR parfor workers: delegates to the five-argument variant with rvarFnames set to null.
+	 * @param workerID worker id, used as the output key
+	 * @param vars variable map in which the result variables are looked up
+	 * @param resultVars names of the result variables to probe for export
+	 * @param out collector that receives the serialized result variables
+	 * @throws DMLRuntimeException
+	 * @throws IOException
+	 */
+	public static void exportResultVariables( long workerID, LocalVariableMap vars, ArrayList<String> resultVars, OutputCollector<Writable, Writable> out ) 
+			throws DMLRuntimeException, IOException
+	{
+		exportResultVariables(workerID, vars, resultVars, null, out);
+	}	
+	
+	/**
+	 * For remote MR parfor workers.
+	 * Exports each dirty matrix result variable and emits its serialized form under the worker id.
+	 * @param workerID worker id, used as the output key
+	 * @param vars variable map in which the result variables are looked up
+	 * @param resultVars names of the result variables to probe for export
+	 * @param rvarFnames map from result variable name to file name, read and updated on export; may be null
+	 * @param out collector that receives (workerID, serialized variable) pairs
+	 * @throws DMLRuntimeException
+	 * @throws IOException
+	 */
+	public static void exportResultVariables( long workerID, LocalVariableMap vars, ArrayList<String> resultVars, 
+			                                  HashMap<String,String> rvarFnames, OutputCollector<Writable, Writable> out ) 
+		throws DMLRuntimeException, IOException
+	{
+		//create key and value for reuse
+		LongWritable okey = new LongWritable( workerID ); 
+		Text ovalue = new Text();
+		
+		//foreach result variables probe if export necessary
+		for( String rvar : resultVars )
+		{
+			Data dat = vars.get( rvar );
+			
+			//export output variable to HDFS (see RunMRJobs)
+			if ( dat != null && dat.getDataType() == DataType.MATRIX ) 
+			{
+				MatrixObject mo = (MatrixObject) dat;
+				if( mo.isDirty() )
+				{
+					if( ParForProgramBlock.ALLOW_REUSE_MR_PAR_WORKER && rvarFnames!=null )
+					{
+						String fname = rvarFnames.get( rvar );
+						if( fname!=null )
+							mo.setFileName( fname );
+							
+						//export result var (iff actually modified in parfor)
+						mo.exportData(); //note: this is equivalent to doing it in close (currently not required because 1 Task=1Map tasks, hence only one map invocation)		
+						rvarFnames.put(rvar, mo.getFileName());	
+					}
+					else
+					{
+						//export result var (iff actually modified in parfor)
+						mo.exportData(); //note: this is equivalent to doing it in close (currently not required because 1 Task=1Map tasks, hence only one map invocation)
+					}
+					
+					//pass output vars (scalars by value, matrix by ref) to result
+					//(only if actually exported, hence in check for dirty, otherwise potential problems in result merge)
+					String datStr = ProgramConverter.serializeDataObject(rvar, mo);
+					ovalue.set( datStr );
+					out.collect( okey, ovalue );
+				}
+			}	
+		}
+	}
+	
+	/**
+	 * For remote Spark parfor workers. This is a simplified version compared to MR.
+	 * Exports each dirty matrix result variable and returns the serialized variables instead of emitting them.
+	 * @param workerID worker id (not used by this method)
+	 * @param vars variable map in which the result variables are looked up
+	 * @param resultVars names of the result variables to probe for export
+	 * @return list with one serialized entry per exported (dirty) matrix result variable
+	 * @throws DMLRuntimeException
+	 * @throws IOException
+	 */
+	public static ArrayList<String> exportResultVariables( long workerID, LocalVariableMap vars, ArrayList<String> resultVars) 
+		throws DMLRuntimeException, IOException
+	{
+		ArrayList<String> ret = new ArrayList<String>();
+		
+		//foreach result variables probe if export necessary
+		for( String rvar : resultVars )
+		{
+			Data dat = vars.get( rvar );
+			
+			//export output variable to HDFS (see RunMRJobs)
+			if ( dat != null && dat.getDataType() == DataType.MATRIX ) 
+			{
+				MatrixObject mo = (MatrixObject) dat;
+				if( mo.isDirty() )
+				{
+					//export result var (iff actually modified in parfor)
+					mo.exportData(); 
+					
+					
+					//pass output vars (scalars by value, matrix by ref) to result
+					//(only if actually exported, hence in check for dirty, otherwise potential problems in result merge)
+					ret.add( ProgramConverter.serializeDataObject(rvar, mo) );
+				}
+			}	
+		}
+		
+		return ret;
+	}
+		
+	
+	/**
+	 * Cleanup all temporary files created by this SystemML process
+	 * instance.
+	 * Nothing is cleaned up if the cached job configuration indicates local mode.
+	 */
+	public static void cleanupWorkingDirectories()
+	{
+		//use the given job configuration for infrastructure analysis (see configure);
+		//this is important for robustness w/ misconfigured classpath which also contains
+		//core-default.xml and hence hides the actual cluster configuration; otherwise
+		//there is missing cleanup of working directories 
+		JobConf job = ConfigurationManager.getCachedJobConf();
+		
+		if( !InfrastructureAnalyzer.isLocalMode(job) )
+		{
+			//delete cache files
+			CacheableData.cleanupCacheDir();
+			//disable caching (prevent dynamic eviction)
+			CacheableData.disableCaching();
+			//cleanup working dir (e.g., of CP_FILE instructions)
+			LocalFileUtils.cleanupWorkingDirectory();
+		}
+	}
+	
+	/**
+	 * Parses the serialized worker results and groups them into one variable map per worker id.
+	 * @param out list of (worker id, serialized variable) pairs
+	 * @param LOG logger for debug output of the result counts; may be null
+	 * @return array with one LocalVariableMap per distinct worker id
+	 * @throws DMLRuntimeException
+	 */
+	public static LocalVariableMap[] getResults( List<Tuple2<Long,String>> out, Log LOG ) 
+		throws DMLRuntimeException
+	{
+		HashMap<Long,LocalVariableMap> tmp = new HashMap<Long,LocalVariableMap>();
+
+		int countAll = 0;
+		for( Tuple2<Long,String> entry : out )
+		{
+			Long key = entry._1();
+			String val = entry._2();
+			if( !tmp.containsKey( key ) )
+        		tmp.put(key, new LocalVariableMap ());	   
+			Object[] dat = ProgramConverter.parseDataObject( val );
+        	tmp.get(key).put((String)dat[0], (Data)dat[1]);
+        	countAll++;
+		}
+
+		if( LOG != null ) {
+			LOG.debug("Num remote worker results (before deduplication): "+countAll);
+			LOG.debug("Num remote worker results: "+tmp.size());
+		}
+		
+		//create return array (one entry per distinct worker id)
+		return tmp.values().toArray(new LocalVariableMap[0]);	
+	}
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/PerfTestToolRegression.dml
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/PerfTestToolRegression.dml b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/PerfTestToolRegression.dml
index 6f478bc..e6556fa 100644
--- a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/PerfTestToolRegression.dml
+++ b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/PerfTestToolRegression.dml
@@ -1,58 +1,58 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-
-#PerfTestTool: DML template for estimation cost functions.
-
-dynRead = externalFunction(Matrix[Double] d, String fname, Integer m, Integer n) 
-return (Matrix[Double] D) 
-implemented in (classname="org.apache.sysml.runtime.controlprogram.parfor.test.dml.DynamicReadMatrix2DCP",exectype="mem") 
-
-dynWrite = externalFunction(Matrix[Double] R, String fname) 
-return (Matrix[Double] D) 
-implemented in (classname="org.apache.sysml.runtime.controlprogram.parfor.test.dml.DynamicWriteMatrix2DCP",exectype="mem") 
-
-solve = externalFunction(Matrix[Double] A, Matrix[Double] y) 
-return (Matrix[Double] b) 
-implemented in (classname="org.apache.sysml.packagesupport.LinearSolverWrapperCP",exectype="mem") 
-
-k = %numModels%;
-m = -1; 
-n = -1;
-
-dummy = matrix(1,rows=1,cols=1); 
-
-for( i in 1:k, par=8, mode=LOCAL )
-{
-   sin1 = "./conf/PerfTestTool/"+i+"_in1.csv";   
-   sin2 = "./conf/PerfTestTool/"+i+"_in2.csv";   
-   
-   D = dynRead( dummy, sin1, m, n );
-   y = dynRead( dummy, sin2, m, 1 );
-   
-   A = t(D) %*% D; # X'X
-   b = t(D) %*% y; # X'y
-   beta = solve(A,b); 
-
-   sout = "./conf/PerfTestTool/"+i+"_out.csv";   
-   
-   X=dynWrite( beta, sout );
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+#PerfTestTool: DML template for estimating cost functions; for each of the k models it reads D and y and passes D'D and D'y to the external solver.
+
+dynRead = externalFunction(Matrix[Double] d, String fname, Integer m, Integer n) 
+return (Matrix[Double] D) 
+implemented in (classname="org.apache.sysml.runtime.controlprogram.parfor.test.dml.DynamicReadMatrix2DCP",exectype="mem") 
+
+dynWrite = externalFunction(Matrix[Double] R, String fname) 
+return (Matrix[Double] D) 
+implemented in (classname="org.apache.sysml.runtime.controlprogram.parfor.test.dml.DynamicWriteMatrix2DCP",exectype="mem") 
+
+solve = externalFunction(Matrix[Double] A, Matrix[Double] y) 
+return (Matrix[Double] b) 
+implemented in (classname="org.apache.sysml.packagesupport.LinearSolverWrapperCP",exectype="mem") 
+
+k = %numModels%;
+m = -1; 
+n = -1;
+
+dummy = matrix(1,rows=1,cols=1); 
+
+for( i in 1:k, par=8, mode=LOCAL )
+{
+   sin1 = "./conf/PerfTestTool/"+i+"_in1.csv";   
+   sin2 = "./conf/PerfTestTool/"+i+"_in2.csv";   
+   
+   D = dynRead( dummy, sin1, m, n );
+   y = dynRead( dummy, sin2, m, 1 );
+   
+   A = t(D) %*% D; # X'X (with X = D)
+   b = t(D) %*% y; # X'y (with X = D)
+   beta = solve(A,b); 
+
+   sout = "./conf/PerfTestTool/"+i+"_out.csv";   
+   
+   X=dynWrite( beta, sout );
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/main/java/org/apache/sysml/runtime/instructions/spark/data/BroadcastObject.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/instructions/spark/data/BroadcastObject.java b/src/main/java/org/apache/sysml/runtime/instructions/spark/data/BroadcastObject.java
index 1dd419f..13c68e2 100644
--- a/src/main/java/org/apache/sysml/runtime/instructions/spark/data/BroadcastObject.java
+++ b/src/main/java/org/apache/sysml/runtime/instructions/spark/data/BroadcastObject.java
@@ -1,64 +1,64 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.sysml.runtime.instructions.spark.data;
-
-import java.lang.ref.SoftReference;
-
-import org.apache.spark.broadcast.Broadcast;
-
-public class BroadcastObject extends LineageObject
-{
-	//soft reference storage for graceful cleanup in case of memory pressure
-	private SoftReference<PartitionedBroadcastMatrix> _bcHandle = null;
-	
-	public BroadcastObject( PartitionedBroadcastMatrix bvar, String varName )
-	{
-		_bcHandle = new SoftReference<PartitionedBroadcastMatrix>(bvar);
-		_varName = varName;
-	}
-	
-	/**
-	 * 
-	 * @return
-	 */
-	public PartitionedBroadcastMatrix getBroadcast()
-	{
-		return _bcHandle.get();
-	}
-	
-	/**
-	 * 
-	 * @return
-	 */
-	public boolean isValid() 
-	{
-		//check for evicted soft reference
-		PartitionedBroadcastMatrix pbm = _bcHandle.get();
-		if( pbm == null )
-			return false;
-		
-		//check for validity of individual broadcasts
-		Broadcast<PartitionedMatrixBlock>[] tmp = pbm.getBroadcasts();
-		for( Broadcast<PartitionedMatrixBlock> bc : tmp )
-			if( !bc.isValid() )
-				return false;		
-		return true;
-	}
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.runtime.instructions.spark.data;
+
+import java.lang.ref.SoftReference;
+
+import org.apache.spark.broadcast.Broadcast;
+
+public class BroadcastObject extends LineageObject
+{
+	//soft reference storage for graceful cleanup in case of memory pressure
+	private SoftReference<PartitionedBroadcastMatrix> _bcHandle = null;
+	
+	public BroadcastObject( PartitionedBroadcastMatrix bvar, String varName )
+	{
+		_bcHandle = new SoftReference<PartitionedBroadcastMatrix>(bvar);
+		_varName = varName;
+	}
+	
+	/**
+	 * 
+	 * @return
+	 */
+	public PartitionedBroadcastMatrix getBroadcast()
+	{
+		return _bcHandle.get();
+	}
+	
+	/**
+	 * 
+	 * @return
+	 */
+	public boolean isValid() 
+	{
+		//check for evicted soft reference
+		PartitionedBroadcastMatrix pbm = _bcHandle.get();
+		if( pbm == null )
+			return false;
+		
+		//check for validity of individual broadcasts
+		Broadcast<PartitionedMatrixBlock>[] tmp = pbm.getBroadcasts();
+		for( Broadcast<PartitionedMatrixBlock> bc : tmp )
+			if( !bc.isValid() )
+				return false;		
+		return true;
+	}
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/main/java/org/apache/sysml/runtime/instructions/spark/data/LineageObject.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/instructions/spark/data/LineageObject.java b/src/main/java/org/apache/sysml/runtime/instructions/spark/data/LineageObject.java
index b2bb62c..bcf37bb 100644
--- a/src/main/java/org/apache/sysml/runtime/instructions/spark/data/LineageObject.java
+++ b/src/main/java/org/apache/sysml/runtime/instructions/spark/data/LineageObject.java
@@ -1,83 +1,83 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.sysml.runtime.instructions.spark.data;
-
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.sysml.runtime.controlprogram.caching.MatrixObject;
-
-public abstract class LineageObject 
-{
-
-	//basic lineage information
-	protected int _numRef = -1;
-	protected List<LineageObject> _childs = null;
-	protected String _varName = null;
-	
-	//N:1 back reference to matrix object
-	protected MatrixObject _mo = null;
-	
-	protected LineageObject()
-	{
-		_numRef = 0;
-		_childs = new ArrayList<LineageObject>();
-	}
-	
-	public String getVarName() {
-		return _varName;
-	}
-	
-	public int getNumReferences()
-	{
-		return _numRef;
-	}
-	
-	public void setBackReference(MatrixObject mo)
-	{
-		_mo = mo;
-	}
-	
-	public boolean hasBackReference()
-	{
-		return (_mo != null);
-	}
-	
-	public void incrementNumReferences()
-	{
-		_numRef++;
-	}
-	
-	public void decrementNumReferences()
-	{
-		_numRef--;
-	}
-	
-	public List<LineageObject> getLineageChilds()
-	{
-		return _childs;
-	}
-	
-	public void addLineageChild(LineageObject lob)
-	{
-		lob.incrementNumReferences();
-		_childs.add( lob );
-	}
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.runtime.instructions.spark.data;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.sysml.runtime.controlprogram.caching.MatrixObject;
+
+public abstract class LineageObject 
+{
+
+	//basic lineage information
+	protected int _numRef = -1;
+	protected List<LineageObject> _childs = null;
+	protected String _varName = null;
+	
+	//N:1 back reference to matrix object
+	protected MatrixObject _mo = null;
+	
+	protected LineageObject()
+	{
+		_numRef = 0;
+		_childs = new ArrayList<LineageObject>();
+	}
+	
+	public String getVarName() {
+		return _varName;
+	}
+	
+	public int getNumReferences()
+	{
+		return _numRef;
+	}
+	
+	public void setBackReference(MatrixObject mo)
+	{
+		_mo = mo;
+	}
+	
+	public boolean hasBackReference()
+	{
+		return (_mo != null);
+	}
+	
+	public void incrementNumReferences()
+	{
+		_numRef++;
+	}
+	
+	public void decrementNumReferences()
+	{
+		_numRef--;
+	}
+	
+	public List<LineageObject> getLineageChilds()
+	{
+		return _childs;
+	}
+	
+	public void addLineageChild(LineageObject lob)
+	{
+		lob.incrementNumReferences();
+		_childs.add( lob );
+	}
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/main/java/org/apache/sysml/runtime/instructions/spark/data/RDDObject.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/instructions/spark/data/RDDObject.java b/src/main/java/org/apache/sysml/runtime/instructions/spark/data/RDDObject.java
index fb7e773..605e7ca 100644
--- a/src/main/java/org/apache/sysml/runtime/instructions/spark/data/RDDObject.java
+++ b/src/main/java/org/apache/sysml/runtime/instructions/spark/data/RDDObject.java
@@ -1,124 +1,124 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.sysml.runtime.instructions.spark.data;
-
-import org.apache.spark.api.java.JavaPairRDD;
-
-public class RDDObject extends LineageObject
-{
-
-	private JavaPairRDD<?,?> _rddHandle = null;
-	
-	//meta data on origin of given rdd handle
-	private boolean _checkpointed = false; //created via checkpoint instruction
-	private boolean _hdfsfile = false;     //created from hdfs file
-	private String  _hdfsFname = null;     //hdfs filename, if created from hdfs.  
-	
-	public RDDObject( JavaPairRDD<?,?> rddvar, String varName)
-	{
-		_rddHandle = rddvar;
-		_varName = varName;
-	}
-	
-	/**
-	 * 
-	 * @return
-	 */
-	public JavaPairRDD<?,?> getRDD()
-	{
-		return _rddHandle;
-	}
-	
-	public void setCheckpointRDD( boolean flag )
-	{
-		_checkpointed = flag;
-	}
-	
-	public boolean isCheckpointRDD() 
-	{
-		return _checkpointed;
-	}
-	
-	public void setHDFSFile( boolean flag ) {
-		_hdfsfile = flag;
-	}
-	
-	public void setHDFSFilename( String fname ) {
-		_hdfsFname = fname;
-	}
-	
-	public boolean isHDFSFile() {
-		return _hdfsfile;
-	}
-	
-	public String getHDFSFilename() {
-		return _hdfsFname;
-	}
-	
-
-	/**
-	 * Indicates if rdd is an hdfs file or a checkpoint over an hdfs file;
-	 * in both cases, we can directly read the file instead of collecting
-	 * the given rdd.
-	 * 
-	 * @return
-	 */
-	public boolean allowsShortCircuitRead()
-	{
-		boolean ret = isHDFSFile();
-		
-		if( isCheckpointRDD() && getLineageChilds().size() == 1 ) {
-			LineageObject lo = getLineageChilds().get(0);
-			ret = ( lo instanceof RDDObject && ((RDDObject)lo).isHDFSFile() );
-		}
-		
-		return ret;
-	}
-	
-	/**
-	 * 
-	 * @return
-	 */
-	public boolean allowsShortCircuitCollect()
-	{
-		return ( isCheckpointRDD() && getLineageChilds().size() == 1
-			     && getLineageChilds().get(0) instanceof RDDObject );
-	}
-	
-	/**
-	 * 
-	 * @return
-	 */
-	public boolean rHasCheckpointRDDChilds()
-	{
-		//probe for checkpoint rdd
-		if( _checkpointed )
-			return true;
-		
-		//process childs recursively
-		boolean ret = false;
-		for( LineageObject lo : getLineageChilds() ) {
-			if( lo instanceof RDDObject )
-				ret |= ((RDDObject)lo).rHasCheckpointRDDChilds();
-		}
-		
-		return ret;
-	}
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.runtime.instructions.spark.data;
+
+import org.apache.spark.api.java.JavaPairRDD;
+
+public class RDDObject extends LineageObject
+{
+
+	private JavaPairRDD<?,?> _rddHandle = null;
+	
+	//meta data on origin of given rdd handle
+	private boolean _checkpointed = false; //created via checkpoint instruction
+	private boolean _hdfsfile = false;     //created from hdfs file
+	private String  _hdfsFname = null;     //hdfs filename, if created from hdfs.  
+	
+	public RDDObject( JavaPairRDD<?,?> rddvar, String varName)
+	{
+		_rddHandle = rddvar;
+		_varName = varName;
+	}
+	
+	/**
+	 * 
+	 * @return
+	 */
+	public JavaPairRDD<?,?> getRDD()
+	{
+		return _rddHandle;
+	}
+	
+	public void setCheckpointRDD( boolean flag )
+	{
+		_checkpointed = flag;
+	}
+	
+	public boolean isCheckpointRDD() 
+	{
+		return _checkpointed;
+	}
+	
+	public void setHDFSFile( boolean flag ) {
+		_hdfsfile = flag;
+	}
+	
+	public void setHDFSFilename( String fname ) {
+		_hdfsFname = fname;
+	}
+	
+	public boolean isHDFSFile() {
+		return _hdfsfile;
+	}
+	
+	public String getHDFSFilename() {
+		return _hdfsFname;
+	}
+	
+
+	/**
+	 * Indicates if rdd is an hdfs file or a checkpoint over an hdfs file;
+	 * in both cases, we can directly read the file instead of collecting
+	 * the given rdd.
+	 * 
+	 * @return
+	 */
+	public boolean allowsShortCircuitRead()
+	{
+		boolean ret = isHDFSFile();
+		
+		if( isCheckpointRDD() && getLineageChilds().size() == 1 ) {
+			LineageObject lo = getLineageChilds().get(0);
+			ret = ( lo instanceof RDDObject && ((RDDObject)lo).isHDFSFile() );
+		}
+		
+		return ret;
+	}
+	
+	/**
+	 * 
+	 * @return
+	 */
+	public boolean allowsShortCircuitCollect()
+	{
+		return ( isCheckpointRDD() && getLineageChilds().size() == 1
+			     && getLineageChilds().get(0) instanceof RDDObject );
+	}
+	
+	/**
+	 * 
+	 * @return
+	 */
+	public boolean rHasCheckpointRDDChilds()
+	{
+		//probe for checkpoint rdd
+		if( _checkpointed )
+			return true;
+		
+		//process childs recursively
+		boolean ret = false;
+		for( LineageObject lo : getLineageChilds() ) {
+			if( lo instanceof RDDObject )
+				ret |= ((RDDObject)lo).rHasCheckpointRDDChilds();
+		}
+		
+		return ret;
+	}
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/main/java/org/apache/sysml/runtime/matrix/mapred/GroupedAggMRCombiner.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/mapred/GroupedAggMRCombiner.java b/src/main/java/org/apache/sysml/runtime/matrix/mapred/GroupedAggMRCombiner.java
index e561f3c..6cb0830 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/mapred/GroupedAggMRCombiner.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/mapred/GroupedAggMRCombiner.java
@@ -1,167 +1,167 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.sysml.runtime.matrix.mapred;
-
-import java.io.IOException;
-import java.util.HashMap;
-import java.util.Iterator;
-
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reducer;
-import org.apache.hadoop.mapred.Reporter;
-import org.apache.sysml.runtime.functionobjects.CM;
-import org.apache.sysml.runtime.functionobjects.KahanPlus;
-import org.apache.sysml.runtime.instructions.cp.CM_COV_Object;
-import org.apache.sysml.runtime.instructions.cp.KahanObject;
-import org.apache.sysml.runtime.instructions.mr.GroupedAggregateInstruction;
-import org.apache.sysml.runtime.matrix.data.TaggedMatrixIndexes;
-import org.apache.sysml.runtime.matrix.data.WeightedCell;
-import org.apache.sysml.runtime.matrix.operators.AggregateOperator;
-import org.apache.sysml.runtime.matrix.operators.CMOperator;
-import org.apache.sysml.runtime.matrix.operators.Operator;
-
-
-public class GroupedAggMRCombiner extends ReduceBase
-	implements Reducer<TaggedMatrixIndexes, WeightedCell, TaggedMatrixIndexes, WeightedCell>
-{	
-	//grouped aggregate instructions
-	private HashMap<Byte, GroupedAggregateInstruction> grpaggInstructions = new HashMap<Byte, GroupedAggregateInstruction>();
-	
-	//reused intermediate objects
-	private CM_COV_Object cmObj = new CM_COV_Object(); 
-	private HashMap<Byte, CM> cmFn = new HashMap<Byte, CM>();
-	private WeightedCell outCell = new WeightedCell();
-
-	@Override
-	public void reduce(TaggedMatrixIndexes key, Iterator<WeightedCell> values,
-			           OutputCollector<TaggedMatrixIndexes, WeightedCell> out, Reporter reporter)
-		throws IOException 
-	{
-		long start = System.currentTimeMillis();
-		
-		//get aggregate operator
-		GroupedAggregateInstruction ins = grpaggInstructions.get(key.getTag());
-		Operator op = ins.getOperator();
-		boolean isPartialAgg = true;
-		
-		//combine iterator to single value
-		try
-		{
-			if(op instanceof CMOperator) //everything except sum
-			{
-				if( ((CMOperator) op).isPartialAggregateOperator() )
-				{
-					cmObj.reset();
-					CM lcmFn = cmFn.get(key.getTag());
-					
-					//partial aggregate cm operator 
-					while( values.hasNext() )
-					{
-						WeightedCell value=values.next();
-						lcmFn.execute(cmObj, value.getValue(), value.getWeight());				
-					}
-					
-					outCell.setValue(cmObj.getRequiredPartialResult(op));
-					outCell.setWeight(cmObj.getWeight());	
-				}
-				else //forward tuples to reducer
-				{
-					isPartialAgg = false; 
-					while( values.hasNext() )
-						out.collect(key, values.next());
-				}				
-			}
-			else if(op instanceof AggregateOperator) //sum
-			{
-				AggregateOperator aggop=(AggregateOperator) op;
-					
-				if( aggop.correctionExists )
-				{
-					KahanObject buffer=new KahanObject(aggop.initialValue, 0);
-					
-					KahanPlus.getKahanPlusFnObject();
-					
-					//partial aggregate with correction
-					while( values.hasNext() )
-					{
-						WeightedCell value=values.next();
-						aggop.increOp.fn.execute(buffer, value.getValue()*value.getWeight());
-					}
-					
-					outCell.setValue(buffer._sum);
-					outCell.setWeight(1);
-				}
-				else //no correction
-				{
-					double v = aggop.initialValue;
-					
-					//partial aggregate without correction
-					while(values.hasNext())
-					{
-						WeightedCell value=values.next();
-						v=aggop.increOp.fn.execute(v, value.getValue()*value.getWeight());
-					}
-					
-					outCell.setValue(v);
-					outCell.setWeight(1);
-				}				
-			}
-			else
-				throw new IOException("Unsupported operator in instruction: " + ins);
-		}
-		catch(Exception ex)
-		{
-			throw new IOException(ex);
-		}
-		
-		//collect the output (to reducer)
-		if( isPartialAgg )
-			out.collect(key, outCell);
-		
-		reporter.incrCounter(Counters.COMBINE_OR_REDUCE_TIME, System.currentTimeMillis()-start);
-	}
-
-	@Override
-	public void configure(JobConf job)
-	{
-		try 
-		{
-			GroupedAggregateInstruction[] grpaggIns = MRJobConfiguration.getGroupedAggregateInstructions(job);
-			if( grpaggIns != null )	
-				for(GroupedAggregateInstruction ins : grpaggIns)
-				{
-					grpaggInstructions.put(ins.output, ins);	
-					if( ins.getOperator() instanceof CMOperator )
-						cmFn.put(ins.output, CM.getCMFnObject(((CMOperator)ins.getOperator()).getAggOpType()));
-				}
-		} 
-		catch (Exception e) 
-		{
-			throw new RuntimeException(e);
-		} 
-	}
-	
-	@Override
-	public void close()
-	{
-		//do nothing, overrides unnecessary handling in superclass
-	}
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.runtime.matrix.mapred;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Iterator;
+
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reducer;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.sysml.runtime.functionobjects.CM;
+import org.apache.sysml.runtime.functionobjects.KahanPlus;
+import org.apache.sysml.runtime.instructions.cp.CM_COV_Object;
+import org.apache.sysml.runtime.instructions.cp.KahanObject;
+import org.apache.sysml.runtime.instructions.mr.GroupedAggregateInstruction;
+import org.apache.sysml.runtime.matrix.data.TaggedMatrixIndexes;
+import org.apache.sysml.runtime.matrix.data.WeightedCell;
+import org.apache.sysml.runtime.matrix.operators.AggregateOperator;
+import org.apache.sysml.runtime.matrix.operators.CMOperator;
+import org.apache.sysml.runtime.matrix.operators.Operator;
+
+
+public class GroupedAggMRCombiner extends ReduceBase
+	implements Reducer<TaggedMatrixIndexes, WeightedCell, TaggedMatrixIndexes, WeightedCell>
+{	
+	//grouped aggregate instructions
+	private HashMap<Byte, GroupedAggregateInstruction> grpaggInstructions = new HashMap<Byte, GroupedAggregateInstruction>();
+	
+	//reused intermediate objects
+	private CM_COV_Object cmObj = new CM_COV_Object(); 
+	private HashMap<Byte, CM> cmFn = new HashMap<Byte, CM>();
+	private WeightedCell outCell = new WeightedCell();
+
+	@Override
+	public void reduce(TaggedMatrixIndexes key, Iterator<WeightedCell> values,
+			           OutputCollector<TaggedMatrixIndexes, WeightedCell> out, Reporter reporter)
+		throws IOException 
+	{
+		long start = System.currentTimeMillis();
+		
+		//get aggregate operator
+		GroupedAggregateInstruction ins = grpaggInstructions.get(key.getTag());
+		Operator op = ins.getOperator();
+		boolean isPartialAgg = true;
+		
+		//combine iterator to single value
+		try
+		{
+			if(op instanceof CMOperator) //everything except sum
+			{
+				if( ((CMOperator) op).isPartialAggregateOperator() )
+				{
+					cmObj.reset();
+					CM lcmFn = cmFn.get(key.getTag());
+					
+					//partial aggregate cm operator 
+					while( values.hasNext() )
+					{
+						WeightedCell value=values.next();
+						lcmFn.execute(cmObj, value.getValue(), value.getWeight());				
+					}
+					
+					outCell.setValue(cmObj.getRequiredPartialResult(op));
+					outCell.setWeight(cmObj.getWeight());	
+				}
+				else //forward tuples to reducer
+				{
+					isPartialAgg = false; 
+					while( values.hasNext() )
+						out.collect(key, values.next());
+				}				
+			}
+			else if(op instanceof AggregateOperator) //sum
+			{
+				AggregateOperator aggop=(AggregateOperator) op;
+					
+				if( aggop.correctionExists )
+				{
+					KahanObject buffer=new KahanObject(aggop.initialValue, 0);
+					
+					KahanPlus.getKahanPlusFnObject();
+					
+					//partial aggregate with correction
+					while( values.hasNext() )
+					{
+						WeightedCell value=values.next();
+						aggop.increOp.fn.execute(buffer, value.getValue()*value.getWeight());
+					}
+					
+					outCell.setValue(buffer._sum);
+					outCell.setWeight(1);
+				}
+				else //no correction
+				{
+					double v = aggop.initialValue;
+					
+					//partial aggregate without correction
+					while(values.hasNext())
+					{
+						WeightedCell value=values.next();
+						v=aggop.increOp.fn.execute(v, value.getValue()*value.getWeight());
+					}
+					
+					outCell.setValue(v);
+					outCell.setWeight(1);
+				}				
+			}
+			else
+				throw new IOException("Unsupported operator in instruction: " + ins);
+		}
+		catch(Exception ex)
+		{
+			throw new IOException(ex);
+		}
+		
+		//collect the output (to reducer)
+		if( isPartialAgg )
+			out.collect(key, outCell);
+		
+		reporter.incrCounter(Counters.COMBINE_OR_REDUCE_TIME, System.currentTimeMillis()-start);
+	}
+
+	@Override
+	public void configure(JobConf job)
+	{
+		try 
+		{
+			GroupedAggregateInstruction[] grpaggIns = MRJobConfiguration.getGroupedAggregateInstructions(job);
+			if( grpaggIns != null )	
+				for(GroupedAggregateInstruction ins : grpaggIns)
+				{
+					grpaggInstructions.put(ins.output, ins);	
+					if( ins.getOperator() instanceof CMOperator )
+						cmFn.put(ins.output, CM.getCMFnObject(((CMOperator)ins.getOperator()).getAggOpType()));
+				}
+		} 
+		catch (Exception e) 
+		{
+			throw new RuntimeException(e);
+		} 
+	}
+	
+	@Override
+	public void close()
+	{
+		//do nothing, overrides unnecessary handling in superclass
+	}
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/main/java/org/apache/sysml/runtime/matrix/sort/IndexSortComparable.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/sort/IndexSortComparable.java b/src/main/java/org/apache/sysml/runtime/matrix/sort/IndexSortComparable.java
index a08631d..fa9843a 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/sort/IndexSortComparable.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/sort/IndexSortComparable.java
@@ -1,84 +1,84 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.sysml.runtime.matrix.sort;
-
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-
-import org.apache.hadoop.io.DoubleWritable;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.WritableComparable;
-
-@SuppressWarnings("rawtypes")
-public class IndexSortComparable implements WritableComparable
-{
-	
-	protected DoubleWritable _dval = null;
-	protected LongWritable _lval = null; 
-	
-	public IndexSortComparable()
-	{
-		_dval = new DoubleWritable();
-		_lval = new LongWritable();
-	}
-	
-	public void set(double dval, long lval)
-	{
-		_dval.set(dval);
-		_lval.set(lval);
-	}
-
-	@Override
-	public void readFields(DataInput arg0)
-		throws IOException 
-	{
-		_dval.readFields(arg0);
-		_lval.readFields(arg0);
-	}
-
-	@Override
-	public void write(DataOutput arg0) 
-		throws IOException 
-	{
-		_dval.write(arg0);
-		_lval.write(arg0);
-	}
-
-	@Override
-	public int compareTo(Object o) 
-	{
-		//compare only double value (e.g., for partitioner)
-		if( o instanceof DoubleWritable ) {
-			return _dval.compareTo((DoubleWritable) o);
-		}
-		//compare double value and index (e.g., for stable sort)
-		else if( o instanceof IndexSortComparable) {
-			IndexSortComparable that = (IndexSortComparable)o;
-			int tmp = _dval.compareTo(that._dval);
-			if( tmp==0 ) //secondary sort
-				tmp = _lval.compareTo(that._lval);
-			return tmp;
-		}	
-		else {
-			throw new RuntimeException("Unsupported comparison involving class: "+o.getClass().getName());
-		}
-	}
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.runtime.matrix.sort;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.apache.hadoop.io.DoubleWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.WritableComparable;
+
+@SuppressWarnings("rawtypes")
+public class IndexSortComparable implements WritableComparable
+{
+	
+	protected DoubleWritable _dval = null;
+	protected LongWritable _lval = null; 
+	
+	public IndexSortComparable()
+	{
+		_dval = new DoubleWritable();
+		_lval = new LongWritable();
+	}
+	
+	public void set(double dval, long lval)
+	{
+		_dval.set(dval);
+		_lval.set(lval);
+	}
+
+	@Override
+	public void readFields(DataInput arg0)
+		throws IOException 
+	{
+		_dval.readFields(arg0);
+		_lval.readFields(arg0);
+	}
+
+	@Override
+	public void write(DataOutput arg0) 
+		throws IOException 
+	{
+		_dval.write(arg0);
+		_lval.write(arg0);
+	}
+
+	@Override
+	public int compareTo(Object o) 
+	{
+		//compare only double value (e.g., for partitioner)
+		if( o instanceof DoubleWritable ) {
+			return _dval.compareTo((DoubleWritable) o);
+		}
+		//compare double value and index (e.g., for stable sort)
+		else if( o instanceof IndexSortComparable) {
+			IndexSortComparable that = (IndexSortComparable)o;
+			int tmp = _dval.compareTo(that._dval);
+			if( tmp==0 ) //secondary sort
+				tmp = _lval.compareTo(that._lval);
+			return tmp;
+		}	
+		else {
+			throw new RuntimeException("Unsupported comparison involving class: "+o.getClass().getName());
+		}
+	}
+}


[45/55] [partial] incubator-systemml git commit: [SYSTEMML-482] [SYSTEMML-480] Adding a Git attributes file to enfore Unix-styled line endings, and normalizing all of the line endings.

Posted by du...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/scripts/algorithms/MultiLogReg.dml
----------------------------------------------------------------------
diff --git a/scripts/algorithms/MultiLogReg.dml b/scripts/algorithms/MultiLogReg.dml
index 0b18b9d..716678d 100644
--- a/scripts/algorithms/MultiLogReg.dml
+++ b/scripts/algorithms/MultiLogReg.dml
@@ -1,365 +1,365 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# Solves Multinomial Logistic Regression using Trust Region methods.
-# (See: Trust Region Newton Method for Logistic Regression, Lin, Weng and Keerthi, JMLR 9 (2008) 627-650)
-
-# INPUT PARAMETERS:
-# --------------------------------------------------------------------------------------------
-# NAME  TYPE   DEFAULT  MEANING
-# --------------------------------------------------------------------------------------------
-# X     String  ---     Location to read the matrix of feature vectors
-# Y     String  ---     Location to read the matrix with category labels
-# B     String  ---     Location to store estimated regression parameters (the betas)
-# Log   String  " "     Location to write per-iteration variables for log/debugging purposes
-# icpt  Int      0      Intercept presence, shifting and rescaling X columns:
-#                       0 = no intercept, no shifting, no rescaling;
-#                       1 = add intercept, but neither shift nor rescale X;
-#                       2 = add intercept, shift & rescale X columns to mean = 0, variance = 1
-# reg   Double  0.0     regularization parameter (lambda = 1/C); intercept is not regularized
-# tol   Double 0.000001 tolerance ("epsilon")
-# moi   Int     100     max. number of outer (Newton) iterations
-# mii   Int      0      max. number of inner (conjugate gradient) iterations, 0 = no max
-# fmt   String "text"   Matrix output format, usually "text" or "csv" (for matrices only)
-# --------------------------------------------------------------------------------------------
-# The largest label represents the baseline category; if label -1 or 0 is present, then it is
-# the baseline label (and it is converted to the largest label).
-#
-# The Log file, when requested, contains the following per-iteration variables in CSV format,
-# each line containing triple (NAME, ITERATION, VALUE) with ITERATION = 0 for initial values:
-#
-# NAME                  MEANING
-# -------------------------------------------------------------------------------------------
-# LINEAR_TERM_MIN       The minimum value of X %*% B, used to check for overflows
-# LINEAR_TERM_MAX       The maximum value of X %*% B, used to check for overflows
-# NUM_CG_ITERS          Number of inner (Conj.Gradient) iterations in this outer iteration
-# IS_TRUST_REACHED      1 = trust region boundary was reached, 0 = otherwise
-# POINT_STEP_NORM       L2-norm of iteration step from old point (i.e. matrix B) to new point
-# OBJECTIVE             The loss function we minimize (negative regularized log-likelihood)
-# OBJ_DROP_REAL         Reduction in the objective during this iteration, actual value
-# OBJ_DROP_PRED         Reduction in the objective predicted by a quadratic approximation
-# OBJ_DROP_RATIO        Actual-to-predicted reduction ratio, used to update the trust region
-# IS_POINT_UPDATED      1 = new point accepted; 0 = new point rejected, old point restored
-# GRADIENT_NORM         L2-norm of the loss function gradient (omitted if point is rejected)
-# TRUST_DELTA           Updated trust region size, the "delta"
-# -------------------------------------------------------------------------------------------
-#
-# Script invocation example:
-# hadoop jar SystemML.jar -f MultiLogReg.dml -nvargs icpt=2 reg=1.0 tol=0.000001 moi=100 mii=20
-#     X=INPUT_DIR/X123 Y=INPUT_DIR/Y123 B=OUTPUT_DIR/B123 fmt=csv Log=OUTPUT_DIR/log
-
-fileX = $X;
-fileY = $Y;
-fileB = $B;
-fileLog = ifdef ($Log, " ");
-fmtB = ifdef ($fmt, "text");
-
-intercept_status = ifdef ($icpt, 0); # $icpt = 0;
-regularization = ifdef ($reg, 0.0);  # $reg  = 0.0;
-tol = ifdef ($tol, 0.000001);        # $tol  = 0.000001;
-maxiter = ifdef ($moi, 100);         # $moi  = 100;
-maxinneriter = ifdef ($mii, 0);      # $mii  = 0;
-
-tol = as.double (tol); 
-
-print ("BEGIN MULTINOMIAL LOGISTIC REGRESSION SCRIPT");
-print ("Reading X...");
-X = read (fileX);
-print ("Reading Y...");
-Y_vec = read (fileY);
-
-eta0 = 0.0001;
-eta1 = 0.25;
-eta2 = 0.75;
-sigma1 = 0.25;
-sigma2 = 0.5;
-sigma3 = 4.0;
-psi = 0.1;
-
-N = nrow (X);
-D = ncol (X);
-
-# Introduce the intercept, shift and rescale the columns of X if needed
-if (intercept_status == 1 | intercept_status == 2)  # add the intercept column
-{
-    X = append (X, matrix (1, rows = N, cols = 1));
-    D = ncol (X);
-}
-
-scale_lambda = matrix (1, rows = D, cols = 1);
-if (intercept_status == 1 | intercept_status == 2)
-{
-    scale_lambda [D, 1] = 0;
-}
-
-if (intercept_status == 2)  # scale-&-shift X columns to mean 0, variance 1
-{                           # Important assumption: X [, D] = matrix (1, rows = N, cols = 1)
-    avg_X_cols = t(colSums(X)) / N;
-    var_X_cols = (t(colSums (X ^ 2)) - N * (avg_X_cols ^ 2)) / (N - 1);
-    is_unsafe = ppred (var_X_cols, 0.0, "<=");
-    scale_X = 1.0 / sqrt (var_X_cols * (1 - is_unsafe) + is_unsafe);
-    scale_X [D, 1] = 1;
-    shift_X = - avg_X_cols * scale_X;
-    shift_X [D, 1] = 0;
-    rowSums_X_sq = (X ^ 2) %*% (scale_X ^ 2) + X %*% (2 * scale_X * shift_X) + sum (shift_X ^ 2);
-} else {
-    scale_X = matrix (1, rows = D, cols = 1);
-    shift_X = matrix (0, rows = D, cols = 1);
-    rowSums_X_sq = rowSums (X ^ 2);
-}
-
-# Henceforth we replace "X" with "X %*% (SHIFT/SCALE TRANSFORM)" and rowSums(X ^ 2)
-# with "rowSums_X_sq" in order to preserve the sparsity of X under shift and scale.
-# The transform is then associatively applied to the other side of the expression,
-# and is rewritten via "scale_X" and "shift_X" as follows:
-#
-# ssX_A  = (SHIFT/SCALE TRANSFORM) %*% A    --- is rewritten as:
-# ssX_A  = diag (scale_X) %*% A;
-# ssX_A [D, ] = ssX_A [D, ] + t(shift_X) %*% A;
-#
-# tssX_A = t(SHIFT/SCALE TRANSFORM) %*% A   --- is rewritten as:
-# tssX_A = diag (scale_X) %*% A + shift_X %*% A [D, ];
-
-# Convert "Y_vec" into indicator matrice:
-if (min (Y_vec) <= 0) { 
-    # Category labels "0", "-1" etc. are converted into the largest label
-    max_y = max (Y_vec);
-    Y_vec  = Y_vec  + (- Y_vec  + max_y + 1) * ppred (Y_vec , 0.0, "<=");
-}
-Y = table (seq (1, N, 1), Y_vec);
-K = ncol (Y) - 1;   # The number of  non-baseline categories
-
-lambda = (scale_lambda %*% matrix (1, rows = 1, cols = K)) * regularization;
-delta = 0.5 * sqrt (D) / max (sqrt (rowSums_X_sq));
-
-B = matrix (0, rows = D, cols = K);     ### LT = X %*% (SHIFT/SCALE TRANSFORM) %*% B;
-                                        ### LT = append (LT, matrix (0, rows = N, cols = 1));
-                                        ### LT = LT - rowMaxs (LT) %*% matrix (1, rows = 1, cols = K+1);
-P = matrix (1, rows = N, cols = K+1);   ### exp_LT = exp (LT);
-P = P / (K + 1);                        ### P =  exp_LT / (rowSums (exp_LT) %*% matrix (1, rows = 1, cols = K+1));
-obj = N * log (K + 1);                  ### obj = - sum (Y * LT) + sum (log (rowSums (exp_LT))) + 0.5 * sum (lambda * (B_new ^ 2));
-
-Grad = t(X) %*% (P [, 1:K] - Y [, 1:K]);
-if (intercept_status == 2) {
-    Grad = diag (scale_X) %*% Grad + shift_X %*% Grad [D, ];
-}
-Grad = Grad + lambda * B;
-norm_Grad = sqrt (sum (Grad ^ 2));
-norm_Grad_initial = norm_Grad;
-
-if (maxinneriter == 0) {
-    maxinneriter = D * K;
-}
-iter = 1;
-
-# boolean for convergence check
-converge = (norm_Grad < tol) | (iter > maxiter);
-
-print ("-- Initially:  Objective = " + obj + ",  Gradient Norm = " + norm_Grad + ",  Trust Delta = " + delta);
-
-if (fileLog != " ") {
-    log_str = "OBJECTIVE,0," + obj;
-    log_str = append (log_str, "GRADIENT_NORM,0," + norm_Grad);
-    log_str = append (log_str, "TRUST_DELTA,0," + delta);
-} else {
-    log_str = " ";
-}
-
-while (! converge)
-{
-	# SOLVE TRUST REGION SUB-PROBLEM
-	S = matrix (0, rows = D, cols = K);
-	R = - Grad;
-	V = R;
-	delta2 = delta ^ 2;
-	inneriter = 1;
-	norm_R2 = sum (R ^ 2);
-	innerconverge = (sqrt (norm_R2) <= psi * norm_Grad);
-	is_trust_boundary_reached = 0;
-
-	while (! innerconverge)
-	{
-	    if (intercept_status == 2) {
-	        ssX_V = diag (scale_X) %*% V;
-	        ssX_V [D, ] = ssX_V [D, ] + t(shift_X) %*% V;
-	    } else {
-	        ssX_V = V;
-	    }
-        Q = P [, 1:K] * (X %*% ssX_V);
-        HV = t(X) %*% (Q - P [, 1:K] * (rowSums (Q) %*% matrix (1, rows = 1, cols = K)));
-        if (intercept_status == 2) {
-            HV = diag (scale_X) %*% HV + shift_X %*% HV [D, ];
-        }
-        HV = HV + lambda * V;
-		alpha = norm_R2 / sum (V * HV);
-		Snew = S + alpha * V;
-		norm_Snew2 = sum (Snew ^ 2);
-		if (norm_Snew2 <= delta2)
-		{
-			S = Snew;
-			R = R - alpha * HV;
-			old_norm_R2 = norm_R2 
-			norm_R2 = sum (R ^ 2);
-			V = R + (norm_R2 / old_norm_R2) * V;
-			innerconverge = (sqrt (norm_R2) <= psi * norm_Grad);
-		} else {
-	        is_trust_boundary_reached = 1;
-			sv = sum (S * V);
-			v2 = sum (V ^ 2);
-			s2 = sum (S ^ 2);
-			rad = sqrt (sv ^ 2 + v2 * (delta2 - s2));
-			if (sv >= 0) {
-				alpha = (delta2 - s2) / (sv + rad);
-			} else {
-				alpha = (rad - sv) / v2;
-			}
-			S = S + alpha * V;
-			R = R - alpha * HV;
-			innerconverge = TRUE;
-		}
-	    inneriter = inneriter + 1;
-	    innerconverge = innerconverge | (inneriter > maxinneriter);
-	}  
-	
-	# END TRUST REGION SUB-PROBLEM
-	
-	# compute rho, update B, obtain delta
-	gs = sum (S * Grad);
-	qk = - 0.5 * (gs - sum (S * R));
-	B_new = B + S;
-	if (intercept_status == 2) {
-	    ssX_B_new = diag (scale_X) %*% B_new;
-	    ssX_B_new [D, ] = ssX_B_new [D, ] + t(shift_X) %*% B_new;
-    } else {
-        ssX_B_new = B_new;
-    }
-    
-    LT = append ((X %*% ssX_B_new), matrix (0, rows = N, cols = 1));
-    if (fileLog != " ") {
-        log_str = append (log_str, "LINEAR_TERM_MIN,"  + iter + "," + min (LT));
-        log_str = append (log_str, "LINEAR_TERM_MAX,"  + iter + "," + max (LT));
-    }
-    LT = LT - rowMaxs (LT) %*% matrix (1, rows = 1, cols = K+1);
-    exp_LT = exp (LT);
-    P_new  = exp_LT / (rowSums (exp_LT) %*% matrix (1, rows = 1, cols = K+1));
-    obj_new = - sum (Y * LT) + sum (log (rowSums (exp_LT))) + 0.5 * sum (lambda * (B_new ^ 2));
-    	
-	# Consider updating LT in the inner loop
-	# Consider the big "obj" and "obj_new" rounding-off their small difference below:
-
-	actred = (obj - obj_new);
-	
-	rho = actred / qk;
-	is_rho_accepted = (rho > eta0);
-	snorm = sqrt (sum (S ^ 2));
-
-    if (fileLog != " ") {
-        log_str = append (log_str, "NUM_CG_ITERS,"     + iter + "," + (inneriter - 1));
-        log_str = append (log_str, "IS_TRUST_REACHED," + iter + "," + is_trust_boundary_reached);
-        log_str = append (log_str, "POINT_STEP_NORM,"  + iter + "," + snorm);
-        log_str = append (log_str, "OBJECTIVE,"        + iter + "," + obj_new);
-        log_str = append (log_str, "OBJ_DROP_REAL,"    + iter + "," + actred);
-        log_str = append (log_str, "OBJ_DROP_PRED,"    + iter + "," + qk);
-        log_str = append (log_str, "OBJ_DROP_RATIO,"   + iter + "," + rho);
-    }
-
-	if (iter == 1) {
-	   delta = min (delta, snorm);
-	}
-
-	alpha2 = obj_new - obj - gs;
-	if (alpha2 <= 0) {
-	   alpha = sigma3;
-	} 
-	else {
-	   alpha = max (sigma1, -0.5 * gs / alpha2);
-	}
-	
-	if (rho < eta0) {
-		delta = min (max (alpha, sigma1) * snorm, sigma2 * delta);
-	}
-	else {
-		if (rho < eta1) {
-			delta = max (sigma1 * delta, min (alpha * snorm, sigma2 * delta));
-		}
-		else { 
-			if (rho < eta2) {
-				delta = max (sigma1 * delta, min (alpha * snorm, sigma3 * delta));
-			}
-			else {
-				delta = max (delta, min (alpha * snorm, sigma3 * delta));
-			}
-		}
-	} 
-	
-	if (is_trust_boundary_reached == 1)
-	{
-	    print ("-- Outer Iteration " + iter + ": Had " + (inneriter - 1) + " CG iterations, trust bound REACHED");
-	} else {
-	    print ("-- Outer Iteration " + iter + ": Had " + (inneriter - 1) + " CG iterations");
-	}
-	print ("   -- Obj.Reduction:  Actual = " + actred + ",  Predicted = " + qk + 
-	       "  (A/P: " + (round (10000.0 * rho) / 10000.0) + "),  Trust Delta = " + delta);
-	       
-	if (is_rho_accepted)
-	{
-		B = B_new;
-		P = P_new;
-		Grad = t(X) %*% (P [, 1:K] - Y [, 1:K]);
-		if (intercept_status == 2) {
-		    Grad = diag (scale_X) %*% Grad + shift_X %*% Grad [D, ];
-		}
-		Grad = Grad + lambda * B;
-		norm_Grad = sqrt (sum (Grad ^ 2));
-		obj = obj_new;
-	    print ("   -- New Objective = " + obj + ",  Beta Change Norm = " + snorm + ",  Gradient Norm = " + norm_Grad);
-        if (fileLog != " ") {
-            log_str = append (log_str, "IS_POINT_UPDATED," + iter + ",1");
-            log_str = append (log_str, "GRADIENT_NORM,"    + iter + "," + norm_Grad);
-        }
-	} else {
-        if (fileLog != " ") {
-            log_str = append (log_str, "IS_POINT_UPDATED," + iter + ",0");
-        }
-    }	
-	
-    if (fileLog != " ") {
-        log_str = append (log_str, "TRUST_DELTA," + iter + "," + delta);
-    }	
-	
-	iter = iter + 1;
-	converge = ((norm_Grad < (tol * norm_Grad_initial)) | (iter > maxiter) |
-	    ((is_trust_boundary_reached == 0) & (abs (actred) < (abs (obj) + abs (obj_new)) * 0.00000000000001)));
-    if (converge) { print ("Termination / Convergence condition satisfied."); } else { print (" "); }
-} 
-
-if (intercept_status == 2) {
-    B_out = diag (scale_X) %*% B;
-    B_out [D, ] = B_out [D, ] + t(shift_X) %*% B;
-} else {
-    B_out = B;
-}
-write (B_out, fileB, format=fmtB);
-
-if (fileLog != " ") {
-    write (log_str, fileLog);
-}
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# Solves Multinomial Logistic Regression using Trust Region methods.
+# (See: Trust Region Newton Method for Logistic Regression, Lin, Weng and Keerthi, JMLR 9 (2008) 627-650)
+
+# INPUT PARAMETERS:
+# --------------------------------------------------------------------------------------------
+# NAME  TYPE   DEFAULT  MEANING
+# --------------------------------------------------------------------------------------------
+# X     String  ---     Location to read the matrix of feature vectors
+# Y     String  ---     Location to read the matrix with category labels
+# B     String  ---     Location to store estimated regression parameters (the betas)
+# Log   String  " "     Location to write per-iteration variables for log/debugging purposes
+# icpt  Int      0      Intercept presence, shifting and rescaling X columns:
+#                       0 = no intercept, no shifting, no rescaling;
+#                       1 = add intercept, but neither shift nor rescale X;
+#                       2 = add intercept, shift & rescale X columns to mean = 0, variance = 1
+# reg   Double  0.0     regularization parameter (lambda = 1/C); intercept is not regularized
+# tol   Double 0.000001 tolerance ("epsilon")
+# moi   Int     100     max. number of outer (Newton) iterations
+# mii   Int      0      max. number of inner (conjugate gradient) iterations, 0 = no max
+# fmt   String "text"   Matrix output format, usually "text" or "csv" (for matrices only)
+# --------------------------------------------------------------------------------------------
+# The largest label represents the baseline category; if label -1 or 0 is present, then it is
+# the baseline label (and it is converted to the largest label).
+#
+# The Log file, when requested, contains the following per-iteration variables in CSV format,
+# each line containing triple (NAME, ITERATION, VALUE) with ITERATION = 0 for initial values:
+#
+# NAME                  MEANING
+# -------------------------------------------------------------------------------------------
+# LINEAR_TERM_MIN       The minimum value of X %*% B, used to check for overflows
+# LINEAR_TERM_MAX       The maximum value of X %*% B, used to check for overflows
+# NUM_CG_ITERS          Number of inner (Conj.Gradient) iterations in this outer iteration
+# IS_TRUST_REACHED      1 = trust region boundary was reached, 0 = otherwise
+# POINT_STEP_NORM       L2-norm of iteration step from old point (i.e. matrix B) to new point
+# OBJECTIVE             The loss function we minimize (negative regularized log-likelihood)
+# OBJ_DROP_REAL         Reduction in the objective during this iteration, actual value
+# OBJ_DROP_PRED         Reduction in the objective predicted by a quadratic approximation
+# OBJ_DROP_RATIO        Actual-to-predicted reduction ratio, used to update the trust region
+# IS_POINT_UPDATED      1 = new point accepted; 0 = new point rejected, old point restored
+# GRADIENT_NORM         L2-norm of the loss function gradient (omitted if point is rejected)
+# TRUST_DELTA           Updated trust region size, the "delta"
+# -------------------------------------------------------------------------------------------
+#
+# Script invocation example:
+# hadoop jar SystemML.jar -f MultiLogReg.dml -nvargs icpt=2 reg=1.0 tol=0.000001 moi=100 mii=20
+#     X=INPUT_DIR/X123 Y=INPUT_DIR/Y123 B=OUTPUT_DIR/B123 fmt=csv Log=OUTPUT_DIR/log
+
+fileX = $X;
+fileY = $Y;
+fileB = $B;
+fileLog = ifdef ($Log, " ");         # a single blank is the sentinel for: no log requested
+fmtB = ifdef ($fmt, "text");
+
+intercept_status = ifdef ($icpt, 0); # $icpt = 0;
+regularization = ifdef ($reg, 0.0);  # $reg  = 0.0;
+tol = ifdef ($tol, 0.000001);        # $tol  = 0.000001;
+maxiter = ifdef ($moi, 100);         # $moi  = 100;
+maxinneriter = ifdef ($mii, 0);      # $mii  = 0;
+
+tol = as.double (tol); 
+
+print ("BEGIN MULTINOMIAL LOGISTIC REGRESSION SCRIPT");
+print ("Reading X...");
+X = read (fileX);
+print ("Reading Y...");
+Y_vec = read (fileY);
+
+eta0 = 0.0001;   # a step is accepted only when rho > eta0
+eta1 = 0.25;     # eta0, eta1, eta2 are the rho thresholds that select how delta is updated
+eta2 = 0.75;
+sigma1 = 0.25;   # sigma1, sigma2, sigma3 are the factors applied when the trust region is resized
+sigma2 = 0.5;
+sigma3 = 4.0;
+psi = 0.1;       # inner CG loop stops once sqrt (norm_R2) <= psi * norm_Grad
+
+N = nrow (X);    # number of rows of X
+D = ncol (X);    # number of columns of X; recomputed if an intercept column is appended
+
+# Introduce the intercept, shift and rescale the columns of X if needed
+if (intercept_status == 1 | intercept_status == 2)  # add the intercept column
+{
+    X = append (X, matrix (1, rows = N, cols = 1));
+    D = ncol (X);
+}
+
+scale_lambda = matrix (1, rows = D, cols = 1);  # per-feature multiplier for the regularization term
+if (intercept_status == 1 | intercept_status == 2)
+{
+    scale_lambda [D, 1] = 0;  # the intercept (last row) is not regularized
+}
+
+if (intercept_status == 2)  # scale-&-shift X columns to mean 0, variance 1
+{                           # Important assumption: X [, D] = matrix (1, rows = N, cols = 1)
+    avg_X_cols = t(colSums(X)) / N;
+    var_X_cols = (t(colSums (X ^ 2)) - N * (avg_X_cols ^ 2)) / (N - 1);
+    is_unsafe = ppred (var_X_cols, 0.0, "<=");  # 1 for columns whose variance is not positive
+    scale_X = 1.0 / sqrt (var_X_cols * (1 - is_unsafe) + is_unsafe);  # unsafe columns use variance 1, i.e. scale 1
+    scale_X [D, 1] = 1;  # the intercept column is left unscaled
+    shift_X = - avg_X_cols * scale_X;
+    shift_X [D, 1] = 0;  # the intercept column is left unshifted
+    rowSums_X_sq = (X ^ 2) %*% (scale_X ^ 2) + X %*% (2 * scale_X * shift_X) + sum (shift_X ^ 2);  # expansion of sum_j (X[i,j] * scale_X[j] + shift_X[j]) ^ 2
+} else {
+    scale_X = matrix (1, rows = D, cols = 1);  # identity transform: scale 1, shift 0
+    shift_X = matrix (0, rows = D, cols = 1);
+    rowSums_X_sq = rowSums (X ^ 2);
+}
+
+# Henceforth we replace "X" with "X %*% (SHIFT/SCALE TRANSFORM)" and rowSums(X ^ 2)
+# with "rowSums_X_sq" in order to preserve the sparsity of X under shift and scale.
+# The transform is then associatively applied to the other side of the expression,
+# and is rewritten via "scale_X" and "shift_X" as follows:
+#
+# ssX_A  = (SHIFT/SCALE TRANSFORM) %*% A    --- is rewritten as:
+# ssX_A  = diag (scale_X) %*% A;
+# ssX_A [D, ] = ssX_A [D, ] + t(shift_X) %*% A;
+#
+# tssX_A = t(SHIFT/SCALE TRANSFORM) %*% A   --- is rewritten as:
+# tssX_A = diag (scale_X) %*% A + shift_X %*% A [D, ];
+
+# Convert "Y_vec" into an indicator matrix:
+if (min (Y_vec) <= 0) { 
+    # Category labels "0", "-1" etc. are converted into the largest label
+    max_y = max (Y_vec);
+    Y_vec  = Y_vec  + (- Y_vec  + max_y + 1) * ppred (Y_vec , 0.0, "<=");  # every label <= 0 becomes max_y + 1, other labels are unchanged
+}
+Y = table (seq (1, N, 1), Y_vec);
+K = ncol (Y) - 1;   # The number of non-baseline categories (the last column of Y is the baseline)
+
+lambda = (scale_lambda %*% matrix (1, rows = 1, cols = K)) * regularization;  # D x K matrix of regularization weights
+delta = 0.5 * sqrt (D) / max (sqrt (rowSums_X_sq));  # initial trust region size
+
+B = matrix (0, rows = D, cols = K);     ### LT = X %*% (SHIFT/SCALE TRANSFORM) %*% B;
+                                        ### LT = append (LT, matrix (0, rows = N, cols = 1));
+                                        ### LT = LT - rowMaxs (LT) %*% matrix (1, rows = 1, cols = K+1);
+P = matrix (1, rows = N, cols = K+1);   ### exp_LT = exp (LT);
+P = P / (K + 1);                        ### P =  exp_LT / (rowSums (exp_LT) %*% matrix (1, rows = 1, cols = K+1));
+obj = N * log (K + 1);                  ### obj = - sum (Y * LT) + sum (log (rowSums (exp_LT))) + 0.5 * sum (lambda * (B_new ^ 2));
+
+Grad = t(X) %*% (P [, 1:K] - Y [, 1:K]);
+if (intercept_status == 2) {
+    Grad = diag (scale_X) %*% Grad + shift_X %*% Grad [D, ];  # the tssX_A rewrite: apply the transposed shift/scale transform
+}
+Grad = Grad + lambda * B;
+norm_Grad = sqrt (sum (Grad ^ 2));
+norm_Grad_initial = norm_Grad;  # kept for the relative gradient test inside the outer loop
+
+if (maxinneriter == 0) {
+    maxinneriter = D * K;  # mii = 0 is documented as no max; in practice the cap becomes D * K
+}
+iter = 1;
+
+# boolean for convergence check
+converge = (norm_Grad < tol) | (iter > maxiter);  # NOTE(review): absolute test here, while the in-loop test uses tol * norm_Grad_initial - confirm intended
+
+print ("-- Initially:  Objective = " + obj + ",  Gradient Norm = " + norm_Grad + ",  Trust Delta = " + delta);
+
+if (fileLog != " ") {
+    log_str = "OBJECTIVE,0," + obj;
+    log_str = append (log_str, "GRADIENT_NORM,0," + norm_Grad);
+    log_str = append (log_str, "TRUST_DELTA,0," + delta);
+} else {
+    log_str = " ";
+}
+
+while (! converge)
+{
+	# SOLVE TRUST REGION SUB-PROBLEM
+	S = matrix (0, rows = D, cols = K);  # accumulated step, starts at zero
+	R = - Grad;  # residual, starts at the negative gradient
+	V = R;  # first search direction
+	delta2 = delta ^ 2;
+	inneriter = 1;
+	norm_R2 = sum (R ^ 2);
+	innerconverge = (sqrt (norm_R2) <= psi * norm_Grad);
+	is_trust_boundary_reached = 0;
+
+	while (! innerconverge)
+	{
+	    if (intercept_status == 2) {
+	        ssX_V = diag (scale_X) %*% V;
+	        ssX_V [D, ] = ssX_V [D, ] + t(shift_X) %*% V;
+	    } else {
+	        ssX_V = V;
+	    }
+        Q = P [, 1:K] * (X %*% ssX_V);
+        HV = t(X) %*% (Q - P [, 1:K] * (rowSums (Q) %*% matrix (1, rows = 1, cols = K)));
+        if (intercept_status == 2) {
+            HV = diag (scale_X) %*% HV + shift_X %*% HV [D, ];
+        }
+        HV = HV + lambda * V;
+		alpha = norm_R2 / sum (V * HV);  # step length along V
+		Snew = S + alpha * V;
+		norm_Snew2 = sum (Snew ^ 2);
+		if (norm_Snew2 <= delta2)  # candidate step stays inside the trust region
+		{
+			S = Snew;
+			R = R - alpha * HV;
+			old_norm_R2 = norm_R2 
+			norm_R2 = sum (R ^ 2);
+			V = R + (norm_R2 / old_norm_R2) * V;
+			innerconverge = (sqrt (norm_R2) <= psi * norm_Grad);
+		} else {  # candidate step leaves the trust region: go to the boundary along V and stop
+	        is_trust_boundary_reached = 1;
+			sv = sum (S * V);
+			v2 = sum (V ^ 2);
+			s2 = sum (S ^ 2);
+			rad = sqrt (sv ^ 2 + v2 * (delta2 - s2));
+			if (sv >= 0) {  # both branches give the positive root of sum ((S + alpha * V) ^ 2) = delta2
+				alpha = (delta2 - s2) / (sv + rad);
+			} else {
+				alpha = (rad - sv) / v2;
+			}
+			S = S + alpha * V;
+			R = R - alpha * HV;
+			innerconverge = TRUE;
+		}
+	    inneriter = inneriter + 1;
+	    innerconverge = innerconverge | (inneriter > maxinneriter);
+	}  
+	
+	# END TRUST REGION SUB-PROBLEM
+	
+	# compute rho, update B, obtain delta
+	gs = sum (S * Grad);
+	qk = - 0.5 * (gs - sum (S * R));  # predicted objective reduction, logged as OBJ_DROP_PRED
+	B_new = B + S;
+	if (intercept_status == 2) {
+	    ssX_B_new = diag (scale_X) %*% B_new;
+	    ssX_B_new [D, ] = ssX_B_new [D, ] + t(shift_X) %*% B_new;
+    } else {
+        ssX_B_new = B_new;
+    }
+    
+    LT = append ((X %*% ssX_B_new), matrix (0, rows = N, cols = 1));  # the appended zero column is column K+1
+    if (fileLog != " ") {
+        log_str = append (log_str, "LINEAR_TERM_MIN,"  + iter + "," + min (LT));
+        log_str = append (log_str, "LINEAR_TERM_MAX,"  + iter + "," + max (LT));
+    }
+    LT = LT - rowMaxs (LT) %*% matrix (1, rows = 1, cols = K+1);  # shift each row so its largest entry is 0 before exp
+    exp_LT = exp (LT);
+    P_new  = exp_LT / (rowSums (exp_LT) %*% matrix (1, rows = 1, cols = K+1));
+    obj_new = - sum (Y * LT) + sum (log (rowSums (exp_LT))) + 0.5 * sum (lambda * (B_new ^ 2));
+    	
+	# Consider updating LT in the inner loop
+	# Consider the big "obj" and "obj_new" rounding-off their small difference below:
+
+	actred = (obj - obj_new);  # actual objective reduction, logged as OBJ_DROP_REAL
+	
+	rho = actred / qk;  # actual-to-predicted reduction ratio
+	is_rho_accepted = (rho > eta0);
+	snorm = sqrt (sum (S ^ 2));
+
+    if (fileLog != " ") {
+        log_str = append (log_str, "NUM_CG_ITERS,"     + iter + "," + (inneriter - 1));
+        log_str = append (log_str, "IS_TRUST_REACHED," + iter + "," + is_trust_boundary_reached);
+        log_str = append (log_str, "POINT_STEP_NORM,"  + iter + "," + snorm);
+        log_str = append (log_str, "OBJECTIVE,"        + iter + "," + obj_new);
+        log_str = append (log_str, "OBJ_DROP_REAL,"    + iter + "," + actred);
+        log_str = append (log_str, "OBJ_DROP_PRED,"    + iter + "," + qk);
+        log_str = append (log_str, "OBJ_DROP_RATIO,"   + iter + "," + rho);
+    }
+
+	if (iter == 1) {  # first outer iteration only: cap the initial delta by the step just taken
+	   delta = min (delta, snorm);
+	}
+
+	alpha2 = obj_new - obj - gs;  # objective change minus the linear term gs
+	if (alpha2 <= 0) {
+	   alpha = sigma3;
+	} 
+	else {
+	   alpha = max (sigma1, -0.5 * gs / alpha2);
+	}
+	
+	if (rho < eta0) {  # the update rule for delta depends on which band rho falls in
+		delta = min (max (alpha, sigma1) * snorm, sigma2 * delta);
+	}
+	else {
+		if (rho < eta1) {
+			delta = max (sigma1 * delta, min (alpha * snorm, sigma2 * delta));
+		}
+		else { 
+			if (rho < eta2) {
+				delta = max (sigma1 * delta, min (alpha * snorm, sigma3 * delta));
+			}
+			else {
+				delta = max (delta, min (alpha * snorm, sigma3 * delta));
+			}
+		}
+	} 
+	
+	if (is_trust_boundary_reached == 1)
+	{
+	    print ("-- Outer Iteration " + iter + ": Had " + (inneriter - 1) + " CG iterations, trust bound REACHED");
+	} else {
+	    print ("-- Outer Iteration " + iter + ": Had " + (inneriter - 1) + " CG iterations");
+	}
+	print ("   -- Obj.Reduction:  Actual = " + actred + ",  Predicted = " + qk + 
+	       "  (A/P: " + (round (10000.0 * rho) / 10000.0) + "),  Trust Delta = " + delta);
+	       
+	if (is_rho_accepted)  # accept the new point and recompute the gradient; otherwise B, P, Grad and obj keep their old values
+	{
+		B = B_new;
+		P = P_new;
+		Grad = t(X) %*% (P [, 1:K] - Y [, 1:K]);
+		if (intercept_status == 2) {
+		    Grad = diag (scale_X) %*% Grad + shift_X %*% Grad [D, ];
+		}
+		Grad = Grad + lambda * B;
+		norm_Grad = sqrt (sum (Grad ^ 2));
+		obj = obj_new;
+	    print ("   -- New Objective = " + obj + ",  Beta Change Norm = " + snorm + ",  Gradient Norm = " + norm_Grad);
+        if (fileLog != " ") {
+            log_str = append (log_str, "IS_POINT_UPDATED," + iter + ",1");
+            log_str = append (log_str, "GRADIENT_NORM,"    + iter + "," + norm_Grad);
+        }
+	} else {
+        if (fileLog != " ") {
+            log_str = append (log_str, "IS_POINT_UPDATED," + iter + ",0");
+        }
+    }	
+	
+    if (fileLog != " ") {
+        log_str = append (log_str, "TRUST_DELTA," + iter + "," + delta);
+    }	
+	
+	iter = iter + 1;
+	converge = ((norm_Grad < (tol * norm_Grad_initial)) | (iter > maxiter) |
+	    ((is_trust_boundary_reached == 0) & (abs (actred) < (abs (obj) + abs (obj_new)) * 0.00000000000001)));  # stop on relative gradient tolerance, iteration cap, or negligible objective change with the step inside the trust region
+    if (converge) { print ("Termination / Convergence condition satisfied."); } else { print (" "); }
+} 
+
+if (intercept_status == 2) {  # apply the shift/scale transform to B (the ssX_A rewrite) before writing it out
+    B_out = diag (scale_X) %*% B;
+    B_out [D, ] = B_out [D, ] + t(shift_X) %*% B;
+} else {
+    B_out = B;
+}
+write (B_out, fileB, format=fmtB);
+
+if (fileLog != " ") {  # the log is written only when a Log location was supplied
+    write (log_str, fileLog);
+}
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/scripts/algorithms/PCA.dml
----------------------------------------------------------------------
diff --git a/scripts/algorithms/PCA.dml b/scripts/algorithms/PCA.dml
index e8b4aec..65800b7 100644
--- a/scripts/algorithms/PCA.dml
+++ b/scripts/algorithms/PCA.dml
@@ -1,112 +1,112 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# 
-# This script performs Principal Component Analysis (PCA) on the given input data.
-#
-# INPUT PARAMETERS:
-# ---------------------------------------------------------------------------------------------
-# NAME   TYPE   DEFAULT  MEANING
-# ---------------------------------------------------------------------------------------------
-# INPUT  String ---      Location to read the matrix A of feature vectors
-# K      Int    ---      Indicates dimension of the new vector space constructed from eigen vectors
-# CENTER Int    0        Indicates whether or not to center data 
-# SCALE  Int    0        Indicates whether or not to scale data 
-# OFMT   String ---      Output data format
-# PROJDATA Int  0	     This argument indicates if the data should be projected or not
-# MODEL  String ---      Location to already existing model: eigenvectors and eigenvalues 
-# OUTPUT String /        Location to write output matrices (covariance matrix, new basis vectors, 
-#                           and data projected onto new basis vectors)
-# hadoop jar SystemML.jar -f PCA.dml -nvargs INPUT=INPUT_DIR/pca-1000x1000 
-# OUTPUT=OUTPUT_DIR/pca-1000x1000-model PROJDATA=1 CENTER=1 SCALE=1
-# ---------------------------------------------------------------------------------------------
-
-A = read($INPUT);
-K = ifdef($K, ncol(A));
-ofmt = ifdef($OFMT, "CSV");
-projectData = ifdef($PROJDATA,0);
-model = ifdef($MODEL,"");
-center = ifdef($CENTER,0);
-scale = ifdef($SCALE,0);
-output = ifdef($OUTPUT,"/");
-
-evec_dominant = matrix(0,cols=1,rows=1);
-
-if (model != "") {
-	# reuse existing model to project data
-    evec_dominant = read(model+"/dominant.eigen.vectors");
-}else{
-	if (model == "" ){
-		model = output;	
-	}	
-
-	N = nrow(A);
-	D = ncol(A);
-
-	# perform z-scoring (centering and scaling)
-	if (center == 1) {
-	    cm = colMeans(A);
-	    A = A - cm;
-	}
-	if (scale == 1) {
-	    cvars = (colSums (A^2));	
-	    if (center == 1){
-		cm = colMeans(A);
-	    	cvars = (cvars - N*(cm^2))/(N-1);		    
- 	    }
-	    Azscored = (A)/sqrt(cvars);
-            A = Azscored;
-	}	
-
-	# co-variance matrix 
-	mu = colSums(A)/N;
-	C = (t(A) %*% A)/(N-1) - (N/(N-1))*t(mu) %*% mu;
-
-
-	# compute eigen vectors and values
-	[evalues, evectors] = eigen(C);
-
-	decreasing_Idx = order(target=evalues,by=1,decreasing=TRUE,index.return=TRUE);
-	diagmat = table(seq(1,D),decreasing_Idx);
-	# sorts eigenvalues by decreasing order
-	evalues = diagmat %*% evalues;
-	# sorts eigenvectors column-wise in the order of decreasing eigenvalues
-	evectors = evectors %*% diagmat;
-
-
-	# select K dominant eigen vectors 
-	nvec = ncol(evectors);
-
-	eval_dominant = evalues[1:K, 1];
-	evec_dominant = evectors[,1:K];
-	
-	# the square root of eigenvalues
-	eval_stdev_dominant = sqrt(eval_dominant);
-	
-	write(eval_stdev_dominant, model+"/dominant.eigen.standard.deviations", format=ofmt);
-	write(eval_dominant, model+"/dominant.eigen.values", format=ofmt);
-	write(evec_dominant, model+"/dominant.eigen.vectors", format=ofmt);
-}
-if (projectData == 1 | model != ""){
-	# Construct new data set by treating computed dominant eigenvectors as the basis vectors
-	newA = A %*% evec_dominant;
-	write(newA, output+"/projected.data", format=ofmt);
-}
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# 
+# This script performs Principal Component Analysis (PCA) on the given input data.
+#
+# INPUT PARAMETERS:
+# ---------------------------------------------------------------------------------------------
+# NAME   TYPE   DEFAULT  MEANING
+# ---------------------------------------------------------------------------------------------
+# INPUT  String ---      Location to read the matrix A of feature vectors
+# K      Int    ---      Indicates dimension of the new vector space constructed from eigen vectors
+# CENTER Int    0        Indicates whether or not to center data 
+# SCALE  Int    0        Indicates whether or not to scale data 
+# OFMT   String ---      Output data format
+# PROJDATA Int  0	     This argument indicates if the data should be projected or not
+# MODEL  String ---      Location to already existing model: eigenvectors and eigenvalues 
+# OUTPUT String /        Location to write output matrices (covariance matrix, new basis vectors, 
+#                           and data projected onto new basis vectors)
+# hadoop jar SystemML.jar -f PCA.dml -nvargs INPUT=INPUT_DIR/pca-1000x1000 
+# OUTPUT=OUTPUT_DIR/pca-1000x1000-model PROJDATA=1 CENTER=1 SCALE=1
+# ---------------------------------------------------------------------------------------------
+
+A = read($INPUT);
+K = ifdef($K, ncol(A));
+ofmt = ifdef($OFMT, "CSV");
+projectData = ifdef($PROJDATA,0);
+model = ifdef($MODEL,"");
+center = ifdef($CENTER,0);
+scale = ifdef($SCALE,0);
+output = ifdef($OUTPUT,"/");
+
+evec_dominant = matrix(0,cols=1,rows=1);
+
+if (model != "") {
+	# reuse existing model to project data
+    evec_dominant = read(model+"/dominant.eigen.vectors");
+}else{
+	if (model == "" ){
+		model = output;	
+	}	
+
+	N = nrow(A);
+	D = ncol(A);
+
+	# perform z-scoring (centering and scaling)
+	if (center == 1) {
+	    cm = colMeans(A);
+	    A = A - cm;
+	}
+	if (scale == 1) {
+	    cvars = (colSums (A^2));	
+	    if (center == 1){
+		cm = colMeans(A);
+	    	cvars = (cvars - N*(cm^2))/(N-1);		    
+ 	    }
+	    Azscored = (A)/sqrt(cvars);
+            A = Azscored;
+	}	
+
+	# co-variance matrix 
+	mu = colSums(A)/N;
+	C = (t(A) %*% A)/(N-1) - (N/(N-1))*t(mu) %*% mu;
+
+
+	# compute eigen vectors and values
+	[evalues, evectors] = eigen(C);
+
+	decreasing_Idx = order(target=evalues,by=1,decreasing=TRUE,index.return=TRUE);
+	diagmat = table(seq(1,D),decreasing_Idx);
+	# sorts eigenvalues by decreasing order
+	evalues = diagmat %*% evalues;
+	# sorts eigenvectors column-wise in the order of decreasing eigenvalues
+	evectors = evectors %*% diagmat;
+
+
+	# select K dominant eigen vectors 
+	nvec = ncol(evectors);
+
+	eval_dominant = evalues[1:K, 1];
+	evec_dominant = evectors[,1:K];
+	
+	# the square root of eigenvalues
+	eval_stdev_dominant = sqrt(eval_dominant);
+	
+	write(eval_stdev_dominant, model+"/dominant.eigen.standard.deviations", format=ofmt);
+	write(eval_dominant, model+"/dominant.eigen.values", format=ofmt);
+	write(evec_dominant, model+"/dominant.eigen.vectors", format=ofmt);
+}
+if (projectData == 1 | model != ""){
+	# Construct new data set by treating computed dominant eigenvectors as the basis vectors
+	newA = A %*% evec_dominant;
+	write(newA, output+"/projected.data", format=ofmt);
+}



[14/55] [partial] incubator-systemml git commit: [SYSTEMML-482] [SYSTEMML-480] Adding a Git attributes file to enfore Unix-styled line endings, and normalizing all of the line endings.

Posted by du...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/binary/matrix/BinUaggChain_Col.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/binary/matrix/BinUaggChain_Col.R b/src/test/scripts/functions/binary/matrix/BinUaggChain_Col.R
index 674ee3a..31e2d26 100644
--- a/src/test/scripts/functions/binary/matrix/BinUaggChain_Col.R
+++ b/src/test/scripts/functions/binary/matrix/BinUaggChain_Col.R
@@ -1,32 +1,32 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
-B <- A / rowSums(A);
-
-writeMM(as(B, "CsparseMatrix"), paste(args[2], "B", sep="")); 
-
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
+B <- A / rowSums(A);
+
+writeMM(as(B, "CsparseMatrix"), paste(args[2], "B", sep="")); 
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/binary/matrix/CentralMoment.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/binary/matrix/CentralMoment.R b/src/test/scripts/functions/binary/matrix/CentralMoment.R
index fa33986..34ee2dd 100644
--- a/src/test/scripts/functions/binary/matrix/CentralMoment.R
+++ b/src/test/scripts/functions/binary/matrix/CentralMoment.R
@@ -1,36 +1,36 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-library("moments")
-
-A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
-order = as.integer(args[2]);
-
-s = moment(A, order, central=TRUE);
-m = as.matrix(s);
-
-writeMM(as(m, "CsparseMatrix"), paste(args[3], "R", sep="")); 
-
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+library("moments")
+
+A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
+order = as.integer(args[2]);
+
+s = moment(A, order, central=TRUE);
+m = as.matrix(s);
+
+writeMM(as(m, "CsparseMatrix"), paste(args[3], "R", sep="")); 
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/binary/matrix/Covariance.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/binary/matrix/Covariance.R b/src/test/scripts/functions/binary/matrix/Covariance.R
index 199fcf1..2aab811 100644
--- a/src/test/scripts/functions/binary/matrix/Covariance.R
+++ b/src/test/scripts/functions/binary/matrix/Covariance.R
@@ -1,36 +1,36 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-library("moments")
-
-A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
-B <- as.matrix(readMM(paste(args[1], "B.mtx", sep="")))
-
-s = cov(A, B);
-m = as.matrix(s);
-
-writeMM(as(m, "CsparseMatrix"), paste(args[2], "R", sep="")); 
-
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+library("moments")
+
+A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
+B <- as.matrix(readMM(paste(args[1], "B.mtx", sep="")))
+
+s = cov(A, B);
+m = as.matrix(s);
+
+writeMM(as(m, "CsparseMatrix"), paste(args[2], "R", sep="")); 
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/binary/matrix/DiagMatrixMultiplication.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/binary/matrix/DiagMatrixMultiplication.R b/src/test/scripts/functions/binary/matrix/DiagMatrixMultiplication.R
index 315169e..ebf7909 100644
--- a/src/test/scripts/functions/binary/matrix/DiagMatrixMultiplication.R
+++ b/src/test/scripts/functions/binary/matrix/DiagMatrixMultiplication.R
@@ -1,38 +1,38 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
-B <- as.matrix(readMM(paste(args[1], "B.mtx", sep="")))
-
-if( ncol(B)==1 ){
-   C = as.matrix(diag(as.vector(A %*% B)));
-} else{
-   C = as.matrix(diag(A %*% B));
-}
-
-writeMM(as(C, "CsparseMatrix"), paste(args[2], "C", sep="")); 
-
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
+B <- as.matrix(readMM(paste(args[1], "B.mtx", sep="")))
+
+if( ncol(B)==1 ){
+   C = as.matrix(diag(as.vector(A %*% B)));
+} else{
+   C = as.matrix(diag(A %*% B));
+}
+
+writeMM(as(C, "CsparseMatrix"), paste(args[2], "C", sep="")); 
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/binary/matrix/DiagMatrixMultiplicationTranspose.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/binary/matrix/DiagMatrixMultiplicationTranspose.R b/src/test/scripts/functions/binary/matrix/DiagMatrixMultiplicationTranspose.R
index fc0d59d..cef1f35 100644
--- a/src/test/scripts/functions/binary/matrix/DiagMatrixMultiplicationTranspose.R
+++ b/src/test/scripts/functions/binary/matrix/DiagMatrixMultiplicationTranspose.R
@@ -1,34 +1,34 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
-B <- as.matrix(readMM(paste(args[1], "B.mtx", sep="")))
-
-C = diag(A %*% t(B));
-
-writeMM(as(C, "CsparseMatrix"), paste(args[2], "C", sep="")); 
-
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
+B <- as.matrix(readMM(paste(args[1], "B.mtx", sep="")))
+
+C = diag(A %*% t(B));
+
+writeMM(as(C, "CsparseMatrix"), paste(args[2], "C", sep="")); 
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/binary/matrix/ElementwiseModulusTest.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/binary/matrix/ElementwiseModulusTest.dml b/src/test/scripts/functions/binary/matrix/ElementwiseModulusTest.dml
index 1029ced..27ca7c0 100644
--- a/src/test/scripts/functions/binary/matrix/ElementwiseModulusTest.dml
+++ b/src/test/scripts/functions/binary/matrix/ElementwiseModulusTest.dml
@@ -1,27 +1,27 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# junit test class: org.apache.sysml.test.integration.functions.binary.matrix.ElementwiseModulusTest.java
-
-A = read("$$indir$$a", rows=$$rows$$, cols=$$cols$$, format="text");
-B = read("$$indir$$b", rows=$$rows$$, cols=$$cols$$, format="text");
-C = A %% B;
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# junit test class: org.apache.sysml.test.integration.functions.binary.matrix.ElementwiseModulusTest.java
+
+A = read("$$indir$$a", rows=$$rows$$, cols=$$cols$$, format="text");
+B = read("$$indir$$b", rows=$$rows$$, cols=$$cols$$, format="text");
+C = A %% B;
 write(C, "$$outdir$$c", format="text");
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/binary/matrix/ElementwiseModulusVariableDimensionsTest.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/binary/matrix/ElementwiseModulusVariableDimensionsTest.dml b/src/test/scripts/functions/binary/matrix/ElementwiseModulusVariableDimensionsTest.dml
index 487242f..30f09be 100644
--- a/src/test/scripts/functions/binary/matrix/ElementwiseModulusVariableDimensionsTest.dml
+++ b/src/test/scripts/functions/binary/matrix/ElementwiseModulusVariableDimensionsTest.dml
@@ -1,27 +1,27 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# junit test class: org.apache.sysml.test.integration.functions.binary.matrix.ElementwiseModulusTest.java
-
-A = read("$$indir$$a", rows=$$rows1$$, cols=$$cols1$$, format="text");
-B = read("$$indir$$b", rows=$$rows2$$, cols=$$cols2$$, format="text");
-C = A %% B;
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# junit test class: org.apache.sysml.test.integration.functions.binary.matrix.ElementwiseModulusTest.java
+
+A = read("$$indir$$a", rows=$$rows1$$, cols=$$cols1$$, format="text");
+B = read("$$indir$$b", rows=$$rows2$$, cols=$$cols2$$, format="text");
+C = A %% B;
 write(C, "$$outdir$$c", format="text");
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/binary/matrix/IQM.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/binary/matrix/IQM.R b/src/test/scripts/functions/binary/matrix/IQM.R
index 06aacc4..ed15d6a 100644
--- a/src/test/scripts/functions/binary/matrix/IQM.R
+++ b/src/test/scripts/functions/binary/matrix/IQM.R
@@ -1,43 +1,43 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
-
-m = nrow(A);
-S = sort(A)
-q25d=m*0.25
-q75d=m*0.75
-q25i=ceiling(q25d)
-q75i=ceiling(q75d)
-iqm = sum(S[(q25i+1):q75i])
-iqm = iqm + (q25i-q25d)*S[q25i] - (q75i-q75d)*S[q75i]
-iqm = iqm/(m*0.5)
-
-miqm = as.matrix(iqm);
-
-writeMM(as(miqm, "CsparseMatrix"), paste(args[3], "R", sep="")); 
-
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
+
+m = nrow(A);
+S = sort(A)
+q25d=m*0.25
+q75d=m*0.75
+q25i=ceiling(q25d)
+q75i=ceiling(q75d)
+iqm = sum(S[(q25i+1):q75i])
+iqm = iqm + (q25i-q25d)*S[q25i] - (q75i-q75d)*S[q75i]
+iqm = iqm/(m*0.5)
+
+miqm = as.matrix(iqm);
+
+writeMM(as(miqm, "CsparseMatrix"), paste(args[3], "R", sep="")); 
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/binary/matrix/MapMultChain.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/binary/matrix/MapMultChain.R b/src/test/scripts/functions/binary/matrix/MapMultChain.R
index b5124a1..65354b6 100644
--- a/src/test/scripts/functions/binary/matrix/MapMultChain.R
+++ b/src/test/scripts/functions/binary/matrix/MapMultChain.R
@@ -1,34 +1,34 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-X <- readMM(paste(args[1], "X.mtx", sep=""))
-v <- readMM(paste(args[1], "v.mtx", sep=""))
-
-R = (t(X) %*% (X %*% v));
-
-writeMM(as(R, "CsparseMatrix"), paste(args[2], "R", sep="")); 
-
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+X <- readMM(paste(args[1], "X.mtx", sep=""))
+v <- readMM(paste(args[1], "v.mtx", sep=""))
+
+R = (t(X) %*% (X %*% v));
+
+writeMM(as(R, "CsparseMatrix"), paste(args[2], "R", sep="")); 
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/binary/matrix/MapMultChainWeights.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/binary/matrix/MapMultChainWeights.R b/src/test/scripts/functions/binary/matrix/MapMultChainWeights.R
index e9c8d3e..63e8d08 100644
--- a/src/test/scripts/functions/binary/matrix/MapMultChainWeights.R
+++ b/src/test/scripts/functions/binary/matrix/MapMultChainWeights.R
@@ -1,35 +1,35 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-X <- readMM(paste(args[1], "X.mtx", sep=""))
-v <- readMM(paste(args[1], "v.mtx", sep=""))
-w <- readMM(paste(args[1], "w.mtx", sep=""))
-
-R = (t(X) %*% (w*(X %*% v)));
-
-writeMM(as(R, "CsparseMatrix"), paste(args[2], "R", sep="")); 
-
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+X <- readMM(paste(args[1], "X.mtx", sep=""))
+v <- readMM(paste(args[1], "v.mtx", sep=""))
+w <- readMM(paste(args[1], "w.mtx", sep=""))
+
+R = (t(X) %*% (w*(X %*% v)));
+
+writeMM(as(R, "CsparseMatrix"), paste(args[2], "R", sep="")); 
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/binary/matrix/MapMultChainWeights2.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/binary/matrix/MapMultChainWeights2.R b/src/test/scripts/functions/binary/matrix/MapMultChainWeights2.R
index 2c7320d..d0daf06 100644
--- a/src/test/scripts/functions/binary/matrix/MapMultChainWeights2.R
+++ b/src/test/scripts/functions/binary/matrix/MapMultChainWeights2.R
@@ -1,35 +1,35 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-X <- readMM(paste(args[1], "X.mtx", sep=""))
-v <- readMM(paste(args[1], "v.mtx", sep=""))
-w <- readMM(paste(args[1], "w.mtx", sep=""))
-
-R = t(X) %*% ((X %*% v)-w);
-
-writeMM(as(R, "CsparseMatrix"), paste(args[2], "R", sep="")); 
-
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+X <- readMM(paste(args[1], "X.mtx", sep=""))
+v <- readMM(paste(args[1], "v.mtx", sep=""))
+w <- readMM(paste(args[1], "w.mtx", sep=""))
+
+R = t(X) %*% ((X %*% v)-w);
+
+writeMM(as(R, "CsparseMatrix"), paste(args[2], "R", sep="")); 
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/binary/matrix/MapMultLimitTest.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/binary/matrix/MapMultLimitTest.dml b/src/test/scripts/functions/binary/matrix/MapMultLimitTest.dml
index e9827eb..e9c0628 100644
--- a/src/test/scripts/functions/binary/matrix/MapMultLimitTest.dml
+++ b/src/test/scripts/functions/binary/matrix/MapMultLimitTest.dml
@@ -1,31 +1,31 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-A = read($1);
-B1 = read($2);
-B2 = read($3);
-
-C1 = A %*% B1;
-C2 = A %*% B2;
-
-write(C1, $4);
-write(C2, $5);
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+A = read($1);
+B1 = read($2);
+B2 = read($3);
+
+C1 = A %*% B1;
+C2 = A %*% B2;
+
+write(C1, $4);
+write(C2, $5);
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/binary/matrix/MatrixVectorMultiplication.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/binary/matrix/MatrixVectorMultiplication.R b/src/test/scripts/functions/binary/matrix/MatrixVectorMultiplication.R
index 991e9cc..9f5d6aa 100644
--- a/src/test/scripts/functions/binary/matrix/MatrixVectorMultiplication.R
+++ b/src/test/scripts/functions/binary/matrix/MatrixVectorMultiplication.R
@@ -1,36 +1,36 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-A1 <- readMM(paste(args[1], "A.mtx", sep=""))
-A <- as.matrix(A1);
-x1 <- readMM(paste(args[1], "x.mtx", sep=""))
-x <- as.matrix(x1);
-
-y <- A%*%x;
-
-writeMM(as(y, "CsparseMatrix"), paste(args[2], "y", sep="")); 
-
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+A1 <- readMM(paste(args[1], "A.mtx", sep=""))
+A <- as.matrix(A1);
+x1 <- readMM(paste(args[1], "x.mtx", sep=""))
+x <- as.matrix(x1);
+
+y <- A%*%x;
+
+writeMM(as(y, "CsparseMatrix"), paste(args[2], "y", sep="")); 
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/binary/matrix/Median.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/binary/matrix/Median.R b/src/test/scripts/functions/binary/matrix/Median.R
index 41c1c16..0cf2c31 100644
--- a/src/test/scripts/functions/binary/matrix/Median.R
+++ b/src/test/scripts/functions/binary/matrix/Median.R
@@ -1,34 +1,34 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
-
-s = median(A);
-m = as.matrix(s);
-
-writeMM(as(m, "CsparseMatrix"), paste(args[3], "R", sep="")); 
-
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
+
+s = median(A);
+m = as.matrix(s);
+
+writeMM(as(m, "CsparseMatrix"), paste(args[3], "R", sep="")); 
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/binary/matrix/OuterProduct.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/binary/matrix/OuterProduct.R b/src/test/scripts/functions/binary/matrix/OuterProduct.R
index b1cdbb8..72d270d 100644
--- a/src/test/scripts/functions/binary/matrix/OuterProduct.R
+++ b/src/test/scripts/functions/binary/matrix/OuterProduct.R
@@ -1,41 +1,41 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-#note: we use matrix here, becase Matrix created out-of-memory issues
-# 'Cholmod error 'out of memory' at file ../Core/cholmod_memory.c, line 147'
-# however, R still fails with 'Error: cannot allocate vector of size 3.0 Gb'
-
-A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
-B <- as.matrix(readMM(paste(args[1], "B.mtx", sep="")))
-
-C <- A %*% B;
-#C <- A %o% B; 
-
-cmin <- min(C);
-
-writeMM(as(cmin, "CsparseMatrix"), paste(args[2], "C", sep="")); 
-
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+#note: we use matrix here, becase Matrix created out-of-memory issues
+# 'Cholmod error 'out of memory' at file ../Core/cholmod_memory.c, line 147'
+# however, R still fails with 'Error: cannot allocate vector of size 3.0 Gb'
+
+A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
+B <- as.matrix(readMM(paste(args[1], "B.mtx", sep="")))
+
+C <- A %*% B;
+#C <- A %o% B; 
+
+cmin <- min(C);
+
+writeMM(as(cmin, "CsparseMatrix"), paste(args[2], "C", sep="")); 
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/binary/matrix/Quantile.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/binary/matrix/Quantile.R b/src/test/scripts/functions/binary/matrix/Quantile.R
index 39c1446..1997456 100644
--- a/src/test/scripts/functions/binary/matrix/Quantile.R
+++ b/src/test/scripts/functions/binary/matrix/Quantile.R
@@ -1,35 +1,35 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
-p = as.double(args[2]);
-
-s = quantile(A, p);
-m = as.matrix(s);
-
-writeMM(as(m, "CsparseMatrix"), paste(args[3], "R", sep="")); 
-
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
+p = as.double(args[2]);
+
+s = quantile(A, p);
+m = as.matrix(s);
+
+writeMM(as(m, "CsparseMatrix"), paste(args[3], "R", sep="")); 
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/binary/matrix/ScalarModulusTest.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/binary/matrix/ScalarModulusTest.dml b/src/test/scripts/functions/binary/matrix/ScalarModulusTest.dml
index ceaa003..12468ce 100644
--- a/src/test/scripts/functions/binary/matrix/ScalarModulusTest.dml
+++ b/src/test/scripts/functions/binary/matrix/ScalarModulusTest.dml
@@ -1,40 +1,40 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# junit test class: org.apache.sysml.test.integration.functions.binary.matrix.ScalarModulusTest.java
-
-$$vardeclaration$$
-
-Vector = read("$$indir$$vector", rows=$$rows$$, cols=1, format="text");
-VectorLeft = Vector %% $$divisor$$;
-write(VectorLeft, "$$outdir$$vector_left", format="text");
-
-Vector = read("$$indir$$vector", rows=$$rows$$, cols=1, format="text");
-VectorRight = $$dividend$$ %% Vector;
-write(VectorRight, "$$outdir$$vector_right", format="text");
-
-Matrix = read("$$indir$$matrix", rows=$$rows$$, cols=$$cols$$, format="text");
-MatrixLeft = Matrix %% $$divisor$$;
-write(MatrixLeft, "$$outdir$$matrix_left", format="text");
-
-Matrix = read("$$indir$$matrix", rows=$$rows$$, cols=$$cols$$, format="text");
-MatrixRight = $$dividend$$ %% Matrix;
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# junit test class: org.apache.sysml.test.integration.functions.binary.matrix.ScalarModulusTest.java
+
+$$vardeclaration$$
+
+Vector = read("$$indir$$vector", rows=$$rows$$, cols=1, format="text");
+VectorLeft = Vector %% $$divisor$$;
+write(VectorLeft, "$$outdir$$vector_left", format="text");
+
+Vector = read("$$indir$$vector", rows=$$rows$$, cols=1, format="text");
+VectorRight = $$dividend$$ %% Vector;
+write(VectorRight, "$$outdir$$vector_right", format="text");
+
+Matrix = read("$$indir$$matrix", rows=$$rows$$, cols=$$cols$$, format="text");
+MatrixLeft = Matrix %% $$divisor$$;
+write(MatrixLeft, "$$outdir$$matrix_left", format="text");
+
+Matrix = read("$$indir$$matrix", rows=$$rows$$, cols=$$cols$$, format="text");
+MatrixRight = $$dividend$$ %% Matrix;
 write(MatrixRight, "$$outdir$$matrix_right", format="text");
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/binary/matrix/TransposeMatrixMultiplication.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/binary/matrix/TransposeMatrixMultiplication.R b/src/test/scripts/functions/binary/matrix/TransposeMatrixMultiplication.R
index 8b73634..bfc621a 100644
--- a/src/test/scripts/functions/binary/matrix/TransposeMatrixMultiplication.R
+++ b/src/test/scripts/functions/binary/matrix/TransposeMatrixMultiplication.R
@@ -1,34 +1,34 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
-B <- as.matrix(readMM(paste(args[1], "B.mtx", sep="")))
-
-C = t(A) %*% B;
-
-writeMM(as(C, "CsparseMatrix"), paste(args[2], "C", sep="")); 
-
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
+B <- as.matrix(readMM(paste(args[1], "B.mtx", sep="")))
+
+C = t(A) %*% B;
+
+writeMM(as(C, "CsparseMatrix"), paste(args[2], "C", sep="")); 
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/binary/matrix/TransposeMatrixMultiplicationMinus.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/binary/matrix/TransposeMatrixMultiplicationMinus.R b/src/test/scripts/functions/binary/matrix/TransposeMatrixMultiplicationMinus.R
index 7fbcb68..c94535c 100644
--- a/src/test/scripts/functions/binary/matrix/TransposeMatrixMultiplicationMinus.R
+++ b/src/test/scripts/functions/binary/matrix/TransposeMatrixMultiplicationMinus.R
@@ -1,34 +1,34 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
-B <- as.matrix(readMM(paste(args[1], "B.mtx", sep="")))
-
-C = -t(A) %*% B;
-
-writeMM(as(C, "CsparseMatrix"), paste(args[2], "C", sep="")); 
-
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
+B <- as.matrix(readMM(paste(args[1], "B.mtx", sep="")))
+
+C = -t(A) %*% B;
+
+writeMM(as(C, "CsparseMatrix"), paste(args[2], "C", sep="")); 
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/binary/matrix/TransposeMatrixMultiplicationMinus.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/binary/matrix/TransposeMatrixMultiplicationMinus.dml b/src/test/scripts/functions/binary/matrix/TransposeMatrixMultiplicationMinus.dml
index c9f9c22..a31e1bf 100644
--- a/src/test/scripts/functions/binary/matrix/TransposeMatrixMultiplicationMinus.dml
+++ b/src/test/scripts/functions/binary/matrix/TransposeMatrixMultiplicationMinus.dml
@@ -1,27 +1,27 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-A = read($1, rows=$2, cols=$3, format="text");
-B = read($4, rows=$5, cols=$6, format="text");
-
-C = -t(A) %*% B;
-
-write(C, $7, format="text");
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+A = read($1, rows=$2, cols=$3, format="text");
+B = read($4, rows=$5, cols=$6, format="text");
+
+C = -t(A) %*% B;
+
+write(C, $7, format="text");

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/binary/matrix/UaggOuterChain.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/binary/matrix/UaggOuterChain.R b/src/test/scripts/functions/binary/matrix/UaggOuterChain.R
index 846e11c..c7e9f3a 100644
--- a/src/test/scripts/functions/binary/matrix/UaggOuterChain.R
+++ b/src/test/scripts/functions/binary/matrix/UaggOuterChain.R
@@ -1,34 +1,34 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
-B <- as.matrix(readMM(paste(args[1], "B.mtx", sep="")))
-
-C = rowSums(outer(A,B,"<"));
-
-writeMM(as(C, "CsparseMatrix"), paste(args[2], "C", sep="")); 
-
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
+B <- as.matrix(readMM(paste(args[1], "B.mtx", sep="")))
+
+C = rowSums(outer(A,B,"<"));
+
+writeMM(as(C, "CsparseMatrix"), paste(args[2], "C", sep="")); 
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/binary/matrix/UaggOuterChainColSums.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/binary/matrix/UaggOuterChainColSums.R b/src/test/scripts/functions/binary/matrix/UaggOuterChainColSums.R
index f8b9cf1..fbfbd10 100644
--- a/src/test/scripts/functions/binary/matrix/UaggOuterChainColSums.R
+++ b/src/test/scripts/functions/binary/matrix/UaggOuterChainColSums.R
@@ -1,34 +1,34 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
-B <- as.matrix(readMM(paste(args[1], "B.mtx", sep="")))
-
-C <- t(as.matrix(colSums(outer(A,B,"<"))));
-
-writeMM(as(C, "CsparseMatrix"), paste(args[2], "C", sep="")); 
-
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
+B <- as.matrix(readMM(paste(args[1], "B.mtx", sep="")))
+
+C <- t(as.matrix(colSums(outer(A,B,"<"))));
+
+writeMM(as(C, "CsparseMatrix"), paste(args[2], "C", sep="")); 
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/binary/matrix/UaggOuterChainEquals.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/binary/matrix/UaggOuterChainEquals.R b/src/test/scripts/functions/binary/matrix/UaggOuterChainEquals.R
index dda255c..b14e7b0 100644
--- a/src/test/scripts/functions/binary/matrix/UaggOuterChainEquals.R
+++ b/src/test/scripts/functions/binary/matrix/UaggOuterChainEquals.R
@@ -1,34 +1,34 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
-B <- as.matrix(readMM(paste(args[1], "B.mtx", sep="")))
-
-C = rowSums(outer(A,B,"=="));
-
-writeMM(as(C, "CsparseMatrix"), paste(args[2], "C", sep="")); 
-
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
+B <- as.matrix(readMM(paste(args[1], "B.mtx", sep="")))
+
+C = rowSums(outer(A,B,"=="));
+
+writeMM(as(C, "CsparseMatrix"), paste(args[2], "C", sep="")); 
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/binary/matrix/UaggOuterChainEqualsColSums.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/binary/matrix/UaggOuterChainEqualsColSums.R b/src/test/scripts/functions/binary/matrix/UaggOuterChainEqualsColSums.R
index bc07ef8..b948c1d 100644
--- a/src/test/scripts/functions/binary/matrix/UaggOuterChainEqualsColSums.R
+++ b/src/test/scripts/functions/binary/matrix/UaggOuterChainEqualsColSums.R
@@ -1,34 +1,34 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
-B <- as.matrix(readMM(paste(args[1], "B.mtx", sep="")))
-
-C <- t(as.matrix(colSums(outer(A,B,"=="))));
-
-writeMM(as(C, "CsparseMatrix"), paste(args[2], "C", sep="")); 
-
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
+B <- as.matrix(readMM(paste(args[1], "B.mtx", sep="")))
+
+C <- t(as.matrix(colSums(outer(A,B,"=="))));
+
+writeMM(as(C, "CsparseMatrix"), paste(args[2], "C", sep="")); 
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/binary/matrix/UaggOuterChainEqualsSums.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/binary/matrix/UaggOuterChainEqualsSums.R b/src/test/scripts/functions/binary/matrix/UaggOuterChainEqualsSums.R
index 6c5237e..57d2d86 100644
--- a/src/test/scripts/functions/binary/matrix/UaggOuterChainEqualsSums.R
+++ b/src/test/scripts/functions/binary/matrix/UaggOuterChainEqualsSums.R
@@ -1,34 +1,34 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
-B <- as.matrix(readMM(paste(args[1], "B.mtx", sep="")))
-
-C = sum(outer(A,B,"=="));
-
-writeMM(as(C, "CsparseMatrix"), paste(args[2], "C", sep="")); 
-
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
+B <- as.matrix(readMM(paste(args[1], "B.mtx", sep="")))
+
+C = sum(outer(A,B,"=="));
+
+writeMM(as(C, "CsparseMatrix"), paste(args[2], "C", sep="")); 
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/binary/matrix/UaggOuterChainGreater.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/binary/matrix/UaggOuterChainGreater.R b/src/test/scripts/functions/binary/matrix/UaggOuterChainGreater.R
index d5f803d..ff310ab 100644
--- a/src/test/scripts/functions/binary/matrix/UaggOuterChainGreater.R
+++ b/src/test/scripts/functions/binary/matrix/UaggOuterChainGreater.R
@@ -19,17 +19,17 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
-B <- as.matrix(readMM(paste(args[1], "B.mtx", sep="")))
-
-C = rowSums(outer(A,B,">"));
-
-writeMM(as(C, "CsparseMatrix"), paste(args[2], "C", sep="")); 
-
-
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
+B <- as.matrix(readMM(paste(args[1], "B.mtx", sep="")))
+
+C = rowSums(outer(A,B,">"));
+
+writeMM(as(C, "CsparseMatrix"), paste(args[2], "C", sep="")); 
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/binary/matrix/UaggOuterChainGreaterColSums.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/binary/matrix/UaggOuterChainGreaterColSums.R b/src/test/scripts/functions/binary/matrix/UaggOuterChainGreaterColSums.R
index 847febe..6238bb9 100644
--- a/src/test/scripts/functions/binary/matrix/UaggOuterChainGreaterColSums.R
+++ b/src/test/scripts/functions/binary/matrix/UaggOuterChainGreaterColSums.R
@@ -19,17 +19,17 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
-B <- as.matrix(readMM(paste(args[1], "B.mtx", sep="")))
-
-C <- t(as.matrix(colSums(outer(A,B,">"))));
-
-writeMM(as(C, "CsparseMatrix"), paste(args[2], "C", sep="")); 
-
-
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
+B <- as.matrix(readMM(paste(args[1], "B.mtx", sep="")))
+
+C <- t(as.matrix(colSums(outer(A,B,">"))));
+
+writeMM(as(C, "CsparseMatrix"), paste(args[2], "C", sep="")); 
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/binary/matrix/UaggOuterChainGreaterEquals.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/binary/matrix/UaggOuterChainGreaterEquals.R b/src/test/scripts/functions/binary/matrix/UaggOuterChainGreaterEquals.R
index 713f3d3..3d73686 100644
--- a/src/test/scripts/functions/binary/matrix/UaggOuterChainGreaterEquals.R
+++ b/src/test/scripts/functions/binary/matrix/UaggOuterChainGreaterEquals.R
@@ -19,17 +19,17 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
-B <- as.matrix(readMM(paste(args[1], "B.mtx", sep="")))
-
-C = rowSums(outer(A,B,">="));
-
-writeMM(as(C, "CsparseMatrix"), paste(args[2], "C", sep="")); 
-
-
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
+B <- as.matrix(readMM(paste(args[1], "B.mtx", sep="")))
+
+C = rowSums(outer(A,B,">="));
+
+writeMM(as(C, "CsparseMatrix"), paste(args[2], "C", sep="")); 
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/binary/matrix/UaggOuterChainGreaterEqualsColSums.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/binary/matrix/UaggOuterChainGreaterEqualsColSums.R b/src/test/scripts/functions/binary/matrix/UaggOuterChainGreaterEqualsColSums.R
index 7a02c6f..0ab7ee2 100644
--- a/src/test/scripts/functions/binary/matrix/UaggOuterChainGreaterEqualsColSums.R
+++ b/src/test/scripts/functions/binary/matrix/UaggOuterChainGreaterEqualsColSums.R
@@ -19,17 +19,17 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
-B <- as.matrix(readMM(paste(args[1], "B.mtx", sep="")))
-
-C <- t(as.matrix(colSums(outer(A,B,">="))));
-
-writeMM(as(C, "CsparseMatrix"), paste(args[2], "C", sep="")); 
-
-
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
+B <- as.matrix(readMM(paste(args[1], "B.mtx", sep="")))
+
+C <- t(as.matrix(colSums(outer(A,B,">="))));
+
+writeMM(as(C, "CsparseMatrix"), paste(args[2], "C", sep="")); 
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/binary/matrix/UaggOuterChainLessEquals.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/binary/matrix/UaggOuterChainLessEquals.R b/src/test/scripts/functions/binary/matrix/UaggOuterChainLessEquals.R
index 4292d08..5aa68d8 100644
--- a/src/test/scripts/functions/binary/matrix/UaggOuterChainLessEquals.R
+++ b/src/test/scripts/functions/binary/matrix/UaggOuterChainLessEquals.R
@@ -19,17 +19,17 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
-B <- as.matrix(readMM(paste(args[1], "B.mtx", sep="")))
-
-C = rowSums(outer(A,B,"<="));
-
-writeMM(as(C, "CsparseMatrix"), paste(args[2], "C", sep="")); 
-
-
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
+B <- as.matrix(readMM(paste(args[1], "B.mtx", sep="")))
+
+C = rowSums(outer(A,B,"<="));
+
+writeMM(as(C, "CsparseMatrix"), paste(args[2], "C", sep="")); 
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/binary/matrix/UaggOuterChainLessEqualsColSums.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/binary/matrix/UaggOuterChainLessEqualsColSums.R b/src/test/scripts/functions/binary/matrix/UaggOuterChainLessEqualsColSums.R
index bc743db..e8e65aa 100644
--- a/src/test/scripts/functions/binary/matrix/UaggOuterChainLessEqualsColSums.R
+++ b/src/test/scripts/functions/binary/matrix/UaggOuterChainLessEqualsColSums.R
@@ -19,17 +19,17 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
-B <- as.matrix(readMM(paste(args[1], "B.mtx", sep="")))
-
-C <- t(as.matrix(colSums(outer(A,B,"<="))));
-
-writeMM(as(C, "CsparseMatrix"), paste(args[2], "C", sep="")); 
-
-
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
+B <- as.matrix(readMM(paste(args[1], "B.mtx", sep="")))
+
+C <- t(as.matrix(colSums(outer(A,B,"<="))));
+
+writeMM(as(C, "CsparseMatrix"), paste(args[2], "C", sep="")); 
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/binary/matrix/UaggOuterChainNotEquals.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/binary/matrix/UaggOuterChainNotEquals.R b/src/test/scripts/functions/binary/matrix/UaggOuterChainNotEquals.R
index 4875faa..7edb245 100644
--- a/src/test/scripts/functions/binary/matrix/UaggOuterChainNotEquals.R
+++ b/src/test/scripts/functions/binary/matrix/UaggOuterChainNotEquals.R
@@ -19,17 +19,17 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
-B <- as.matrix(readMM(paste(args[1], "B.mtx", sep="")))
-
-C = rowSums(outer(A,B,"!="));
-
-writeMM(as(C, "CsparseMatrix"), paste(args[2], "C", sep="")); 
-
-
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
+B <- as.matrix(readMM(paste(args[1], "B.mtx", sep="")))
+
+C = rowSums(outer(A,B,"!="));
+
+writeMM(as(C, "CsparseMatrix"), paste(args[2], "C", sep="")); 
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/binary/matrix/UaggOuterChainNotEqualsColSums.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/binary/matrix/UaggOuterChainNotEqualsColSums.R b/src/test/scripts/functions/binary/matrix/UaggOuterChainNotEqualsColSums.R
index 2aea9d5..13ca5ad 100644
--- a/src/test/scripts/functions/binary/matrix/UaggOuterChainNotEqualsColSums.R
+++ b/src/test/scripts/functions/binary/matrix/UaggOuterChainNotEqualsColSums.R
@@ -19,17 +19,17 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
-B <- as.matrix(readMM(paste(args[1], "B.mtx", sep="")))
-
-C <- t(as.matrix(colSums(outer(A,B,"!="))));
-
-writeMM(as(C, "CsparseMatrix"), paste(args[2], "C", sep="")); 
-
-
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
+B <- as.matrix(readMM(paste(args[1], "B.mtx", sep="")))
+
+C <- t(as.matrix(colSums(outer(A,B,"!="))));
+
+writeMM(as(C, "CsparseMatrix"), paste(args[2], "C", sep="")); 
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/binary/matrix/UaggOuterChainSums.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/binary/matrix/UaggOuterChainSums.R b/src/test/scripts/functions/binary/matrix/UaggOuterChainSums.R
index 2065bef..1d8a95a 100644
--- a/src/test/scripts/functions/binary/matrix/UaggOuterChainSums.R
+++ b/src/test/scripts/functions/binary/matrix/UaggOuterChainSums.R
@@ -1,33 +1,33 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
-B <- as.matrix(readMM(paste(args[1], "B.mtx", sep="")))
-
-C = sum(outer(A,B,"<"));
-
-writeMM(as(C, "CsparseMatrix"), paste(args[2], "C", sep=""));  
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
+B <- as.matrix(readMM(paste(args[1], "B.mtx", sep="")))
+
+C = sum(outer(A,B,"<"));
+
+writeMM(as(C, "CsparseMatrix"), paste(args[2], "C", sep=""));  
+



[30/55] [partial] incubator-systemml git commit: [SYSTEMML-482] [SYSTEMML-480] Adding a Git attributes file to enforce Unix-styled line endings, and normalizing all of the line endings.

Posted by du...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/ctableStats/stratstats.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/ctableStats/stratstats.dml b/src/test/scripts/applications/ctableStats/stratstats.dml
index 3ac684a..7eb1858 100644
--- a/src/test/scripts/applications/ctableStats/stratstats.dml
+++ b/src/test/scripts/applications/ctableStats/stratstats.dml
@@ -1,350 +1,350 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# STRATIFIED BIVARIATE STATISTICS, VERSION 2
-# 
-# INPUT  1: Dataset with records as rows (matrix filename)
-# INPUT  2: The stratum ID column number (integer)
-#   Stratum ID must be a small positive integer; fractional values are rounded; if 0 or less, shifted to positive.
-# INPUT  3: 1st variate column numbers (matrix filename)
-# INPUT  4: 2nd variate column numbers (matrix filename)
-# INPUT  5: Output (matrix filename)
-#
-# OUTPUT 1: Output Matrix with 40 columns, containing the following information:
-#     Rows: One row per each distinct pair (1st variate, 2nd variate)
-#     Col 01: 1st variate column number
-#     Col 02: 1st variate global presence count
-#     Col 03: 1st variate global mean
-#     Col 04: 1st variate global standard deviation
-#     Col 05: 1st variate stratified standard deviation
-#     Col 06: R-squared, 1st variate vs. strata
-#     Col 07: P-value, 1st variate vs. strata
-#     Col 08-10: Reserved
-#     Col 11: 2nd variate column number
-#     Col 12: 2nd variate global presence count
-#     Col 13: 2nd variate global mean
-#     Col 14: 2nd variate global standard deviation
-#     Col 15: 2nd variate stratified standard deviation
-#     Col 16: R-squared, 2nd variate vs. strata
-#     Col 17: P-value, 2nd variate vs. strata
-#     Col 18-20: Reserved
-#     Col 21: Global 1st & 2nd variate presence count
-#     Col 22: Global regression slope (2nd variate vs. 1st variate)
-#     Col 23: Global regression slope standard deviation
-#     Col 24: Global correlation = +/- sqrt(R-squared)
-#     Col 25: Global residual standard deviation
-#     Col 26: Global R-squared
-#     Col 27: Global P-value for hypothesis "slope = 0"
-#     Col 28-30: Reserved
-#     Col 31: Stratified 1st & 2nd variate presence count
-#     Col 32: Stratified regression slope (2nd variate vs. 1st variate)
-#     Col 33: Stratified regression slope standard deviation
-#     Col 34: Stratified correlation = +/- sqrt(R-squared)
-#     Col 35: Stratified residual standard deviation
-#     Col 36: Stratified R-squared
-#     Col 37: Stratified P-value for hypothesis "slope = 0"
-#     Col 38: Number of strata with at least two counted points
-#     Col 39-40: Reserved
-#     TO DO: GOODNESS OF FIT MEASURE
-#
-# EXAMPLE:
-# hadoop jar SystemML.jar -f PATH/stratstats.dml -exec singlenode -args PATH/stratstats_test_data.mtx 1 PATH/stratstats_test_X.mtx PATH/stratstats_test_Y.mtx PATH/stratstats_test_output.mtx
-
-NaN = 0/0;
-
-print ("BEGIN STRATIFIED STATISTICS SCRIPT");
-
-print ("Reading the input matrices...");
-
-DataWithNaNs = read ($1, format = "text");
-Xcols = read ($3, format = "text");
-Ycols = read ($4, format = "text");
-stratum_column_id = $2;
-num_records  = nrow(DataWithNaNs);
-num_attrs    = ncol(DataWithNaNs);
-num_attrs_X  = ncol(Xcols);
-num_attrs_Y  = ncol(Ycols);
-num_attrs_XY = num_attrs_X * num_attrs_Y;
-
-
-print ("Preparing the variates...");
-
-Data = deNaN (DataWithNaNs);
-DataNaNmask = ppred (DataWithNaNs, NaN, "==");
-
-tXcols = t(Xcols);
-ones = matrix (1.0, rows = num_attrs_X, cols = 1);
-one_to_num_attrs_X = sumup (ones);
-ProjX = matrix (0.0, rows = num_attrs, cols = num_attrs_X);
-ProjX_ctable = table (tXcols, one_to_num_attrs_X);
-ProjX [1:nrow(ProjX_ctable), ] = ProjX_ctable;
-X = Data %*% ProjX;
-X_mask = 1 - (DataNaNmask %*% ProjX);
-
-tYcols = t(Ycols);
-ones = matrix (1.0, rows = num_attrs_Y, cols = 1);
-one_to_num_attrs_Y = sumup (ones);
-ProjY = matrix (0.0, rows = num_attrs, cols = num_attrs_Y);
-ProjY_ctable = table (tYcols, one_to_num_attrs_Y);
-ProjY [1:nrow(ProjY_ctable), ] = ProjY_ctable;
-Y = Data %*% ProjY;
-Y_mask = 1 - (DataNaNmask %*% ProjY);
-
-
-print ("Preparing the strata...");
-
-Proj_to_deNaN_strata = diag (1 - DataNaNmask [, stratum_column_id]);
-Proj_to_deNaN_strata = removeEmpty (target = Proj_to_deNaN_strata, margin = "rows");
-vector_of_strata_with_empty_but_no_NaNs = round (Proj_to_deNaN_strata %*% (Data [, stratum_column_id]));
-vector_of_strata_with_empty_but_no_NaNs = vector_of_strata_with_empty_but_no_NaNs + (1 - min (vector_of_strata_with_empty_but_no_NaNs));
-num_strata_with_empty_but_no_NaNs = max (vector_of_strata_with_empty_but_no_NaNs);
-num_records_with_nonNaN_strata = nrow (Proj_to_deNaN_strata);
-ones = matrix (1.0, rows = num_records_with_nonNaN_strata, cols = 1);
-one_to_num_records_with_nonNaN_strata = sumup (ones);
-StrataSummator_with_empty_from_nonNaNs = table (vector_of_strata_with_empty_but_no_NaNs, one_to_num_records_with_nonNaN_strata);
-StrataSummator_from_nonNaNs = removeEmpty (target = StrataSummator_with_empty_from_nonNaNs, margin = "rows");
-StrataSummator = StrataSummator_from_nonNaNs %*% Proj_to_deNaN_strata;
-num_strata = nrow (StrataSummator);
-num_empty_strata = num_strata_with_empty_but_no_NaNs - num_strata;
-print ("There are " + num_strata + " nonempty strata and " + num_empty_strata + " empty but non-NaN strata.");
-
-print ("Computing the global single-variate statistics...");
-
-cnt_X_global = colSums (X_mask);
-cnt_Y_global = colSums (Y_mask);
-avg_X_global = colSums (X) / cnt_X_global;
-avg_Y_global = colSums (Y) / cnt_Y_global;
-var_sumX_global = colSums (X * X) - cnt_X_global * (avg_X_global * avg_X_global);
-var_sumY_global = colSums (Y * Y) - cnt_Y_global * (avg_Y_global * avg_Y_global);
-                 sqrt_failsafe_input_1 = var_sumX_global / (cnt_X_global - 1);
-stdev_X_global = sqrt_failsafe (sqrt_failsafe_input_1);
-                 sqrt_failsafe_input_2 = var_sumY_global / (cnt_Y_global - 1)
-stdev_Y_global = sqrt_failsafe (sqrt_failsafe_input_2);
-
-print ("Computing the stratified single-variate statistics...");
-
-# Compute per-stratum statistics, prevent div-0 for locally empty (NaN-filled) strata
-
-Cnt_X_per_stratum = StrataSummator %*% X_mask;
-Cnt_Y_per_stratum = StrataSummator %*% Y_mask;
-Is_none_X_per_stratum = ppred (Cnt_X_per_stratum, 0, "==");
-Is_none_Y_per_stratum = ppred (Cnt_Y_per_stratum, 0, "==");
-One_over_cnt_X_per_stratum = (1 - Is_none_X_per_stratum) / (Cnt_X_per_stratum + Is_none_X_per_stratum);
-One_over_cnt_Y_per_stratum = (1 - Is_none_Y_per_stratum) / (Cnt_Y_per_stratum + Is_none_Y_per_stratum);
-num_X_nonempty_strata = num_strata - colSums (Is_none_X_per_stratum);
-num_Y_nonempty_strata = num_strata - colSums (Is_none_Y_per_stratum);
-
-Sum_X_per_stratum  = StrataSummator %*% X;
-Sum_Y_per_stratum  = StrataSummator %*% Y;
-
-# Recompute some global statistics to exclude NaN-stratum records
-
-cnt_X_excluding_NaNstrata = colSums (Cnt_X_per_stratum);
-cnt_Y_excluding_NaNstrata = colSums (Cnt_Y_per_stratum);
-sum_X_excluding_NaNstrata = colSums (Sum_X_per_stratum);
-sum_Y_excluding_NaNstrata = colSums (Sum_Y_per_stratum);
-var_sumX_excluding_NaNstrata = colSums (StrataSummator %*% (X * X)) - (sum_X_excluding_NaNstrata * sum_X_excluding_NaNstrata) / cnt_X_excluding_NaNstrata;
-var_sumY_excluding_NaNstrata = colSums (StrataSummator %*% (Y * Y)) - (sum_Y_excluding_NaNstrata * sum_Y_excluding_NaNstrata) / cnt_Y_excluding_NaNstrata;
-
-# Compute the stratified statistics
-
-var_sumX_stratified = colSums (StrataSummator %*% (X * X)) - colSums (One_over_cnt_X_per_stratum * Sum_X_per_stratum * Sum_X_per_stratum);
-var_sumY_stratified = colSums (StrataSummator %*% (Y * Y)) - colSums (One_over_cnt_Y_per_stratum * Sum_Y_per_stratum * Sum_Y_per_stratum);
-                      sqrt_failsafe_input_3 = var_sumX_stratified / (cnt_X_excluding_NaNstrata - num_X_nonempty_strata);
-stdev_X_stratified  = sqrt_failsafe (sqrt_failsafe_input_3);
-                      sqrt_failsafe_input_4 = var_sumY_stratified / (cnt_Y_excluding_NaNstrata - num_Y_nonempty_strata);
-stdev_Y_stratified  = sqrt_failsafe (sqrt_failsafe_input_4);
-r_sqr_X_vs_strata   = 1 - var_sumX_stratified / var_sumX_excluding_NaNstrata;
-r_sqr_Y_vs_strata   = 1 - var_sumY_stratified / var_sumY_excluding_NaNstrata;
-fStat_X_vs_strata   = ((var_sumX_excluding_NaNstrata - var_sumX_stratified) / (num_X_nonempty_strata - 1)) / (var_sumX_stratified / (cnt_X_excluding_NaNstrata - num_X_nonempty_strata));
-fStat_Y_vs_strata   = ((var_sumY_excluding_NaNstrata - var_sumY_stratified) / (num_Y_nonempty_strata - 1)) / (var_sumY_stratified / (cnt_Y_excluding_NaNstrata - num_Y_nonempty_strata));
-p_val_X_vs_strata   = fStat_tailprob (fStat_X_vs_strata, num_X_nonempty_strata - 1, cnt_X_excluding_NaNstrata - num_X_nonempty_strata);
-p_val_Y_vs_strata   = fStat_tailprob (fStat_Y_vs_strata, num_Y_nonempty_strata - 1, cnt_Y_excluding_NaNstrata - num_Y_nonempty_strata);
-
-print ("Computing the global bivariate statistics...");
-
-# Compute the aggregate X vs. Y statistics and map them into proper positions
-
-cnt_XY_rectangle       = t(X_mask) %*% Y_mask;
-sum_X_forXY_rectangle  = t(X)      %*% Y_mask;
-sum_XX_forXY_rectangle = t(X * X)  %*% Y_mask;
-sum_Y_forXY_rectangle  = t(X_mask) %*% Y;
-sum_YY_forXY_rectangle = t(X_mask) %*% (Y * Y);
-sum_XY_rectangle       = t(X)      %*% Y;
-cnt_XY_global       = matrix (cnt_XY_rectangle,       rows = 1, cols = num_attrs_XY, byrow = TRUE);
-sum_X_forXY_global  = matrix (sum_X_forXY_rectangle,  rows = 1, cols = num_attrs_XY, byrow = TRUE);
-sum_XX_forXY_global = matrix (sum_XX_forXY_rectangle, rows = 1, cols = num_attrs_XY, byrow = TRUE);
-sum_Y_forXY_global  = matrix (sum_Y_forXY_rectangle,  rows = 1, cols = num_attrs_XY, byrow = TRUE);
-sum_YY_forXY_global = matrix (sum_YY_forXY_rectangle, rows = 1, cols = num_attrs_XY, byrow = TRUE);
-sum_XY_global       = matrix (sum_XY_rectangle,       rows = 1, cols = num_attrs_XY, byrow = TRUE);
-ones_XY = matrix (1.0, rows = 1, cols = num_attrs_XY);
-
-# Compute the global bivariate statistics for output
-
-cov_sumX_sumY_global   = sum_XY_global - sum_X_forXY_global * sum_Y_forXY_global / cnt_XY_global;
-var_sumX_forXY_global  = sum_XX_forXY_global - sum_X_forXY_global * sum_X_forXY_global / cnt_XY_global;
-var_sumY_forXY_global  = sum_YY_forXY_global - sum_Y_forXY_global * sum_Y_forXY_global / cnt_XY_global;
-slope_XY_global        = cov_sumX_sumY_global / var_sumX_forXY_global;
-                                                sqrt_failsafe_input_5 = var_sumX_forXY_global * var_sumY_forXY_global;
-                                                sqrt_failsafe_output_5 = sqrt_failsafe (sqrt_failsafe_input_5);
-corr_XY_global         = cov_sumX_sumY_global / sqrt_failsafe_output_5;
-r_sqr_X_vs_Y_global    = cov_sumX_sumY_global * cov_sumX_sumY_global / (var_sumX_forXY_global * var_sumY_forXY_global);
-                         sqrt_failsafe_input_6 = (1 - r_sqr_X_vs_Y_global) * var_sumY_forXY_global / var_sumX_forXY_global / (cnt_XY_global - 2)
-stdev_slope_XY_global  = sqrt_failsafe (sqrt_failsafe_input_6);
-                         sqrt_failsafe_input_7 = (1 - r_sqr_X_vs_Y_global) * var_sumY_forXY_global / (cnt_XY_global - 2)
-stdev_errY_vs_X_global = sqrt_failsafe (sqrt_failsafe_input_7);
-fStat_Y_vs_X_global    = (cnt_XY_global - 2) * r_sqr_X_vs_Y_global / (1 - r_sqr_X_vs_Y_global);
-p_val_Y_vs_X_global    = fStat_tailprob (fStat_Y_vs_X_global, ones_XY, cnt_XY_global - 2);
-
-print ("Computing the stratified bivariate statistics...");
-
-# Create projections to "intermingle" X and Y into attribute pairs
-
-Proj_X_to_XY = matrix (0.0, rows = num_attrs_X, cols = num_attrs_XY);
-Proj_Y_to_XY = matrix (0.0, rows = num_attrs_Y, cols = num_attrs_XY);
-ones_Y_row   = matrix (1.0, rows = 1, cols = num_attrs_Y);
-for (i in 1:num_attrs_X) {
-    start_cid = (i - 1) * num_attrs_Y + 1;
-    end_cid = i * num_attrs_Y;
-    Proj_X_to_XY [i, start_cid:end_cid] = ones_Y_row;
-    Proj_Y_to_XY [ , start_cid:end_cid] = diag (ones_Y_row);
-}
-
-# Compute per-stratum statistics, prevent div-0 for locally empty (NaN-filled) strata
-
-Cnt_XY_per_stratum       = StrataSummator %*% (( X_mask %*% Proj_X_to_XY) * ( Y_mask %*% Proj_Y_to_XY));
-Sum_X_forXY_per_stratum  = StrataSummator %*% (( X      %*% Proj_X_to_XY) * ( Y_mask %*% Proj_Y_to_XY));
-Sum_XX_forXY_per_stratum = StrataSummator %*% (((X * X) %*% Proj_X_to_XY) * ( Y_mask %*% Proj_Y_to_XY));
-Sum_Y_forXY_per_stratum  = StrataSummator %*% (( X_mask %*% Proj_X_to_XY) * ( Y      %*% Proj_Y_to_XY));
-Sum_YY_forXY_per_stratum = StrataSummator %*% (( X_mask %*% Proj_X_to_XY) * ((Y * Y) %*% Proj_Y_to_XY));
-Sum_XY_per_stratum       = StrataSummator %*% (( X      %*% Proj_X_to_XY) * ( Y      %*% Proj_Y_to_XY));
-
-Is_none_XY_per_stratum = ppred (Cnt_XY_per_stratum, 0, "==");
-One_over_cnt_XY_per_stratum = (1 - Is_none_XY_per_stratum) / (Cnt_XY_per_stratum + Is_none_XY_per_stratum);
-num_XY_nonempty_strata = num_strata - colSums (Is_none_XY_per_stratum);
-
-# Recompute some global aggregate X vs. Y statistics to exclude NaN-stratum records
-
-cnt_XY_excluding_NaNstrata = colSums (Cnt_XY_per_stratum);
-sum_XX_forXY_excluding_NaNstrata = colSums (Sum_XX_forXY_per_stratum);
-sum_YY_forXY_excluding_NaNstrata = colSums (Sum_YY_forXY_per_stratum);
-sum_XY_excluding_NaNstrata = colSums (Sum_XY_per_stratum);
-
-# Compute the stratified bivariate statistics
-
-var_sumX_forXY_stratified = sum_XX_forXY_excluding_NaNstrata - colSums (Sum_X_forXY_per_stratum * Sum_X_forXY_per_stratum * One_over_cnt_XY_per_stratum);
-var_sumY_forXY_stratified = sum_YY_forXY_excluding_NaNstrata - colSums (Sum_Y_forXY_per_stratum * Sum_Y_forXY_per_stratum * One_over_cnt_XY_per_stratum);
-cov_sumX_sumY_stratified  = sum_XY_excluding_NaNstrata       - colSums (Sum_X_forXY_per_stratum * Sum_Y_forXY_per_stratum * One_over_cnt_XY_per_stratum);
-
-slope_XY_stratified     = cov_sumX_sumY_stratified / var_sumX_forXY_stratified;
-                                                     sqrt_failsafe_input_8 = var_sumX_forXY_stratified * var_sumY_forXY_stratified;
-                                                     sqrt_failsafe_output_8 = sqrt_failsafe (sqrt_failsafe_input_8);
-corr_XY_stratified      = cov_sumX_sumY_stratified / sqrt_failsafe_output_8;
-r_sqr_X_vs_Y_stratified = cov_sumX_sumY_stratified * cov_sumX_sumY_stratified / (var_sumX_forXY_stratified * var_sumY_forXY_stratified);
-r_sqr_X_vs_Y_stratified = corr_XY_stratified * corr_XY_stratified;
-                             sqrt_failsafe_input_9 = (1 - r_sqr_X_vs_Y_stratified) * var_sumY_forXY_stratified / var_sumX_forXY_stratified / (cnt_XY_excluding_NaNstrata - num_XY_nonempty_strata - 1);
-stdev_slope_XY_stratified  = sqrt_failsafe (sqrt_failsafe_input_9);
-                             sqrt_failsafe_input_10 = (1 - r_sqr_X_vs_Y_stratified) * var_sumY_forXY_stratified / (cnt_XY_excluding_NaNstrata - num_XY_nonempty_strata - 1);
-stdev_errY_vs_X_stratified = sqrt_failsafe (sqrt_failsafe_input_10);
-fStat_Y_vs_X_stratified = (cnt_XY_excluding_NaNstrata - num_XY_nonempty_strata - 1) * r_sqr_X_vs_Y_stratified / (1 - r_sqr_X_vs_Y_stratified);
-p_val_Y_vs_X_stratified = fStat_tailprob (fStat_Y_vs_X_stratified, ones_XY, cnt_XY_excluding_NaNstrata - num_XY_nonempty_strata - 1);
-
-print ("Preparing the output matrix...");
-OutMtx = matrix (0.0, rows = 40, cols = num_attrs_XY);
-
-OutMtx [ 1, ] = Xcols              %*% Proj_X_to_XY;  # 1st variate column number
-OutMtx [ 2, ] = cnt_X_global       %*% Proj_X_to_XY;  # 1st variate global presence count
-OutMtx [ 3, ] = avg_X_global       %*% Proj_X_to_XY;  # 1st variate global mean
-OutMtx [ 4, ] = stdev_X_global     %*% Proj_X_to_XY;  # 1st variate global standard deviation
-OutMtx [ 5, ] = stdev_X_stratified %*% Proj_X_to_XY;  # 1st variate stratified standard deviation
-OutMtx [ 6, ] = r_sqr_X_vs_strata  %*% Proj_X_to_XY;  # R-squared, 1st variate vs. strata
-OutMtx [ 7, ] = p_val_X_vs_strata  %*% Proj_X_to_XY;  # P-value, 1st variate vs. strata
-OutMtx [11, ] = Ycols              %*% Proj_Y_to_XY;  # 2nd variate column number
-OutMtx [12, ] = cnt_Y_global       %*% Proj_Y_to_XY;  # 2nd variate global presence count
-OutMtx [13, ] = avg_Y_global       %*% Proj_Y_to_XY;  # 2nd variate global mean
-OutMtx [14, ] = stdev_Y_global     %*% Proj_Y_to_XY;  # 2nd variate global standard deviation
-OutMtx [15, ] = stdev_Y_stratified %*% Proj_Y_to_XY;  # 2nd variate stratified standard deviation
-OutMtx [16, ] = r_sqr_Y_vs_strata  %*% Proj_Y_to_XY;  # R-squared, 2nd variate vs. strata
-OutMtx [17, ] = p_val_Y_vs_strata  %*% Proj_Y_to_XY;  # P-value, 2nd variate vs. strata
-
-
-OutMtx [21, ] = cnt_XY_global;              # Global 1st & 2nd variate presence count
-OutMtx [22, ] = slope_XY_global;            # Global regression slope (2nd variate vs. 1st variate)
-OutMtx [23, ] = stdev_slope_XY_global;      # Global regression slope standard deviation
-OutMtx [24, ] = corr_XY_global;             # Global correlation = +/- sqrt(R-squared)
-OutMtx [25, ] = stdev_errY_vs_X_global;     # Global residual standard deviation
-OutMtx [26, ] = r_sqr_X_vs_Y_global;        # Global R-squared
-OutMtx [27, ] = p_val_Y_vs_X_global;        # Global P-value for hypothesis "slope = 0"
-OutMtx [31, ] = cnt_XY_excluding_NaNstrata; # Stratified 1st & 2nd variate presence count
-OutMtx [32, ] = slope_XY_stratified;        # Stratified regression slope (2nd variate vs. 1st variate)
-OutMtx [33, ] = stdev_slope_XY_stratified;  # Stratified regression slope standard deviation
-OutMtx [34, ] = corr_XY_stratified;         # Stratified correlation = +/- sqrt(R-squared)
-OutMtx [35, ] = stdev_errY_vs_X_stratified; # Stratified residual standard deviation
-OutMtx [36, ] = r_sqr_X_vs_Y_stratified;    # Stratified R-squared
-OutMtx [37, ] = p_val_Y_vs_X_stratified;    # Stratified P-value for hypothesis "slope = 0"
-OutMtx [38, ] = colSums (ppred (Cnt_XY_per_stratum, 2, ">="));  # Number of strata with at least two counted points
-
-OutMtx = t(OutMtx);
-
-print ("Writing the output matrix...");
-write (OutMtx, $5, format="text");
-print ("END STRATIFIED STATISTICS SCRIPT");
-
-
-deNaN = externalFunction (Matrix[Double] A) return (Matrix[Double] B)
-        implemented in (classname = "org.apache.sysml.udf.lib.DeNaNWrapper", exectype = "mem");
-
-fStat_tailprob = function (Matrix[double] fStat, Matrix[double] df_1, Matrix[double] df_2) return (Matrix[double] tailprob)
-{ # TEMPORARY IMPLEMENTATION
-    tailprob = fStat;
-    for (i in 1:nrow(fStat)) {
-      for (j in 1:ncol(fStat)) {
-        q = castAsScalar (fStat [i, j]);
-        d1 = castAsScalar (df_1 [i, j]);
-        d2 = castAsScalar (df_2 [i, j]);
-        if (d1 >= 1 & d2 >= 1 & q >= 0.0) {
-            tailprob  [i, j] = pf (target = q, df1 = d1, df2 = d2, lower.tail=FALSE);
-        } else {
-            tailprob  [i, j] = 0/0;
-        }
-    } }
-}
-
-sqrt_failsafe = function (Matrix[double] input_A) return (Matrix[double] output_A)
-{
-    NaN = 0/0;
-    mask_A = ppred (input_A, 0.0, ">=");
-    prep_A = input_A * mask_A;
-    mask_A = mask_A - mask_A * (ppred (prep_A, NaN, "=="));
-    prep_A = deNaN (prep_A);
-    output_A = sqrt (prep_A) / mask_A;
-}
-
-sumup = function (Matrix[double] A) return (Matrix[double] sum_A)
-{
-    shift = 1;
-    m_A = nrow(A);
-    sum_A = A;
-    while (shift < m_A) {
-        sum_A [(shift+1):m_A, ] = sum_A [(shift+1):m_A, ] + sum_A [1:(m_A-shift), ];
-        shift = 2 * shift;
-    } 
-}
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# STRATIFIED BIVARIATE STATISTICS, VERSION 2
+# 
+# INPUT  1: Dataset with records as rows (matrix filename)
+# INPUT  2: The stratum ID column number (integer)
+#   Stratum ID must be a small positive integer; fractional values are rounded; if 0 or less, shifted to positive.
+# INPUT  3: 1st variate column numbers (matrix filename)
+# INPUT  4: 2nd variate column numbers (matrix filename)
+# INPUT  5: Output (matrix filename)
+#
+# OUTPUT 1: Output Matrix with 40 columns, containing the following information:
+#     Rows: One row per each distinct pair (1st variate, 2nd variate)
+#     Col 01: 1st variate column number
+#     Col 02: 1st variate global presence count
+#     Col 03: 1st variate global mean
+#     Col 04: 1st variate global standard deviation
+#     Col 05: 1st variate stratified standard deviation
+#     Col 06: R-squared, 1st variate vs. strata
+#     Col 07: P-value, 1st variate vs. strata
+#     Col 08-10: Reserved
+#     Col 11: 2nd variate column number
+#     Col 12: 2nd variate global presence count
+#     Col 13: 2nd variate global mean
+#     Col 14: 2nd variate global standard deviation
+#     Col 15: 2nd variate stratified standard deviation
+#     Col 16: R-squared, 2nd variate vs. strata
+#     Col 17: P-value, 2nd variate vs. strata
+#     Col 18-20: Reserved
+#     Col 21: Global 1st & 2nd variate presence count
+#     Col 22: Global regression slope (2nd variate vs. 1st variate)
+#     Col 23: Global regression slope standard deviation
+#     Col 24: Global correlation = +/- sqrt(R-squared)
+#     Col 25: Global residual standard deviation
+#     Col 26: Global R-squared
+#     Col 27: Global P-value for hypothesis "slope = 0"
+#     Col 28-30: Reserved
+#     Col 31: Stratified 1st & 2nd variate presence count
+#     Col 32: Stratified regression slope (2nd variate vs. 1st variate)
+#     Col 33: Stratified regression slope standard deviation
+#     Col 34: Stratified correlation = +/- sqrt(R-squared)
+#     Col 35: Stratified residual standard deviation
+#     Col 36: Stratified R-squared
+#     Col 37: Stratified P-value for hypothesis "slope = 0"
+#     Col 38: Number of strata with at least two counted points
+#     Col 39-40: Reserved
+#     TO DO: GOODNESS OF FIT MEASURE
+#
+# EXAMPLE:
+# hadoop jar SystemML.jar -f PATH/stratstats.dml -exec singlenode -args PATH/stratstats_test_data.mtx 1 PATH/stratstats_test_X.mtx PATH/stratstats_test_Y.mtx PATH/stratstats_test_output.mtx
+
+NaN = 0/0;
+
+print ("BEGIN STRATIFIED STATISTICS SCRIPT");
+
+print ("Reading the input matrices...");
+
+DataWithNaNs = read ($1, format = "text");
+Xcols = read ($3, format = "text");
+Ycols = read ($4, format = "text");
+stratum_column_id = $2;
+num_records  = nrow(DataWithNaNs);
+num_attrs    = ncol(DataWithNaNs);
+num_attrs_X  = ncol(Xcols);
+num_attrs_Y  = ncol(Ycols);
+num_attrs_XY = num_attrs_X * num_attrs_Y;
+
+
+print ("Preparing the variates...");
+
+Data = deNaN (DataWithNaNs);
+DataNaNmask = ppred (DataWithNaNs, NaN, "==");
+
+tXcols = t(Xcols);
+ones = matrix (1.0, rows = num_attrs_X, cols = 1);
+one_to_num_attrs_X = sumup (ones);
+ProjX = matrix (0.0, rows = num_attrs, cols = num_attrs_X);
+ProjX_ctable = table (tXcols, one_to_num_attrs_X);
+ProjX [1:nrow(ProjX_ctable), ] = ProjX_ctable;
+X = Data %*% ProjX;
+X_mask = 1 - (DataNaNmask %*% ProjX);
+
+tYcols = t(Ycols);
+ones = matrix (1.0, rows = num_attrs_Y, cols = 1);
+one_to_num_attrs_Y = sumup (ones);
+ProjY = matrix (0.0, rows = num_attrs, cols = num_attrs_Y);
+ProjY_ctable = table (tYcols, one_to_num_attrs_Y);
+ProjY [1:nrow(ProjY_ctable), ] = ProjY_ctable;
+Y = Data %*% ProjY;
+Y_mask = 1 - (DataNaNmask %*% ProjY);
+
+
+print ("Preparing the strata...");
+
+Proj_to_deNaN_strata = diag (1 - DataNaNmask [, stratum_column_id]);
+Proj_to_deNaN_strata = removeEmpty (target = Proj_to_deNaN_strata, margin = "rows");
+vector_of_strata_with_empty_but_no_NaNs = round (Proj_to_deNaN_strata %*% (Data [, stratum_column_id]));
+vector_of_strata_with_empty_but_no_NaNs = vector_of_strata_with_empty_but_no_NaNs + (1 - min (vector_of_strata_with_empty_but_no_NaNs));
+num_strata_with_empty_but_no_NaNs = max (vector_of_strata_with_empty_but_no_NaNs);
+num_records_with_nonNaN_strata = nrow (Proj_to_deNaN_strata);
+ones = matrix (1.0, rows = num_records_with_nonNaN_strata, cols = 1);
+one_to_num_records_with_nonNaN_strata = sumup (ones);
+StrataSummator_with_empty_from_nonNaNs = table (vector_of_strata_with_empty_but_no_NaNs, one_to_num_records_with_nonNaN_strata);
+StrataSummator_from_nonNaNs = removeEmpty (target = StrataSummator_with_empty_from_nonNaNs, margin = "rows");
+StrataSummator = StrataSummator_from_nonNaNs %*% Proj_to_deNaN_strata;
+num_strata = nrow (StrataSummator);
+num_empty_strata = num_strata_with_empty_but_no_NaNs - num_strata;
+print ("There are " + num_strata + " nonempty strata and " + num_empty_strata + " empty but non-NaN strata.");
+
+print ("Computing the global single-variate statistics...");
+
+cnt_X_global = colSums (X_mask);
+cnt_Y_global = colSums (Y_mask);
+avg_X_global = colSums (X) / cnt_X_global;
+avg_Y_global = colSums (Y) / cnt_Y_global;
+var_sumX_global = colSums (X * X) - cnt_X_global * (avg_X_global * avg_X_global);
+var_sumY_global = colSums (Y * Y) - cnt_Y_global * (avg_Y_global * avg_Y_global);
+                 sqrt_failsafe_input_1 = var_sumX_global / (cnt_X_global - 1);
+stdev_X_global = sqrt_failsafe (sqrt_failsafe_input_1);
+                 sqrt_failsafe_input_2 = var_sumY_global / (cnt_Y_global - 1)
+stdev_Y_global = sqrt_failsafe (sqrt_failsafe_input_2);
+
+print ("Computing the stratified single-variate statistics...");
+
+# Compute per-stratum statistics, prevent div-0 for locally empty (NaN-filled) strata
+
+Cnt_X_per_stratum = StrataSummator %*% X_mask;
+Cnt_Y_per_stratum = StrataSummator %*% Y_mask;
+Is_none_X_per_stratum = ppred (Cnt_X_per_stratum, 0, "==");
+Is_none_Y_per_stratum = ppred (Cnt_Y_per_stratum, 0, "==");
+One_over_cnt_X_per_stratum = (1 - Is_none_X_per_stratum) / (Cnt_X_per_stratum + Is_none_X_per_stratum);
+One_over_cnt_Y_per_stratum = (1 - Is_none_Y_per_stratum) / (Cnt_Y_per_stratum + Is_none_Y_per_stratum);
+num_X_nonempty_strata = num_strata - colSums (Is_none_X_per_stratum);
+num_Y_nonempty_strata = num_strata - colSums (Is_none_Y_per_stratum);
+
+Sum_X_per_stratum  = StrataSummator %*% X;
+Sum_Y_per_stratum  = StrataSummator %*% Y;
+
+# Recompute some global statistics to exclude NaN-stratum records
+
+cnt_X_excluding_NaNstrata = colSums (Cnt_X_per_stratum);
+cnt_Y_excluding_NaNstrata = colSums (Cnt_Y_per_stratum);
+sum_X_excluding_NaNstrata = colSums (Sum_X_per_stratum);
+sum_Y_excluding_NaNstrata = colSums (Sum_Y_per_stratum);
+var_sumX_excluding_NaNstrata = colSums (StrataSummator %*% (X * X)) - (sum_X_excluding_NaNstrata * sum_X_excluding_NaNstrata) / cnt_X_excluding_NaNstrata;
+var_sumY_excluding_NaNstrata = colSums (StrataSummator %*% (Y * Y)) - (sum_Y_excluding_NaNstrata * sum_Y_excluding_NaNstrata) / cnt_Y_excluding_NaNstrata;
+
+# Compute the stratified statistics
+
+var_sumX_stratified = colSums (StrataSummator %*% (X * X)) - colSums (One_over_cnt_X_per_stratum * Sum_X_per_stratum * Sum_X_per_stratum);
+var_sumY_stratified = colSums (StrataSummator %*% (Y * Y)) - colSums (One_over_cnt_Y_per_stratum * Sum_Y_per_stratum * Sum_Y_per_stratum);
+                      sqrt_failsafe_input_3 = var_sumX_stratified / (cnt_X_excluding_NaNstrata - num_X_nonempty_strata);
+stdev_X_stratified  = sqrt_failsafe (sqrt_failsafe_input_3);
+                      sqrt_failsafe_input_4 = var_sumY_stratified / (cnt_Y_excluding_NaNstrata - num_Y_nonempty_strata);
+stdev_Y_stratified  = sqrt_failsafe (sqrt_failsafe_input_4);
+r_sqr_X_vs_strata   = 1 - var_sumX_stratified / var_sumX_excluding_NaNstrata;
+r_sqr_Y_vs_strata   = 1 - var_sumY_stratified / var_sumY_excluding_NaNstrata;
+fStat_X_vs_strata   = ((var_sumX_excluding_NaNstrata - var_sumX_stratified) / (num_X_nonempty_strata - 1)) / (var_sumX_stratified / (cnt_X_excluding_NaNstrata - num_X_nonempty_strata));
+fStat_Y_vs_strata   = ((var_sumY_excluding_NaNstrata - var_sumY_stratified) / (num_Y_nonempty_strata - 1)) / (var_sumY_stratified / (cnt_Y_excluding_NaNstrata - num_Y_nonempty_strata));
+p_val_X_vs_strata   = fStat_tailprob (fStat_X_vs_strata, num_X_nonempty_strata - 1, cnt_X_excluding_NaNstrata - num_X_nonempty_strata);
+p_val_Y_vs_strata   = fStat_tailprob (fStat_Y_vs_strata, num_Y_nonempty_strata - 1, cnt_Y_excluding_NaNstrata - num_Y_nonempty_strata);
+
+print ("Computing the global bivariate statistics...");
+
+# Compute the aggregate X vs. Y statistics and map them into proper positions
+
+cnt_XY_rectangle       = t(X_mask) %*% Y_mask;
+sum_X_forXY_rectangle  = t(X)      %*% Y_mask;
+sum_XX_forXY_rectangle = t(X * X)  %*% Y_mask;
+sum_Y_forXY_rectangle  = t(X_mask) %*% Y;
+sum_YY_forXY_rectangle = t(X_mask) %*% (Y * Y);
+sum_XY_rectangle       = t(X)      %*% Y;
+cnt_XY_global       = matrix (cnt_XY_rectangle,       rows = 1, cols = num_attrs_XY, byrow = TRUE);
+sum_X_forXY_global  = matrix (sum_X_forXY_rectangle,  rows = 1, cols = num_attrs_XY, byrow = TRUE);
+sum_XX_forXY_global = matrix (sum_XX_forXY_rectangle, rows = 1, cols = num_attrs_XY, byrow = TRUE);
+sum_Y_forXY_global  = matrix (sum_Y_forXY_rectangle,  rows = 1, cols = num_attrs_XY, byrow = TRUE);
+sum_YY_forXY_global = matrix (sum_YY_forXY_rectangle, rows = 1, cols = num_attrs_XY, byrow = TRUE);
+sum_XY_global       = matrix (sum_XY_rectangle,       rows = 1, cols = num_attrs_XY, byrow = TRUE);
+ones_XY = matrix (1.0, rows = 1, cols = num_attrs_XY);
+
+# Compute the global bivariate statistics for output
+
+cov_sumX_sumY_global   = sum_XY_global - sum_X_forXY_global * sum_Y_forXY_global / cnt_XY_global;
+var_sumX_forXY_global  = sum_XX_forXY_global - sum_X_forXY_global * sum_X_forXY_global / cnt_XY_global;
+var_sumY_forXY_global  = sum_YY_forXY_global - sum_Y_forXY_global * sum_Y_forXY_global / cnt_XY_global;
+slope_XY_global        = cov_sumX_sumY_global / var_sumX_forXY_global;
+                                                sqrt_failsafe_input_5 = var_sumX_forXY_global * var_sumY_forXY_global;
+                                                sqrt_failsafe_output_5 = sqrt_failsafe (sqrt_failsafe_input_5);
+corr_XY_global         = cov_sumX_sumY_global / sqrt_failsafe_output_5;
+r_sqr_X_vs_Y_global    = cov_sumX_sumY_global * cov_sumX_sumY_global / (var_sumX_forXY_global * var_sumY_forXY_global);
+                         sqrt_failsafe_input_6 = (1 - r_sqr_X_vs_Y_global) * var_sumY_forXY_global / var_sumX_forXY_global / (cnt_XY_global - 2)
+stdev_slope_XY_global  = sqrt_failsafe (sqrt_failsafe_input_6);
+                         sqrt_failsafe_input_7 = (1 - r_sqr_X_vs_Y_global) * var_sumY_forXY_global / (cnt_XY_global - 2)
+stdev_errY_vs_X_global = sqrt_failsafe (sqrt_failsafe_input_7);
+fStat_Y_vs_X_global    = (cnt_XY_global - 2) * r_sqr_X_vs_Y_global / (1 - r_sqr_X_vs_Y_global);
+p_val_Y_vs_X_global    = fStat_tailprob (fStat_Y_vs_X_global, ones_XY, cnt_XY_global - 2);
+
+print ("Computing the stratified bivariate statistics...");
+
+# Create projections to "intermingle" X and Y into attribute pairs
+
+Proj_X_to_XY = matrix (0.0, rows = num_attrs_X, cols = num_attrs_XY);
+Proj_Y_to_XY = matrix (0.0, rows = num_attrs_Y, cols = num_attrs_XY);
+ones_Y_row   = matrix (1.0, rows = 1, cols = num_attrs_Y);
+for (i in 1:num_attrs_X) {
+    start_cid = (i - 1) * num_attrs_Y + 1;
+    end_cid = i * num_attrs_Y;
+    Proj_X_to_XY [i, start_cid:end_cid] = ones_Y_row;
+    Proj_Y_to_XY [ , start_cid:end_cid] = diag (ones_Y_row);
+}
+
+# Compute per-stratum statistics, prevent div-0 for locally empty (NaN-filled) strata
+
+Cnt_XY_per_stratum       = StrataSummator %*% (( X_mask %*% Proj_X_to_XY) * ( Y_mask %*% Proj_Y_to_XY));
+Sum_X_forXY_per_stratum  = StrataSummator %*% (( X      %*% Proj_X_to_XY) * ( Y_mask %*% Proj_Y_to_XY));
+Sum_XX_forXY_per_stratum = StrataSummator %*% (((X * X) %*% Proj_X_to_XY) * ( Y_mask %*% Proj_Y_to_XY));
+Sum_Y_forXY_per_stratum  = StrataSummator %*% (( X_mask %*% Proj_X_to_XY) * ( Y      %*% Proj_Y_to_XY));
+Sum_YY_forXY_per_stratum = StrataSummator %*% (( X_mask %*% Proj_X_to_XY) * ((Y * Y) %*% Proj_Y_to_XY));
+Sum_XY_per_stratum       = StrataSummator %*% (( X      %*% Proj_X_to_XY) * ( Y      %*% Proj_Y_to_XY));
+
+Is_none_XY_per_stratum = ppred (Cnt_XY_per_stratum, 0, "==");
+One_over_cnt_XY_per_stratum = (1 - Is_none_XY_per_stratum) / (Cnt_XY_per_stratum + Is_none_XY_per_stratum);
+num_XY_nonempty_strata = num_strata - colSums (Is_none_XY_per_stratum);
+
+# Recompute some global aggregate X vs. Y statistics to exclude NaN-stratum records
+
+cnt_XY_excluding_NaNstrata = colSums (Cnt_XY_per_stratum);
+sum_XX_forXY_excluding_NaNstrata = colSums (Sum_XX_forXY_per_stratum);
+sum_YY_forXY_excluding_NaNstrata = colSums (Sum_YY_forXY_per_stratum);
+sum_XY_excluding_NaNstrata = colSums (Sum_XY_per_stratum);
+
+# Compute the stratified bivariate statistics
+
+var_sumX_forXY_stratified = sum_XX_forXY_excluding_NaNstrata - colSums (Sum_X_forXY_per_stratum * Sum_X_forXY_per_stratum * One_over_cnt_XY_per_stratum);
+var_sumY_forXY_stratified = sum_YY_forXY_excluding_NaNstrata - colSums (Sum_Y_forXY_per_stratum * Sum_Y_forXY_per_stratum * One_over_cnt_XY_per_stratum);
+cov_sumX_sumY_stratified  = sum_XY_excluding_NaNstrata       - colSums (Sum_X_forXY_per_stratum * Sum_Y_forXY_per_stratum * One_over_cnt_XY_per_stratum);
+
+slope_XY_stratified     = cov_sumX_sumY_stratified / var_sumX_forXY_stratified;
+                                                     sqrt_failsafe_input_8 = var_sumX_forXY_stratified * var_sumY_forXY_stratified;
+                                                     sqrt_failsafe_output_8 = sqrt_failsafe (sqrt_failsafe_input_8);
+corr_XY_stratified      = cov_sumX_sumY_stratified / sqrt_failsafe_output_8;
+r_sqr_X_vs_Y_stratified = cov_sumX_sumY_stratified * cov_sumX_sumY_stratified / (var_sumX_forXY_stratified * var_sumY_forXY_stratified);
+r_sqr_X_vs_Y_stratified = corr_XY_stratified * corr_XY_stratified;
+                             sqrt_failsafe_input_9 = (1 - r_sqr_X_vs_Y_stratified) * var_sumY_forXY_stratified / var_sumX_forXY_stratified / (cnt_XY_excluding_NaNstrata - num_XY_nonempty_strata - 1);
+stdev_slope_XY_stratified  = sqrt_failsafe (sqrt_failsafe_input_9);
+                             sqrt_failsafe_input_10 = (1 - r_sqr_X_vs_Y_stratified) * var_sumY_forXY_stratified / (cnt_XY_excluding_NaNstrata - num_XY_nonempty_strata - 1);
+stdev_errY_vs_X_stratified = sqrt_failsafe (sqrt_failsafe_input_10);
+fStat_Y_vs_X_stratified = (cnt_XY_excluding_NaNstrata - num_XY_nonempty_strata - 1) * r_sqr_X_vs_Y_stratified / (1 - r_sqr_X_vs_Y_stratified);
+p_val_Y_vs_X_stratified = fStat_tailprob (fStat_Y_vs_X_stratified, ones_XY, cnt_XY_excluding_NaNstrata - num_XY_nonempty_strata - 1);
+
+print ("Preparing the output matrix...");
+OutMtx = matrix (0.0, rows = 40, cols = num_attrs_XY);
+
+OutMtx [ 1, ] = Xcols              %*% Proj_X_to_XY;  # 1st variate column number
+OutMtx [ 2, ] = cnt_X_global       %*% Proj_X_to_XY;  # 1st variate global presence count
+OutMtx [ 3, ] = avg_X_global       %*% Proj_X_to_XY;  # 1st variate global mean
+OutMtx [ 4, ] = stdev_X_global     %*% Proj_X_to_XY;  # 1st variate global standard deviation
+OutMtx [ 5, ] = stdev_X_stratified %*% Proj_X_to_XY;  # 1st variate stratified standard deviation
+OutMtx [ 6, ] = r_sqr_X_vs_strata  %*% Proj_X_to_XY;  # R-squared, 1st variate vs. strata
+OutMtx [ 7, ] = p_val_X_vs_strata  %*% Proj_X_to_XY;  # P-value, 1st variate vs. strata
+OutMtx [11, ] = Ycols              %*% Proj_Y_to_XY;  # 2nd variate column number
+OutMtx [12, ] = cnt_Y_global       %*% Proj_Y_to_XY;  # 2nd variate global presence count
+OutMtx [13, ] = avg_Y_global       %*% Proj_Y_to_XY;  # 2nd variate global mean
+OutMtx [14, ] = stdev_Y_global     %*% Proj_Y_to_XY;  # 2nd variate global standard deviation
+OutMtx [15, ] = stdev_Y_stratified %*% Proj_Y_to_XY;  # 2nd variate stratified standard deviation
+OutMtx [16, ] = r_sqr_Y_vs_strata  %*% Proj_Y_to_XY;  # R-squared, 2nd variate vs. strata
+OutMtx [17, ] = p_val_Y_vs_strata  %*% Proj_Y_to_XY;  # P-value, 2nd variate vs. strata
+
+
+OutMtx [21, ] = cnt_XY_global;              # Global 1st & 2nd variate presence count
+OutMtx [22, ] = slope_XY_global;            # Global regression slope (2nd variate vs. 1st variate)
+OutMtx [23, ] = stdev_slope_XY_global;      # Global regression slope standard deviation
+OutMtx [24, ] = corr_XY_global;             # Global correlation = +/- sqrt(R-squared)
+OutMtx [25, ] = stdev_errY_vs_X_global;     # Global residual standard deviation
+OutMtx [26, ] = r_sqr_X_vs_Y_global;        # Global R-squared
+OutMtx [27, ] = p_val_Y_vs_X_global;        # Global P-value for hypothesis "slope = 0"
+OutMtx [31, ] = cnt_XY_excluding_NaNstrata; # Stratified 1st & 2nd variate presence count
+OutMtx [32, ] = slope_XY_stratified;        # Stratified regression slope (2nd variate vs. 1st variate)
+OutMtx [33, ] = stdev_slope_XY_stratified;  # Stratified regression slope standard deviation
+OutMtx [34, ] = corr_XY_stratified;         # Stratified correlation = +/- sqrt(R-squared)
+OutMtx [35, ] = stdev_errY_vs_X_stratified; # Stratified residual standard deviation
+OutMtx [36, ] = r_sqr_X_vs_Y_stratified;    # Stratified R-squared
+OutMtx [37, ] = p_val_Y_vs_X_stratified;    # Stratified P-value for hypothesis "slope = 0"
+OutMtx [38, ] = colSums (ppred (Cnt_XY_per_stratum, 2, ">="));  # Number of strata with at least two counted points
+
+OutMtx = t(OutMtx);
+
+print ("Writing the output matrix...");
+write (OutMtx, $5, format="text");
+print ("END STRATIFIED STATISTICS SCRIPT");
+
+
+deNaN = externalFunction (Matrix[Double] A) return (Matrix[Double] B)
+        implemented in (classname = "org.apache.sysml.udf.lib.DeNaNWrapper", exectype = "mem");
+
+fStat_tailprob = function (Matrix[double] fStat, Matrix[double] df_1, Matrix[double] df_2) return (Matrix[double] tailprob)
+{ # TEMPORARY IMPLEMENTATION
+    tailprob = fStat;
+    for (i in 1:nrow(fStat)) {
+      for (j in 1:ncol(fStat)) {
+        q = castAsScalar (fStat [i, j]);
+        d1 = castAsScalar (df_1 [i, j]);
+        d2 = castAsScalar (df_2 [i, j]);
+        if (d1 >= 1 & d2 >= 1 & q >= 0.0) {
+            tailprob  [i, j] = pf (target = q, df1 = d1, df2 = d2, lower.tail=FALSE);
+        } else {
+            tailprob  [i, j] = 0/0;
+        }
+    } }
+}
+
+sqrt_failsafe = function (Matrix[double] input_A) return (Matrix[double] output_A)
+{
+    NaN = 0/0;
+    mask_A = ppred (input_A, 0.0, ">=");
+    prep_A = input_A * mask_A;
+    mask_A = mask_A - mask_A * (ppred (prep_A, NaN, "=="));
+    prep_A = deNaN (prep_A);
+    output_A = sqrt (prep_A) / mask_A;
+}
+
+sumup = function (Matrix[double] A) return (Matrix[double] sum_A)
+{
+    shift = 1;
+    m_A = nrow(A);
+    sum_A = A;
+    while (shift < m_A) {
+        sum_A [(shift+1):m_A, ] = sum_A [(shift+1):m_A, ] + sum_A [1:(m_A-shift), ];
+        shift = 2 * shift;
+    } 
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/ctableStats/wilson_score.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/ctableStats/wilson_score.dml b/src/test/scripts/applications/ctableStats/wilson_score.dml
index 90db3fc..27d0899 100644
--- a/src/test/scripts/applications/ctableStats/wilson_score.dml
+++ b/src/test/scripts/applications/ctableStats/wilson_score.dml
@@ -1,145 +1,145 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# Computes 95% confidence intervals for binomial ratios using Wilson Score and Exact Score
-# INPUT 1: Matrix [rows, 2] of integer counts (m, n) where 0 <= m <= n
-# INPUT 2: The number of rows
-# INPUT 3: The output file
-# OUTPUT : Matrix [rows, 15] of doubles, containing the following information:
-#     (m / sum(m), Wilson 95%-conf.left, Wilson 95%-conf.right, Exact 95%-conf.left, Exact 95%-conf.right, 
-#      n / sum(n), Wilson 95%-conf.left, Wilson 95%-conf.right, Exact 95%-conf.left, Exact 95%-conf.right, 
-#      m / n,      Wilson 95%-conf.left, Wilson 95%-conf.right, Exact 95%-conf.left, Exact 95%-conf.right)
-# PLEASE BE AWARE THAT FOR EXTREMELY SMALL COUNTS THE WILSON INTERVALS WILL BE WRONG! THEY USE GAUSSIAN APPROXIMATION!
-# EXAMPLE: wilson_score.dml -args "test/scripts/applications/ctableStats/wilson_test_input.mtx" 7 "test/scripts/applications/ctableStats/wilson_test_output.mtx"
-
-setwd ("test/scripts/applications/ctableStats");
-source ("Binomial.dml");
-
-# test_n = Rand (rows = 1, cols = 1, min = 6, max = 6);
-# test_m = Rand (rows = 1, cols = 1, min = 0, max = 0);
-# test_p = Rand (rows = 1, cols = 1, min = 0.00421, max = 0.00421);
-# [alpha] = binomProb (test_n, test_m, test_p);
-# print ("TEST:  Prob [Binom (" + castAsScalar (test_n) + ", " + castAsScalar (test_p) + ") <= " + castAsScalar (test_m) + "]  =  " + castAsScalar (alpha));
-
-print ("BEGIN WILSON SCORE SCRIPT");
-print ("Reading X...");
-X = read ($1, rows = $2, cols = 2, format = "text");
-num_rows = $2;
-print ("Performing the computation...");
-
-M = X [, 1];
-N = X [, 2];
-blahh = Rand (rows = num_rows, cols = 1, min = 1.0, max = 1.0);
-sum_M = blahh * sum(M);
-sum_N = blahh * sum(N);
-
-[p_m_sum, l_m_sum_wilson, r_m_sum_wilson] = wilson_confidence (sum_M, M);
-[p_n_sum, l_n_sum_wilson, r_n_sum_wilson] = wilson_confidence (sum_N, N);
-[p_m_n, l_m_n_wilson, r_m_n_wilson] = wilson_confidence (N, M);
-
-M_minus_1 = M - 1;
-N_minus_1 = N - 1;
-big_alpha   = 0.975 * blahh;
-small_alpha = 0.025 * blahh;
-
-[l_m_sum_exact]   = binomQuantile (sum_M, M_minus_1, big_alpha);
-[r_m_sum_exact]   = binomQuantile (sum_M, M, small_alpha);
-[l_n_sum_exact]   = binomQuantile (sum_N, N_minus_1, big_alpha);
-[r_n_sum_exact]   = binomQuantile (sum_N, N, small_alpha);
-[l_m_n_exact]     = binomQuantile (N, M_minus_1, big_alpha);
-[r_m_n_exact]     = binomQuantile (N, M, small_alpha);
-
-result = Rand (rows = num_rows, cols = 15, min = 0.0, max = 0.0);
-result [,  1] = p_m_sum;
-result [,  2] = l_m_sum_wilson;
-result [,  3] = r_m_sum_wilson;
-result [,  4] = l_m_sum_exact;
-result [,  5] = r_m_sum_exact;
-result [,  6] = p_n_sum;
-result [,  7] = l_n_sum_wilson;
-result [,  8] = r_n_sum_wilson;
-result [,  9] = l_n_sum_exact;
-result [, 10] = r_n_sum_exact;
-result [, 11] = p_m_n;
-result [, 12] = l_m_n_wilson;
-result [, 13] = r_m_n_wilson;
-result [, 14] = l_m_n_exact;
-result [, 15] = r_m_n_exact;
-
-print ("M / sum(M)  RESULTS:  Wilson, Exact");
-
-for (i in 1:num_rows) {
-    p1  = castAsScalar (round (result [i,  1] * 100000) / 1000);
-    lw1 = castAsScalar (round (result [i,  2] * 100000) / 1000);
-    rw1 = castAsScalar (round (result [i,  3] * 100000) / 1000);
-    le1 = castAsScalar (round (result [i,  4] * 100000) / 1000);
-    re1 = castAsScalar (round (result [i,  5] * 100000) / 1000);
-    print ("Row " + i + ":   "
-        + castAsScalar (M [i, 1]) + "/" + castAsScalar (sum_M [i, 1]) + " = " 
-        + p1 + "%  [" + lw1 + "%, " + rw1 + "%]   [" + le1 + "%, " + re1 + "%]");
-}
-
-print ("N / sum(N)  RESULTS:  Wilson, Exact");
-
-for (i in 1:num_rows) {
-    p2  = castAsScalar (round (result [i,  6] * 100000) / 1000);
-    lw2 = castAsScalar (round (result [i,  7] * 100000) / 1000);
-    rw2 = castAsScalar (round (result [i,  8] * 100000) / 1000);
-    le2 = castAsScalar (round (result [i,  9] * 100000) / 1000);
-    re2 = castAsScalar (round (result [i, 10] * 100000) / 1000);
-    print ("Row " + i + ":   "
-        + castAsScalar (N [i, 1]) + "/" + castAsScalar (sum_N [i, 1]) + " = " 
-        + p2 + "%  [" + lw2 + "%, " + rw2 + "%]   [" + le2 + "%, " + re2 + "%]   ");
-}
-
-print ("M / N  RESULTS:  Wilson, Exact");
-
-for (i in 1:num_rows) {
-    p3  = castAsScalar (round (result [i, 11] * 100000) / 1000);
-    lw3 = castAsScalar (round (result [i, 12] * 100000) / 1000);
-    rw3 = castAsScalar (round (result [i, 13] * 100000) / 1000);
-    le3 = castAsScalar (round (result [i, 14] * 100000) / 1000);
-    re3 = castAsScalar (round (result [i, 15] * 100000) / 1000);
-    print ("Row " + i + ":   "
-        + castAsScalar (M [i, 1]) + "/" + castAsScalar (    N [i, 1]) + " = " 
-        + p3 + "%  [" + lw3 + "%, " + rw3 + "%]   [" + le3 + "%, " + re3 + "%]   ");
-}
-
-
-
-print ("Writing the results...");
-write (result, $3, format = "text");
-print ("END WILSON SCORE SCRIPT");
-
-
-wilson_confidence = function (Matrix[double] n, Matrix[double] m)
-return (Matrix[double] ratio, Matrix[double] conf_left, Matrix[double] conf_right)
-{
-    z = 1.96;      # 97.5% normal percentile, for 95% confidence interval
-    z_sq_n = z * z * n;
-    qroot = sqrt (z_sq_n * (m * (n - m) + z_sq_n / 4));
-    midpt = n * m + z_sq_n / 2;
-    denom = n * n + z_sq_n;
-    ratio = m / n;
-    conf_left  = (midpt - qroot) / denom;
-    conf_right = (midpt + qroot) / denom;
-}
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# Computes 95% confidence intervals for binomial ratios using Wilson Score and Exact Score
+# INPUT 1: Matrix [rows, 2] of integer counts (m, n) where 0 <= m <= n
+# INPUT 2: The number of rows
+# INPUT 3: The output file
+# OUTPUT : Matrix [rows, 15] of doubles, containing the following information:
+#     (m / sum(m), Wilson 95%-conf.left, Wilson 95%-conf.right, Exact 95%-conf.left, Exact 95%-conf.right, 
+#      n / sum(n), Wilson 95%-conf.left, Wilson 95%-conf.right, Exact 95%-conf.left, Exact 95%-conf.right, 
+#      m / n,      Wilson 95%-conf.left, Wilson 95%-conf.right, Exact 95%-conf.left, Exact 95%-conf.right)
+# PLEASE BE AWARE THAT FOR EXTREMELY SMALL COUNTS THE WILSON INTERVALS WILL BE WRONG! THEY USE GAUSSIAN APPROXIMATION!
+# EXAMPLE: wilson_score.dml -args "test/scripts/applications/ctableStats/wilson_test_input.mtx" 7 "test/scripts/applications/ctableStats/wilson_test_output.mtx"
+
+setwd ("test/scripts/applications/ctableStats");
+source ("Binomial.dml");
+
+# test_n = Rand (rows = 1, cols = 1, min = 6, max = 6);
+# test_m = Rand (rows = 1, cols = 1, min = 0, max = 0);
+# test_p = Rand (rows = 1, cols = 1, min = 0.00421, max = 0.00421);
+# [alpha] = binomProb (test_n, test_m, test_p);
+# print ("TEST:  Prob [Binom (" + castAsScalar (test_n) + ", " + castAsScalar (test_p) + ") <= " + castAsScalar (test_m) + "]  =  " + castAsScalar (alpha));
+
+print ("BEGIN WILSON SCORE SCRIPT");
+print ("Reading X...");
+X = read ($1, rows = $2, cols = 2, format = "text");
+num_rows = $2;
+print ("Performing the computation...");
+
+M = X [, 1];
+N = X [, 2];
+blahh = Rand (rows = num_rows, cols = 1, min = 1.0, max = 1.0);
+sum_M = blahh * sum(M);
+sum_N = blahh * sum(N);
+
+[p_m_sum, l_m_sum_wilson, r_m_sum_wilson] = wilson_confidence (sum_M, M);
+[p_n_sum, l_n_sum_wilson, r_n_sum_wilson] = wilson_confidence (sum_N, N);
+[p_m_n, l_m_n_wilson, r_m_n_wilson] = wilson_confidence (N, M);
+
+M_minus_1 = M - 1;
+N_minus_1 = N - 1;
+big_alpha   = 0.975 * blahh;
+small_alpha = 0.025 * blahh;
+
+[l_m_sum_exact]   = binomQuantile (sum_M, M_minus_1, big_alpha);
+[r_m_sum_exact]   = binomQuantile (sum_M, M, small_alpha);
+[l_n_sum_exact]   = binomQuantile (sum_N, N_minus_1, big_alpha);
+[r_n_sum_exact]   = binomQuantile (sum_N, N, small_alpha);
+[l_m_n_exact]     = binomQuantile (N, M_minus_1, big_alpha);
+[r_m_n_exact]     = binomQuantile (N, M, small_alpha);
+
+result = Rand (rows = num_rows, cols = 15, min = 0.0, max = 0.0);
+result [,  1] = p_m_sum;
+result [,  2] = l_m_sum_wilson;
+result [,  3] = r_m_sum_wilson;
+result [,  4] = l_m_sum_exact;
+result [,  5] = r_m_sum_exact;
+result [,  6] = p_n_sum;
+result [,  7] = l_n_sum_wilson;
+result [,  8] = r_n_sum_wilson;
+result [,  9] = l_n_sum_exact;
+result [, 10] = r_n_sum_exact;
+result [, 11] = p_m_n;
+result [, 12] = l_m_n_wilson;
+result [, 13] = r_m_n_wilson;
+result [, 14] = l_m_n_exact;
+result [, 15] = r_m_n_exact;
+
+print ("M / sum(M)  RESULTS:  Wilson, Exact");
+
+for (i in 1:num_rows) {
+    p1  = castAsScalar (round (result [i,  1] * 100000) / 1000);
+    lw1 = castAsScalar (round (result [i,  2] * 100000) / 1000);
+    rw1 = castAsScalar (round (result [i,  3] * 100000) / 1000);
+    le1 = castAsScalar (round (result [i,  4] * 100000) / 1000);
+    re1 = castAsScalar (round (result [i,  5] * 100000) / 1000);
+    print ("Row " + i + ":   "
+        + castAsScalar (M [i, 1]) + "/" + castAsScalar (sum_M [i, 1]) + " = " 
+        + p1 + "%  [" + lw1 + "%, " + rw1 + "%]   [" + le1 + "%, " + re1 + "%]");
+}
+
+print ("N / sum(N)  RESULTS:  Wilson, Exact");
+
+for (i in 1:num_rows) {
+    p2  = castAsScalar (round (result [i,  6] * 100000) / 1000);
+    lw2 = castAsScalar (round (result [i,  7] * 100000) / 1000);
+    rw2 = castAsScalar (round (result [i,  8] * 100000) / 1000);
+    le2 = castAsScalar (round (result [i,  9] * 100000) / 1000);
+    re2 = castAsScalar (round (result [i, 10] * 100000) / 1000);
+    print ("Row " + i + ":   "
+        + castAsScalar (N [i, 1]) + "/" + castAsScalar (sum_N [i, 1]) + " = " 
+        + p2 + "%  [" + lw2 + "%, " + rw2 + "%]   [" + le2 + "%, " + re2 + "%]   ");
+}
+
+print ("M / N  RESULTS:  Wilson, Exact");
+
+for (i in 1:num_rows) {
+    p3  = castAsScalar (round (result [i, 11] * 100000) / 1000);
+    lw3 = castAsScalar (round (result [i, 12] * 100000) / 1000);
+    rw3 = castAsScalar (round (result [i, 13] * 100000) / 1000);
+    le3 = castAsScalar (round (result [i, 14] * 100000) / 1000);
+    re3 = castAsScalar (round (result [i, 15] * 100000) / 1000);
+    print ("Row " + i + ":   "
+        + castAsScalar (M [i, 1]) + "/" + castAsScalar (    N [i, 1]) + " = " 
+        + p3 + "%  [" + lw3 + "%, " + rw3 + "%]   [" + le3 + "%, " + re3 + "%]   ");
+}
+
+
+
+print ("Writing the results...");
+write (result, $3, format = "text");
+print ("END WILSON SCORE SCRIPT");
+
+
+wilson_confidence = function (Matrix[double] n, Matrix[double] m)
+return (Matrix[double] ratio, Matrix[double] conf_left, Matrix[double] conf_right)
+{
+    z = 1.96;      # 97.5% normal percentile, for 95% confidence interval
+    z_sq_n = z * z * n;
+    qroot = sqrt (z_sq_n * (m * (n - m) + z_sq_n / 4));
+    midpt = n * m + z_sq_n / 2;
+    denom = n * n + z_sq_n;
+    ratio = m / n;
+    conf_left  = (midpt - qroot) / denom;
+    conf_right = (midpt + qroot) / denom;
+}
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/ctableStats/zipftest.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/ctableStats/zipftest.dml b/src/test/scripts/applications/ctableStats/zipftest.dml
index 08fe217..50ac5f4 100644
--- a/src/test/scripts/applications/ctableStats/zipftest.dml
+++ b/src/test/scripts/applications/ctableStats/zipftest.dml
@@ -1,77 +1,77 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# Generator of random records with boolean features
-# Average record (row) and feature (column) densities follow
-#   power laws:  E(#1s in line k) = const / (k + add)^pow
-# Cell[1, 1] has the highest probability to be 1, also input 
-
-# By setting num_features >> num_records we allow lots of rare
-# features while keeping most records nonempty.
-# The power ("pow") in the power law determines the tail behavior;
-# The additive ("add") determines how steeply the density changes
-#   in the first few records or features.
-
-num_records = 1000;    # The number of records (rows)
-num_features = 50000;  # The number of boolean features (columns)
-
-pow_records = 2.0;     # The Zipf law power for record  density
-pow_features = 1.0;    # The Zipf law power for feature density
-
-add_records = 100.0;   # The additive shift for record  density
-add_features = 20.0;   # The additive shift for feature density
-
-max_cell_prob = 1.0;   # The probability for Cell[1, 1] to be 1
-
-############
-
-c = max_cell_prob * ((1.0 + add_records)^pow_records) * ((1.0 + add_features)^pow_features);
-
-vec_records = matrix (1.0, rows = num_records, cols = 1);
-vec_records = sumup (vec_records);
-vec_records = 1.0 / ((vec_records + add_records)^pow_records);
-
-vec_features = matrix (1.0, rows = num_features, cols = 1);
-vec_features = sumup (vec_features);
-vec_features = 1.0 / ((t(vec_features) + add_features)^pow_features);
-
-Probs = c * (vec_records %*% vec_features);
-avg_density_records = rowSums (Probs);
-avg_density_features = colSums (Probs);
-
-Tosses = Rand (rows = num_records, cols = num_features, min = 0.0, max = 1.0);
-Data = ppred (Tosses, Probs, "<=");
-
-write (avg_density_records,  "Zipf.AvgDensity.Rows", format="text");
-write (avg_density_features, "Zipf.AvgDensity.Cols", format="text");
-write (Data, "Zipf.Data", format="text");
-
-
-sumup = function (Matrix[double] A) return (Matrix[double] sum_A)
-{
-    shift = 1;
-    m_A = nrow(A);
-    sum_A = A;
-    while (shift < m_A) {
-        sum_A [(shift+1):m_A, ] = sum_A [(shift+1):m_A, ] + sum_A [1:(m_A-shift), ];
-        shift = 2 * shift;
-    } 
-}
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# Generator of random records with boolean features
+# Average record (row) and feature (column) densities follow
+#   power laws:  E(#1s in line k) = const / (k + add)^pow
+# Cell[1, 1] has the highest probability to be 1, also input 
+
+# By setting num_features >> num_records we allow lots of rare
+# features while keeping most records nonempty.
+# The power ("pow") in the power law determines the tail behavior;
+# The additive ("add") determines how steeply the density changes
+#   in the first few records or features.
+
+num_records = 1000;    # The number of records (rows)
+num_features = 50000;  # The number of boolean features (columns)
+
+pow_records = 2.0;     # The Zipf law power for record  density
+pow_features = 1.0;    # The Zipf law power for feature density
+
+add_records = 100.0;   # The additive shift for record  density
+add_features = 20.0;   # The additive shift for feature density
+
+max_cell_prob = 1.0;   # The probability for Cell[1, 1] to be 1
+
+############
+
+c = max_cell_prob * ((1.0 + add_records)^pow_records) * ((1.0 + add_features)^pow_features);
+
+vec_records = matrix (1.0, rows = num_records, cols = 1);
+vec_records = sumup (vec_records);
+vec_records = 1.0 / ((vec_records + add_records)^pow_records);
+
+vec_features = matrix (1.0, rows = num_features, cols = 1);
+vec_features = sumup (vec_features);
+vec_features = 1.0 / ((t(vec_features) + add_features)^pow_features);
+
+Probs = c * (vec_records %*% vec_features);
+avg_density_records = rowSums (Probs);
+avg_density_features = colSums (Probs);
+
+Tosses = Rand (rows = num_records, cols = num_features, min = 0.0, max = 1.0);
+Data = ppred (Tosses, Probs, "<=");
+
+write (avg_density_records,  "Zipf.AvgDensity.Rows", format="text");
+write (avg_density_features, "Zipf.AvgDensity.Cols", format="text");
+write (Data, "Zipf.Data", format="text");
+
+
+sumup = function (Matrix[double] A) return (Matrix[double] sum_A)
+{
+    shift = 1;
+    m_A = nrow(A);
+    sum_A = A;
+    while (shift < m_A) {
+        sum_A [(shift+1):m_A, ] = sum_A [(shift+1):m_A, ] + sum_A [1:(m_A-shift), ];
+        shift = 2 * shift;
+    } 
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/descriptivestats/Categorical.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/descriptivestats/Categorical.R b/src/test/scripts/applications/descriptivestats/Categorical.R
index 46c380d..f88c8bf 100644
--- a/src/test/scripts/applications/descriptivestats/Categorical.R
+++ b/src/test/scripts/applications/descriptivestats/Categorical.R
@@ -1,57 +1,57 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# JUnit test class: dml.test.integration.descriptivestats.UnivariateStatsTest.java
-# command line invocation assuming $C_HOME is set to the home of the R script
-# Rscript $C_HOME/Categorical.R $C_HOME/in/ $C_HOME/expected/
-args <- commandArgs(TRUE)
-options(digits=22)
-
-#library("batch")
-library("Matrix")
-
-V = readMM(paste(args[1], "vector.mtx", sep=""))
-
-tab = table(V[,1])
-cat = t(as.numeric(names(tab)))
-Nc = t(as.vector(tab))
-
-# the number of categories of a categorical variable
-R = length(Nc)
-
-# total count
-s = sum(Nc)
-
-# percentage values of each categorical compare to the total case number
-Pc = Nc / s
-
-# all categorical values of a categorical variable
-C = (Nc > 0)
-
-# mode
-mx = max(Nc)
-Mode = (Nc == mx)
-
-writeMM(as(t(Nc),"CsparseMatrix"), paste(args[2], "Nc", sep=""), format="text");
-write(R, paste(args[2], "R", sep=""));
-writeMM(as(t(Pc),"CsparseMatrix"), paste(args[2], "Pc", sep=""), format="text");
-writeMM(as(t(C),"CsparseMatrix"), paste(args[2], "C", sep=""), format="text");
-writeMM(as(t(Mode),"CsparseMatrix"), paste(args[2], "Mode", sep=""), format="text");
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# JUnit test class: dml.test.integration.descriptivestats.UnivariateStatsTest.java
+# command line invocation assuming $C_HOME is set to the home of the R script
+# Rscript $C_HOME/Categorical.R $C_HOME/in/ $C_HOME/expected/
+args <- commandArgs(TRUE)
+options(digits=22)
+
+#library("batch")
+library("Matrix")
+
+V = readMM(paste(args[1], "vector.mtx", sep=""))
+
+tab = table(V[,1])
+cat = t(as.numeric(names(tab)))
+Nc = t(as.vector(tab))
+
+# the number of categories of a categorical variable
+R = length(Nc)
+
+# total count
+s = sum(Nc)
+
+# fraction of the total number of cases that falls into each category
+Pc = Nc / s
+
+# all categorical values of a categorical variable
+C = (Nc > 0)
+
+# mode
+mx = max(Nc)
+Mode = (Nc == mx)
+
+writeMM(as(t(Nc),"CsparseMatrix"), paste(args[2], "Nc", sep=""), format="text");
+write(R, paste(args[2], "R", sep=""));
+writeMM(as(t(Pc),"CsparseMatrix"), paste(args[2], "Pc", sep=""), format="text");
+writeMM(as(t(C),"CsparseMatrix"), paste(args[2], "C", sep=""), format="text");
+writeMM(as(t(Mode),"CsparseMatrix"), paste(args[2], "Mode", sep=""), format="text");

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/descriptivestats/Categorical.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/descriptivestats/Categorical.dml b/src/test/scripts/applications/descriptivestats/Categorical.dml
index 735f5d6..7599d06 100644
--- a/src/test/scripts/applications/descriptivestats/Categorical.dml
+++ b/src/test/scripts/applications/descriptivestats/Categorical.dml
@@ -1,55 +1,55 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# Note this script is externalized to customers, please do not change w/o consulting component owner.
-# How to invoke this dml script Categorical.dml?
-# Assume C_HOME is set to the home of the dml script
-# Assume input and output directories are on hdfs as INPUT_DIR and OUTPUT_DIR
-# Assume rows = 10000 for vector
-# hadoop jar SystemML.jar -f $C_HOME/Categorical.dml -args "$INPUT_DIR/vector" 10000 "$OUTPUT_DIR/Nc" "$OUPUT_DIR/R" "$OUTPUT_DIR/Pc" "$OUTPUT_DIR/C" "$OUTPUT_DIR/Mode"
-
-V = read($1, rows=$2, cols=1, format="text")
-
-# a set of number of values specify the number of cases of each categorical
-Nc = table(V,1);
-
-# the number of categories of a categorical variable
-R = nrow(Nc)
-
-# total count
-s = sum(Nc)
-
-# percentage values of each categorical compare to the total case number
-Pc = Nc / s
-
-# all categorical values of a categorical variable
-C = ppred(Nc, 0, ">")
-
-# mode
-mx = max(Nc)
-Mode =  ppred(Nc, mx, "==")
-
-write(Nc, $3, format="text")
-write(R, $4)
-write(Pc, $5, format="text")
-write(C, $6, format="text")
-write(Mode, $7, format="text")
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# Note this script is externalized to customers, please do not change w/o consulting component owner.
+# How to invoke this dml script Categorical.dml?
+# Assume C_HOME is set to the home of the dml script
+# Assume input and output directories are on hdfs as INPUT_DIR and OUTPUT_DIR
+# Assume rows = 10000 for vector
+# hadoop jar SystemML.jar -f $C_HOME/Categorical.dml -args "$INPUT_DIR/vector" 10000 "$OUTPUT_DIR/Nc" "$OUTPUT_DIR/R" "$OUTPUT_DIR/Pc" "$OUTPUT_DIR/C" "$OUTPUT_DIR/Mode"
+
+V = read($1, rows=$2, cols=1, format="text")
+
+# counts: the number of cases observed for each category
+Nc = table(V,1);
+
+# the number of categories of a categorical variable
+R = nrow(Nc)
+
+# total count
+s = sum(Nc)
+
+# fraction of the total number of cases that falls into each category
+Pc = Nc / s
+
+# all categorical values of a categorical variable
+C = ppred(Nc, 0, ">")
+
+# mode
+mx = max(Nc)
+Mode =  ppred(Nc, mx, "==")
+
+write(Nc, $3, format="text")
+write(R, $4)
+write(Pc, $5, format="text")
+write(C, $6, format="text")
+write(Mode, $7, format="text")
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/descriptivestats/CategoricalCategorical.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/descriptivestats/CategoricalCategorical.R b/src/test/scripts/applications/descriptivestats/CategoricalCategorical.R
index 7c9785c..24a391f 100644
--- a/src/test/scripts/applications/descriptivestats/CategoricalCategorical.R
+++ b/src/test/scripts/applications/descriptivestats/CategoricalCategorical.R
@@ -1,49 +1,49 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# JUnit test class: dml.test.integration.descriptivestats.CategoricalCategoricalTest.java
-# command line invocation assuming $CC_HOME is set to the home of the R script
-# Rscript $CC_HOME/CategoricalCategorical.R $CC_HOME/in/ $CC_HOME/expected/
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-A = readMM(paste(args[1], "A.mtx", sep=""));
-B = readMM(paste(args[1], "B.mtx", sep=""));
-
-F = table(A[,1],B[,1]);
-
-# chisq.test returns a list containing statistic, p-value, etc.
-cst = chisq.test(F);
-
-# get the chi-squared coefficient from the list
-chi_squared = as.numeric(cst[1]);
-pValue = as.numeric(cst[3]);
-
-write(pValue, paste(args[2], "PValue", sep=""));
-
-q = min(dim(F));
-W = sum(F);
-cramers_v = sqrt(chi_squared/(W*(q-1)));
-
-write(cramers_v, paste(args[2], "CramersV", sep=""));
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# JUnit test class: dml.test.integration.descriptivestats.CategoricalCategoricalTest.java
+# command line invocation assuming $CC_HOME is set to the home of the R script
+# Rscript $CC_HOME/CategoricalCategorical.R $CC_HOME/in/ $CC_HOME/expected/
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+A = readMM(paste(args[1], "A.mtx", sep=""));
+B = readMM(paste(args[1], "B.mtx", sep=""));
+
+F = table(A[,1],B[,1]);
+
+# chisq.test returns a list containing statistic, p-value, etc.
+cst = chisq.test(F);
+
+# get the chi-squared coefficient from the list
+chi_squared = as.numeric(cst[1]);
+pValue = as.numeric(cst[3]);
+
+write(pValue, paste(args[2], "PValue", sep=""));
+
+q = min(dim(F));
+W = sum(F);
+cramers_v = sqrt(chi_squared/(W*(q-1)));
+
+write(cramers_v, paste(args[2], "CramersV", sep=""));
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/descriptivestats/CategoricalCategorical.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/descriptivestats/CategoricalCategorical.dml b/src/test/scripts/applications/descriptivestats/CategoricalCategorical.dml
index 626bfeb..4301f91 100644
--- a/src/test/scripts/applications/descriptivestats/CategoricalCategorical.dml
+++ b/src/test/scripts/applications/descriptivestats/CategoricalCategorical.dml
@@ -1,56 +1,56 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# Note this script is externalized to customers, please do not change w/o consulting component owner.
-# How to invoke this dml script CategoricalCategorical.dml?
-# Assume CC_HOME is set to the home of the dml script
-# Assume input and output directories are on hdfs as INPUT_DIR and OUTPUT_DIR
-# Assume rows = 10000 for both A and B
-# hadoop jar SystemML.jar -f $CC_HOME/CategoricalCategorical.dml -args "$INPUT_DIR/A" 10000 "$INPUT_DIR/B" "$OUPUT_DIR/PValue" "$OUTPUT_DIR/CramersV"
-
-A = read($1, rows=$2, cols=1, format="text");
-B = read($3, rows=$2, cols=1, format="text");
-
-# Contingency Table
-F = table(A,B);
-
-# Chi-Squared
-W = sum(F);
-r = rowSums(F);
-c = colSums(F);
-E = (r %*% c)/W;
-T = (F-E)^2/E;
-chi_squared = sum(T);
-
-# compute p-value
-degFreedom = (nrow(F)-1)*(ncol(F)-1);
-pValue = pchisq(target=chi_squared, df=degFreedom, lower.tail=FALSE);
-
-# Cramer's V
-R = nrow(F);
-C = ncol(F);
-q = min(R,C);
-cramers_v = sqrt(chi_squared/(W*(q-1)));
-
-write(pValue, $4);
-write(cramers_v, $5);
-
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# Note this script is externalized to customers, please do not change w/o consulting component owner.
+# How to invoke this dml script CategoricalCategorical.dml?
+# Assume CC_HOME is set to the home of the dml script
+# Assume input and output directories are on hdfs as INPUT_DIR and OUTPUT_DIR
+# Assume rows = 10000 for both A and B
+# hadoop jar SystemML.jar -f $CC_HOME/CategoricalCategorical.dml -args "$INPUT_DIR/A" 10000 "$INPUT_DIR/B" "$OUTPUT_DIR/PValue" "$OUTPUT_DIR/CramersV"
+
+A = read($1, rows=$2, cols=1, format="text");
+B = read($3, rows=$2, cols=1, format="text");
+
+# Contingency Table
+F = table(A,B);
+
+# Chi-Squared
+W = sum(F);
+r = rowSums(F);
+c = colSums(F);
+E = (r %*% c)/W;
+T = (F-E)^2/E;
+chi_squared = sum(T);
+
+# compute p-value
+degFreedom = (nrow(F)-1)*(ncol(F)-1);
+pValue = pchisq(target=chi_squared, df=degFreedom, lower.tail=FALSE);
+
+# Cramer's V
+R = nrow(F);
+C = ncol(F);
+q = min(R,C);
+cramers_v = sqrt(chi_squared/(W*(q-1)));
+
+write(pValue, $4);
+write(cramers_v, $5);
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/descriptivestats/CategoricalCategoricalWithWeightsTest.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/descriptivestats/CategoricalCategoricalWithWeightsTest.R b/src/test/scripts/applications/descriptivestats/CategoricalCategoricalWithWeightsTest.R
index 9e3f797..bacc7e3 100644
--- a/src/test/scripts/applications/descriptivestats/CategoricalCategoricalWithWeightsTest.R
+++ b/src/test/scripts/applications/descriptivestats/CategoricalCategoricalWithWeightsTest.R
@@ -1,67 +1,67 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# JUnit test class: dml.test.integration.descriptivestats.CategoricalCategoricalTest.java
-# command line invocation assuming $CC_HOME is set to the home of the R script
-# Rscript $CC_HOME/CategoricalCategoricalWithWeightsTest.R $CC_HOME/in/ $CC_HOME/expected/
-# Usage: R --vanilla -args Xfile X < CategoricalCategoricalWithWeightsTest.R
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-#parseCommandArgs()
-######################
-
-print(commandArgs(TRUE)[1])
-
-A = readMM(paste(args[1], "A.mtx", sep=""));
-B = readMM(paste(args[1], "B.mtx", sep=""));
-WM = readMM(paste(args[1], "WM.mtx", sep=""));
-
-Av = A[,1];
-Bv = B[,1];
-WMv = WM[,1];
-
-# create a data frame with vectors A, B, WM
-df = data.frame(Av,Bv,WMv);
-
-# contingency table with weights
-F = xtabs ( WMv ~ Av + Bv, df);
-
-# chisq.test returns a list containing statistic, p-value, etc.
-cst = chisq.test(F);
-
-# get the chi-squared coefficient from the list
-chi_squared = as.numeric(cst[1]);
-pValue = as.numeric(cst[3]);
-
-write(pValue, paste(args[2], "PValue", sep=""));
-
-#######################
-
-q = min(dim(F));
-W = sum(F);
-cramers_v = sqrt(chi_squared/(W*(q-1)));
-
-write(cramers_v, paste(args[2], "CramersV", sep=""));
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# JUnit test class: dml.test.integration.descriptivestats.CategoricalCategoricalTest.java
+# command line invocation assuming $CC_HOME is set to the home of the R script
+# Rscript $CC_HOME/CategoricalCategoricalWithWeightsTest.R $CC_HOME/in/ $CC_HOME/expected/
+# Usage: R --vanilla -args Xfile X < CategoricalCategoricalWithWeightsTest.R
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+#parseCommandArgs()
+######################
+
+print(commandArgs(TRUE)[1])
+
+A = readMM(paste(args[1], "A.mtx", sep=""));
+B = readMM(paste(args[1], "B.mtx", sep=""));
+WM = readMM(paste(args[1], "WM.mtx", sep=""));
+
+Av = A[,1];
+Bv = B[,1];
+WMv = WM[,1];
+
+# create a data frame with vectors A, B, WM
+df = data.frame(Av,Bv,WMv);
+
+# contingency table with weights
+F = xtabs ( WMv ~ Av + Bv, df);
+
+# chisq.test returns a list containing statistic, p-value, etc.
+cst = chisq.test(F);
+
+# get the chi-squared coefficient from the list
+chi_squared = as.numeric(cst[1]);
+pValue = as.numeric(cst[3]);
+
+write(pValue, paste(args[2], "PValue", sep=""));
+
+#######################
+
+q = min(dim(F));
+W = sum(F);
+cramers_v = sqrt(chi_squared/(W*(q-1)));
+
+write(cramers_v, paste(args[2], "CramersV", sep=""));
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/descriptivestats/CategoricalCategoricalWithWeightsTest.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/descriptivestats/CategoricalCategoricalWithWeightsTest.dml b/src/test/scripts/applications/descriptivestats/CategoricalCategoricalWithWeightsTest.dml
index f92f42c..70e50f4 100644
--- a/src/test/scripts/applications/descriptivestats/CategoricalCategoricalWithWeightsTest.dml
+++ b/src/test/scripts/applications/descriptivestats/CategoricalCategoricalWithWeightsTest.dml
@@ -1,60 +1,60 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# Note this script is externalized to customers, please do not change w/o consulting component owner.
-# How to invoke this dml script CategoricalCategorical.dml?
-# Assume CC_HOME is set to the home of the dml script
-# Assume input and output directories are on hdfs as INPUT_DIR and OUTPUT_DIR
-# Assume rows = 10000 for both A and B
-# hadoop jar SystemML.jar -f $CC_HOME/CategoricalCategoricalWithWeightsTest.dml -args "$INPUT_DIR/A" 10000 "$INPUT_DIR/B" "$INPUT_DIR/WM" "$OUPUT_DIR/PValue" "$OUTPUT_DIR/CramersV"
-
-# A <- nominal 
-# B <- nominal 
-# WM <- weights
-
-A = read($1, rows=$2, cols=1, format="text");
-B = read($3, rows=$2, cols=1, format="text");
-WM = read($4, rows=$2, cols=1, format="text");
-
-# Contingency Table
-F = table(A,B,WM);
-
-# Chi-Squared
-W = sum(F);
-r = rowSums(F);
-c = colSums(F);
-E = (r %*% c)/W;
-T = (F-E)^2/E;
-chi_squared = sum(T);
-
-# compute p-value
-degFreedom = (nrow(F)-1)*(ncol(F)-1);
-pValue = pchisq(target=chi_squared, df=degFreedom, lower.tail=FALSE);
-
-# Cramer's V
-R = nrow(F);
-C = ncol(F);
-q = min(R,C);
-cramers_v = sqrt(chi_squared/(W*(q-1)));
-
-write(pValue, $5);
-write(cramers_v, $6);
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# Note this script is externalized to customers, please do not change w/o consulting component owner.
+# How to invoke this dml script CategoricalCategoricalWithWeightsTest.dml?
+# Assume CC_HOME is set to the home of the dml script
+# Assume input and output directories are on hdfs as INPUT_DIR and OUTPUT_DIR
+# Assume rows = 10000 for both A and B
+# hadoop jar SystemML.jar -f $CC_HOME/CategoricalCategoricalWithWeightsTest.dml -args "$INPUT_DIR/A" 10000 "$INPUT_DIR/B" "$INPUT_DIR/WM" "$OUTPUT_DIR/PValue" "$OUTPUT_DIR/CramersV"
+
+# A <- nominal 
+# B <- nominal 
+# WM <- weights
+
+A = read($1, rows=$2, cols=1, format="text");
+B = read($3, rows=$2, cols=1, format="text");
+WM = read($4, rows=$2, cols=1, format="text");
+
+# Contingency Table
+F = table(A,B,WM);
+
+# Chi-Squared
+W = sum(F);
+r = rowSums(F);
+c = colSums(F);
+E = (r %*% c)/W;
+T = (F-E)^2/E;
+chi_squared = sum(T);
+
+# compute p-value
+degFreedom = (nrow(F)-1)*(ncol(F)-1);
+pValue = pchisq(target=chi_squared, df=degFreedom, lower.tail=FALSE);
+
+# Cramer's V
+R = nrow(F);
+C = ncol(F);
+q = min(R,C);
+cramers_v = sqrt(chi_squared/(W*(q-1)));
+
+write(pValue, $5);
+write(cramers_v, $6);
+


[38/55] [partial] incubator-systemml git commit: [SYSTEMML-482] [SYSTEMML-480] Adding a Git attributes file to enfore Unix-styled line endings, and normalizing all of the line endings.

Posted by du...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/scripts/utils/project.dml
----------------------------------------------------------------------
diff --git a/scripts/utils/project.dml b/scripts/utils/project.dml
index dc69bd0..ee6cd80 100644
--- a/scripts/utils/project.dml
+++ b/scripts/utils/project.dml
@@ -1,80 +1,80 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# Utility script to project columns from input matrix.
-#
-# Parameters:
-#    X       : (input)  filename of data matrix
-#    P       : (input)  filename of 1-column projection matrix containing columnIDs
-#    o       : (output) filename of output matrix with projected columns
-#    exclude : (default FALSE) TRUE means P contains columnIds to be projected
-#                              FALSE means P contains columnsIDS to be excluded
-#    ofmt    : (default binary) format of output matrix
-#
-# Example:
-#   hadoop jar SystemML.jar -f algorithms/utils/project.dml -nvargs X="/tmp/M.mtx" P="/tmp/P.mtx" o="/tmp/PX.mtx" 
-#
-# Assumptions:
-# The order of colIDs in P is preserved. Order of columns in result is same as order of columns in P.
-#      i.e. projecting columns 4 and 2 of X results in a matrix with columns 4 and 2.
-# If P specifies the exclude list, then projected columns are order preserved.
-
-exclude = ifdef ($exclude, FALSE);
-ofmt = ifdef ($ofmt, "binary");
-
-X = read ($X)
-P = read ($P)
-
-# create projection matrix using projection list and sequence matrix, and pad with 0s. The size of
-# PP is nbrOfColsInX x nbrOfColsToKeep
-
-if (exclude==FALSE)
-{
-   # create projection matrix using projection list and sequence matrix, and pad with 0s. The size
-   # of PP is nbrOfColsInX x nbrOfColsToKeep
-   PP = table(P, seq(1, nrow(P), 1), ncol(X), nrow(P)) 
-
- } else {
-   # create new vector P with list of columns to keep using original vector P containing exclude
-   # columns. These are all small vector operations.
-   C = table(P, seq(1, nrow(P), 1))
-   E = rowSums(C);
-      
-   # Row pad w/ 0s
-   EE = matrix (0, rows=ncol(X), cols=1)
-   EE[1:nrow(E),1] = E
-
-   # Convert exclude column list to include column list, and create column indices
-   EE = ppred(EE, 0, "==")
-   EE = EE * seq(1, ncol(X), 1)
-   P = removeEmpty(target=EE, margin="rows")
-
-   PP = table(P, seq(1, nrow(P), 1), ncol(X), nrow(P))
-
-}
-
-# Perform projection using permutation matrix
-PX = X %*% PP
-
-# Write output
-write (PX, $o, format=ofmt)
-
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# Utility script to project columns from input matrix.
+#
+# Parameters:
+#    X       : (input)  filename of data matrix
+#    P       : (input)  filename of 1-column projection matrix containing columnIDs
+#    o       : (output) filename of output matrix with projected columns
+#    exclude : (default FALSE) FALSE means P contains columnIDs to be projected
+#                              TRUE means P contains columnIDs to be excluded
+#    ofmt    : (default binary) format of output matrix
+#
+# Example:
+#   hadoop jar SystemML.jar -f algorithms/utils/project.dml -nvargs X="/tmp/M.mtx" P="/tmp/P.mtx" o="/tmp/PX.mtx" 
+#
+# Assumptions:
+# The order of colIDs in P is preserved. Order of columns in result is same as order of columns in P.
+#      i.e. projecting columns 4 and 2 of X results in a matrix with columns 4 and 2.
+# If P specifies the exclude list, then the remaining columns keep their original order in X.
+
+exclude = ifdef ($exclude, FALSE);
+ofmt = ifdef ($ofmt, "binary");
+
+X = read ($X)
+P = read ($P)
+
+# create projection matrix using projection list and sequence matrix, and pad with 0s. The size of
+# PP is nbrOfColsInX x nbrOfColsToKeep
+
+if (exclude==FALSE)
+{
+   # create projection matrix using projection list and sequence matrix, and pad with 0s. The size
+   # of PP is nbrOfColsInX x nbrOfColsToKeep
+   PP = table(P, seq(1, nrow(P), 1), ncol(X), nrow(P)) 
+
+ } else {
+   # create new vector P with list of columns to keep using original vector P containing exclude
+   # columns. These are all small vector operations.
+   C = table(P, seq(1, nrow(P), 1))
+   E = rowSums(C);
+      
+   # Row pad w/ 0s
+   EE = matrix (0, rows=ncol(X), cols=1)
+   EE[1:nrow(E),1] = E
+
+   # Convert exclude column list to include column list, and create column indices
+   EE = ppred(EE, 0, "==")
+   EE = EE * seq(1, ncol(X), 1)
+   P = removeEmpty(target=EE, margin="rows")
+
+   PP = table(P, seq(1, nrow(P), 1), ncol(X), nrow(P))
+
+}
+
+# Perform projection using permutation matrix
+PX = X %*% PP
+
+# Write output
+write (PX, $o, format=ofmt)
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/scripts/utils/rowIndexMax.dml
----------------------------------------------------------------------
diff --git a/scripts/utils/rowIndexMax.dml b/scripts/utils/rowIndexMax.dml
index 80af2e1..1e5cbc7 100644
--- a/scripts/utils/rowIndexMax.dml
+++ b/scripts/utils/rowIndexMax.dml
@@ -1,38 +1,38 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# Utility script to return for each row the column nbr with the largest value. If all the values in
-# a row are the same, then the largest column nbr is returned.
-#
-# Parameters:
-#    I       : (input)  filename of input
-#    O       : (output) filename of output
-#    ofmt    : default "csv"; format of O: "csv", "binary"
-#
-# Example:
-#   hadoop jar SystemML.jar -f algorithms/utils/rowIndexMax.dml -nvargs I="/tmp/X.mtx" O="/tmp/X2.mtx"
-#
-
-ofmt = ifdef($ofmt, "csv")
-
-M = read($I)
-C = rowIndexMax(M)
-write(C, $O, format=ofmt)
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# Utility script to return for each row the column nbr with the largest value. If all the values in
+# a row are the same, then the largest column nbr is returned.
+#
+# Parameters:
+#    I       : (input)  filename of input
+#    O       : (output) filename of output
+#    ofmt    : default "csv"; format of O: "csv", "binary"
+#
+# Example:
+#   hadoop jar SystemML.jar -f algorithms/utils/rowIndexMax.dml -nvargs I="/tmp/X.mtx" O="/tmp/X2.mtx"
+#
+
+ofmt = ifdef($ofmt, "csv")
+
+M = read($I)
+C = rowIndexMax(M)
+write(C, $O, format=ofmt)

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/scripts/utils/splitXY.dml
----------------------------------------------------------------------
diff --git a/scripts/utils/splitXY.dml b/scripts/utils/splitXY.dml
index 7d5fc24..82027a4 100644
--- a/scripts/utils/splitXY.dml
+++ b/scripts/utils/splitXY.dml
@@ -1,62 +1,62 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# Utility script to split X into new X and Y.
-#
-# Parameters:
-#    X       : (input)  filename of data matrix
-#    y       : (default ncol(X))  colIndex
-#    OX      : (output) filename of output matrix with all columns except y
-#    OY      : (output) filename of output matrix with y column
-#    ofmt    : (default binary) format of OX and OY output matrix
-#
-# Example:
-#   hadoop jar SystemML.jar -f algorithms/utils/splitXY.dml -nvargs X="/tmp/X.mtx" y=50 OX="/tmp/OX.mtx  OY="/tmp/OY.mtx  
-#
-
-ofmt = ifdef($ofmt, "binary")
-y = ifdef($y, ncol($X))
-
-X = read ($X)
-
-if (y == 1)
-{
-   OX = X[,y+1:ncol(X)]
-   OY = X[,y]
-} 
-else if (y == ncol(X))
-{
-   OX = X[,1:y-1]
-   OY = X[,y]
-} 
-else 
-{
-   OX1 = X[,1:y-1]
-   OX2 = X[,y+1:ncol(X)]
-   OX = append (OX1, OX2)
-   OY = X[,y]
-}
-
-# Write output
-write (OX, $OX, format=ofmt)
-write (OY, $OY, format=ofmt)
-
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# Utility script to split X into new X and Y.
+#
+# Parameters:
+#    X       : (input)  filename of data matrix
+#    y       : (default ncol(X))  colIndex
+#    OX      : (output) filename of output matrix with all columns except y
+#    OY      : (output) filename of output matrix with y column
+#    ofmt    : (default binary) format of OX and OY output matrix
+#
+# Example:
+#   hadoop jar SystemML.jar -f algorithms/utils/splitXY.dml -nvargs X="/tmp/X.mtx" y=50 OX="/tmp/OX.mtx" OY="/tmp/OY.mtx"
+#
+
+ofmt = ifdef($ofmt, "binary")
+y = ifdef($y, ncol($X))
+
+X = read ($X)
+
+if (y == 1)
+{
+   OX = X[,y+1:ncol(X)]
+   OY = X[,y]
+} 
+else if (y == ncol(X))
+{
+   OX = X[,1:y-1]
+   OY = X[,y]
+} 
+else 
+{
+   OX1 = X[,1:y-1]
+   OX2 = X[,y+1:ncol(X)]
+   OX = append (OX1, OX2)
+   OY = X[,y]
+}
+
+# Write output
+write (OX, $OX, format=ofmt)
+write (OY, $OY, format=ofmt)
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/scripts/utils/write.dml
----------------------------------------------------------------------
diff --git a/scripts/utils/write.dml b/scripts/utils/write.dml
index 7861a3a..f1c81e4 100644
--- a/scripts/utils/write.dml
+++ b/scripts/utils/write.dml
@@ -1,39 +1,39 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# Utility script to change format of X.
-#
-# Parameters:
-#    I       : (input)  filename of input
-#    O       : (output) filename of output
-#    ofmt    : format of O: "csv", "binary"
-#    sep     : default ","; CSV separator in output
-#    header  : default "FALSE"; CSV header: TRUE | FALSE
-#
-# Example:
-#   hadoop jar SystemML.jar -f algorithms/utils/write.dml -nvargs I="/tmp/X.mtx" O="/tmp/X2.mtx" ofmt="binary" sep="|" header=TRUE
-#
-
-sep = ifdef($sep, ",")
-header = ifdef($header, FALSE )
-
-M = read($I)
-write(M, $O, format=$ofmt, sep=sep, header=header)
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# Utility script to read the input matrix I and write it to O in a different format.
+#
+# Parameters:
+#    I       : (input)  filename of input
+#    O       : (output) filename of output
+#    ofmt    : format of O: "csv", "binary"
+#    sep     : default ","; CSV separator in output
+#    header  : default "FALSE"; CSV header: TRUE | FALSE
+#
+# Example:
+#   hadoop jar SystemML.jar -f algorithms/utils/write.dml -nvargs I="/tmp/X.mtx" O="/tmp/X2.mtx" ofmt="binary" sep="|" header=TRUE
+#
+
+sep = ifdef($sep, ",")
+header = ifdef($header, FALSE )
+
+M = read($I)
+write(M, $O, format=$ofmt, sep=sep, header=header)

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/main/java/org/apache/sysml/lops/BinaryScalar.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/lops/BinaryScalar.java b/src/main/java/org/apache/sysml/lops/BinaryScalar.java
index 0f423e3..f61c145 100644
--- a/src/main/java/org/apache/sysml/lops/BinaryScalar.java
+++ b/src/main/java/org/apache/sysml/lops/BinaryScalar.java
@@ -1,197 +1,197 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.sysml.lops;
-
-
- 
-import org.apache.sysml.lops.LopProperties.ExecLocation;
-import org.apache.sysml.lops.LopProperties.ExecType;
-import org.apache.sysml.lops.compile.JobType;
-import org.apache.sysml.parser.Expression.*;
-
-/**
- * Lop to perform binary scalar operations. Both inputs must be scalars.
- * Example i = j + k, i = i + 1. 
- */
-
-public class BinaryScalar extends Lop 
-{	
-	
-	public enum OperationTypes {
-		ADD, SUBTRACT, SUBTRACTRIGHT, MULTIPLY, DIVIDE, MODULUS, INTDIV,
-		LESS_THAN, LESS_THAN_OR_EQUALS, GREATER_THAN, GREATER_THAN_OR_EQUALS, EQUALS, NOT_EQUALS,
-		AND, OR, 
-		LOG,POW,MAX,MIN,PRINT,
-		IQSIZE,
-		Over,
-	}
-	
-	OperationTypes operation;
-
-	/**
-	 * This overloaded constructor is used for setting exec type in case of spark backend
-	 */
-	public BinaryScalar(Lop input1, Lop input2, OperationTypes op, DataType dt, ValueType vt, ExecType et) 
-	{
-		super(Lop.Type.BinaryCP, dt, vt);		
-		operation = op;		
-		this.addInput(input1);
-		this.addInput(input2);
-		input1.addOutput(this);
-		input2.addOutput(this);
-
-		boolean breaksAlignment = false; // this field does not carry any meaning for this lop
-		boolean aligner = false;
-		boolean definesMRJob = false;
-		lps.addCompatibility(JobType.INVALID);
-		this.lps.setProperties(inputs, et, ExecLocation.ControlProgram, breaksAlignment, aligner, definesMRJob );
-	}
-	
-	/**
-	 * Constructor to perform a scalar operation
-	 * @param input
-	 * @param op
-	 */
-
-	public BinaryScalar(Lop input1, Lop input2, OperationTypes op, DataType dt, ValueType vt) 
-	{
-		super(Lop.Type.BinaryCP, dt, vt);		
-		operation = op;		
-		this.addInput(input1);
-		this.addInput(input2);
-		input1.addOutput(this);
-		input2.addOutput(this);
-
-		boolean breaksAlignment = false; // this field does not carry any meaning for this lop
-		boolean aligner = false;
-		boolean definesMRJob = false;
-		lps.addCompatibility(JobType.INVALID);
-		this.lps.setProperties(inputs, ExecType.CP, ExecLocation.ControlProgram, breaksAlignment, aligner, definesMRJob );
-	}
-
-	@Override
-	public String toString() {
-		return "Operation: " + operation;
-	}
-	
-	public OperationTypes getOperationType(){
-		return operation;
-	}
-
-	@Override
-	public String getInstructions(String input1, String input2, String output) throws LopsException
-	{
-		String opString = getOpcode( operation );
-		
-		
-		
-		StringBuilder sb = new StringBuilder();
-		
-		sb.append(getExecType());
-		sb.append(Lop.OPERAND_DELIMITOR);
-		
-		sb.append( opString );
-		sb.append( OPERAND_DELIMITOR );
-		
-		sb.append( getInputs().get(0).prepScalarInputOperand(getExecType()) );
-		sb.append( OPERAND_DELIMITOR );
-		
-		sb.append( getInputs().get(1).prepScalarInputOperand(getExecType()));
-		sb.append( OPERAND_DELIMITOR );
-		
-		sb.append( prepOutputOperand(output));
-
-		return sb.toString();
-	}
-	
-	@Override
-	public Lop.SimpleInstType getSimpleInstructionType()
-	{
-		switch (operation){
- 
-		default:
-			return SimpleInstType.Scalar;
-		}
-	}
-	
-	/**
-	 * 
-	 * @param op
-	 * @return
-	 */
-	public static String getOpcode( OperationTypes op )
-	{
-		switch ( op ) 
-		{
-			/* Arithmetic */
-			case ADD:
-				return "+";
-			case SUBTRACT:
-				return "-";
-			case MULTIPLY:
-				return "*";
-			case DIVIDE:
-				return "/";
-			case MODULUS:
-				return "%%";	
-			case INTDIV:
-				return "%/%";	
-			case POW:	
-				return "^";
-				
-			/* Relational */
-			case LESS_THAN:
-				return "<";
-			case LESS_THAN_OR_EQUALS:
-				return "<=";
-			case GREATER_THAN:
-				return ">";
-			case GREATER_THAN_OR_EQUALS:
-				return ">=";
-			case EQUALS:
-				return "==";
-			case NOT_EQUALS:
-				return "!=";
-			
-			/* Boolean */
-			case AND:
-				return "&&";
-			case OR:
-				return "||";
-			
-			/* Builtin Functions */
-			case LOG:
-				return "log";
-			case MIN:
-				return "min"; 
-			case MAX:
-				return "max"; 
-			
-			case PRINT:
-				return "print";
-				
-			case IQSIZE:
-				return "iqsize"; 
-				
-			default:
-				throw new UnsupportedOperationException("Instruction is not defined for BinaryScalar operator: " + op);
-		}
-	}
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.lops;
+
+
+ 
+import org.apache.sysml.lops.LopProperties.ExecLocation;
+import org.apache.sysml.lops.LopProperties.ExecType;
+import org.apache.sysml.lops.compile.JobType;
+import org.apache.sysml.parser.Expression.*;
+
+/**
+ * Lop to perform binary scalar operations. Both inputs must be scalars.
+ * Example i = j + k, i = i + 1. 
+ */
+
+public class BinaryScalar extends Lop 
+{	
+	
+	public enum OperationTypes {
+		ADD, SUBTRACT, SUBTRACTRIGHT, MULTIPLY, DIVIDE, MODULUS, INTDIV,
+		LESS_THAN, LESS_THAN_OR_EQUALS, GREATER_THAN, GREATER_THAN_OR_EQUALS, EQUALS, NOT_EQUALS,
+		AND, OR, 
+		LOG,POW,MAX,MIN,PRINT,
+		IQSIZE,
+		Over,
+	}
+	
+	OperationTypes operation;
+
+	/**
+	 * This overloaded constructor is used for setting exec type in case of spark backend
+	 */
+	public BinaryScalar(Lop input1, Lop input2, OperationTypes op, DataType dt, ValueType vt, ExecType et) 
+	{
+		super(Lop.Type.BinaryCP, dt, vt);		
+		operation = op;		
+		this.addInput(input1);
+		this.addInput(input2);
+		input1.addOutput(this);
+		input2.addOutput(this);
+
+		boolean breaksAlignment = false; // this field does not carry any meaning for this lop
+		boolean aligner = false;
+		boolean definesMRJob = false;
+		lps.addCompatibility(JobType.INVALID);
+		this.lps.setProperties(inputs, et, ExecLocation.ControlProgram, breaksAlignment, aligner, definesMRJob );
+	}
+	
+	/**
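+	 * Constructor to perform a scalar operation; the exec type is fixed to ExecType.CP.
+	 * @param input1 first input lop (input2 is the second input lop)
+	 * @param op operation to perform (dt and vt are passed on to the Lop super constructor)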
+	 */
+
+	public BinaryScalar(Lop input1, Lop input2, OperationTypes op, DataType dt, ValueType vt) 
+	{
+		super(Lop.Type.BinaryCP, dt, vt);		
+		operation = op;		
+		this.addInput(input1);
+		this.addInput(input2);
+		input1.addOutput(this);
+		input2.addOutput(this);
+
+		boolean breaksAlignment = false; // this field does not carry any meaning for this lop
+		boolean aligner = false;
+		boolean definesMRJob = false;
+		lps.addCompatibility(JobType.INVALID);
+		this.lps.setProperties(inputs, ExecType.CP, ExecLocation.ControlProgram, breaksAlignment, aligner, definesMRJob );
+	}
+
+	@Override
+	public String toString() {
+		return "Operation: " + operation;
+	}
+	
+	public OperationTypes getOperationType(){
+		return operation;
+	}
+
+	@Override
+	public String getInstructions(String input1, String input2, String output) throws LopsException
+	{
+		String opString = getOpcode( operation );
+		
+		
+		
+		StringBuilder sb = new StringBuilder();
+		
+		sb.append(getExecType());
+		sb.append(Lop.OPERAND_DELIMITOR);
+		
+		sb.append( opString );
+		sb.append( OPERAND_DELIMITOR );
+		
+		sb.append( getInputs().get(0).prepScalarInputOperand(getExecType()) );
+		sb.append( OPERAND_DELIMITOR );
+		
+		sb.append( getInputs().get(1).prepScalarInputOperand(getExecType()));
+		sb.append( OPERAND_DELIMITOR );
+		
+		sb.append( prepOutputOperand(output));
+
+		return sb.toString();
+	}
+	
+	@Override
+	public Lop.SimpleInstType getSimpleInstructionType()
+	{
+		switch (operation){
+ 
+		default:
+			return SimpleInstType.Scalar;
+		}
+	}
+	
+	/**
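+	 * Returns the instruction opcode string for a binary scalar operation type.
+	 * @param op operation type to translate
+	 * @return opcode string, e.g. "+" for ADD; unmapped types throw UnsupportedOperationException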
+	 */
+	public static String getOpcode( OperationTypes op )
+	{
+		switch ( op ) 
+		{
+			/* Arithmetic */
+			case ADD:
+				return "+";
+			case SUBTRACT:
+				return "-";
+			case MULTIPLY:
+				return "*";
+			case DIVIDE:
+				return "/";
+			case MODULUS:
+				return "%%";	
+			case INTDIV:
+				return "%/%";	
+			case POW:	
+				return "^";
+				
+			/* Relational */
+			case LESS_THAN:
+				return "<";
+			case LESS_THAN_OR_EQUALS:
+				return "<=";
+			case GREATER_THAN:
+				return ">";
+			case GREATER_THAN_OR_EQUALS:
+				return ">=";
+			case EQUALS:
+				return "==";
+			case NOT_EQUALS:
+				return "!=";
+			
+			/* Boolean */
+			case AND:
+				return "&&";
+			case OR:
+				return "||";
+			
+			/* Builtin Functions */
+			case LOG:
+				return "log";
+			case MIN:
+				return "min"; 
+			case MAX:
+				return "max"; 
+			
+			case PRINT:
+				return "print";
+				
+			case IQSIZE:
+				return "iqsize"; 
+				
+			default:
+				throw new UnsupportedOperationException("Instruction is not defined for BinaryScalar operator: " + op);
+		}
+	}
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/main/java/org/apache/sysml/parser/antlr4/Dml.g4
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/parser/antlr4/Dml.g4 b/src/main/java/org/apache/sysml/parser/antlr4/Dml.g4
index 400a412..bada11e 100644
--- a/src/main/java/org/apache/sysml/parser/antlr4/Dml.g4
+++ b/src/main/java/org/apache/sysml/parser/antlr4/Dml.g4
@@ -1,201 +1,201 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-grammar Dml;
-
-@header
-{
-	// Commenting the package name and explicitly passing it in build.xml to maintain compatibility with maven plugin
-    // package org.apache.sysml.antlr4;
-}
-
-// DML Program is a list of expression
-// For now, we only allow global function definitions (not nested or inside a while block)
-dmlprogram: (blocks+=statement | functionBlocks+=functionStatement)* EOF;
-
-statement returns [ StatementInfo info ]
-@init {
-       // This actions occurs regardless of how many alternatives in this rule
-       $info = new StatementInfo();
-} :
-    // ------------------------------------------
-    // ImportStatement
-    'source' '(' filePath = STRING ')'  'as' namespace=ID ';'*       # ImportStatement
-    | 'setwd'  '(' pathValue = STRING ')' ';'*                          # PathStatement
-    // ------------------------------------------
-    // Treat function call as AssignmentStatement or MultiAssignmentStatement
-    // For backward compatibility and also since the behavior of foo() * A + foo() ... where foo returns A
-    // Convert FunctionCallIdentifier(paramExprs, ..) -> source
-    | // TODO: Throw an informative error if user doesnot provide the optional assignment
-    ( targetList+=dataIdentifier ('='|'<-') )? name=ID '(' (paramExprs+=parameterizedExpression (',' paramExprs+=parameterizedExpression)* )? ')' ';'*  # FunctionCallAssignmentStatement
-    | '[' targetList+=dataIdentifier (',' targetList+=dataIdentifier)* ']' ('='|'<-') name=ID '(' (paramExprs+=parameterizedExpression (',' paramExprs+=parameterizedExpression)* )? ')' ';'*  # FunctionCallMultiAssignmentStatement
-    // {notifyErrorListeners("Too many parentheses");}
-    // ------------------------------------------
-    // AssignmentStatement
-    | targetList+=dataIdentifier op=('<-'|'=') 'ifdef' '(' commandLineParam=dataIdentifier ','  source=expression ')' ';'*   # IfdefAssignmentStatement
-    | targetList+=dataIdentifier op=('<-'|'=') source=expression ';'*   # AssignmentStatement
-    // ------------------------------------------
-    // We don't support block statement
-    // | '{' body+=expression ';'* ( body+=expression ';'* )*  '}' # BlockStatement
-    // ------------------------------------------
-    // IfStatement
-    | 'if' '(' predicate=expression ')' (ifBody+=statement ';'* | '{' (ifBody+=statement ';'*)*  '}')  ('else' (elseBody+=statement ';'* | '{' (elseBody+=statement ';'*)*  '}'))?  # IfStatement
-    // ------------------------------------------
-    // ForStatement & ParForStatement
-    | 'for' '(' iterVar=ID 'in' iterPred=iterablePredicate (',' parForParams+=strictParameterizedExpression)* ')' (body+=statement ';'* | '{' (body+=statement ';'* )*  '}')  # ForStatement
-    // Convert strictParameterizedExpression to HashMap<String, String> for parForParams
-    | 'parfor' '(' iterVar=ID 'in' iterPred=iterablePredicate (',' parForParams+=strictParameterizedExpression)* ')' (body+=statement ';'* | '{' (body+=statement ';'*)*  '}')  # ParForStatement
-    | 'while' '(' predicate=expression ')' (body+=statement ';'* | '{' (body+=statement ';'*)* '}')  # WhileStatement
-    // ------------------------------------------
-;
-
-iterablePredicate returns [ ExpressionInfo info ]
-  @init {
-         // This actions occurs regardless of how many alternatives in this rule
-         $info = new ExpressionInfo();
-  } :
-    from=expression ':' to=expression #IterablePredicateColonExpression
-    | ID '(' from=expression ',' to=expression ',' increment=expression ')' #IterablePredicateSeqExpression
-    ;
-
-functionStatement returns [ StatementInfo info ]
-@init {
-       // This actions occurs regardless of how many alternatives in this rule
-       $info = new StatementInfo();
-} :
-    // ------------------------------------------
-    // FunctionStatement & ExternalFunctionStatement
-    // small change: only allow typed arguments here ... instead of data identifier
-    name=ID ('<-'|'=') 'function' '(' ( inputParams+=typedArgNoAssign (',' inputParams+=typedArgNoAssign)* )? ')'  ( 'return' '(' ( outputParams+=typedArgNoAssign (',' outputParams+=typedArgNoAssign)* )? ')' )? '{' (body+=statement ';'*)* '}' # InternalFunctionDefExpression
-    | name=ID ('<-'|'=') 'externalFunction' '(' ( inputParams+=typedArgNoAssign (',' inputParams+=typedArgNoAssign)* )? ')'  ( 'return' '(' ( outputParams+=typedArgNoAssign (',' outputParams+=typedArgNoAssign)* )? ')' )?   'implemented' 'in' '(' ( otherParams+=strictParameterizedKeyValueString (',' otherParams+=strictParameterizedKeyValueString)* )? ')' ';'*    # ExternalFunctionDefExpression
-    // ------------------------------------------
-;
-
-
-// Other data identifiers are typedArgNoAssign, parameterizedExpression and strictParameterizedExpression
-dataIdentifier returns [ ExpressionInfo dataInfo ]
-@init {
-       // This actions occurs regardless of how many alternatives in this rule
-       $dataInfo = new ExpressionInfo();
-       // $dataInfo.expr = new org.apache.sysml.parser.DataIdentifier();
-} :
-    // ------------------------------------------
-    // IndexedIdentifier
-    name=ID '[' (rowLower=expression (':' rowUpper=expression)?)? ',' (colLower=expression (':' colUpper=expression)?)? ']' # IndexedExpression
-    // ------------------------------------------
-    | ID                                            # SimpleDataIdentifierExpression
-    | COMMANDLINE_NAMED_ID                          # CommandlineParamExpression
-    | COMMANDLINE_POSITION_ID                       # CommandlinePositionExpression
-;
-expression returns [ ExpressionInfo info ]
-@init {
-       // This actions occurs regardless of how many alternatives in this rule
-       $info = new ExpressionInfo();
-       // $info.expr = new org.apache.sysml.parser.BinaryExpression(org.apache.sysml.parser.Expression.BinaryOp.INVALID);
-} :
-    // ------------------------------------------
-    // BinaryExpression
-    // power
-    <assoc=right> left=expression op='^' right=expression  # PowerExpression
-    // unary plus and minus
-    | op=('-'|'+') left=expression                        # UnaryExpression
-    // sequence - since we are only using this into for
-    //| left=expression op=':' right=expression             # SequenceExpression
-    // matrix multiply
-    | left=expression op='%*%' right=expression           # MatrixMulExpression
-    // modulus and integer division
-    | left=expression op=('%/%' | '%%' ) right=expression # ModIntDivExpression
-    // arithmetic multiply and divide
-    | left=expression op=('*'|'/') right=expression       # MultDivExpression
-    // arithmetic addition and subtraction
-    | left=expression op=('+'|'-') right=expression       # AddSubExpression
-    // ------------------------------------------
-    // RelationalExpression
-    | left=expression op=('>'|'>='|'<'|'<='|'=='|'!=') right=expression # RelationalExpression
-    // ------------------------------------------
-    // BooleanExpression
-    // boolean not
-    | op='!' left=expression # BooleanNotExpression
-    // boolean and
-    | left=expression op=('&'|'&&') right=expression # BooleanAndExpression
-    // boolean or
-    | left=expression op=('|'|'||') right=expression # BooleanOrExpression
-
-    // ---------------------------------
-    // only applicable for builtin function expressions
-    | name=ID '(' (paramExprs+=parameterizedExpression (',' paramExprs+=parameterizedExpression)* )? ')' ';'*  # BuiltinFunctionExpression
-
-    // 4. Atomic
-    | '(' left=expression ')'                       # AtomicExpression
-
-    // Should you allow indexed expression here ?
-    // | '[' targetList+=expression (',' targetList+=expression)* ']'  # MultiIdExpression
-
-    // | BOOLEAN                                       # ConstBooleanIdExpression
-    | 'TRUE'                                        # ConstTrueExpression
-    | 'FALSE'                                       # ConstFalseExpression
-    | INT                                           # ConstIntIdExpression
-    | DOUBLE                                        # ConstDoubleIdExpression
-    | STRING                                        # ConstStringIdExpression
-    | dataIdentifier                                # DataIdExpression
-    // Special
-    // | 'NULL' | 'NA' | 'Inf' | 'NaN'
-;
-
-typedArgNoAssign : paramType=ml_type paramName=ID;
-parameterizedExpression : (paramName=ID '=')? paramVal=expression;
-strictParameterizedExpression : paramName=ID '=' paramVal=expression ;
-strictParameterizedKeyValueString : paramName=ID '=' paramVal=STRING ;
-ID : (ALPHABET (ALPHABET|DIGIT|'_')*  '::')? ALPHABET (ALPHABET|DIGIT|'_')*
-    // Special ID cases:
-   // | 'matrix' // --> This is a special case which causes lot of headache
-   | 'as.scalar' | 'as.matrix' | 'as.double' | 'as.integer' | 'as.logical' | 'index.return' | 'lower.tail'
-;
-// Unfortunately, we have datatype name clashing with builtin function name: matrix :(
-// Therefore, ugly work around for checking datatype
-ml_type :  valueType | dataType '[' valueType ']';
-// Note to reduce number of keywords, these are case-sensitive,
-// To allow case-insenstive,  'int' becomes: ('i' | 'I') ('n' | 'N') ('t' | 'T')
-valueType: 'int' | 'integer' | 'string' | 'boolean' | 'double'
-            | 'Int' | 'Integer' | 'String' | 'Boolean' | 'Double';
-dataType:
-        // 'scalar' # ScalarDataTypeDummyCheck
-        // |
-        ID # MatrixDataTypeCheck //{ if($ID.text.compareTo("matrix") != 0) { notifyErrorListeners("incorrect datatype"); } }
-        //|  'matrix' //---> See ID, this causes lot of headache
-        ;
-INT : DIGIT+  [Ll]?;
-// BOOLEAN : 'TRUE' | 'FALSE';
-DOUBLE: DIGIT+ '.' DIGIT* EXP? [Ll]?
-| DIGIT+ EXP? [Ll]?
-| '.' DIGIT+ EXP? [Ll]?
-;
-DIGIT: '0'..'9';
-ALPHABET : [a-zA-Z] ;
-fragment EXP : ('E' | 'e') ('+' | '-')? INT ;
-COMMANDLINE_NAMED_ID: '$' ALPHABET (ALPHABET|DIGIT|'_')*;
-COMMANDLINE_POSITION_ID: '$' DIGIT+;
-
-// supports single and double quoted string with escape characters
-STRING: '"' ( ESC | ~[\\"] )*? '"' | '\'' ( ESC | ~[\\'] )*? '\'';
-fragment ESC : '\\' [abtnfrv"'\\] ;
-// Comments, whitespaces and new line
-LINE_COMMENT : '#' .*? '\r'? '\n' -> skip ;
-MULTILINE_BLOCK_COMMENT : '/*' .*? '*/' -> skip ;
-WHITESPACE : (' ' | '\t' | '\r' | '\n')+ -> skip ;
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+grammar Dml;
+
+@header
+{
+	// Commenting the package name and explicitly passing it in build.xml to maintain compatibility with maven plugin
+    // package org.apache.sysml.parser.dml;
+}
+
+// DML Program is a list of expression
+// For now, we only allow global function definitions (not nested or inside a while block)
+dmlprogram: (blocks+=statement | functionBlocks+=functionStatement)* EOF;
+
+statement returns [ StatementInfo info ]
+@init {
+       // This actions occurs regardless of how many alternatives in this rule
+       $info = new StatementInfo();
+} :
+    // ------------------------------------------
+    // ImportStatement
+    'source' '(' filePath = STRING ')'  'as' namespace=ID ';'*       # ImportStatement
+    | 'setwd'  '(' pathValue = STRING ')' ';'*                          # PathStatement
+    // ------------------------------------------
+    // Treat function call as AssignmentStatement or MultiAssignmentStatement
+    // For backward compatibility and also since the behavior of foo() * A + foo() ... where foo returns A
+    // Convert FunctionCallIdentifier(paramExprs, ..) -> source
+    | // TODO: Throw an informative error if user doesnot provide the optional assignment
+    ( targetList+=dataIdentifier ('='|'<-') )? name=ID '(' (paramExprs+=parameterizedExpression (',' paramExprs+=parameterizedExpression)* )? ')' ';'*  # FunctionCallAssignmentStatement
+    | '[' targetList+=dataIdentifier (',' targetList+=dataIdentifier)* ']' ('='|'<-') name=ID '(' (paramExprs+=parameterizedExpression (',' paramExprs+=parameterizedExpression)* )? ')' ';'*  # FunctionCallMultiAssignmentStatement
+    // {notifyErrorListeners("Too many parentheses");}
+    // ------------------------------------------
+    // AssignmentStatement
+    | targetList+=dataIdentifier op=('<-'|'=') 'ifdef' '(' commandLineParam=dataIdentifier ','  source=expression ')' ';'*   # IfdefAssignmentStatement
+    | targetList+=dataIdentifier op=('<-'|'=') source=expression ';'*   # AssignmentStatement
+    // ------------------------------------------
+    // We don't support block statement
+    // | '{' body+=expression ';'* ( body+=expression ';'* )*  '}' # BlockStatement
+    // ------------------------------------------
+    // IfStatement
+    | 'if' '(' predicate=expression ')' (ifBody+=statement ';'* | '{' (ifBody+=statement ';'*)*  '}')  ('else' (elseBody+=statement ';'* | '{' (elseBody+=statement ';'*)*  '}'))?  # IfStatement
+    // ------------------------------------------
+    // ForStatement & ParForStatement
+    | 'for' '(' iterVar=ID 'in' iterPred=iterablePredicate (',' parForParams+=strictParameterizedExpression)* ')' (body+=statement ';'* | '{' (body+=statement ';'* )*  '}')  # ForStatement
+    // Convert strictParameterizedExpression to HashMap<String, String> for parForParams
+    | 'parfor' '(' iterVar=ID 'in' iterPred=iterablePredicate (',' parForParams+=strictParameterizedExpression)* ')' (body+=statement ';'* | '{' (body+=statement ';'*)*  '}')  # ParForStatement
+    | 'while' '(' predicate=expression ')' (body+=statement ';'* | '{' (body+=statement ';'*)* '}')  # WhileStatement
+    // ------------------------------------------
+;
+
+iterablePredicate returns [ ExpressionInfo info ]
+  @init {
+         // This actions occurs regardless of how many alternatives in this rule
+         $info = new ExpressionInfo();
+  } :
+    from=expression ':' to=expression #IterablePredicateColonExpression
+    | ID '(' from=expression ',' to=expression ',' increment=expression ')' #IterablePredicateSeqExpression
+    ;
+
+functionStatement returns [ StatementInfo info ]
+@init {
+       // This actions occurs regardless of how many alternatives in this rule
+       $info = new StatementInfo();
+} :
+    // ------------------------------------------
+    // FunctionStatement & ExternalFunctionStatement
+    // small change: only allow typed arguments here ... instead of data identifier
+    name=ID ('<-'|'=') 'function' '(' ( inputParams+=typedArgNoAssign (',' inputParams+=typedArgNoAssign)* )? ')'  ( 'return' '(' ( outputParams+=typedArgNoAssign (',' outputParams+=typedArgNoAssign)* )? ')' )? '{' (body+=statement ';'*)* '}' # InternalFunctionDefExpression
+    | name=ID ('<-'|'=') 'externalFunction' '(' ( inputParams+=typedArgNoAssign (',' inputParams+=typedArgNoAssign)* )? ')'  ( 'return' '(' ( outputParams+=typedArgNoAssign (',' outputParams+=typedArgNoAssign)* )? ')' )?   'implemented' 'in' '(' ( otherParams+=strictParameterizedKeyValueString (',' otherParams+=strictParameterizedKeyValueString)* )? ')' ';'*    # ExternalFunctionDefExpression
+    // ------------------------------------------
+;
+
+
+// Other data identifiers are typedArgNoAssign, parameterizedExpression and strictParameterizedExpression
+dataIdentifier returns [ ExpressionInfo dataInfo ]
+@init {
+       // This actions occurs regardless of how many alternatives in this rule
+       $dataInfo = new ExpressionInfo();
+       // $dataInfo.expr = new org.apache.sysml.parser.DataIdentifier();
+} :
+    // ------------------------------------------
+    // IndexedIdentifier
+    name=ID '[' (rowLower=expression (':' rowUpper=expression)?)? ',' (colLower=expression (':' colUpper=expression)?)? ']' # IndexedExpression
+    // ------------------------------------------
+    | ID                                            # SimpleDataIdentifierExpression
+    | COMMANDLINE_NAMED_ID                          # CommandlineParamExpression
+    | COMMANDLINE_POSITION_ID                       # CommandlinePositionExpression
+;
+expression returns [ ExpressionInfo info ]
+@init {
+       // This actions occurs regardless of how many alternatives in this rule
+       $info = new ExpressionInfo();
+       // $info.expr = new org.apache.sysml.parser.BinaryExpression(org.apache.sysml.parser.Expression.BinaryOp.INVALID);
+} :
+    // ------------------------------------------
+    // BinaryExpression
+    // power
+    <assoc=right> left=expression op='^' right=expression  # PowerExpression
+    // unary plus and minus
+    | op=('-'|'+') left=expression                        # UnaryExpression
+    // sequence - since we are only using this into for
+    //| left=expression op=':' right=expression             # SequenceExpression
+    // matrix multiply
+    | left=expression op='%*%' right=expression           # MatrixMulExpression
+    // modulus and integer division
+    | left=expression op=('%/%' | '%%' ) right=expression # ModIntDivExpression
+    // arithmetic multiply and divide
+    | left=expression op=('*'|'/') right=expression       # MultDivExpression
+    // arithmetic addition and subtraction
+    | left=expression op=('+'|'-') right=expression       # AddSubExpression
+    // ------------------------------------------
+    // RelationalExpression
+    | left=expression op=('>'|'>='|'<'|'<='|'=='|'!=') right=expression # RelationalExpression
+    // ------------------------------------------
+    // BooleanExpression
+    // boolean not
+    | op='!' left=expression # BooleanNotExpression
+    // boolean and
+    | left=expression op=('&'|'&&') right=expression # BooleanAndExpression
+    // boolean or
+    | left=expression op=('|'|'||') right=expression # BooleanOrExpression
+
+    // ---------------------------------
+    // only applicable for builtin function expressions
+    | name=ID '(' (paramExprs+=parameterizedExpression (',' paramExprs+=parameterizedExpression)* )? ')' ';'*  # BuiltinFunctionExpression
+
+    // 4. Atomic
+    | '(' left=expression ')'                       # AtomicExpression
+
+    // Should you allow indexed expression here ?
+    // | '[' targetList+=expression (',' targetList+=expression)* ']'  # MultiIdExpression
+
+    // | BOOLEAN                                       # ConstBooleanIdExpression
+    | 'TRUE'                                        # ConstTrueExpression
+    | 'FALSE'                                       # ConstFalseExpression
+    | INT                                           # ConstIntIdExpression
+    | DOUBLE                                        # ConstDoubleIdExpression
+    | STRING                                        # ConstStringIdExpression
+    | dataIdentifier                                # DataIdExpression
+    // Special
+    // | 'NULL' | 'NA' | 'Inf' | 'NaN'
+;
+
+typedArgNoAssign : paramType=ml_type paramName=ID;
+parameterizedExpression : (paramName=ID '=')? paramVal=expression;
+strictParameterizedExpression : paramName=ID '=' paramVal=expression ;
+strictParameterizedKeyValueString : paramName=ID '=' paramVal=STRING ;
+ID : (ALPHABET (ALPHABET|DIGIT|'_')*  '::')? ALPHABET (ALPHABET|DIGIT|'_')*
+    // Special ID cases:
+   // | 'matrix' // --> This is a special case which causes lot of headache
+   | 'as.scalar' | 'as.matrix' | 'as.double' | 'as.integer' | 'as.logical' | 'index.return' | 'lower.tail'
+;
+// Unfortunately, we have datatype name clashing with builtin function name: matrix :(
+// Therefore, ugly work around for checking datatype
+ml_type :  valueType | dataType '[' valueType ']';
+// Note to reduce number of keywords, these are case-sensitive,
+// To allow case-insenstive,  'int' becomes: ('i' | 'I') ('n' | 'N') ('t' | 'T')
+valueType: 'int' | 'integer' | 'string' | 'boolean' | 'double'
+            | 'Int' | 'Integer' | 'String' | 'Boolean' | 'Double';
+dataType:
+        // 'scalar' # ScalarDataTypeDummyCheck
+        // |
+        ID # MatrixDataTypeCheck //{ if($ID.text.compareTo("matrix") != 0) { notifyErrorListeners("incorrect datatype"); } }
+        //|  'matrix' //---> See ID, this causes lot of headache
+        ;
+INT : DIGIT+  [Ll]?;
+// BOOLEAN : 'TRUE' | 'FALSE';
+DOUBLE: DIGIT+ '.' DIGIT* EXP? [Ll]?
+| DIGIT+ EXP? [Ll]?
+| '.' DIGIT+ EXP? [Ll]?
+;
+DIGIT: '0'..'9';
+ALPHABET : [a-zA-Z] ;
+fragment EXP : ('E' | 'e') ('+' | '-')? INT ;
+COMMANDLINE_NAMED_ID: '$' ALPHABET (ALPHABET|DIGIT|'_')*;
+COMMANDLINE_POSITION_ID: '$' DIGIT+;
+
+// supports single and double quoted string with escape characters
+STRING: '"' ( ESC | ~[\\"] )*? '"' | '\'' ( ESC | ~[\\'] )*? '\'';
+fragment ESC : '\\' [abtnfrv"'\\] ;
+// Comments, whitespaces and new line
+LINE_COMMENT : '#' .*? '\r'? '\n' -> skip ;
+MULTILINE_BLOCK_COMMENT : '/*' .*? '*/' -> skip ;
+WHITESPACE : (' ' | '\t' | '\r' | '\n')+ -> skip ;

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/RemoteDPParForMR.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/RemoteDPParForMR.java b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/RemoteDPParForMR.java
index b2d37f9..77d5282 100644
--- a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/RemoteDPParForMR.java
+++ b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/RemoteDPParForMR.java
@@ -1,292 +1,292 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.sysml.runtime.controlprogram.parfor;
-
-import java.io.IOException;
-import java.util.HashMap;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.SequenceFile;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapred.Counters.Group;
-import org.apache.hadoop.mapred.FileInputFormat;
-import org.apache.hadoop.mapred.FileOutputFormat;
-import org.apache.hadoop.mapred.JobClient;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.RunningJob;
-import org.apache.hadoop.mapred.SequenceFileOutputFormat;
-
-import org.apache.sysml.api.DMLScript;
-import org.apache.sysml.conf.ConfigurationManager;
-import org.apache.sysml.conf.DMLConfig;
-import org.apache.sysml.runtime.DMLRuntimeException;
-import org.apache.sysml.runtime.controlprogram.LocalVariableMap;
-import org.apache.sysml.runtime.controlprogram.ParForProgramBlock;
-import org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PDataPartitionFormat;
-import org.apache.sysml.runtime.controlprogram.caching.CacheStatistics;
-import org.apache.sysml.runtime.controlprogram.caching.CacheableData;
-import org.apache.sysml.runtime.controlprogram.caching.MatrixObject;
-import org.apache.sysml.runtime.controlprogram.parfor.stat.InfrastructureAnalyzer;
-import org.apache.sysml.runtime.controlprogram.parfor.stat.Stat;
-import org.apache.sysml.runtime.controlprogram.parfor.util.PairWritableBlock;
-import org.apache.sysml.runtime.controlprogram.parfor.util.PairWritableCell;
-import org.apache.sysml.runtime.instructions.cp.Data;
-import org.apache.sysml.runtime.io.MatrixReader;
-import org.apache.sysml.runtime.matrix.data.InputInfo;
-import org.apache.sysml.runtime.matrix.data.OutputInfo;
-import org.apache.sysml.runtime.matrix.mapred.MRJobConfiguration;
-import org.apache.sysml.runtime.util.MapReduceTool;
-import org.apache.sysml.utils.Statistics;
-import org.apache.sysml.yarn.DMLAppMasterUtils;
-
-/**
- * MR job class for submitting parfor remote MR jobs, controlling its execution and obtaining results.
- * 
- *
- */
-public class RemoteDPParForMR
-{
-	
-	protected static final Log LOG = LogFactory.getLog(RemoteDPParForMR.class.getName());
-	
-	/**
-	 * 
-	 * @param pfid
-	 * @param program
-	 * @param taskFile
-	 * @param resultFile
-	 * @param enableCPCaching 
-	 * @param mode
-	 * @param numMappers
-	 * @param replication
-	 * @return
-	 * @throws DMLRuntimeException
-	 */
-	public static RemoteParForJobReturn runJob(long pfid, String itervar, String matrixvar, String program, String resultFile, MatrixObject input, 
-			                                   PDataPartitionFormat dpf, OutputInfo oi, boolean tSparseCol, //config params
-			                                   boolean enableCPCaching, int numReducers, int replication, int max_retry)  //opt params
-		throws DMLRuntimeException
-	{
-		RemoteParForJobReturn ret = null;
-		String jobname = "ParFor-DPEMR";
-		long t0 = DMLScript.STATISTICS ? System.nanoTime() : 0;
-		
-		JobConf job;
-		job = new JobConf( RemoteDPParForMR.class );
-		job.setJobName(jobname+pfid);
-		
-		//maintain dml script counters
-		Statistics.incrementNoOfCompiledMRJobs();
-	
-		try
-		{
-			/////
-			//configure the MR job
-		
-			//set arbitrary CP program blocks that will perform in the reducers
-			MRJobConfiguration.setProgramBlocks(job, program); 
-
-			//enable/disable caching
-			MRJobConfiguration.setParforCachingConfig(job, enableCPCaching);
-		
-			//setup input matrix
-			Path path = new Path( input.getFileName() );
-			long rlen = input.getNumRows();
-			long clen = input.getNumColumns();
-			int brlen = (int) input.getNumRowsPerBlock();
-			int bclen = (int) input.getNumColumnsPerBlock();
-			MRJobConfiguration.setPartitioningInfo(job, rlen, clen, brlen, bclen, InputInfo.BinaryBlockInputInfo, oi, dpf, 1, input.getFileName(), itervar, matrixvar, tSparseCol);
-			job.setInputFormat(InputInfo.BinaryBlockInputInfo.inputFormatClass);
-			FileInputFormat.setInputPaths(job, path);
-			
-			//set mapper and reducers classes
-			job.setMapperClass(DataPartitionerRemoteMapper.class); 
-			job.setReducerClass(RemoteDPParWorkerReducer.class); 
-			
-		    //set output format
-		    job.setOutputFormat(SequenceFileOutputFormat.class);
-		    
-		    //set output path
-		    MapReduceTool.deleteFileIfExistOnHDFS(resultFile);
-		    FileOutputFormat.setOutputPath(job, new Path(resultFile));
-		    
-			//set the output key, value schema
-		    
-		    //parfor partitioning outputs (intermediates)
-		    job.setMapOutputKeyClass(LongWritable.class);
-		    if( oi == OutputInfo.BinaryBlockOutputInfo )
-		    	job.setMapOutputValueClass(PairWritableBlock.class); 
-		    else if( oi == OutputInfo.BinaryCellOutputInfo )
-		    	job.setMapOutputValueClass(PairWritableCell.class);
-		    else 
-		    	throw new DMLRuntimeException("Unsupported intermrediate output info: "+oi);
-		    //parfor exec output
-		    job.setOutputKeyClass(LongWritable.class);
-			job.setOutputValueClass(Text.class);
-			
-			//////
-			//set optimization parameters
-
-			//set the number of mappers and reducers 
-			job.setNumReduceTasks( numReducers );			
-			
-			//disable automatic tasks timeouts and speculative task exec
-			job.setInt("mapred.task.timeout", 0);			
-			job.setMapSpeculativeExecution(false);
-			
-			//set up preferred custom serialization framework for binary block format
-			if( MRJobConfiguration.USE_BINARYBLOCK_SERIALIZATION )
-				MRJobConfiguration.addBinaryBlockSerializationFramework( job );
-	
-			//set up map/reduce memory configurations (if in AM context)
-			DMLConfig config = ConfigurationManager.getConfig();
-			DMLAppMasterUtils.setupMRJobRemoteMaxMemory(job, config);
-			
-			//disable JVM reuse
-			job.setNumTasksToExecutePerJvm( 1 ); //-1 for unlimited 
-			
-			//set the replication factor for the results
-			job.setInt("dfs.replication", replication);
-			
-			//set the max number of retries per map task
-			//note: currently disabled to use cluster config
-			//job.setInt("mapreduce.map.maxattempts", max_retry);
-			
-			//set unique working dir
-			MRJobConfiguration.setUniqueWorkingDir(job);
-			
-			/////
-			// execute the MR job			
-			RunningJob runjob = JobClient.runJob(job);
-			
-			// Process different counters 
-			Statistics.incrementNoOfExecutedMRJobs();
-			Group pgroup = runjob.getCounters().getGroup(ParForProgramBlock.PARFOR_COUNTER_GROUP_NAME);
-			int numTasks = (int)pgroup.getCounter( Stat.PARFOR_NUMTASKS.toString() );
-			int numIters = (int)pgroup.getCounter( Stat.PARFOR_NUMITERS.toString() );
-			if( DMLScript.STATISTICS && !InfrastructureAnalyzer.isLocalMode() ) {
-				Statistics.incrementJITCompileTime( pgroup.getCounter( Stat.PARFOR_JITCOMPILE.toString() ) );
-				Statistics.incrementJVMgcCount( pgroup.getCounter( Stat.PARFOR_JVMGC_COUNT.toString() ) );
-				Statistics.incrementJVMgcTime( pgroup.getCounter( Stat.PARFOR_JVMGC_TIME.toString() ) );
-				Group cgroup = runjob.getCounters().getGroup(CacheableData.CACHING_COUNTER_GROUP_NAME.toString());
-				CacheStatistics.incrementMemHits((int)cgroup.getCounter( CacheStatistics.Stat.CACHE_HITS_MEM.toString() ));
-				CacheStatistics.incrementFSBuffHits((int)cgroup.getCounter( CacheStatistics.Stat.CACHE_HITS_FSBUFF.toString() ));
-				CacheStatistics.incrementFSHits((int)cgroup.getCounter( CacheStatistics.Stat.CACHE_HITS_FS.toString() ));
-				CacheStatistics.incrementHDFSHits((int)cgroup.getCounter( CacheStatistics.Stat.CACHE_HITS_HDFS.toString() ));
-				CacheStatistics.incrementFSBuffWrites((int)cgroup.getCounter( CacheStatistics.Stat.CACHE_WRITES_FSBUFF.toString() ));
-				CacheStatistics.incrementFSWrites((int)cgroup.getCounter( CacheStatistics.Stat.CACHE_WRITES_FS.toString() ));
-				CacheStatistics.incrementHDFSWrites((int)cgroup.getCounter( CacheStatistics.Stat.CACHE_WRITES_HDFS.toString() ));
-				CacheStatistics.incrementAcquireRTime(cgroup.getCounter( CacheStatistics.Stat.CACHE_TIME_ACQR.toString() ));
-				CacheStatistics.incrementAcquireMTime(cgroup.getCounter( CacheStatistics.Stat.CACHE_TIME_ACQM.toString() ));
-				CacheStatistics.incrementReleaseTime(cgroup.getCounter( CacheStatistics.Stat.CACHE_TIME_RLS.toString() ));
-				CacheStatistics.incrementExportTime(cgroup.getCounter( CacheStatistics.Stat.CACHE_TIME_EXP.toString() ));
-			}
-				
-			// read all files of result variables and prepare for return
-			LocalVariableMap[] results = readResultFile(job, resultFile); 
-
-			ret = new RemoteParForJobReturn(runjob.isSuccessful(), 
-					                        numTasks, numIters, 
-					                        results);  	
-		}
-		catch(Exception ex)
-		{
-			throw new DMLRuntimeException(ex);
-		}
-		finally
-		{
-			// remove created files 
-			try
-			{
-				MapReduceTool.deleteFileIfExistOnHDFS(new Path(resultFile), job);
-			}
-			catch(IOException ex)
-			{
-				throw new DMLRuntimeException(ex);
-			}
-		}
-		
-		if( DMLScript.STATISTICS ){
-			long t1 = System.nanoTime();
-			Statistics.maintainCPHeavyHitters("MR-Job_"+jobname, t1-t0);
-		}
-		
-		return ret;
-	}
-	
-
-	/**
-	 * Result file contains hierarchy of workerID-resultvar(incl filename). We deduplicate
-	 * on the workerID. Without JVM reuse each task refers to a unique workerID, so we
-	 * will not find any duplicates. With JVM reuse, however, each slot refers to a workerID, 
-	 * and there are duplicate filenames due to partial aggregation and overwrite of fname 
-	 * (the RemoteParWorkerMapper ensures uniqueness of those files independent of the 
-	 * runtime implementation). 
-	 * 
-	 * @param job 
-	 * @param fname
-	 * @return
-	 * @throws DMLRuntimeException
-	 */
-	@SuppressWarnings("deprecation")
-	public static LocalVariableMap [] readResultFile( JobConf job, String fname )
-		throws DMLRuntimeException, IOException
-	{
-		HashMap<Long,LocalVariableMap> tmp = new HashMap<Long,LocalVariableMap>();
-
-		FileSystem fs = FileSystem.get(job);
-		Path path = new Path(fname);
-		LongWritable key = new LongWritable(); //workerID
-		Text value = new Text();               //serialized var header (incl filename)
-		
-		int countAll = 0;
-		for( Path lpath : MatrixReader.getSequenceFilePaths(fs, path) )
-		{
-			SequenceFile.Reader reader = new SequenceFile.Reader(FileSystem.get(job),lpath,job);
-			try
-			{
-				while( reader.next(key, value) )
-				{
-					//System.out.println("key="+key.get()+", value="+value.toString());
-					if( !tmp.containsKey( key.get() ) )
-		        		tmp.put(key.get(), new LocalVariableMap ());	   
-					Object[] dat = ProgramConverter.parseDataObject( value.toString() );
-		        	tmp.get( key.get() ).put((String)dat[0], (Data)dat[1]);
-		        	countAll++;
-				}
-			}	
-			finally
-			{
-				if( reader != null )
-					reader.close();
-			}
-		}		
-
-		LOG.debug("Num remote worker results (before deduplication): "+countAll);
-		LOG.debug("Num remote worker results: "+tmp.size());
-
-		//create return array
-		return tmp.values().toArray(new LocalVariableMap[0]);	
-	}
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.runtime.controlprogram.parfor;
+
+import java.io.IOException;
+import java.util.HashMap;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.Counters.Group;
+import org.apache.hadoop.mapred.FileInputFormat;
+import org.apache.hadoop.mapred.FileOutputFormat;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.RunningJob;
+import org.apache.hadoop.mapred.SequenceFileOutputFormat;
+
+import org.apache.sysml.api.DMLScript;
+import org.apache.sysml.conf.ConfigurationManager;
+import org.apache.sysml.conf.DMLConfig;
+import org.apache.sysml.runtime.DMLRuntimeException;
+import org.apache.sysml.runtime.controlprogram.LocalVariableMap;
+import org.apache.sysml.runtime.controlprogram.ParForProgramBlock;
+import org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PDataPartitionFormat;
+import org.apache.sysml.runtime.controlprogram.caching.CacheStatistics;
+import org.apache.sysml.runtime.controlprogram.caching.CacheableData;
+import org.apache.sysml.runtime.controlprogram.caching.MatrixObject;
+import org.apache.sysml.runtime.controlprogram.parfor.stat.InfrastructureAnalyzer;
+import org.apache.sysml.runtime.controlprogram.parfor.stat.Stat;
+import org.apache.sysml.runtime.controlprogram.parfor.util.PairWritableBlock;
+import org.apache.sysml.runtime.controlprogram.parfor.util.PairWritableCell;
+import org.apache.sysml.runtime.instructions.cp.Data;
+import org.apache.sysml.runtime.io.MatrixReader;
+import org.apache.sysml.runtime.matrix.data.InputInfo;
+import org.apache.sysml.runtime.matrix.data.OutputInfo;
+import org.apache.sysml.runtime.matrix.mapred.MRJobConfiguration;
+import org.apache.sysml.runtime.util.MapReduceTool;
+import org.apache.sysml.utils.Statistics;
+import org.apache.sysml.yarn.DMLAppMasterUtils;
+
+/**
+ * MR job class for submitting parfor remote MR jobs, controlling its execution and obtaining results.
+ * 
+ *
+ */
+public class RemoteDPParForMR
+{
+	
+	protected static final Log LOG = LogFactory.getLog(RemoteDPParForMR.class.getName());
+	
+	/**
+	 * 
+	 * @param pfid
+	 * @param program
+	 * @param taskFile
+	 * @param resultFile
+	 * @param enableCPCaching 
+	 * @param mode
+	 * @param numMappers
+	 * @param replication
+	 * @return
+	 * @throws DMLRuntimeException
+	 */
+	public static RemoteParForJobReturn runJob(long pfid, String itervar, String matrixvar, String program, String resultFile, MatrixObject input, 
+			                                   PDataPartitionFormat dpf, OutputInfo oi, boolean tSparseCol, //config params
+			                                   boolean enableCPCaching, int numReducers, int replication, int max_retry)  //opt params
+		throws DMLRuntimeException
+	{
+		RemoteParForJobReturn ret = null;
+		String jobname = "ParFor-DPEMR";
+		long t0 = DMLScript.STATISTICS ? System.nanoTime() : 0;
+		
+		JobConf job;
+		job = new JobConf( RemoteDPParForMR.class );
+		job.setJobName(jobname+pfid);
+		
+		//maintain dml script counters
+		Statistics.incrementNoOfCompiledMRJobs();
+	
+		try
+		{
+			/////
+			//configure the MR job
+		
+			//set arbitrary CP program blocks that will perform in the reducers
+			MRJobConfiguration.setProgramBlocks(job, program); 
+
+			//enable/disable caching
+			MRJobConfiguration.setParforCachingConfig(job, enableCPCaching);
+		
+			//setup input matrix
+			Path path = new Path( input.getFileName() );
+			long rlen = input.getNumRows();
+			long clen = input.getNumColumns();
+			int brlen = (int) input.getNumRowsPerBlock();
+			int bclen = (int) input.getNumColumnsPerBlock();
+			MRJobConfiguration.setPartitioningInfo(job, rlen, clen, brlen, bclen, InputInfo.BinaryBlockInputInfo, oi, dpf, 1, input.getFileName(), itervar, matrixvar, tSparseCol);
+			job.setInputFormat(InputInfo.BinaryBlockInputInfo.inputFormatClass);
+			FileInputFormat.setInputPaths(job, path);
+			
+			//set mapper and reducers classes
+			job.setMapperClass(DataPartitionerRemoteMapper.class); 
+			job.setReducerClass(RemoteDPParWorkerReducer.class); 
+			
+		    //set output format
+		    job.setOutputFormat(SequenceFileOutputFormat.class);
+		    
+		    //set output path
+		    MapReduceTool.deleteFileIfExistOnHDFS(resultFile);
+		    FileOutputFormat.setOutputPath(job, new Path(resultFile));
+		    
+			//set the output key, value schema
+		    
+		    //parfor partitioning outputs (intermediates)
+		    job.setMapOutputKeyClass(LongWritable.class);
+		    if( oi == OutputInfo.BinaryBlockOutputInfo )
+		    	job.setMapOutputValueClass(PairWritableBlock.class); 
+		    else if( oi == OutputInfo.BinaryCellOutputInfo )
+		    	job.setMapOutputValueClass(PairWritableCell.class);
+		    else 
+		    	throw new DMLRuntimeException("Unsupported intermrediate output info: "+oi);
+		    //parfor exec output
+		    job.setOutputKeyClass(LongWritable.class);
+			job.setOutputValueClass(Text.class);
+			
+			//////
+			//set optimization parameters
+
+			//set the number of mappers and reducers 
+			job.setNumReduceTasks( numReducers );			
+			
+			//disable automatic tasks timeouts and speculative task exec
+			job.setInt("mapred.task.timeout", 0);			
+			job.setMapSpeculativeExecution(false);
+			
+			//set up preferred custom serialization framework for binary block format
+			if( MRJobConfiguration.USE_BINARYBLOCK_SERIALIZATION )
+				MRJobConfiguration.addBinaryBlockSerializationFramework( job );
+	
+			//set up map/reduce memory configurations (if in AM context)
+			DMLConfig config = ConfigurationManager.getConfig();
+			DMLAppMasterUtils.setupMRJobRemoteMaxMemory(job, config);
+			
+			//disable JVM reuse
+			job.setNumTasksToExecutePerJvm( 1 ); //-1 for unlimited 
+			
+			//set the replication factor for the results
+			job.setInt("dfs.replication", replication);
+			
+			//set the max number of retries per map task
+			//note: currently disabled to use cluster config
+			//job.setInt("mapreduce.map.maxattempts", max_retry);
+			
+			//set unique working dir
+			MRJobConfiguration.setUniqueWorkingDir(job);
+			
+			/////
+			// execute the MR job			
+			RunningJob runjob = JobClient.runJob(job);
+			
+			// Process different counters 
+			Statistics.incrementNoOfExecutedMRJobs();
+			Group pgroup = runjob.getCounters().getGroup(ParForProgramBlock.PARFOR_COUNTER_GROUP_NAME);
+			int numTasks = (int)pgroup.getCounter( Stat.PARFOR_NUMTASKS.toString() );
+			int numIters = (int)pgroup.getCounter( Stat.PARFOR_NUMITERS.toString() );
+			if( DMLScript.STATISTICS && !InfrastructureAnalyzer.isLocalMode() ) {
+				Statistics.incrementJITCompileTime( pgroup.getCounter( Stat.PARFOR_JITCOMPILE.toString() ) );
+				Statistics.incrementJVMgcCount( pgroup.getCounter( Stat.PARFOR_JVMGC_COUNT.toString() ) );
+				Statistics.incrementJVMgcTime( pgroup.getCounter( Stat.PARFOR_JVMGC_TIME.toString() ) );
+				Group cgroup = runjob.getCounters().getGroup(CacheableData.CACHING_COUNTER_GROUP_NAME.toString());
+				CacheStatistics.incrementMemHits((int)cgroup.getCounter( CacheStatistics.Stat.CACHE_HITS_MEM.toString() ));
+				CacheStatistics.incrementFSBuffHits((int)cgroup.getCounter( CacheStatistics.Stat.CACHE_HITS_FSBUFF.toString() ));
+				CacheStatistics.incrementFSHits((int)cgroup.getCounter( CacheStatistics.Stat.CACHE_HITS_FS.toString() ));
+				CacheStatistics.incrementHDFSHits((int)cgroup.getCounter( CacheStatistics.Stat.CACHE_HITS_HDFS.toString() ));
+				CacheStatistics.incrementFSBuffWrites((int)cgroup.getCounter( CacheStatistics.Stat.CACHE_WRITES_FSBUFF.toString() ));
+				CacheStatistics.incrementFSWrites((int)cgroup.getCounter( CacheStatistics.Stat.CACHE_WRITES_FS.toString() ));
+				CacheStatistics.incrementHDFSWrites((int)cgroup.getCounter( CacheStatistics.Stat.CACHE_WRITES_HDFS.toString() ));
+				CacheStatistics.incrementAcquireRTime(cgroup.getCounter( CacheStatistics.Stat.CACHE_TIME_ACQR.toString() ));
+				CacheStatistics.incrementAcquireMTime(cgroup.getCounter( CacheStatistics.Stat.CACHE_TIME_ACQM.toString() ));
+				CacheStatistics.incrementReleaseTime(cgroup.getCounter( CacheStatistics.Stat.CACHE_TIME_RLS.toString() ));
+				CacheStatistics.incrementExportTime(cgroup.getCounter( CacheStatistics.Stat.CACHE_TIME_EXP.toString() ));
+			}
+				
+			// read all files of result variables and prepare for return
+			LocalVariableMap[] results = readResultFile(job, resultFile); 
+
+			ret = new RemoteParForJobReturn(runjob.isSuccessful(), 
+					                        numTasks, numIters, 
+					                        results);  	
+		}
+		catch(Exception ex)
+		{
+			throw new DMLRuntimeException(ex);
+		}
+		finally
+		{
+			// remove created files 
+			try
+			{
+				MapReduceTool.deleteFileIfExistOnHDFS(new Path(resultFile), job);
+			}
+			catch(IOException ex)
+			{
+				throw new DMLRuntimeException(ex);
+			}
+		}
+		
+		if( DMLScript.STATISTICS ){
+			long t1 = System.nanoTime();
+			Statistics.maintainCPHeavyHitters("MR-Job_"+jobname, t1-t0);
+		}
+		
+		return ret;
+	}
+	
+
+	/**
+	 * Reads the serialized result variables of all remote workers from the given
+	 * result file and groups them by workerID.
+	 * 
+	 * Result file contains hierarchy of workerID-resultvar(incl filename). We deduplicate
+	 * on the workerID. Without JVM reuse each task refers to a unique workerID, so we
+	 * will not find any duplicates. With JVM reuse, however, each slot refers to a workerID, 
+	 * and there are duplicate filenames due to partial aggregation and overwrite of fname 
+	 * (the RemoteParWorkerMapper ensures uniqueness of those files independent of the 
+	 * runtime implementation). 
+	 * 
+	 * @param job job configuration used to obtain the file system and to open the sequence files
+	 * @param fname name of the result file; every sequence file path that 
+	 *              MatrixReader.getSequenceFilePaths returns for it is read
+	 * @return one LocalVariableMap per distinct workerID found in the result file 
+	 *         (in no particular order); an empty array if no records were read
+	 * @throws DMLRuntimeException declared by this method; presumably raised by 
+	 *         ProgramConverter.parseDataObject if a serialized variable cannot be parsed
+	 * @throws IOException if the file system or one of the sequence files cannot be accessed
+	 */
+	@SuppressWarnings("deprecation")
+	public static LocalVariableMap [] readResultFile( JobConf job, String fname )
+		throws DMLRuntimeException, IOException
+	{
+		HashMap<Long,LocalVariableMap> tmp = new HashMap<Long,LocalVariableMap>();
+
+		FileSystem fs = FileSystem.get(job);
+		Path path = new Path(fname);
+		LongWritable key = new LongWritable(); //workerID
+		Text value = new Text();               //serialized var header (incl filename)
+		
+		int countAll = 0;
+		for( Path lpath : MatrixReader.getSequenceFilePaths(fs, path) )
+		{
+			//reuse the file system handle obtained above instead of looking it up per file
+			SequenceFile.Reader reader = new SequenceFile.Reader(fs,lpath,job);
+			try
+			{
+				while( reader.next(key, value) )
+				{
+					//get or create the variable map of this worker (single lookup per record)
+					long workerID = key.get();
+					LocalVariableMap vars = tmp.get( workerID );
+					if( vars == null ) {
+						vars = new LocalVariableMap();
+						tmp.put(workerID, vars);
+					}
+					
+					//dat[0] is the variable name, dat[1] the data object; a later record 
+					//with the same name overwrites the earlier one (deduplication)
+					Object[] dat = ProgramConverter.parseDataObject( value.toString() );
+					vars.put((String)dat[0], (Data)dat[1]);
+					countAll++;
+				}
+			}
+			finally
+			{
+				//reader is always non-null here since it is created before the try block
+				reader.close();
+			}
+		}
+
+		LOG.debug("Num remote worker results (before deduplication): "+countAll);
+		LOG.debug("Num remote worker results: "+tmp.size());
+
+		//create return array
+		return tmp.values().toArray(new LocalVariableMap[0]);
+	}
+}


[49/55] [partial] incubator-systemml git commit: [SYSTEMML-482] [SYSTEMML-480] Adding a Git attributes file to enforce Unix-styled line endings, and normalizing all of the line endings.

Posted by du...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/scripts/algorithms/GLM-predict.dml
----------------------------------------------------------------------
diff --git a/scripts/algorithms/GLM-predict.dml b/scripts/algorithms/GLM-predict.dml
index 9edfaf2..5e998e3 100644
--- a/scripts/algorithms/GLM-predict.dml
+++ b/scripts/algorithms/GLM-predict.dml
@@ -1,444 +1,444 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# 
-# THIS SCRIPT APPLIES THE ESTIMATED PARAMETERS OF A GLM-TYPE REGRESSION TO A NEW (TEST) DATASET
-#
-# INPUT PARAMETERS:
-# ---------------------------------------------------------------------------------------------
-# NAME  TYPE   DEFAULT  MEANING
-# ---------------------------------------------------------------------------------------------
-# X     String  ---     Location to read the matrix X of records (feature vectors)
-# B     String  ---     Location to read GLM regression parameters (the betas), with dimensions
-#                           ncol(X)   x k: do not add intercept
-#                           ncol(X)+1 x k: add intercept as given by the last B-row
-#                           if k > 1, use only B[, 1] unless it is Multinomial Logit (dfam=3)
-# M     String  " "     Location to write the matrix of predicted response means/probabilities:
-#                           nrow(X) x 1  : for Power-type distributions (dfam=1)
-#                           nrow(X) x 2  : for Binomial distribution (dfam=2), column 2 is "No"
-#                           nrow(X) x k+1: for Multinomial Logit (dfam=3), col# k+1 is baseline
-# Y     String  " "     Location to read response matrix Y, with the following dimensions:
-#                           nrow(X) x 1  : for all distributions (dfam=1 or 2 or 3)
-#                           nrow(X) x 2  : for Binomial (dfam=2) given by (#pos, #neg) counts
-#                           nrow(X) x k+1: for Multinomial (dfam=3) given by category counts
-# O     String  " "     Location to write the printed statistics; by default is standard output
-# dfam  Int     1       GLM distribution family: 1 = Power, 2 = Binomial, 3 = Multinomial Logit
-# vpow  Double  0.0     Power for Variance defined as (mean)^power (ignored if dfam != 1):
-#                       0.0 = Gaussian, 1.0 = Poisson, 2.0 = Gamma, 3.0 = Inverse Gaussian
-# link  Int     0       Link function code: 0 = canonical (depends on distribution), 1 = Power,
-#                       2 = Logit, 3 = Probit, 4 = Cloglog, 5 = Cauchit; ignored if Multinomial
-# lpow  Double  1.0     Power for Link function defined as (mean)^power (ignored if link != 1):
-#                       -2.0 = 1/mu^2, -1.0 = reciprocal, 0.0 = log, 0.5 = sqrt, 1.0 = identity
-# disp  Double  1.0     Dispersion value, when available
-# fmt   String "text"   Matrix output format, usually "text" or "csv" (for matrices only)
-# ---------------------------------------------------------------------------------------------
-# OUTPUT: Matrix M of predicted means/probabilities, some statistics in CSV format (see below)
-# The statistics are printed one per each line, in the following CSV format:
-# NAME,[COLUMN],[SCALED],VALUE
-#   NAME   is the string identifier for the statistic, see the table below.
-#   COLUMN is an optional integer value that specifies the Y-column for per-column statistics;
-#          note that a Binomial/Multinomial one-column Y input is converted into multi-column.
-#   SCALED is an optional Boolean value (TRUE or FALSE) that tells us whether or not the input
-#          dispersion parameter (disp) scaling has been applied to this statistic.
-#   VALUE  is the value of the statistic.
-#
-# NAME                  COLUMN  SCALED  MEANING
-# ---------------------------------------------------------------------------------------------
-# LOGLHOOD_Z                      +     Log-Likelihood Z-score (in st.dev's from mean)
-# LOGLHOOD_Z_PVAL                 +     Log-Likelihood Z-score p-value
-# PEARSON_X2                      +     Pearson residual X^2 statistic
-# PEARSON_X2_BY_DF                +     Pearson X^2 divided by degrees of freedom
-# PEARSON_X2_PVAL                 +     Pearson X^2 p-value
-# DEVIANCE_G2                     +     Deviance from saturated model G^2 statistic
-# DEVIANCE_G2_BY_DF               +     Deviance G^2 divided by degrees of freedom
-# DEVIANCE_G2_PVAL                +     Deviance G^2 p-value
-# AVG_TOT_Y               +             Average of Y column for a single response value
-# STDEV_TOT_Y             +             St.Dev. of Y column for a single response value
-# AVG_RES_Y               +             Average of column residual, i.e. of Y - mean(Y|X)
-# STDEV_RES_Y             +             St.Dev. of column residual, i.e. of Y - mean(Y|X)
-# PRED_STDEV_RES          +       +     Model-predicted St.Dev. of column residual
-# PLAIN_R2                +             Plain R^2 of Y column residual with bias included
-# ADJUSTED_R2             +             Adjusted R^2 of Y column residual with bias included
-# PLAIN_R2_NOBIAS         +             Plain R^2 of Y column residual with bias subtracted
-# ADJUSTED_R2_NOBIAS      +             Adjusted R^2 of Y column residual with bias subtracted
-# ---------------------------------------------------------------------------------------------
-#
-# Example with distribution = "Poisson.log":
-# hadoop jar SystemML.jar -f GLM_HOME/GLM-predict.dml -nvargs dfam=1 vpow=1.0 link=1 lpow=0.0
-#   disp=3.0 fmt=csv X=INPUT_DIR/X B=INPUT_DIR/B Y=INPUT_DIR/Y M=OUTPUT_DIR/M O=OUTPUT_DIR/out.csv
-
-# Default values for input parameters:
-fileX = $X;
-fileB = $B;
-fileM = ifdef ($M, " ");
-fileY = ifdef ($Y, " ");
-fileO = ifdef ($O, " ");
-fmtM  = ifdef ($fmt, "text");
-
-dist_type  = ifdef ($dfam, 1);    # $dfam = 1;
-var_power  = ifdef ($vpow, 0.0);  # $vpow = 0.0;
-link_type  = ifdef ($link, 0);    # $link = 0;
-link_power = ifdef ($lpow, 1.0);  # $lpow = 1.0;
-dispersion = ifdef ($disp, 1.0);  # $disp = 1.0;
-
-var_power  = as.double (var_power);
-link_power = as.double (link_power); 
-dispersion = as.double (dispersion);
-
-if (dist_type == 3) {
-    link_type = 2;
-} else { if (link_type == 0) { # Canonical Link
-    if (dist_type == 1) {
-        link_type = 1;
-        link_power = 1.0 - var_power;
-    } else { if (dist_type == 2) {
-            link_type = 2;
-}}} }
-
-X = read (fileX);
-num_records  = nrow (X);
-num_features = ncol (X);
-
-B_full = read (fileB);
-if (dist_type == 3) {
-    beta =  B_full [1 : ncol (X),  ];
-    intercept = B_full [nrow(B_full),  ];
-} else {
-    beta =  B_full [1 : ncol (X), 1];
-    intercept = B_full [nrow(B_full), 1];
-}
-if (nrow (B_full) == ncol (X)) {
-    intercept = 0.0 * intercept;
-    is_intercept = FALSE;
-} else {
-    num_features = num_features + 1;
-    is_intercept = TRUE;
-}
-
-ones_rec = matrix (1, rows = num_records, cols = 1);
-linear_terms = X %*% beta + ones_rec %*% intercept;
-[means, vars] =
-    glm_means_and_vars (linear_terms, dist_type, var_power, link_type, link_power);
-    
-if (fileM != " ") {
-    write (means, fileM, format=fmtM);
-}
-
-if (fileY != " ")
-{
-    Y = read (fileY);
-    ones_ctg = matrix (1, rows = ncol(Y), cols = 1);
-    
-    # Statistics To Compute:
-    
-    Z_logl               = 0.0 / 0.0;
-    Z_logl_pValue        = 0.0 / 0.0;
-    X2_pearson           = 0.0 / 0.0;
-    df_pearson           = -1;
-    G2_deviance          = 0.0 / 0.0;
-    df_deviance          = -1;
-    X2_pearson_pValue    = 0.0 / 0.0;
-    G2_deviance_pValue   = 0.0 / 0.0;
-    Z_logl_scaled        = 0.0 / 0.0;
-    Z_logl_scaled_pValue = 0.0 / 0.0;
-    X2_scaled            = 0.0 / 0.0;
-    X2_scaled_pValue     = 0.0 / 0.0;
-    G2_scaled            = 0.0 / 0.0;
-    G2_scaled_pValue     = 0.0 / 0.0;
-    
-    if (dist_type == 1 & link_type == 1) {
-    #
-    # POWER DISTRIBUTIONS (GAUSSIAN, POISSON, GAMMA, ETC.)
-    #
-        if (link_power == 0.0) {
-            is_zero_Y = ppred (Y, 0.0, "==");
-            lt_saturated = log (Y + is_zero_Y) - is_zero_Y / (1.0 - is_zero_Y);
-        } else {
-            lt_saturated = Y ^ link_power;
-        }
-        Y_counts = ones_rec;
-
-        X2_pearson = sum ((Y - means) ^ 2 / vars);
-        df_pearson = num_records - num_features;
-
-        log_l_part = 
-            glm_partial_loglikelihood_for_power_dist_and_link (linear_terms, Y, var_power, link_power);
-        log_l_part_saturated = 
-            glm_partial_loglikelihood_for_power_dist_and_link (lt_saturated, Y, var_power, link_power);
-            
-        G2_deviance = 2 * sum (log_l_part_saturated) - 2 * sum (log_l_part);
-        df_deviance = num_records - num_features;
-        
-    } else { if (dist_type >= 2) {
-    #
-    # BINOMIAL AND MULTINOMIAL DISTRIBUTIONS
-    #
-        if (ncol (Y) == 1) {
-            num_categories = ncol (beta) + 1;
-            if (min (Y) <= 0) { 
-                # Category labels "0", "-1" etc. are converted into the baseline label
-                Y = Y + (- Y + num_categories) * ppred (Y, 0, "<=");
-            }
-            Y_size = min (num_categories, max(Y));
-            Y_unsized = table (seq (1, num_records, 1), Y);
-            Y = matrix (0, rows = num_records, cols = num_categories);
-            Y [, 1 : Y_size] = Y_unsized [, 1 : Y_size];
-            Y_counts = ones_rec;
-        } else {
-            Y_counts = rowSums (Y);
-        }
-        
-        P = means;
-        zero_Y = ppred (Y, 0.0, "==");
-        zero_P = ppred (P, 0.0, "==");
-        ones_ctg = matrix (1, rows = ncol(Y), cols = 1);
-        
-        logl_vec = rowSums (Y *  log (P + zero_Y)   );
-        ent1_vec = rowSums (P *  log (P + zero_P)   );
-        ent2_vec = rowSums (P * (log (P + zero_P))^2);
-        E_logl   = sum (Y_counts * ent1_vec);
-        V_logl   = sum (Y_counts * (ent2_vec - ent1_vec ^ 2));
-        Z_logl   = (sum (logl_vec) - E_logl) / sqrt (V_logl);
-        
-        means = means * (Y_counts %*% t(ones_ctg));
-        vars  = vars  * (Y_counts %*% t(ones_ctg));
-        
-        frac_below_5 = sum (ppred (means, 5, "<")) / (nrow (means) * ncol (means));
-        frac_below_1 = sum (ppred (means, 1, "<")) / (nrow (means) * ncol (means));
-        
-        if (frac_below_5 > 0.2 | frac_below_1 > 0.0) {
-            print ("WARNING: residual statistics are inaccurate here due to low cell means.");
-        }
-        
-        X2_pearson = sum ((Y - means) ^ 2 / means);
-        df_pearson = (num_records - num_features) * (ncol(Y) - 1);
-        
-        G2_deviance = 2 * sum (Y * log ((Y + zero_Y) / (means + zero_Y)));
-        df_deviance = (num_records - num_features) * (ncol(Y) - 1);
-    }}
-    
-    if (Z_logl == Z_logl) {
-        Z_logl_absneg = - abs (Z_logl);
-        Z_logl_pValue = 2.0 * pnorm(target = Z_logl_absneg);
-    }
-    if (X2_pearson == X2_pearson & df_pearson > 0) {
-        X2_pearson_pValue = pchisq(target = X2_pearson, df = df_pearson, lower.tail=FALSE);
-    }
-    if (G2_deviance == G2_deviance & df_deviance > 0) {
-        G2_deviance_pValue = pchisq(target = G2_deviance, df = df_deviance, lower.tail=FALSE);
-    }
-    
-    Z_logl_scaled = Z_logl / sqrt (dispersion);
-    X2_scaled = X2_pearson / dispersion;
-    G2_scaled = G2_deviance / dispersion;
-
-    if (Z_logl_scaled == Z_logl_scaled) {
-        Z_logl_scaled_absneg = - abs (Z_logl_scaled);
-        Z_logl_scaled_pValue = 2.0 * pnorm(target = Z_logl_scaled_absneg);
-    }
-    if (X2_scaled == X2_scaled & df_pearson > 0) {
-        X2_scaled_pValue = pchisq(target = X2_scaled, df = df_pearson, lower.tail=FALSE);
-    }
-    if (G2_scaled == G2_scaled & df_deviance > 0) {
-        G2_scaled_pValue = pchisq(target = G2_scaled, df = df_deviance, lower.tail=FALSE);
-    }
-    
-    avg_tot_Y = colSums (    Y    ) / sum (Y_counts);
-    avg_res_Y = colSums (Y - means) / sum (Y_counts);
-    
-    ss_avg_tot_Y = colSums ((    Y     - Y_counts %*% avg_tot_Y) ^ 2);
-    ss_res_Y     = colSums ((Y - means) ^ 2);
-    ss_avg_res_Y = colSums ((Y - means - Y_counts %*% avg_res_Y) ^ 2);
-    
-    df_ss_res_Y  = sum (Y_counts) - num_features;
-    if (is_intercept) {
-        df_ss_avg_res_Y = df_ss_res_Y;
-    } else {
-        df_ss_avg_res_Y = df_ss_res_Y - 1;
-    }
-    
-    var_tot_Y = ss_avg_tot_Y / (sum (Y_counts) - 1);
-    if (df_ss_avg_res_Y > 0) {
-        var_res_Y = ss_avg_res_Y / df_ss_avg_res_Y;
-    } else {
-        var_res_Y = matrix (0.0, rows = 1, cols = ncol (Y)) / 0.0;
-    }
-    plain_R2_nobias  = 1 - ss_avg_res_Y / ss_avg_tot_Y;
-    adjust_R2_nobias = 1 - var_res_Y / var_tot_Y;
-    plain_R2  = 1 - ss_res_Y / ss_avg_tot_Y;
-    if (df_ss_res_Y > 0) {
-        adjust_R2 = 1 - (ss_res_Y / df_ss_res_Y) / var_tot_Y;
-    } else {
-        adjust_R2 = matrix (0.0, rows = 1, cols = ncol (Y)) / 0.0;
-    }
-    
-    predicted_avg_var_res_Y = dispersion * colSums (vars) / sum (Y_counts);
-    
-    # PREPARING THE OUTPUT CSV STATISTICS FILE
-    
-    str = "LOGLHOOD_Z,,FALSE," + Z_logl;
-    str = append (str, "LOGLHOOD_Z_PVAL,,FALSE," + Z_logl_pValue);
-    str = append (str, "PEARSON_X2,,FALSE," + X2_pearson);
-    str = append (str, "PEARSON_X2_BY_DF,,FALSE," + (X2_pearson / df_pearson));
-    str = append (str, "PEARSON_X2_PVAL,,FALSE," + X2_pearson_pValue);
-    str = append (str, "DEVIANCE_G2,,FALSE," + G2_deviance);
-    str = append (str, "DEVIANCE_G2_BY_DF,,FALSE," + (G2_deviance / df_deviance));
-    str = append (str, "DEVIANCE_G2_PVAL,,FALSE," + G2_deviance_pValue);
-    str = append (str, "LOGLHOOD_Z,,TRUE," + Z_logl_scaled);
-    str = append (str, "LOGLHOOD_Z_PVAL,,TRUE," + Z_logl_scaled_pValue);
-    str = append (str, "PEARSON_X2,,TRUE," + X2_scaled);
-    str = append (str, "PEARSON_X2_BY_DF,,TRUE," + (X2_scaled / df_pearson));
-    str = append (str, "PEARSON_X2_PVAL,,TRUE," + X2_scaled_pValue);
-    str = append (str, "DEVIANCE_G2,,TRUE," + G2_scaled);
-    str = append (str, "DEVIANCE_G2_BY_DF,,TRUE," + (G2_scaled / df_deviance));
-    str = append (str, "DEVIANCE_G2_PVAL,,TRUE," + G2_scaled_pValue);
-
-    for (i in 1:ncol(Y)) {
-        str = append (str, "AVG_TOT_Y," + i + ",," + castAsScalar (avg_tot_Y [1, i]));
-        str = append (str, "STDEV_TOT_Y," + i + ",," + castAsScalar (sqrt (var_tot_Y [1, i])));
-        str = append (str, "AVG_RES_Y," + i + ",," + castAsScalar (avg_res_Y [1, i]));
-        str = append (str, "STDEV_RES_Y," + i + ",," + castAsScalar (sqrt (var_res_Y [1, i])));
-        str = append (str, "PRED_STDEV_RES," + i + ",TRUE," + castAsScalar (sqrt (predicted_avg_var_res_Y [1, i])));
-        str = append (str, "PLAIN_R2," + i + ",," + castAsScalar (plain_R2 [1, i]));
-        str = append (str, "ADJUSTED_R2," + i + ",," + castAsScalar (adjust_R2 [1, i]));
-        str = append (str, "PLAIN_R2_NOBIAS," + i + ",," + castAsScalar (plain_R2_nobias [1, i]));
-        str = append (str, "ADJUSTED_R2_NOBIAS," + i + ",," + castAsScalar (adjust_R2_nobias [1, i]));
-    }
-    
-    if (fileO != " ") {
-        write (str, fileO);
-    } else {
-        print (str);
-    }
-}
-
-glm_means_and_vars = 
-    function (Matrix[double] linear_terms, int dist_type, double var_power, int link_type, double link_power)
-    return (Matrix[double] means, Matrix[double] vars)
-    # NOTE: "vars" represents the variance without dispersion, i.e. the V(mu) function.
-{
-    num_points = nrow (linear_terms);
-    if (dist_type == 1 & link_type == 1) {
-    # POWER DISTRIBUTION
-        if          (link_power ==  0.0) {
-            y_mean = exp (linear_terms);
-        } else { if (link_power ==  1.0) {
-            y_mean = linear_terms;
-        } else { if (link_power == -1.0) {
-            y_mean = 1.0 / linear_terms;
-        } else {
-            y_mean = linear_terms ^ (1.0 / link_power);
-        }}}
-        if (var_power == 0.0) {
-            var_function = matrix (1.0, rows = num_points, cols = 1);
-        } else { if (var_power == 1.0) {
-            var_function = y_mean;
-        } else {
-            var_function = y_mean ^ var_power;
-        }}
-        means = y_mean;
-        vars = var_function;
-    } else { if (dist_type == 2 & link_type >= 1 & link_type <= 5) {
-    # BINOMIAL/BERNOULLI DISTRIBUTION
-        y_prob = matrix (0.0, rows = num_points, cols = 2);
-        if          (link_type == 1 & link_power == 0.0)  { # Binomial.log
-            y_prob [, 1]  = exp (linear_terms);
-            y_prob [, 2]  = 1.0 - y_prob [, 1];
-        } else { if (link_type == 1 & link_power != 0.0)  { # Binomial.power_nonlog
-            y_prob [, 1]  = linear_terms ^ (1.0 / link_power);
-            y_prob [, 2]  = 1.0 - y_prob [, 1];
-        } else { if (link_type == 2)                      { # Binomial.logit
-            elt = exp (linear_terms);
-            y_prob [, 1]  = elt / (1.0 + elt);
-            y_prob [, 2]  = 1.0 / (1.0 + elt);
-        } else { if (link_type == 3)                      { # Binomial.probit
-            sign_lt = 2 * ppred (linear_terms, 0.0, ">=") - 1;
-            t_gp = 1.0 / (1.0 + abs (linear_terms) * 0.231641888);  # 0.231641888 = 0.3275911 / sqrt (2.0)
-            erf_corr =
-                t_gp * ( 0.254829592 
-              + t_gp * (-0.284496736 # "Handbook of Mathematical Functions", ed. by M. Abramowitz and I.A. Stegun,
-              + t_gp * ( 1.421413741 # U.S. Nat-l Bureau of Standards, 10th print (Dec 1972), Sec. 7.1.26, p. 299
-              + t_gp * (-1.453152027 
-              + t_gp *   1.061405429)))) * sign_lt * exp (- (linear_terms ^ 2) / 2.0);
-            y_prob [, 1] = (1 + sign_lt) - erf_corr;
-            y_prob [, 2] = (1 - sign_lt) + erf_corr;
-            y_prob = y_prob / 2;
-        } else { if (link_type == 4)                      { # Binomial.cloglog
-            elt = exp (linear_terms);
-            is_too_small = ppred (10000000 + elt, 10000000, "==");
-            y_prob [, 2] = exp (- elt);
-            y_prob [, 1] = (1 - is_too_small) * (1.0 - y_prob [, 2]) + is_too_small * elt * (1.0 - elt / 2);
-        } else { if (link_type == 5)                      { # Binomial.cauchit
-            atan_linear_terms = atan (linear_terms);
-            y_prob [, 1] = 0.5 + atan_linear_terms / 3.1415926535897932384626433832795;
-            y_prob [, 2] = 0.5 - atan_linear_terms / 3.1415926535897932384626433832795;
-        }}}}}}
-        means = y_prob;
-        ones_ctg = matrix (1, rows = 2, cols = 1);
-        vars  = means * (means %*% (1 - diag (ones_ctg)));
-    } else { if (dist_type == 3) {
-    # MULTINOMIAL LOGIT DISTRIBUTION
-        elt = exp (linear_terms);
-        ones_pts = matrix (1, rows = num_points, cols = 1);
-        elt = append (elt, ones_pts);
-        ones_ctg = matrix (1, rows = ncol (elt), cols = 1);
-        means = elt / (rowSums (elt) %*% t(ones_ctg));
-        vars  = means * (means %*% (1 - diag (ones_ctg)));
-    } else {
-        means = matrix (0.0, rows = num_points, cols = 1);
-        vars  = matrix (0.0, rows = num_points, cols = 1);
-}   }}}
-
-glm_partial_loglikelihood_for_power_dist_and_link =   # Assumes: dist_type == 1 & link_type == 1
-    function (Matrix[double] linear_terms, Matrix[double] Y, double var_power, double link_power)
-    return (Matrix[double] log_l_part)
-{
-    num_records = nrow (Y);
-    if (var_power == 1.0) { # Poisson
-        if (link_power == 0.0)  { # Poisson.log
-            is_natural_parameter_log_zero = ppred (linear_terms, -1.0/0.0, "==");
-            natural_parameters = replace (target = linear_terms, pattern = -1.0/0.0, replacement = 0);
-            b_cumulant = exp (linear_terms);
-        } else {                  # Poisson.power_nonlog
-            is_natural_parameter_log_zero = ppred (linear_terms, 0.0, "==");
-            natural_parameters = log (linear_terms + is_natural_parameter_log_zero) / link_power;
-            b_cumulant = (linear_terms + is_natural_parameter_log_zero) ^ (1.0 / link_power) - is_natural_parameter_log_zero;
-        }
-        is_minus_infinity = ppred (Y, 0, ">") * is_natural_parameter_log_zero;
-        log_l_part = Y * natural_parameters - b_cumulant - is_minus_infinity / (1 - is_minus_infinity);
-    } else {
-        if (var_power == 2.0 & link_power == 0.0)  { # Gamma.log
-            natural_parameters = - exp (- linear_terms);
-            b_cumulant = linear_terms;
-        } else { if (var_power == 2.0)  { # Gamma.power_nonlog
-            natural_parameters = - linear_terms ^ (- 1.0 / link_power);
-            b_cumulant = log (linear_terms) / link_power;
-        } else { if (link_power == 0.0) { # PowerDist.log
-            natural_parameters = exp (linear_terms * (1.0 - var_power)) / (1.0 - var_power);
-            b_cumulant = exp (linear_terms * (2.0 - var_power)) / (2.0 - var_power);
-        } else {                          # PowerDist.power_nonlog
-            power_np = (1.0 - var_power) / link_power;
-            natural_parameters = (linear_terms ^ power_np) / (1.0 - var_power);
-            power_cu = (2.0 - var_power) / link_power;
-            b_cumulant = (linear_terms ^ power_cu) / (2.0 - var_power);
-        }}}
-        log_l_part = Y * natural_parameters - b_cumulant;
-}   }
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# 
+# THIS SCRIPT APPLIES THE ESTIMATED PARAMETERS OF A GLM-TYPE REGRESSION TO A NEW (TEST) DATASET
+#
+# INPUT PARAMETERS:
+# ---------------------------------------------------------------------------------------------
+# NAME  TYPE   DEFAULT  MEANING
+# ---------------------------------------------------------------------------------------------
+# X     String  ---     Location to read the matrix X of records (feature vectors)
+# B     String  ---     Location to read GLM regression parameters (the betas), with dimensions
+#                           ncol(X)   x k: do not add intercept
+#                           ncol(X)+1 x k: add intercept as given by the last B-row
+#                           if k > 1, use only B[, 1] unless it is Multinomial Logit (dfam=3)
+# M     String  " "     Location to write the matrix of predicted response means/probabilities:
+#                           nrow(X) x 1  : for Power-type distributions (dfam=1)
+#                           nrow(X) x 2  : for Binomial distribution (dfam=2), column 2 is "No"
+#                           nrow(X) x k+1: for Multinomial Logit (dfam=3), col# k+1 is baseline
+# Y     String  " "     Location to read response matrix Y, with the following dimensions:
+#                           nrow(X) x 1  : for all distributions (dfam=1 or 2 or 3)
+#                           nrow(X) x 2  : for Binomial (dfam=2) given by (#pos, #neg) counts
+#                           nrow(X) x k+1: for Multinomial (dfam=3) given by category counts
+# O     String  " "     Location to write the printed statistics; by default is standard output
+# dfam  Int     1       GLM distribution family: 1 = Power, 2 = Binomial, 3 = Multinomial Logit
+# vpow  Double  0.0     Power for Variance defined as (mean)^power (ignored if dfam != 1):
+#                       0.0 = Gaussian, 1.0 = Poisson, 2.0 = Gamma, 3.0 = Inverse Gaussian
+# link  Int     0       Link function code: 0 = canonical (depends on distribution), 1 = Power,
+#                       2 = Logit, 3 = Probit, 4 = Cloglog, 5 = Cauchit; ignored if Multinomial
+# lpow  Double  1.0     Power for Link function defined as (mean)^power (ignored if link != 1):
+#                       -2.0 = 1/mu^2, -1.0 = reciprocal, 0.0 = log, 0.5 = sqrt, 1.0 = identity
+# disp  Double  1.0     Dispersion value, when available
+# fmt   String "text"   Matrix output format, usually "text" or "csv" (for matrices only)
+# ---------------------------------------------------------------------------------------------
+# OUTPUT: Matrix M of predicted means/probabilities, some statistics in CSV format (see below)
+# The statistics are printed one per each line, in the following CSV format:
+# NAME,[COLUMN],[SCALED],VALUE
+#   NAME   is the string identifier for the statistic, see the table below.
+#   COLUMN is an optional integer value that specifies the Y-column for per-column statistics;
+#          note that a Binomial/Multinomial one-column Y input is converted into multi-column.
+#   SCALED is an optional Boolean value (TRUE or FALSE) that tells us whether or not the input
+#          dispersion parameter (disp) scaling has been applied to this statistic.
+#   VALUE  is the value of the statistic.
+#
+# NAME                  COLUMN  SCALED  MEANING
+# ---------------------------------------------------------------------------------------------
+# LOGLHOOD_Z                      +     Log-Likelihood Z-score (in st.dev's from mean)
+# LOGLHOOD_Z_PVAL                 +     Log-Likelihood Z-score p-value
+# PEARSON_X2                      +     Pearson residual X^2 statistic
+# PEARSON_X2_BY_DF                +     Pearson X^2 divided by degrees of freedom
+# PEARSON_X2_PVAL                 +     Pearson X^2 p-value
+# DEVIANCE_G2                     +     Deviance from saturated model G^2 statistic
+# DEVIANCE_G2_BY_DF               +     Deviance G^2 divided by degrees of freedom
+# DEVIANCE_G2_PVAL                +     Deviance G^2 p-value
+# AVG_TOT_Y               +             Average of Y column for a single response value
+# STDEV_TOT_Y             +             St.Dev. of Y column for a single response value
+# AVG_RES_Y               +             Average of column residual, i.e. of Y - mean(Y|X)
+# STDEV_RES_Y             +             St.Dev. of column residual, i.e. of Y - mean(Y|X)
+# PRED_STDEV_RES          +       +     Model-predicted St.Dev. of column residual
+# PLAIN_R2                +             Plain R^2 of Y column residual with bias included
+# ADJUSTED_R2             +             Adjusted R^2 of Y column residual with bias included
+# PLAIN_R2_NOBIAS         +             Plain R^2 of Y column residual with bias subtracted
+# ADJUSTED_R2_NOBIAS      +             Adjusted R^2 of Y column residual with bias subtracted
+# ---------------------------------------------------------------------------------------------
+#
+# Example with distribution = "Poisson.log":
+# hadoop jar SystemML.jar -f GLM_HOME/GLM-predict.dml -nvargs dfam=1 vpow=1.0 link=1 lpow=0.0
+#   disp=3.0 fmt=csv X=INPUT_DIR/X B=INPUT_DIR/B Y=INPUT_DIR/Y M=OUTPUT_DIR/M O=OUTPUT_DIR/out.csv
+
+# Default values for input parameters:
+# X and B are read unconditionally below; M, Y and O are optional and default to the
+# one-space string " ", which the rest of the script compares against to mean "not supplied".
+fileX = $X;
+fileB = $B;
+fileM = ifdef ($M, " ");
+fileY = ifdef ($Y, " ");
+fileO = ifdef ($O, " ");
+fmtM  = ifdef ($fmt, "text");
+
+# Numeric codes, as they are interpreted by glm_means_and_vars further down in this script:
+#   dist_type: 1 = power distribution, 2 = binomial/Bernoulli, 3 = multinomial logit
+#   link_type: 0 = canonical (resolved below), 1 = power (link_power 0.0 means log),
+#              2 = logit, 3 = probit, 4 = cloglog, 5 = cauchit
+dist_type  = ifdef ($dfam, 1);    # $dfam = 1;
+var_power  = ifdef ($vpow, 0.0);  # $vpow = 0.0;
+link_type  = ifdef ($link, 0);    # $link = 0;
+link_power = ifdef ($lpow, 1.0);  # $lpow = 1.0;
+dispersion = ifdef ($disp, 1.0);  # $disp = 1.0;
+
+# Force the numeric parameters to double (they may have been passed as integers).
+var_power  = as.double (var_power);
+link_power = as.double (link_power); 
+dispersion = as.double (dispersion);
+
+# Resolve the effective link:
+#   - multinomial (dist_type 3) always uses logit, whatever $link says;
+#   - otherwise link_type 0 is replaced by the canonical link of the family:
+#     power distributions get a power link with exponent 1 - var_power
+#     (e.g. var_power 1.0 gives link_power 0.0, i.e. the log link), binomial gets logit.
+#   Any other dist_type / link_type combination is left untouched.
+if (dist_type == 3) {
+    link_type = 2;
+} else { if (link_type == 0) { # Canonical Link
+    if (dist_type == 1) {
+        link_type = 1;
+        link_power = 1.0 - var_power;
+    } else { if (dist_type == 2) {
+            link_type = 2;
+}}} }
+
+# Load the feature matrix; one row per record, one column per feature.
+X = read (fileX);
+num_records  = nrow (X);
+num_features = ncol (X);
+
+# Load the regression coefficients. The first ncol(X) rows of B are taken as the feature
+# weights and the LAST row as the intercept. For multinomial all columns of B are used
+# (one per non-baseline category); for the other families only column 1 is used.
+B_full = read (fileB);
+if (dist_type == 3) {
+    beta =  B_full [1 : ncol (X),  ];
+    intercept = B_full [nrow(B_full),  ];
+} else {
+    beta =  B_full [1 : ncol (X), 1];
+    intercept = B_full [nrow(B_full), 1];
+}
+# If B has exactly ncol(X) rows there is no intercept row: the "intercept" picked up above
+# is really the last feature weight, so it is zeroed out. Otherwise the intercept counts
+# as one more fitted parameter for the degrees-of-freedom computations.
+# NOTE(review): any row count other than ncol(X) is treated as "has intercept";
+# presumably B is expected to have ncol(X) or ncol(X)+1 rows -- confirm.
+if (nrow (B_full) == ncol (X)) {
+    intercept = 0.0 * intercept;
+    is_intercept = FALSE;
+} else {
+    num_features = num_features + 1;
+    is_intercept = TRUE;
+}
+
+# Linear predictor: X %*% beta plus the intercept row replicated to every record.
+ones_rec = matrix (1, rows = num_records, cols = 1);
+linear_terms = X %*% beta + ones_rec %*% intercept;
+# Apply the inverse link to get predicted means and the variance function values.
+[means, vars] =
+    glm_means_and_vars (linear_terms, dist_type, var_power, link_type, link_power);
+    
+# Write the predicted means/probabilities only if an output location M was supplied.
+if (fileM != " ") {
+    write (means, fileM, format=fmtM);
+}
+
+# Goodness-of-fit statistics: computed only when a response file Y was supplied.
+# Results are emitted as CSV records NAME,[COLUMN],[SCALED],VALUE to file O, or printed.
+if (fileY != " ")
+{
+    Y = read (fileY);
+    ones_ctg = matrix (1, rows = ncol(Y), cols = 1);
+    
+    # Statistics To Compute:
+    # 0.0 / 0.0 is used as a NaN placeholder meaning "not computed"; the x == x tests
+    # further down appear to rely on NaN != NaN to skip such values, so the matching
+    # p-values then stay NaN as well. The df_* values start at -1 for the same reason.
+    
+    Z_logl               = 0.0 / 0.0;
+    Z_logl_pValue        = 0.0 / 0.0;
+    X2_pearson           = 0.0 / 0.0;
+    df_pearson           = -1;
+    G2_deviance          = 0.0 / 0.0;
+    df_deviance          = -1;
+    X2_pearson_pValue    = 0.0 / 0.0;
+    G2_deviance_pValue   = 0.0 / 0.0;
+    Z_logl_scaled        = 0.0 / 0.0;
+    Z_logl_scaled_pValue = 0.0 / 0.0;
+    X2_scaled            = 0.0 / 0.0;
+    X2_scaled_pValue     = 0.0 / 0.0;
+    G2_scaled            = 0.0 / 0.0;
+    G2_scaled_pValue     = 0.0 / 0.0;
+    
+    # NOTE(review): Y_counts is assigned only inside the two branches below. For
+    # dist_type == 1 with link_type != 1 neither branch runs, yet Y_counts is used by the
+    # per-column statistics afterwards -- confirm such inputs are rejected or never occur.
+    if (dist_type == 1 & link_type == 1) {
+    #
+    # POWER DISTRIBUTIONS (GAUSSIAN, POISSON, GAMMA, ETC.)
+    #
+        # Linear terms of the saturated model, i.e. the link applied to Y itself.
+        # For the log link, log(0) is avoided by adding the zero indicator (log(1) = 0) and
+        # then subtracting 1/(1-1) = Infinity for those cells, which yields -Infinity there.
+        if (link_power == 0.0) {
+            is_zero_Y = ppred (Y, 0.0, "==");
+            lt_saturated = log (Y + is_zero_Y) - is_zero_Y / (1.0 - is_zero_Y);
+        } else {
+            lt_saturated = Y ^ link_power;
+        }
+        # Every record counts as exactly one observation.
+        Y_counts = ones_rec;
+
+        # Pearson X^2: squared residuals divided by the variance function V(mu).
+        X2_pearson = sum ((Y - means) ^ 2 / vars);
+        df_pearson = num_records - num_features;
+
+        log_l_part = 
+            glm_partial_loglikelihood_for_power_dist_and_link (linear_terms, Y, var_power, link_power);
+        log_l_part_saturated = 
+            glm_partial_loglikelihood_for_power_dist_and_link (lt_saturated, Y, var_power, link_power);
+            
+        # Deviance: twice the log-likelihood gap between the saturated and the fitted model.
+        G2_deviance = 2 * sum (log_l_part_saturated) - 2 * sum (log_l_part);
+        df_deviance = num_records - num_features;
+        
+    } else { if (dist_type >= 2) {
+    #
+    # BINOMIAL AND MULTINOMIAL DISTRIBUTIONS
+    #
+        # A one-column Y holds category labels and is expanded into one indicator column
+        # per category; a multi-column Y is taken as per-category counts for each record.
+        if (ncol (Y) == 1) {
+            num_categories = ncol (beta) + 1;
+            if (min (Y) <= 0) { 
+                # Category labels "0", "-1" etc. are converted into the baseline label
+                Y = Y + (- Y + num_categories) * ppred (Y, 0, "<=");
+            }
+            # table() yields only max(Y) columns, so the result is copied into a zero matrix
+            # of exactly num_categories columns; columns beyond num_categories are not copied.
+            Y_size = min (num_categories, max(Y));
+            Y_unsized = table (seq (1, num_records, 1), Y);
+            Y = matrix (0, rows = num_records, cols = num_categories);
+            Y [, 1 : Y_size] = Y_unsized [, 1 : Y_size];
+            Y_counts = ones_rec;
+        } else {
+            Y_counts = rowSums (Y);
+        }
+        
+        # P holds the predicted category probabilities. The zero indicators are added inside
+        # log() so that log(0) is never evaluated; those cells are multiplied by 0 anyway.
+        P = means;
+        zero_Y = ppred (Y, 0.0, "==");
+        zero_P = ppred (P, 0.0, "==");
+        # Recomputed because Y may have been widened to num_categories columns above.
+        ones_ctg = matrix (1, rows = ncol(Y), cols = 1);
+        
+        # Z-score of the observed log-likelihood: (observed - E_logl) / sqrt (V_logl), where
+        # E_logl and V_logl are built from sum(P log P) and sum(P log^2 P) per record,
+        # weighted by the number of observations in that record.
+        logl_vec = rowSums (Y *  log (P + zero_Y)   );
+        ent1_vec = rowSums (P *  log (P + zero_P)   );
+        ent2_vec = rowSums (P * (log (P + zero_P))^2);
+        E_logl   = sum (Y_counts * ent1_vec);
+        V_logl   = sum (Y_counts * (ent2_vec - ent1_vec ^ 2));
+        Z_logl   = (sum (logl_vec) - E_logl) / sqrt (V_logl);
+        
+        # From here on means/vars are on the count scale (probability times record count).
+        means = means * (Y_counts %*% t(ones_ctg));
+        vars  = vars  * (Y_counts %*% t(ones_ctg));
+        
+        # Fraction of cells whose expected count is below 5 and below 1, respectively.
+        frac_below_5 = sum (ppred (means, 5, "<")) / (nrow (means) * ncol (means));
+        frac_below_1 = sum (ppred (means, 1, "<")) / (nrow (means) * ncol (means));
+        
+        # Warn when more than 20% of cells are below 5 or any cell is below 1.
+        if (frac_below_5 > 0.2 | frac_below_1 > 0.0) {
+            print ("WARNING: residual statistics are inaccurate here due to low cell means.");
+        }
+        
+        # Pearson X^2 over all cells, using the expected counts as denominators.
+        X2_pearson = sum ((Y - means) ^ 2 / means);
+        df_pearson = (num_records - num_features) * (ncol(Y) - 1);
+        
+        # Deviance G^2 = 2 * sum (Y * log (Y / expected)); zero cells of Y contribute 0.
+        G2_deviance = 2 * sum (Y * log ((Y + zero_Y) / (means + zero_Y)));
+        df_deviance = (num_records - num_features) * (ncol(Y) - 1);
+    }}
+    
+    # Unscaled p-values: two-sided normal tail for Z, upper chi-square tail for X^2 and G^2.
+    if (Z_logl == Z_logl) {
+        Z_logl_absneg = - abs (Z_logl);
+        Z_logl_pValue = 2.0 * pnorm(target = Z_logl_absneg);
+    }
+    if (X2_pearson == X2_pearson & df_pearson > 0) {
+        X2_pearson_pValue = pchisq(target = X2_pearson, df = df_pearson, lower.tail=FALSE);
+    }
+    if (G2_deviance == G2_deviance & df_deviance > 0) {
+        G2_deviance_pValue = pchisq(target = G2_deviance, df = df_deviance, lower.tail=FALSE);
+    }
+    
+    # Dispersion-scaled variants: Z is divided by sqrt (dispersion), X^2 and G^2 by dispersion.
+    Z_logl_scaled = Z_logl / sqrt (dispersion);
+    X2_scaled = X2_pearson / dispersion;
+    G2_scaled = G2_deviance / dispersion;
+
+    if (Z_logl_scaled == Z_logl_scaled) {
+        Z_logl_scaled_absneg = - abs (Z_logl_scaled);
+        Z_logl_scaled_pValue = 2.0 * pnorm(target = Z_logl_scaled_absneg);
+    }
+    if (X2_scaled == X2_scaled & df_pearson > 0) {
+        X2_scaled_pValue = pchisq(target = X2_scaled, df = df_pearson, lower.tail=FALSE);
+    }
+    if (G2_scaled == G2_scaled & df_deviance > 0) {
+        G2_scaled_pValue = pchisq(target = G2_scaled, df = df_deviance, lower.tail=FALSE);
+    }
+    
+    # Per-column statistics (1 x ncol(Y) row vectors): averages of Y and of the residual
+    # Y - means, each taken per single observation (divided by the total observation count).
+    avg_tot_Y = colSums (    Y    ) / sum (Y_counts);
+    avg_res_Y = colSums (Y - means) / sum (Y_counts);
+    
+    # Sums of squares: Y around its average, raw residuals ("bias included"), and
+    # residuals around their own average ("bias subtracted").
+    ss_avg_tot_Y = colSums ((    Y     - Y_counts %*% avg_tot_Y) ^ 2);
+    ss_res_Y     = colSums ((Y - means) ^ 2);
+    ss_avg_res_Y = colSums ((Y - means - Y_counts %*% avg_res_Y) ^ 2);
+    
+    # Residual degrees of freedom; without an intercept one more is deducted for the
+    # bias-subtracted residuals.
+    df_ss_res_Y  = sum (Y_counts) - num_features;
+    if (is_intercept) {
+        df_ss_avg_res_Y = df_ss_res_Y;
+    } else {
+        df_ss_avg_res_Y = df_ss_res_Y - 1;
+    }
+    
+    # Variances and R^2 values; a row of NaN (0.0 / 0.0) is produced when the
+    # degrees of freedom are not positive.
+    var_tot_Y = ss_avg_tot_Y / (sum (Y_counts) - 1);
+    if (df_ss_avg_res_Y > 0) {
+        var_res_Y = ss_avg_res_Y / df_ss_avg_res_Y;
+    } else {
+        var_res_Y = matrix (0.0, rows = 1, cols = ncol (Y)) / 0.0;
+    }
+    plain_R2_nobias  = 1 - ss_avg_res_Y / ss_avg_tot_Y;
+    adjust_R2_nobias = 1 - var_res_Y / var_tot_Y;
+    plain_R2  = 1 - ss_res_Y / ss_avg_tot_Y;
+    if (df_ss_res_Y > 0) {
+        adjust_R2 = 1 - (ss_res_Y / df_ss_res_Y) / var_tot_Y;
+    } else {
+        adjust_R2 = matrix (0.0, rows = 1, cols = ncol (Y)) / 0.0;
+    }
+    
+    # Model-predicted residual variance per observation: dispersion times the average V(mu).
+    predicted_avg_var_res_Y = dispersion * colSums (vars) / sum (Y_counts);
+    
+    # PREPARING THE OUTPUT CSV STATISTICS FILE
+    # Each append() call adds one NAME,COLUMN,SCALED,VALUE record to str. Global statistics
+    # leave COLUMN empty and are listed first unscaled (FALSE), then dispersion-scaled (TRUE).
+    
+    str = "LOGLHOOD_Z,,FALSE," + Z_logl;
+    str = append (str, "LOGLHOOD_Z_PVAL,,FALSE," + Z_logl_pValue);
+    str = append (str, "PEARSON_X2,,FALSE," + X2_pearson);
+    str = append (str, "PEARSON_X2_BY_DF,,FALSE," + (X2_pearson / df_pearson));
+    str = append (str, "PEARSON_X2_PVAL,,FALSE," + X2_pearson_pValue);
+    str = append (str, "DEVIANCE_G2,,FALSE," + G2_deviance);
+    str = append (str, "DEVIANCE_G2_BY_DF,,FALSE," + (G2_deviance / df_deviance));
+    str = append (str, "DEVIANCE_G2_PVAL,,FALSE," + G2_deviance_pValue);
+    str = append (str, "LOGLHOOD_Z,,TRUE," + Z_logl_scaled);
+    str = append (str, "LOGLHOOD_Z_PVAL,,TRUE," + Z_logl_scaled_pValue);
+    str = append (str, "PEARSON_X2,,TRUE," + X2_scaled);
+    str = append (str, "PEARSON_X2_BY_DF,,TRUE," + (X2_scaled / df_pearson));
+    str = append (str, "PEARSON_X2_PVAL,,TRUE," + X2_scaled_pValue);
+    str = append (str, "DEVIANCE_G2,,TRUE," + G2_scaled);
+    str = append (str, "DEVIANCE_G2_BY_DF,,TRUE," + (G2_scaled / df_deviance));
+    str = append (str, "DEVIANCE_G2_PVAL,,TRUE," + G2_scaled_pValue);
+
+    # Per-column records: COLUMN is the 1-based Y column index; SCALED is left empty
+    # except for PRED_STDEV_RES, which already includes the dispersion factor.
+    for (i in 1:ncol(Y)) {
+        str = append (str, "AVG_TOT_Y," + i + ",," + castAsScalar (avg_tot_Y [1, i]));
+        str = append (str, "STDEV_TOT_Y," + i + ",," + castAsScalar (sqrt (var_tot_Y [1, i])));
+        str = append (str, "AVG_RES_Y," + i + ",," + castAsScalar (avg_res_Y [1, i]));
+        str = append (str, "STDEV_RES_Y," + i + ",," + castAsScalar (sqrt (var_res_Y [1, i])));
+        str = append (str, "PRED_STDEV_RES," + i + ",TRUE," + castAsScalar (sqrt (predicted_avg_var_res_Y [1, i])));
+        str = append (str, "PLAIN_R2," + i + ",," + castAsScalar (plain_R2 [1, i]));
+        str = append (str, "ADJUSTED_R2," + i + ",," + castAsScalar (adjust_R2 [1, i]));
+        str = append (str, "PLAIN_R2_NOBIAS," + i + ",," + castAsScalar (plain_R2_nobias [1, i]));
+        str = append (str, "ADJUSTED_R2_NOBIAS," + i + ",," + castAsScalar (adjust_R2_nobias [1, i]));
+    }
+    
+    # Write the statistics to file O when one was supplied, otherwise print them.
+    if (fileO != " ") {
+        write (str, fileO);
+    } else {
+        print (str);
+    }
+}
+
+# Applies the inverse link function to the linear terms and returns the predicted means
+# (or category probabilities) together with the matching variance-function values.
+#   linear_terms : one row per record; one column, except for multinomial where there is
+#                  one column per non-baseline category
+#   dist_type    : 1 = power distribution, 2 = binomial/Bernoulli, 3 = multinomial logit
+#   var_power    : exponent of the variance function V(mu) = mu ^ var_power (dist_type 1 only)
+#   link_type    : 1 = power, 2 = logit, 3 = probit, 4 = cloglog, 5 = cauchit
+#   link_power   : exponent of the power link; 0.0 selects the log link
+# Returns:
+#   means : predicted means (dist_type 1), or probabilities with one column per category
+#           (two columns for binomial, ncol (linear_terms) + 1 for multinomial)
+#   vars  : variance function values, same shape as means
+# Unsupported dist_type / link_type combinations return all-zero one-column matrices.
+glm_means_and_vars = 
+    function (Matrix[double] linear_terms, int dist_type, double var_power, int link_type, double link_power)
+    return (Matrix[double] means, Matrix[double] vars)
+    # NOTE: "vars" represents the variance without dispersion, i.e. the V(mu) function.
+{
+    num_points = nrow (linear_terms);
+    if (dist_type == 1 & link_type == 1) {
+    # POWER DISTRIBUTION
+        # Inverse power link; the exponents 0 (log), 1 (identity) and -1 (reciprocal)
+        # get their own direct formulas, everything else uses the general power.
+        if          (link_power ==  0.0) {
+            y_mean = exp (linear_terms);
+        } else { if (link_power ==  1.0) {
+            y_mean = linear_terms;
+        } else { if (link_power == -1.0) {
+            y_mean = 1.0 / linear_terms;
+        } else {
+            y_mean = linear_terms ^ (1.0 / link_power);
+        }}}
+        # Variance function mu ^ var_power, with shortcuts for the exponents 0 and 1.
+        if (var_power == 0.0) {
+            var_function = matrix (1.0, rows = num_points, cols = 1);
+        } else { if (var_power == 1.0) {
+            var_function = y_mean;
+        } else {
+            var_function = y_mean ^ var_power;
+        }}
+        means = y_mean;
+        vars = var_function;
+    } else { if (dist_type == 2 & link_type >= 1 & link_type <= 5) {
+    # BINOMIAL/BERNOULLI DISTRIBUTION
+        # y_prob column 1 is the inverse link of the linear terms; column 2 is its complement.
+        y_prob = matrix (0.0, rows = num_points, cols = 2);
+        if          (link_type == 1 & link_power == 0.0)  { # Binomial.log
+            y_prob [, 1]  = exp (linear_terms);
+            y_prob [, 2]  = 1.0 - y_prob [, 1];
+        } else { if (link_type == 1 & link_power != 0.0)  { # Binomial.power_nonlog
+            y_prob [, 1]  = linear_terms ^ (1.0 / link_power);
+            y_prob [, 2]  = 1.0 - y_prob [, 1];
+        } else { if (link_type == 2)                      { # Binomial.logit
+            elt = exp (linear_terms);
+            y_prob [, 1]  = elt / (1.0 + elt);
+            y_prob [, 2]  = 1.0 / (1.0 + elt);
+        } else { if (link_type == 3)                      { # Binomial.probit
+            # Standard normal CDF computed through a polynomial approximation of erf
+            # (reference cited next to the coefficients); sign_lt is +1 for non-negative
+            # linear terms and -1 otherwise, so the formula covers both tails.
+            sign_lt = 2 * ppred (linear_terms, 0.0, ">=") - 1;
+            t_gp = 1.0 / (1.0 + abs (linear_terms) * 0.231641888);  # 0.231641888 = 0.3275911 / sqrt (2.0)
+            erf_corr =
+                t_gp * ( 0.254829592 
+              + t_gp * (-0.284496736 # "Handbook of Mathematical Functions", ed. by M. Abramowitz and I.A. Stegun,
+              + t_gp * ( 1.421413741 # U.S. Nat-l Bureau of Standards, 10th print (Dec 1972), Sec. 7.1.26, p. 299
+              + t_gp * (-1.453152027 
+              + t_gp *   1.061405429)))) * sign_lt * exp (- (linear_terms ^ 2) / 2.0);
+            y_prob [, 1] = (1 + sign_lt) - erf_corr;
+            y_prob [, 2] = (1 - sign_lt) + erf_corr;
+            y_prob = y_prob / 2;
+        } else { if (link_type == 4)                      { # Binomial.cloglog
+            # Column 1 is 1 - exp (- exp (linear_terms)). Where elt is so small that adding it
+            # to 10000000 changes nothing in floating point, the subtraction is replaced by
+            # elt * (1 - elt / 2), the leading terms of the series of 1 - exp (- elt).
+            elt = exp (linear_terms);
+            is_too_small = ppred (10000000 + elt, 10000000, "==");
+            y_prob [, 2] = exp (- elt);
+            y_prob [, 1] = (1 - is_too_small) * (1.0 - y_prob [, 2]) + is_too_small * elt * (1.0 - elt / 2);
+        } else { if (link_type == 5)                      { # Binomial.cauchit
+            # 0.5 +/- atan (linear_terms) / pi
+            atan_linear_terms = atan (linear_terms);
+            y_prob [, 1] = 0.5 + atan_linear_terms / 3.1415926535897932384626433832795;
+            y_prob [, 2] = 0.5 - atan_linear_terms / 3.1415926535897932384626433832795;
+        }}}}}}
+        means = y_prob;
+        # (1 - diag (ones_ctg)) has zeros on the diagonal and ones elsewhere, so the product
+        # gives, per category, the summed probability of all OTHER categories; with two
+        # columns this makes vars = p * (1 - p) in both columns.
+        ones_ctg = matrix (1, rows = 2, cols = 1);
+        vars  = means * (means %*% (1 - diag (ones_ctg)));
+    } else { if (dist_type == 3) {
+    # MULTINOMIAL LOGIT DISTRIBUTION
+        # A column of ones (exp (0)) is appended as the last, baseline category, then each
+        # row is normalized by its sum to obtain the category probabilities.
+        elt = exp (linear_terms);
+        ones_pts = matrix (1, rows = num_points, cols = 1);
+        elt = append (elt, ones_pts);
+        ones_ctg = matrix (1, rows = ncol (elt), cols = 1);
+        means = elt / (rowSums (elt) %*% t(ones_ctg));
+        # Per category: probability times the summed probability of all other categories.
+        vars  = means * (means %*% (1 - diag (ones_ctg)));
+    } else {
+        # Unsupported combination: return zeros instead of failing.
+        means = matrix (0.0, rows = num_points, cols = 1);
+        vars  = matrix (0.0, rows = num_points, cols = 1);
+}   }}}
+
+# Per-record log-likelihood contribution Y * natural_parameter - b_cumulant for a power
+# distribution with a power (or log) link. It is "partial" in that only these two terms
+# are computed. The caller evaluates it for the fitted and for the saturated linear terms
+# and takes the difference to obtain the deviance.
+#   linear_terms : linear predictor values, one per record
+#   Y            : observed responses, one per record
+#   var_power    : variance-function exponent (1.0 = Poisson, 2.0 = Gamma)
+#   link_power   : power-link exponent; 0.0 selects the log link
+# Returns log_l_part, one value per record (may contain -Infinity, see the Poisson case).
+glm_partial_loglikelihood_for_power_dist_and_link =   # Assumes: dist_type == 1 & link_type == 1
+    function (Matrix[double] linear_terms, Matrix[double] Y, double var_power, double link_power)
+    return (Matrix[double] log_l_part)
+{
+    # NOTE(review): num_records is not used anywhere else in this function.
+    num_records = nrow (Y);
+    if (var_power == 1.0) { # Poisson
+        # Poisson is handled separately: the general formula below divides by (1 - var_power).
+        # Records whose mean is zero are flagged so they can be special-cased afterwards.
+        if (link_power == 0.0)  { # Poisson.log
+            # A linear term of -Infinity means a zero mean; it is replaced by 0 in the
+            # natural parameter, presumably so that Y * natural_parameters cannot become NaN.
+            is_natural_parameter_log_zero = ppred (linear_terms, -1.0/0.0, "==");
+            natural_parameters = replace (target = linear_terms, pattern = -1.0/0.0, replacement = 0);
+            b_cumulant = exp (linear_terms);
+        } else {                  # Poisson.power_nonlog
+            # A linear term of 0 means a zero mean; adding the indicator makes the log and
+            # the power evaluate at 1 there, and the cumulant is then corrected back to 0.
+            is_natural_parameter_log_zero = ppred (linear_terms, 0.0, "==");
+            natural_parameters = log (linear_terms + is_natural_parameter_log_zero) / link_power;
+            b_cumulant = (linear_terms + is_natural_parameter_log_zero) ^ (1.0 / link_power) - is_natural_parameter_log_zero;
+        }
+        # Y > 0 with a zero mean: subtracting 1 / (1 - 1) = Infinity gives -Infinity for
+        # those records, while all other records subtract 0 / 1 = 0.
+        is_minus_infinity = ppred (Y, 0, ">") * is_natural_parameter_log_zero;
+        log_l_part = Y * natural_parameters - b_cumulant - is_minus_infinity / (1 - is_minus_infinity);
+    } else {
+        # var_power == 2.0 (Gamma) is handled separately as well: the general formula
+        # divides by (2 - var_power).
+        if (var_power == 2.0 & link_power == 0.0)  { # Gamma.log
+            natural_parameters = - exp (- linear_terms);
+            b_cumulant = linear_terms;
+        } else { if (var_power == 2.0)  { # Gamma.power_nonlog
+            natural_parameters = - linear_terms ^ (- 1.0 / link_power);
+            b_cumulant = log (linear_terms) / link_power;
+        } else { if (link_power == 0.0) { # PowerDist.log
+            natural_parameters = exp (linear_terms * (1.0 - var_power)) / (1.0 - var_power);
+            b_cumulant = exp (linear_terms * (2.0 - var_power)) / (2.0 - var_power);
+        } else {                          # PowerDist.power_nonlog
+            power_np = (1.0 - var_power) / link_power;
+            natural_parameters = (linear_terms ^ power_np) / (1.0 - var_power);
+            power_cu = (2.0 - var_power) / link_power;
+            b_cumulant = (linear_terms ^ power_cu) / (2.0 - var_power);
+        }}}
+        log_l_part = Y * natural_parameters - b_cumulant;
+}   }



[07/55] [partial] incubator-systemml git commit: [SYSTEMML-482] [SYSTEMML-480] Adding a Git attributes file to enfore Unix-styled line endings, and normalizing all of the line endings.

Posted by du...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor26d.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor26d.dml b/src/test/scripts/functions/parfor/parfor26d.dml
index 3450e3f..0dc5a6d 100644
--- a/src/test/scripts/functions/parfor/parfor26d.dml
+++ b/src/test/scripts/functions/parfor/parfor26d.dml
@@ -19,14 +19,14 @@
 #
 #-------------------------------------------------------------
 
-
-A = matrix(0, rows=10,cols=10);
-dummy = matrix(1, rows=1,cols=1);
-
-parfor( i in 2:10 )
-{ 
-   A[i,1] = dummy*i; 
-   A[i-1,1] = dummy*i;  
-}
-
+
+A = matrix(0, rows=10,cols=10);
+dummy = matrix(1, rows=1,cols=1);
+
+parfor( i in 2:10 )
+{ 
+   A[i,1] = dummy*i; 
+   A[i-1,1] = dummy*i;  
+}
+
 #print(A);
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor27.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor27.dml b/src/test/scripts/functions/parfor/parfor27.dml
index a315eac..ae578be 100644
--- a/src/test/scripts/functions/parfor/parfor27.dml
+++ b/src/test/scripts/functions/parfor/parfor27.dml
@@ -19,16 +19,16 @@
 #
 #-------------------------------------------------------------
 
-
-A = matrix(0,rows=20,cols=20);
-B = Rand(rows=20,cols=20);
-
-parfor( i in 1:20 )
-{ 
-   for( j in 1:20 )
-   {
-       A[1,j] = B[i,j]+(i+j);
-   }
-}
-
+
+A = matrix(0,rows=20,cols=20);
+B = Rand(rows=20,cols=20);
+
+parfor( i in 1:20 )
+{ 
+   for( j in 1:20 )
+   {
+       A[1,j] = B[i,j]+(i+j);
+   }
+}
+
 #print(A);
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor28.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor28.dml b/src/test/scripts/functions/parfor/parfor28.dml
index 3cd1a00..81fca1f 100644
--- a/src/test/scripts/functions/parfor/parfor28.dml
+++ b/src/test/scripts/functions/parfor/parfor28.dml
@@ -19,16 +19,16 @@
 #
 #-------------------------------------------------------------
 
-
-A = matrix(0, rows=20,cols=20);
-B = Rand(rows=20,cols=20);
-
-parfor( i in 1:20 )
-{ 
-   parfor( j in 1:20 )
-   {
-       A[i,j] = B[i,j]+(i+j);
-   }
-}
-
+
+A = matrix(0, rows=20,cols=20);
+B = Rand(rows=20,cols=20);
+
+parfor( i in 1:20 )
+{ 
+   parfor( j in 1:20 )
+   {
+       A[i,j] = B[i,j]+(i+j);
+   }
+}
+
 #print(A);
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor28b.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor28b.dml b/src/test/scripts/functions/parfor/parfor28b.dml
index 2ee3451..b4d214b 100644
--- a/src/test/scripts/functions/parfor/parfor28b.dml
+++ b/src/test/scripts/functions/parfor/parfor28b.dml
@@ -19,22 +19,22 @@
 #
 #-------------------------------------------------------------
 
-
-A = matrix(0,rows=20,cols=20);
-B = Rand(rows=20,cols=20);
-
-parfor( i in 1:20 )
-{ 
-   k = 0;
-   while( k < 10 )
-   {
-     parfor( j in 1:20 )
-     {
-         A[i,j] = B[i,j]+(i+j);
-     }
-     
-     k = k + 1;
-   }
-}
-
+
+A = matrix(0,rows=20,cols=20);
+B = Rand(rows=20,cols=20);
+
+parfor( i in 1:20 )
+{ 
+   k = 0;
+   while( k < 10 )
+   {
+     parfor( j in 1:20 )
+     {
+         A[i,j] = B[i,j]+(i+j);
+     }
+     
+     k = k + 1;
+   }
+}
+
 #print(A);
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor28c.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor28c.dml b/src/test/scripts/functions/parfor/parfor28c.dml
index f16a979..b7053e8 100644
--- a/src/test/scripts/functions/parfor/parfor28c.dml
+++ b/src/test/scripts/functions/parfor/parfor28c.dml
@@ -19,16 +19,16 @@
 #
 #-------------------------------------------------------------
 
-
-A = matrix(0,rows=20,cols=20);
-B = Rand(rows=20,cols=20);
-
-for( i in 1:20 )
-{ 
-   parfor( j in 1:20 )
-   {
-       A[i,j] = A[i-1,j]+B[i,j]+(i+j);
-   }
-}
-
+
+A = matrix(0,rows=20,cols=20);
+B = Rand(rows=20,cols=20);
+
+for( i in 1:20 )
+{ 
+   parfor( j in 1:20 )
+   {
+       A[i,j] = A[i-1,j]+B[i,j]+(i+j);
+   }
+}
+
 #print(A);
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor28d.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor28d.dml b/src/test/scripts/functions/parfor/parfor28d.dml
index 7d1201c..1da9244 100644
--- a/src/test/scripts/functions/parfor/parfor28d.dml
+++ b/src/test/scripts/functions/parfor/parfor28d.dml
@@ -19,16 +19,16 @@
 #
 #-------------------------------------------------------------
 
-
-A = matrix(0,rows=20,cols=20);
-B = Rand(rows=20,cols=20);
-
-parfor( i in 1:20 )
-{ 
-   parfor( j in 1:20 )
-   {
-       A[i,j] = A[i-1,j]+B[i,j]+(i+j);
-   }
-}
-
+
+A = matrix(0,rows=20,cols=20);
+B = Rand(rows=20,cols=20);
+
+parfor( i in 1:20 )
+{ 
+   parfor( j in 1:20 )
+   {
+       A[i,j] = A[i-1,j]+B[i,j]+(i+j);
+   }
+}
+
 #print(A);
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor28e.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor28e.dml b/src/test/scripts/functions/parfor/parfor28e.dml
index 730ec53..654c276 100644
--- a/src/test/scripts/functions/parfor/parfor28e.dml
+++ b/src/test/scripts/functions/parfor/parfor28e.dml
@@ -19,18 +19,18 @@
 #
 #-------------------------------------------------------------
 
-
-A = matrix(0,rows=20,cols=20);
-B = Rand(rows=20,cols=20);
-
-parfor( i in 1:20 )
-{ 
-   A[i,] = A[i,]+2;
-   
-   parfor( j in 1:20 )
-   {
-      print(j); 
-   }
-}
-
+
+A = matrix(0,rows=20,cols=20);
+B = Rand(rows=20,cols=20);
+
+parfor( i in 1:20 )
+{ 
+   A[i,] = A[i,]+2;
+   
+   parfor( j in 1:20 )
+   {
+      print(j); 
+   }
+}
+
 #print(A);
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor28f.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor28f.dml b/src/test/scripts/functions/parfor/parfor28f.dml
index 06d147e..8aba85a 100644
--- a/src/test/scripts/functions/parfor/parfor28f.dml
+++ b/src/test/scripts/functions/parfor/parfor28f.dml
@@ -19,21 +19,21 @@
 #
 #-------------------------------------------------------------
 
-
-A = matrix(0,rows=20,cols=20);
-B = Rand(rows=20,cols=20);
-
-parfor( i in 1:20 )
-{ 
-   A[i,] = A[i,]+2;
-  
-   if( 1>1 )
-   { 
-     parfor( j in 1:20 )
-     {
-        print(j); 
-     }
-   }
-}
-
+
+A = matrix(0,rows=20,cols=20);
+B = Rand(rows=20,cols=20);
+
+parfor( i in 1:20 )
+{ 
+   A[i,] = A[i,]+2;
+  
+   if( 1>1 )
+   { 
+     parfor( j in 1:20 )
+     {
+        print(j); 
+     }
+   }
+}
+
 #print(A);
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor28g.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor28g.dml b/src/test/scripts/functions/parfor/parfor28g.dml
index a7d7e60..1e1b4c0 100644
--- a/src/test/scripts/functions/parfor/parfor28g.dml
+++ b/src/test/scripts/functions/parfor/parfor28g.dml
@@ -19,24 +19,24 @@
 #
 #-------------------------------------------------------------
 
-
-A = Rand(rows=3,cols=1);
-
-p = 2;
-P = 0;
-s = 0;
-n = nrow(A);
-
-tr = n-(s*P+p)
-tc = 1+P*p+P+p
-
-B = matrix(1,rows=tr,cols=tc);
-
-parfor(i in 1:p){
-	B[,1+i] = A[s*P+p-i+1:n-i,]
-}
-parfor(j in 1:P){
-	parfor(k in 0:p){
-		B[,j*(p+1)+k+1] = A[s*P+p-s*j-k+1:n-s*j-k,]
-	}
-}
+
+A = Rand(rows=3,cols=1);
+
+p = 2;
+P = 0;
+s = 0;
+n = nrow(A);
+
+tr = n-(s*P+p)
+tc = 1+P*p+P+p
+
+B = matrix(1,rows=tr,cols=tc);
+
+parfor(i in 1:p){
+	B[,1+i] = A[s*P+p-i+1:n-i,]
+}
+parfor(j in 1:P){
+	parfor(k in 0:p){
+		B[,j*(p+1)+k+1] = A[s*P+p-s*j-k+1:n-s*j-k,]
+	}
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor28h.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor28h.dml b/src/test/scripts/functions/parfor/parfor28h.dml
index 03cdd6c..97c3530 100644
--- a/src/test/scripts/functions/parfor/parfor28h.dml
+++ b/src/test/scripts/functions/parfor/parfor28h.dml
@@ -19,23 +19,23 @@
 #
 #-------------------------------------------------------------
 
-
-#example usecase from LSML, orderedDistances.dml
-
-n_t = 1000;
-n_f = 77;
-X_train = matrix(1, rows=n_t,cols=n_f);
-alldist = matrix(0, rows=n_t, cols=n_t);
-parfor(r in 1:(n_t-1))
-{
-    Xr = X_train[r,];
-    myd = matrix (0, rows = 1, cols = n_t)
-    parfor(c in (r+1):n_t) {
-       tmp = rowSums((Xr - X_train[c,])^2);
-       myd[1,c] = tmp;
-    }
-    alldist[r,] = myd;
-}
-alldist = alldist + t(alldist);
-print(sum(alldist));
-
+
+#example usecase from LSML, orderedDistances.dml
+
+n_t = 1000;
+n_f = 77;
+X_train = matrix(1, rows=n_t,cols=n_f);
+alldist = matrix(0, rows=n_t, cols=n_t);
+parfor(r in 1:(n_t-1))
+{
+    Xr = X_train[r,];
+    myd = matrix (0, rows = 1, cols = n_t)
+    parfor(c in (r+1):n_t) {
+       tmp = rowSums((Xr - X_train[c,])^2);
+       myd[1,c] = tmp;
+    }
+    alldist[r,] = myd;
+}
+alldist = alldist + t(alldist);
+print(sum(alldist));
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor29.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor29.dml b/src/test/scripts/functions/parfor/parfor29.dml
index 21fa299..e08fdcf 100644
--- a/src/test/scripts/functions/parfor/parfor29.dml
+++ b/src/test/scripts/functions/parfor/parfor29.dml
@@ -19,14 +19,14 @@
 #
 #-------------------------------------------------------------
 
-
-A = matrix(0,rows=20,cols=20);
-dummy = matrix(1, rows=1,cols=1);
-
-parfor( i in 1:20 )
-{
-   k = i;
-   A[i,k] = dummy*i;
-}
-
+
+A = matrix(0,rows=20,cols=20);
+dummy = matrix(1, rows=1,cols=1);
+
+parfor( i in 1:20 )
+{
+   k = i;
+   A[i,k] = dummy*i;
+}
+
 #print(A);
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor3.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor3.dml b/src/test/scripts/functions/parfor/parfor3.dml
index 0226555..33d40de 100644
--- a/src/test/scripts/functions/parfor/parfor3.dml
+++ b/src/test/scripts/functions/parfor/parfor3.dml
@@ -19,13 +19,13 @@
 #
 #-------------------------------------------------------------
 
-
-a = 1;
-
-parfor( i in 1:(a+10) )
-{
-   b = i + a;
-   #print(b);
-}
-
+
+a = 1;
+
+parfor( i in 1:(a+10) )
+{
+   b = i + a;
+   #print(b);
+}
+
  
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor30.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor30.dml b/src/test/scripts/functions/parfor/parfor30.dml
index 6cf512c..a69dc46 100644
--- a/src/test/scripts/functions/parfor/parfor30.dml
+++ b/src/test/scripts/functions/parfor/parfor30.dml
@@ -19,13 +19,13 @@
 #
 #-------------------------------------------------------------
 
-
-A = matrix(0,rows=20,cols=20);
-dummy = matrix(1, rows=1,cols=20);
-
-parfor( i in 1:20 )
-{
-   A[i,]=dummy*i;
-}
-
+
+A = matrix(0,rows=20,cols=20);
+dummy = matrix(1, rows=1,cols=20);
+
+parfor( i in 1:20 )
+{
+   A[i,]=dummy*i;
+}
+
 #print(A);
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor31.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor31.dml b/src/test/scripts/functions/parfor/parfor31.dml
index 510de9a..a6c220e 100644
--- a/src/test/scripts/functions/parfor/parfor31.dml
+++ b/src/test/scripts/functions/parfor/parfor31.dml
@@ -19,20 +19,20 @@
 #
 #-------------------------------------------------------------
 
-
-A = matrix(0,rows=20,cols=20);
-B = matrix(0,rows=20,cols=20);
-C = matrix(0,rows=20,cols=20);
-dummy1 = matrix(1,rows=10,cols=1);
-dummy2 = matrix(1,rows=1,cols=10);
-
-parfor( i in 1:20 )
-{                 
-   A[1:10,i]=dummy1*i;
-   A[11:20,i]=dummy1*i;
-   
-   B[i,1:10]=dummy2*i;
-   B[i,11:20]=dummy2*i;
-}
-
+
+A = matrix(0,rows=20,cols=20);
+B = matrix(0,rows=20,cols=20);
+C = matrix(0,rows=20,cols=20);
+dummy1 = matrix(1,rows=10,cols=1);
+dummy2 = matrix(1,rows=1,cols=10);
+
+parfor( i in 1:20 )
+{                 
+   A[1:10,i]=dummy1*i;
+   A[11:20,i]=dummy1*i;
+   
+   B[i,1:10]=dummy2*i;
+   B[i,11:20]=dummy2*i;
+}
+
 #print(C);
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor31b.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor31b.dml b/src/test/scripts/functions/parfor/parfor31b.dml
index 28cd9ee..e472c82 100644
--- a/src/test/scripts/functions/parfor/parfor31b.dml
+++ b/src/test/scripts/functions/parfor/parfor31b.dml
@@ -19,20 +19,20 @@
 #
 #-------------------------------------------------------------
 
-
-A = matrix(0,rows=20,cols=20);
-B = matrix(0,rows=20,cols=20);
-C = matrix(0,rows=20,cols=20);
-dummy1 = matrix(1,rows=10,cols=1);
-dummy2 = matrix(1,rows=1,cols=10);
-
-parfor( i in 1:20 )
-{                 
-   A[1:10,i]=dummy1*i;
-   A[11:nrow(A),i]=dummy1*i;
-   
-   B[i,1:10]=dummy2*i;
-   B[i,11:ncol(B)]=dummy2*i;
-}
-
+
+A = matrix(0,rows=20,cols=20);
+B = matrix(0,rows=20,cols=20);
+C = matrix(0,rows=20,cols=20);
+dummy1 = matrix(1,rows=10,cols=1);
+dummy2 = matrix(1,rows=1,cols=10);
+
+parfor( i in 1:20 )
+{                 
+   A[1:10,i]=dummy1*i;
+   A[11:nrow(A),i]=dummy1*i;
+   
+   B[i,1:10]=dummy2*i;
+   B[i,11:ncol(B)]=dummy2*i;
+}
+
 #print(C);
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor32.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor32.dml b/src/test/scripts/functions/parfor/parfor32.dml
index 812a87b..ac0875a 100644
--- a/src/test/scripts/functions/parfor/parfor32.dml
+++ b/src/test/scripts/functions/parfor/parfor32.dml
@@ -19,17 +19,17 @@
 #
 #-------------------------------------------------------------
 
-
-A = matrix(0,rows=20,cols=20);
-B = matrix(0,rows=20,cols=20);
-C = matrix(0,rows=20,cols=20);
-dummy = matrix(1, rows=1,cols=1);
-
-parfor( i in 1:20 )
-{
-   A[1:10,i]=dummy*i;
-   A[i,5]=dummy*i;
-   
-}
-
+
+A = matrix(0,rows=20,cols=20);
+B = matrix(0,rows=20,cols=20);
+C = matrix(0,rows=20,cols=20);
+dummy = matrix(1, rows=1,cols=1);
+
+parfor( i in 1:20 )
+{
+   A[1:10,i]=dummy*i;
+   A[i,5]=dummy*i;
+   
+}
+
 #print(C);
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor32b.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor32b.dml b/src/test/scripts/functions/parfor/parfor32b.dml
index c2d938d..42fcf34 100644
--- a/src/test/scripts/functions/parfor/parfor32b.dml
+++ b/src/test/scripts/functions/parfor/parfor32b.dml
@@ -19,16 +19,16 @@
 #
 #-------------------------------------------------------------
 
-
-A = matrix(0,rows=20,cols=20);
-B = matrix(0,rows=20,cols=20);
-C = matrix(0,rows=20,cols=20);
-dummy = matrix(1,rows=1,cols=1);
-
-parfor( i in 1:20 )
-{
-   A[i:i,1]=dummy*i;
-   A[i:(i+1),1]=dummy*i;  
-}
-
+
+A = matrix(0,rows=20,cols=20);
+B = matrix(0,rows=20,cols=20);
+C = matrix(0,rows=20,cols=20);
+dummy = matrix(1,rows=1,cols=1);
+
+parfor( i in 1:20 )
+{
+   A[i:i,1]=dummy*i;
+   A[i:(i+1),1]=dummy*i;  
+}
+
 #print(C);
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor32c.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor32c.dml b/src/test/scripts/functions/parfor/parfor32c.dml
index 9e80544..6cf0bb4 100644
--- a/src/test/scripts/functions/parfor/parfor32c.dml
+++ b/src/test/scripts/functions/parfor/parfor32c.dml
@@ -19,17 +19,17 @@
 #
 #-------------------------------------------------------------
 
-
-A = matrix(0,rows=20,cols=20);
-B = matrix(0,rows=20,cols=20);
-C = matrix(0,rows=20,cols=20);
-dummy = matrix(1,rows=1,cols=1);
-
-parfor( i in 1:20 )
-{
-   A[i:i,1]=dummy*i;
-   A[i:i,2]=dummy*i;
-   A[i,1]=dummy*i; 
-}
-
+
+A = matrix(0,rows=20,cols=20);
+B = matrix(0,rows=20,cols=20);
+C = matrix(0,rows=20,cols=20);
+dummy = matrix(1,rows=1,cols=1);
+
+parfor( i in 1:20 )
+{
+   A[i:i,1]=dummy*i;
+   A[i:i,2]=dummy*i;
+   A[i,1]=dummy*i; 
+}
+
 #print(C);
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor32d.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor32d.dml b/src/test/scripts/functions/parfor/parfor32d.dml
index ea719e2..4c348a3 100644
--- a/src/test/scripts/functions/parfor/parfor32d.dml
+++ b/src/test/scripts/functions/parfor/parfor32d.dml
@@ -19,13 +19,13 @@
 #
 #-------------------------------------------------------------
 
-
-A = matrix(0,rows=20,cols=20);
-dummy = matrix(1,rows=1,cols=1);
-
-parfor( i in 1:20 )
-{
-   A[,]=dummy*i; 
-}
-
+
+A = matrix(0,rows=20,cols=20);
+dummy = matrix(1,rows=1,cols=1);
+
+parfor( i in 1:20 )
+{
+   A[,]=dummy*i; 
+}
+
 #print(C);
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor32e.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor32e.dml b/src/test/scripts/functions/parfor/parfor32e.dml
index 7636818..552132f 100644
--- a/src/test/scripts/functions/parfor/parfor32e.dml
+++ b/src/test/scripts/functions/parfor/parfor32e.dml
@@ -19,14 +19,14 @@
 #
 #-------------------------------------------------------------
 
-
-A = matrix(0,rows=20,cols=20);
-dummy = matrix(1,rows=1,cols=1);
-
-parfor( i in 1:20 )
-{
-   k=dummy*(i-1);
-   A[k,]=dummy*i; 
-}
-
+
+A = matrix(0,rows=20,cols=20);
+dummy = matrix(1,rows=1,cols=1);
+
+parfor( i in 1:20 )
+{
+   k=dummy*(i-1);
+   A[k,]=dummy*i; 
+}
+
 #print(C);
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor33.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor33.dml b/src/test/scripts/functions/parfor/parfor33.dml
index 06cd2ad..cf25f5b 100644
--- a/src/test/scripts/functions/parfor/parfor33.dml
+++ b/src/test/scripts/functions/parfor/parfor33.dml
@@ -19,15 +19,15 @@
 #
 #-------------------------------------------------------------
 
-
-A = matrix(0,rows=20,cols=20);
-B = Rand(rows=20,cols=3,min=1,max=20);
-dummy = matrix(1,rows=1,cols=1);
-
-parfor( i in 1:20 )
-{
-   A[1,i]=dummy*i;
-   A[2,B[i,]]=dummy*i;  
-}
-
+
+A = matrix(0,rows=20,cols=20);
+B = Rand(rows=20,cols=3,min=1,max=20);
+dummy = matrix(1,rows=1,cols=1);
+
+parfor( i in 1:20 )
+{
+   A[1,i]=dummy*i;
+   A[2,B[i,]]=dummy*i;  
+}
+
 #print(C);
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor34.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor34.dml b/src/test/scripts/functions/parfor/parfor34.dml
index 075518c..3ae4e6a 100644
--- a/src/test/scripts/functions/parfor/parfor34.dml
+++ b/src/test/scripts/functions/parfor/parfor34.dml
@@ -19,15 +19,15 @@
 #
 #-------------------------------------------------------------
 
-
-A = matrix(0,rows=20,cols=20);
-B = Rand(rows=20,cols=3,min=1,max=20);
-dummy = matrix(1,rows=1,cols=1);
-
-parfor( i in 1:20 )
-{
-   A[1,i]=dummy*i;
-   A[B[,1],2]=dummy*i;  
-}
-
+
+A = matrix(0,rows=20,cols=20);
+B = Rand(rows=20,cols=3,min=1,max=20);
+dummy = matrix(1,rows=1,cols=1);
+
+parfor( i in 1:20 )
+{
+   A[1,i]=dummy*i;
+   A[B[,1],2]=dummy*i;  
+}
+
 #print(C);
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor35.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor35.dml b/src/test/scripts/functions/parfor/parfor35.dml
index 32f0735..81f7564 100644
--- a/src/test/scripts/functions/parfor/parfor35.dml
+++ b/src/test/scripts/functions/parfor/parfor35.dml
@@ -19,16 +19,16 @@
 #
 #-------------------------------------------------------------
 
-
-A = matrix(0,rows=20,cols=20);
-B = Rand(rows=20,cols=3,min=1,max=20);
-dummy = matrix(1, rows=1,cols=1);
-
-parfor( i in 1:20 )
-{
-   val = castAsScalar(B[i,i]);
-   b = A[i,val]; #due to parser change A[i,B[i,]];  
-   c = dummy*(b+i);
-}
-
+
+A = matrix(0,rows=20,cols=20);
+B = Rand(rows=20,cols=3,min=1,max=20);
+dummy = matrix(1, rows=1,cols=1);
+
+parfor( i in 1:20 )
+{
+   val = castAsScalar(B[i,i]);
+   b = A[i,val]; #due to parser change A[i,B[i,]];  
+   c = dummy*(b+i);
+}
+
 #print(C);
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor35b.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor35b.dml b/src/test/scripts/functions/parfor/parfor35b.dml
index 10affce..4a69d10 100644
--- a/src/test/scripts/functions/parfor/parfor35b.dml
+++ b/src/test/scripts/functions/parfor/parfor35b.dml
@@ -1,31 +1,31 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-
-A = Rand(rows=20,cols=20,min=1,max=20);
-
-parfor( i in 1:19 )
-{
-   A[i+1.0, 1] = i;
-   A[1, i+1.0] = i^2;
-}
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+A = Rand(rows=20,cols=20,min=1,max=20);
+
+parfor( i in 1:19 )
+{
+   A[i+1.0, 1] = i;
+   A[1, i+1.0] = i^2;
+}
+
 print(sum(A));
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor35c.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor35c.dml b/src/test/scripts/functions/parfor/parfor35c.dml
index 6aae1b9..b47c205 100644
--- a/src/test/scripts/functions/parfor/parfor35c.dml
+++ b/src/test/scripts/functions/parfor/parfor35c.dml
@@ -1,31 +1,31 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-
-A = Rand(rows=20,cols=20,min=1,max=20);
-
-parfor( i in 1:9 )
-{
-   A[i*2.0, 1] = i;
-   A[1, i*2.0] = i^2;
-}
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+A = Rand(rows=20,cols=20,min=1,max=20);
+
+parfor( i in 1:9 )
+{
+   A[i*2.0, 1] = i;
+   A[1, i*2.0] = i^2;
+}
+
 print(sum(A));
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor35d.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor35d.dml b/src/test/scripts/functions/parfor/parfor35d.dml
index ff3db83..d22b840 100644
--- a/src/test/scripts/functions/parfor/parfor35d.dml
+++ b/src/test/scripts/functions/parfor/parfor35d.dml
@@ -1,31 +1,31 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-
-A = Rand(rows=20,cols=20,min=1,max=20);
-
-parfor( i in 1:2 )
-{
-   A[i+7.3, 1] = i;
-   A[1, i+7.3] = i^2;
-}
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+A = Rand(rows=20,cols=20,min=1,max=20);
+
+parfor( i in 1:2 )
+{
+   A[i+7.3, 1] = i;
+   A[1, i+7.3] = i^2;
+}
+
 print(sum(A));
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor36.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor36.dml b/src/test/scripts/functions/parfor/parfor36.dml
index 30d9de4..f47365b 100644
--- a/src/test/scripts/functions/parfor/parfor36.dml
+++ b/src/test/scripts/functions/parfor/parfor36.dml
@@ -19,12 +19,12 @@
 #
 #-------------------------------------------------------------
 
-
-A = matrix(0,rows=20,cols=20);
-
-parfor( i in 1:10 )
-{
-   A[1,i+3] = A[1,i];  
-}
-
+
+A = matrix(0,rows=20,cols=20);
+
+parfor( i in 1:10 )
+{
+   A[1,i+3] = A[1,i];  
+}
+
 #print(C);
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor37.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor37.dml b/src/test/scripts/functions/parfor/parfor37.dml
index f5c0267..0f84b8e 100644
--- a/src/test/scripts/functions/parfor/parfor37.dml
+++ b/src/test/scripts/functions/parfor/parfor37.dml
@@ -19,12 +19,12 @@
 #
 #-------------------------------------------------------------
 
-
-A = matrix(0,rows=20,cols=20);
-
-parfor( i in 1:10 )
-{
-   A[1,i+10] = A[1,i];  
-}
-
+
+A = matrix(0,rows=20,cols=20);
+
+parfor( i in 1:10 )
+{
+   A[1,i+10] = A[1,i];  
+}
+
 #print(C);
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor38.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor38.dml b/src/test/scripts/functions/parfor/parfor38.dml
index 972f117..f3749c0 100644
--- a/src/test/scripts/functions/parfor/parfor38.dml
+++ b/src/test/scripts/functions/parfor/parfor38.dml
@@ -19,13 +19,13 @@
 #
 #-------------------------------------------------------------
 
-
-A = matrix(0,rows=10,cols=1);
-B = Rand(rows=10,cols=1);
-
-parfor( i in 2:10 )
-{ 
-   A[i,1] = B[i,1] + A[i-1,1] + A[i,1];
-}
-
+
+A = matrix(0,rows=10,cols=1);
+B = Rand(rows=10,cols=1);
+
+parfor( i in 2:10 )
+{ 
+   A[i,1] = B[i,1] + A[i-1,1] + A[i,1];
+}
+
 #print(A);
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor39.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor39.dml b/src/test/scripts/functions/parfor/parfor39.dml
index b466312..3a55d8d 100644
--- a/src/test/scripts/functions/parfor/parfor39.dml
+++ b/src/test/scripts/functions/parfor/parfor39.dml
@@ -19,13 +19,13 @@
 #
 #-------------------------------------------------------------
 
-
-A = matrix(0,rows=10,cols=1);
-B = Rand(rows=10,cols=1);
-
-parfor( i in 2:10 )
-{ 
-   A[i,1] = B[i,1] + A[i,1] + A[i-1,1];
-}
-
+
+A = matrix(0,rows=10,cols=1);
+B = Rand(rows=10,cols=1);
+
+parfor( i in 2:10 )
+{ 
+   A[i,1] = B[i,1] + A[i,1] + A[i-1,1];
+}
+
 #print(A);
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor4.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor4.dml b/src/test/scripts/functions/parfor/parfor4.dml
index f68879c..57c78f0 100644
--- a/src/test/scripts/functions/parfor/parfor4.dml
+++ b/src/test/scripts/functions/parfor/parfor4.dml
@@ -19,13 +19,13 @@
 #
 #-------------------------------------------------------------
 
-
-a = 1;
-
-parfor( i in 1:(10+a) )
-{
-   b = i + a;
-   #print(b);
-}
-
+
+a = 1;
+
+parfor( i in 1:(10+a) )
+{
+   b = i + a;
+   #print(b);
+}
+
  
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor40.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor40.dml b/src/test/scripts/functions/parfor/parfor40.dml
index 53a46a8..57335b7 100644
--- a/src/test/scripts/functions/parfor/parfor40.dml
+++ b/src/test/scripts/functions/parfor/parfor40.dml
@@ -19,20 +19,20 @@
 #
 #-------------------------------------------------------------
 
-
-execSum = function(Matrix[Double] X, Matrix[Double] Y) return (Matrix[Double] Z, Matrix[Double] V) 
-{
-   Z = X + Y;
-   V = X + Y;    
-}
-
-
-A = matrix(0,rows=10,cols=1);
-B = Rand(rows=10,cols=1);
-
-parfor( i in 2:10 )
-{ 
-   [A[i-1,1],A[i,1]] = execSum( B[i,1], A[i,1] );
-}
-
+
+execSum = function(Matrix[Double] X, Matrix[Double] Y) return (Matrix[Double] Z, Matrix[Double] V) 
+{
+   Z = X + Y;
+   V = X + Y;    
+}
+
+
+A = matrix(0,rows=10,cols=1);
+B = Rand(rows=10,cols=1);
+
+parfor( i in 2:10 )
+{ 
+   [A[i-1,1],A[i,1]] = execSum( B[i,1], A[i,1] );
+}
+
 #print(A);
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor41.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor41.dml b/src/test/scripts/functions/parfor/parfor41.dml
index ab77a0e..3780873 100644
--- a/src/test/scripts/functions/parfor/parfor41.dml
+++ b/src/test/scripts/functions/parfor/parfor41.dml
@@ -19,20 +19,20 @@
 #
 #-------------------------------------------------------------
 
-
-execSum = function(Matrix[Double] X, Matrix[Double] Y) return (Matrix[Double] Z, Matrix[Double] V) 
-{
-   Z = X + Y;
-   V = X + Y;    
-}
-
-
-A = matrix(0,rows=10,cols=1);
-B = Rand(rows=10,cols=1);
-
-parfor( i in 2:10 )
-{ 
-   [A[i,1],A[i-1,1]] = execSum( B[i,1], A[i,1] );
-}
-
+
+execSum = function(Matrix[Double] X, Matrix[Double] Y) return (Matrix[Double] Z, Matrix[Double] V) 
+{
+   Z = X + Y;
+   V = X + Y;    
+}
+
+
+A = matrix(0,rows=10,cols=1);
+B = Rand(rows=10,cols=1);
+
+parfor( i in 2:10 )
+{ 
+   [A[i,1],A[i-1,1]] = execSum( B[i,1], A[i,1] );
+}
+
 #print(A);
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor42.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor42.dml b/src/test/scripts/functions/parfor/parfor42.dml
index 50a4580..5f5b833 100644
--- a/src/test/scripts/functions/parfor/parfor42.dml
+++ b/src/test/scripts/functions/parfor/parfor42.dml
@@ -19,13 +19,13 @@
 #
 #-------------------------------------------------------------
 
-
-A = matrix(0,rows=10,cols=1);
-B = Rand(rows=10,cols=1);
-
-parfor( i in 2:10 )
-{ 
-   A[i,1] = B[i,1] + sum(A[i-1,1]) + A[i,1];
-}
-
+
+A = matrix(0,rows=10,cols=1);
+B = Rand(rows=10,cols=1);
+
+parfor( i in 2:10 )
+{ 
+   A[i,1] = B[i,1] + sum(A[i-1,1]) + A[i,1];
+}
+
 #print(A);
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor43.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor43.dml b/src/test/scripts/functions/parfor/parfor43.dml
index c43d4bd..b1988a2 100644
--- a/src/test/scripts/functions/parfor/parfor43.dml
+++ b/src/test/scripts/functions/parfor/parfor43.dml
@@ -19,13 +19,13 @@
 #
 #-------------------------------------------------------------
 
-
-A = matrix(0,rows=10,cols=1);
-B = Rand(rows=10,cols=1);
-
-parfor( i in 2:10 )
-{ 
-   A[i,1] = B[i,1] + sum(A[i,1]) + A[i,1];
-}
-
+
+A = matrix(0,rows=10,cols=1);
+B = Rand(rows=10,cols=1);
+
+parfor( i in 2:10 )
+{ 
+   A[i,1] = B[i,1] + sum(A[i,1]) + A[i,1];
+}
+
 #print(A);
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor44.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor44.dml b/src/test/scripts/functions/parfor/parfor44.dml
index 7b26740..daaa732 100644
--- a/src/test/scripts/functions/parfor/parfor44.dml
+++ b/src/test/scripts/functions/parfor/parfor44.dml
@@ -19,12 +19,12 @@
 #
 #-------------------------------------------------------------
 
-
-B = Rand(rows=10,cols=1);
-
-parfor( i in 1:10 )
-{ 
-   A = Rand(rows=10,cols=1);
-}
-
+
+B = Rand(rows=10,cols=1);
+
+parfor( i in 1:10 )
+{ 
+   A = Rand(rows=10,cols=1);
+}
+
 B = B * A
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor45.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor45.dml b/src/test/scripts/functions/parfor/parfor45.dml
index a9b334d..747d9b3 100644
--- a/src/test/scripts/functions/parfor/parfor45.dml
+++ b/src/test/scripts/functions/parfor/parfor45.dml
@@ -19,20 +19,20 @@
 #
 #-------------------------------------------------------------
 
-
-numC = 5;
-numF = 10;
-
-A = Rand(rows=numC, cols=numF, min=0.0, max=1.0, sparsity=1.0)
-
-parfor(i in 1:numC ) {
-	A[i,] = A[i,]/sum(A[i,]);
-}	
-
-B = matrix(0, rows=numC, cols=numF)
-	
-parfor(i in 1:numC) {
-	for (j in 2:numF) {
-    B[i,j] = B[i,j-1] + A[i,j];
-	}
+
+numC = 5;
+numF = 10;
+
+A = Rand(rows=numC, cols=numF, min=0.0, max=1.0, sparsity=1.0)
+
+parfor(i in 1:numC ) {
+	A[i,] = A[i,]/sum(A[i,]);
+}	
+
+B = matrix(0, rows=numC, cols=numF)
+	
+parfor(i in 1:numC) {
+	for (j in 2:numF) {
+    B[i,j] = B[i,j-1] + A[i,j];
+	}
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor46.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor46.dml b/src/test/scripts/functions/parfor/parfor46.dml
index 2b5691b..95f799f 100644
--- a/src/test/scripts/functions/parfor/parfor46.dml
+++ b/src/test/scripts/functions/parfor/parfor46.dml
@@ -19,28 +19,28 @@
 #
 #-------------------------------------------------------------
 
-
-numC = 5;
-numF = 10;
-
-A = Rand(rows=numC, cols=numF, min=0.0, max=1.0, sparsity=1.0)
-B = matrix(0, rows=numC, cols=numF)  #classFeatureCumulativeDensityFunction
-C = matrix(0, rows=numC, cols=numF)  #tempCumulative
-
-parfor(i in 1:numC ) 
-{
-	A[i,] = A[i,]/sum(A[i,]);
-  C[i,1] = A[i,1];
-	
-  for (j in 2:numF) 
-  {
-		 C[i,j] = C[i,j-1] + A[i,j];
-  }
-  
-  B[i,] = C[i,];
-}	
-
-
-
-
-
+
+numC = 5;
+numF = 10;
+
+A = Rand(rows=numC, cols=numF, min=0.0, max=1.0, sparsity=1.0)
+B = matrix(0, rows=numC, cols=numF)  #classFeatureCumulativeDensityFunction
+C = matrix(0, rows=numC, cols=numF)  #tempCumulative
+
+parfor(i in 1:numC ) 
+{
+	A[i,] = A[i,]/sum(A[i,]);
+  C[i,1] = A[i,1];
+	
+  for (j in 2:numF) 
+  {
+		 C[i,j] = C[i,j-1] + A[i,j];
+  }
+  
+  B[i,] = C[i,];
+}	
+
+
+
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor47.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor47.dml b/src/test/scripts/functions/parfor/parfor47.dml
index 2ed8710..9fe2023 100644
--- a/src/test/scripts/functions/parfor/parfor47.dml
+++ b/src/test/scripts/functions/parfor/parfor47.dml
@@ -19,28 +19,28 @@
 #
 #-------------------------------------------------------------
 
-
-a = 5
-b = 10
-c = 20
-d = 5
-
-A = Rand(rows=a, cols=b, min=0.0, max=1.0, sparsity=1.0)
-B = matrix(0,rows=c, cols=d)
-dummy = matrix(1, rows=1,cols=1);
-
-parfor(i in 1:a){
-	for(j in 2:b){
-		A[i,j] = A[i,j-1] + A[i,j]
-	}
-}
-
-parfor(i in 1:c){
-	  parfor(j in 1:d){
-    	for(k in 1:b){
-    		B[i,j] = dummy*(i*j);
-    	}
-    }
-}   
-
-
+
+a = 5
+b = 10
+c = 20
+d = 5
+
+A = Rand(rows=a, cols=b, min=0.0, max=1.0, sparsity=1.0)
+B = matrix(0,rows=c, cols=d)
+dummy = matrix(1, rows=1,cols=1);
+
+parfor(i in 1:a){
+	for(j in 2:b){
+		A[i,j] = A[i,j-1] + A[i,j]
+	}
+}
+
+parfor(i in 1:c){
+	  parfor(j in 1:d){
+    	for(k in 1:b){
+    		B[i,j] = dummy*(i*j);
+    	}
+    }
+}   
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor48.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor48.dml b/src/test/scripts/functions/parfor/parfor48.dml
index 3e01135..bc4b2db 100644
--- a/src/test/scripts/functions/parfor/parfor48.dml
+++ b/src/test/scripts/functions/parfor/parfor48.dml
@@ -19,14 +19,14 @@
 #
 #-------------------------------------------------------------
 
-
-A = Rand(rows=10, cols=10, min=0.0, max=1.0, sparsity=1.0)
-
-parfor(i in 1:nrow(A)){
-	parfor(j in 1:ncol(A)){
-		print("i="+i+", j="+j);
-	}
-}
-   
-
-
+
+A = Rand(rows=10, cols=10, min=0.0, max=1.0, sparsity=1.0)
+
+parfor(i in 1:nrow(A)){
+	parfor(j in 1:ncol(A)){
+		print("i="+i+", j="+j);
+	}
+}
+   
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor48b.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor48b.dml b/src/test/scripts/functions/parfor/parfor48b.dml
index d35da2a..c87f920 100644
--- a/src/test/scripts/functions/parfor/parfor48b.dml
+++ b/src/test/scripts/functions/parfor/parfor48b.dml
@@ -19,14 +19,14 @@
 #
 #-------------------------------------------------------------
 
-
-A = Rand(rows=10, cols=10, min=0.0, max=1.0, sparsity=1.0)
-
-parfor(i in 1:castAsScalar(A[1,1])){
-	parfor(j in 1:A+ncol(A)){
-		print("i="+i+", j="+j);
-	}
-}
-   
-
-
+
+A = Rand(rows=10, cols=10, min=0.0, max=1.0, sparsity=1.0)
+
+parfor(i in 1:castAsScalar(A[1,1])){
+	parfor(j in 1:A+ncol(A)){
+		print("i="+i+", j="+j);
+	}
+}
+   
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor48c.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor48c.dml b/src/test/scripts/functions/parfor/parfor48c.dml
index 74c8a5e..cbeae42 100644
--- a/src/test/scripts/functions/parfor/parfor48c.dml
+++ b/src/test/scripts/functions/parfor/parfor48c.dml
@@ -19,13 +19,13 @@
 #
 #-------------------------------------------------------------
 
-
-A = Rand(rows=10, cols=10, min=0.0, max=1.0, sparsity=1.0)
-
-a = 1;
-parfor(i in (2-a):(10-1+1)){
-	print("i="+i);
-}
-   
-
-
+
+A = Rand(rows=10, cols=10, min=0.0, max=1.0, sparsity=1.0)
+
+a = 1;
+parfor(i in (2-a):(10-1+1)){
+	print("i="+i);
+}
+   
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor49a.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor49a.dml b/src/test/scripts/functions/parfor/parfor49a.dml
index 7cfc7b6..fa91050 100644
--- a/src/test/scripts/functions/parfor/parfor49a.dml
+++ b/src/test/scripts/functions/parfor/parfor49a.dml
@@ -19,15 +19,15 @@
 #
 #-------------------------------------------------------------
 
-
-foo = function (Integer inval) return (Integer out)
-{
-   A = matrix( 1, rows=10, cols=10 );
-   parfor (f in 1:inval) {
-       A = A + f;
-   }
-   out = round(sum(A));
-}
-
-x = foo( 10 );
-print( x );
+
+foo = function (Integer inval) return (Integer out)
+{
+   A = matrix( 1, rows=10, cols=10 );
+   parfor (f in 1:inval) {
+       A = A + f;
+   }
+   out = round(sum(A));
+}
+
+x = foo( 10 );
+print( x );

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor49b.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor49b.dml b/src/test/scripts/functions/parfor/parfor49b.dml
index 7542f03..4035d9a 100644
--- a/src/test/scripts/functions/parfor/parfor49b.dml
+++ b/src/test/scripts/functions/parfor/parfor49b.dml
@@ -19,26 +19,26 @@
 #
 #-------------------------------------------------------------
 
-
-foo = function (Integer inval) return (Matrix[Double] out)
-{   
-   if( 1==1 ) {} #prevent inlining
-   
-   A = matrix( 1, rows=10, cols=10 );
-   out = A
-}
-
-X = foo(7);
-parfor( i in 1:10 )
-{
-    if( i==1 )
-    {
-       X = foo(i);
-    }
-    else
-    {
-       Y = foo(i);
-    }
-}
-
-print(sum(X)); 
+
+foo = function (Integer inval) return (Matrix[Double] out)
+{   
+   if( 1==1 ) {} #prevent inlining
+   
+   A = matrix( 1, rows=10, cols=10 );
+   out = A
+}
+
+X = foo(7);
+parfor( i in 1:10 )
+{
+    if( i==1 )
+    {
+       X = foo(i);
+    }
+    else
+    {
+       Y = foo(i);
+    }
+}
+
+print(sum(X)); 

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor5.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor5.dml b/src/test/scripts/functions/parfor/parfor5.dml
index dfbfaa9..d5c2455 100644
--- a/src/test/scripts/functions/parfor/parfor5.dml
+++ b/src/test/scripts/functions/parfor/parfor5.dml
@@ -19,15 +19,15 @@
 #
 #-------------------------------------------------------------
 
-
-a = 1;
-
-parfor( i in 1:(a+10) )
-{
-   b = i + a;
-   a = b;
-   #print(a);
-   #print(b);
-}
-
+
+a = 1;
+
+parfor( i in 1:(a+10) )
+{
+   b = i + a;
+   a = b;
+   #print(a);
+   #print(b);
+}
+
 #print(a);
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor50.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor50.dml b/src/test/scripts/functions/parfor/parfor50.dml
index 2ab7c7b..2d86be8 100644
--- a/src/test/scripts/functions/parfor/parfor50.dml
+++ b/src/test/scripts/functions/parfor/parfor50.dml
@@ -19,52 +19,52 @@
 #
 #-------------------------------------------------------------
 
-
-binning = externalFunction(Matrix[Double] A, Integer binsize, Integer numbins) return (Matrix[Double] B, Integer numbinsdef) 
-	implemented in (classname="org.apache.sysml.udf.lib.BinningWrapper",exectype="mem")
-	
-fmt = ifdef($fmt, "text");
-num_bins = ifdef($bins, 3); # 50
-depth = ifdef($depth, 10);
-num_trees = ifdef($trees, 2); # 10
-num_feature_samples = ifdef($feature_samples, 3); # 100
-num_samples = ifdef($samples, 10);
-
-num_classes = 7;
-bin_size = 7;
-
-X = rand (rows = 10, cols = 5, min = 0, max = 10);
-Y = floor (rand (rows = 10, cols = 1, min = 1, max = 3.999999999999));
-
-num_rows = nrow (X);
-num_features = ncol (X);
-
-count_thresholds = matrix (0, rows = 1, cols = num_features)
-thresholds = matrix (0, rows = num_bins + 1, cols = num_features)
-parfor(i1 in 1:num_features) { # this parfor works fine!
-      col = order (target = X[,i1], by = 1, decreasing = FALSE);
-      [col_bins, num_bins_defined] = binning (col, bin_size, num_bins);
-      count_thresholds[,i1] = num_bins_defined;
-      thresholds[,i1] = col_bins;	
-}
-
-
-max_num_bins = max (count_thresholds);
-X_ext_left = matrix (0, rows = num_rows, cols = num_features * num_classes * max_num_bins);
-X_ext_right = matrix (0, rows = num_rows, cols = num_features * num_classes * max_num_bins);
-parfor (i2 in 1:num_features, check=0) { # The following parfors need check=0!
-	parfor (i3 in 1:num_rows) {
-		count_threshold = count_thresholds[,i2];
-		for (i4 in 0:(as.scalar(count_threshold) - 1)) {
-			cur_threshold = as.scalar(count_threshold) - i4;
-			offset_feature = (i2 - 1) * max_num_bins * num_classes;
-			offset_bin = (cur_threshold - 1) * num_classes;
-			if (as.scalar(X[i3,i2]) < as.scalar(thresholds[cur_threshold, i2])) {
-				X_ext_left[i3, (offset_feature + offset_bin + as.scalar(Y[i3,1]))] = 1;
-			} else {
-				X_ext_right[i3, (offset_feature + offset_bin + as.scalar(Y[i3,1]))] = 1;
-			}
-		}
-	}
-}
-
+
+binning = externalFunction(Matrix[Double] A, Integer binsize, Integer numbins) return (Matrix[Double] B, Integer numbinsdef) 
+	implemented in (classname="org.apache.sysml.udf.lib.BinningWrapper",exectype="mem")
+	
+fmt = ifdef($fmt, "text");
+num_bins = ifdef($bins, 3); # 50
+depth = ifdef($depth, 10);
+num_trees = ifdef($trees, 2); # 10
+num_feature_samples = ifdef($feature_samples, 3); # 100
+num_samples = ifdef($samples, 10);
+
+num_classes = 7;
+bin_size = 7;
+
+X = rand (rows = 10, cols = 5, min = 0, max = 10);
+Y = floor (rand (rows = 10, cols = 1, min = 1, max = 3.999999999999));
+
+num_rows = nrow (X);
+num_features = ncol (X);
+
+count_thresholds = matrix (0, rows = 1, cols = num_features)
+thresholds = matrix (0, rows = num_bins + 1, cols = num_features)
+parfor(i1 in 1:num_features) { # this parfor works fine!
+      col = order (target = X[,i1], by = 1, decreasing = FALSE);
+      [col_bins, num_bins_defined] = binning (col, bin_size, num_bins);
+      count_thresholds[,i1] = num_bins_defined;
+      thresholds[,i1] = col_bins;	
+}
+
+
+max_num_bins = max (count_thresholds);
+X_ext_left = matrix (0, rows = num_rows, cols = num_features * num_classes * max_num_bins);
+X_ext_right = matrix (0, rows = num_rows, cols = num_features * num_classes * max_num_bins);
+parfor (i2 in 1:num_features, check=0) { # The following parfors need check=0!
+	parfor (i3 in 1:num_rows) {
+		count_threshold = count_thresholds[,i2];
+		for (i4 in 0:(as.scalar(count_threshold) - 1)) {
+			cur_threshold = as.scalar(count_threshold) - i4;
+			offset_feature = (i2 - 1) * max_num_bins * num_classes;
+			offset_bin = (cur_threshold - 1) * num_classes;
+			if (as.scalar(X[i3,i2]) < as.scalar(thresholds[cur_threshold, i2])) {
+				X_ext_left[i3, (offset_feature + offset_bin + as.scalar(Y[i3,1]))] = 1;
+			} else {
+				X_ext_right[i3, (offset_feature + offset_bin + as.scalar(Y[i3,1]))] = 1;
+			}
+		}
+	}
+}
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor51.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor51.dml b/src/test/scripts/functions/parfor/parfor51.dml
index 644007e..c6b6439 100644
--- a/src/test/scripts/functions/parfor/parfor51.dml
+++ b/src/test/scripts/functions/parfor/parfor51.dml
@@ -19,15 +19,15 @@
 #
 #-------------------------------------------------------------
 
-
-n = 2;
-m = 2;
-A = matrix(7, rows=n, cols=1);
-
-parfor (j in 1:m){
-  parfor (i in  1:n){
-		A[i,1] = j*7+i;
-	}
-}
-
+
+n = 2;
+m = 2;
+A = matrix(7, rows=n, cols=1);
+
+parfor (j in 1:m){
+  parfor (i in  1:n){
+		A[i,1] = j*7+i;
+	}
+}
+
 print(sum(A));
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor52.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor52.dml b/src/test/scripts/functions/parfor/parfor52.dml
index 5a1d5e2..cfc7a23 100644
--- a/src/test/scripts/functions/parfor/parfor52.dml
+++ b/src/test/scripts/functions/parfor/parfor52.dml
@@ -19,16 +19,16 @@
 #
 #-------------------------------------------------------------
 
-
-V = matrix(0, rows=2, cols=2);
-print(sum(V)); 
-parfor( i in 1:2 )
-{
-   print(sum(V));
-   
-   parfor( j in 1:2 )
-   {
-      V[i,j] = 7; 
-   }
-}  
+
+V = matrix(0, rows=2, cols=2);
+print(sum(V)); 
+parfor( i in 1:2 )
+{
+   print(sum(V));
+   
+   parfor( j in 1:2 )
+   {
+      V[i,j] = 7; 
+   }
+}  
 print(sum(V));       
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor6.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor6.dml b/src/test/scripts/functions/parfor/parfor6.dml
index f9f40eb..aae1370 100644
--- a/src/test/scripts/functions/parfor/parfor6.dml
+++ b/src/test/scripts/functions/parfor/parfor6.dml
@@ -19,11 +19,11 @@
 #
 #-------------------------------------------------------------
 
-
-A = Rand(rows=10,cols=1);
-
-parfor( i in 1:10 )
-{
-   b = i + castAsScalar(A[i,1]);
-   #print(b);
-}
+
+A = Rand(rows=10,cols=1);
+
+parfor( i in 1:10 )
+{
+   b = i + castAsScalar(A[i,1]);
+   #print(b);
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor7.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor7.dml b/src/test/scripts/functions/parfor/parfor7.dml
index f1e6b72..39c14ad 100644
--- a/src/test/scripts/functions/parfor/parfor7.dml
+++ b/src/test/scripts/functions/parfor/parfor7.dml
@@ -19,12 +19,12 @@
 #
 #-------------------------------------------------------------
 
-
-A = Rand(rows=10,cols=1);
-
-parfor( i in 2:10 )
-{
-   b = i + castAsScalar(A[i,1]) + castAsScalar(A[i+1,1]);
-   
-   #print(b);
-}
+
+A = Rand(rows=10,cols=1);
+
+parfor( i in 2:10 )
+{
+   b = i + castAsScalar(A[i,1]) + castAsScalar(A[i+1,1]);
+   
+   #print(b);
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor8.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor8.dml b/src/test/scripts/functions/parfor/parfor8.dml
index c5cdc5d..21d96ee 100644
--- a/src/test/scripts/functions/parfor/parfor8.dml
+++ b/src/test/scripts/functions/parfor/parfor8.dml
@@ -19,13 +19,13 @@
 #
 #-------------------------------------------------------------
 
-
-A = Rand(rows=10,cols=1);
-a = 1
-
-parfor( i in 2:10 )
-{ 
-   b = a + castAsScalar(A[i,1]) + castAsScalar(A[i+1,1]);
-   a = i;
-  # print(b);
-}
+
+A = Rand(rows=10,cols=1);
+a = 1
+
+parfor( i in 2:10 )
+{ 
+   b = a + castAsScalar(A[i,1]) + castAsScalar(A[i+1,1]);
+   a = i;
+  # print(b);
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor9.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor9.dml b/src/test/scripts/functions/parfor/parfor9.dml
index ebaef43..fb50cb7 100644
--- a/src/test/scripts/functions/parfor/parfor9.dml
+++ b/src/test/scripts/functions/parfor/parfor9.dml
@@ -19,13 +19,13 @@
 #
 #-------------------------------------------------------------
 
-
-A = Rand(rows=10,cols=1);
-a = 1
-
-parfor( i in 2:10 )
-{ 
-   b = a + castAsScalar(A[i,1]) + castAsScalar(A[i-1,1]);
-   a = i;
-   #print(b);
-}
+
+A = Rand(rows=10,cols=1);
+a = 1
+
+parfor( i in 2:10 )
+{ 
+   b = a + castAsScalar(A[i,1]) + castAsScalar(A[i-1,1]);
+   a = i;
+   #print(b);
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor_NaN1.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor_NaN1.R b/src/test/scripts/functions/parfor/parfor_NaN1.R
index 8d83979..fe686e1 100644
--- a/src/test/scripts/functions/parfor/parfor_NaN1.R
+++ b/src/test/scripts/functions/parfor/parfor_NaN1.R
@@ -19,22 +19,22 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-library("Matrix")
-
-
-nr = as.integer(args[1]);
-xr = as.integer(args[2]);
-NaNval = 0/0;
-
-R = matrix(0, nr, nr); 
-R[1:xr,] = matrix(NaNval, xr, nr);
-
-for( i in 1:nr )
-{
-   R[i,i] = i^2 + 7;           
-}   
-
-writeMM(as(R, "CsparseMatrix"), paste(args[3], "R", sep=""));
+
+args <- commandArgs(TRUE)
+options(digits=22)
+library("Matrix")
+
+
+nr = as.integer(args[1]);
+xr = as.integer(args[2]);
+NaNval = 0/0;
+
+R = matrix(0, nr, nr); 
+R[1:xr,] = matrix(NaNval, xr, nr);
+
+for( i in 1:nr )
+{
+   R[i,i] = i^2 + 7;           
+}   
+
+writeMM(as(R, "CsparseMatrix"), paste(args[3], "R", sep=""));

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor_NaN1.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor_NaN1.dml b/src/test/scripts/functions/parfor/parfor_NaN1.dml
index 151579b..2f9eaad 100644
--- a/src/test/scripts/functions/parfor/parfor_NaN1.dml
+++ b/src/test/scripts/functions/parfor/parfor_NaN1.dml
@@ -19,18 +19,18 @@
 #
 #-------------------------------------------------------------
 
-
-
-nr = $1;
-xr = $2;
-NaNval = 0/0;
-
-R = matrix(0, rows=nr, cols=nr); 
-R[1:xr,] = matrix(NaNval, rows=xr, cols=nr);
-
-parfor( i in 1:nr )
-{
-   R[i,i] = i^2 + 7;           
-}   
-
+
+
+nr = $1;
+xr = $2;
+NaNval = 0/0;
+
+R = matrix(0, rows=nr, cols=nr); 
+R[1:xr,] = matrix(NaNval, rows=xr, cols=nr);
+
+parfor( i in 1:nr )
+{
+   R[i,i] = i^2 + 7;           
+}   
+
 write(R, $3);       
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor_NaN2.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor_NaN2.R b/src/test/scripts/functions/parfor/parfor_NaN2.R
index 0326db4..43c50af 100644
--- a/src/test/scripts/functions/parfor/parfor_NaN2.R
+++ b/src/test/scripts/functions/parfor/parfor_NaN2.R
@@ -19,22 +19,22 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-library("Matrix")
-
-
-nr = as.integer(args[1]);
-xr = as.integer(args[2]);
-NaNval = 0/0;
-
-R = matrix(0, nr, nr); 
-R[1:xr,] = matrix(1, xr, nr);
-
-for( i in 1:nr )
-{
-   R[i,i] = NaNval;           
-}   
-
-writeMM(as(R, "CsparseMatrix"), paste(args[3], "R", sep=""));
+
+args <- commandArgs(TRUE)
+options(digits=22)
+library("Matrix")
+
+
+nr = as.integer(args[1]);
+xr = as.integer(args[2]);
+NaNval = 0/0;
+
+R = matrix(0, nr, nr); 
+R[1:xr,] = matrix(1, xr, nr);
+
+for( i in 1:nr )
+{
+   R[i,i] = NaNval;           
+}   
+
+writeMM(as(R, "CsparseMatrix"), paste(args[3], "R", sep=""));

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor_NaN2.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor_NaN2.dml b/src/test/scripts/functions/parfor/parfor_NaN2.dml
index ba7aec4..f5733b4 100644
--- a/src/test/scripts/functions/parfor/parfor_NaN2.dml
+++ b/src/test/scripts/functions/parfor/parfor_NaN2.dml
@@ -19,18 +19,18 @@
 #
 #-------------------------------------------------------------
 
-
-
-nr = $1;
-xr = $2;
-NaNval = 0/0;
-
-R = matrix(0, rows=nr, cols=nr); 
-R[1:xr,] = matrix(1, rows=xr, cols=nr);
-
-parfor( i in 1:nr )
-{
-   R[i,i] = NaNval;           
-}   
-
+
+
+nr = $1;
+xr = $2;
+NaNval = 0/0;
+
+R = matrix(0, rows=nr, cols=nr); 
+R[1:xr,] = matrix(1, rows=xr, cols=nr);
+
+parfor( i in 1:nr )
+{
+   R[i,i] = NaNval;           
+}   
+
 write(R, $3);       
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor_cdatapartition_leftindexing.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor_cdatapartition_leftindexing.dml b/src/test/scripts/functions/parfor/parfor_cdatapartition_leftindexing.dml
index 4acbb93..2c6e1c9 100644
--- a/src/test/scripts/functions/parfor/parfor_cdatapartition_leftindexing.dml
+++ b/src/test/scripts/functions/parfor/parfor_cdatapartition_leftindexing.dml
@@ -19,18 +19,18 @@
 #
 #-------------------------------------------------------------
 
-
-V = read($1);
-m = nrow(V);
-n = ncol(V);
-
-R = matrix(0,rows=m,cols=n); 
-
-parfor( i in 1:n, par=4, mode=LOCAL, datapartitioner=REMOTE_MR, opt=NONE )
-{
-   col = V[,i];
-   if(1==1){}
-   R[,i] = col; 
-}   
-
+
+V = read($1);
+m = nrow(V);
+n = ncol(V);
+
+R = matrix(0,rows=m,cols=n); 
+
+parfor( i in 1:n, par=4, mode=LOCAL, datapartitioner=REMOTE_MR, opt=NONE )
+{
+   col = V[,i];
+   if(1==1){}
+   R[,i] = col; 
+}   
+
 write(R, $2);       
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor_cdatapartitioning.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor_cdatapartitioning.R b/src/test/scripts/functions/parfor/parfor_cdatapartitioning.R
index 3b4011e..28fc6bf 100644
--- a/src/test/scripts/functions/parfor/parfor_cdatapartitioning.R
+++ b/src/test/scripts/functions/parfor/parfor_cdatapartitioning.R
@@ -19,22 +19,22 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-V1 <- readMM(paste(args[1], "V.mtx", sep=""))
-V <- as.matrix(V1);
-n <- ncol(V); 
-
-R <- array(0,dim=c(1,n))
-
-for( i in 1:n )
-{
-   X <- V[ ,i];                 
-   R[1,i] <- sum(X);
-}   
-
-writeMM(as(R, "CsparseMatrix"), paste(args[2], "Rout", sep=""));
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+V1 <- readMM(paste(args[1], "V.mtx", sep=""))
+V <- as.matrix(V1);
+n <- ncol(V); 
+
+R <- array(0,dim=c(1,n))
+
+for( i in 1:n )
+{
+   X <- V[ ,i];                 
+   R[1,i] <- sum(X);
+}   
+
+writeMM(as(R, "CsparseMatrix"), paste(args[2], "Rout", sep=""));

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor_cdatapartitioning1.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor_cdatapartitioning1.dml b/src/test/scripts/functions/parfor/parfor_cdatapartitioning1.dml
index 84b6cdf..4a056ea 100644
--- a/src/test/scripts/functions/parfor/parfor_cdatapartitioning1.dml
+++ b/src/test/scripts/functions/parfor/parfor_cdatapartitioning1.dml
@@ -19,18 +19,18 @@
 #
 #-------------------------------------------------------------
 
-
-V = read($1,rows=$2,cols=$3);
-n = $3;
-
-R = matrix(0, rows=1,cols=n); 
-dummy = matrix(1, rows=1, cols=1);
-
-parfor( i in 1:n, par=4, mode=LOCAL, datapartitioner=NONE, taskpartitioner=FACTORING, opt=NONE )
-{
-   X = V[,i];                 
-   sX = sum(X);
-   R[1,i] = dummy * sX; 
-}   
-
+
+V = read($1,rows=$2,cols=$3);
+n = $3;
+
+R = matrix(0, rows=1,cols=n); 
+dummy = matrix(1, rows=1, cols=1);
+
+parfor( i in 1:n, par=4, mode=LOCAL, datapartitioner=NONE, taskpartitioner=FACTORING, opt=NONE )
+{
+   X = V[,i];                 
+   sX = sum(X);
+   R[1,i] = dummy * sX; 
+}   
+
 write(R, $4);       
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor_cdatapartitioning2.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor_cdatapartitioning2.dml b/src/test/scripts/functions/parfor/parfor_cdatapartitioning2.dml
index 318df7a..2296bae 100644
--- a/src/test/scripts/functions/parfor/parfor_cdatapartitioning2.dml
+++ b/src/test/scripts/functions/parfor/parfor_cdatapartitioning2.dml
@@ -19,18 +19,18 @@
 #
 #-------------------------------------------------------------
 
-
-V = read($1,rows=$2,cols=$3);
-n = $3;
-
-R = matrix(0, rows=1,cols=n); 
-dummy = matrix(1, rows=1, cols=1);
-
-parfor( i in 1:n, par=4, mode=LOCAL, datapartitioner=LOCAL, taskpartitioner=FACTORING,  opt=NONE )
-{
-   X = V[,i];                 
-   sX = sum(X);
-   R[1,i] = dummy * sX; 
-}   
-
+
+V = read($1,rows=$2,cols=$3);
+n = $3;
+
+R = matrix(0, rows=1,cols=n); 
+dummy = matrix(1, rows=1, cols=1);
+
+parfor( i in 1:n, par=4, mode=LOCAL, datapartitioner=LOCAL, taskpartitioner=FACTORING,  opt=NONE )
+{
+   X = V[,i];                 
+   sX = sum(X);
+   R[1,i] = dummy * sX; 
+}   
+
 write(R, $4);       
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor_cdatapartitioning3.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor_cdatapartitioning3.dml b/src/test/scripts/functions/parfor/parfor_cdatapartitioning3.dml
index 74be31b..6429a22 100644
--- a/src/test/scripts/functions/parfor/parfor_cdatapartitioning3.dml
+++ b/src/test/scripts/functions/parfor/parfor_cdatapartitioning3.dml
@@ -19,18 +19,18 @@
 #
 #-------------------------------------------------------------
 
-
-V = read($1,rows=$2,cols=$3);
-n = $3;
-
-R = matrix(0, rows=1,cols=n); 
-dummy = matrix(1, rows=1, cols=1);
-
-parfor( i in 1:n, par=4, mode=REMOTE_MR,datapartitioner=LOCAL, taskpartitioner=FACTORING, opt=NONE )
-{
-   X = V[,i];                 
-   sX = sum(X);
-   R[1,i] = dummy * sX; 
-}   
-
+
+V = read($1,rows=$2,cols=$3);
+n = $3;
+
+R = matrix(0, rows=1,cols=n); 
+dummy = matrix(1, rows=1, cols=1);
+
+parfor( i in 1:n, par=4, mode=REMOTE_MR,datapartitioner=LOCAL, taskpartitioner=FACTORING, opt=NONE )
+{
+   X = V[,i];                 
+   sX = sum(X);
+   R[1,i] = dummy * sX; 
+}   
+
 write(R, $4);       
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor_cdatapartitioning4.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor_cdatapartitioning4.dml b/src/test/scripts/functions/parfor/parfor_cdatapartitioning4.dml
index f94693e..a236920 100644
--- a/src/test/scripts/functions/parfor/parfor_cdatapartitioning4.dml
+++ b/src/test/scripts/functions/parfor/parfor_cdatapartitioning4.dml
@@ -19,18 +19,18 @@
 #
 #-------------------------------------------------------------
 
-
-V = read($1,rows=$2,cols=$3);
-n = $3;
-
-R = matrix(0, rows=1,cols=n); 
-dummy = matrix(1, rows=1, cols=1);
-
-parfor( i in 1:n, par=4, mode=LOCAL, datapartitioner=REMOTE_MR, taskpartitioner=FACTORING, opt=NONE )
-{
-   X = V[,i];                 
-   sX = sum(X);
-   R[1,i] = dummy * sX; 
-}   
-
+
+V = read($1,rows=$2,cols=$3);
+n = $3;
+
+R = matrix(0, rows=1,cols=n); 
+dummy = matrix(1, rows=1, cols=1);
+
+parfor( i in 1:n, par=4, mode=LOCAL, datapartitioner=REMOTE_MR, taskpartitioner=FACTORING, opt=NONE )
+{
+   X = V[,i];                 
+   sX = sum(X);
+   R[1,i] = dummy * sX; 
+}   
+
 write(R, $4);       
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor_cdatapartitioning5.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor_cdatapartitioning5.dml b/src/test/scripts/functions/parfor/parfor_cdatapartitioning5.dml
index 8e81e73..7af8684 100644
--- a/src/test/scripts/functions/parfor/parfor_cdatapartitioning5.dml
+++ b/src/test/scripts/functions/parfor/parfor_cdatapartitioning5.dml
@@ -19,18 +19,18 @@
 #
 #-------------------------------------------------------------
 
-
-V = read($1,rows=$2,cols=$3);
-n = $3;
-
-R = matrix(0, rows=1,cols=n); 
-dummy = matrix(1, rows=1, cols=1);
-
-parfor( i in 1:n, par=4, mode=REMOTE_MR,datapartitioner=REMOTE_MR, taskpartitioner=FACTORING, opt=NONE )
-{
-   X = V[,i];                 
-   sX = sum(X);
-   R[1,i] = dummy * sX; 
-}   
-
+
+V = read($1,rows=$2,cols=$3);
+n = $3;
+
+R = matrix(0, rows=1,cols=n); 
+dummy = matrix(1, rows=1, cols=1);
+
+parfor( i in 1:n, par=4, mode=REMOTE_MR,datapartitioner=REMOTE_MR, taskpartitioner=FACTORING, opt=NONE )
+{
+   X = V[,i];                 
+   sX = sum(X);
+   R[1,i] = dummy * sX; 
+}   
+
 write(R, $4);       
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor_cdatapartitioning6.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor_cdatapartitioning6.R b/src/test/scripts/functions/parfor/parfor_cdatapartitioning6.R
index 0adb517..52493fc 100644
--- a/src/test/scripts/functions/parfor/parfor_cdatapartitioning6.R
+++ b/src/test/scripts/functions/parfor/parfor_cdatapartitioning6.R
@@ -19,22 +19,22 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-V1 <- readMM(paste(args[1], "V.mtx", sep=""))
-V <- as.matrix(V1);
-n <- ncol(V); 
-
-R <- array(0,dim=c(1,n))
-
-for( i in 1:n-1 )
-{
-   X <- V[ ,i:(i+1)];                 
-   R[1,i] <- sum(X);
-}   
-
-writeMM(as(R, "CsparseMatrix"), paste(args[2], "Rout", sep=""));
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+V1 <- readMM(paste(args[1], "V.mtx", sep=""))
+V <- as.matrix(V1);
+n <- ncol(V); 
+
+R <- array(0,dim=c(1,n))
+
+for( i in 1:n-1 )
+{
+   X <- V[ ,i:(i+1)];                 
+   R[1,i] <- sum(X);
+}   
+
+writeMM(as(R, "CsparseMatrix"), paste(args[2], "Rout", sep=""));

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor_cdatapartitioning6.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor_cdatapartitioning6.dml b/src/test/scripts/functions/parfor/parfor_cdatapartitioning6.dml
index fa9fe9f..2d011c0 100644
--- a/src/test/scripts/functions/parfor/parfor_cdatapartitioning6.dml
+++ b/src/test/scripts/functions/parfor/parfor_cdatapartitioning6.dml
@@ -19,18 +19,18 @@
 #
 #-------------------------------------------------------------
 
-
-V = read($1,rows=$2,cols=$3);
-n = $3;
-
-R = matrix(0, rows=1,cols=n); 
-dummy = matrix(1, rows=1, cols=1);
-
-parfor( i in 1:n-1, par=4, mode=LOCAL, datapartitioner=REMOTE_MR, taskpartitioner=FACTORING, opt=NONE )
-{
-   X = V[,i:(i+1)];                 
-   sX = sum(X);
-   R[1,i] = dummy * sX; 
-}   
-
+
+V = read($1,rows=$2,cols=$3);
+n = $3;
+
+R = matrix(0, rows=1,cols=n); 
+dummy = matrix(1, rows=1, cols=1);
+
+parfor( i in 1:n-1, par=4, mode=LOCAL, datapartitioner=REMOTE_MR, taskpartitioner=FACTORING, opt=NONE )
+{
+   X = V[,i:(i+1)];                 
+   sX = sum(X);
+   R[1,i] = dummy * sX; 
+}   
+
 write(R, $4);       
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor_cdatapartitioning7.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor_cdatapartitioning7.dml b/src/test/scripts/functions/parfor/parfor_cdatapartitioning7.dml
index 948745c..316bd1a 100644
--- a/src/test/scripts/functions/parfor/parfor_cdatapartitioning7.dml
+++ b/src/test/scripts/functions/parfor/parfor_cdatapartitioning7.dml
@@ -19,18 +19,18 @@
 #
 #-------------------------------------------------------------
 
-
-V = read($1,rows=$2,cols=$3);
-n = $3;
-
-R = matrix(0, rows=1,cols=n); 
-dummy = matrix(1, rows=1, cols=1);
-
-parfor( i in 1:n, par=4, mode=LOCAL, datapartitioner=REMOTE_SPARK, taskpartitioner=FACTORING, opt=NONE )
-{
-   X = V[,i];                 
-   sX = sum(X);
-   R[1,i] = dummy * sX; 
-}   
-
+
+V = read($1,rows=$2,cols=$3);
+n = $3;
+
+R = matrix(0, rows=1,cols=n); 
+dummy = matrix(1, rows=1, cols=1);
+
+parfor( i in 1:n, par=4, mode=LOCAL, datapartitioner=REMOTE_SPARK, taskpartitioner=FACTORING, opt=NONE )
+{
+   X = V[,i];                 
+   sX = sum(X);
+   R[1,i] = dummy * sX; 
+}   
+
 write(R, $4);       
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor_cdatapartitioning8.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor_cdatapartitioning8.dml b/src/test/scripts/functions/parfor/parfor_cdatapartitioning8.dml
index 2d2e871..9ae62de 100644
--- a/src/test/scripts/functions/parfor/parfor_cdatapartitioning8.dml
+++ b/src/test/scripts/functions/parfor/parfor_cdatapartitioning8.dml
@@ -19,18 +19,18 @@
 #
 #-------------------------------------------------------------
 
-
-V = read($1,rows=$2,cols=$3);
-n = $3;
-
-R = matrix(0, rows=1,cols=n); 
-dummy = matrix(1, rows=1, cols=1);
-
-parfor( i in 1:n, par=4, mode=REMOTE_SPARK,datapartitioner=REMOTE_SPARK, taskpartitioner=FACTORING, opt=NONE )
-{
-   X = V[,i];                 
-   sX = sum(X);
-   R[1,i] = dummy * sX; 
-}   
-
+
+V = read($1,rows=$2,cols=$3);
+n = $3;
+
+R = matrix(0, rows=1,cols=n); 
+dummy = matrix(1, rows=1, cols=1);
+
+parfor( i in 1:n, par=4, mode=REMOTE_SPARK,datapartitioner=REMOTE_SPARK, taskpartitioner=FACTORING, opt=NONE )
+{
+   X = V[,i];                 
+   sX = sum(X);
+   R[1,i] = dummy * sX; 
+}   
+
 write(R, $4);       
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor_cdatapartitioning9.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor_cdatapartitioning9.dml b/src/test/scripts/functions/parfor/parfor_cdatapartitioning9.dml
index a77bcf1..eeca0dd 100644
--- a/src/test/scripts/functions/parfor/parfor_cdatapartitioning9.dml
+++ b/src/test/scripts/functions/parfor/parfor_cdatapartitioning9.dml
@@ -19,18 +19,18 @@
 #
 #-------------------------------------------------------------
 
-
-V = read($1,rows=$2,cols=$3);
-n = $3;
-
-R = matrix(0, rows=1,cols=n); 
-dummy = matrix(1, rows=1, cols=1);
-
-parfor( i in 1:n-1, par=4, mode=LOCAL, datapartitioner=REMOTE_SPARK, taskpartitioner=FACTORING, opt=NONE )
-{
-   X = V[,i:(i+1)];                 
-   sX = sum(X);
-   R[1,i] = dummy * sX; 
-}   
-
+
+V = read($1,rows=$2,cols=$3);
+n = $3;
+
+R = matrix(0, rows=1,cols=n); 
+dummy = matrix(1, rows=1, cols=1);
+
+parfor( i in 1:n-1, par=4, mode=LOCAL, datapartitioner=REMOTE_SPARK, taskpartitioner=FACTORING, opt=NONE )
+{
+   X = V[,i:(i+1)];                 
+   sX = sum(X);
+   R[1,i] = dummy * sX; 
+}   
+
 write(R, $4);       
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor_extfunct.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor_extfunct.R b/src/test/scripts/functions/parfor/parfor_extfunct.R
index 6fb0af6..88bbf05 100644
--- a/src/test/scripts/functions/parfor/parfor_extfunct.R
+++ b/src/test/scripts/functions/parfor/parfor_extfunct.R
@@ -19,22 +19,22 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-V1 <- readMM(paste(args[1], "V.mtx", sep=""))
-V <- as.matrix(V1);
-n <- ncol(V); 
-
-R <- array(0,dim=c(n,1))
-
-for( i in 1:n )
-{
-   X <- V[ ,i];                 
-   R[i,1] <- sum(X);
-}   
-
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+V1 <- readMM(paste(args[1], "V.mtx", sep=""))
+V <- as.matrix(V1);
+n <- ncol(V); 
+
+R <- array(0,dim=c(n,1))
+
+for( i in 1:n )
+{
+   X <- V[ ,i];                 
+   R[i,1] <- sum(X);
+}   
+
 writeMM(as(R, "CsparseMatrix"), paste(args[2], "Rout", sep="")); 
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor_extfunct.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor_extfunct.dml b/src/test/scripts/functions/parfor/parfor_extfunct.dml
index e5222f2..94b998c 100644
--- a/src/test/scripts/functions/parfor/parfor_extfunct.dml
+++ b/src/test/scripts/functions/parfor/parfor_extfunct.dml
@@ -19,28 +19,28 @@
 #
 #-------------------------------------------------------------
 
-
-dynRead = externalFunction(String fname, Integer rows, Integer cols, String format)
-return (Matrix[Double] M) 
-implemented in (classname="org.apache.sysml.udf.lib.DynamicReadMatrixCP",exectype="mem")   
-
-execCondense = externalFunction(Matrix[Double] input)
-return(Matrix[Double] out)
-implemented in (classname="org.apache.sysml.udf.lib.RemoveEmptyRows",exectype="file", execlocation="master")   
-
-
-V = read($1, rows=$2, cols=$3, format="text");   
-R = matrix(0, rows=$3,cols=1); 
-dummy = matrix(1, rows=1, cols=1);
-
-parfor( i in 1:$3, mode=REMOTE_MR, opt=NONE ) 
-{
-   W = dynRead($1, $2, $3, "textcell");
-   X = V[,i];
-   sumx = sum(X);
-   R[i,1] = dummy * sumx; 
-}  
-
-R = execCondense( R );
-
+
+dynRead = externalFunction(String fname, Integer rows, Integer cols, String format)
+return (Matrix[Double] M) 
+implemented in (classname="org.apache.sysml.udf.lib.DynamicReadMatrixCP",exectype="mem")   
+
+execCondense = externalFunction(Matrix[Double] input)
+return(Matrix[Double] out)
+implemented in (classname="org.apache.sysml.udf.lib.RemoveEmptyRows",exectype="file", execlocation="master")   
+
+
+V = read($1, rows=$2, cols=$3, format="text");   
+R = matrix(0, rows=$3,cols=1); 
+dummy = matrix(1, rows=1, cols=1);
+
+parfor( i in 1:$3, mode=REMOTE_MR, opt=NONE ) 
+{
+   W = dynRead($1, $2, $3, "textcell");
+   X = V[,i];
+   sumx = sum(X);
+   R[i,1] = dummy * sumx; 
+}  
+
+R = execCondense( R );
+
 write(R, $4);       
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor_funct.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor_funct.R b/src/test/scripts/functions/parfor/parfor_funct.R
index 6fb0af6..88bbf05 100644
--- a/src/test/scripts/functions/parfor/parfor_funct.R
+++ b/src/test/scripts/functions/parfor/parfor_funct.R
@@ -19,22 +19,22 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-V1 <- readMM(paste(args[1], "V.mtx", sep=""))
-V <- as.matrix(V1);
-n <- ncol(V); 
-
-R <- array(0,dim=c(n,1))
-
-for( i in 1:n )
-{
-   X <- V[ ,i];                 
-   R[i,1] <- sum(X);
-}   
-
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+V1 <- readMM(paste(args[1], "V.mtx", sep=""))
+V <- as.matrix(V1);
+n <- ncol(V); 
+
+R <- array(0,dim=c(n,1))
+
+for( i in 1:n )
+{
+   X <- V[ ,i];                 
+   R[i,1] <- sum(X);
+}   
+
 writeMM(as(R, "CsparseMatrix"), paste(args[2], "Rout", sep="")); 
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor_funct.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor_funct.dml b/src/test/scripts/functions/parfor/parfor_funct.dml
index 4e35f58..fad5cb0 100644
--- a/src/test/scripts/functions/parfor/parfor_funct.dml
+++ b/src/test/scripts/functions/parfor/parfor_funct.dml
@@ -19,22 +19,22 @@
 #
 #-------------------------------------------------------------
 
-
-execSum = function(Matrix[Double] X) return (Double sx) 
-{
-   sx = sum(X);    
-}
-
-V = read($1, rows=$2, cols=$3);
-
-R = matrix(0, rows=$3,cols=1); 
-dummy = matrix(1, rows=1, cols=1);
-
-parfor( i in 1:$3, mode=REMOTE_MR, opt=NONE ) 
-{
-   X = V[,i];
-   sumx = execSum(X);
-   R[i,1] = dummy * sumx; 
-}  
-
+
+execSum = function(Matrix[Double] X) return (Double sx) 
+{
+   sx = sum(X);    
+}
+
+V = read($1, rows=$2, cols=$3);
+
+R = matrix(0, rows=$3,cols=1); 
+dummy = matrix(1, rows=1, cols=1);
+
+parfor( i in 1:$3, mode=REMOTE_MR, opt=NONE ) 
+{
+   X = V[,i];
+   sumx = execSum(X);
+   R[i,1] = dummy * sumx; 
+}  
+
 write(R, $4);       
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor_literals1a.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor_literals1a.dml b/src/test/scripts/functions/parfor/parfor_literals1a.dml
index 8bbb344..fd75d23 100644
--- a/src/test/scripts/functions/parfor/parfor_literals1a.dml
+++ b/src/test/scripts/functions/parfor/parfor_literals1a.dml
@@ -19,17 +19,17 @@
 #
 #-------------------------------------------------------------
 
-
-dynWrite = externalFunction(Matrix[Double] input, String fname, String format)
-  return(Boolean success)
-  implemented in (classname="org.apache.sysml.udf.lib.DynamicWriteMatrixCP",exectype="mem") 
-
-A = read($1, rows=$2, cols=$3, format="text");   
-
-parfor( i in 1:1, mode=LOCAL, opt=NONE ) 
-{
-   B = A + 0; 
-   # param should include internal 'thread root prefix' _t0
-   x = dynWrite(B, $4, "textcell");  
-}  
+
+dynWrite = externalFunction(Matrix[Double] input, String fname, String format)
+  return(Boolean success)
+  implemented in (classname="org.apache.sysml.udf.lib.DynamicWriteMatrixCP",exectype="mem") 
+
+A = read($1, rows=$2, cols=$3, format="text");   
+
+parfor( i in 1:1, mode=LOCAL, opt=NONE ) 
+{
+   B = A + 0; 
+   # param should include internal 'thread root prefix' _t0
+   x = dynWrite(B, $4, "textcell");  
+}  
       
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor_literals1b.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor_literals1b.dml b/src/test/scripts/functions/parfor/parfor_literals1b.dml
index 2b89cc7..93c6536 100644
--- a/src/test/scripts/functions/parfor/parfor_literals1b.dml
+++ b/src/test/scripts/functions/parfor/parfor_literals1b.dml
@@ -19,17 +19,17 @@
 #
 #-------------------------------------------------------------
 
-
-dynWrite = externalFunction(Matrix[Double] input, String fname, String format)
-  return(Boolean success)
-  implemented in (classname="org.apache.sysml.udf.lib.DynamicWriteMatrixCP",exectype="mem") 
-
-A = read($1, rows=$2, cols=$3, format="text");   
-
-parfor( i in 1:1, mode=REMOTE_MR, opt=NONE ) 
-{
-   B = A + 0; 
-   # param should include internal 'thread root prefix' _t0
-   x = dynWrite(B, $4, "textcell");  
-}  
+
+dynWrite = externalFunction(Matrix[Double] input, String fname, String format)
+  return(Boolean success)
+  implemented in (classname="org.apache.sysml.udf.lib.DynamicWriteMatrixCP",exectype="mem") 
+
+A = read($1, rows=$2, cols=$3, format="text");   
+
+parfor( i in 1:1, mode=REMOTE_MR, opt=NONE ) 
+{
+   B = A + 0; 
+   # param should include internal 'thread root prefix' _t0
+   x = dynWrite(B, $4, "textcell");  
+}  
       
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor_literals1c.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor_literals1c.dml b/src/test/scripts/functions/parfor/parfor_literals1c.dml
index eedd0a9..fdf6aad 100644
--- a/src/test/scripts/functions/parfor/parfor_literals1c.dml
+++ b/src/test/scripts/functions/parfor/parfor_literals1c.dml
@@ -19,20 +19,20 @@
 #
 #-------------------------------------------------------------
 
-
-dynWrite = externalFunction(Matrix[Double] input, String fname, String format)
-  return(Boolean success)
-  implemented in (classname="org.apache.sysml.udf.lib.DynamicWriteMatrixCP",exectype="mem") 
-
-A = read($1, rows=$2, cols=$3, format="text");   
-
-parfor( i in 1:1, mode=LOCAL, opt=NONE ) 
-{
- parfor( j in 1:1, mode=LOCAL, opt=NONE ) 
- {
-   B = A + 0; 
-   # param should include internal 'thread root prefix' _t0
-   x = dynWrite(B, $4, "textcell");  
- }
-}  
+
+dynWrite = externalFunction(Matrix[Double] input, String fname, String format)
+  return(Boolean success)
+  implemented in (classname="org.apache.sysml.udf.lib.DynamicWriteMatrixCP",exectype="mem") 
+
+A = read($1, rows=$2, cols=$3, format="text");   
+
+parfor( i in 1:1, mode=LOCAL, opt=NONE ) 
+{
+ parfor( j in 1:1, mode=LOCAL, opt=NONE ) 
+ {
+   B = A + 0; 
+   # param should include internal 'thread root prefix' _t0
+   x = dynWrite(B, $4, "textcell");  
+ }
+}  
       
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor_literals2.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor_literals2.dml b/src/test/scripts/functions/parfor/parfor_literals2.dml
index 0b1bc1d..855e08d 100644
--- a/src/test/scripts/functions/parfor/parfor_literals2.dml
+++ b/src/test/scripts/functions/parfor/parfor_literals2.dml
@@ -19,17 +19,17 @@
 #
 #-------------------------------------------------------------
 
-
-dynWrite_t0 = externalFunction(Matrix[Double] input, String fname, String format)
-  return(Boolean success)
-  implemented in (classname="org.apache.sysml.udf.lib.DynamicWriteMatrixCP",exectype="mem") 
-
-A = read($1, rows=$2, cols=$3, format="text");   
-
-parfor( i in 1:1, mode=REMOTE_MR, opt=NONE ) 
-{
-   B = A + 0; 
-   # param should include internal 'thread root prefix'
-   x = dynWrite_t0(B, $4, "textcell");  
-}  
+
+dynWrite_t0 = externalFunction(Matrix[Double] input, String fname, String format)
+  return(Boolean success)
+  implemented in (classname="org.apache.sysml.udf.lib.DynamicWriteMatrixCP",exectype="mem") 
+
+A = read($1, rows=$2, cols=$3, format="text");   
+
+parfor( i in 1:1, mode=REMOTE_MR, opt=NONE ) 
+{
+   B = A + 0; 
+   # param should include internal 'thread root prefix'
+   x = dynWrite_t0(B, $4, "textcell");  
+}  
       
\ No newline at end of file



[27/55] [partial] incubator-systemml git commit: [SYSTEMML-482] [SYSTEMML-480] Adding a Git attributes file to enfore Unix-styled line endings, and normalizing all of the line endings.

Posted by du...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/glm/GLM.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/glm/GLM.dml b/src/test/scripts/applications/glm/GLM.dml
index bed88a1..25832c5 100644
--- a/src/test/scripts/applications/glm/GLM.dml
+++ b/src/test/scripts/applications/glm/GLM.dml
@@ -1,1167 +1,1167 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# 
-# THIS SCRIPT SOLVES GLM REGRESSION USING NEWTON/FISHER SCORING WITH TRUST REGIONS
-#
-# INPUT PARAMETERS:
-# ---------------------------------------------------------------------------------------------
-# NAME  TYPE   DEFAULT  MEANING
-# ---------------------------------------------------------------------------------------------
-# X     String  ---     Location to read the matrix X of feature vectors
-# Y     String  ---     Location to read response matrix Y with either 1 or 2 columns:
-#                       if dfam = 2, Y is 1-column Bernoulli or 2-column Binomial (#pos, #neg)
-# B     String  ---     Location to store estimated regression parameters (the betas)
-# fmt   String "text"   The betas matrix output format, such as "text" or "csv"
-# O     String  " "     Location to write the printed statistics; by default is standard output
-# Log   String  " "     Location to write per-iteration variables for log/debugging purposes
-# dfam  Int     1       Distribution family code: 1 = Power, 2 = Binomial
-# vpow  Double  0.0     Power for Variance defined as (mean)^power (ignored if dfam != 1):
-#                       0.0 = Gaussian, 1.0 = Poisson, 2.0 = Gamma, 3.0 = Inverse Gaussian
-# link  Int     0       Link function code: 0 = canonical (depends on distribution),
-#                       1 = Power, 2 = Logit, 3 = Probit, 4 = Cloglog, 5 = Cauchit
-# lpow  Double  1.0     Power for Link function defined as (mean)^power (ignored if link != 1):
-#                       -2.0 = 1/mu^2, -1.0 = reciprocal, 0.0 = log, 0.5 = sqrt, 1.0 = identity
-# yneg  Double  0.0     Response value for Bernoulli "No" label, usually 0.0 or -1.0
-# icpt  Int     0       Intercept presence, X columns shifting and rescaling:
-#                       0 = no intercept, no shifting, no rescaling;
-#                       1 = add intercept, but neither shift nor rescale X;
-#                       2 = add intercept, shift & rescale X columns to mean = 0, variance = 1
-# reg   Double  0.0     Regularization parameter (lambda) for L2 regularization
-# tol   Double 0.000001 Tolerance (epsilon)
-# disp  Double  0.0     (Over-)dispersion value, or 0.0 to estimate it from data
-# moi   Int     200     Maximum number of outer (Newton / Fisher Scoring) iterations
-# mii   Int     0       Maximum number of inner (Conjugate Gradient) iterations, 0 = no maximum
-# ---------------------------------------------------------------------------------------------
-# OUTPUT: Matrix beta, whose size depends on icpt:
-#     icpt=0: ncol(X) x 1;  icpt=1: (ncol(X) + 1) x 1;  icpt=2: (ncol(X) + 1) x 2
-#
-# In addition, some GLM statistics are provided in CSV format, one comma-separated name-value
-# pair per each line, as follows:
-#
-# NAME                  MEANING
-# -------------------------------------------------------------------------------------------
-# TERMINATION_CODE      A positive integer indicating success/failure as follows:
-#                       1 = Converged successfully; 2 = Maximum number of iterations reached; 
-#                       3 = Input (X, Y) out of range; 4 = Distribution/link is not supported
-# BETA_MIN              Smallest beta value (regression coefficient), excluding the intercept
-# BETA_MIN_INDEX        Column index for the smallest beta value
-# BETA_MAX              Largest beta value (regression coefficient), excluding the intercept
-# BETA_MAX_INDEX        Column index for the largest beta value
-# INTERCEPT             Intercept value, or NaN if there is no intercept (if icpt=0)
-# DISPERSION            Dispersion used to scale deviance, provided as "disp" input parameter
-#                       or estimated (same as DISPERSION_EST) if the "disp" parameter is <= 0
-# DISPERSION_EST        Dispersion estimated from the dataset
-# DEVIANCE_UNSCALED     Deviance from the saturated model, assuming dispersion == 1.0
-# DEVIANCE_SCALED       Deviance from the saturated model, scaled by the DISPERSION value
-# -------------------------------------------------------------------------------------------
-#
-# The Log file, when requested, contains the following per-iteration variables in CSV format,
-# each line containing triple (NAME, ITERATION, VALUE) with ITERATION = 0 for initial values:
-#
-# NAME                  MEANING
-# -------------------------------------------------------------------------------------------
-# NUM_CG_ITERS          Number of inner (Conj.Gradient) iterations in this outer iteration
-# IS_TRUST_REACHED      1 = trust region boundary was reached, 0 = otherwise
-# POINT_STEP_NORM       L2-norm of iteration step from old point (i.e. "beta") to new point
-# OBJECTIVE             The loss function we minimize (i.e. negative partial log-likelihood)
-# OBJ_DROP_REAL         Reduction in the objective during this iteration, actual value
-# OBJ_DROP_PRED         Reduction in the objective predicted by a quadratic approximation
-# OBJ_DROP_RATIO        Actual-to-predicted reduction ratio, used to update the trust region
-# GRADIENT_NORM         L2-norm of the loss function gradient (NOTE: sometimes omitted)
-# LINEAR_TERM_MIN       The minimum value of X %*% beta, used to check for overflows
-# LINEAR_TERM_MAX       The maximum value of X %*% beta, used to check for overflows
-# IS_POINT_UPDATED      1 = new point accepted; 0 = new point rejected, old point restored
-# TRUST_DELTA           Updated trust region size, the "delta"
-# -------------------------------------------------------------------------------------------
-#
-# Example with distribution = "Binomial.logit":
-# hadoop jar SystemML.jar -f GLM_HOME/GLM.dml -nvargs dfam=2 link=2 yneg=-1.0 icpt=2 reg=0.001
-#     tol=0.00000001 disp=1.0 moi=100 mii=10 X=INPUT_DIR/X Y=INPUT_DIR/Y B=OUTPUT_DIR/betas 
-#     fmt=csv O=OUTPUT_DIR/stats Log=OUTPUT_DIR/log
-#
-# SOME OF THE SUPPORTED GLM DISTRIBUTION FAMILIES
-# AND LINK FUNCTIONS:
-# -----------------------------------------------
-# INPUT PARAMETERS:    MEANING:            Cano-
-# dfam vpow link lpow  Distribution.link   nical?
-# -----------------------------------------------
-#  1   0.0   1  -1.0   Gaussian.inverse
-#  1   0.0   1   0.0   Gaussian.log
-#  1   0.0   1   1.0   Gaussian.id          Yes
-#  1   1.0   1   0.0   Poisson.log          Yes
-#  1   1.0   1   0.5   Poisson.sqrt
-#  1   1.0   1   1.0   Poisson.id
-#  1   2.0   1  -1.0   Gamma.inverse        Yes
-#  1   2.0   1   0.0   Gamma.log
-#  1   2.0   1   1.0   Gamma.id
-#  1   3.0   1  -2.0   InvGaussian.1/mu^2   Yes
-#  1   3.0   1  -1.0   InvGaussian.inverse
-#  1   3.0   1   0.0   InvGaussian.log
-#  1   3.0   1   1.0   InvGaussian.id
-#  1    *    1    *    AnyVariance.AnyLink
-# -----------------------------------------------
-#  2    *    1   0.0   Binomial.log
-#  2    *    1   0.5   Binomial.sqrt
-#  2    *    2    *    Binomial.logit       Yes
-#  2    *    3    *    Binomial.probit
-#  2    *    4    *    Binomial.cloglog
-#  2    *    5    *    Binomial.cauchit
-# -----------------------------------------------
-
-
-# Default values for input parameters
-
-fileX = $X;
-fileY = $Y;
-fileB = $B;
-fileO = ifdef ($O, " ");
-fileLog = ifdef ($Log, " ");
-fmtB = ifdef ($fmt, "text");
-
-distribution_type = ifdef ($dfam, 1);                # $dfam = 1;
-variance_as_power_of_the_mean = ifdef ($vpow, 0.0);  # $vpow = 0.0;
-link_type = ifdef ($link, 0);                        # $link = 0;
-link_as_power_of_the_mean = ifdef ($lpow, 1.0);      # $lpow = 1.0;
-bernoulli_No_label = ifdef ($yneg, 0.0);             # $yneg = 0.0;
-intercept_status = ifdef ($icpt, 0);                 # $icpt = 0;
-dispersion = ifdef ($disp, 0.0);                     # $disp = 0.0;
-regularization = ifdef ($reg, 0.0);                  # $reg  = 0.0;
-eps = ifdef ($tol, 0.000001);                        # $tol  = 0.000001;
-max_iteration_IRLS = ifdef ($moi, 200);              # $moi  = 200;
-max_iteration_CG = ifdef ($mii, 0);                  # $mii  = 0;
-
-variance_as_power_of_the_mean = as.double (variance_as_power_of_the_mean);
-link_as_power_of_the_mean = as.double (link_as_power_of_the_mean);
-bernoulli_No_label = as.double (bernoulli_No_label);
-dispersion = as.double (dispersion);
-eps = as.double (eps);
-
-
-# Default values for output statistics:
-
-termination_code     = 0.0;
-min_beta             = 0.0 / 0.0;
-i_min_beta           = 0.0 / 0.0;
-max_beta             = 0.0 / 0.0;
-i_max_beta           = 0.0 / 0.0;
-intercept_value      = 0.0 / 0.0;
-dispersion           = 0.0 / 0.0;
-estimated_dispersion = 0.0 / 0.0;
-deviance_nodisp      = 0.0 / 0.0;
-deviance             = 0.0 / 0.0;
-
-print("BEGIN GLM SCRIPT");
-print("Reading X...");
-X = read (fileX);
-print("Reading Y...");
-Y = read (fileY);
-
-num_records  = nrow (X);
-num_features = ncol (X);
-zeros_r = matrix (0, rows = num_records, cols = 1);
-ones_r = 1 + zeros_r;
-
-# Introduce the intercept, shift and rescale the columns of X if needed
-
-if (intercept_status == 1 | intercept_status == 2)  # add the intercept column
-{
-    X = append (X, ones_r);
-    num_features = ncol (X);
-}
-
-scale_lambda = matrix (1, rows = num_features, cols = 1);
-if (intercept_status == 1 | intercept_status == 2)
-{
-    scale_lambda [num_features, 1] = 0;
-}
-
-if (intercept_status == 2)  # scale-&-shift X columns to mean 0, variance 1
-{                           # Important assumption: X [, num_features] = ones_r
-    avg_X_cols = t(colSums(X)) / num_records;
-    var_X_cols = (t(colSums (X ^ 2)) - num_records * (avg_X_cols ^ 2)) / (num_records - 1);
-    is_unsafe = ppred (var_X_cols, 0.0, "<=");
-    scale_X = 1.0 / sqrt (var_X_cols * (1 - is_unsafe) + is_unsafe);
-    scale_X [num_features, 1] = 1;
-    shift_X = - avg_X_cols * scale_X;
-    shift_X [num_features, 1] = 0;
-    rowSums_X_sq = (X ^ 2) %*% (scale_X ^ 2) + X %*% (2 * scale_X * shift_X) + sum (shift_X ^ 2);
-} else {
-    scale_X = matrix (1, rows = num_features, cols = 1);
-    shift_X = matrix (0, rows = num_features, cols = 1);
-    rowSums_X_sq = rowSums (X ^ 2);
-}
-
-# Henceforth we replace "X" with "X %*% (SHIFT/SCALE TRANSFORM)" and rowSums(X ^ 2)
-# with "rowSums_X_sq" in order to preserve the sparsity of X under shift and scale.
-# The transform is then associatively applied to the other side of the expression,
-# and is rewritten via "scale_X" and "shift_X" as follows:
-#
-# ssX_A  = (SHIFT/SCALE TRANSFORM) %*% A    --- is rewritten as:
-# ssX_A  = diag (scale_X) %*% A;
-# ssX_A [num_features, ] = ssX_A [num_features, ] + t(shift_X) %*% A;
-#
-# tssX_A = t(SHIFT/SCALE TRANSFORM) %*% A   --- is rewritten as:
-# tssX_A = diag (scale_X) %*% A + shift_X %*% A [num_features, ];
-
-# Initialize other input-dependent parameters
-
-lambda = scale_lambda * regularization;
-if (max_iteration_CG == 0) {
-    max_iteration_CG = num_features;
-}
-
-# In Bernoulli case, convert one-column "Y" into two-column
-
-if (distribution_type == 2 & ncol(Y) == 1)
-{
-    is_Y_negative = ppred (Y, bernoulli_No_label, "==");
-    Y = append (1 - is_Y_negative, is_Y_negative);
-    count_Y_negative = sum (is_Y_negative);
-    if (count_Y_negative == 0) {
-        stop ("GLM Input Error: all Y-values encode Bernoulli YES-label, none encode NO-label");
-    }
-    if (count_Y_negative == nrow(Y)) {
-        stop ("GLM Input Error: all Y-values encode Bernoulli NO-label, none encode YES-label");
-    }
-}
-
-# Set up the canonical link, if requested [Then we have: Var(mu) * (d link / d mu) = const]
-
-if (link_type == 0)
-{
-    if (distribution_type == 1) {
-        link_type = 1;
-        link_as_power_of_the_mean = 1.0 - variance_as_power_of_the_mean;
-    } else { if (distribution_type == 2) {
-            link_type = 2;
-}   }   }
-
-# For power distributions and/or links, we use two constants,
-# "variance as power of the mean" and "link_as_power_of_the_mean",
-# to specify the variance and the link as arbitrary powers of the
-# mean.  However, the variance-powers of 1.0 (Poisson family) and
-# 2.0 (Gamma family) have to be treated as special cases, because
-# these values integrate into logarithms.  The link-power of 0.0
-# is also special as it represents the logarithm link.
-
-num_response_columns = ncol (Y);
-
-is_supported = check_if_supported (num_response_columns, distribution_type, variance_as_power_of_the_mean, link_type, link_as_power_of_the_mean);
-if (is_supported == 1)
-{
-
-#####   INITIALIZE THE BETAS   #####
-
-[beta, saturated_log_l, isNaN] = 
-    glm_initialize (X, Y, distribution_type, variance_as_power_of_the_mean, link_type, link_as_power_of_the_mean, intercept_status, max_iteration_CG);
-if (isNaN == 0)
-{
-
-#####  START OF THE MAIN PART  #####
-
-sum_X_sq = sum (rowSums_X_sq);
-trust_delta = 0.5 * sqrt (num_features) / max (sqrt (rowSums_X_sq));
-###  max_trust_delta = trust_delta * 10000.0;
-log_l = 0.0;
-deviance_nodisp = 0.0;
-new_deviance_nodisp = 0.0;
-isNaN_log_l = 2;
-newbeta = beta;
-g = matrix (0.0, rows = num_features, cols = 1);
-g_norm = sqrt (sum ((g + lambda * beta) ^ 2));
-accept_new_beta = 1;
-reached_trust_boundary = 0;
-neg_log_l_change_predicted = 0.0;
-i_IRLS = 0;
-
-print ("BEGIN IRLS ITERATIONS...");
-
-ssX_newbeta = diag (scale_X) %*% newbeta;
-ssX_newbeta [num_features, ] = ssX_newbeta [num_features, ] + t(shift_X) %*% newbeta;
-all_linear_terms = X %*% ssX_newbeta;
-
-[new_log_l, isNaN_new_log_l] = glm_log_likelihood_part
-    (all_linear_terms, Y, distribution_type, variance_as_power_of_the_mean, link_type, link_as_power_of_the_mean);
-
-if (isNaN_new_log_l == 0) {
-    new_deviance_nodisp = 2.0 * (saturated_log_l - new_log_l);
-    new_log_l = new_log_l - 0.5 * sum (lambda * newbeta ^ 2);
-}
-
-if (fileLog != " ") {
-    log_str = "POINT_STEP_NORM," + i_IRLS + "," + sqrt (sum (beta ^ 2));
-    log_str = append (log_str, "OBJECTIVE," + i_IRLS + "," + (- new_log_l));
-    log_str = append (log_str, "LINEAR_TERM_MIN," + i_IRLS + "," + min (all_linear_terms));
-    log_str = append (log_str, "LINEAR_TERM_MAX," + i_IRLS + "," + max (all_linear_terms));
-} else {
-    log_str = " ";
-}
-
-while (termination_code == 0)
-{
-    accept_new_beta = 1;
-    
-    if (i_IRLS > 0)
-    {
-        if (isNaN_log_l == 0) {
-            accept_new_beta = 0;
-        }
-
-# Decide whether to accept a new iteration point and update the trust region
-# See Alg. 4.1 on p. 69 of "Numerical Optimization" 2nd ed. by Nocedal and Wright
-
-        rho = (- new_log_l + log_l) / neg_log_l_change_predicted;
-        if (rho < 0.25 | isNaN_new_log_l == 1) {
-            trust_delta = 0.25 * trust_delta;
-        }
-        if (rho > 0.75 & isNaN_new_log_l == 0 & reached_trust_boundary == 1) {
-            trust_delta = 2 * trust_delta;
-            
-### if (trust_delta > max_trust_delta) {
-###     trust_delta = max_trust_delta;
-### }
-
-        }
-        if (rho > 0.1 & isNaN_new_log_l == 0) {
-            accept_new_beta = 1;
-        }
-    }
-
-    if (fileLog != " ") {
-        log_str = append (log_str, "IS_POINT_UPDATED," + i_IRLS + "," + accept_new_beta);
-        log_str = append (log_str, "TRUST_DELTA,"      + i_IRLS + "," + trust_delta);
-    }
-    if (accept_new_beta == 1)
-    {
-        beta = newbeta;  log_l = new_log_l;  deviance_nodisp = new_deviance_nodisp;  isNaN_log_l = isNaN_new_log_l;
-        
-        [g_Y, w] = glm_dist (all_linear_terms, Y, distribution_type, variance_as_power_of_the_mean, link_type, link_as_power_of_the_mean);
-        
-        # We introduced these variables to avoid roundoff errors:
-        #     g_Y = y_residual / (y_var * link_grad);
-        #     w   = 1.0 / (y_var * link_grad * link_grad);
-                      
-        gXY = - t(X) %*% g_Y;
-        g = diag (scale_X) %*% gXY + shift_X %*% gXY [num_features, ];
-        g_norm = sqrt (sum ((g + lambda * beta) ^ 2));
-        
-        if (fileLog != " ") {
-            log_str = append (log_str, "GRADIENT_NORM," + i_IRLS + "," + g_norm);
-        }
-    }
-    
-    [z, neg_log_l_change_predicted, num_CG_iters, reached_trust_boundary] = 
-        get_CG_Steihaug_point (X, scale_X, shift_X, w, g, beta, lambda, trust_delta, max_iteration_CG);
-
-    newbeta = beta + z;
-    
-    ssX_newbeta = diag (scale_X) %*% newbeta;
-    ssX_newbeta [num_features, ] = ssX_newbeta [num_features, ] + t(shift_X) %*% newbeta;
-    all_linear_terms = X %*% ssX_newbeta;
-    
-    [new_log_l, isNaN_new_log_l] = glm_log_likelihood_part
-        (all_linear_terms, Y, distribution_type, variance_as_power_of_the_mean, link_type, link_as_power_of_the_mean);
-
-    if (isNaN_new_log_l == 0) {
-        new_deviance_nodisp = 2.0 * (saturated_log_l - new_log_l);
-        new_log_l = new_log_l - 0.5 * sum (lambda * newbeta ^ 2);
-    }
-        
-    log_l_change = new_log_l - log_l;               # R's criterion for termination: |dev - devold|/(|dev| + 0.1) < eps
-
-    if (reached_trust_boundary == 0 & isNaN_new_log_l == 0 & 
-        (2.0 * abs (log_l_change) < eps * (deviance_nodisp + 0.1) | abs (log_l_change) < (abs (log_l) + abs (new_log_l)) * 0.00000000000001) )  
-    {
-        termination_code = 1;
-    }
-    rho = - log_l_change / neg_log_l_change_predicted;
-    z_norm = sqrt (sum (z * z));
-    
-    [z_norm_m, z_norm_e] = round_to_print (z_norm);
-    [trust_delta_m, trust_delta_e] = round_to_print (trust_delta);
-    [rho_m, rho_e] = round_to_print (rho);
-    [new_log_l_m, new_log_l_e] = round_to_print (new_log_l);
-    [log_l_change_m, log_l_change_e] = round_to_print (log_l_change);
-    [g_norm_m, g_norm_e] = round_to_print (g_norm);
-
-    i_IRLS = i_IRLS + 1;
-    print ("Iter #" + i_IRLS + " completed"
-        + ", ||z|| = " + z_norm_m + "E" + z_norm_e
-        + ", trust_delta = " + trust_delta_m + "E" + trust_delta_e
-        + ", reached = " + reached_trust_boundary
-        + ", ||g|| = " + g_norm_m + "E" + g_norm_e
-        + ", new_log_l = " + new_log_l_m + "E" + new_log_l_e
-        + ", log_l_change = " + log_l_change_m + "E" + log_l_change_e
-        + ", rho = " + rho_m + "E" + rho_e);
-        
-    if (fileLog != " ") {
-        log_str = append (log_str, "NUM_CG_ITERS,"     + i_IRLS + "," + num_CG_iters);
-        log_str = append (log_str, "IS_TRUST_REACHED," + i_IRLS + "," + reached_trust_boundary);
-        log_str = append (log_str, "POINT_STEP_NORM,"  + i_IRLS + "," + z_norm);
-        log_str = append (log_str, "OBJECTIVE,"        + i_IRLS + "," + (- new_log_l));
-        log_str = append (log_str, "OBJ_DROP_REAL,"    + i_IRLS + "," + log_l_change);
-        log_str = append (log_str, "OBJ_DROP_PRED,"    + i_IRLS + "," + (- neg_log_l_change_predicted));
-        log_str = append (log_str, "OBJ_DROP_RATIO,"   + i_IRLS + "," + rho);
-        log_str = append (log_str, "LINEAR_TERM_MIN,"  + i_IRLS + "," + min (all_linear_terms));
-        log_str = append (log_str, "LINEAR_TERM_MAX,"  + i_IRLS + "," + max (all_linear_terms));
-    }
-        
-    if (i_IRLS == max_iteration_IRLS) {
-        termination_code = 2;
-    }
-}
-
-beta = newbeta;
-log_l = new_log_l;
-deviance_nodisp = new_deviance_nodisp;
-
-if (termination_code == 1) {
-    print ("Converged in " + i_IRLS + " steps.");
-} else {
-    print ("Did not converge.");
-}
-
-ssX_beta = diag (scale_X) %*% beta;
-ssX_beta [num_features, ] = ssX_beta [num_features, ] + t(shift_X) %*% beta;
-if (intercept_status == 2) {
-    beta_out = append (ssX_beta, beta);
-} else {
-    beta_out = ssX_beta;
-}
-
-write (beta_out, fileB, format=fmtB);
-
-if (intercept_status == 1 | intercept_status == 2) {
-    intercept_value = castAsScalar (beta_out [num_features, 1]);
-    beta_noicept = beta_out [1 : (num_features - 1), 1];
-} else {
-    beta_noicept = beta_out [1 : num_features, 1];
-}
-min_beta = min (beta_noicept);
-max_beta = max (beta_noicept);
-tmp_i_min_beta = rowIndexMin (t(beta_noicept))
-i_min_beta = castAsScalar (tmp_i_min_beta [1, 1]);
-tmp_i_max_beta = rowIndexMax (t(beta_noicept))
-i_max_beta = castAsScalar (tmp_i_max_beta [1, 1]);
-
-#####  OVER-DISPERSION PART  #####
-
-all_linear_terms = X %*% ssX_beta;
-[g_Y, w] = glm_dist (all_linear_terms, Y, distribution_type, variance_as_power_of_the_mean, link_type, link_as_power_of_the_mean);
-    
-pearson_residual_sq = g_Y ^ 2 / w;
-pearson_residual_sq = replace (target = pearson_residual_sq, pattern = 0.0/0.0, replacement = 0);
-# pearson_residual_sq = (y_residual ^ 2) / y_var;
-
-if (num_records > num_features) {
-    estimated_dispersion = sum (pearson_residual_sq) / (num_records - num_features);
-}
-if (dispersion <= 0.0) {
-    dispersion = estimated_dispersion;
-}
-deviance = deviance_nodisp / dispersion;
-
-if (fileLog != " ") {
-    write (log_str, fileLog);
-}
-
-#####  END OF THE MAIN PART  #####
-
-} else { print ("Input matrices are out of range.  Terminating the DML."); termination_code = 3; }
-} else { print ("Distribution/Link not supported.  Terminating the DML."); termination_code = 4; }
-
-str = "TERMINATION_CODE," + termination_code;
-str = append (str, "BETA_MIN," + min_beta);
-str = append (str, "BETA_MIN_INDEX," + i_min_beta);
-str = append (str, "BETA_MAX," + max_beta);
-str = append (str, "BETA_MAX_INDEX," + i_max_beta);
-str = append (str, "INTERCEPT," + intercept_value);
-str = append (str, "DISPERSION," + dispersion);
-str = append (str, "DISPERSION_EST," + estimated_dispersion);
-str = append (str, "DEVIANCE_UNSCALED," + deviance_nodisp);
-str = append (str, "DEVIANCE_SCALED," + deviance);
-
-if (fileO != " ") {
-    write (str, fileO);
-} else {
-    print (str);
-}
-
-
-
-
-check_if_supported = 
-    function (int ncol_y, int dist_type, double var_power, int link_type, double link_power)
-    return   (int is_supported)
-{
-    is_supported = 0;
-    if (ncol_y == 1 & dist_type == 1 & link_type == 1)
-    { # POWER DISTRIBUTION
-        is_supported = 1;
-        if (var_power == 0.0 & link_power == -1.0) {print ("Gaussian.inverse");      } else {
-        if (var_power == 0.0 & link_power ==  0.0) {print ("Gaussian.log");          } else {
-        if (var_power == 0.0 & link_power ==  0.5) {print ("Gaussian.sqrt");         } else {
-        if (var_power == 0.0 & link_power ==  1.0) {print ("Gaussian.id");           } else {
-        if (var_power == 0.0                     ) {print ("Gaussian.power_nonlog"); } else {
-        if (var_power == 1.0 & link_power == -1.0) {print ("Poisson.inverse");       } else {
-        if (var_power == 1.0 & link_power ==  0.0) {print ("Poisson.log");           } else {
-        if (var_power == 1.0 & link_power ==  0.5) {print ("Poisson.sqrt");          } else {
-        if (var_power == 1.0 & link_power ==  1.0) {print ("Poisson.id");            } else {
-        if (var_power == 1.0                     ) {print ("Poisson.power_nonlog");  } else {
-        if (var_power == 2.0 & link_power == -1.0) {print ("Gamma.inverse");         } else {
-        if (var_power == 2.0 & link_power ==  0.0) {print ("Gamma.log");             } else {
-        if (var_power == 2.0 & link_power ==  0.5) {print ("Gamma.sqrt");            } else {
-        if (var_power == 2.0 & link_power ==  1.0) {print ("Gamma.id");              } else {
-        if (var_power == 2.0                     ) {print ("Gamma.power_nonlog");    } else {
-        if (var_power == 3.0 & link_power == -2.0) {print ("InvGaussian.1/mu^2");    } else {
-        if (var_power == 3.0 & link_power == -1.0) {print ("InvGaussian.inverse");   } else {
-        if (var_power == 3.0 & link_power ==  0.0) {print ("InvGaussian.log");       } else {
-        if (var_power == 3.0 & link_power ==  0.5) {print ("InvGaussian.sqrt");      } else {
-        if (var_power == 3.0 & link_power ==  1.0) {print ("InvGaussian.id");        } else {
-        if (var_power == 3.0                     ) {print ("InvGaussian.power_nonlog");}else{
-        if (                   link_power ==  0.0) {print ("PowerDist.log");         } else {
-                                                    print ("PowerDist.power_nonlog");
-    }   }}}}} }}}}} }}}}} }}}}} }}
-    if (ncol_y == 1 & dist_type == 2)
-    {
-        print ("Error: Bernoulli response matrix has not been converted into two-column format.");
-    }
-    if (ncol_y == 2 & dist_type == 2 & link_type >= 1 & link_type <= 5)
-    { # BINOMIAL/BERNOULLI DISTRIBUTION
-        is_supported = 1;
-        if (link_type == 1 & link_power == -1.0) {print ("Binomial.inverse");        } else {
-        if (link_type == 1 & link_power ==  0.0) {print ("Binomial.log");            } else {
-        if (link_type == 1 & link_power ==  0.5) {print ("Binomial.sqrt");           } else {
-        if (link_type == 1 & link_power ==  1.0) {print ("Binomial.id");             } else {
-        if (link_type == 1)                      {print ("Binomial.power_nonlog");   } else {
-        if (link_type == 2)                      {print ("Binomial.logit");          } else {
-        if (link_type == 3)                      {print ("Binomial.probit");         } else {
-        if (link_type == 4)                      {print ("Binomial.cloglog");        } else {
-        if (link_type == 5)                      {print ("Binomial.cauchit");        }
-    }   }}}}} }}}
-    if (is_supported == 0) {
-        print ("Response matrix with " + ncol_y + " columns, distribution family (" + dist_type + ", " + var_power
-             + ") and link family (" + link_type + ", " + link_power + ") are NOT supported together.");
-    }
-}
-
-glm_initialize = function (Matrix[double] X, Matrix[double] Y, int dist_type, double var_power, int link_type, double link_power, int icept_status, int max_iter_CG)
-return (Matrix[double] beta, double saturated_log_l, int isNaN)
-{
-    saturated_log_l = 0.0;
-    isNaN = 0;
-    y_corr = Y [, 1];
-    if (dist_type == 2) {
-        n_corr = rowSums (Y);
-        is_n_zero = ppred (n_corr, 0.0, "==");
-        y_corr = Y [, 1] / (n_corr + is_n_zero) + (0.5 - Y [, 1]) * is_n_zero;    
-    }
-    linear_terms = y_corr;
-    if (dist_type == 1 & link_type == 1) { # POWER DISTRIBUTION
-        if          (link_power ==  0.0) {
-            if (sum (ppred (y_corr, 0.0, "<")) == 0) {
-                is_zero_y_corr = ppred (y_corr, 0.0, "==");
-                linear_terms = log (y_corr + is_zero_y_corr) - is_zero_y_corr / (1.0 - is_zero_y_corr);
-            } else { isNaN = 1; }
-        } else { if (link_power ==  1.0) {
-            linear_terms = y_corr;
-        } else { if (link_power == -1.0) {
-            linear_terms = 1.0 / y_corr;
-        } else { if (link_power ==  0.5) {
-            if (sum (ppred (y_corr, 0.0, "<")) == 0) {
-                linear_terms = sqrt (y_corr);
-            } else { isNaN = 1; }
-        } else { if (link_power >   0.0) {
-            if (sum (ppred (y_corr, 0.0, "<")) == 0) {
-                is_zero_y_corr = ppred (y_corr, 0.0, "==");
-                linear_terms = (y_corr + is_zero_y_corr) ^ link_power - is_zero_y_corr;
-            } else { isNaN = 1; }
-        } else {
-            if (sum (ppred (y_corr, 0.0, "<=")) == 0) {
-                linear_terms = y_corr ^ link_power;
-            } else { isNaN = 1; }
-        }}}}}
-    }
-    if (dist_type == 2 & link_type >= 1 & link_type <= 5)
-    { # BINOMIAL/BERNOULLI DISTRIBUTION
-        if          (link_type == 1 & link_power == 0.0)  { # Binomial.log
-            if (sum (ppred (y_corr, 0.0, "<")) == 0) {
-                is_zero_y_corr = ppred (y_corr, 0.0, "==");
-                linear_terms = log (y_corr + is_zero_y_corr) - is_zero_y_corr / (1.0 - is_zero_y_corr);
-            } else { isNaN = 1; }
-        } else { if (link_type == 1 & link_power >  0.0)  { # Binomial.power_nonlog pos
-            if (sum (ppred (y_corr, 0.0, "<")) == 0) {
-                is_zero_y_corr = ppred (y_corr, 0.0, "==");
-                linear_terms = (y_corr + is_zero_y_corr) ^ link_power - is_zero_y_corr;
-            } else { isNaN = 1; }
-        } else { if (link_type == 1)                      { # Binomial.power_nonlog neg
-            if (sum (ppred (y_corr, 0.0, "<=")) == 0) {
-                linear_terms = y_corr ^ link_power;
-            } else { isNaN = 1; }
-        } else { 
-            is_zero_y_corr = ppred (y_corr, 0.0, "<=");
-            is_one_y_corr  = ppred (y_corr, 1.0, ">=");
-            y_corr = y_corr * (1.0 - is_zero_y_corr) * (1.0 - is_one_y_corr) + 0.5 * (is_zero_y_corr + is_one_y_corr);
-            if (link_type == 2)                           { # Binomial.logit
-                linear_terms = log (y_corr / (1.0 - y_corr)) 
-                    + is_one_y_corr / (1.0 - is_one_y_corr) - is_zero_y_corr / (1.0 - is_zero_y_corr);
-            } else { if (link_type == 3)                  { # Binomial.probit
-                y_below_half = y_corr + (1.0 - 2.0 * y_corr) * ppred (y_corr, 0.5, ">");
-                t = sqrt (- 2.0 * log (y_below_half));
-                approx_inv_Gauss_CDF = - t + (2.515517 + t * (0.802853 + t * 0.010328)) / (1.0 + t * (1.432788 + t * (0.189269 + t * 0.001308)));
-                linear_terms = approx_inv_Gauss_CDF * (1.0 - 2.0 * ppred (y_corr, 0.5, ">"))
-                    + is_one_y_corr / (1.0 - is_one_y_corr) - is_zero_y_corr / (1.0 - is_zero_y_corr);
-            } else { if (link_type == 4)                  { # Binomial.cloglog
-                linear_terms = log (- log (1.0 - y_corr))
-                    - log (- log (0.5)) * (is_zero_y_corr + is_one_y_corr)
-                    + is_one_y_corr / (1.0 - is_one_y_corr) - is_zero_y_corr / (1.0 - is_zero_y_corr);
-            } else { if (link_type == 5)                  { # Binomial.cauchit
-                linear_terms = tan ((y_corr - 0.5) * 3.1415926535897932384626433832795)
-                    + is_one_y_corr / (1.0 - is_one_y_corr) - is_zero_y_corr / (1.0 - is_zero_y_corr);
-        }}  }}}}}
-    }
-    
-    if (isNaN == 0) {
-        [saturated_log_l, isNaN] = 
-            glm_log_likelihood_part (linear_terms, Y, dist_type, var_power, link_type, link_power);
-    }
-    
-    if ((dist_type == 1 & link_type == 1 & link_power == 0.0) |
-        (dist_type == 2 & link_type >= 2))
-    {    
-        desired_eta = 0.0;
-    } else { if (link_type == 1 & link_power == 0.0) {
-        desired_eta = log (0.5);
-    } else { if (link_type == 1) {
-        desired_eta = 0.5 ^ link_power;
-    } else {
-        desired_eta = 0.5;
-    }}}
-    
-    beta = matrix (0.0, rows = ncol(X), cols = 1);
-    
-    if (desired_eta != 0.0) {
-        if (icept_status == 1 | icept_status == 2) {
-            beta [nrow(beta), 1] = desired_eta;
-        } else {
-            # We want: avg (X %*% ssX_transform %*% beta) = desired_eta
-            # Note that "ssX_transform" is trivial here, hence ignored
-            
-            beta = straightenX (X, 0.000001, max_iter_CG);  
-            beta = beta * desired_eta;
-}   }   }
-
-
-glm_dist = function (Matrix[double] linear_terms, Matrix[double] Y,
-                     int dist_type, double var_power, int link_type, double link_power)
-    return (Matrix[double] g_Y, Matrix[double] w)
-    # ORIGINALLY we returned more meaningful vectors, namely:
-    # Matrix[double] y_residual    : y - y_mean, i.e. y observed - y predicted
-    # Matrix[double] link_gradient : derivative of the link function
-    # Matrix[double] var_function  : variance without dispersion, i.e. the V(mu) function
-    # BUT, this caused roundoff errors, so we had to compute "directly useful" vectors
-    # and skip over the "meaningful intermediaries".  Now we output these two variables:
-    #     g_Y = y_residual / (var_function * link_gradient);
-    #     w   = 1.0 / (var_function * link_gradient ^ 2);
-{
-    num_records = nrow (linear_terms);
-    zeros_r = matrix (0.0, rows = num_records, cols = 1);
-    ones_r = 1 + zeros_r;
-    g_Y  = zeros_r;
-    w  = zeros_r;
-
-    # Some constants
-
-    one_over_sqrt_two_pi = 0.39894228040143267793994605993438;
-    ones_2 = matrix (1.0, rows = 1, cols = 2);
-    p_one_m_one = ones_2;
-    p_one_m_one [1, 2] = -1.0;
-    m_one_p_one = ones_2;
-    m_one_p_one [1, 1] = -1.0;
-    zero_one = ones_2;
-    zero_one [1, 1] = 0.0;
-    one_zero = ones_2;
-    one_zero [1, 2] = 0.0;
-    flip_pos = matrix (0, rows = 2, cols = 2);
-    flip_neg = flip_pos;
-    flip_pos [1, 2] = 1;
-    flip_pos [2, 1] = 1;
-    flip_neg [1, 2] = -1;
-    flip_neg [2, 1] = 1;
-    
-    if (dist_type == 1 & link_type == 1) { # POWER DISTRIBUTION
-        y_mean = zeros_r;
-        if          (link_power ==  0.0) {
-            y_mean = exp (linear_terms);
-            y_mean_pow = y_mean ^ (1 - var_power);
-            w   = y_mean_pow * y_mean;
-            g_Y = y_mean_pow * (Y - y_mean);
-        } else { if (link_power ==  1.0) {
-            y_mean = linear_terms;
-            w   = y_mean ^ (- var_power);
-            g_Y = w * (Y - y_mean);
-        } else {
-            y_mean = linear_terms ^ (1.0 / link_power);
-            c1  = (1 - var_power) / link_power - 1;
-            c2  = (2 - var_power) / link_power - 2;
-            g_Y = (linear_terms ^ c1) * (Y - y_mean) / link_power;
-            w   = (linear_terms ^ c2) / (link_power ^ 2);
-    }   }}
-    if (dist_type == 2 & link_type >= 1 & link_type <= 5)
-    { # BINOMIAL/BERNOULLI DISTRIBUTION
-        if (link_type == 1) { # BINOMIAL.POWER LINKS
-            if (link_power == 0.0)  { # Binomial.log
-                vec1 = 1 / (exp (- linear_terms) - 1);
-                g_Y = Y [, 1] - Y [, 2] * vec1;
-                w   = rowSums (Y) * vec1;
-            } else {                  # Binomial.nonlog
-                vec1 = zeros_r;
-                if (link_power == 0.5)  {
-                    vec1 = 1 / (1 - linear_terms ^ 2);
-                } else { if (sum (ppred (linear_terms, 0.0, "<")) == 0) {
-                    vec1 = linear_terms ^ (- 2 + 1 / link_power) / (1 - linear_terms ^ (1 / link_power));
-                } else {isNaN = 1;}}
-                # We want a "zero-protected" version of
-                #     vec2 = Y [, 1] / linear_terms;
-                is_y_0 = ppred (Y [, 1], 0.0, "==");
-                vec2 = (Y [, 1] + is_y_0) / (linear_terms * (1 - is_y_0) + is_y_0) - is_y_0;
-                g_Y =  (vec2 - Y [, 2] * vec1 * linear_terms) / link_power;
-                w   =  rowSums (Y) * vec1 / link_power ^ 2;
-            }
-        } else {
-            is_LT_pos_infinite = ppred (linear_terms,  1.0/0.0, "==");
-            is_LT_neg_infinite = ppred (linear_terms, -1.0/0.0, "==");
-            is_LT_infinite = is_LT_pos_infinite %*% one_zero + is_LT_neg_infinite %*% zero_one;
-            finite_linear_terms = replace (target =        linear_terms, pattern =  1.0/0.0, replacement = 0);
-            finite_linear_terms = replace (target = finite_linear_terms, pattern = -1.0/0.0, replacement = 0);
-            if (link_type == 2)                           { # Binomial.logit
-                Y_prob = exp (finite_linear_terms) %*% one_zero + ones_r %*% zero_one;
-                Y_prob = Y_prob / (rowSums (Y_prob) %*% ones_2);
-                Y_prob = Y_prob * ((1.0 - rowSums (is_LT_infinite)) %*% ones_2) + is_LT_infinite;
-                g_Y = rowSums (Y * (Y_prob %*% flip_neg));           ### = y_residual;
-                w   = rowSums (Y * (Y_prob %*% flip_pos) * Y_prob);  ### = y_variance;
-            } else { if (link_type == 3)                  { # Binomial.probit
-                is_lt_pos = ppred (linear_terms, 0.0, ">=");
-                t_gp = 1.0 / (1.0 + abs (finite_linear_terms) * 0.231641888);  # 0.231641888 = 0.3275911 / sqrt (2.0)
-                pt_gp = t_gp * ( 0.254829592 
-                      + t_gp * (-0.284496736 # "Handbook of Mathematical Functions", ed. by M. Abramowitz and I.A. Stegun,
-                      + t_gp * ( 1.421413741 # U.S. Nat-l Bureau of Standards, 10th print (Dec 1972), Sec. 7.1.26, p. 299
-                      + t_gp * (-1.453152027 
-                      + t_gp *   1.061405429))));
-                the_gauss_exp = exp (- (linear_terms ^ 2) / 2.0);
-                vec1 = 0.25 * pt_gp * (2 - the_gauss_exp * pt_gp);
-                vec2 = Y [, 1] - rowSums (Y) * is_lt_pos + the_gauss_exp * pt_gp * rowSums (Y) * (is_lt_pos - 0.5);
-                w   = the_gauss_exp * (one_over_sqrt_two_pi ^ 2) * rowSums (Y) / vec1;
-                g_Y = one_over_sqrt_two_pi * vec2 / vec1;
-            } else { if (link_type == 4)                  { # Binomial.cloglog
-                the_exp = exp (linear_terms)
-                the_exp_exp = exp (- the_exp);
-                is_too_small = ppred (10000000 + the_exp, 10000000, "==");
-                the_exp_ratio = (1 - is_too_small) * (1 - the_exp_exp) / (the_exp + is_too_small) + is_too_small * (1 - the_exp / 2);
-                g_Y =  (rowSums (Y) * the_exp_exp - Y [, 2]) / the_exp_ratio;
-                w   =  the_exp_exp * the_exp * rowSums (Y) / the_exp_ratio;
-            } else { if (link_type == 5)                  { # Binomial.cauchit
-                Y_prob = 0.5 + (atan (finite_linear_terms) %*% p_one_m_one) / 3.1415926535897932384626433832795;
-                Y_prob = Y_prob * ((1.0 - rowSums (is_LT_infinite)) %*% ones_2) + is_LT_infinite;
-                y_residual = Y [, 1] * Y_prob [, 2] - Y [, 2] * Y_prob [, 1];
-                var_function = rowSums (Y) * Y_prob [, 1] * Y_prob [, 2];
-                link_gradient_normalized = (1 + linear_terms ^ 2) * 3.1415926535897932384626433832795;
-                g_Y =  rowSums (Y) * y_residual / (var_function * link_gradient_normalized);
-                w   = (rowSums (Y) ^ 2) / (var_function * link_gradient_normalized ^ 2);
-            }}}}   
-        }
-    }
-}
-
-
-glm_log_likelihood_part = function (Matrix[double] linear_terms, Matrix[double] Y,
-        int dist_type, double var_power, int link_type, double link_power)
-    return (double log_l, int isNaN)
-{
-    isNaN = 0;
-    log_l = 0.0;
-    num_records = nrow (Y);
-    zeros_r = matrix (0.0, rows = num_records, cols = 1);
-    
-    if (dist_type == 1 & link_type == 1)
-    { # POWER DISTRIBUTION
-        b_cumulant = zeros_r;
-        natural_parameters = zeros_r;
-        is_natural_parameter_log_zero = zeros_r;
-        if          (var_power == 1.0 & link_power == 0.0)  { # Poisson.log
-            b_cumulant = exp (linear_terms);
-            is_natural_parameter_log_zero = ppred (linear_terms, -1.0/0.0, "==");
-            natural_parameters = replace (target = linear_terms, pattern = -1.0/0.0, replacement = 0);
-        } else { if (var_power == 1.0 & link_power == 1.0)  { # Poisson.id
-            if (sum (ppred (linear_terms, 0.0, "<")) == 0)  {
-                b_cumulant = linear_terms;
-                is_natural_parameter_log_zero = ppred (linear_terms, 0.0, "==");
-                natural_parameters = log (linear_terms + is_natural_parameter_log_zero);
-            } else {isNaN = 1;}
-        } else { if (var_power == 1.0 & link_power == 0.5)  { # Poisson.sqrt
-            if (sum (ppred (linear_terms, 0.0, "<")) == 0)  {
-                b_cumulant = linear_terms ^ 2;
-                is_natural_parameter_log_zero = ppred (linear_terms, 0.0, "==");
-                natural_parameters = 2.0 * log (linear_terms + is_natural_parameter_log_zero);
-            } else {isNaN = 1;}
-        } else { if (var_power == 1.0 & link_power  > 0.0)  { # Poisson.power_nonlog, pos
-            if (sum (ppred (linear_terms, 0.0, "<")) == 0)  {
-                is_natural_parameter_log_zero = ppred (linear_terms, 0.0, "==");
-                b_cumulant = (linear_terms + is_natural_parameter_log_zero) ^ (1.0 / link_power) - is_natural_parameter_log_zero;
-                natural_parameters = log (linear_terms + is_natural_parameter_log_zero) / link_power;
-            } else {isNaN = 1;}
-        } else { if (var_power == 1.0)                      { # Poisson.power_nonlog, neg
-            if (sum (ppred (linear_terms, 0.0, "<=")) == 0) {
-                b_cumulant = linear_terms ^ (1.0 / link_power);
-                natural_parameters = log (linear_terms) / link_power;
-            } else {isNaN = 1;}
-        } else { if (var_power == 2.0 & link_power == -1.0) { # Gamma.inverse
-            if (sum (ppred (linear_terms, 0.0, "<=")) == 0) {
-                b_cumulant = - log (linear_terms);
-                natural_parameters = - linear_terms;
-            } else {isNaN = 1;}
-        } else { if (var_power == 2.0 & link_power ==  1.0) { # Gamma.id
-            if (sum (ppred (linear_terms, 0.0, "<=")) == 0) {
-                b_cumulant = log (linear_terms);
-                natural_parameters = - 1.0 / linear_terms;
-            } else {isNaN = 1;}
-        } else { if (var_power == 2.0 & link_power ==  0.0) { # Gamma.log
-            b_cumulant = linear_terms;
-            natural_parameters = - exp (- linear_terms);
-        } else { if (var_power == 2.0)                      { # Gamma.power_nonlog
-            if (sum (ppred (linear_terms, 0.0, "<=")) == 0) {
-                b_cumulant = log (linear_terms) / link_power;
-                natural_parameters = - linear_terms ^ (- 1.0 / link_power);
-            } else {isNaN = 1;}
-        } else { if                    (link_power ==  0.0) { # PowerDist.log
-            natural_parameters = exp (linear_terms * (1.0 - var_power)) / (1.0 - var_power);
-            b_cumulant = exp (linear_terms * (2.0 - var_power)) / (2.0 - var_power);
-        } else {                                              # PowerDist.power_nonlog
-            if          (-2 * link_power == 1.0 - var_power) {
-                natural_parameters = 1.0 / (linear_terms ^ 2) / (1.0 - var_power);
-            } else { if (-1 * link_power == 1.0 - var_power) {
-                natural_parameters = 1.0 / linear_terms / (1.0 - var_power);
-            } else { if (     link_power == 1.0 - var_power) {
-                natural_parameters = linear_terms / (1.0 - var_power);
-            } else { if ( 2 * link_power == 1.0 - var_power) {
-                natural_parameters = linear_terms ^ 2 / (1.0 - var_power);
-            } else {
-                if (sum (ppred (linear_terms, 0.0, "<=")) == 0) {
-                    power = (1.0 - var_power) / link_power;
-                    natural_parameters = (linear_terms ^ power) / (1.0 - var_power);
-                } else {isNaN = 1;}
-            }}}}
-            if          (-2 * link_power == 2.0 - var_power) {
-                b_cumulant = 1.0 / (linear_terms ^ 2) / (2.0 - var_power);
-            } else { if (-1 * link_power == 2.0 - var_power) {
-                b_cumulant = 1.0 / linear_terms / (2.0 - var_power);
-            } else { if (     link_power == 2.0 - var_power) {
-                b_cumulant = linear_terms / (2.0 - var_power);
-            } else { if ( 2 * link_power == 2.0 - var_power) {
-                b_cumulant = linear_terms ^ 2 / (2.0 - var_power);
-            } else {
-                if (sum (ppred (linear_terms, 0.0, "<=")) == 0) {
-                    power = (2.0 - var_power) / link_power;
-                    b_cumulant = (linear_terms ^ power) / (2.0 - var_power);
-                } else {isNaN = 1;}
-            }}}}
-        }}}}} }}}}}
-        if (sum (is_natural_parameter_log_zero * abs (Y)) > 0.0) {
-            log_l = -1.0 / 0.0;
-            isNaN = 1;
-        }
-        if (isNaN == 0)
-        {
-            log_l = sum (Y * natural_parameters - b_cumulant);
-            if (log_l != log_l | (log_l == log_l + 1.0 & log_l == log_l * 2.0)) {
-                log_l = -1.0 / 0.0;
-                isNaN = 1;
-    }   }   }
-    
-    if (dist_type == 2 & link_type >= 1 & link_type <= 5)
-    { # BINOMIAL/BERNOULLI DISTRIBUTION
-    
-        [Y_prob, isNaN] = binomial_probability_two_column (linear_terms, link_type, link_power);
-        
-        if (isNaN == 0) {            
-            does_prob_contradict = ppred (Y_prob, 0.0, "<=");
-            if (sum (does_prob_contradict * abs (Y)) == 0.0) {
-                log_l = sum (Y * log (Y_prob * (1 - does_prob_contradict) + does_prob_contradict));
-                if (log_l != log_l | (log_l == log_l + 1.0 & log_l == log_l * 2.0)) {
-                    isNaN = 1;
-                }
-            } else {
-                log_l = -1.0 / 0.0;
-                isNaN = 1;
-    }   }   }
-    
-    if (isNaN == 1) {
-        log_l = - 1.0 / 0.0; 
-    }
-}
-
-
-
-binomial_probability_two_column =
-    function (Matrix[double] linear_terms, int link_type, double link_power)
-    return   (Matrix[double] Y_prob, int isNaN)
-{
-    isNaN = 0;
-    num_records = nrow (linear_terms);
-
-    # Define some auxiliary matrices
-
-    ones_2 = matrix (1.0, rows = 1, cols = 2);
-    p_one_m_one = ones_2;
-    p_one_m_one [1, 2] = -1.0;
-    m_one_p_one = ones_2;
-    m_one_p_one [1, 1] = -1.0;
-    zero_one = ones_2;
-    zero_one [1, 1] = 0.0;
-    one_zero = ones_2;
-    one_zero [1, 2] = 0.0;
-
-    zeros_r = matrix (0.0, rows = num_records, cols = 1);
-    ones_r = 1.0 + zeros_r;
-
-    # Begin the function body
-
-    Y_prob = zeros_r %*% ones_2;
-    if (link_type == 1) { # Binomial.power
-        if          (link_power == 0.0) { # Binomial.log
-            Y_prob = exp (linear_terms) %*% p_one_m_one + ones_r %*% zero_one;    
-        } else { if (link_power == 0.5) { # Binomial.sqrt
-            Y_prob = (linear_terms ^ 2) %*% p_one_m_one + ones_r %*% zero_one;    
-        } else {                          # Binomial.power_nonlog
-            if (sum (ppred (linear_terms, 0.0, "<")) == 0) {
-                Y_prob = (linear_terms ^ (1.0 / link_power)) %*% p_one_m_one + ones_r %*% zero_one;    
-            } else {isNaN = 1;}
-        }}
-    } else {              # Binomial.non_power
-        is_LT_pos_infinite = ppred (linear_terms,  1.0/0.0, "==");
-        is_LT_neg_infinite = ppred (linear_terms, -1.0/0.0, "==");
-        is_LT_infinite = is_LT_pos_infinite %*% one_zero + is_LT_neg_infinite %*% zero_one;
-        finite_linear_terms = replace (target =        linear_terms, pattern =  1.0/0.0, replacement = 0);
-        finite_linear_terms = replace (target = finite_linear_terms, pattern = -1.0/0.0, replacement = 0);
-        if (link_type == 2)             { # Binomial.logit
-            Y_prob = exp (finite_linear_terms) %*% one_zero + ones_r %*% zero_one;
-            Y_prob = Y_prob / (rowSums (Y_prob) %*% ones_2);
-        } else { if (link_type == 3)    { # Binomial.probit
-            lt_pos_neg = ppred (finite_linear_terms, 0.0, ">=") %*% p_one_m_one + ones_r %*% zero_one;
-            t_gp = 1.0 / (1.0 + abs (finite_linear_terms) * 0.231641888);  # 0.231641888 = 0.3275911 / sqrt (2.0)
-            pt_gp = t_gp * ( 0.254829592 
-                  + t_gp * (-0.284496736 # "Handbook of Mathematical Functions", ed. by M. Abramowitz and I.A. Stegun,
-                  + t_gp * ( 1.421413741 # U.S. Nat-l Bureau of Standards, 10th print (Dec 1972), Sec. 7.1.26, p. 299
-                  + t_gp * (-1.453152027 
-                  + t_gp *   1.061405429))));
-            the_gauss_exp = exp (- (finite_linear_terms ^ 2) / 2.0);
-            Y_prob = lt_pos_neg + ((the_gauss_exp * pt_gp) %*% ones_2) * (0.5 - lt_pos_neg);
-        } else { if (link_type == 4)    { # Binomial.cloglog
-            the_exp = exp (finite_linear_terms);
-            the_exp_exp = exp (- the_exp);
-            is_too_small = ppred (10000000 + the_exp, 10000000, "==");
-            Y_prob [, 1] = (1 - is_too_small) * (1 - the_exp_exp) + is_too_small * the_exp * (1 - the_exp / 2);
-            Y_prob [, 2] = the_exp_exp;
-        } else { if (link_type == 5)    { # Binomial.cauchit
-            Y_prob = 0.5 + (atan (finite_linear_terms) %*% p_one_m_one) / 3.1415926535897932384626433832795;
-        } else {
-            isNaN = 1;
-        }}}}
-        Y_prob = Y_prob * ((1.0 - rowSums (is_LT_infinite)) %*% ones_2) + is_LT_infinite;
-}   }            
-
-
-# THE CG-STEIHAUG PROCEDURE SCRIPT
-
-# Apply Conjugate Gradient - Steihaug algorithm in order to approximately minimize
-# 0.5 z^T (X^T diag(w) X + diag (lambda)) z + (g + lambda * beta)^T z
-# under constraint:  ||z|| <= trust_delta.
-# See Alg. 7.2 on p. 171 of "Numerical Optimization" 2nd ed. by Nocedal and Wright
-# IN THE ABOVE, "X" IS UNDERSTOOD TO BE "X %*% (SHIFT/SCALE TRANSFORM)"; this transform
-# is given separately because sparse "X" may become dense after applying the transform.
-#
-get_CG_Steihaug_point =
-    function (Matrix[double] X, Matrix[double] scale_X, Matrix[double] shift_X, Matrix[double] w,
-    Matrix[double] g, Matrix[double] beta, Matrix[double] lambda, double trust_delta, int max_iter_CG)
-    return (Matrix[double] z, double neg_log_l_change, int i_CG, int reached_trust_boundary)
-{
-    trust_delta_sq = trust_delta ^ 2;
-    size_CG = nrow (g);
-    z = matrix (0.0, rows = size_CG, cols = 1);
-    neg_log_l_change = 0.0;
-    reached_trust_boundary = 0;
-    g_reg = g + lambda * beta;
-    r_CG = g_reg;
-    p_CG = -r_CG;
-    rr_CG = sum(r_CG * r_CG);
-    eps_CG = rr_CG * min (0.25, sqrt (rr_CG));
-    converged_CG = 0;
-    if (rr_CG < eps_CG) {
-        converged_CG = 1;
-    }
-    
-    max_iteration_CG = max_iter_CG;
-    if (max_iteration_CG <= 0) {
-        max_iteration_CG = size_CG;
-    }
-    i_CG = 0;
-    while (converged_CG == 0)
-    {
-        i_CG = i_CG + 1;
-        ssX_p_CG = diag (scale_X) %*% p_CG;
-        ssX_p_CG [size_CG, ] = ssX_p_CG [size_CG, ] + t(shift_X) %*% p_CG;
-        temp_CG = t(X) %*% (w * (X %*% ssX_p_CG));
-        q_CG = (lambda * p_CG) + diag (scale_X) %*% temp_CG + shift_X %*% temp_CG [size_CG, ];
-        pq_CG = sum (p_CG * q_CG);
-        if (pq_CG <= 0) {
-            pp_CG = sum (p_CG * p_CG);  
-            if (pp_CG > 0) {
-                [z, neg_log_l_change] = 
-                    get_trust_boundary_point (g_reg, z, p_CG, q_CG, r_CG, pp_CG, pq_CG, trust_delta_sq);
-                reached_trust_boundary = 1;
-            } else {
-                neg_log_l_change = 0.5 * sum (z * (r_CG + g_reg));
-            }
-            converged_CG = 1;
-        }
-        if (converged_CG == 0) {
-            alpha_CG = rr_CG / pq_CG;
-            new_z = z + alpha_CG * p_CG;
-            if (sum(new_z * new_z) >= trust_delta_sq) {
-                pp_CG = sum (p_CG * p_CG);  
-                [z, neg_log_l_change] = 
-                    get_trust_boundary_point (g_reg, z, p_CG, q_CG, r_CG, pp_CG, pq_CG, trust_delta_sq);
-                reached_trust_boundary = 1;
-                converged_CG = 1;
-            }
-            if (converged_CG == 0) {
-                z = new_z;
-                old_rr_CG = rr_CG;
-                r_CG = r_CG + alpha_CG * q_CG;
-                rr_CG = sum(r_CG * r_CG);
-                if (i_CG == max_iteration_CG | rr_CG < eps_CG) {
-                    neg_log_l_change = 0.5 * sum (z * (r_CG + g_reg));
-                    reached_trust_boundary = 0;
-                    converged_CG = 1;
-                }
-                if (converged_CG == 0) {
-                    p_CG = -r_CG + (rr_CG / old_rr_CG) * p_CG;
-}   }   }   }   }
-
-
-# An auxiliary function used twice inside the CG-STEIHAUG loop:
-get_trust_boundary_point = 
-    function (Matrix[double] g, Matrix[double] z, Matrix[double] p, 
-              Matrix[double] q, Matrix[double] r, double pp, double pq, 
-              double trust_delta_sq)
-    return (Matrix[double] new_z, double f_change)
-{
-    zz = sum (z * z);  pz = sum (p * z);
-    sq_root_d = sqrt (pz * pz - pp * (zz - trust_delta_sq));
-    tau_1 = (- pz + sq_root_d) / pp;
-    tau_2 = (- pz - sq_root_d) / pp;
-    zq = sum (z * q);  gp = sum (g * p);
-    f_extra = 0.5 * sum (z * (r + g));
-    f_change_1 = f_extra + (0.5 * tau_1 * pq + zq + gp) * tau_1;
-    f_change_2 = f_extra + (0.5 * tau_2 * pq + zq + gp) * tau_2;
-    if (f_change_1 < f_change_2) {
-        new_z = z + (tau_1 * p);
-        f_change = f_change_1;
-    }
-    else {
-        new_z = z + (tau_2 * p);
-        f_change = f_change_2;
-    }
-}
-
-
-# Computes vector w such that  ||X %*% w - 1|| -> MIN  given  avg(X %*% w) = 1
-# We find z_LS such that ||X %*% z_LS - 1|| -> MIN unconditionally, then scale
-# it to compute  w = c * z_LS  such that  sum(X %*% w) = nrow(X).
-straightenX =
-    function (Matrix[double] X, double eps, int max_iter_CG)
-    return   (Matrix[double] w)
-{
-    w_X = t(colSums(X));
-    lambda_LS = 0.000001 * sum(X ^ 2) / ncol(X);
-    eps_LS = eps * nrow(X);
-
-    # BEGIN LEAST SQUARES
-    
-    r_LS = - w_X;
-    z_LS = matrix (0.0, rows = ncol(X), cols = 1);
-    p_LS = - r_LS;
-    norm_r2_LS = sum (r_LS ^ 2);
-    i_LS = 0;
-    while (i_LS < max_iter_CG & i_LS < ncol(X) & norm_r2_LS >= eps_LS)
-    {
-        q_LS = t(X) %*% X %*% p_LS;
-        q_LS = q_LS + lambda_LS * p_LS;
-        alpha_LS = norm_r2_LS / sum (p_LS * q_LS);
-        z_LS = z_LS + alpha_LS * p_LS;
-        old_norm_r2_LS = norm_r2_LS;
-        r_LS = r_LS + alpha_LS * q_LS;
-        norm_r2_LS = sum (r_LS ^ 2);
-        p_LS = -r_LS + (norm_r2_LS / old_norm_r2_LS) * p_LS;
-        i_LS = i_LS + 1;
-    }
-    
-    # END LEAST SQUARES
-    
-    w = (nrow(X) / sum (w_X * z_LS)) * z_LS;
-}
-
-
-round_to_print = function (double x_to_truncate)
-return (double mantissa, int eee)
-{
-    mantissa = 1.0;
-    eee = 0;
-    positive_infinity = 1.0 / 0.0;
-    x = abs (x_to_truncate);
-    if (x != x / 2.0) {
-        log_ten = log (10.0);
-        d_eee = round (log (x) / log_ten - 0.5);
-        mantissa = round (x * exp (log_ten * (4.0 - d_eee))) / 10000;
-        if (mantissa == 10.0) {
-            mantissa = 1.0;
-            d_eee = d_eee + 1;
-        }
-        if (x_to_truncate < 0.0) {
-            mantissa = - mantissa;
-        }
-        eee = 0;
-        pow_two = 1;
-        res_eee = abs (d_eee);
-        while (res_eee != 0.0) {
-            new_res_eee = round (res_eee / 2.0 - 0.3);
-            if (new_res_eee * 2.0 < res_eee) {
-                eee = eee + pow_two;
-            }
-            res_eee = new_res_eee;
-            pow_two = 2 * pow_two;
-        }
-        if (d_eee < 0.0) {
-            eee = - eee;
-        }
-    } else { mantissa = x_to_truncate; }
-}
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# 
+# THIS SCRIPT SOLVES GLM REGRESSION USING NEWTON/FISHER SCORING WITH TRUST REGIONS
+#
+# INPUT PARAMETERS:
+# ---------------------------------------------------------------------------------------------
+# NAME  TYPE   DEFAULT  MEANING
+# ---------------------------------------------------------------------------------------------
+# X     String  ---     Location to read the matrix X of feature vectors
+# Y     String  ---     Location to read response matrix Y with either 1 or 2 columns:
+#                       if dfam = 2, Y is 1-column Bernoulli or 2-column Binomial (#pos, #neg)
+# B     String  ---     Location to store estimated regression parameters (the betas)
+# fmt   String "text"   The betas matrix output format, such as "text" or "csv"
+# O     String  " "     Location to write the printed statistics; by default is standard output
+# Log   String  " "     Location to write per-iteration variables for log/debugging purposes
+# dfam  Int     1       Distribution family code: 1 = Power, 2 = Binomial
+# vpow  Double  0.0     Power for Variance defined as (mean)^power (ignored if dfam != 1):
+#                       0.0 = Gaussian, 1.0 = Poisson, 2.0 = Gamma, 3.0 = Inverse Gaussian
+# link  Int     0       Link function code: 0 = canonical (depends on distribution),
+#                       1 = Power, 2 = Logit, 3 = Probit, 4 = Cloglog, 5 = Cauchit
+# lpow  Double  1.0     Power for Link function defined as (mean)^power (ignored if link != 1):
+#                       -2.0 = 1/mu^2, -1.0 = reciprocal, 0.0 = log, 0.5 = sqrt, 1.0 = identity
+# yneg  Double  0.0     Response value for Bernoulli "No" label, usually 0.0 or -1.0
+# icpt  Int     0       Intercept presence, X columns shifting and rescaling:
+#                       0 = no intercept, no shifting, no rescaling;
+#                       1 = add intercept, but neither shift nor rescale X;
+#                       2 = add intercept, shift & rescale X columns to mean = 0, variance = 1
+# reg   Double  0.0     Regularization parameter (lambda) for L2 regularization
+# tol   Double 0.000001 Tolerance (epsilon) on the relative change in deviance (convergence test)
+# disp  Double  0.0     (Over-)dispersion value, or 0.0 to estimate it from data
+# moi   Int     200     Maximum number of outer (Newton / Fisher Scoring) iterations
+# mii   Int     0       Maximum number of inner (Conjugate Gradient) iterations, 0 = no maximum
+# ---------------------------------------------------------------------------------------------
+# OUTPUT: Matrix beta, whose size depends on icpt:
+#     icpt=0: ncol(X) x 1;  icpt=1: (ncol(X) + 1) x 1;  icpt=2: (ncol(X) + 1) x 2
+#
+# In addition, some GLM statistics are provided in CSV format, one comma-separated name-value
+# pair per line, as follows:
+#
+# NAME                  MEANING
+# -------------------------------------------------------------------------------------------
+# TERMINATION_CODE      A positive integer indicating success/failure as follows:
+#                       1 = Converged successfully; 2 = Maximum number of iterations reached; 
+#                       3 = Input (X, Y) out of range; 4 = Distribution/link is not supported
+# BETA_MIN              Smallest beta value (regression coefficient), excluding the intercept
+# BETA_MIN_INDEX        Column index for the smallest beta value
+# BETA_MAX              Largest beta value (regression coefficient), excluding the intercept
+# BETA_MAX_INDEX        Column index for the largest beta value
+# INTERCEPT             Intercept value, or NaN if there is no intercept (if icpt=0)
+# DISPERSION            Dispersion used to scale deviance, provided as "disp" input parameter
+#                       or estimated (same as DISPERSION_EST) if the "disp" parameter is <= 0
+# DISPERSION_EST        Dispersion estimated from the dataset
+# DEVIANCE_UNSCALED     Deviance from the saturated model, assuming dispersion == 1.0
+# DEVIANCE_SCALED       Deviance from the saturated model, scaled by the DISPERSION value
+# -------------------------------------------------------------------------------------------
+#
+# The Log file, when requested, contains the following per-iteration variables in CSV format,
+# each line containing triple (NAME, ITERATION, VALUE) with ITERATION = 0 for initial values:
+#
+# NAME                  MEANING
+# -------------------------------------------------------------------------------------------
+# NUM_CG_ITERS          Number of inner (Conj.Gradient) iterations in this outer iteration
+# IS_TRUST_REACHED      1 = trust region boundary was reached, 0 = otherwise
+# POINT_STEP_NORM       L2-norm of iteration step from old point (i.e. "beta") to new point
+# OBJECTIVE             The loss function we minimize (i.e. negative partial log-likelihood)
+# OBJ_DROP_REAL         Reduction in the objective during this iteration, actual value
+# OBJ_DROP_PRED         Reduction in the objective predicted by a quadratic approximation
+# OBJ_DROP_RATIO        Actual-to-predicted reduction ratio, used to update the trust region
+# GRADIENT_NORM         L2-norm of the loss function gradient (NOTE: sometimes omitted)
+# LINEAR_TERM_MIN       The minimum value of X %*% beta, used to check for overflows
+# LINEAR_TERM_MAX       The maximum value of X %*% beta, used to check for overflows
+# IS_POINT_UPDATED      1 = new point accepted; 0 = new point rejected, old point restored
+# TRUST_DELTA           Updated trust region size, the "delta"
+# -------------------------------------------------------------------------------------------
+#
+# Example with distribution = "Binomial.logit":
+# hadoop jar SystemML.jar -f GLM_HOME/GLM.dml -nvargs dfam=2 link=2 yneg=-1.0 icpt=2 reg=0.001
+#     tol=0.00000001 disp=1.0 moi=100 mii=10 X=INPUT_DIR/X Y=INPUT_DIR/Y B=OUTPUT_DIR/betas 
+#     fmt=csv O=OUTPUT_DIR/stats Log=OUTPUT_DIR/log
+#
+# SOME OF THE SUPPORTED GLM DISTRIBUTION FAMILIES
+# AND LINK FUNCTIONS:
+# -----------------------------------------------
+# INPUT PARAMETERS:    MEANING:            Cano-
+# dfam vpow link lpow  Distribution.link   nical?
+# -----------------------------------------------
+#  1   0.0   1  -1.0   Gaussian.inverse
+#  1   0.0   1   0.0   Gaussian.log
+#  1   0.0   1   1.0   Gaussian.id          Yes
+#  1   1.0   1   0.0   Poisson.log          Yes
+#  1   1.0   1   0.5   Poisson.sqrt
+#  1   1.0   1   1.0   Poisson.id
+#  1   2.0   1  -1.0   Gamma.inverse        Yes
+#  1   2.0   1   0.0   Gamma.log
+#  1   2.0   1   1.0   Gamma.id
+#  1   3.0   1  -2.0   InvGaussian.1/mu^2   Yes
+#  1   3.0   1  -1.0   InvGaussian.inverse
+#  1   3.0   1   0.0   InvGaussian.log
+#  1   3.0   1   1.0   InvGaussian.id
+#  1    *    1    *    AnyVariance.AnyLink
+# -----------------------------------------------
+#  2    *    1   0.0   Binomial.log
+#  2    *    1   0.5   Binomial.sqrt
+#  2    *    2    *    Binomial.logit       Yes
+#  2    *    3    *    Binomial.probit
+#  2    *    4    *    Binomial.cloglog
+#  2    *    5    *    Binomial.cauchit
+# -----------------------------------------------
+
+
+# Default values for input parameters
+
+# Input/output locations.  X, Y and B are mandatory; for the optional O and Log
+# locations a single blank " " means "not provided" (it is tested for further below).
+fileX = $X;
+fileY = $Y;
+fileB = $B;
+fileO = ifdef ($O, " ");
+fileLog = ifdef ($Log, " ");
+fmtB = ifdef ($fmt, "text");
+
+distribution_type = ifdef ($dfam, 1);                # $dfam = 1;
+variance_as_power_of_the_mean = ifdef ($vpow, 0.0);  # $vpow = 0.0;
+link_type = ifdef ($link, 0);                        # $link = 0;
+link_as_power_of_the_mean = ifdef ($lpow, 1.0);      # $lpow = 1.0;
+bernoulli_No_label = ifdef ($yneg, 0.0);             # $yneg = 0.0;
+intercept_status = ifdef ($icpt, 0);                 # $icpt = 0;
+dispersion = ifdef ($disp, 0.0);                     # $disp = 0.0;
+regularization = ifdef ($reg, 0.0);                  # $reg  = 0.0;
+eps = ifdef ($tol, 0.000001);                        # $tol  = 0.000001;
+max_iteration_IRLS = ifdef ($moi, 200);              # $moi  = 200;
+max_iteration_CG = ifdef ($mii, 0);                  # $mii  = 0;
+
+# Cast the real-valued parameters to double (the helper functions below declare
+# the variance/link powers as "double" arguments).
+variance_as_power_of_the_mean = as.double (variance_as_power_of_the_mean);
+link_as_power_of_the_mean = as.double (link_as_power_of_the_mean);
+bernoulli_No_label = as.double (bernoulli_No_label);
+dispersion = as.double (dispersion);
+eps = as.double (eps);
+
+
+# Default values for output statistics:
+# NaN (0.0 / 0.0) stands for "not computed"; these defaults are what gets reported
+# when the script terminates early (termination codes 3 and 4).
+#
+# NOTE: "dispersion" is deliberately NOT reset to NaN here.  It holds the user-supplied
+# $disp value read above; the over-dispersion part replaces it with the estimate only
+# if it is <= 0.0.  Resetting it to NaN made that test always false (NaN <= 0.0 is
+# false), so $disp was ignored and DISPERSION / DEVIANCE_SCALED were always NaN.
+
+termination_code     = 0.0;
+min_beta             = 0.0 / 0.0;
+i_min_beta           = 0.0 / 0.0;
+max_beta             = 0.0 / 0.0;
+i_max_beta           = 0.0 / 0.0;
+intercept_value      = 0.0 / 0.0;
+estimated_dispersion = 0.0 / 0.0;
+deviance_nodisp      = 0.0 / 0.0;
+deviance             = 0.0 / 0.0;
+
+# Read the feature matrix X and the response matrix Y from their input locations.
+print("BEGIN GLM SCRIPT");
+print("Reading X...");
+X = read (fileX);
+print("Reading Y...");
+Y = read (fileY);
+
+num_records  = nrow (X);
+num_features = ncol (X);
+zeros_r = matrix (0, rows = num_records, cols = 1);
+ones_r = 1 + zeros_r;
+
+# Introduce the intercept, shift and rescale the columns of X if needed
+
+if (intercept_status == 1 | intercept_status == 2)  # add the intercept column
+{
+    X = append (X, ones_r);
+    num_features = ncol (X);
+}
+
+# scale_lambda is a 0/1 mask over the betas: 1 for ordinary features and 0 for the
+# intercept (last entry), so that the L2 penalty "lambda" never applies to the intercept.
+scale_lambda = matrix (1, rows = num_features, cols = 1);
+if (intercept_status == 1 | intercept_status == 2)
+{
+    scale_lambda [num_features, 1] = 0;
+}
+
+if (intercept_status == 2)  # scale-&-shift X columns to mean 0, variance 1
+{                           # Important assumption: X [, num_features] = ones_r
+    avg_X_cols = t(colSums(X)) / num_records;
+    # Sample variance of each column, with the (num_records - 1) denominator
+    var_X_cols = (t(colSums (X ^ 2)) - num_records * (avg_X_cols ^ 2)) / (num_records - 1);
+    # Columns with non-positive variance are "unsafe" to rescale: their scale factor becomes 1
+    is_unsafe = ppred (var_X_cols, 0.0, "<=");
+    scale_X = 1.0 / sqrt (var_X_cols * (1 - is_unsafe) + is_unsafe);
+    scale_X [num_features, 1] = 1;
+    shift_X = - avg_X_cols * scale_X;
+    shift_X [num_features, 1] = 0;
+    # Row sums of squares of the shifted-and-scaled X, expanded as
+    # sum_j (x_j * scale_j + shift_j)^2 so that the transformed X is never materialized
+    rowSums_X_sq = (X ^ 2) %*% (scale_X ^ 2) + X %*% (2 * scale_X * shift_X) + sum (shift_X ^ 2);
+} else {
+    # Trivial transform: unit scale, zero shift
+    scale_X = matrix (1, rows = num_features, cols = 1);
+    shift_X = matrix (0, rows = num_features, cols = 1);
+    rowSums_X_sq = rowSums (X ^ 2);
+}
+
+# Henceforth we replace "X" with "X %*% (SHIFT/SCALE TRANSFORM)" and rowSums(X ^ 2)
+# with "rowSums_X_sq" in order to preserve the sparsity of X under shift and scale.
+# The transform is then associatively applied to the other side of the expression,
+# and is rewritten via "scale_X" and "shift_X" as follows:
+#
+# ssX_A  = (SHIFT/SCALE TRANSFORM) %*% A    --- is rewritten as:
+# ssX_A  = diag (scale_X) %*% A;
+# ssX_A [num_features, ] = ssX_A [num_features, ] + t(shift_X) %*% A;
+#
+# tssX_A = t(SHIFT/SCALE TRANSFORM) %*% A   --- is rewritten as:
+# tssX_A = diag (scale_X) %*% A + shift_X %*% A [num_features, ];
+
+# Initialize other input-dependent parameters
+
+lambda = scale_lambda * regularization;
+# $mii = 0 means "no maximum": cap the inner CG iterations at the number of features
+if (max_iteration_CG == 0) {
+    max_iteration_CG = num_features;
+}
+
+# In Bernoulli case, convert one-column "Y" into two-column
+# (column 1 = indicator of the "Yes" label, column 2 = indicator of the "No" label);
+# the script stops with an error if only one of the two labels is present.
+
+if (distribution_type == 2 & ncol(Y) == 1)
+{
+    is_Y_negative = ppred (Y, bernoulli_No_label, "==");
+    Y = append (1 - is_Y_negative, is_Y_negative);
+    count_Y_negative = sum (is_Y_negative);
+    if (count_Y_negative == 0) {
+        stop ("GLM Input Error: all Y-values encode Bernoulli YES-label, none encode NO-label");
+    }
+    if (count_Y_negative == nrow(Y)) {
+        stop ("GLM Input Error: all Y-values encode Bernoulli NO-label, none encode YES-label");
+    }
+}
+
+# Set up the canonical link, if requested [Then we have: Var(mu) * (d link / d mu) = const]
+# Power family: power link with exponent (1 - variance power); Binomial: logit (link code 2).
+
+if (link_type == 0)
+{
+    if (distribution_type == 1) {
+        link_type = 1;
+        link_as_power_of_the_mean = 1.0 - variance_as_power_of_the_mean;
+    } else { if (distribution_type == 2) {
+            link_type = 2;
+}   }   }
+
+# For power distributions and/or links, we use two constants,
+# "variance_as_power_of_the_mean" and "link_as_power_of_the_mean",
+# to specify the variance and the link as arbitrary powers of the
+# mean.  However, the variance-powers of 1.0 (Poisson family) and
+# 2.0 (Gamma family) have to be treated as special cases, because
+# these values integrate into logarithms.  The link-power of 0.0
+# is also special as it represents the logarithm link.
+
+num_response_columns = ncol (Y);
+
+# Main part of the script.  It is wrapped in two nested "if" blocks whose "else"
+# branches (at the very end) set termination codes 3 and 4 respectively:
+#   outer: the distribution/link combination must be supported;
+#   inner: the initialization must not report out-of-range input (isNaN == 0).
+is_supported = check_if_supported (num_response_columns, distribution_type, variance_as_power_of_the_mean, link_type, link_as_power_of_the_mean);
+if (is_supported == 1)
+{
+
+#####   INITIALIZE THE BETAS   #####
+
+[beta, saturated_log_l, isNaN] = 
+    glm_initialize (X, Y, distribution_type, variance_as_power_of_the_mean, link_type, link_as_power_of_the_mean, intercept_status, max_iteration_CG);
+if (isNaN == 0)
+{
+
+#####  START OF THE MAIN PART  #####
+
+sum_X_sq = sum (rowSums_X_sq);
+# Initial trust region size, derived from the largest row norm of the (transformed) X
+trust_delta = 0.5 * sqrt (num_features) / max (sqrt (rowSums_X_sq));
+###  max_trust_delta = trust_delta * 10000.0;
+log_l = 0.0;
+deviance_nodisp = 0.0;
+new_deviance_nodisp = 0.0;
+# Placeholder value; it is overwritten when the first point is accepted (iteration 0),
+# which happens before it is first read (only for i_IRLS > 0).
+isNaN_log_l = 2;
+newbeta = beta;
+g = matrix (0.0, rows = num_features, cols = 1);
+g_norm = sqrt (sum ((g + lambda * beta) ^ 2));
+accept_new_beta = 1;
+reached_trust_boundary = 0;
+neg_log_l_change_predicted = 0.0;
+i_IRLS = 0;
+
+print ("BEGIN IRLS ITERATIONS...");
+
+# Linear terms X %*% (SHIFT/SCALE TRANSFORM) %*% newbeta, computed without transforming X
+ssX_newbeta = diag (scale_X) %*% newbeta;
+ssX_newbeta [num_features, ] = ssX_newbeta [num_features, ] + t(shift_X) %*% newbeta;
+all_linear_terms = X %*% ssX_newbeta;
+
+[new_log_l, isNaN_new_log_l] = glm_log_likelihood_part
+    (all_linear_terms, Y, distribution_type, variance_as_power_of_the_mean, link_type, link_as_power_of_the_mean);
+
+# The deviance uses the unpenalized log-likelihood; the objective then subtracts the L2 penalty
+if (isNaN_new_log_l == 0) {
+    new_deviance_nodisp = 2.0 * (saturated_log_l - new_log_l);
+    new_log_l = new_log_l - 0.5 * sum (lambda * newbeta ^ 2);
+}
+
+# Per-iteration log (iteration 0 = initial values); a blank " " fileLog disables logging
+if (fileLog != " ") {
+    log_str = "POINT_STEP_NORM," + i_IRLS + "," + sqrt (sum (beta ^ 2));
+    log_str = append (log_str, "OBJECTIVE," + i_IRLS + "," + (- new_log_l));
+    log_str = append (log_str, "LINEAR_TERM_MIN," + i_IRLS + "," + min (all_linear_terms));
+    log_str = append (log_str, "LINEAR_TERM_MAX," + i_IRLS + "," + max (all_linear_terms));
+} else {
+    log_str = " ";
+}
+
+# Outer loop: each pass first decides whether to accept the candidate "newbeta" computed
+# by the previous pass, then computes the next candidate and tests for termination.
+while (termination_code == 0)
+{
+    accept_new_beta = 1;
+    
+    if (i_IRLS > 0)
+    {
+        # If the current point has a valid log-likelihood, reject the candidate by default
+        # (it may be re-accepted below); a current point with NaN log-likelihood is always replaced.
+        if (isNaN_log_l == 0) {
+            accept_new_beta = 0;
+        }
+
+# Decide whether to accept a new iteration point and update the trust region
+# See Alg. 4.1 on p. 69 of "Numerical Optimization" 2nd ed. by Nocedal and Wright
+
+        rho = (- new_log_l + log_l) / neg_log_l_change_predicted;
+        if (rho < 0.25 | isNaN_new_log_l == 1) {
+            trust_delta = 0.25 * trust_delta;
+        }
+        if (rho > 0.75 & isNaN_new_log_l == 0 & reached_trust_boundary == 1) {
+            trust_delta = 2 * trust_delta;
+            
+### if (trust_delta > max_trust_delta) {
+###     trust_delta = max_trust_delta;
+### }
+
+        }
+        if (rho > 0.1 & isNaN_new_log_l == 0) {
+            accept_new_beta = 1;
+        }
+    }
+
+    if (fileLog != " ") {
+        log_str = append (log_str, "IS_POINT_UPDATED," + i_IRLS + "," + accept_new_beta);
+        log_str = append (log_str, "TRUST_DELTA,"      + i_IRLS + "," + trust_delta);
+    }
+    if (accept_new_beta == 1)
+    {
+        # Move to the candidate point and recompute the gradient and weights there;
+        # on rejection the old beta, g and w are reused with the shrunken trust region.
+        beta = newbeta;  log_l = new_log_l;  deviance_nodisp = new_deviance_nodisp;  isNaN_log_l = isNaN_new_log_l;
+        
+        [g_Y, w] = glm_dist (all_linear_terms, Y, distribution_type, variance_as_power_of_the_mean, link_type, link_as_power_of_the_mean);
+        
+        # We introduced these variables to avoid roundoff errors:
+        #     g_Y = y_residual / (y_var * link_grad);
+        #     w   = 1.0 / (y_var * link_grad * link_grad);
+                      
+        # g = t(SHIFT/SCALE TRANSFORM) %*% gXY, using the rewrite described earlier in the script
+        gXY = - t(X) %*% g_Y;
+        g = diag (scale_X) %*% gXY + shift_X %*% gXY [num_features, ];
+        g_norm = sqrt (sum ((g + lambda * beta) ^ 2));
+        
+        if (fileLog != " ") {
+            log_str = append (log_str, "GRADIENT_NORM," + i_IRLS + "," + g_norm);
+        }
+    }
+    
+    # Inner solver: returns the step z, the predicted change of the objective, the number
+    # of CG iterations used, and whether the trust region boundary was reached
+    [z, neg_log_l_change_predicted, num_CG_iters, reached_trust_boundary] = 
+        get_CG_Steihaug_point (X, scale_X, shift_X, w, g, beta, lambda, trust_delta, max_iteration_CG);
+
+    newbeta = beta + z;
+    
+    ssX_newbeta = diag (scale_X) %*% newbeta;
+    ssX_newbeta [num_features, ] = ssX_newbeta [num_features, ] + t(shift_X) %*% newbeta;
+    all_linear_terms = X %*% ssX_newbeta;
+    
+    [new_log_l, isNaN_new_log_l] = glm_log_likelihood_part
+        (all_linear_terms, Y, distribution_type, variance_as_power_of_the_mean, link_type, link_as_power_of_the_mean);
+
+    if (isNaN_new_log_l == 0) {
+        new_deviance_nodisp = 2.0 * (saturated_log_l - new_log_l);
+        new_log_l = new_log_l - 0.5 * sum (lambda * newbeta ^ 2);
+    }
+        
+    log_l_change = new_log_l - log_l;               # R's criterion for termination: |dev - devold|/(|dev| + 0.1) < eps
+
+    # Converged if the step stayed inside the trust region, the new log-likelihood is valid,
+    # and its change is small either relative to the deviance (tolerance "eps") or
+    # relative to the log-likelihood magnitudes (fixed factor 1e-14)
+    if (reached_trust_boundary == 0 & isNaN_new_log_l == 0 & 
+        (2.0 * abs (log_l_change) < eps * (deviance_nodisp + 0.1) | abs (log_l_change) < (abs (log_l) + abs (new_log_l)) * 0.00000000000001) )  
+    {
+        termination_code = 1;
+    }
+    rho = - log_l_change / neg_log_l_change_predicted;
+    z_norm = sqrt (sum (z * z));
+    
+    # Split each value into (mantissa, exponent) for the progress message below
+    [z_norm_m, z_norm_e] = round_to_print (z_norm);
+    [trust_delta_m, trust_delta_e] = round_to_print (trust_delta);
+    [rho_m, rho_e] = round_to_print (rho);
+    [new_log_l_m, new_log_l_e] = round_to_print (new_log_l);
+    [log_l_change_m, log_l_change_e] = round_to_print (log_l_change);
+    [g_norm_m, g_norm_e] = round_to_print (g_norm);
+
+    i_IRLS = i_IRLS + 1;
+    print ("Iter #" + i_IRLS + " completed"
+        + ", ||z|| = " + z_norm_m + "E" + z_norm_e
+        + ", trust_delta = " + trust_delta_m + "E" + trust_delta_e
+        + ", reached = " + reached_trust_boundary
+        + ", ||g|| = " + g_norm_m + "E" + g_norm_e
+        + ", new_log_l = " + new_log_l_m + "E" + new_log_l_e
+        + ", log_l_change = " + log_l_change_m + "E" + log_l_change_e
+        + ", rho = " + rho_m + "E" + rho_e);
+        
+    if (fileLog != " ") {
+        log_str = append (log_str, "NUM_CG_ITERS,"     + i_IRLS + "," + num_CG_iters);
+        log_str = append (log_str, "IS_TRUST_REACHED," + i_IRLS + "," + reached_trust_boundary);
+        log_str = append (log_str, "POINT_STEP_NORM,"  + i_IRLS + "," + z_norm);
+        log_str = append (log_str, "OBJECTIVE,"        + i_IRLS + "," + (- new_log_l));
+        log_str = append (log_str, "OBJ_DROP_REAL,"    + i_IRLS + "," + log_l_change);
+        log_str = append (log_str, "OBJ_DROP_PRED,"    + i_IRLS + "," + (- neg_log_l_change_predicted));
+        log_str = append (log_str, "OBJ_DROP_RATIO,"   + i_IRLS + "," + rho);
+        log_str = append (log_str, "LINEAR_TERM_MIN,"  + i_IRLS + "," + min (all_linear_terms));
+        log_str = append (log_str, "LINEAR_TERM_MAX,"  + i_IRLS + "," + max (all_linear_terms));
+    }
+        
+    # Note: this check comes last, so reaching the iteration limit overrides code 1
+    if (i_IRLS == max_iteration_IRLS) {
+        termination_code = 2;
+    }
+}
+
+# NOTE(review): the last candidate is taken as the result unconditionally, i.e. also when
+# the loop stopped on the iteration limit and this candidate was never put through the
+# acceptance test above -- confirm that this is intended.
+beta = newbeta;
+log_l = new_log_l;
+deviance_nodisp = new_deviance_nodisp;
+
+if (termination_code == 1) {
+    print ("Converged in " + i_IRLS + " steps.");
+} else {
+    print ("Did not converge.");
+}
+
+# Output betas: ssX_beta applies the shift/scale transform to beta; for icpt = 2 the
+# untransformed beta is appended as a second column
+ssX_beta = diag (scale_X) %*% beta;
+ssX_beta [num_features, ] = ssX_beta [num_features, ] + t(shift_X) %*% beta;
+if (intercept_status == 2) {
+    beta_out = append (ssX_beta, beta);
+} else {
+    beta_out = ssX_beta;
+}
+
+write (beta_out, fileB, format=fmtB);
+
+# Min/max statistics are taken over the first column of beta_out, excluding the intercept (last row)
+if (intercept_status == 1 | intercept_status == 2) {
+    intercept_value = castAsScalar (beta_out [num_features, 1]);
+    beta_noicept = beta_out [1 : (num_features - 1), 1];
+} else {
+    beta_noicept = beta_out [1 : num_features, 1];
+}
+min_beta = min (beta_noicept);
+max_beta = max (beta_noicept);
+tmp_i_min_beta = rowIndexMin (t(beta_noicept))
+i_min_beta = castAsScalar (tmp_i_min_beta [1, 1]);
+tmp_i_max_beta = rowIndexMax (t(beta_noicept))
+i_max_beta = castAsScalar (tmp_i_max_beta [1, 1]);
+
+#####  OVER-DISPERSION PART  #####
+
+all_linear_terms = X %*% ssX_beta;
+[g_Y, w] = glm_dist (all_linear_terms, Y, distribution_type, variance_as_power_of_the_mean, link_type, link_as_power_of_the_mean);
+    
+# NaN entries (e.g. from 0/0) are replaced by 0 so that they do not contribute to the sum
+pearson_residual_sq = g_Y ^ 2 / w;
+pearson_residual_sq = replace (target = pearson_residual_sq, pattern = 0.0/0.0, replacement = 0);
+# pearson_residual_sq = (y_residual ^ 2) / y_var;
+
+# The estimate needs positive residual degrees of freedom; otherwise it keeps its default value
+if (num_records > num_features) {
+    estimated_dispersion = sum (pearson_residual_sq) / (num_records - num_features);
+}
+# A non-positive dispersion means "use the estimate"
+if (dispersion <= 0.0) {
+    dispersion = estimated_dispersion;
+}
+deviance = deviance_nodisp / dispersion;
+
+if (fileLog != " ") {
+    write (log_str, fileLog);
+}
+
+#####  END OF THE MAIN PART  #####
+
+} else { print ("Input matrices are out of range.  Terminating the DML."); termination_code = 3; }
+} else { print ("Distribution/Link not supported.  Terminating the DML."); termination_code = 4; }
+
+# Assemble the output statistics as "NAME,value" entries, one statistic per append call.
+# Statistics that were never computed (e.g. after an early termination) keep their defaults.
+str = "TERMINATION_CODE," + termination_code;
+str = append (str, "BETA_MIN," + min_beta);
+str = append (str, "BETA_MIN_INDEX," + i_min_beta);
+str = append (str, "BETA_MAX," + max_beta);
+str = append (str, "BETA_MAX_INDEX," + i_max_beta);
+str = append (str, "INTERCEPT," + intercept_value);
+str = append (str, "DISPERSION," + dispersion);
+str = append (str, "DISPERSION_EST," + estimated_dispersion);
+str = append (str, "DEVIANCE_UNSCALED," + deviance_nodisp);
+str = append (str, "DEVIANCE_SCALED," + deviance);
+
+# Write the statistics to the "O" location if one was given, otherwise print them
+if (fileO != " ") {
+    write (str, fileO);
+} else {
+    print (str);
+}
+
+
+
+
+# Checks whether the requested response format / distribution / link combination is
+# supported, and prints the name of the recognized "Distribution.link" pair.
+#   ncol_y     : number of columns in the response matrix
+#   dist_type  : distribution family code (1 = Power, 2 = Binomial)
+#   var_power  : variance as power of the mean (used only to name the Power family)
+#   link_type  : link function code (1 = Power, 2..5 = the Binomial-only links)
+#   link_power : link as power of the mean (used only to name the link)
+# Returns is_supported = 1 for:
+#   - 1-column response, Power distribution, Power link (any powers);
+#   - 2-column response, Binomial distribution, link codes 1 through 5;
+# and 0 otherwise, in which case an explanatory message is printed.
+check_if_supported = 
+    function (int ncol_y, int dist_type, double var_power, int link_type, double link_power)
+    return   (int is_supported)
+{
+    is_supported = 0;
+    if (ncol_y == 1 & dist_type == 1 & link_type == 1)
+    { # POWER DISTRIBUTION
+        is_supported = 1;
+        # The chain below only selects the name to print; it does not affect is_supported
+        if (var_power == 0.0 & link_power == -1.0) {print ("Gaussian.inverse");      } else {
+        if (var_power == 0.0 & link_power ==  0.0) {print ("Gaussian.log");          } else {
+        if (var_power == 0.0 & link_power ==  0.5) {print ("Gaussian.sqrt");         } else {
+        if (var_power == 0.0 & link_power ==  1.0) {print ("Gaussian.id");           } else {
+        if (var_power == 0.0                     ) {print ("Gaussian.power_nonlog"); } else {
+        if (var_power == 1.0 & link_power == -1.0) {print ("Poisson.inverse");       } else {
+        if (var_power == 1.0 & link_power ==  0.0) {print ("Poisson.log");           } else {
+        if (var_power == 1.0 & link_power ==  0.5) {print ("Poisson.sqrt");          } else {
+        if (var_power == 1.0 & link_power ==  1.0) {print ("Poisson.id");            } else {
+        if (var_power == 1.0                     ) {print ("Poisson.power_nonlog");  } else {
+        if (var_power == 2.0 & link_power == -1.0) {print ("Gamma.inverse");         } else {
+        if (var_power == 2.0 & link_power ==  0.0) {print ("Gamma.log");             } else {
+        if (var_power == 2.0 & link_power ==  0.5) {print ("Gamma.sqrt");            } else {
+        if (var_power == 2.0 & link_power ==  1.0) {print ("Gamma.id");              } else {
+        if (var_power == 2.0                     ) {print ("Gamma.power_nonlog");    } else {
+        if (var_power == 3.0 & link_power == -2.0) {print ("InvGaussian.1/mu^2");    } else {
+        if (var_power == 3.0 & link_power == -1.0) {print ("InvGaussian.inverse");   } else {
+        if (var_power == 3.0 & link_power ==  0.0) {print ("InvGaussian.log");       } else {
+        if (var_power == 3.0 & link_power ==  0.5) {print ("InvGaussian.sqrt");      } else {
+        if (var_power == 3.0 & link_power ==  1.0) {print ("InvGaussian.id");        } else {
+        if (var_power == 3.0                     ) {print ("InvGaussian.power_nonlog");}else{
+        if (                   link_power ==  0.0) {print ("PowerDist.log");         } else {
+                                                    print ("PowerDist.power_nonlog");
+    }   }}}}} }}}}} }}}}} }}}}} }}
+    # A 1-column Binomial response is reported separately; is_supported stays 0 in this case
+    if (ncol_y == 1 & dist_type == 2)
+    {
+        print ("Error: Bernoulli response matrix has not been converted into two-column format.");
+    }
+    if (ncol_y == 2 & dist_type == 2 & link_type >= 1 & link_type <= 5)
+    { # BINOMIAL/BERNOULLI DISTRIBUTION
+        is_supported = 1;
+        if (link_type == 1 & link_power == -1.0) {print ("Binomial.inverse");        } else {
+        if (link_type == 1 & link_power ==  0.0) {print ("Binomial.log");            } else {
+        if (link_type == 1 & link_power ==  0.5) {print ("Binomial.sqrt");           } else {
+        if (link_type == 1 & link_power ==  1.0) {print ("Binomial.id");             } else {
+        if (link_type == 1)                      {print ("Binomial.power_nonlog");   } else {
+        if (link_type == 2)                      {print ("Binomial.logit");          } else {
+        if (link_type == 3)                      {print ("Binomial.probit");         } else {
+        if (link_type == 4)                      {print ("Binomial.cloglog");        } else {
+        if (link_type == 5)                      {print ("Binomial.cauchit");        }
+    }   }}}}} }}}
+    if (is_supported == 0) {
+        print ("Response matrix with " + ncol_y + " columns, distribution family (" + dist_type + ", " + var_power
+             + ") and link family (" + link_type + ", " + link_power + ") are NOT supported together.");
+    }
+}
+
+# Computes the starting point for the GLM iterations.
+#   X, Y         : feature matrix and response matrix (for dist_type = 2, Y has the
+#                  two columns whose row sums give the number of trials)
+#   dist_type, var_power, link_type, link_power : distribution / link specification
+#   icept_status : intercept mode; for 1 or 2 the last beta entry is the intercept
+#   max_iter_CG  : iteration limit passed on to "straightenX"
+# Returns:
+#   beta            : initial betas (ncol(X) x 1)
+#   saturated_log_l : log-likelihood part evaluated at linear terms obtained by applying
+#                     the link function to the (corrected) responses themselves
+#   isNaN           : 1 if the responses are outside the domain of the link function, or
+#                     if the log-likelihood computation reports NaN; 0 otherwise
+glm_initialize = function (Matrix[double] X, Matrix[double] Y, int dist_type, double var_power, int link_type, double link_power, int icept_status, int max_iter_CG)
+return (Matrix[double] beta, double saturated_log_l, int isNaN)
+{
+    saturated_log_l = 0.0;
+    isNaN = 0;
+    y_corr = Y [, 1];
+    if (dist_type == 2) {
+        # Binomial: use the observed proportion Y[,1] / n; rows with n = 0 get 0.5
+        n_corr = rowSums (Y);
+        is_n_zero = ppred (n_corr, 0.0, "==");
+        y_corr = Y [, 1] / (n_corr + is_n_zero) + (0.5 - Y [, 1]) * is_n_zero;    
+    }
+    linear_terms = y_corr;
+    if (dist_type == 1 & link_type == 1) { # POWER DISTRIBUTION
+        if          (link_power ==  0.0) {
+            # Log link: negative responses are out of range; for zero responses the
+            # second term evaluates to 1/0, making the linear term -infinity
+            if (sum (ppred (y_corr, 0.0, "<")) == 0) {
+                is_zero_y_corr = ppred (y_corr, 0.0, "==");
+                linear_terms = log (y_corr + is_zero_y_corr) - is_zero_y_corr / (1.0 - is_zero_y_corr);
+            } else { isNaN = 1; }
+        } else { if (link_power ==  1.0) {
+            linear_terms = y_corr;
+        } else { if (link_power == -1.0) {
+            # NOTE(review): unlike the other branches, no range check is made here
+            linear_terms = 1.0 / y_corr;
+        } else { if (link_power ==  0.5) {
+            if (sum (ppred (y_corr, 0.0, "<")) == 0) {
+                linear_terms = sqrt (y_corr);
+            } else { isNaN = 1; }
+        } else { if (link_power >   0.0) {
+            # Positive power: zero responses map to zero, via (0 + 1) ^ power - 1
+            if (sum (ppred (y_corr, 0.0, "<")) == 0) {
+                is_zero_y_corr = ppred (y_corr, 0.0, "==");
+                linear_terms = (y_corr + is_zero_y_corr) ^ link_power - is_zero_y_corr;
+            } else { isNaN = 1; }
+        } else {
+            # Negative power: responses must be strictly positive
+            if (sum (ppred (y_corr, 0.0, "<=")) == 0) {
+                linear_terms = y_corr ^ link_power;
+            } else { isNaN = 1; }
+        }}}}}
+    }
+    if (dist_type == 2 & link_type >= 1 & link_type <= 5)
+    { # BINOMIAL/BERNOULLI DISTRIBUTION
+        if          (link_type == 1 & link_power == 0.0)  { # Binomial.log
+            if (sum (ppred (y_corr, 0.0, "<")) == 0) {
+                is_zero_y_corr = ppred (y_corr, 0.0, "==");
+                linear_terms = log (y_corr + is_zero_y_corr) - is_zero_y_corr / (1.0 - is_zero_y_corr);
+            } else { isNaN = 1; }
+        } else { if (link_type == 1 & link_power >  0.0)  { # Binomial.power_nonlog pos
+            if (sum (ppred (y_corr, 0.0, "<")) == 0) {
+                is_zero_y_corr = ppred (y_corr, 0.0, "==");
+                linear_terms = (y_corr + is_zero_y_corr) ^ link_power - is_zero_y_corr;
+            } else { isNaN = 1; }
+        } else { if (link_type == 1)                      { # Binomial.power_nonlog neg
+            if (sum (ppred (y_corr, 0.0, "<=")) == 0) {
+                linear_terms = y_corr ^ link_power;
+            } else { isNaN = 1; }
+        } else { 
+            # Links 2..5: proportions at or beyond 0 and 1 are replaced by 0.5 before the link
+            # is applied; the terms "is_one / (1 - is_one)" and "is_zero / (1 - is_zero)" below
+            # evaluate to 1/0 for those rows, pushing their linear terms to +/- infinity
+            is_zero_y_corr = ppred (y_corr, 0.0, "<=");
+            is_one_y_corr  = ppred (y_corr, 1.0, ">=");
+            y_corr = y_corr * (1.0 - is_zero_y_corr) * (1.0 - is_one_y_corr) + 0.5 * (is_zero_y_corr + is_one_y_corr);
+            if (link_type == 2)                           { # Binomial.logit
+                linear_terms = log (y_corr / (1.0 - y_corr)) 
+                    + is_one_y_corr / (1.0 - is_one_y_corr) - is_zero_y_corr / (1.0 - is_zero_y_corr);
+            } else { if (link_type == 3)                  { # Binomial.probit
+                # Rational approximation of the inverse Gaussian CDF, evaluated on the
+                # lower half (y or 1 - y, whichever is <= 0.5) and mirrored for y > 0.5
+                y_below_half = y_corr + (1.0 - 2.0 * y_corr) * ppred (y_corr, 0.5, ">");
+                t = sqrt (- 2.0 * log (y_below_half));
+                approx_inv_Gauss_CDF = - t + (2.515517 + t * (0.802853 + t * 0.010328)) / (1.0 + t * (1.432788 + t * (0.189269 + t * 0.001308)));
+                linear_terms = approx_inv_Gauss_CDF * (1.0 - 2.0 * ppred (y_corr, 0.5, ">"))
+                    + is_one_y_corr / (1.0 - is_one_y_corr) - is_zero_y_corr / (1.0 - is_zero_y_corr);
+            } else { if (link_type == 4)                  { # Binomial.cloglog
+                linear_terms = log (- log (1.0 - y_corr))
+                    - log (- log (0.5)) * (is_zero_y_corr + is_one_y_corr)
+                    + is_one_y_corr / (1.0 - is_one_y_corr) - is_zero_y_corr / (1.0 - is_zero_y_corr);
+            } else { if (link_type == 5)                  { # Binomial.cauchit
+                linear_terms = tan ((y_corr - 0.5) * 3.1415926535897932384626433832795)
+                    + is_one_y_corr / (1.0 - is_one_y_corr) - is_zero_y_corr / (1.0 - is_zero_y_corr);
+        }}  }}}}}
+    }
+    
+    if (isNaN == 0) {
+        [saturated_log_l, isNaN] = 
+            glm_log_likelihood_part (linear_terms, Y, dist_type, var_power, link_type, link_power);
+    }
+    
+    # Choose the value that the initial linear terms should take: 0.0 for the Power
+    # distribution with log link and for Binomial with links 2..5; otherwise the power
+    # link evaluated at a mean of 0.5 (log (0.5) or 0.5 ^ link_power)
+    if ((dist_type == 1 & link_type == 1 & link_power == 0.0) |
+        (dist_type == 2 & link_type >= 2))
+    {    
+        desired_eta = 0.0;
+    } else { if (link_type == 1 & link_power == 0.0) {
+        desired_eta = log (0.5);
+    } else { if (link_type == 1) {
+        desired_eta = 0.5 ^ link_power;
+    } else {
+        desired_eta = 0.5;
+    }}}
+    
+    beta = matrix (0.0, rows = ncol(X), cols = 1);
+    
+    if (desired_eta != 0.0) {
+        if (icept_status == 1 | icept_status == 2) {
+            # With an intercept, only the intercept (last beta entry) is set to desired_eta
+            beta [nrow(beta), 1] = desired_eta;
+        } else {
+            # We want: avg (X %*% ssX_transform %*% beta) = desired_eta
+            # Note that "ssX_transform" is trivial here, hence ignored
+            
+            beta = straightenX (X, 0.000001, max_iter_CG);  
+            beta = beta * desired_eta;
+}   }   }
+
+
+glm_dist = function (Matrix[double] linear_terms, Matrix[double] Y,
+                     int dist_type, double var_power, int link_type, double link_power)
+    return (Matrix[double] g_Y, Matrix[double] w)
+    # ORIGINALLY we returned more meaningful vectors, namely:
+    # Matrix[double] y_residual    : y - y_mean, i.e. y observed - y predicted
+    # Matrix[double] link_gradient : derivative of the link function
+    # Matrix[double] var_function  : variance without dispersion, i.e. the V(mu) function
+    # BUT, this caused roundoff errors, so we had to compute "directly useful" vectors
+    # and skip over the "meaningful intermediaries".  Now we output these two variables:
+    #     g_Y = y_residual / (var_function * link_gradient);
+    #     w 

<TRUNCATED>


[31/55] [partial] incubator-systemml git commit: [SYSTEMML-482] [SYSTEMML-480] Adding a Git attributes file to enfore Unix-styled line endings, and normalizing all of the line endings.

Posted by du...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/arima_box-jenkins/arima.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/arima_box-jenkins/arima.dml b/src/test/scripts/applications/arima_box-jenkins/arima.dml
index f9afea9..73052e0 100644
--- a/src/test/scripts/applications/arima_box-jenkins/arima.dml
+++ b/src/test/scripts/applications/arima_box-jenkins/arima.dml
@@ -1,287 +1,287 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# Arguments
-# 1st arg: X (one column time series)
-# 2nd arg: max_func_invoc
-# 3rd arg: p (non-seasonal AR order)
-# 4th arg: d (non-seasonal differencing order)
-# 5th arg: q (non-seasonal MA order)
-# 6th arg: P (seasonal AR order)
-# 7th arg: D (seasonal differencing order)
-# 8th arg: Q (seasonal MA order)
-# 9th arg: s (period in terms of number of time-steps)
-# 10th arg: 0/1 (1 means include.mean)
-# 11th arg: 0 to use CG solver, 1 to use Jacobi's method
-# 12th arg: file name to store learnt parameters
-
-#changing to additive sar since R's arima seems to do that
-
-arima_css = function(Matrix[Double] w, Matrix[Double] X, Integer pIn, Integer P, Integer qIn, Integer Q, Integer s, Integer useJacobi) return (Double obj){
-	b = X[,2:ncol(X)]%*%w
-	
-	R = Rand(rows=nrow(X), cols=nrow(X), min=0, max=0)
-	for(i7 in 1:qIn){
-		ma_ind_ns = P+pIn+i7
-		err_ind_ns = i7
-		ones_ns = Rand(rows=nrow(R)-err_ind_ns, cols=1, min=1, max=1)
-		d_ns = ones_ns * castAsScalar(w[ma_ind_ns,1])
-		R[1+err_ind_ns:nrow(R),1:ncol(R)-err_ind_ns] = R[1+err_ind_ns:nrow(R),1:ncol(R)-err_ind_ns] + diag(d_ns)
-	}
-	for(i8 in 1:Q){
-		ma_ind_s = P+pIn+qIn+i8
-		err_ind_s = s*i8
-		ones_s = Rand(rows=nrow(R)-err_ind_s, cols=1, min=1, max=1)
-		d_s = ones_s * castAsScalar(w[ma_ind_s,1])
-		R[1+err_ind_s:nrow(R),1:ncol(R)-err_ind_s] = R[1+err_ind_s:nrow(R),1:ncol(R)-err_ind_s] + diag(d_s)
-	}
-	
-	#checking for strict diagonal dominance
-	#required for jacobi's method
-	#
-	
-	max_iter = 100
-	tol = 0.01
-
-	y_hat = Rand(rows=nrow(X), cols=1, min=0, max=0)
-  	iter = 0
-  	
-	if(useJacobi == 1){
-		check = sum(ppred(rowSums(abs(R)), 1, ">="))
-		if(check > 0){
-			print("R is not diagonal dominant. Suggest switching to an exact solver.")
-		}
-		
-		diff = tol+1.0
-		while(iter < max_iter & diff > tol){
-    		y_hat_new = b - R%*%y_hat
-    		diff = sum((y_hat_new-y_hat)*(y_hat_new-y_hat))
-    		y_hat = y_hat_new
-    		iter = iter + 1
-    	}
-  	}else{
-	  	ones = matrix(1, rows=nrow(X), cols=1)
-  		A = R + diag(ones)
-  		Z = t(A)%*%A
-  		y = t(A)%*%b
-  		r = -y
-  		p = -r
-  		norm_r2 = sum(r*r)
-  		continue = 1
-  		if(norm_r2 == 0){
-  			continue = 0
-  		}
-  		while(iter < max_iter & continue == 1){
-  			q = Z%*%p
-  			alpha = norm_r2 / castAsScalar(t(p) %*% q)
-  			y_hat = y_hat + alpha * p
-  			old_norm_r2 = norm_r2
-  			r = r + alpha * q
-  			norm_r2 = sum(r * r)
-  			if(norm_r2 < tol){
-  				continue = 0
-  			}
-  			beta = norm_r2 / old_norm_r2
-  			p = -r + beta * p
-  			iter = iter + 1
-  		}
-	}
-	
-  	errs = X[,1] - y_hat
-  	obj = sum(errs*errs)
-}
-
-#input col of time series data
-X = read($1)
-
-max_func_invoc = $2
-
-#non-seasonal order
-p = $3
-d = $4
-q = $5
-
-#seasonal order
-P = $6
-D = $7
-Q = $8
-
-#length of the season
-s = $9
-
-include_mean = $10
-
-useJacobi = $11
-
-num_rows = nrow(X)
-
-if(num_rows <= d){
-	print("non-seasonal differencing order should be larger than length of the time-series")
-}
-
-Y = X
-for(i in 1:d){
-	n1 = nrow(Y)+0.0
-	Y = Y[2:n1,] - Y[1:n1-1,]
-}
-
-num_rows = nrow(Y)+0.0
-if(num_rows <= s*D){
-	print("seasonal differencing order should be larger than number of observations divided by length of season")
-}
-
-for(i in 1:D){
-	n1 = nrow(Y)+0.0
-	Y = Y[s+1:n1,] - Y[1:n1-s,]
-}
-
-n = nrow(Y)
-
-max_ar_col = P+p
-max_ma_col = Q+q
-if(max_ar_col > max_ma_col){
-	max_arma_col = max_ar_col
-}else{
-	max_arma_col = max_ma_col
-}
-
-mu = 0
-if(include_mean == 1){
-	mu = sum(Y)/nrow(Y)
-	Y = Y - mu
-}
-
-totcols = 1+p+P+Q+q #target col (X), p-P cols, q-Q cols  
-
-Z = Rand(rows=n, cols=totcols, min=0, max=0)
-Z[,1] = Y #target col
-
-parfor(i1 in 1:p, check=0){
-	Z[i1+1:n,1+i1] = Y[1:n-i1,]
-}
-parfor(i2 in 1:P, check=0){
-	Z[s*i2+1:n,1+p+i2] = Y[1:n-s*i2,]
-}
-parfor(i5 in 1:q, check=0){
-	Z[i5+1:n,1+P+p+i5] = Y[1:n-i5,]
-}
-parfor(i6 in 1:Q, check=0){
-	Z[s*i6+1:n,1+P+p+q+i6] = Y[1:n-s*i6,]
-}
-
-one = Rand(rows=1, cols=1, min=1, max=1)
-
-simplex = Rand(rows=totcols-1, cols=totcols, min=0, max=0)
-for(i in 2:ncol(simplex)){
-	simplex[i-1,i] = 0.1
-}
-
-num_func_invoc = 0
-
-objvals = Rand(rows=1, cols=ncol(simplex), min=0, max=0)
-parfor(i3 in 1:ncol(simplex)){
-	arima_css_obj_val = arima_css(simplex[,i3], Z, p, P, q, Q, s, useJacobi)
-	objvals[1,i3] = arima_css_obj_val
-}
-num_func_invoc = num_func_invoc + ncol(simplex)
-
-tol = 1.5 * 10^(-8) * castAsScalar(objvals[1,1])
-
-continue = 1
-while(continue == 1 & num_func_invoc <= max_func_invoc) {
-	best_index = 1
-	worst_index = 1
-	for(i in 2:ncol(objvals)){
-		this = castAsScalar(objvals[1,i])
-		that = castAsScalar(objvals[1,best_index])
-  		if(that > this){
-    		best_index = i
-  		}
-  		that = castAsScalar(objvals[1,worst_index])
-  		if(that < this){
-    		worst_index = i
-  		}
-	}
-	
-	best_obj_val = castAsScalar(objvals[1,best_index])
-	worst_obj_val = castAsScalar(objvals[1,worst_index])
-	if(worst_obj_val <= best_obj_val + tol){
-		continue = 0
-	}
-	
-	print("#Function calls::" + num_func_invoc + " OBJ: " + best_obj_val)
-	
-	c = (rowSums(simplex) - simplex[,worst_index])/(nrow(simplex))
-	
-	x_r = 2*c - simplex[,worst_index]
-	obj_x_r = arima_css(x_r, Z, p, P, q, Q, s, useJacobi)
-	num_func_invoc = num_func_invoc + 1
-	
-	if(obj_x_r < best_obj_val){
-		x_e = 2*x_r - c
-		obj_x_e = arima_css(x_e, Z, p, P, q, Q, s, useJacobi)
-		num_func_invoc = num_func_invoc + 1
-		
-		if(obj_x_r <= obj_x_e){
-			simplex[,worst_index] = x_r
-			objvals[1,worst_index] = obj_x_r
-		}else{
-			simplex[,worst_index] = x_e
-			objvals[1,worst_index] = obj_x_e
-		}
-	}else{
-		if(obj_x_r < worst_obj_val){
-			simplex[,worst_index] = x_r
-			objvals[1,worst_index] = obj_x_r
-		}
-		
-		x_c_in = (simplex[,worst_index] + c)/2
-		obj_x_c_in = arima_css(x_c_in, Z, p, P, q, Q, s, useJacobi)
-		num_func_invoc = num_func_invoc + 1
-		
-		if(obj_x_c_in < castAsScalar(objvals[1,worst_index])){
-			simplex[,worst_index] = x_c_in
-			objvals[1,worst_index] = obj_x_c_in
-		}else{
-			if(obj_x_r >= worst_obj_val){
-				best_point = simplex[,best_index]
-				parfor(i4 in 1:ncol(simplex)){
-					if(i4 != best_index){
-						simplex[,i4] = (simplex[,i4] + best_point)/2
-						tmp = arima_css(simplex[,i4], Z, p, P, q, Q, s, useJacobi)
-						objvals[1,i4] = tmp*one
-					}
-				}
-				num_func_invoc = num_func_invoc + ncol(simplex) - 1
-			}
-		}
-	}
-}
-
-best_point = simplex[,best_index]
-if(include_mean == 1){
-	tmp2 = Rand(rows=totcols, cols=1, min=0, max=0)
-	tmp2[1:nrow(best_point),1] = best_point
-	tmp2[nrow(tmp2),1] = mu
-	best_point = tmp2
-}
-
-write(best_point, $12, format="text")
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# Arguments
+# 1st arg: X (one column time series)
+# 2nd arg: max_func_invoc
+# 3rd arg: p (non-seasonal AR order)
+# 4th arg: d (non-seasonal differencing order)
+# 5th arg: q (non-seasonal MA order)
+# 6th arg: P (seasonal AR order)
+# 7th arg: D (seasonal differencing order)
+# 8th arg: Q (seasonal MA order)
+# 9th arg: s (period in terms of number of time-steps)
+# 10th arg: 0/1 (1 means include.mean)
+# 11th arg: 0 to use CG solver, 1 to use Jacobi's method
+# 12th arg: file name to store learnt parameters
+
+#changing to additive sar since R's arima seems to do that
+
+arima_css = function(Matrix[Double] w, Matrix[Double] X, Integer pIn, Integer P, Integer qIn, Integer Q, Integer s, Integer useJacobi) return (Double obj){
+	b = X[,2:ncol(X)]%*%w
+	
+	R = Rand(rows=nrow(X), cols=nrow(X), min=0, max=0)
+	for(i7 in 1:qIn){
+		ma_ind_ns = P+pIn+i7
+		err_ind_ns = i7
+		ones_ns = Rand(rows=nrow(R)-err_ind_ns, cols=1, min=1, max=1)
+		d_ns = ones_ns * castAsScalar(w[ma_ind_ns,1])
+		R[1+err_ind_ns:nrow(R),1:ncol(R)-err_ind_ns] = R[1+err_ind_ns:nrow(R),1:ncol(R)-err_ind_ns] + diag(d_ns)
+	}
+	for(i8 in 1:Q){
+		ma_ind_s = P+pIn+qIn+i8
+		err_ind_s = s*i8
+		ones_s = Rand(rows=nrow(R)-err_ind_s, cols=1, min=1, max=1)
+		d_s = ones_s * castAsScalar(w[ma_ind_s,1])
+		R[1+err_ind_s:nrow(R),1:ncol(R)-err_ind_s] = R[1+err_ind_s:nrow(R),1:ncol(R)-err_ind_s] + diag(d_s)
+	}
+	
+	#checking for strict diagonal dominance
+	#required for jacobi's method
+	#
+	
+	max_iter = 100
+	tol = 0.01
+
+	y_hat = Rand(rows=nrow(X), cols=1, min=0, max=0)
+  	iter = 0
+  	
+	if(useJacobi == 1){
+		check = sum(ppred(rowSums(abs(R)), 1, ">="))
+		if(check > 0){
+			print("R is not diagonal dominant. Suggest switching to an exact solver.")
+		}
+		
+		diff = tol+1.0
+		while(iter < max_iter & diff > tol){
+    		y_hat_new = b - R%*%y_hat
+    		diff = sum((y_hat_new-y_hat)*(y_hat_new-y_hat))
+    		y_hat = y_hat_new
+    		iter = iter + 1
+    	}
+  	}else{
+	  	ones = matrix(1, rows=nrow(X), cols=1)
+  		A = R + diag(ones)
+  		Z = t(A)%*%A
+  		y = t(A)%*%b
+  		r = -y
+  		p = -r
+  		norm_r2 = sum(r*r)
+  		continue = 1
+  		if(norm_r2 == 0){
+  			continue = 0
+  		}
+  		while(iter < max_iter & continue == 1){
+  			q = Z%*%p
+  			alpha = norm_r2 / castAsScalar(t(p) %*% q)
+  			y_hat = y_hat + alpha * p
+  			old_norm_r2 = norm_r2
+  			r = r + alpha * q
+  			norm_r2 = sum(r * r)
+  			if(norm_r2 < tol){
+  				continue = 0
+  			}
+  			beta = norm_r2 / old_norm_r2
+  			p = -r + beta * p
+  			iter = iter + 1
+  		}
+	}
+	
+  	errs = X[,1] - y_hat
+  	obj = sum(errs*errs)
+}
+
+#input col of time series data
+X = read($1)
+
+max_func_invoc = $2
+
+#non-seasonal order
+p = $3
+d = $4
+q = $5
+
+#seasonal order
+P = $6
+D = $7
+Q = $8
+
+#length of the season
+s = $9
+
+include_mean = $10
+
+useJacobi = $11
+
+num_rows = nrow(X)
+
+if(num_rows <= d){
+	print("non-seasonal differencing order should be larger than length of the time-series")
+}
+
+Y = X
+for(i in 1:d){
+	n1 = nrow(Y)+0.0
+	Y = Y[2:n1,] - Y[1:n1-1,]
+}
+
+num_rows = nrow(Y)+0.0
+if(num_rows <= s*D){
+	print("seasonal differencing order should be larger than number of observations divided by length of season")
+}
+
+for(i in 1:D){
+	n1 = nrow(Y)+0.0
+	Y = Y[s+1:n1,] - Y[1:n1-s,]
+}
+
+n = nrow(Y)
+
+max_ar_col = P+p
+max_ma_col = Q+q
+if(max_ar_col > max_ma_col){
+	max_arma_col = max_ar_col
+}else{
+	max_arma_col = max_ma_col
+}
+
+mu = 0
+if(include_mean == 1){
+	mu = sum(Y)/nrow(Y)
+	Y = Y - mu
+}
+
+totcols = 1+p+P+Q+q #target col (X), p-P cols, q-Q cols  
+
+Z = Rand(rows=n, cols=totcols, min=0, max=0)
+Z[,1] = Y #target col
+
+parfor(i1 in 1:p, check=0){
+	Z[i1+1:n,1+i1] = Y[1:n-i1,]
+}
+parfor(i2 in 1:P, check=0){
+	Z[s*i2+1:n,1+p+i2] = Y[1:n-s*i2,]
+}
+parfor(i5 in 1:q, check=0){
+	Z[i5+1:n,1+P+p+i5] = Y[1:n-i5,]
+}
+parfor(i6 in 1:Q, check=0){
+	Z[s*i6+1:n,1+P+p+q+i6] = Y[1:n-s*i6,]
+}
+
+one = Rand(rows=1, cols=1, min=1, max=1)
+
+simplex = Rand(rows=totcols-1, cols=totcols, min=0, max=0)
+for(i in 2:ncol(simplex)){
+	simplex[i-1,i] = 0.1
+}
+
+num_func_invoc = 0
+
+objvals = Rand(rows=1, cols=ncol(simplex), min=0, max=0)
+parfor(i3 in 1:ncol(simplex)){
+	arima_css_obj_val = arima_css(simplex[,i3], Z, p, P, q, Q, s, useJacobi)
+	objvals[1,i3] = arima_css_obj_val
+}
+num_func_invoc = num_func_invoc + ncol(simplex)
+
+tol = 1.5 * 10^(-8) * castAsScalar(objvals[1,1])
+
+continue = 1
+while(continue == 1 & num_func_invoc <= max_func_invoc) {
+	best_index = 1
+	worst_index = 1
+	for(i in 2:ncol(objvals)){
+		this = castAsScalar(objvals[1,i])
+		that = castAsScalar(objvals[1,best_index])
+  		if(that > this){
+    		best_index = i
+  		}
+  		that = castAsScalar(objvals[1,worst_index])
+  		if(that < this){
+    		worst_index = i
+  		}
+	}
+	
+	best_obj_val = castAsScalar(objvals[1,best_index])
+	worst_obj_val = castAsScalar(objvals[1,worst_index])
+	if(worst_obj_val <= best_obj_val + tol){
+		continue = 0
+	}
+	
+	print("#Function calls::" + num_func_invoc + " OBJ: " + best_obj_val)
+	
+	c = (rowSums(simplex) - simplex[,worst_index])/(nrow(simplex))
+	
+	x_r = 2*c - simplex[,worst_index]
+	obj_x_r = arima_css(x_r, Z, p, P, q, Q, s, useJacobi)
+	num_func_invoc = num_func_invoc + 1
+	
+	if(obj_x_r < best_obj_val){
+		x_e = 2*x_r - c
+		obj_x_e = arima_css(x_e, Z, p, P, q, Q, s, useJacobi)
+		num_func_invoc = num_func_invoc + 1
+		
+		if(obj_x_r <= obj_x_e){
+			simplex[,worst_index] = x_r
+			objvals[1,worst_index] = obj_x_r
+		}else{
+			simplex[,worst_index] = x_e
+			objvals[1,worst_index] = obj_x_e
+		}
+	}else{
+		if(obj_x_r < worst_obj_val){
+			simplex[,worst_index] = x_r
+			objvals[1,worst_index] = obj_x_r
+		}
+		
+		x_c_in = (simplex[,worst_index] + c)/2
+		obj_x_c_in = arima_css(x_c_in, Z, p, P, q, Q, s, useJacobi)
+		num_func_invoc = num_func_invoc + 1
+		
+		if(obj_x_c_in < castAsScalar(objvals[1,worst_index])){
+			simplex[,worst_index] = x_c_in
+			objvals[1,worst_index] = obj_x_c_in
+		}else{
+			if(obj_x_r >= worst_obj_val){
+				best_point = simplex[,best_index]
+				parfor(i4 in 1:ncol(simplex)){
+					if(i4 != best_index){
+						simplex[,i4] = (simplex[,i4] + best_point)/2
+						tmp = arima_css(simplex[,i4], Z, p, P, q, Q, s, useJacobi)
+						objvals[1,i4] = tmp*one
+					}
+				}
+				num_func_invoc = num_func_invoc + ncol(simplex) - 1
+			}
+		}
+	}
+}
+
+best_point = simplex[,best_index]
+if(include_mean == 1){
+	tmp2 = Rand(rows=totcols, cols=1, min=0, max=0)
+	tmp2[1:nrow(best_point),1] = best_point
+	tmp2[nrow(tmp2),1] = mu
+	best_point = tmp2
+}
+
+write(best_point, $12, format="text")

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/arima_box-jenkins/arima.pydml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/arima_box-jenkins/arima.pydml b/src/test/scripts/applications/arima_box-jenkins/arima.pydml
index 0c4be73..9b3387c 100644
--- a/src/test/scripts/applications/arima_box-jenkins/arima.pydml
+++ b/src/test/scripts/applications/arima_box-jenkins/arima.pydml
@@ -1,258 +1,258 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# Arguments
-# 1st arg: X (one column time series)
-# 2nd arg: max_func_invoc
-# 3rd arg: p (non-seasonal AR order)
-# 4th arg: d (non-seasonal differencing order)
-# 5th arg: q (non-seasonal MA order)
-# 6th arg: P (seasonal AR order)
-# 7th arg: D (seasonal differencing order)
-# 8th arg: Q (seasonal MA order)
-# 9th arg: s (period in terms of number of time-steps)
-# 10th arg: 0/1 (1 means include.mean)
-# 11th arg: 0 to use CG solver, 1 to use Jacobi's method
-# 12th arg: file name to store learnt parameters
-
-#changing to additive sar since R's arima seems to do that
-
-def arima_css(w:matrix[float], X:matrix[float], pIn: int, P: int, qIn: int, Q:int, s:int, useJacobi: int) -> (obj: float):
-    b = dot(X[,2:ncol(X)], w)
-    
-    R = Rand(rows=nrow(X), cols=nrow(X), min=0, max=0)
-    for(i7 in 1:qIn):
-        ma_ind_ns = P+pIn+i7
-        err_ind_ns = i7
-        ones_ns = Rand(rows=nrow(R)-err_ind_ns, cols=1, min=1, max=1)
-        d_ns = ones_ns * castAsScalar(w[ma_ind_ns,1])
-        R[1+err_ind_ns:nrow(R),1:ncol(R)-err_ind_ns] = R[1+err_ind_ns:nrow(R),1:ncol(R)-err_ind_ns] + diag(d_ns)
-    for(i8 in 1:Q):
-        ma_ind_s = P+pIn+qIn+i8
-        err_ind_s = s*i8
-        ones_s = Rand(rows=nrow(R)-err_ind_s, cols=1, min=1, max=1)
-        d_s = ones_s * castAsScalar(w[ma_ind_s,1])
-        R[1+err_ind_s:nrow(R),1:ncol(R)-err_ind_s] = R[1+err_ind_s:nrow(R),1:ncol(R)-err_ind_s] + diag(d_s)
-    
-    #checking for strict diagonal dominance
-    #required for jacobi's method
-    
-    max_iter = 100
-    tol = 0.01
-    
-    y_hat = Rand(rows=nrow(X), cols=1, min=0, max=0)
-    iter = 0
-    
-    if(useJacobi == 1):
-        check = sum(ppred(rowSums(abs(R)), 1, ">="))
-        if(check > 0):
-            print("R is not diagonal dominant. Suggest switching to an exact solver.")
-        diff = tol+1.0
-        while(iter < max_iter & diff > tol):
-            y_hat_new = b - dot(R, y_hat)
-            diff = sum((y_hat_new-y_hat)*(y_hat_new-y_hat))
-            y_hat = y_hat_new
-            iter = iter + 1
-    else:
-        ones = full(1, rows=nrow(X), cols=1)
-        A = R + diag(ones)
-        transpose_A = transpose(A)
-        Z = dot(transpose_A, A)
-        y = dot(transpose_A, b)
-        r = -y
-        p = -r
-        norm_r2 = sum(r*r)
-        continue = 1
-        if(norm_r2 == 0):
-            continue = 0
-        while(iter < max_iter & continue == 1):
-            q = dot(Z, p)
-            transpose_p = transpose(p)
-            alpha = norm_r2 / castAsScalar(dot(transpose_p, q))
-            y_hat = y_hat + alpha * p
-            old_norm_r2 = norm_r2
-            r = r + alpha * q
-            norm_r2 = sum(r * r)
-            if(norm_r2 < tol):
-                continue = 0
-            beta = norm_r2 / old_norm_r2
-            p = -r + beta * p
-            iter = iter + 1    
-    errs = X[,1] - y_hat
-    obj = sum(errs*errs)
-# end arima_css function
-
-#input col of time series data
-X = load($1)
-
-max_func_invoc = $2
-
-#non-seasonal order
-p = $3
-d = $4
-q = $5
-
-#seasonal order
-P = $6
-D = $7
-Q = $8
-
-#length of the season
-s = $9
-
-include_mean = $10
-
-useJacobi = $11
-
-num_rows = nrow(X)
-
-if(num_rows <= d):
-    print("non-seasonal differencing order should be larger than length of the time-series")
-
-Y = X
-for(i in 1:d):
-    n1 = nrow(Y)+0.0
-    Y = Y[2:n1,] - Y[1:n1-1,]
-
-num_rows = nrow(Y)+0.0
-if(num_rows <= s*D):
-    print("seasonal differencing order should be larger than number of observations divided by length of season")
-
-for(i in 1:D):
-    n1 = nrow(Y)+0.0
-    Y = Y[s+1:n1,] - Y[1:n1-s,]
-
-n = nrow(Y)
-
-max_ar_col = P+p
-max_ma_col = Q+q
-if(max_ar_col > max_ma_col):
-    max_arma_col = max_ar_col
-else:
-    max_arma_col = max_ma_col
-
-mu = 0
-if(include_mean == 1):
-    mu = sum(Y)/nrow(Y)
-    Y = Y - mu
-
-totcols = 1+p+P+Q+q #target col (X), p-P cols, q-Q cols  
-
-Z = Rand(rows=n, cols=totcols, min=0, max=0)
-Z[,1] = Y #target col
-
-parfor(i1 in 1:p, check=0):
-    Z[i1+1:n,1+i1] = Y[1:n-i1,]
-
-parfor(i2 in 1:P, check=0):
-    Z[s*i2+1:n,1+p+i2] = Y[1:n-s*i2,]
-
-parfor(i5 in 1:q, check=0):
-    Z[i5+1:n,1+P+p+i5] = Y[1:n-i5,]
-
-parfor(i6 in 1:Q, check=0):
-    Z[s*i6+1:n,1+P+p+q+i6] = Y[1:n-s*i6,]
-
-
-one = Rand(rows=1, cols=1, min=1, max=1)
-
-simplex = Rand(rows=totcols-1, cols=totcols, min=0, max=0)
-for(i in 2:ncol(simplex)):
-    simplex[i-1,i] = 0.1
-
-num_func_invoc = 0
-
-objvals = Rand(rows=1, cols=ncol(simplex), min=0, max=0)
-parfor(i3 in 1:ncol(simplex)):
-    arima_css_obj_val = arima_css(simplex[,i3], Z, p, P, q, Q, s, useJacobi)
-    objvals[1,i3] = arima_css_obj_val
-
-num_func_invoc = num_func_invoc + ncol(simplex)
-
-tol = 1.5 * (10**-8) * castAsScalar(objvals[1,1])
-
-continue = 1
-while(continue == 1 & num_func_invoc <= max_func_invoc):
-    best_index = 1
-    worst_index = 1
-    for(i in 2:ncol(objvals)):
-        this = castAsScalar(objvals[1,i])
-        that = castAsScalar(objvals[1,best_index])
-        if(that > this):
-            best_index = i
-        that = castAsScalar(objvals[1,worst_index])
-        if(that < this):
-            worst_index = i
-    
-    best_obj_val = castAsScalar(objvals[1,best_index])
-    worst_obj_val = castAsScalar(objvals[1,worst_index])
-    if(worst_obj_val <= best_obj_val + tol):
-        continue = 0
-    
-    print("#Function calls::" + num_func_invoc + " OBJ: " + best_obj_val)
-    
-    c = (rowSums(simplex) - simplex[,worst_index])/(nrow(simplex))
-    
-    x_r = 2*c - simplex[,worst_index]
-    obj_x_r = arima_css(x_r, Z, p, P, q, Q, s, useJacobi)
-    num_func_invoc = num_func_invoc + 1
-    
-    if(obj_x_r < best_obj_val):
-        x_e = 2*x_r - c
-        obj_x_e = arima_css(x_e, Z, p, P, q, Q, s, useJacobi)
-        num_func_invoc = num_func_invoc + 1
-        
-        if(obj_x_r <= obj_x_e):
-            simplex[,worst_index] = x_r
-            objvals[1,worst_index] = obj_x_r
-        else:
-            simplex[,worst_index] = x_e
-            objvals[1,worst_index] = obj_x_e
-    else:
-        if(obj_x_r < worst_obj_val):
-            simplex[,worst_index] = x_r
-            objvals[1,worst_index] = obj_x_r
-        
-        x_c_in = (simplex[,worst_index] + c)/2
-        obj_x_c_in = arima_css(x_c_in, Z, p, P, q, Q, s, useJacobi)
-        num_func_invoc = num_func_invoc + 1
-        
-        if(obj_x_c_in < castAsScalar(objvals[1,worst_index])):
-            simplex[,worst_index] = x_c_in
-            objvals[1,worst_index] = obj_x_c_in
-        else:
-            if(obj_x_r >= worst_obj_val):
-                best_point = simplex[,best_index]
-                parfor(i4 in 1:ncol(simplex)):
-                    if(i4 != best_index):
-                        simplex[,i4] = (simplex[,i4] + best_point)/2
-                        tmp = arima_css(simplex[,i4], Z, p, P, q, Q, s, useJacobi)
-                        objvals[1,i4] = tmp*one
-                num_func_invoc = num_func_invoc + ncol(simplex) - 1
-
-best_point = simplex[,best_index]
-if(include_mean == 1):
-    tmp2 = Rand(rows=totcols, cols=1, min=0, max=0)
-    tmp2[1:nrow(best_point),1] = best_point
-    tmp2[nrow(tmp2),1] = mu
-    best_point = tmp2
-
-save(best_point, $12, format="text")
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# Arguments
+# 1st arg: X (one column time series)
+# 2nd arg: max_func_invoc
+# 3rd arg: p (non-seasonal AR order)
+# 4th arg: d (non-seasonal differencing order)
+# 5th arg: q (non-seasonal MA order)
+# 6th arg: P (seasonal AR order)
+# 7th arg: D (seasonal differencing order)
+# 8th arg: Q (seasonal MA order)
+# 9th arg: s (period in terms of number of time-steps)
+# 10th arg: 0/1 (1 means include.mean)
+# 11th arg: 0 to use CG solver, 1 to use Jacobi's method
+# 12th arg: file name to store learnt parameters
+
+#changing to additive sar since R's arima seems to do that
+
+def arima_css(w:matrix[float], X:matrix[float], pIn: int, P: int, qIn: int, Q:int, s:int, useJacobi: int) -> (obj: float):
+    b = dot(X[,2:ncol(X)], w)
+    
+    R = Rand(rows=nrow(X), cols=nrow(X), min=0, max=0)
+    for(i7 in 1:qIn):
+        ma_ind_ns = P+pIn+i7
+        err_ind_ns = i7
+        ones_ns = Rand(rows=nrow(R)-err_ind_ns, cols=1, min=1, max=1)
+        d_ns = ones_ns * castAsScalar(w[ma_ind_ns,1])
+        R[1+err_ind_ns:nrow(R),1:ncol(R)-err_ind_ns] = R[1+err_ind_ns:nrow(R),1:ncol(R)-err_ind_ns] + diag(d_ns)
+    for(i8 in 1:Q):
+        ma_ind_s = P+pIn+qIn+i8
+        err_ind_s = s*i8
+        ones_s = Rand(rows=nrow(R)-err_ind_s, cols=1, min=1, max=1)
+        d_s = ones_s * castAsScalar(w[ma_ind_s,1])
+        R[1+err_ind_s:nrow(R),1:ncol(R)-err_ind_s] = R[1+err_ind_s:nrow(R),1:ncol(R)-err_ind_s] + diag(d_s)
+    
+    #checking for strict diagonal dominance
+    #required for jacobi's method
+    
+    max_iter = 100
+    tol = 0.01
+    
+    y_hat = Rand(rows=nrow(X), cols=1, min=0, max=0)
+    iter = 0
+    
+    if(useJacobi == 1):
+        check = sum(ppred(rowSums(abs(R)), 1, ">="))
+        if(check > 0):
+            print("R is not diagonal dominant. Suggest switching to an exact solver.")
+        diff = tol+1.0
+        while(iter < max_iter & diff > tol):
+            y_hat_new = b - dot(R, y_hat)
+            diff = sum((y_hat_new-y_hat)*(y_hat_new-y_hat))
+            y_hat = y_hat_new
+            iter = iter + 1
+    else:
+        ones = full(1, rows=nrow(X), cols=1)
+        A = R + diag(ones)
+        transpose_A = transpose(A)
+        Z = dot(transpose_A, A)
+        y = dot(transpose_A, b)
+        r = -y
+        p = -r
+        norm_r2 = sum(r*r)
+        continue = 1
+        if(norm_r2 == 0):
+            continue = 0
+        while(iter < max_iter & continue == 1):
+            q = dot(Z, p)
+            transpose_p = transpose(p)
+            alpha = norm_r2 / castAsScalar(dot(transpose_p, q))
+            y_hat = y_hat + alpha * p
+            old_norm_r2 = norm_r2
+            r = r + alpha * q
+            norm_r2 = sum(r * r)
+            if(norm_r2 < tol):
+                continue = 0
+            beta = norm_r2 / old_norm_r2
+            p = -r + beta * p
+            iter = iter + 1    
+    errs = X[,1] - y_hat
+    obj = sum(errs*errs)
+# end arima_css function
+
+#input col of time series data
+X = load($1)
+
+max_func_invoc = $2
+
+#non-seasonal order
+p = $3
+d = $4
+q = $5
+
+#seasonal order
+P = $6
+D = $7
+Q = $8
+
+#length of the season
+s = $9
+
+include_mean = $10
+
+useJacobi = $11
+
+num_rows = nrow(X)
+
+if(num_rows <= d):
+    print("non-seasonal differencing order should be larger than length of the time-series")
+
+Y = X
+for(i in 1:d):
+    n1 = nrow(Y)+0.0
+    Y = Y[2:n1,] - Y[1:n1-1,]
+
+num_rows = nrow(Y)+0.0
+if(num_rows <= s*D):
+    print("seasonal differencing order should be larger than number of observations divided by length of season")
+
+for(i in 1:D):
+    n1 = nrow(Y)+0.0
+    Y = Y[s+1:n1,] - Y[1:n1-s,]
+
+n = nrow(Y)
+
+max_ar_col = P+p
+max_ma_col = Q+q
+if(max_ar_col > max_ma_col):
+    max_arma_col = max_ar_col
+else:
+    max_arma_col = max_ma_col
+
+mu = 0
+if(include_mean == 1):
+    mu = sum(Y)/nrow(Y)
+    Y = Y - mu
+
+totcols = 1+p+P+Q+q #target col (X), p-P cols, q-Q cols  
+
+Z = Rand(rows=n, cols=totcols, min=0, max=0)
+Z[,1] = Y #target col
+
+parfor(i1 in 1:p, check=0):
+    Z[i1+1:n,1+i1] = Y[1:n-i1,]
+
+parfor(i2 in 1:P, check=0):
+    Z[s*i2+1:n,1+p+i2] = Y[1:n-s*i2,]
+
+parfor(i5 in 1:q, check=0):
+    Z[i5+1:n,1+P+p+i5] = Y[1:n-i5,]
+
+parfor(i6 in 1:Q, check=0):
+    Z[s*i6+1:n,1+P+p+q+i6] = Y[1:n-s*i6,]
+
+
+one = Rand(rows=1, cols=1, min=1, max=1)
+
+simplex = Rand(rows=totcols-1, cols=totcols, min=0, max=0)
+for(i in 2:ncol(simplex)):
+    simplex[i-1,i] = 0.1
+
+num_func_invoc = 0
+
+objvals = Rand(rows=1, cols=ncol(simplex), min=0, max=0)
+parfor(i3 in 1:ncol(simplex)):
+    arima_css_obj_val = arima_css(simplex[,i3], Z, p, P, q, Q, s, useJacobi)
+    objvals[1,i3] = arima_css_obj_val
+
+num_func_invoc = num_func_invoc + ncol(simplex)
+
+tol = 1.5 * (10**-8) * castAsScalar(objvals[1,1])
+
+continue = 1
+while(continue == 1 & num_func_invoc <= max_func_invoc):
+    best_index = 1
+    worst_index = 1
+    for(i in 2:ncol(objvals)):
+        this = castAsScalar(objvals[1,i])
+        that = castAsScalar(objvals[1,best_index])
+        if(that > this):
+            best_index = i
+        that = castAsScalar(objvals[1,worst_index])
+        if(that < this):
+            worst_index = i
+    
+    best_obj_val = castAsScalar(objvals[1,best_index])
+    worst_obj_val = castAsScalar(objvals[1,worst_index])
+    if(worst_obj_val <= best_obj_val + tol):
+        continue = 0
+    
+    print("#Function calls::" + num_func_invoc + " OBJ: " + best_obj_val)
+    
+    c = (rowSums(simplex) - simplex[,worst_index])/(nrow(simplex))
+    
+    x_r = 2*c - simplex[,worst_index]
+    obj_x_r = arima_css(x_r, Z, p, P, q, Q, s, useJacobi)
+    num_func_invoc = num_func_invoc + 1
+    
+    if(obj_x_r < best_obj_val):
+        x_e = 2*x_r - c
+        obj_x_e = arima_css(x_e, Z, p, P, q, Q, s, useJacobi)
+        num_func_invoc = num_func_invoc + 1
+        
+        if(obj_x_r <= obj_x_e):
+            simplex[,worst_index] = x_r
+            objvals[1,worst_index] = obj_x_r
+        else:
+            simplex[,worst_index] = x_e
+            objvals[1,worst_index] = obj_x_e
+    else:
+        if(obj_x_r < worst_obj_val):
+            simplex[,worst_index] = x_r
+            objvals[1,worst_index] = obj_x_r
+        
+        x_c_in = (simplex[,worst_index] + c)/2
+        obj_x_c_in = arima_css(x_c_in, Z, p, P, q, Q, s, useJacobi)
+        num_func_invoc = num_func_invoc + 1
+        
+        if(obj_x_c_in < castAsScalar(objvals[1,worst_index])):
+            simplex[,worst_index] = x_c_in
+            objvals[1,worst_index] = obj_x_c_in
+        else:
+            if(obj_x_r >= worst_obj_val):
+                best_point = simplex[,best_index]
+                parfor(i4 in 1:ncol(simplex)):
+                    if(i4 != best_index):
+                        simplex[,i4] = (simplex[,i4] + best_point)/2
+                        tmp = arima_css(simplex[,i4], Z, p, P, q, Q, s, useJacobi)
+                        objvals[1,i4] = tmp*one
+                num_func_invoc = num_func_invoc + ncol(simplex) - 1
+
+best_point = simplex[,best_index]
+if(include_mean == 1):
+    tmp2 = Rand(rows=totcols, cols=1, min=0, max=0)
+    tmp2[1:nrow(best_point),1] = best_point
+    tmp2[nrow(tmp2),1] = mu
+    best_point = tmp2
+
+save(best_point, $12, format="text")

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/ctableStats/Binomial.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/ctableStats/Binomial.dml b/src/test/scripts/applications/ctableStats/Binomial.dml
index 9d2f7f4..a3cbf5a 100644
--- a/src/test/scripts/applications/ctableStats/Binomial.dml
+++ b/src/test/scripts/applications/ctableStats/Binomial.dml
@@ -1,171 +1,171 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# COMMON FUNCTIONS TO SOLVE BINOMIAL DISTRIBUTION PROBLEMS
-# WORK OVER VECTORS (IN PARALLEL) TO SAVE COMPUTATION TIME
-
-# Computes binomial parameter  p  (the biased-coin probability)
-# such that  Prob [Binom(n, p) <= m] = alpha
-# Use it for "exact" confidence intervals over p given m, n:
-# For example, for 95%-confidence intervals, use  [p1, p2]
-# such that Prob [Binom(n, p1) <= m-1] = 0.975
-# and       Prob [Binom(n, p2) <= m  ] = 0.025
-binomQuantile =
-    function (Matrix[double] n_vector, Matrix[double] m_vector, Matrix[double] alpha_vector)
-    return (Matrix[double] p_vector)
-{
-    num_rows = nrow (n_vector);
-    p_min = Rand (rows = num_rows, cols = 1, min = 0.0, max = 0.0);
-    alpha_p_min = Rand (rows = num_rows, cols = 1, min = 1.0, max = 1.0);
-    p_max = Rand (rows = num_rows, cols = 1, min = 1.0, max = 1.0);
-    alpha_p_max = Rand (rows = num_rows, cols = 1, min = 0.0, max = 0.0);
-    
-    for (i in 1:27) {  #  Uses "division by half" method to solve equations
-        p_new = (p_min + p_max) / 2.0;
-        [alpha_p_new] = binomProb (n_vector, m_vector, p_new);
-        move_new_to_max = ppred (alpha_p_new, alpha_vector, "<");
-        p_max = (1 - move_new_to_max) * p_max + move_new_to_max * p_new;
-        p_min = (1 - move_new_to_max) * p_new + move_new_to_max * p_min;
-        alpha_p_max = (1 - move_new_to_max) * alpha_p_max + move_new_to_max * alpha_p_new;
-        alpha_p_min = (1 - move_new_to_max) * alpha_p_new + move_new_to_max * alpha_p_min;
-    }
-    p_vector = (p_min + p_max) / 2.0;
-}
-
-
-# Computes the cumulative distribution fuction of the binomial distribution,
-# that is,  Prob [Binom(n, p) <= m],  using the incomplete Beta function
-# approximated via a continued fraction, see "Handbook of Mathematical Functions"
-# edited by M. Abramowitz and I.A. Stegun, U.S. Nat-l Bureau of Standards,
-# 10th print (Dec 1972), Sec. 26.5.8-26.5.9, p. 944
-binomProb =
-    function (Matrix[double] n_vector, Matrix[double] m_vector, Matrix[double] p_vector)
-    return (Matrix[double] result)
-{
-    num_rows = nrow (n_vector);
-    num_iterations = 100;
-
-    mean_vector = p_vector * n_vector;
-    is_opposite = ppred (mean_vector, m_vector, "<");
-    l_vector = is_opposite * (n_vector - (m_vector + 1)) + (1 - is_opposite) * m_vector;
-    q_vector = is_opposite * (1.0 - p_vector) + (1 - is_opposite) * p_vector;
-    n_minus_l_vector = n_vector - l_vector;
-    
-    is_result_zero1 = ppred (l_vector, - 0.0000000001, "<");
-    is_result_one1  = ppred (n_minus_l_vector, 0.0000000001, "<");
-    is_result_zero2 = ppred (q_vector, 0.9999999999, ">");
-    is_result_one2  = ppred (q_vector, 0.0000000001, "<");
-    
-    is_result_zero  = is_result_zero1 + (1 - is_result_zero1) * is_result_zero2 * (1 - is_result_one1);
-    is_result_one   = (is_result_one1 + (1 - is_result_one1)  * is_result_one2) * (1 - is_result_zero);
-    
-    result = Rand (rows = num_rows, cols = 1, min = 0.0, max = 0.0);
-    result = result + is_result_one;
-    is_already_done = is_result_zero + is_result_one;
-    still_iterating = 1 - is_already_done;
-    
-    n_vector = (1 - is_already_done) * n_vector + is_already_done * 2;
-    l_vector = (1 - is_already_done) * l_vector + is_already_done;
-    n_minus_l_vector = (1 - is_already_done) * n_minus_l_vector + is_already_done;
-    q_vector = (1 - is_already_done) * q_vector + is_already_done * 0.8;
-
-    numer_old = q_vector;
-    denom_old = Rand (rows = num_rows, cols = 1, min = 0.0, max = 0.0);
-    numer =  Rand (rows = num_rows, cols = 1, min = 0.0, max = 0.0);
-    denom = 1.0 - q_vector;
-    
-    is_i_even = 1;
-
-    for (i in 1:num_iterations)  #  The continued fraction iterations
-    {
-        is_i_even = 1 - is_i_even;
-        e_term = Rand (rows = num_rows, cols = 1, min = 1.0, max = 1.0);
-        if (i > 1) {
-            if (is_i_even == 1) {
-                e_term = - (2 * n_minus_l_vector + (i - 2)) * (2 * l_vector - (i - 2));
-            }
-            if (is_i_even == 0) {
-                e_term = (i - 1) * (2 * n_vector + (i - 1));
-            }
-            e_term = e_term / (n_minus_l_vector + (i - 2)) / (n_minus_l_vector + (i - 1));
-            e_term = e_term * 0.25;
-        }
-        numer_new = still_iterating * (q_vector * numer + (1.0 - q_vector) * e_term * numer_old) + (1.0 - still_iterating);
-        denom_new = still_iterating * (q_vector * denom + (1.0 - q_vector) * e_term * denom_old) + (1.0 - still_iterating);
-        numer_old = still_iterating * (q_vector * numer) + (1.0 - still_iterating);
-        denom_old = still_iterating * (q_vector * denom) + (1.0 - still_iterating);
-        numer = numer_new;
-        denom = denom_new;
-        
-        abs_denom = abs (denom);
-        denom_too_big = ppred (abs_denom, 10000000000.0, ">");
-        denom_too_small = ppred (abs_denom, 0.0000000001, "<");
-        denom_normal = 1.0 - denom_too_big - denom_too_small;
-        rescale_vector = denom_too_big * 0.0000000001 + denom_too_small * 10000000000.0 + denom_normal;
-        numer_old = numer_old * rescale_vector;
-        denom_old = denom_old * rescale_vector;
-        numer = numer * rescale_vector;
-        denom = denom * rescale_vector;
-        
-        convergence_check_left  = abs (numer * denom_old - numer_old * denom);
-        convergence_check_right = abs (numer * denom_old) * 0.000000001;
-        has_converged = ppred (convergence_check_left, convergence_check_right, "<=");
-        has_converged = still_iterating * has_converged;
-        still_iterating = still_iterating - has_converged;
-        result = result + has_converged * numer / denom;
-    }
-    
-    result = result + still_iterating * numer / denom;
-    
-    n_vector_not_already_done = (1 - is_already_done) * n_vector;
-    l_vector_not_already_done = (1 - is_already_done) * l_vector;
-    n_minus_l_vector_not_already_done = (1 - is_already_done) * n_minus_l_vector;
-    q_vector_not_already_done = (1 - is_already_done) * q_vector + is_already_done;
-    one_minus_q_vector_not_already_done = (1 - is_already_done) * (1.0 - q_vector) + is_already_done;
-    
-    [n_logfact] = logFactorial (n_vector_not_already_done);
-    [l_logfact] = logFactorial (l_vector_not_already_done);
-    [n_minus_l_logfact] = logFactorial (n_minus_l_vector_not_already_done);
-    
-    log_update_factor = n_logfact - l_logfact - n_minus_l_logfact + l_vector * log (q_vector_not_already_done) 
-        + n_minus_l_vector * log (one_minus_q_vector_not_already_done);
-    updated_result = result * (is_already_done + (1 - is_already_done) * exp (log_update_factor));
-    result = is_opposite + (1 - 2 * is_opposite) * updated_result;
-}
-
-
-# Computes the logarithm of the factorial of x >= 0 via the Gamma function 
-# From paper: C. Lanczos "A Precision Approximation of the Gamma Function",
-# Journal of the SIAM: Numerical Analysis, Series B, Vol. 1, 1964, pp. 86-96
-logFactorial = function (Matrix[double] x) return (Matrix[double] logfact)
-{
-    y = 1.000000000178;
-    y = y + 76.180091729406 / (x + 1);
-    y = y - 86.505320327112 / (x + 2);
-    y = y + 24.014098222230 / (x + 3);
-    y = y -  1.231739516140 / (x + 4);
-    y = y +  0.001208580030 / (x + 5);
-    y = y -  0.000005363820 / (x + 6);
-    logfact = log(y) + (x + 0.5) * log(x + 5.5) - (x + 5.5) + 0.91893853320467; # log(sqrt(2 * PI));
-}
-
-
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# COMMON FUNCTIONS TO SOLVE BINOMIAL DISTRIBUTION PROBLEMS
+# WORK OVER VECTORS (IN PARALLEL) TO SAVE COMPUTATION TIME
+
+# Computes binomial parameter  p  (the biased-coin probability)
+# such that  Prob [Binom(n, p) <= m] = alpha
+# Use it for "exact" confidence intervals over p given m, n:
+# For example, for 95%-confidence intervals, use  [p1, p2]
+# such that Prob [Binom(n, p1) <= m-1] = 0.975
+# and       Prob [Binom(n, p2) <= m  ] = 0.025
+binomQuantile =
+    function (Matrix[double] n_vector, Matrix[double] m_vector, Matrix[double] alpha_vector)
+    return (Matrix[double] p_vector)
+{
+    num_rows = nrow (n_vector);
+    p_min = Rand (rows = num_rows, cols = 1, min = 0.0, max = 0.0);
+    alpha_p_min = Rand (rows = num_rows, cols = 1, min = 1.0, max = 1.0);
+    p_max = Rand (rows = num_rows, cols = 1, min = 1.0, max = 1.0);
+    alpha_p_max = Rand (rows = num_rows, cols = 1, min = 0.0, max = 0.0);
+    
+    for (i in 1:27) {  #  Uses "division by half" method to solve equations
+        p_new = (p_min + p_max) / 2.0;
+        [alpha_p_new] = binomProb (n_vector, m_vector, p_new);
+        move_new_to_max = ppred (alpha_p_new, alpha_vector, "<");
+        p_max = (1 - move_new_to_max) * p_max + move_new_to_max * p_new;
+        p_min = (1 - move_new_to_max) * p_new + move_new_to_max * p_min;
+        alpha_p_max = (1 - move_new_to_max) * alpha_p_max + move_new_to_max * alpha_p_new;
+        alpha_p_min = (1 - move_new_to_max) * alpha_p_new + move_new_to_max * alpha_p_min;
+    }
+    p_vector = (p_min + p_max) / 2.0;
+}
+
+
+# Computes the cumulative distribution fuction of the binomial distribution,
+# that is,  Prob [Binom(n, p) <= m],  using the incomplete Beta function
+# approximated via a continued fraction, see "Handbook of Mathematical Functions"
+# edited by M. Abramowitz and I.A. Stegun, U.S. Nat-l Bureau of Standards,
+# 10th print (Dec 1972), Sec. 26.5.8-26.5.9, p. 944
+binomProb =
+    function (Matrix[double] n_vector, Matrix[double] m_vector, Matrix[double] p_vector)
+    return (Matrix[double] result)
+{
+    num_rows = nrow (n_vector);
+    num_iterations = 100;
+
+    mean_vector = p_vector * n_vector;
+    is_opposite = ppred (mean_vector, m_vector, "<");
+    l_vector = is_opposite * (n_vector - (m_vector + 1)) + (1 - is_opposite) * m_vector;
+    q_vector = is_opposite * (1.0 - p_vector) + (1 - is_opposite) * p_vector;
+    n_minus_l_vector = n_vector - l_vector;
+    
+    is_result_zero1 = ppred (l_vector, - 0.0000000001, "<");
+    is_result_one1  = ppred (n_minus_l_vector, 0.0000000001, "<");
+    is_result_zero2 = ppred (q_vector, 0.9999999999, ">");
+    is_result_one2  = ppred (q_vector, 0.0000000001, "<");
+    
+    is_result_zero  = is_result_zero1 + (1 - is_result_zero1) * is_result_zero2 * (1 - is_result_one1);
+    is_result_one   = (is_result_one1 + (1 - is_result_one1)  * is_result_one2) * (1 - is_result_zero);
+    
+    result = Rand (rows = num_rows, cols = 1, min = 0.0, max = 0.0);
+    result = result + is_result_one;
+    is_already_done = is_result_zero + is_result_one;
+    still_iterating = 1 - is_already_done;
+    
+    n_vector = (1 - is_already_done) * n_vector + is_already_done * 2;
+    l_vector = (1 - is_already_done) * l_vector + is_already_done;
+    n_minus_l_vector = (1 - is_already_done) * n_minus_l_vector + is_already_done;
+    q_vector = (1 - is_already_done) * q_vector + is_already_done * 0.8;
+
+    numer_old = q_vector;
+    denom_old = Rand (rows = num_rows, cols = 1, min = 0.0, max = 0.0);
+    numer =  Rand (rows = num_rows, cols = 1, min = 0.0, max = 0.0);
+    denom = 1.0 - q_vector;
+    
+    is_i_even = 1;
+
+    for (i in 1:num_iterations)  #  The continued fraction iterations
+    {
+        is_i_even = 1 - is_i_even;
+        e_term = Rand (rows = num_rows, cols = 1, min = 1.0, max = 1.0);
+        if (i > 1) {
+            if (is_i_even == 1) {
+                e_term = - (2 * n_minus_l_vector + (i - 2)) * (2 * l_vector - (i - 2));
+            }
+            if (is_i_even == 0) {
+                e_term = (i - 1) * (2 * n_vector + (i - 1));
+            }
+            e_term = e_term / (n_minus_l_vector + (i - 2)) / (n_minus_l_vector + (i - 1));
+            e_term = e_term * 0.25;
+        }
+        numer_new = still_iterating * (q_vector * numer + (1.0 - q_vector) * e_term * numer_old) + (1.0 - still_iterating);
+        denom_new = still_iterating * (q_vector * denom + (1.0 - q_vector) * e_term * denom_old) + (1.0 - still_iterating);
+        numer_old = still_iterating * (q_vector * numer) + (1.0 - still_iterating);
+        denom_old = still_iterating * (q_vector * denom) + (1.0 - still_iterating);
+        numer = numer_new;
+        denom = denom_new;
+        
+        abs_denom = abs (denom);
+        denom_too_big = ppred (abs_denom, 10000000000.0, ">");
+        denom_too_small = ppred (abs_denom, 0.0000000001, "<");
+        denom_normal = 1.0 - denom_too_big - denom_too_small;
+        rescale_vector = denom_too_big * 0.0000000001 + denom_too_small * 10000000000.0 + denom_normal;
+        numer_old = numer_old * rescale_vector;
+        denom_old = denom_old * rescale_vector;
+        numer = numer * rescale_vector;
+        denom = denom * rescale_vector;
+        
+        convergence_check_left  = abs (numer * denom_old - numer_old * denom);
+        convergence_check_right = abs (numer * denom_old) * 0.000000001;
+        has_converged = ppred (convergence_check_left, convergence_check_right, "<=");
+        has_converged = still_iterating * has_converged;
+        still_iterating = still_iterating - has_converged;
+        result = result + has_converged * numer / denom;
+    }
+    
+    result = result + still_iterating * numer / denom;
+    
+    n_vector_not_already_done = (1 - is_already_done) * n_vector;
+    l_vector_not_already_done = (1 - is_already_done) * l_vector;
+    n_minus_l_vector_not_already_done = (1 - is_already_done) * n_minus_l_vector;
+    q_vector_not_already_done = (1 - is_already_done) * q_vector + is_already_done;
+    one_minus_q_vector_not_already_done = (1 - is_already_done) * (1.0 - q_vector) + is_already_done;
+    
+    [n_logfact] = logFactorial (n_vector_not_already_done);
+    [l_logfact] = logFactorial (l_vector_not_already_done);
+    [n_minus_l_logfact] = logFactorial (n_minus_l_vector_not_already_done);
+    
+    log_update_factor = n_logfact - l_logfact - n_minus_l_logfact + l_vector * log (q_vector_not_already_done) 
+        + n_minus_l_vector * log (one_minus_q_vector_not_already_done);
+    updated_result = result * (is_already_done + (1 - is_already_done) * exp (log_update_factor));
+    result = is_opposite + (1 - 2 * is_opposite) * updated_result;
+}
+
+
+# Computes the logarithm of the factorial of x >= 0 via the Gamma function 
+# From paper: C. Lanczos "A Precision Approximation of the Gamma Function",
+# Journal of the SIAM: Numerical Analysis, Series B, Vol. 1, 1964, pp. 86-96
+logFactorial = function (Matrix[double] x) return (Matrix[double] logfact)
+{
+    y = 1.000000000178;
+    y = y + 76.180091729406 / (x + 1);
+    y = y - 86.505320327112 / (x + 2);
+    y = y + 24.014098222230 / (x + 3);
+    y = y -  1.231739516140 / (x + 4);
+    y = y +  0.001208580030 / (x + 5);
+    y = y -  0.000005363820 / (x + 6);
+    logfact = log(y) + (x + 0.5) * log(x + 5.5) - (x + 5.5) + 0.91893853320467; # log(sqrt(2 * PI));
+}
+
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/ctableStats/ctci.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/ctableStats/ctci.dml b/src/test/scripts/applications/ctableStats/ctci.dml
index 22d3044..4aa3ae3 100644
--- a/src/test/scripts/applications/ctableStats/ctci.dml
+++ b/src/test/scripts/applications/ctableStats/ctci.dml
@@ -1,145 +1,145 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# CTCI.DML: TWO-ATTRIBUTE CONTINGENCY TABLE CONFIDENCE INTERVAL ANALYSIS for categorical data
-# Computes 95% confidence intervals for binomial ratios using both Wilson and Exact Scores
-# INPUT  1: Dataset as an (N x 2) matrix, input file path/name
-#       Rows: Individual data points
-#      Col 1: Partition attribute (e.g. US State code), must be positive integer
-#      Col 2: Label attribute (e.g. positive/negative/neutral), must be positive integer
-# INPUT  2: Number of data points N (i.e. input matrix size, rows)
-# INPUT  3: "Null" label code, 0 if there is no "null" label
-# INPUT  4: Head Matrix output file path/name
-# INPUT  5: Body Matrix output file path/name
-# OUTPUT 1: Head Matrix with per-label information:
-#       Rows: One row per each distinct code of the label attribute (1, 2, 3, ...)
-#      Col 1: First column index of the Body Matrix block for this label, or 0 if "null"
-#      Col 2: Overall number of data points with this label
-#      Col 3: Percentage (out of 100) of data points to have this label
-# OUTPUT 2: Body Matrix with per-partition statistics:
-#       Rows: One row per each distinct code of the partition attribute (1, 2, 3, ...)
-#    Columns: Arranged in blocks with the same schema, one block per each non-null label,
-#             with the first column index specified in the Head Matrix
-#    Block Col  0:  Number of points, i.e. the count, with this label in the given partition
-#    Block Col  1:  Percentage (out of 100) of points to have the label vs. all in the partition
-#    Block Col  2:  Small side, 95% confid. int-l (of above percentage), Wilson Score
-#    Block Col  3:  Large side, 95% confid. int-l (of above percentage), Wilson Score
-#    Block Col  4:  Small side, 95% confid. int-l (of above percentage), Exact Binomial Score
-#    Block Col  5:  Large side, 95% confid. int-l (of above percentage), Exact Binomial Score
-#    Block Col  6:  Percentage (out of 100) of points to lie in the partition vs. all with the label
-#    Block Col  7:  Small side, 95% confid. int-l (of above percentage), Wilson Score
-#    Block Col  8:  Large side, 95% confid. int-l (of above percentage), Wilson Score
-#    Block Col  9:  Small side, 95% confid. int-l (of above percentage), Exact Binomial Score
-#    Block Col 10:  Large side, 95% confid. int-l (of above percentage), Exact Binomial Score
-#    Block Col 11-99:  RESERVED and set to zero
-#
-# EXAMPLE:
-# hadoop jar SystemML.jar -f PATH/ctci.dml -args PATH/ctci_test.mtx 5602 2 PATH/ctci_test_head.mtx PATH/ctci_test_body.mtx
-
-setwd ("test/scripts/applications/ctableStats"); # SET TO THE SCRIPT FOLDER
-source ("Binomial.dml"); # THIS SCRIPT SHOULD BE THERE TOO
-powerOfTen = 10000;      # CONSTANT FOR ROUNDING THE RESULTS
-
-print ("BEGIN CTABLE ANALYSIS SCRIPT");
-print ("Reading the input matrix...");
-InData = read($1, rows = $2, cols = 2, format = "text");
-print ("Computing the contingency table...");
-CT = table (InData [, 1], InData [, 2]);
-# DEBUG LINE ONLY: write (CT, "test/scripts/applications/ctableStats/ctci_test_CT.mtx", format="text");
-print ("Preparing for the output tables...");
-nullLabel = $3;
-numPartitions = nrow (CT);
-numLabels = ncol (CT);
-cntPartitions = rowSums (CT);
-cntLabels = t(colSums (CT));
-numBodyBlocks = numLabels;
-for (iLabel in 1:numLabels) {
-    if (iLabel == nullLabel) {
-        numBodyBlocks = numBodyBlocks - 1;
-}   }
-numBodyCols = numBodyBlocks * 100;
-HeadMtx = Rand (rows = numLabels, cols = 3, min = 0, max = 0);
-HeadMtx [, 2] = cntLabels;
-HeadMtx [, 3] = 100.0 * cntLabels / sum (cntLabels);
-BodyMtx = Rand (rows = numPartitions, cols = numBodyCols, min = 0, max = 0);
-zeros = Rand (rows = numPartitions, cols = 1, min = 0, max = 0);
-zero = Rand (rows = 1, cols = 1, min = 0, max = 0);
-big_alpha   = 0.975 + zeros;
-small_alpha = 0.025 + zeros;
-iBlock = 0;
-for (iLabel in 1:numLabels)
-{
-    if (iLabel != nullLabel) {
-        if (1==1) {
-            print ("Processing label " + iLabel + ":");
-        }
-        fCol = 1 + iBlock * 100;
-        HeadMtx [iLabel, 1] = fCol + zero;
-        cntPartitionsWithLabel = CT [, iLabel];
-        BodyMtx [, fCol] = cntPartitionsWithLabel;
-
-        print ("    (partition & label) / (all partition) ratios...");
-        
-        cntPartitionsWithLabel_minus_1 = cntPartitionsWithLabel - 1;
-        [ratio1, left_conf_wilson1, right_conf_wilson1] = 
-            wilson_confidence (cntPartitions, cntPartitionsWithLabel);
-        [left_conf_exact1] = binomQuantile (cntPartitions, cntPartitionsWithLabel_minus_1, big_alpha);
-        [right_conf_exact1] = binomQuantile (cntPartitions, cntPartitionsWithLabel, small_alpha);
-        
-        BodyMtx [, fCol + 1] = round (ratio1 * 100.0 * powerOfTen) / powerOfTen;
-        BodyMtx [, fCol + 2] = round (left_conf_wilson1 * 100.0 * powerOfTen) / powerOfTen;
-        BodyMtx [, fCol + 3] = round (right_conf_wilson1 * 100.0 * powerOfTen) / powerOfTen;
-        BodyMtx [, fCol + 4] = round (left_conf_exact1 * 100.0 * powerOfTen) / powerOfTen;
-        BodyMtx [, fCol + 5] = round (right_conf_exact1 * 100.0 * powerOfTen) / powerOfTen;
-        
-        print ("    (partition & label) / (all label) ratios...");
-        
-        cntThisLabel = zeros + castAsScalar (cntLabels [iLabel, 1]);
-        [ratio2, left_conf_wilson2, right_conf_wilson2] = 
-            wilson_confidence (cntThisLabel, cntPartitionsWithLabel);
-        [left_conf_exact2] = binomQuantile (cntThisLabel, cntPartitionsWithLabel_minus_1, big_alpha);
-        [right_conf_exact2] = binomQuantile (cntThisLabel, cntPartitionsWithLabel, small_alpha);
-        
-        BodyMtx [, fCol + 6] = round (ratio2 * 100.0 * powerOfTen) / powerOfTen;
-        BodyMtx [, fCol + 7] = round (left_conf_wilson2 * 100.0 * powerOfTen) / powerOfTen;
-        BodyMtx [, fCol + 8] = round (right_conf_wilson2 * 100.0 * powerOfTen) / powerOfTen;
-        BodyMtx [, fCol + 9] = round (left_conf_exact2 * 100.0 * powerOfTen) / powerOfTen;
-        BodyMtx [, fCol +10] = round (right_conf_exact2 * 100.0 * powerOfTen) / powerOfTen;
-        
-        iBlock = iBlock + 1;
-}   }
-print ("Writing the output matrices...");
-write (HeadMtx, $4, format="text");
-write (BodyMtx, $5, format="text");
-print ("END CTABLE ANALYSIS SCRIPT");
-
-wilson_confidence = function (Matrix[double] n, Matrix[double] m)
-return (Matrix[double] ratio, Matrix[double] conf_left, Matrix[double] conf_right)
-{
-    z = 1.96;      # 97.5% normal percentile, for 95% confidence interval
-    z_sq_n = z * z * n;
-    qroot = sqrt (z_sq_n * (m * (n - m) + z_sq_n / 4));
-    midpt = n * m + z_sq_n / 2;
-    denom = n * n + z_sq_n;
-    ratio = m / n;
-    conf_left  = (midpt - qroot) / denom;
-    conf_right = (midpt + qroot) / denom;
-}
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# CTCI.DML: TWO-ATTRIBUTE CONTINGENCY TABLE CONFIDENCE INTERVAL ANALYSIS for categorical data
+# Computes 95% confidence intervals for binomial ratios using both Wilson and Exact Scores
+# INPUT  1: Dataset as an (N x 2) matrix, input file path/name
+#       Rows: Individual data points
+#      Col 1: Partition attribute (e.g. US State code), must be positive integer
+#      Col 2: Label attribute (e.g. positive/negative/neutral), must be positive integer
+# INPUT  2: Number of data points N (i.e. input matrix size, rows)
+# INPUT  3: "Null" label code, 0 if there is no "null" label
+# INPUT  4: Head Matrix output file path/name
+# INPUT  5: Body Matrix output file path/name
+# OUTPUT 1: Head Matrix with per-label information:
+#       Rows: One row per each distinct code of the label attribute (1, 2, 3, ...)
+#      Col 1: First column index of the Body Matrix block for this label, or 0 if "null"
+#      Col 2: Overall number of data points with this label
+#      Col 3: Percentage (out of 100) of data points to have this label
+# OUTPUT 2: Body Matrix with per-partition statistics:
+#       Rows: One row per each distinct code of the partition attribute (1, 2, 3, ...)
+#    Columns: Arranged in blocks with the same schema, one block per each non-null label,
+#             with the first column index specified in the Head Matrix
+#    Block Col  0:  Number of points, i.e. the count, with this label in the given partition
+#    Block Col  1:  Percentage (out of 100) of points to have the label vs. all in the partition
+#    Block Col  2:  Small side, 95% confid. int-l (of above percentage), Wilson Score
+#    Block Col  3:  Large side, 95% confid. int-l (of above percentage), Wilson Score
+#    Block Col  4:  Small side, 95% confid. int-l (of above percentage), Exact Binomial Score
+#    Block Col  5:  Large side, 95% confid. int-l (of above percentage), Exact Binomial Score
+#    Block Col  6:  Percentage (out of 100) of points to lie in the partition vs. all with the label
+#    Block Col  7:  Small side, 95% confid. int-l (of above percentage), Wilson Score
+#    Block Col  8:  Large side, 95% confid. int-l (of above percentage), Wilson Score
+#    Block Col  9:  Small side, 95% confid. int-l (of above percentage), Exact Binomial Score
+#    Block Col 10:  Large side, 95% confid. int-l (of above percentage), Exact Binomial Score
+#    Block Col 11-99:  RESERVED and set to zero
+#
+# EXAMPLE:
+# hadoop jar SystemML.jar -f PATH/ctci.dml -args PATH/ctci_test.mtx 5602 2 PATH/ctci_test_head.mtx PATH/ctci_test_body.mtx
+
+setwd ("test/scripts/applications/ctableStats"); # SET TO THE SCRIPT FOLDER
+source ("Binomial.dml"); # THIS SCRIPT SHOULD BE THERE TOO
+powerOfTen = 10000;      # CONSTANT FOR ROUNDING THE RESULTS
+
+print ("BEGIN CTABLE ANALYSIS SCRIPT");
+print ("Reading the input matrix...");
+InData = read($1, rows = $2, cols = 2, format = "text");
+print ("Computing the contingency table...");
+CT = table (InData [, 1], InData [, 2]);
+# DEBUG LINE ONLY: write (CT, "test/scripts/applications/ctableStats/ctci_test_CT.mtx", format="text");
+print ("Preparing for the output tables...");
+nullLabel = $3;
+numPartitions = nrow (CT);
+numLabels = ncol (CT);
+cntPartitions = rowSums (CT);
+cntLabels = t(colSums (CT));
+numBodyBlocks = numLabels;
+for (iLabel in 1:numLabels) {
+    if (iLabel == nullLabel) {
+        numBodyBlocks = numBodyBlocks - 1;
+}   }
+numBodyCols = numBodyBlocks * 100;
+HeadMtx = Rand (rows = numLabels, cols = 3, min = 0, max = 0);
+HeadMtx [, 2] = cntLabels;
+HeadMtx [, 3] = 100.0 * cntLabels / sum (cntLabels);
+BodyMtx = Rand (rows = numPartitions, cols = numBodyCols, min = 0, max = 0);
+zeros = Rand (rows = numPartitions, cols = 1, min = 0, max = 0);
+zero = Rand (rows = 1, cols = 1, min = 0, max = 0);
+big_alpha   = 0.975 + zeros;
+small_alpha = 0.025 + zeros;
+iBlock = 0;
+for (iLabel in 1:numLabels)
+{
+    if (iLabel != nullLabel) {
+        if (1==1) {
+            print ("Processing label " + iLabel + ":");
+        }
+        fCol = 1 + iBlock * 100;
+        HeadMtx [iLabel, 1] = fCol + zero;
+        cntPartitionsWithLabel = CT [, iLabel];
+        BodyMtx [, fCol] = cntPartitionsWithLabel;
+
+        print ("    (partition & label) / (all partition) ratios...");
+        
+        cntPartitionsWithLabel_minus_1 = cntPartitionsWithLabel - 1;
+        [ratio1, left_conf_wilson1, right_conf_wilson1] = 
+            wilson_confidence (cntPartitions, cntPartitionsWithLabel);
+        [left_conf_exact1] = binomQuantile (cntPartitions, cntPartitionsWithLabel_minus_1, big_alpha);
+        [right_conf_exact1] = binomQuantile (cntPartitions, cntPartitionsWithLabel, small_alpha);
+        
+        BodyMtx [, fCol + 1] = round (ratio1 * 100.0 * powerOfTen) / powerOfTen;
+        BodyMtx [, fCol + 2] = round (left_conf_wilson1 * 100.0 * powerOfTen) / powerOfTen;
+        BodyMtx [, fCol + 3] = round (right_conf_wilson1 * 100.0 * powerOfTen) / powerOfTen;
+        BodyMtx [, fCol + 4] = round (left_conf_exact1 * 100.0 * powerOfTen) / powerOfTen;
+        BodyMtx [, fCol + 5] = round (right_conf_exact1 * 100.0 * powerOfTen) / powerOfTen;
+        
+        print ("    (partition & label) / (all label) ratios...");
+        
+        cntThisLabel = zeros + castAsScalar (cntLabels [iLabel, 1]);
+        [ratio2, left_conf_wilson2, right_conf_wilson2] = 
+            wilson_confidence (cntThisLabel, cntPartitionsWithLabel);
+        [left_conf_exact2] = binomQuantile (cntThisLabel, cntPartitionsWithLabel_minus_1, big_alpha);
+        [right_conf_exact2] = binomQuantile (cntThisLabel, cntPartitionsWithLabel, small_alpha);
+        
+        BodyMtx [, fCol + 6] = round (ratio2 * 100.0 * powerOfTen) / powerOfTen;
+        BodyMtx [, fCol + 7] = round (left_conf_wilson2 * 100.0 * powerOfTen) / powerOfTen;
+        BodyMtx [, fCol + 8] = round (right_conf_wilson2 * 100.0 * powerOfTen) / powerOfTen;
+        BodyMtx [, fCol + 9] = round (left_conf_exact2 * 100.0 * powerOfTen) / powerOfTen;
+        BodyMtx [, fCol +10] = round (right_conf_exact2 * 100.0 * powerOfTen) / powerOfTen;
+        
+        iBlock = iBlock + 1;
+}   }
+print ("Writing the output matrices...");
+write (HeadMtx, $4, format="text");
+write (BodyMtx, $5, format="text");
+print ("END CTABLE ANALYSIS SCRIPT");
+
+wilson_confidence = function (Matrix[double] n, Matrix[double] m)
+return (Matrix[double] ratio, Matrix[double] conf_left, Matrix[double] conf_right)
+{
+    z = 1.96;      # 97.5% normal percentile, for 95% confidence interval
+    z_sq_n = z * z * n;
+    qroot = sqrt (z_sq_n * (m * (n - m) + z_sq_n / 4));
+    midpt = n * m + z_sq_n / 2;
+    denom = n * n + z_sq_n;
+    ratio = m / n;
+    conf_left  = (midpt - qroot) / denom;
+    conf_right = (midpt + qroot) / denom;
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/ctableStats/ctci_odds.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/ctableStats/ctci_odds.dml b/src/test/scripts/applications/ctableStats/ctci_odds.dml
index 856b24c..3d97489 100644
--- a/src/test/scripts/applications/ctableStats/ctci_odds.dml
+++ b/src/test/scripts/applications/ctableStats/ctci_odds.dml
@@ -1,178 +1,178 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# CTCI_ODDS.DML: TWO-ATTRIBUTE CONTINGENCY TABLE ODDS-RATIO CONFIDENCE INTERVAL ANALYSIS
-# Computes 95% confidence intervals for odds ratios using a Gaussian approximation for log-odds
-# INPUT  1: Dataset as an (N x 2) matrix, input file path/name
-#       Rows: Individual data points
-#      Col 1: Partition attribute (e.g. US State code), must be positive integer
-#      Col 2: Label attribute (e.g. positive/negative/neutral), must be positive integer
-# INPUT  2: Number of data points N (i.e. input matrix size, rows)
-# INPUT  3: "Null" label code, 0 if there is no "null" label
-# INPUT  4: Output Matrix file path/name
-# OUTPUT 1: Output Matrix with the following information:
-#       Rows: One row per each distinct pair (partition, label) excluding "null" label
-#     Col  1: Partition attribute value
-#     Col  2: Label attribute value
-#     Col  3: Number of data points with this (partition, label) pair
-#     Col  4: Number of data points with the same partition, but a different label
-#     Col  5: Number of data points with a different partition, but the same label
-#     Col  6: Number of data points with a different partition and a different label
-#     Col  7: The odds ratio
-#     Col  8: Small side of 95%-confidence interval for the odds ratio
-#     Col  9: Large side of 95%-confidence interval for the odds ratio
-#     Col 10: How many sigmas away the log-odds ratio is from zero
-#     Col 11: Chi-squared statistic
-#     Col 12: Cramer's V * 100%
-#     Col 13: Log-odds ratio P-value * 100%
-#     Col 14: Chi-squared P-value * 100%
-#     Col 15: Percentage (out of 100) of data points in this paritition to have this label
-#     Col 16: Small side of 95%-confid. int-l of above percentage, Wilson Score
-#     Col 17: Large side of 95%-confid. int-l of above percentage, Wilson Score
-#     Col 18: Percentage (out of 100) of data points overall to have this label
-#     Col 19: Small side of 95%-confid. int-l of above percentage, Wilson Score
-#     Col 20: Large side of 95%-confid. int-l of above percentage, Wilson Score
-#     Col 21: Percentage (out of 100) of data points overall to lie in this partition
-#     Col 22: Small side of 95%-confid. int-l of above percentage, Wilson Score
-#     Col 23: Large side of 95%-confid. int-l of above percentage, Wilson Score
-#
-# EXAMPLE:
-# hadoop jar SystemML.jar -f PATH/ctci_odds.dml -args PATH/ctci_test.mtx 5602 2 PATH/ctci_odds_test_output.mtx
-
-powerOfTen = 10000;      # CONSTANT FOR ROUNDING THE RESULTS
-print ("BEGIN CTABLE ANALYSIS SCRIPT");
-print ("Reading the input matrix...");
-InData = read ($1, rows = $2, cols = 2, format = "text");
-numPoints = $2;
-print ("Computing the contingency table...");
-CT = table (InData [, 1], InData [, 2]);
-# DEBUG LINE ONLY: write (CT, "test/scripts/applications/ctableStats/ctci_test_CT.mtx", format="text");
-print ("Preparing for the output tables...");
-nullLabel = $3;
-numPartitions = nrow (CT);
-numLabels = ncol (CT);
-numOutRows = numPartitions * numLabels;
-if (nullLabel > 0 & nullLabel <= numLabels) {
-    numOutRows = numOutRows - numPartitions;
-}
-cntPartitions = rowSums (CT);
-cntLabels = t(colSums (CT));
-OutMtx = Rand (rows = numOutRows, cols = 23, min = 0, max = 0);
-idx = 0;
-zero = Rand (rows = 1, cols = 1, min = 0, max = 0);
-for (iLabel in 1:numLabels)
-{
-    if (iLabel != nullLabel)
-    {
-        if (1==1) {
-            print ("Processing label " + iLabel + ":");
-        }
-        for (iPartition in 1:numPartitions)
-        {
-            idx = idx + 1;
-            OutMtx [idx,  1] = iPartition + zero;
-            OutMtx [idx,  2] = iLabel + zero;
-
-            n11 = CT [iPartition, iLabel];
-            n01 = cntPartitions [iPartition, 1] - CT [iPartition, iLabel];
-            n10 = cntLabels [iLabel, 1] -  CT [iPartition, iLabel];
-            n00 = numPoints - cntPartitions [iPartition, 1] - cntLabels [iLabel, 1] + CT [iPartition, iLabel];
-            odds_ratio = n11 * n00 / (n01 * n10);
-            sigma_log_odds_ratio = sqrt (1.0 / n00 + 1.0 / n01 + 1.0 / n10 + 1.0 / n11);
-            odds_ratio_interval_small = odds_ratio / exp (1.96 * sigma_log_odds_ratio);
-            odds_ratio_interval_large = odds_ratio * exp (1.96 * sigma_log_odds_ratio);
-            num_sigmas_away = abs (log (odds_ratio) / sigma_log_odds_ratio);
-            chi_diff = n00 * n11 - n01 * n10;
-            chi_denom = (n00 + n01) * (n10 + n11) * (n00 + n10) * (n01 + n11);
-            chi_square = (n00 + n01 + n10 + n11) * chi_diff * chi_diff / chi_denom;
-            cramers_V = sqrt (chi_square / (n00 + n01 + n10 + n11));
-
-            OutMtx [idx,  3] = n11;
-            OutMtx [idx,  4] = n01;
-            OutMtx [idx,  5] = n10;
-            OutMtx [idx,  6] = n00;
-            OutMtx [idx,  7] = round (odds_ratio * powerOfTen) / powerOfTen;
-            OutMtx [idx,  8] = round (odds_ratio_interval_small * powerOfTen) / powerOfTen;
-            OutMtx [idx,  9] = round (odds_ratio_interval_large * powerOfTen) / powerOfTen;
-            OutMtx [idx, 10] = round (num_sigmas_away * powerOfTen) / powerOfTen;
-            OutMtx [idx, 11] = round (chi_square * powerOfTen) / powerOfTen;
-            OutMtx [idx, 12] = round (100.0 * cramers_V * powerOfTen) / powerOfTen;
-            
-            gauss_pts = Rand (rows = 2, cols = 1, min = 0, max = 0);
-            gauss_pts [1, 1] = - num_sigmas_away;
-            gauss_pts [2, 1] = - sqrt (chi_square);
-            gauss_probs = gaussian_probability (gauss_pts);
-            pval_odds = gauss_probs [1, 1] * 2.0;
-            pval_chi2 = gauss_probs [2, 1] * 2.0;
-            
-            OutMtx [idx, 13] = round (100.0 * pval_odds * powerOfTen) / powerOfTen;
-            OutMtx [idx, 14] = round (100.0 * pval_chi2 * powerOfTen) / powerOfTen;
-
-            m_cnt = Rand (rows = 3, cols = 1, min = 0, max = 0);
-            n_cnt = Rand (rows = 3, cols = 1, min = 0, max = 0);
-            m_cnt [1, 1] = CT [iPartition, iLabel];
-            n_cnt [1, 1] = cntPartitions [iPartition, 1];
-            m_cnt [2, 1] = cntLabels [iLabel, 1];
-            n_cnt [2, 1] = numPoints + zero;
-            m_cnt [3, 1] = cntPartitions [iPartition, 1];
-            n_cnt [3, 1] = numPoints + zero;
-            [ratios, conf_interval_small, conf_interval_large] = wilson_confidence (n_cnt, m_cnt);
-            OutMtx [idx, 15] = round (100.0 * ratios [1, 1] * powerOfTen) / powerOfTen;
-            OutMtx [idx, 16] = round (100.0 * conf_interval_small [1, 1] * powerOfTen) / powerOfTen;
-            OutMtx [idx, 17] = round (100.0 * conf_interval_large [1, 1] * powerOfTen) / powerOfTen;
-            OutMtx [idx, 18] = round (100.0 * ratios [2, 1] * powerOfTen) / powerOfTen;
-            OutMtx [idx, 19] = round (100.0 * conf_interval_small [2, 1] * powerOfTen) / powerOfTen;
-            OutMtx [idx, 20] = round (100.0 * conf_interval_large [2, 1] * powerOfTen) / powerOfTen;
-            OutMtx [idx, 21] = round (100.0 * ratios [3, 1] * powerOfTen) / powerOfTen;
-            OutMtx [idx, 22] = round (100.0 * conf_interval_small [3, 1] * powerOfTen) / powerOfTen;
-            OutMtx [idx, 23] = round (100.0 * conf_interval_large [3, 1] * powerOfTen) / powerOfTen;
-}   }   }
-
-print ("Writing the output matrix...");
-write (OutMtx, $4, format="text");
-print ("END CTABLE ANALYSIS SCRIPT");
-
-wilson_confidence = function (Matrix[double] n, Matrix[double] m)
-return (Matrix[double] ratio, Matrix[double] conf_left, Matrix[double] conf_right)
-{
-    z = 1.96;      # 97.5% normal percentile, for 95% confidence interval
-    z_sq_n = z * z * n;
-    qroot = sqrt (z_sq_n * (m * (n - m) + z_sq_n / 4));
-    midpt = n * m + z_sq_n / 2;
-    denom = n * n + z_sq_n;
-    ratio = m / n;
-    conf_left  = (midpt - qroot) / denom;
-    conf_right = (midpt + qroot) / denom;
-}
-
-gaussian_probability = function (Matrix[double] vector_of_points)
-    return (Matrix[double] vector_of_probabilities)
-{
-    t_gp = 1.0 / (1.0 + abs (vector_of_points) * 0.231641888);  # 0.231641888 = 0.3275911 / sqrt (2.0)
-    erf_gp = 1.0 - t_gp * ( 0.254829592 
-                 + t_gp * (-0.284496736 # "Handbook of Mathematical Functions", ed. by M. Abramowitz and I.A. Stegun,
-                 + t_gp * ( 1.421413741 # U.S. Nat-l Bureau of Standards, 10th print (Dec 1972), Sec. 7.1.26, p. 299
-                 + t_gp * (-1.453152027 
-                 + t_gp *   1.061405429)))) * exp (- vector_of_points * vector_of_points / 2.0);
-    erf_gp = erf_gp * 2.0 * (ppred (vector_of_points, 0.0, ">") - 0.5);
-    vector_of_probabilities = 0.5 + 0.5 * erf_gp;
-}
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# CTCI_ODDS.DML: TWO-ATTRIBUTE CONTINGENCY TABLE ODDS-RATIO CONFIDENCE INTERVAL ANALYSIS
+# Computes 95% confidence intervals for odds ratios using a Gaussian approximation for log-odds
+# INPUT  1: Dataset as an (N x 2) matrix, input file path/name
+#       Rows: Individual data points
+#      Col 1: Partition attribute (e.g. US State code), must be positive integer
+#      Col 2: Label attribute (e.g. positive/negative/neutral), must be positive integer
+# INPUT  2: Number of data points N (i.e. input matrix size, rows)
+# INPUT  3: "Null" label code, 0 if there is no "null" label
+# INPUT  4: Output Matrix file path/name
+# OUTPUT 1: Output Matrix with the following information:
+#       Rows: One row per each distinct pair (partition, label) excluding "null" label
+#     Col  1: Partition attribute value
+#     Col  2: Label attribute value
+#     Col  3: Number of data points with this (partition, label) pair
+#     Col  4: Number of data points with the same partition, but a different label
+#     Col  5: Number of data points with a different partition, but the same label
+#     Col  6: Number of data points with a different partition and a different label
+#     Col  7: The odds ratio
+#     Col  8: Small side of 95%-confidence interval for the odds ratio
+#     Col  9: Large side of 95%-confidence interval for the odds ratio
+#     Col 10: How many sigmas away the log-odds ratio is from zero
+#     Col 11: Chi-squared statistic
+#     Col 12: Cramer's V * 100%
+#     Col 13: Log-odds ratio P-value * 100%
+#     Col 14: Chi-squared P-value * 100%
+#     Col 15: Percentage (out of 100) of data points in this partition to have this label
+#     Col 16: Small side of 95%-confid. int-l of above percentage, Wilson Score
+#     Col 17: Large side of 95%-confid. int-l of above percentage, Wilson Score
+#     Col 18: Percentage (out of 100) of data points overall to have this label
+#     Col 19: Small side of 95%-confid. int-l of above percentage, Wilson Score
+#     Col 20: Large side of 95%-confid. int-l of above percentage, Wilson Score
+#     Col 21: Percentage (out of 100) of data points overall to lie in this partition
+#     Col 22: Small side of 95%-confid. int-l of above percentage, Wilson Score
+#     Col 23: Large side of 95%-confid. int-l of above percentage, Wilson Score
+#
+# EXAMPLE:
+# hadoop jar SystemML.jar -f PATH/ctci_odds.dml -args PATH/ctci_test.mtx 5602 2 PATH/ctci_odds_test_output.mtx
+
+powerOfTen = 10000;      # CONSTANT FOR ROUNDING THE RESULTS
+print ("BEGIN CTABLE ANALYSIS SCRIPT");
+print ("Reading the input matrix...");
+InData = read ($1, rows = $2, cols = 2, format = "text");
+numPoints = $2;   # N; used below as the grand total when computing n00
+print ("Computing the contingency table...");
+CT = table (InData [, 1], InData [, 2]);   # indexed below as CT [partition, label]
+# DEBUG LINE ONLY: write (CT, "test/scripts/applications/ctableStats/ctci_test_CT.mtx", format="text");
+print ("Preparing for the output tables...");
+nullLabel = $3;
+numPartitions = nrow (CT);
+numLabels = ncol (CT);
+numOutRows = numPartitions * numLabels;
+if (nullLabel > 0 & nullLabel <= numLabels) {   # the "null" label is an actual column of CT
+    numOutRows = numOutRows - numPartitions;   # its (partition, label) pairs are skipped by the loop below
+}
+cntPartitions = rowSums (CT);   # data points per partition, one row per partition
+cntLabels = t(colSums (CT));   # data points per label, transposed to one row per label
+OutMtx = Rand (rows = numOutRows, cols = 23, min = 0, max = 0);
+idx = 0;   # index of the last filled row of OutMtx
+zero = Rand (rows = 1, cols = 1, min = 0, max = 0);   # 1x1 zero matrix; presumably added to scalars to obtain 1x1 matrices for cell assignment
+for (iLabel in 1:numLabels)
+{
+    if (iLabel != nullLabel)
+    {
+        if (1==1) {   # always true; NOTE(review): the purpose of this guard is not evident from this script
+            print ("Processing label " + iLabel + ":");
+        }
+        for (iPartition in 1:numPartitions)
+        {
+            idx = idx + 1;
+            OutMtx [idx,  1] = iPartition + zero;
+            OutMtx [idx,  2] = iLabel + zero;
+
+            n11 = CT [iPartition, iLabel];   # this partition, this label
+            n01 = cntPartitions [iPartition, 1] - CT [iPartition, iLabel];   # this partition, other labels
+            n10 = cntLabels [iLabel, 1] -  CT [iPartition, iLabel];   # other partitions, this label
+            n00 = numPoints - cntPartitions [iPartition, 1] - cntLabels [iLabel, 1] + CT [iPartition, iLabel];   # other partitions, other labels
+            odds_ratio = n11 * n00 / (n01 * n10);   # NOTE(review): no guard against n01 * n10 == 0
+            sigma_log_odds_ratio = sqrt (1.0 / n00 + 1.0 / n01 + 1.0 / n10 + 1.0 / n11);   # NOTE(review): no guard against empty cells here either
+            odds_ratio_interval_small = odds_ratio / exp (1.96 * sigma_log_odds_ratio);   # exp (log (odds_ratio) - 1.96 * sigma)
+            odds_ratio_interval_large = odds_ratio * exp (1.96 * sigma_log_odds_ratio);   # exp (log (odds_ratio) + 1.96 * sigma)
+            num_sigmas_away = abs (log (odds_ratio) / sigma_log_odds_ratio);
+            chi_diff = n00 * n11 - n01 * n10;
+            chi_denom = (n00 + n01) * (n10 + n11) * (n00 + n10) * (n01 + n11);   # product of the four marginal totals of the 2x2 table
+            chi_square = (n00 + n01 + n10 + n11) * chi_diff * chi_diff / chi_denom;
+            cramers_V = sqrt (chi_square / (n00 + n01 + n10 + n11));
+
+            OutMtx [idx,  3] = n11;
+            OutMtx [idx,  4] = n01;
+            OutMtx [idx,  5] = n10;
+            OutMtx [idx,  6] = n00;
+            OutMtx [idx,  7] = round (odds_ratio * powerOfTen) / powerOfTen;
+            OutMtx [idx,  8] = round (odds_ratio_interval_small * powerOfTen) / powerOfTen;
+            OutMtx [idx,  9] = round (odds_ratio_interval_large * powerOfTen) / powerOfTen;
+            OutMtx [idx, 10] = round (num_sigmas_away * powerOfTen) / powerOfTen;
+            OutMtx [idx, 11] = round (chi_square * powerOfTen) / powerOfTen;
+            OutMtx [idx, 12] = round (100.0 * cramers_V * powerOfTen) / powerOfTen;
+            
+            gauss_pts = Rand (rows = 2, cols = 1, min = 0, max = 0);
+            gauss_pts [1, 1] = - num_sigmas_away;   # negated (non-positive) argument for the probability lookup below
+            gauss_pts [2, 1] = - sqrt (chi_square);   # negated (non-positive) argument for the probability lookup below
+            gauss_probs = gaussian_probability (gauss_pts);
+            pval_odds = gauss_probs [1, 1] * 2.0;   # doubled to obtain the reported P-value
+            pval_chi2 = gauss_probs [2, 1] * 2.0;   # doubled to obtain the reported P-value
+            
+            OutMtx [idx, 13] = round (100.0 * pval_odds * powerOfTen) / powerOfTen;
+            OutMtx [idx, 14] = round (100.0 * pval_chi2 * powerOfTen) / powerOfTen;
+
+            m_cnt = Rand (rows = 3, cols = 1, min = 0, max = 0);
+            n_cnt = Rand (rows = 3, cols = 1, min = 0, max = 0);
+            m_cnt [1, 1] = CT [iPartition, iLabel];   # row 1: this label within this partition ...
+            n_cnt [1, 1] = cntPartitions [iPartition, 1];   # ... out of all points in this partition
+            m_cnt [2, 1] = cntLabels [iLabel, 1];   # row 2: this label overall ...
+            n_cnt [2, 1] = numPoints + zero;   # ... out of all points
+            m_cnt [3, 1] = cntPartitions [iPartition, 1];   # row 3: this partition overall ...
+            n_cnt [3, 1] = numPoints + zero;   # ... out of all points
+            [ratios, conf_interval_small, conf_interval_large] = wilson_confidence (n_cnt, m_cnt);   # one interval per row of m_cnt / n_cnt
+            OutMtx [idx, 15] = round (100.0 * ratios [1, 1] * powerOfTen) / powerOfTen;
+            OutMtx [idx, 16] = round (100.0 * conf_interval_small [1, 1] * powerOfTen) / powerOfTen;
+            OutMtx [idx, 17] = round (100.0 * conf_interval_large [1, 1] * powerOfTen) / powerOfTen;
+            OutMtx [idx, 18] = round (100.0 * ratios [2, 1] * powerOfTen) / powerOfTen;
+            OutMtx [idx, 19] = round (100.0 * conf_interval_small [2, 1] * powerOfTen) / powerOfTen;
+            OutMtx [idx, 20] = round (100.0 * conf_interval_large [2, 1] * powerOfTen) / powerOfTen;
+            OutMtx [idx, 21] = round (100.0 * ratios [3, 1] * powerOfTen) / powerOfTen;
+            OutMtx [idx, 22] = round (100.0 * conf_interval_small [3, 1] * powerOfTen) / powerOfTen;
+            OutMtx [idx, 23] = round (100.0 * conf_interval_large [3, 1] * powerOfTen) / powerOfTen;
+}   }   }
+
+print ("Writing the output matrix...");
+write (OutMtx, $4, format="text");
+print ("END CTABLE ANALYSIS SCRIPT");
+
+wilson_confidence = function (Matrix[double] n, Matrix[double] m)   # Wilson score interval for the proportion m / n
+return (Matrix[double] ratio, Matrix[double] conf_left, Matrix[double] conf_right)
+{
+    z = 1.96;      # 97.5% normal percentile, for 95% confidence interval
+    z_sq_n = z * z * n;   # z^2 * n
+    qroot = sqrt (z_sq_n * (m * (n - m) + z_sq_n / 4));   # algebraically n * z * sqrt (m * (n - m) / n + z^2 / 4) for n > 0
+    midpt = n * m + z_sq_n / 2;   # n * (m + z^2 / 2)
+    denom = n * n + z_sq_n;   # n * (n + z^2); the common factor n cancels in the two bounds below
+    ratio = m / n;   # observed proportion
+    conf_left  = (midpt - qroot) / denom;   # lower bound: (m + z^2/2 - z * sqrt (m * (n - m) / n + z^2/4)) / (n + z^2)
+    conf_right = (midpt + qroot) / denom;   # upper bound: (m + z^2/2 + z * sqrt (m * (n - m) / n + z^2/4)) / (n + z^2)
+}
+
+gaussian_probability = function (Matrix[double] vector_of_points)   # approximate standard normal CDF at each point
+    return (Matrix[double] vector_of_probabilities)
+{
+    t_gp = 1.0 / (1.0 + abs (vector_of_points) * 0.231641888);  # 0.231641888 = 0.3275911 / sqrt (2.0)
+    erf_gp = 1.0 - t_gp * ( 0.254829592   # degree-5 polynomial in t_gp (Horner form) approximating erf (|x| / sqrt (2.0))
+                 + t_gp * (-0.284496736 # "Handbook of Mathematical Functions", ed. by M. Abramowitz and I.A. Stegun,
+                 + t_gp * ( 1.421413741 # U.S. Nat-l Bureau of Standards, 10th print (Dec 1972), Sec. 7.1.26, p. 299
+                 + t_gp * (-1.453152027 
+                 + t_gp *   1.061405429)))) * exp (- vector_of_points * vector_of_points / 2.0);
+    erf_gp = erf_gp * 2.0 * (ppred (vector_of_points, 0.0, ">") - 0.5);   # restore the sign: factor is +1 where the point is > 0, -1 otherwise
+    vector_of_probabilities = 0.5 + 0.5 * erf_gp;   # 0.5 * (1 + erf (x / sqrt (2.0)))
+}
+


[43/55] [partial] incubator-systemml git commit: [SYSTEMML-482] [SYSTEMML-480] Adding a Git attributes file to enforce Unix-styled line endings, and normalizing all of the line endings.

Posted by du...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/scripts/algorithms/StepLinearRegDS.dml
----------------------------------------------------------------------
diff --git a/scripts/algorithms/StepLinearRegDS.dml b/scripts/algorithms/StepLinearRegDS.dml
index afd94ed..953402f 100644
--- a/scripts/algorithms/StepLinearRegDS.dml
+++ b/scripts/algorithms/StepLinearRegDS.dml
@@ -1,388 +1,388 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-#
-# THIS SCRIPT CHOOSES A LINEAR MODEL IN A STEPWISE ALGIRITHM USING AIC
-# EACH LINEAR REGRESSION USES A DIRECT SOLVER FOR (X^T X) beta = X^T y
-#
-# INPUT PARAMETERS:
-# --------------------------------------------------------------------------------------------
-# NAME    TYPE    DEFAULT    MEANING
-# --------------------------------------------------------------------------------------------
-# X       String   	---      Location (on HDFS) to read the matrix X of feature vectors
-# Y       String   	---      Location (on HDFS) to read the 1-column matrix Y of response values
-# B       String   	---      Location to store estimated regression parameters (the betas)
-# S       String    ---      Location to write the selected features ordered as computed by the algorithm
-# O       String   	" "      Location to write the printed statistics; by default is standard output
-# icpt    Int        0       Intercept presence, shifting and rescaling the columns of X:
-#                            0 = no intercept, no shifting, no rescaling;
-#                            1 = add intercept, but neither shift nor rescale X;
-#                            2 = add intercept, shift & rescale X columns to mean = 0, variance = 1
-# thr     Double    0.01     Threshold to stop the algorithm: if the decrease in the value of AIC falls below thr
-#                            no further features are being checked and the algorithm stops 
-# fmt     String   "text"    Matrix output format for B (the betas) only, usually "text" or "csv"
-# --------------------------------------------------------------------------------------------
-# OUTPUT: Matrix of regression parameters (the betas) and its size depend on icpt input value:
-#         OUTPUT SIZE:   OUTPUT CONTENTS:                HOW TO PREDICT Y FROM X AND B:
-# icpt=0: ncol(X)   x 1  Betas for X only                Y ~ X %*% B[1:ncol(X), 1], or just X %*% B
-# icpt=1: ncol(X)+1 x 1  Betas for X and intercept       Y ~ X %*% B[1:ncol(X), 1] + B[ncol(X)+1, 1]
-# icpt=2: ncol(X)+1 x 2  Col.1: betas for X & intercept  Y ~ X %*% B[1:ncol(X), 1] + B[ncol(X)+1, 1]
-#                        Col.2: betas for shifted/rescaled X and intercept
-#
-# In addition, in the last run of linear regression some statistics are provided in CSV format, one comma-separated
-# name-value pair per each line, as follows:
-#
-# NAME                  MEANING
-# -------------------------------------------------------------------------------------
-# AVG_TOT_Y             Average of the response value Y
-# STDEV_TOT_Y           Standard Deviation of the response value Y
-# AVG_RES_Y             Average of the residual Y - pred(Y|X), i.e. residual bias
-# STDEV_RES_Y           Standard Deviation of the residual Y - pred(Y|X)
-# DISPERSION            GLM-style dispersion, i.e. residual sum of squares / # deg. fr.
-# PLAIN_R2              Plain R^2 of residual with bias included vs. total average
-# ADJUSTED_R2           Adjusted R^2 of residual with bias included vs. total average
-# PLAIN_R2_NOBIAS       Plain R^2 of residual with bias subtracted vs. total average
-# ADJUSTED_R2_NOBIAS    Adjusted R^2 of residual with bias subtracted vs. total average
-# PLAIN_R2_VS_0         * Plain R^2 of residual with bias included vs. zero constant
-# ADJUSTED_R2_VS_0      * Adjusted R^2 of residual with bias included vs. zero constant
-# -------------------------------------------------------------------------------------
-# * The last two statistics are only printed if there is no intercept (icpt=0)
-# If the best AIC is achieved without any features the matrix of selected features contains 0.  
-# Moreover, in this case no further statistics will be produced  
-#
-# HOW TO INVOKE THIS SCRIPT - EXAMPLE:
-# hadoop jar SystemML.jar -f StepLinearRegDS.dml -nvargs X=INPUT_DIR/X Y=INPUT_DIR/Y B=OUTPUT_DIR/betas
-#     O=OUTPUT_DIR/stats S=OUTPUT_DIR/selected icpt=2 thr=0.01 fmt=csv
-
-fileX = $X;
-fileY = $Y;
-fileB = $B;
-fileS = $S;
-
-# currently only the forward selection strategy in supported: start from one feature and iteratively add 
-# features until AIC improves
-dir = "forward";
-
-fmt  = ifdef ($fmt, "text");
-intercept_status = ifdef ($icpt, 0);   
-thr = ifdef ($thr, 0.01);  
-
-print ("BEGIN STEPWISE LINEAR REGRESSION SCRIPT");
-print ("Reading X and Y...");
-X_orig = read (fileX);
-y = read (fileY);
-
-n = nrow (X_orig);
-m_orig = ncol (X_orig);
-
-# BEGIN STEPWISE LINEAR REGRESSION
-
-if (dir == "forward") {  
-	
-	continue = TRUE;
-	columns_fixed = matrix (0, rows = 1, cols = m_orig);
-	columns_fixed_ordered = matrix (0, rows = 1, cols = 1);
-	
-	# X_global stores the best model found at each step 
-	X_global = matrix (0, rows = n, cols = 1);
-	
-	if (intercept_status == 1 | intercept_status == 2) {
-		beta = mean (y);
-		AIC_best = 2 + n * log(sum((beta - y)^2) / n);
-	} else {
-		beta = 0;
-		AIC_best = n * log(sum(y^2) / n);
-	}
-	AICs = matrix (AIC_best, rows = 1, cols = m_orig);
-	print ("Best AIC without any features: " + AIC_best);
-	
-	# First pass to examine single features
-	parfor (i in 1:m_orig) { 	
-		[AIC_1] = linear_regression (X_orig[,i], y, m_orig, columns_fixed_ordered, " ");					
-		AICs[1,i] = AIC_1;
-	}
-
-	# Determine the best AIC 
-	column_best = 0;	
-	for (k in 1:m_orig) {
-		AIC_cur = as.scalar (AICs[1,k]);
-		if ( (AIC_cur < AIC_best) & ((AIC_best - AIC_cur) > abs (thr * AIC_best)) ) {
-			column_best = k;
-			AIC_best = as.scalar(AICs[1,k]);
-		}
-	}
-	
-	if (column_best == 0) {
-		print ("AIC of an empty model is " + AIC_best + " and adding no feature achieves more than " + (thr * 100) + "% decrease in AIC!");
-		S = matrix (0, rows=1, cols=1);
-		if (intercept_status == 0) {
-			B = matrix (beta, rows = m_orig, cols = 1);
-		} else {
-			B_tmp = matrix (0, rows = m_orig + 1, cols = 1);
-			B_tmp[m_orig + 1,] = beta;
-			B = B_tmp;
-		}
-		write (S, fileS, format=fmt);
-		write (B, fileB, format=fmt);
-		stop ("");
-	}
-	print ("Best AIC " + AIC_best + " achieved with feature: " + column_best);	
-	columns_fixed[1,column_best] = 1;
-	columns_fixed_ordered[1,1] = column_best;
-	X_global = X_orig[,column_best];		
-		
-	while (continue) {
-		# Subsequent passes over the features
-		parfor (i in 1:m_orig) { 
-			if (as.scalar(columns_fixed[1,i]) == 0) {	
-			
-				# Construct the feature matrix
-				X = append (X_global, X_orig[,i]);
-				
-				[AIC_2] = linear_regression (X, y, m_orig, columns_fixed_ordered, " ");
-				AICs[1,i] = AIC_2;
-			}	
-		}
-	
-		# Determine the best AIC
-		for (k in 1:m_orig) {
-			AIC_cur = as.scalar (AICs[1,k]);
-			if ( (AIC_cur < AIC_best) & ((AIC_best - AIC_cur) > abs (thr * AIC_best)) & (as.scalar(columns_fixed[1,k]) == 0) ) {
-				column_best = k;
-				AIC_best = as.scalar(AICs[1,k]);
-			}
-		}
-				
-		# Append best found features (i.e., columns) to X_global
-		if (as.scalar(columns_fixed[1,column_best]) == 0) { # new best feature found
-			print ("Best AIC " + AIC_best + " achieved with feature: " + column_best);
-			columns_fixed[1,column_best] = 1;
-			columns_fixed_ordered = append (columns_fixed_ordered, as.matrix(column_best));
-			if (ncol(columns_fixed_ordered) == m_orig) { # all features examined
-				X_global = append (X_global, X_orig[,column_best]);
-				continue = FALSE;
-			} else {
-				X_global = append (X_global, X_orig[,column_best]);
-			}
-		} else {
-			continue = FALSE;
-		}
-	}
-	
-	# run linear regression with selected set of features
-	print ("Running linear regression with selected features...");
-	[AIC] = linear_regression (X_global, y, m_orig, columns_fixed_ordered, fileB); 
-	
-} else {
-	stop ("Currently only forward selection strategy is supported!");
-} 
-
-
-/*
-* Computes linear regression using a direct solver for (X^T X) beta = X^T y.
-* It also outputs the AIC of the computed model.  
-*/
-linear_regression = function (Matrix[Double] X, Matrix[Double] y, Double m_orig, Matrix[Double] Selected, String fileB) return (Double AIC) {
-		
-	intercept_status = ifdef ($icpt, 0); 		
-	n = nrow (X);	
-	m = ncol (X);
-	
-	# Introduce the intercept, shift and rescale the columns of X if needed
-	if (intercept_status == 1 | intercept_status == 2) { # add the intercept column
-		ones_n = matrix (1, rows = n, cols = 1);
-		X = append (X, ones_n);
-		m = m - 1;
-	}
-	m_ext = ncol(X);
-	
-	if (intercept_status == 2) { # scale-&-shift X columns to mean 0, variance 1
-							     # Important assumption: X [, m_ext] = ones_n
-		avg_X_cols = t(colSums(X)) / n;
-		var_X_cols = (t(colSums (X ^ 2)) - n * (avg_X_cols ^ 2)) / (n - 1);
-		is_unsafe = ppred (var_X_cols, 0.0, "<=");
-		scale_X = 1.0 / sqrt (var_X_cols * (1 - is_unsafe) + is_unsafe);
-		scale_X [m_ext, 1] = 1;
-		shift_X = - avg_X_cols * scale_X;
-		shift_X [m_ext, 1] = 0;
-	} else {
-		scale_X = matrix (1, rows = m_ext, cols = 1);
-		shift_X = matrix (0, rows = m_ext, cols = 1);
-	}
-
-	# BEGIN THE DIRECT SOLVE ALGORITHM (EXTERNAL CALL)
-
-	A = t(X) %*% X;
-	b = t(X) %*% y;
-	if (intercept_status == 2) {
-		A = t(diag (scale_X) %*% A + shift_X %*% A [m_ext, ]);
-		A =   diag (scale_X) %*% A + shift_X %*% A [m_ext, ];
-		b =   diag (scale_X) %*% b + shift_X %*% b [m_ext, ];
-	}
-
-	beta_unscaled = solve (A, b);
-	
-	# END THE DIRECT SOLVE ALGORITHM
-
-	if (intercept_status == 2) {
-		beta = scale_X * beta_unscaled;
-		beta [m_ext, ] = beta [m_ext, ] + t(shift_X) %*% beta_unscaled;
-	} else {
-		beta = beta_unscaled;
-	}
-	
-	# COMPUTE AIC
-	y_residual = y - X %*% beta;
-	ss_res = sum (y_residual ^ 2);
-	eq_deg_of_freedom = m_ext;
-	AIC = (2 * eq_deg_of_freedom) + n * log (ss_res / n);
-	
-	if (fileB != " ") {
-	
-		fileO = ifdef ($O, " ");
-		fileS = $S;
-		
-		print ("Computing the statistics...");
-		avg_tot = sum (y) / n;
-		ss_tot = sum (y ^ 2);
-		ss_avg_tot = ss_tot - n * avg_tot ^ 2;
-		var_tot = ss_avg_tot / (n - 1);
-#		y_residual = y - X %*% beta;
-		avg_res = sum (y_residual) / n;
-#		ss_res = sum (y_residual ^ 2);
-		ss_avg_res = ss_res - n * avg_res ^ 2;
-
-		plain_R2 = 1 - ss_res / ss_avg_tot;
-		if (n > m_ext) {
-			dispersion  = ss_res / (n - m_ext);
-			adjusted_R2 = 1 - dispersion / (ss_avg_tot / (n - 1));
-		} else {
-			dispersion  = 0.0 / 0.0;
-			adjusted_R2 = 0.0 / 0.0;
-		}
-
-		plain_R2_nobias = 1 - ss_avg_res / ss_avg_tot;
-		deg_freedom = n - m - 1;
-		if (deg_freedom > 0) {
-			var_res = ss_avg_res / deg_freedom;
-			adjusted_R2_nobias = 1 - var_res / (ss_avg_tot / (n - 1));
-		} else {
-			var_res = 0.0 / 0.0;
-			adjusted_R2_nobias = 0.0 / 0.0;
-			print ("Warning: zero or negative number of degrees of freedom.");
-		}
-
-		plain_R2_vs_0 = 1 - ss_res / ss_tot;
-		if (n > m) {
-			adjusted_R2_vs_0 = 1 - (ss_res / (n - m)) / (ss_tot / n);
-		} else {
-			adjusted_R2_vs_0 = 0.0 / 0.0;
-		}
-
-		str = "AVG_TOT_Y," + avg_tot;                                    #  Average of the response value Y
-		str = append (str, "STDEV_TOT_Y," + sqrt (var_tot));             #  Standard Deviation of the response value Y
-		str = append (str, "AVG_RES_Y," + avg_res);                      #  Average of the residual Y - pred(Y|X), i.e. residual bias
-		str = append (str, "STDEV_RES_Y," + sqrt (var_res));             #  Standard Deviation of the residual Y - pred(Y|X)
-		str = append (str, "DISPERSION," + dispersion);                  #  GLM-style dispersion, i.e. residual sum of squares / # d.f.
-		str = append (str, "PLAIN_R2," + plain_R2);                      #  Plain R^2 of residual with bias included vs. total average
-		str = append (str, "ADJUSTED_R2," + adjusted_R2);                #  Adjusted R^2 of residual with bias included vs. total average
-		str = append (str, "PLAIN_R2_NOBIAS," + plain_R2_nobias);        #  Plain R^2 of residual with bias subtracted vs. total average
-		str = append (str, "ADJUSTED_R2_NOBIAS," + adjusted_R2_nobias);  #  Adjusted R^2 of residual with bias subtracted vs. total average
-		if (intercept_status == 0) {
-			str = append (str, "PLAIN_R2_VS_0," + plain_R2_vs_0);        #  Plain R^2 of residual with bias included vs. zero constant
-			str = append (str, "ADJUSTED_R2_VS_0," + adjusted_R2_vs_0);  #  Adjusted R^2 of residual with bias included vs. zero constant
-		}
-
-		if (fileO != " ") {
-			write (str, fileO);
-		} else {
-			print (str);
-		}
-
-		# Prepare the output matrix
-		print ("Writing the output matrix...");
-		if (intercept_status == 2) {
-			beta_out = append (beta, beta_unscaled);
-		} else {
-			beta_out = beta;
-		}
-		
-		# Output which features give the best AIC and are being used for linear regression 
-		write (Selected, fileS, format=fmt);
-		
-		no_selected = ncol (Selected);
-		max_selected = max (Selected);
-		last = max_selected + 1;	
-		
-		if (intercept_status != 0) {
-		
-			Selected_ext = append (Selected, as.matrix (last));			
-			P1 = table (seq (1, ncol (Selected_ext)), t(Selected_ext)); 
-
-			if (intercept_status == 2) {
-			
-				P1_beta = P1 * beta;
-				P2_beta = colSums (P1_beta);
-				P1_beta_unscaled = P1 * beta_unscaled;
-				P2_beta_unscaled = colSums(P1_beta_unscaled);
-				
-				if (max_selected < m_orig) {
-					P2_beta = append (P2_beta, matrix (0, rows=1, cols=(m_orig - max_selected)));
-					P2_beta_unscaled = append (P2_beta_unscaled, matrix (0, rows=1, cols=(m_orig - max_selected)));
-					
-					P2_beta[1, m_orig+1] = P2_beta[1, max_selected + 1]; 
-					P2_beta[1, max_selected + 1] = 0;
-				
-					P2_beta_unscaled[1, m_orig+1] = P2_beta_unscaled[1, max_selected + 1]; 
-					P2_beta_unscaled[1, max_selected + 1] = 0;
-				}
-				beta_out = append (t(P2_beta), t(P2_beta_unscaled));
-				
-			} else {
-			
-				P1_beta = P1 * beta;
-				P2_beta = colSums (P1_beta);
-				
-				if (max_selected < m_orig) {
-					P2_beta = append (P2_beta, matrix (0, rows=1, cols=(m_orig - max_selected)));
-					P2_beta[1, m_orig+1] = P2_beta[1, max_selected + 1] ; 
-					P2_beta[1, max_selected + 1] = 0;
-				}
-				beta_out = t(P2_beta);
-				
-			}
-		} else {
-		
-			P1 = table (seq (1, no_selected), t(Selected)); 
-			P1_beta = P1 * beta;
-			P2_beta = colSums (P1_beta);	
-
-			if (max_selected < m_orig) {
-				P2_beta = append (P2_beta, matrix (0, rows=1, cols=(m_orig - max_selected)));
-			}		
-
-			beta_out = t(P2_beta);	
-		}
-		
-		write ( beta_out, fileB, format=fmt );		
-	}
-}
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+#
+# THIS SCRIPT CHOOSES A LINEAR MODEL IN A STEPWISE ALGORITHM USING AIC
+# EACH LINEAR REGRESSION USES A DIRECT SOLVER FOR (X^T X) beta = X^T y
+#
+# INPUT PARAMETERS:
+# --------------------------------------------------------------------------------------------
+# NAME    TYPE    DEFAULT    MEANING
+# --------------------------------------------------------------------------------------------
+# X       String   	---      Location (on HDFS) to read the matrix X of feature vectors
+# Y       String   	---      Location (on HDFS) to read the 1-column matrix Y of response values
+# B       String   	---      Location to store estimated regression parameters (the betas)
+# S       String    ---      Location to write the selected features ordered as computed by the algorithm
+# O       String   	" "      Location to write the printed statistics; by default is standard output
+# icpt    Int        0       Intercept presence, shifting and rescaling the columns of X:
+#                            0 = no intercept, no shifting, no rescaling;
+#                            1 = add intercept, but neither shift nor rescale X;
+#                            2 = add intercept, shift & rescale X columns to mean = 0, variance = 1
+# thr     Double    0.01     Threshold to stop the algorithm: if the relative decrease in AIC (as a fraction of |AIC|) falls below thr
+#                            no further features are checked and the algorithm stops
+# fmt     String   "text"    Matrix output format for B (the betas) and S (the selected features), usually "text" or "csv"
+# --------------------------------------------------------------------------------------------
+# OUTPUT: Matrix of regression parameters (the betas) and its size depend on icpt input value:
+#         OUTPUT SIZE:   OUTPUT CONTENTS:                HOW TO PREDICT Y FROM X AND B:
+# icpt=0: ncol(X)   x 1  Betas for X only                Y ~ X %*% B[1:ncol(X), 1], or just X %*% B
+# icpt=1: ncol(X)+1 x 1  Betas for X and intercept       Y ~ X %*% B[1:ncol(X), 1] + B[ncol(X)+1, 1]
+# icpt=2: ncol(X)+1 x 2  Col.1: betas for X & intercept  Y ~ X %*% B[1:ncol(X), 1] + B[ncol(X)+1, 1]
+#                        Col.2: betas for shifted/rescaled X and intercept
+#
+# In addition, in the last run of linear regression some statistics are provided in CSV format, one comma-separated
+# name-value pair per line, as follows:
+#
+# NAME                  MEANING
+# -------------------------------------------------------------------------------------
+# AVG_TOT_Y             Average of the response value Y
+# STDEV_TOT_Y           Standard Deviation of the response value Y
+# AVG_RES_Y             Average of the residual Y - pred(Y|X), i.e. residual bias
+# STDEV_RES_Y           Standard Deviation of the residual Y - pred(Y|X)
+# DISPERSION            GLM-style dispersion, i.e. residual sum of squares / # deg. fr.
+# PLAIN_R2              Plain R^2 of residual with bias included vs. total average
+# ADJUSTED_R2           Adjusted R^2 of residual with bias included vs. total average
+# PLAIN_R2_NOBIAS       Plain R^2 of residual with bias subtracted vs. total average
+# ADJUSTED_R2_NOBIAS    Adjusted R^2 of residual with bias subtracted vs. total average
+# PLAIN_R2_VS_0         * Plain R^2 of residual with bias included vs. zero constant
+# ADJUSTED_R2_VS_0      * Adjusted R^2 of residual with bias included vs. zero constant
+# -------------------------------------------------------------------------------------
+# * The last two statistics are only printed if there is no intercept (icpt=0)
+# If the best AIC is achieved without any features the matrix of selected features contains 0.  
+# Moreover, in this case no further statistics will be produced  
+#
+# HOW TO INVOKE THIS SCRIPT - EXAMPLE:
+# hadoop jar SystemML.jar -f StepLinearRegDS.dml -nvargs X=INPUT_DIR/X Y=INPUT_DIR/Y B=OUTPUT_DIR/betas
+#     O=OUTPUT_DIR/stats S=OUTPUT_DIR/selected icpt=2 thr=0.01 fmt=csv
+
+fileX = $X;
+fileY = $Y;
+fileB = $B;
+fileS = $S;
+
+# currently only the forward selection strategy is supported: start from one feature and iteratively add
+# features as long as AIC keeps improving
+dir = "forward";
+
+fmt  = ifdef ($fmt, "text");
+intercept_status = ifdef ($icpt, 0);   
+thr = ifdef ($thr, 0.01);  
+
+print ("BEGIN STEPWISE LINEAR REGRESSION SCRIPT");
+print ("Reading X and Y...");
+X_orig = read (fileX);
+y = read (fileY);
+
+n = nrow (X_orig);
+m_orig = ncol (X_orig);
+
+# BEGIN STEPWISE LINEAR REGRESSION
+
+if (dir == "forward") {  
+	
+	continue = TRUE;
+	columns_fixed = matrix (0, rows = 1, cols = m_orig);
+	columns_fixed_ordered = matrix (0, rows = 1, cols = 1);
+	
+	# X_global stores the best model found at each step 
+	X_global = matrix (0, rows = n, cols = 1);
+	
+	if (intercept_status == 1 | intercept_status == 2) {
+		beta = mean (y);
+		AIC_best = 2 + n * log(sum((beta - y)^2) / n);
+	} else {
+		beta = 0;
+		AIC_best = n * log(sum(y^2) / n);
+	}
+	AICs = matrix (AIC_best, rows = 1, cols = m_orig);
+	print ("Best AIC without any features: " + AIC_best);
+	
+	# First pass to examine single features
+	parfor (i in 1:m_orig) { 	
+		[AIC_1] = linear_regression (X_orig[,i], y, m_orig, columns_fixed_ordered, " ");					
+		AICs[1,i] = AIC_1;
+	}
+
+	# Determine the best AIC 
+	column_best = 0;	
+	for (k in 1:m_orig) {
+		AIC_cur = as.scalar (AICs[1,k]);
+		if ( (AIC_cur < AIC_best) & ((AIC_best - AIC_cur) > abs (thr * AIC_best)) ) {
+			column_best = k;
+			AIC_best = as.scalar(AICs[1,k]);
+		}
+	}
+	
+	if (column_best == 0) {
+		print ("AIC of an empty model is " + AIC_best + " and adding no feature achieves more than " + (thr * 100) + "% decrease in AIC!");
+		S = matrix (0, rows=1, cols=1);
+		if (intercept_status == 0) {
+			B = matrix (beta, rows = m_orig, cols = 1);
+		} else {
+			B_tmp = matrix (0, rows = m_orig + 1, cols = 1);
+			B_tmp[m_orig + 1,] = beta;
+			B = B_tmp;
+		}
+		write (S, fileS, format=fmt);
+		write (B, fileB, format=fmt);
+		stop ("");
+	}
+	print ("Best AIC " + AIC_best + " achieved with feature: " + column_best);	
+	columns_fixed[1,column_best] = 1;
+	columns_fixed_ordered[1,1] = column_best;
+	X_global = X_orig[,column_best];		
+		
+	while (continue) {
+		# Subsequent passes over the features
+		parfor (i in 1:m_orig) { 
+			if (as.scalar(columns_fixed[1,i]) == 0) {	
+			
+				# Construct the feature matrix
+				X = append (X_global, X_orig[,i]);
+				
+				[AIC_2] = linear_regression (X, y, m_orig, columns_fixed_ordered, " ");
+				AICs[1,i] = AIC_2;
+			}	
+		}
+	
+		# Determine the best AIC
+		for (k in 1:m_orig) {
+			AIC_cur = as.scalar (AICs[1,k]);
+			if ( (AIC_cur < AIC_best) & ((AIC_best - AIC_cur) > abs (thr * AIC_best)) & (as.scalar(columns_fixed[1,k]) == 0) ) {
+				column_best = k;
+				AIC_best = as.scalar(AICs[1,k]);
+			}
+		}
+				
+		# Append best found features (i.e., columns) to X_global
+		if (as.scalar(columns_fixed[1,column_best]) == 0) { # new best feature found
+			print ("Best AIC " + AIC_best + " achieved with feature: " + column_best);
+			columns_fixed[1,column_best] = 1;
+			columns_fixed_ordered = append (columns_fixed_ordered, as.matrix(column_best));
+			if (ncol(columns_fixed_ordered) == m_orig) { # all features examined
+				X_global = append (X_global, X_orig[,column_best]);
+				continue = FALSE;
+			} else {
+				X_global = append (X_global, X_orig[,column_best]);
+			}
+		} else {
+			continue = FALSE;
+		}
+	}
+	
+	# run linear regression with selected set of features
+	print ("Running linear regression with selected features...");
+	[AIC] = linear_regression (X_global, y, m_orig, columns_fixed_ordered, fileB); 
+	
+} else {
+	stop ("Currently only forward selection strategy is supported!");
+} 
+
+
+/*
+* Computes linear regression using a direct solver for (X^T X) beta = X^T y.
+* It also outputs the AIC of the computed model.  
+*/
+linear_regression = function (Matrix[Double] X, Matrix[Double] y, Double m_orig, Matrix[Double] Selected, String fileB) return (Double AIC) {
+		
+	intercept_status = ifdef ($icpt, 0); 		
+	n = nrow (X);	
+	m = ncol (X);
+	
+	# Introduce the intercept, shift and rescale the columns of X if needed
+	if (intercept_status == 1 | intercept_status == 2) { # add the intercept column
+		ones_n = matrix (1, rows = n, cols = 1);
+		X = append (X, ones_n);
+		m = m - 1;
+	}
+	m_ext = ncol(X);
+	
+	if (intercept_status == 2) { # scale-&-shift X columns to mean 0, variance 1
+							     # Important assumption: X [, m_ext] = ones_n
+		avg_X_cols = t(colSums(X)) / n;
+		var_X_cols = (t(colSums (X ^ 2)) - n * (avg_X_cols ^ 2)) / (n - 1);
+		is_unsafe = ppred (var_X_cols, 0.0, "<=");
+		scale_X = 1.0 / sqrt (var_X_cols * (1 - is_unsafe) + is_unsafe);
+		scale_X [m_ext, 1] = 1;
+		shift_X = - avg_X_cols * scale_X;
+		shift_X [m_ext, 1] = 0;
+	} else {
+		scale_X = matrix (1, rows = m_ext, cols = 1);
+		shift_X = matrix (0, rows = m_ext, cols = 1);
+	}
+
+	# BEGIN THE DIRECT SOLVE ALGORITHM (EXTERNAL CALL)
+
+	A = t(X) %*% X;
+	b = t(X) %*% y;
+	if (intercept_status == 2) {
+		A = t(diag (scale_X) %*% A + shift_X %*% A [m_ext, ]);
+		A =   diag (scale_X) %*% A + shift_X %*% A [m_ext, ];
+		b =   diag (scale_X) %*% b + shift_X %*% b [m_ext, ];
+	}
+
+	beta_unscaled = solve (A, b);
+	
+	# END THE DIRECT SOLVE ALGORITHM
+
+	if (intercept_status == 2) {
+		beta = scale_X * beta_unscaled;
+		beta [m_ext, ] = beta [m_ext, ] + t(shift_X) %*% beta_unscaled;
+	} else {
+		beta = beta_unscaled;
+	}
+	
+	# COMPUTE AIC
+	y_residual = y - X %*% beta;
+	ss_res = sum (y_residual ^ 2);
+	eq_deg_of_freedom = m_ext;
+	AIC = (2 * eq_deg_of_freedom) + n * log (ss_res / n);
+	
+	if (fileB != " ") {
+	
+		fileO = ifdef ($O, " ");
+		fileS = $S;
+		
+		print ("Computing the statistics...");
+		avg_tot = sum (y) / n;
+		ss_tot = sum (y ^ 2);
+		ss_avg_tot = ss_tot - n * avg_tot ^ 2;
+		var_tot = ss_avg_tot / (n - 1);
+#		y_residual = y - X %*% beta;
+		avg_res = sum (y_residual) / n;
+#		ss_res = sum (y_residual ^ 2);
+		ss_avg_res = ss_res - n * avg_res ^ 2;
+
+		plain_R2 = 1 - ss_res / ss_avg_tot;
+		if (n > m_ext) {
+			dispersion  = ss_res / (n - m_ext);
+			adjusted_R2 = 1 - dispersion / (ss_avg_tot / (n - 1));
+		} else {
+			dispersion  = 0.0 / 0.0;
+			adjusted_R2 = 0.0 / 0.0;
+		}
+
+		plain_R2_nobias = 1 - ss_avg_res / ss_avg_tot;
+		deg_freedom = n - m - 1;
+		if (deg_freedom > 0) {
+			var_res = ss_avg_res / deg_freedom;
+			adjusted_R2_nobias = 1 - var_res / (ss_avg_tot / (n - 1));
+		} else {
+			var_res = 0.0 / 0.0;
+			adjusted_R2_nobias = 0.0 / 0.0;
+			print ("Warning: zero or negative number of degrees of freedom.");
+		}
+
+		plain_R2_vs_0 = 1 - ss_res / ss_tot;
+		if (n > m) {
+			adjusted_R2_vs_0 = 1 - (ss_res / (n - m)) / (ss_tot / n);
+		} else {
+			adjusted_R2_vs_0 = 0.0 / 0.0;
+		}
+
+		str = "AVG_TOT_Y," + avg_tot;                                    #  Average of the response value Y
+		str = append (str, "STDEV_TOT_Y," + sqrt (var_tot));             #  Standard Deviation of the response value Y
+		str = append (str, "AVG_RES_Y," + avg_res);                      #  Average of the residual Y - pred(Y|X), i.e. residual bias
+		str = append (str, "STDEV_RES_Y," + sqrt (var_res));             #  Standard Deviation of the residual Y - pred(Y|X)
+		str = append (str, "DISPERSION," + dispersion);                  #  GLM-style dispersion, i.e. residual sum of squares / # d.f.
+		str = append (str, "PLAIN_R2," + plain_R2);                      #  Plain R^2 of residual with bias included vs. total average
+		str = append (str, "ADJUSTED_R2," + adjusted_R2);                #  Adjusted R^2 of residual with bias included vs. total average
+		str = append (str, "PLAIN_R2_NOBIAS," + plain_R2_nobias);        #  Plain R^2 of residual with bias subtracted vs. total average
+		str = append (str, "ADJUSTED_R2_NOBIAS," + adjusted_R2_nobias);  #  Adjusted R^2 of residual with bias subtracted vs. total average
+		if (intercept_status == 0) {
+			str = append (str, "PLAIN_R2_VS_0," + plain_R2_vs_0);        #  Plain R^2 of residual with bias included vs. zero constant
+			str = append (str, "ADJUSTED_R2_VS_0," + adjusted_R2_vs_0);  #  Adjusted R^2 of residual with bias included vs. zero constant
+		}
+
+		if (fileO != " ") {
+			write (str, fileO);
+		} else {
+			print (str);
+		}
+
+		# Prepare the output matrix
+		print ("Writing the output matrix...");
+		if (intercept_status == 2) {
+			beta_out = append (beta, beta_unscaled);
+		} else {
+			beta_out = beta;
+		}
+		
+		# Output which features give the best AIC and are being used for linear regression 
+		write (Selected, fileS, format=fmt);
+		
+		no_selected = ncol (Selected);
+		max_selected = max (Selected);
+		last = max_selected + 1;	
+		
+		if (intercept_status != 0) {
+		
+			Selected_ext = append (Selected, as.matrix (last));			
+			P1 = table (seq (1, ncol (Selected_ext)), t(Selected_ext)); 
+
+			if (intercept_status == 2) {
+			
+				P1_beta = P1 * beta;
+				P2_beta = colSums (P1_beta);
+				P1_beta_unscaled = P1 * beta_unscaled;
+				P2_beta_unscaled = colSums(P1_beta_unscaled);
+				
+				if (max_selected < m_orig) {
+					P2_beta = append (P2_beta, matrix (0, rows=1, cols=(m_orig - max_selected)));
+					P2_beta_unscaled = append (P2_beta_unscaled, matrix (0, rows=1, cols=(m_orig - max_selected)));
+					
+					P2_beta[1, m_orig+1] = P2_beta[1, max_selected + 1]; 
+					P2_beta[1, max_selected + 1] = 0;
+				
+					P2_beta_unscaled[1, m_orig+1] = P2_beta_unscaled[1, max_selected + 1]; 
+					P2_beta_unscaled[1, max_selected + 1] = 0;
+				}
+				beta_out = append (t(P2_beta), t(P2_beta_unscaled));
+				
+			} else {
+			
+				P1_beta = P1 * beta;
+				P2_beta = colSums (P1_beta);
+				
+				if (max_selected < m_orig) {
+					P2_beta = append (P2_beta, matrix (0, rows=1, cols=(m_orig - max_selected)));
+					P2_beta[1, m_orig+1] = P2_beta[1, max_selected + 1] ; 
+					P2_beta[1, max_selected + 1] = 0;
+				}
+				beta_out = t(P2_beta);
+				
+			}
+		} else {
+		
+			P1 = table (seq (1, no_selected), t(Selected)); 
+			P1_beta = P1 * beta;
+			P2_beta = colSums (P1_beta);	
+
+			if (max_selected < m_orig) {
+				P2_beta = append (P2_beta, matrix (0, rows=1, cols=(m_orig - max_selected)));
+			}		
+
+			beta_out = t(P2_beta);	
+		}
+		
+		write ( beta_out, fileB, format=fmt );		
+	}
+}
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/scripts/algorithms/Univar-Stats.dml
----------------------------------------------------------------------
diff --git a/scripts/algorithms/Univar-Stats.dml b/scripts/algorithms/Univar-Stats.dml
index abb3fea..62d6a28 100644
--- a/scripts/algorithms/Univar-Stats.dml
+++ b/scripts/algorithms/Univar-Stats.dml
@@ -1,150 +1,150 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-#
-# DML Script to compute univariate statistics for all attributes 
-# in a given data set
-#
-# Three inputs:
-#     $1) X - input data
-#     $2) TYPES - row matrix that denotes the "kind"/"type" of all attributes
-#             kind=1 for scale, 
-#             kind=2 for nominal,
-#             kind=3 for ordinal
-#
-# One output:
-#     $STATS) output directory in which following three statistics 
-#         files are created
-#         + base.stats - matrix with all 17 statistics (14 scale, 
-#         3 categorical) computed for all attributes
-#         + categorical.counts - matrix in which each column 
-#         gives the category-wise counts for all categories in 
-#         that attribute
-#
-#
-
-A = read($X); # data file
-K = read($TYPES); # attribute kind file
-
-# number of features/attributes
-n = ncol(A);
-
-# number of data records
-m = nrow(A);
-
-# number of statistics
-numBaseStats = 17; # (14 scale stats, 3 categorical stats)
-
-max_kind = max(K);
-
-# matrices to store computed statistics
-baseStats = matrix(0, rows=numBaseStats, cols=n);
-
-# Compute max domain size among all categorical attributes
-maxs = colMaxs(A);
-maxDomainSize = max( ppred(K, 1, ">") * maxs );
-maxDomain = as.integer(maxDomainSize);
-
-
-parfor(i in 1:n, check=0) {
-
-	# project out the i^th column
-	F = A[,i];
-
-	kind = castAsScalar(K[1,i]);
-
-	if ( kind == 1 ) {
-		#print("[" + i + "] Scale");
-		# compute SCALE statistics on the projected column
-		minimum = min(F);
-		maximum = max(F);
-		rng = maximum - minimum;
-
-		mu = mean(F);
-		m2 = moment(F, 2);
-		m3 = moment(F, 3);
-		m4 = moment(F, 4);
-
-		var = m/(m-1.0)*m2;
-		std_dev = sqrt(var);
-		se = std_dev/sqrt(m);
-		cv = std_dev/mu;
-
-		g1 = m3/(std_dev^3);
-		g2 = m4/(std_dev^4) - 3;
-		#se_g1=sqrt( 6*m*(m-1.0) / ((m-2.0)*(m+1.0)*(m+3.0)) ); 
-		se_g1=sqrt( (6/(m-2.0)) * (m/(m+1.0)) * ((m-1.0)/(m+3.0)) ); 
-
-		#se_g2= sqrt( (4*(m^2-1)*se_g1^2)/((m+5.0)*(m-3.0)) );  
-		se_g2=sqrt( (4/(m+5.0)) * ((m^2-1)/(m-3.0)) * se_g1^2 ); 
-
-		md = median(F); #quantile(F, 0.5);
-		iqm = interQuartileMean(F);
-
-		# place the computed statistics in output matrices
-		baseStats[1,i] = minimum;
-		baseStats[2,i] = maximum;
-		baseStats[3,i] = rng;
-
-		baseStats[4,i] = mu;
-		baseStats[5,i] = var;
-		baseStats[6,i] = std_dev;
-		baseStats[7,i] = se;
-		baseStats[8,i] = cv;
-
-		baseStats[9,i] = g1;
-		baseStats[10,i] = g2;
-		baseStats[11,i] = se_g1;
-		baseStats[12,i] = se_g2;
-
-		baseStats[13,i] = md;
-		baseStats[14,i] = iqm;
-	}
-	else {
-		if (kind == 2 | kind == 3) {
-			#print("[" + i + "] Categorical");
-			
-			# check if the categorical column has valid values
-			minF = min(F);
-			if (minF <=0) {
-				print("ERROR: Categorical attributes can only take values starting from 1. Encountered a value " + minF + " in attribute " + i);
-			}
-			else {
-				# compute CATEGORICAL statistics on the projected column
-				num_cat = max(F); # number of categories
-				cat_counts = table(F,1, maxDomain, 1);  # counts for each category
-
-				mode = rowIndexMax(t(cat_counts));
-				mx = max(cat_counts)
-				modeArr =  ppred(cat_counts, mx, "==")
-				numModes = sum(modeArr);
-
-				# place the computed statistics in output matrices
-				baseStats[15,i] = num_cat;
-				baseStats[16,i] = mode;
-				baseStats[17,i] = numModes;
-			}
-		}
-	}
-}
-
-write(baseStats, $STATS);
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+#
+# DML Script to compute univariate statistics for all attributes 
+# in a given data set
+#
+# Two inputs:
+#     $X) X - input data
+#     $TYPES) TYPES - row matrix that denotes the "kind"/"type" of all attributes
+#             kind=1 for scale, 
+#             kind=2 for nominal,
+#             kind=3 for ordinal
+#
+# One output:
+#     $STATS) output directory in which following three statistics 
+#         files are created
+#         + base.stats - matrix with all 17 statistics (14 scale, 
+#         3 categorical) computed for all attributes
+#         + categorical.counts - matrix in which each column 
+#         gives the category-wise counts for all categories in 
+#         that attribute
+#
+#
+
+A = read($X); # data file
+K = read($TYPES); # attribute kind file
+
+# number of features/attributes
+n = ncol(A);
+
+# number of data records
+m = nrow(A);
+
+# number of statistics
+numBaseStats = 17; # (14 scale stats, 3 categorical stats)
+
+max_kind = max(K);
+
+# matrices to store computed statistics
+baseStats = matrix(0, rows=numBaseStats, cols=n);
+
+# Compute max domain size among all categorical attributes
+maxs = colMaxs(A);
+maxDomainSize = max( ppred(K, 1, ">") * maxs );
+maxDomain = as.integer(maxDomainSize);
+
+
+parfor(i in 1:n, check=0) {
+
+	# project out the i^th column
+	F = A[,i];
+
+	kind = castAsScalar(K[1,i]);
+
+	if ( kind == 1 ) {
+		#print("[" + i + "] Scale");
+		# compute SCALE statistics on the projected column
+		minimum = min(F);
+		maximum = max(F);
+		rng = maximum - minimum;
+
+		mu = mean(F);
+		m2 = moment(F, 2);
+		m3 = moment(F, 3);
+		m4 = moment(F, 4);
+
+		var = m/(m-1.0)*m2;
+		std_dev = sqrt(var);
+		se = std_dev/sqrt(m);
+		cv = std_dev/mu;
+
+		g1 = m3/(std_dev^3);
+		g2 = m4/(std_dev^4) - 3;
+		#se_g1=sqrt( 6*m*(m-1.0) / ((m-2.0)*(m+1.0)*(m+3.0)) ); 
+		se_g1=sqrt( (6/(m-2.0)) * (m/(m+1.0)) * ((m-1.0)/(m+3.0)) ); 
+
+		#se_g2= sqrt( (4*(m^2-1)*se_g1^2)/((m+5.0)*(m-3.0)) );  
+		se_g2=sqrt( (4/(m+5.0)) * ((m^2-1)/(m-3.0)) * se_g1^2 ); 
+
+		md = median(F); #quantile(F, 0.5);
+		iqm = interQuartileMean(F);
+
+		# place the computed statistics in output matrices
+		baseStats[1,i] = minimum;
+		baseStats[2,i] = maximum;
+		baseStats[3,i] = rng;
+
+		baseStats[4,i] = mu;
+		baseStats[5,i] = var;
+		baseStats[6,i] = std_dev;
+		baseStats[7,i] = se;
+		baseStats[8,i] = cv;
+
+		baseStats[9,i] = g1;
+		baseStats[10,i] = g2;
+		baseStats[11,i] = se_g1;
+		baseStats[12,i] = se_g2;
+
+		baseStats[13,i] = md;
+		baseStats[14,i] = iqm;
+	}
+	else {
+		if (kind == 2 | kind == 3) {
+			#print("[" + i + "] Categorical");
+			
+			# check if the categorical column has valid values
+			minF = min(F);
+			if (minF <=0) {
+				print("ERROR: Categorical attributes can only take values starting from 1. Encountered a value " + minF + " in attribute " + i);
+			}
+			else {
+				# compute CATEGORICAL statistics on the projected column
+				num_cat = max(F); # number of categories
+				cat_counts = table(F,1, maxDomain, 1);  # counts for each category
+
+				mode = rowIndexMax(t(cat_counts));
+				mx = max(cat_counts)
+				modeArr =  ppred(cat_counts, mx, "==")
+				numModes = sum(modeArr);
+
+				# place the computed statistics in output matrices
+				baseStats[15,i] = num_cat;
+				baseStats[16,i] = mode;
+				baseStats[17,i] = numModes;
+			}
+		}
+	}
+}
+
+write(baseStats, $STATS);
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/scripts/algorithms/bivar-stats.dml
----------------------------------------------------------------------
diff --git a/scripts/algorithms/bivar-stats.dml b/scripts/algorithms/bivar-stats.dml
index 4846f56..99549dc 100644
--- a/scripts/algorithms/bivar-stats.dml
+++ b/scripts/algorithms/bivar-stats.dml
@@ -1,398 +1,398 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-#
-#
-# For a given pair of attribute sets, compute bivariate statistics between all attribute pairs 
-#   Given, index1 = {A_11, A_12, ... A_1m} and index2 = {A_21, A_22, ... A_2n} 
-#          compute bivariate stats for m*n pairs (A_1i, A_2j), (1<= i <=m) and (1<= j <=n)
-#
-# Six inputs:  
-#    1) X  - input data
-#    2) index1 - First attribute set {A_11, A_12, ... A_1m}
-#    3) index2 - Second attribute set {A_21, A_22, ... A_2n}
-#    4) types1 - kind for attributes in S1 
-#    5) types2 - kind for attributes in S2
-#             kind=1 for scale, kind=2 for nominal, kind=3 for ordinal
-# 
-# One output:    
-#    6) output directory in which following (maximum of) four statistics files are created
-#        + bivar.scale.scale.stats - matrix containing scale-scale correlations
-#        + bivar.nominal.nominal.stats - 
-#        + bivar.nominal.scale.stats - 
-#        + bivar.ordinal.ordinal.stats - 
-#
-# hadoop jar SystemML.jar -f bivar-stats.dml -nvargs X=<Data>
-#                                                    index1=<Feature Index Set 1>
-#                                                    index2=<Feature Index Set 2>
-#                                                    types1=<Feature Types 1>
-#                                                    types2=<Feature Types 2>
-#                                                    OUTDIR=<Output Location>
-
-D = read($X);  # input data set
-S1 = read($index1); # attribute set 1
-S2 = read($index2); # attribute set 2
-K1 = read($types1); # kind for attributes in S1
-K2 = read($types2); # kind for attributes in S2
-
-s1size = ncol(S1);
-s2size = ncol(S2);
-numPairs = s1size * s2size;
-
-#test: 1 is Pearson'R, 2 is F-test, 3 is chi-squared, 4 is Spearman'sRho
-# R, (chisq, df, pval, cramersv,) spearman, eta, anovaf, feature_col_index1, feature_col_index2, test
-
-num_scale_scale_tests = 0
-num_nominal_nominal_tests = 0
-num_ordinal_ordinal_tests = 0
-num_nominal_scale_tests = 0
-
-pair2row = matrix(0, rows=numPairs, cols=2)
-for( i in 1:s1size, check=0) {
-    pre_a1 = castAsScalar(S1[1,i]);
-    pre_k1 = castAsScalar(K1[1,i]);
-
-    for( j in 1:s2size, check=0) {
-        pre_pairID = (i-1)*s2size+j; 
-        pre_a2 = castAsScalar(S2[1,j]);
-        pre_k2 = castAsScalar(K2[1,j]);
-	
-	if (pre_k1 == pre_k2) {
-            if (pre_k1 == 1) {
-	        	num_scale_scale_tests = num_scale_scale_tests + 1
-				pair2row[pre_pairID,1] = num_scale_scale_tests
-            } else {
-	      		num_nominal_nominal_tests = num_nominal_nominal_tests + 1
-				pair2row[pre_pairID,1] = num_nominal_nominal_tests
-		
-                if ( pre_k1 == 3 ) {
-		    		num_ordinal_ordinal_tests = num_ordinal_ordinal_tests + 1
-		    		pair2row[pre_pairID, 2] = num_ordinal_ordinal_tests
-                }
-            }
-        }
-        else {
-            if (pre_k1 == 1 | pre_k2 == 1) {
-	        	num_nominal_scale_tests = num_nominal_scale_tests + 1
-				pair2row[pre_pairID,1] = num_nominal_scale_tests
-            } else {
-	        	num_nominal_nominal_tests = num_nominal_nominal_tests + 1
-				pair2row[pre_pairID,1] = num_nominal_nominal_tests 
-            }
-		}
-    }
-}
-
-size_scale_scale_tests     = max(num_scale_scale_tests, 1);
-size_nominal_nominal_tests = max(num_nominal_nominal_tests, 1)
-size_ordinal_ordinal_tests = max(num_ordinal_ordinal_tests, 1);
-size_nominal_scale_tests   = max(num_nominal_scale_tests, 1);
-
-basestats                 = matrix(0, rows=11, cols=numPairs);
-basestats_scale_scale     = matrix(0, rows=6, cols=size_scale_scale_tests)
-basestats_nominal_nominal = matrix(0, rows=6, cols=size_nominal_nominal_tests)
-basestats_ordinal_ordinal = matrix(0, rows=3, cols=size_ordinal_ordinal_tests)
-basestats_nominal_scale   = matrix(0, rows=11, cols=size_nominal_scale_tests)
-
-
-# Compute max domain size among all categorical attributes
-# and check if these cols have been recoded
-
-debug_str = "Stopping execution of DML script due to invalid input";
-
-error_flag = FALSE;
-
-maxs = colMaxs(D);
-mins = colMins(D)
-maxDomainSize = -1.0;
-for(k in 1:ncol(K1) ) {
-  type = as.scalar(K1[1,k]);
-  
-  if ( type > 1) {
-    colID = as.scalar(S1[1,k]);
-    
-    colMaximum = as.scalar(maxs[1,colID]);
-    if(maxDomainSize < colMaximum) maxDomainSize = colMaximum;
-  
-  	colMinimum = as.scalar(mins[1,colID]);
-  	if(colMinimum < 1){
-  	  if(type == 2)
-  	    debug_str = append(debug_str, "Column " + colID + " was declared as nominal but its minimum value is " + colMinimum)
-  	  else
-  	    debug_str = append(debug_str, "Column " + colID + " was declared as ordinal but its minimum value is " + colMinimum)
-  	  error_flag = TRUE;
-  	}
-  }
-}
-
-for(k in 1:ncol(K2) ) {
-  type = as.scalar(K2[1,k]);
-  
-  if ( type > 1) {
-    colID = as.scalar(S2[1,k]);
-    
-    colMaximum = as.scalar(maxs[1,colID]);
-    if(maxDomainSize < colMaximum) maxDomainSize = colMaximum;
-  
-  	colMinimum = as.scalar(mins[1,colID]);
-  	if(colMinimum < 1){
-  	  if(type == 2)
-  	    debug_str = append(debug_str, "Column " + colID + " was declared as nominal but its minimum value is " + colMinimum)
-  	  else 
-  	  	debug_str = append(debug_str, "Column " + colID + " was declared as ordinal but its minimum value is " + colMinimum)
-  	  error_flag = TRUE;
-  	}
-  }
-}
-maxDomain = as.integer(maxDomainSize);
-
-if(error_flag) stop(debug_str);
-
-parfor( i in 1:s1size, check=0) {
-    a1 = castAsScalar(S1[1,i]);
-    k1 = castAsScalar(K1[1,i]);
-    A1 = D[,a1];
-
-    parfor( j in 1:s2size, check=0) {
-        pairID = (i-1)*s2size+j; 
-        a2 = castAsScalar(S2[1,j]);
-        k2 = castAsScalar(K2[1,j]);
-        A2 = D[,a2];
-
-		rowid1 = castAsScalar(pair2row[pairID, 1])
-    	rowid2 = castAsScalar(pair2row[pairID, 2])
-
-        if (k1 == k2) {
-            if (k1 == 1) {
-                # scale-scale
-                print("[" + i + "," + j + "] scale-scale");
-                [r, cov, sigma1, sigma2] = bivar_ss(A1,A2);   
-		
-    			basestats_scale_scale[1,rowid1] = a1;
-				basestats_scale_scale[2,rowid1] = a2;	
-                basestats_scale_scale[3,rowid1] = r;
-                basestats_scale_scale[4,rowid1] = cov;
-                basestats_scale_scale[5,rowid1] = sigma1;
-                basestats_scale_scale[6,rowid1] = sigma2;
-            } else {
-                # nominal-nominal or ordinal-ordinal
-                print("[" + i + "," + j + "] categorical-categorical");
-                [chisq, df, pval, cramersv]  = bivar_cc(A1, A2, maxDomain);
-
-                basestats_nominal_nominal[1,rowid1] = a1;
-				basestats_nominal_nominal[2,rowid1] = a2;	
-                basestats_nominal_nominal[3,rowid1] = chisq;
-                basestats_nominal_nominal[4,rowid1] = df;
-                basestats_nominal_nominal[5,rowid1] = pval;
-                basestats_nominal_nominal[6,rowid1] = cramersv;
-
-                if ( k1 == 3 ) {
-                    # ordinal-ordinal
-                    print("[" + i + "," + j + "] ordinal-ordinal");
-                    sp = bivar_oo(A1, A2, maxDomain);
-
-                    basestats_ordinal_ordinal[1,rowid2] = a1;
-                    basestats_ordinal_ordinal[2,rowid2] = a2;
-                    basestats_ordinal_ordinal[3,rowid2] = sp;
-                }
-            }
-        } else {
-            if (k1 == 1 | k2 == 1) {
-                # Scale-nominal/ordinal     
-                print("[" + i + "," + j + "] scale-categorical");
-                
-               	if ( k1 == 1 ) {
-                	[eta, f, pval, bw_ss, within_ss, bw_df, within_df, bw_mean_square, within_mean_square] = bivar_sc(A1, A2, maxDomain);
-                } else {
-                    [eta, f, pval, bw_ss, within_ss, bw_df, within_df, bw_mean_square, within_mean_square] = bivar_sc(A2, A1, maxDomain);
-                }
-		
-                basestats_nominal_scale[1,rowid1] = a1;
-                basestats_nominal_scale[2,rowid1] = a2;
-                basestats_nominal_scale[3,rowid1] = eta;
-                basestats_nominal_scale[4,rowid1] = f;
-                basestats_nominal_scale[5,rowid1] = pval;
-                basestats_nominal_scale[6,rowid1] = bw_ss;
-                basestats_nominal_scale[7,rowid1] = within_ss;
-                basestats_nominal_scale[8,rowid1] = bw_df;
-                basestats_nominal_scale[9,rowid1] = within_df;
-                basestats_nominal_scale[10,rowid1] = bw_mean_square;
-                basestats_nominal_scale[11,rowid1] = within_mean_square;
-            } else {
-                # nominal-ordinal or ordinal-nominal
-                print("[" + i + "," + j + "] categorical-categorical");
-                [chisq, df, pval, cramersv]  = bivar_cc(A1, A2, maxDomain);
-
-				basestats_nominal_nominal[1,rowid1] = a1;
-				basestats_nominal_nominal[2,rowid1] = a2;
-                basestats_nominal_nominal[3,rowid1] = chisq;
-                basestats_nominal_nominal[4,rowid1] = df;
-                basestats_nominal_nominal[5,rowid1] = pval;
-                basestats_nominal_nominal[6,rowid1] = cramersv;
-            }
-        }
-    }
-}
-
-if(num_scale_scale_tests == size_scale_scale_tests){
-  write(basestats_scale_scale, $OUTDIR + "/bivar.scale.scale.stats");
-}
-
-if(num_nominal_scale_tests == size_nominal_scale_tests){
-  write(basestats_nominal_scale, $OUTDIR + "/bivar.nominal.scale.stats");
-}
-
-if(num_nominal_nominal_tests == size_nominal_nominal_tests){
-  write(basestats_nominal_nominal, $OUTDIR + "/bivar.nominal.nominal.stats");
-}
-
-if(num_ordinal_ordinal_tests == size_ordinal_ordinal_tests){
-  write(basestats_ordinal_ordinal, $OUTDIR + "/bivar.ordinal.ordinal.stats");
-}
-
-# -----------------------------------------------------------------------------------------------------------
-
-bivar_cc = function(Matrix[Double] A, Matrix[Double] B, Double maxDomain) return (Double chisq, Double df, Double pval, Double cramersv) {
-
-    # Contingency Table
-    F = table(A, B, maxDomain, maxDomain);
-    F = F[1:max(A), 1:max(B)];
-
-    # Chi-Squared
-    W = sum(F);
-    r = rowSums(F);
-    c = colSums(F);
-    E = (r %*% c)/W;
-    T = (F-E)^2/E;
-    chi_squared = sum(T);
-
-    # compute p-value
-    degFreedom = (nrow(F)-1)*(ncol(F)-1);
-    pValue = pchisq(target=chi_squared, df=degFreedom, lower.tail=FALSE);
-
-    # Cramer's V
-    R = nrow(F);
-    C = ncol(F);
-    q = min(R,C);
-    cramers_v = sqrt(chi_squared/(W*(q-1)));
-
-    # Assign return values
-    chisq = chi_squared;
-    df = as.double(degFreedom);
-    pval = pValue;
-    cramersv = cramers_v;
-}
-
-# -----------------------------------------------------------------------------------------------------------
-
-bivar_ss = function(Matrix[Double] X, Matrix[Double] Y) return (Double R, Double covXY, Double sigmaX, Double sigmaY) {
-
-    # Unweighted co-variance
-    covXY = cov(X,Y);
-
-    # compute standard deviations for both X and Y by computing 2^nd central moment
-    W = nrow(X);
-    m2X = moment(X,2);
-    m2Y = moment(Y,2);
-    sigmaX = sqrt(m2X * (W/(W-1.0)) );
-    sigmaY = sqrt(m2Y * (W/(W-1.0)) );
-
-    # Pearson's R
-    R = covXY / (sigmaX*sigmaY);
-}
-
-# -----------------------------------------------------------------------------------------------------------
-
-# Y points to SCALE variable
-# A points to CATEGORICAL variable
-bivar_sc = function(Matrix[Double] Y, Matrix[Double] A, Double maxDomain) 
-		   return (Double Eta, Double AnovaF, Double pval, Double bw_ss, Double within_ss, Double bw_df, Double within_df, Double bw_mean_square, Double within_mean_square) {
-
-    # mean and variance in target variable
-    W = nrow(A);
-    my = mean(Y);
-    varY = moment(Y,2) * W/(W-1.0)
-
-    # category-wise (frequencies, means, variances)
-    CFreqs = aggregate(target=Y, groups=A, fn="count", ngroups=maxDomain); 
-    CMeans = aggregate(target=Y, groups=A, fn="mean", ngroups=maxDomain);
-    CVars =  aggregate(target=Y, groups=A, fn="variance", ngroups=maxDomain);
-    
-    # number of categories
-    R = nrow(CFreqs);
-
-    Eta = sqrt(1 - ( sum((CFreqs-1)*CVars) / ((W-1)*varY) ));
-
-    bw_ss = sum( (CFreqs*(CMeans-my)^2) );
-    bw_df = as.double(R-1);
-    bw_mean_square = bw_ss/bw_df;
-	
-    within_ss = sum( (CFreqs-1)*CVars );
-    within_df = as.double(W-R);
-    within_mean_square = within_ss/within_df;
-	
-    AnovaF = bw_mean_square/within_mean_square;
-    
-    pval = pf(target=AnovaF, df1=bw_df, df2=within_df, lower.tail=FALSE)
-}
-
-
-# -----------------------------------------------------------------------------------------------------------
-# Function to compute ranks
-# takes a column vector as input, and produces a vector of same size in which each cell denotes to the computed score for that category
-computeRanks = function(Matrix[Double] X) return (Matrix[Double] Ranks) {
-    Ranks = cumsum(X) - X/2 + 1/2;
-}
-
-#-------------------------------------------------------------------------
-
-bivar_oo = function(Matrix[Double] A, Matrix[Double] B, Double maxDomain) return (Double sp) {
-
-    # compute contingency table
-    F = table(A, B, maxDomain, maxDomain);
-    F = F[1:max(A), 1:max(B)];
-    
-    catA = nrow(F);  # number of categories in A
-    catB = ncol(F);  # number of categories in B
-
-    # compute category-wise counts for both the attributes
-    R = rowSums(F);
-    S = colSums(F);
-
-    # compute scores, both are column vectors
-    [C] = computeRanks(R);
-    meanX = mean(C,R); 
-
-    columnS = t(S);
-    [D] = computeRanks(columnS);
-
-    # scores (C,D) are individual values, and counts (R,S) act as weights
-    meanY = mean(D,columnS);
-
-    W = sum(F); # total weight, or total #cases
-    varX = moment(C,R,2)*(W/(W-1.0));
-    varY = moment(D,columnS,2)*(W/(W-1.0));
-    covXY = sum( t(F/(W-1) * (C-meanX)) * (D-meanY) );
-
-    sp = covXY/(sqrt(varX)*sqrt(varY));
-}
-
-# -----------------------------------------------------------------------------------------------------------
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+#
+#
+# For a given pair of attribute sets, compute bivariate statistics between all attribute pairs 
+#   Given, index1 = {A_11, A_12, ... A_1m} and index2 = {A_21, A_22, ... A_2n} 
+#          compute bivariate stats for m*n pairs (A_1i, A_2j), (1<= i <=m) and (1<= j <=n)
+#
+# Five inputs:
+#    1) X  - input data
+#    2) index1 - First attribute set {A_11, A_12, ... A_1m}
+#    3) index2 - Second attribute set {A_21, A_22, ... A_2n}
+#    4) types1 - kind for attributes in S1 
+#    5) types2 - kind for attributes in S2
+#             kind=1 for scale, kind=2 for nominal, kind=3 for ordinal
+# 
+# One output:    
+#    6) output directory in which following (maximum of) four statistics files are created
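+#        + bivar.scale.scale.stats - one column per scale-scale pair: attribute ids, Pearson's R, covariance, standard deviations
+#        + bivar.nominal.nominal.stats - one column per categorical-categorical pair: attribute ids, chi-squared, degrees of freedom, p-value, Cramer's V
+#        + bivar.nominal.scale.stats - one column per scale-categorical pair: attribute ids, Eta, ANOVA F, p-value, sums of squares, degrees of freedom, mean squares
+#        + bivar.ordinal.ordinal.stats - one column per ordinal-ordinal pair: attribute ids, Spearman's rho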
+#
+# hadoop jar SystemML.jar -f bivar-stats.dml -nvargs X=<Data>
+#                                                    index1=<Feature Index Set 1>
+#                                                    index2=<Feature Index Set 2>
+#                                                    types1=<Feature Types 1>
+#                                                    types2=<Feature Types 2>
+#                                                    OUTDIR=<Output Location>
+
+D = read($X);  # input data set; attributes are addressed below as columns of D
+S1 = read($index1); # attribute set 1: row vector of column ids into D
+S2 = read($index2); # attribute set 2: row vector of column ids into D
+K1 = read($types1); # kind for attributes in S1 (1=scale, 2=nominal, 3=ordinal)
+K2 = read($types2); # kind for attributes in S2 (1=scale, 2=nominal, 3=ordinal)
+
+s1size = ncol(S1);  # number of attributes in set 1
+s2size = ncol(S2);  # number of attributes in set 2
+numPairs = s1size * s2size;  # every attribute of set 1 is paired with every attribute of set 2
+
+# test ids: 1 is Pearson's R, 2 is F-test, 3 is chi-squared, 4 is Spearman's Rho
+# R, (chisq, df, pval, cramersv,) spearman, eta, anovaf, feature_col_index1, feature_col_index2, test
+# Sequential pre-pass: count the tests of each family and give every attribute pair its output column (pair2row)
+num_scale_scale_tests = 0  # pairs in which both attributes are scale
+num_nominal_nominal_tests = 0  # pairs in which neither attribute is scale (nominal and/or ordinal)
+num_ordinal_ordinal_tests = 0  # pairs in which both attributes are ordinal
+num_nominal_scale_tests = 0  # pairs in which exactly one attribute is scale
+
+pair2row = matrix(0, rows=numPairs, cols=2)  # [pairID,1]: column in the pair's family matrix; [pairID,2]: column in the ordinal-ordinal matrix (stays 0 otherwise)
+for( i in 1:s1size, check=0) {
+    pre_a1 = castAsScalar(S1[1,i]);  # column id of attribute i; not used further inside this pre-pass
+    pre_k1 = castAsScalar(K1[1,i]);  # kind of attribute i
+
+    for( j in 1:s2size, check=0) {
+        pre_pairID = (i-1)*s2size+j;  # linear index of pair (i,j); j varies fastest
+        pre_a2 = castAsScalar(S2[1,j]);  # column id of attribute j; not used further inside this pre-pass
+        pre_k2 = castAsScalar(K2[1,j]);  # kind of attribute j
+	
+	if (pre_k1 == pre_k2) {
+            if (pre_k1 == 1) {
+	        	num_scale_scale_tests = num_scale_scale_tests + 1
+				pair2row[pre_pairID,1] = num_scale_scale_tests  # running count doubles as the 1-based output column
+            } else {
+	      		num_nominal_nominal_tests = num_nominal_nominal_tests + 1
+				pair2row[pre_pairID,1] = num_nominal_nominal_tests
+		
+                if ( pre_k1 == 3 ) {  # ordinal-ordinal pairs additionally get a slot in the ordinal-ordinal matrix
+		    		num_ordinal_ordinal_tests = num_ordinal_ordinal_tests + 1
+		    		pair2row[pre_pairID, 2] = num_ordinal_ordinal_tests
+                }
+            }
+        }
+        else {
+            if (pre_k1 == 1 | pre_k2 == 1) {  # kinds differ and one of them is scale
+	        	num_nominal_scale_tests = num_nominal_scale_tests + 1
+				pair2row[pre_pairID,1] = num_nominal_scale_tests
+            } else {  # kinds differ and neither is scale (nominal-ordinal or ordinal-nominal)
+	        	num_nominal_nominal_tests = num_nominal_nominal_tests + 1
+				pair2row[pre_pairID,1] = num_nominal_nominal_tests 
+            }
+		}
+    }
+}
+
+size_scale_scale_tests     = max(num_scale_scale_tests, 1);  # at least 1 so the matrix below always has a column
+size_nominal_nominal_tests = max(num_nominal_nominal_tests, 1)
+size_ordinal_ordinal_tests = max(num_ordinal_ordinal_tests, 1);
+size_nominal_scale_tests   = max(num_nominal_scale_tests, 1);
+# Result matrices: one column per pair of the given family, one row per statistic
+basestats                 = matrix(0, rows=11, cols=numPairs);  # NOTE(review): not referenced anywhere else in this script - looks unused
+basestats_scale_scale     = matrix(0, rows=6, cols=size_scale_scale_tests)  # rows: a1, a2, r, cov, sigma1, sigma2
+basestats_nominal_nominal = matrix(0, rows=6, cols=size_nominal_nominal_tests)  # rows: a1, a2, chisq, df, pval, cramersv
+basestats_ordinal_ordinal = matrix(0, rows=3, cols=size_ordinal_ordinal_tests)  # rows: a1, a2, sp
+basestats_nominal_scale   = matrix(0, rows=11, cols=size_nominal_scale_tests)  # rows: a1, a2, eta, f, pval, bw_ss, within_ss, bw_df, within_df, bw_mean_square, within_mean_square
+
+
+# Compute the max domain size (largest column maximum) among all categorical attributes
+# and check that these columns have been recoded, i.e. that their minimum value is at least 1
+
+debug_str = "Stopping execution of DML script due to invalid input";
+
+error_flag = FALSE;
+
+maxs = colMaxs(D);  # per-column maxima of the whole data set
+mins = colMins(D)  # per-column minima of the whole data set
+maxDomainSize = -1.0;  # stays -1 when neither attribute set contains a categorical attribute
+for(k in 1:ncol(K1) ) {
+  type = as.scalar(K1[1,k]);
+  
+  if ( type > 1) {  # kind 2 (nominal) or 3 (ordinal)
+    colID = as.scalar(S1[1,k]);
+    
+    colMaximum = as.scalar(maxs[1,colID]);
+    if(maxDomainSize < colMaximum) maxDomainSize = colMaximum;
+  
+  	colMinimum = as.scalar(mins[1,colID]);
+  	if(colMinimum < 1){  # categorical values are expected to start at 1
+  	  if(type == 2)
+  	    debug_str = append(debug_str, "Column " + colID + " was declared as nominal but its minimum value is " + colMinimum)  # NOTE(review): append is applied to strings here - confirm it concatenates them
+  	  else
+  	    debug_str = append(debug_str, "Column " + colID + " was declared as ordinal but its minimum value is " + colMinimum)
+  	  error_flag = TRUE;
+  	}
+  }
+}
+# Same check for the second attribute set
+for(k in 1:ncol(K2) ) {
+  type = as.scalar(K2[1,k]);
+  
+  if ( type > 1) {
+    colID = as.scalar(S2[1,k]);
+    
+    colMaximum = as.scalar(maxs[1,colID]);
+    if(maxDomainSize < colMaximum) maxDomainSize = colMaximum;
+  
+  	colMinimum = as.scalar(mins[1,colID]);
+  	if(colMinimum < 1){
+  	  if(type == 2)
+  	    debug_str = append(debug_str, "Column " + colID + " was declared as nominal but its minimum value is " + colMinimum)
+  	  else 
+  	  	debug_str = append(debug_str, "Column " + colID + " was declared as ordinal but its minimum value is " + colMinimum)
+  	  error_flag = TRUE;
+  	}
+  }
+}
+maxDomain = as.integer(maxDomainSize);  # passed to the bivar_* functions below as the table/aggregate size
+
+if(error_flag) stop(debug_str);  # abort before any statistics are computed
+
+parfor( i in 1:s1size, check=0) {  # NOTE(review): check=0 presumably turns off parfor's dependency analysis - confirm against the DML reference
+    a1 = castAsScalar(S1[1,i]);  # column id of the i-th attribute of set 1
+    k1 = castAsScalar(K1[1,i]);  # its kind
+    A1 = D[,a1];  # its data column
+
+    parfor( j in 1:s2size, check=0) {
+        pairID = (i-1)*s2size+j;  # same linear pair index as used when pair2row was filled
+        a2 = castAsScalar(S2[1,j]);
+        k2 = castAsScalar(K2[1,j]);
+        A2 = D[,a2];
+
+		rowid1 = castAsScalar(pair2row[pairID, 1])  # despite the name, used as the COLUMN index into the family's result matrix
+    	rowid2 = castAsScalar(pair2row[pairID, 2])  # column index into the ordinal-ordinal result matrix
+
+        if (k1 == k2) {
+            if (k1 == 1) {
+                # scale-scale
+                print("[" + i + "," + j + "] scale-scale");
+                [r, cov, sigma1, sigma2] = bivar_ss(A1,A2);   
+		
+    			basestats_scale_scale[1,rowid1] = a1;
+				basestats_scale_scale[2,rowid1] = a2;	
+                basestats_scale_scale[3,rowid1] = r;
+                basestats_scale_scale[4,rowid1] = cov;
+                basestats_scale_scale[5,rowid1] = sigma1;
+                basestats_scale_scale[6,rowid1] = sigma2;
+            } else {
+                # nominal-nominal or ordinal-ordinal
+                print("[" + i + "," + j + "] categorical-categorical");
+                [chisq, df, pval, cramersv]  = bivar_cc(A1, A2, maxDomain);
+
+                basestats_nominal_nominal[1,rowid1] = a1;
+				basestats_nominal_nominal[2,rowid1] = a2;	
+                basestats_nominal_nominal[3,rowid1] = chisq;
+                basestats_nominal_nominal[4,rowid1] = df;
+                basestats_nominal_nominal[5,rowid1] = pval;
+                basestats_nominal_nominal[6,rowid1] = cramersv;
+
+                if ( k1 == 3 ) {
+                    # ordinal-ordinal: reported in addition to the chi-squared statistics above
+                    print("[" + i + "," + j + "] ordinal-ordinal");
+                    sp = bivar_oo(A1, A2, maxDomain);
+
+                    basestats_ordinal_ordinal[1,rowid2] = a1;
+                    basestats_ordinal_ordinal[2,rowid2] = a2;
+                    basestats_ordinal_ordinal[3,rowid2] = sp;
+                }
+            }
+        } else {
+            if (k1 == 1 | k2 == 1) {
+                # Scale-nominal/ordinal     
+                print("[" + i + "," + j + "] scale-categorical");
+                
+               	if ( k1 == 1 ) {  # bivar_sc expects the scale column first and the categorical column second
+                	[eta, f, pval, bw_ss, within_ss, bw_df, within_df, bw_mean_square, within_mean_square] = bivar_sc(A1, A2, maxDomain);
+                } else {  # A2 is the scale column here, so the arguments are swapped
+                    [eta, f, pval, bw_ss, within_ss, bw_df, within_df, bw_mean_square, within_mean_square] = bivar_sc(A2, A1, maxDomain);
+                }
+		
+                basestats_nominal_scale[1,rowid1] = a1;  # ids are stored in (set 1, set 2) order regardless of which one is scale
+                basestats_nominal_scale[2,rowid1] = a2;
+                basestats_nominal_scale[3,rowid1] = eta;
+                basestats_nominal_scale[4,rowid1] = f;
+                basestats_nominal_scale[5,rowid1] = pval;
+                basestats_nominal_scale[6,rowid1] = bw_ss;
+                basestats_nominal_scale[7,rowid1] = within_ss;
+                basestats_nominal_scale[8,rowid1] = bw_df;
+                basestats_nominal_scale[9,rowid1] = within_df;
+                basestats_nominal_scale[10,rowid1] = bw_mean_square;
+                basestats_nominal_scale[11,rowid1] = within_mean_square;
+            } else {
+                # nominal-ordinal or ordinal-nominal
+                print("[" + i + "," + j + "] categorical-categorical");
+                [chisq, df, pval, cramersv]  = bivar_cc(A1, A2, maxDomain);
+
+				basestats_nominal_nominal[1,rowid1] = a1;
+				basestats_nominal_nominal[2,rowid1] = a2;
+                basestats_nominal_nominal[3,rowid1] = chisq;
+                basestats_nominal_nominal[4,rowid1] = df;
+                basestats_nominal_nominal[5,rowid1] = pval;
+                basestats_nominal_nominal[6,rowid1] = cramersv;
+            }
+        }
+    }
+}
+
+if(num_scale_scale_tests == size_scale_scale_tests){  # size is max(num, 1), so this is false exactly when no such test ran (num is 0)
+  write(basestats_scale_scale, $OUTDIR + "/bivar.scale.scale.stats");
+}
+
+if(num_nominal_scale_tests == size_nominal_scale_tests){  # skip the file when there were no scale-categorical pairs
+  write(basestats_nominal_scale, $OUTDIR + "/bivar.nominal.scale.stats");
+}
+
+if(num_nominal_nominal_tests == size_nominal_nominal_tests){  # skip the file when there were no categorical-categorical pairs
+  write(basestats_nominal_nominal, $OUTDIR + "/bivar.nominal.nominal.stats");
+}
+
+if(num_ordinal_ordinal_tests == size_ordinal_ordinal_tests){  # skip the file when there were no ordinal-ordinal pairs
+  write(basestats_ordinal_ordinal, $OUTDIR + "/bivar.ordinal.ordinal.stats");
+}
+
+# -----------------------------------------------------------------------------------------------------------
+
+bivar_cc = function(Matrix[Double] A, Matrix[Double] B, Double maxDomain) return (Double chisq, Double df, Double pval, Double cramersv) {
+    # Chi-squared statistic, its degrees of freedom and p-value, and Cramer's V for two categorical columns A and B
+    # Contingency Table
+    F = table(A, B, maxDomain, maxDomain);  # the two maxDomain arguments presumably fix the output dimensions - confirm
+    F = F[1:max(A), 1:max(B)];  # keep only rows/columns up to the largest value that occurs in A and in B
+
+    # Chi-Squared
+    W = sum(F);  # total number of observations
+    r = rowSums(F);  # marginal counts of A's categories (column vector)
+    c = colSums(F);  # marginal counts of B's categories (row vector)
+    E = (r %*% c)/W;  # outer product of the marginals divided by the total
+    T = (F-E)^2/E;  # NOTE(review): a category with no observations makes its E cells 0, giving 0/0 here - confirm how that is handled
+    chi_squared = sum(T);
+
+    # compute p-value
+    degFreedom = (nrow(F)-1)*(ncol(F)-1);
+    pValue = pchisq(target=chi_squared, df=degFreedom, lower.tail=FALSE);
+
+    # Cramer's V
+    R = nrow(F);
+    C = ncol(F);
+    q = min(R,C);
+    cramers_v = sqrt(chi_squared/(W*(q-1)));  # NOTE(review): q-1 is 0 when either attribute has a single category
+
+    # Assign return values
+    chisq = chi_squared;
+    df = as.double(degFreedom);
+    pval = pValue;
+    cramersv = cramers_v;
+}
+
+# -----------------------------------------------------------------------------------------------------------
+
+bivar_ss = function(Matrix[Double] X, Matrix[Double] Y) return (Double R, Double covXY, Double sigmaX, Double sigmaY) {
+    # Pearson's R for two scale columns X and Y, returned together with their covariance and standard deviations
+    # Unweighted co-variance
+    covXY = cov(X,Y);
+
+    # compute standard deviations for both X and Y from the 2nd central moment
+    W = nrow(X);  # number of observations
+    m2X = moment(X,2);
+    m2Y = moment(Y,2);
+    sigmaX = sqrt(m2X * (W/(W-1.0)) );  # rescale the moment by W/(W-1) before taking the root
+    sigmaY = sqrt(m2Y * (W/(W-1.0)) );
+
+    # Pearson's R
+    R = covXY / (sigmaX*sigmaY);
+}
+
+# -----------------------------------------------------------------------------------------------------------
+
+# Y points to SCALE variable
+# A points to CATEGORICAL variable
+bivar_sc = function(Matrix[Double] Y, Matrix[Double] A, Double maxDomain) 
+		   return (Double Eta, Double AnovaF, Double pval, Double bw_ss, Double within_ss, Double bw_df, Double within_df, Double bw_mean_square, Double within_mean_square) {
+    # Eta and the ANOVA F statistic (with p-value) of scale column Y grouped by categorical column A, plus the sums of squares, degrees of freedom and mean squares behind them
+    # mean and variance in target variable
+    W = nrow(A);  # number of observations
+    my = mean(Y);
+    varY = moment(Y,2) * W/(W-1.0)  # 2nd central moment rescaled by W/(W-1)
+
+    # category-wise (frequencies, means, variances)
+    CFreqs = aggregate(target=Y, groups=A, fn="count", ngroups=maxDomain); 
+    CMeans = aggregate(target=Y, groups=A, fn="mean", ngroups=maxDomain);
+    CVars =  aggregate(target=Y, groups=A, fn="variance", ngroups=maxDomain);
+    
+    # number of categories
+    R = nrow(CFreqs);  # NOTE(review): with ngroups=maxDomain this may count categories that never occur in A - confirm
+
+    Eta = sqrt(1 - ( sum((CFreqs-1)*CVars) / ((W-1)*varY) ));  # the subtracted ratio is within_ss over (W-1)*varY
+
+    bw_ss = sum( (CFreqs*(CMeans-my)^2) );  # between-group sum of squares
+    bw_df = as.double(R-1);
+    bw_mean_square = bw_ss/bw_df;
+	
+    within_ss = sum( (CFreqs-1)*CVars );  # within-group sum of squares, rebuilt from the per-category variances
+    within_df = as.double(W-R);
+    within_mean_square = within_ss/within_df;
+	
+    AnovaF = bw_mean_square/within_mean_square;
+    
+    pval = pf(target=AnovaF, df1=bw_df, df2=within_df, lower.tail=FALSE)
+}
+
+
+# -----------------------------------------------------------------------------------------------------------
+# Function to compute ranks
+# takes a column vector (the callers pass per-category counts) as input, and produces a vector of the same size in which each cell holds the computed score for that category
+computeRanks = function(Matrix[Double] X) return (Matrix[Double] Ranks) {
+    Ranks = cumsum(X) - X/2 + 1/2;  # equals the average of cumsum(X)-X+1 and cumsum(X), i.e. of the first and last rank covered by each category
+}
+
+#-------------------------------------------------------------------------
+
+bivar_oo = function(Matrix[Double] A, Matrix[Double] B, Double maxDomain) return (Double sp) {
+    # Correlation sp between two ordinal columns A and B, computed on rank scores derived from their contingency table (stored by the caller as the ordinal-ordinal statistic)
+    # compute contingency table
+    F = table(A, B, maxDomain, maxDomain);
+    F = F[1:max(A), 1:max(B)];  # keep only rows/columns up to the largest value that occurs in A and in B
+    
+    catA = nrow(F);  # number of categories in A (not used below)
+    catB = ncol(F);  # number of categories in B (not used below)
+
+    # compute category-wise counts for both the attributes
+    R = rowSums(F);  # counts per category of A (column vector)
+    S = colSums(F);  # counts per category of B (row vector)
+
+    # compute scores, both are column vectors
+    [C] = computeRanks(R);
+    meanX = mean(C,R);  # mean of the scores C with the counts R passed as weights
+
+    columnS = t(S);  # transpose so B's counts form a column vector like R
+    [D] = computeRanks(columnS);
+
+    # scores (C,D) are individual values, and counts (R,S) act as weights
+    meanY = mean(D,columnS);
+
+    W = sum(F); # total weight, or total #cases
+    varX = moment(C,R,2)*(W/(W-1.0));  # weighted 2nd moment of the scores, rescaled by W/(W-1)
+    varY = moment(D,columnS,2)*(W/(W-1.0));
+    covXY = sum( t(F/(W-1) * (C-meanX)) * (D-meanY) );  # sum over cells of F[a,b]*(C[a]-meanX)*(D[b]-meanY)/(W-1)
+
+    sp = covXY/(sqrt(varX)*sqrt(varY));
+}
+
+# -----------------------------------------------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/scripts/algorithms/decision-tree-predict.dml
----------------------------------------------------------------------
diff --git a/scripts/algorithms/decision-tree-predict.dml b/scripts/algorithms/decision-tree-predict.dml
index 9e01adb..3447da6 100644
--- a/scripts/algorithms/decision-tree-predict.dml
+++ b/scripts/algorithms/decision-tree-predict.dml
@@ -1,142 +1,142 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-#  
-# THIS SCRIPT COMPUTES LABEL PREDICTIONS MEANT FOR USE WITH A DECISION TREE MODEL ON A HELD OUT TEST SET.
-#
-# INPUT         PARAMETERS:
-# ---------------------------------------------------------------------------------------------
-# NAME          TYPE     DEFAULT      MEANING
-# ---------------------------------------------------------------------------------------------
-# X             String   ---          Location to read the test feature matrix X; note that X needs to be both recoded and dummy coded 
-# Y	 		    String   " "		  Location to read the true label matrix Y if requested; note that Y needs to be both recoded and dummy coded
-# R   	  		String   " "	      Location to read matrix R which for each feature in X contains the following information 
-#										- R[,1]: column ids
-#										- R[,2]: start indices 
-#										- R[,3]: end indices
-#									  If R is not provided by default all variables are assumed to be scale
-# M             String 	 ---	   	  Location to read matrix M containing the learned tree in the following format
-#								 		- M[1,j]: id of node j (in a complete binary tree)
-#	 									- M[2,j]: Offset (no. of columns) to left child of j if j is an internal node, otherwise 0
-#	 									- M[3,j]: Feature index of the feature that node j looks at if j is an internal node, otherwise 0
-#	 									- M[4,j]: Type of the feature that node j looks at if j is an internal node: 1 for scale and 2 for categorical features, 
-#		     									  otherwise the label that leaf node j is supposed to predict
-#	 									- M[5,j]: If j is an internal node: 1 if the feature chosen for j is scale, otherwise the size of the subset of values 
-#			 									  stored in rows 6,7,... if j is categorical 
-#						 						  If j is a leaf node: number of misclassified samples reaching at node j 
-#	 									- M[6:,j]: If j is an internal node: Threshold the example's feature value is compared to is stored at M[6,j] 
-#							   					   if the feature chosen for j is scale, otherwise if the feature chosen for j is categorical rows 6,7,... 
-#												   depict the value subset chosen for j
-#	          									   If j is a leaf node 1 if j is impure and the number of samples at j > threshold, otherwise 0
-# P				String   ---		  Location to store the label predictions for X
-# A     		String   " "          Location to write the test accuracy (%) for the prediction if requested
-# CM     		String   " "		  Location to write the confusion matrix if requested 
-# fmt     	    String   "text"       The output format of the output, such as "text" or "csv"
-# ---------------------------------------------------------------------------------------------
-# OUTPUT: 
-#	1- Matrix Y containing the predicted labels for X 
-#   2- Test accuracy if requested
-#   3- Confusion matrix C if requested
-# -------------------------------------------------------------------------------------------
-# HOW TO INVOKE THIS SCRIPT - EXAMPLE:
-# hadoop jar SystemML.jar -f decision-tree-predict.dml -nvargs X=INPUT_DIR/X Y=INPUT_DIR/Y R=INPUT_DIR/R M=INPUT_DIR/model P=OUTPUT_DIR/predictions
-#														A=OUTPUT_DIR/accuracy CM=OUTPUT_DIR/confusion fmt=csv
-
-fileX = $X;
-fileM = $M;
-fileP = $P;
-fileY = ifdef ($Y, " ");
-fileR = ifdef ($R, " ");
-fileCM = ifdef ($CM, " ");
-fileA = ifdef ($A, " ");
-fmtO = ifdef ($fmt, "text");
-X_test = read (fileX);
-M = read (fileM);
-
-num_records = nrow (X_test);
-Y_predicted = matrix (0, rows = num_records, cols = 1);
-
-R_cat = matrix (0, rows = 1, cols = 1);
-R_scale = matrix (0, rows = 1, cols = 1);
-
-if (fileR != " ") {
-	R = read (fileR);
-	dummy_coded = ppred (R[,2], R[,3], "!=");
-	R_scale = removeEmpty (target = R[,2] * (1 - dummy_coded), margin = "rows");
-	R_cat = removeEmpty (target = R[,2:3] * dummy_coded, margin = "rows");
-} else { # only scale features available
-	R_scale = seq (1, ncol (X_test));
-}
-
-parfor (i in 1:num_records, check = 0) {
-	cur_sample = X_test[i,];
-	cur_node_pos = 1;
-	label_found = FALSE;
-	while (!label_found) {
-		cur_feature = as.scalar (M[3,cur_node_pos]);	
-		type_label = as.scalar (M[4,cur_node_pos]);
-		if (cur_feature == 0) { # leaf node
-			label_found = TRUE;
-			Y_predicted[i,] = type_label;
-		} else {
-			# determine type: 1 for scale, 2 for categorical 
-			if (type_label == 1) { # scale feature
-				cur_start_ind = as.scalar (R_scale[cur_feature,]);
-				cur_value = as.scalar (cur_sample[,cur_start_ind]);
-				cur_split = as.scalar (M[6,cur_node_pos]);
-				if (cur_value < cur_split) { # go to left branch
-					cur_node_pos = cur_node_pos + as.scalar (M[2,cur_node_pos]);
-				} else { # go to right branch
-					cur_node_pos = cur_node_pos + as.scalar (M[2,cur_node_pos]) + 1;
-				}
-			} else if (type_label == 2) { # categorical feature				
-				cur_start_ind = as.scalar (R_cat[cur_feature,1]);
-				cur_end_ind = as.scalar (R_cat[cur_feature,2]);					
-				cur_value = as.scalar (rowIndexMax(cur_sample[,cur_start_ind:cur_end_ind])); # as.scalar (cur_sample[,cur_feature]);
-				cur_offset = as.scalar (M[5,cur_node_pos]);
-				value_found = sum (ppred (M[6:(6 + cur_offset - 1),cur_node_pos], cur_value, "=="));
-				if (value_found) { # go to left branch
-					cur_node_pos = cur_node_pos + as.scalar (M[2,cur_node_pos]);
-				} else { # go to right branch
-					cur_node_pos = cur_node_pos + as.scalar (M[2,cur_node_pos]) + 1;
-				}
-}}}}
-
-write (Y_predicted, fileP, format = fmtO);
-
-if (fileY != " ") {
-	Y_test = read (fileY);
-	num_classes = ncol (Y_test);
-	Y_test = rowSums (Y_test * t (seq (1, num_classes)));
-	result = ppred (Y_test, Y_predicted, "==");
-	result = sum (result);
-	accuracy = result / num_records * 100;
-	acc_str = "Accuracy (%): " + accuracy;
-	if (fileA != " ") {
-		write (acc_str, fileA, format = fmtO);
-	} else {
-		print (acc_str);
-	}
-	if (fileCM != " ") {
-		confusion_mat = table(Y_predicted, Y_test, num_classes, num_classes)
-        write(confusion_mat, fileCM, format = fmtO)
-	}
-}
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+#  
+# THIS SCRIPT COMPUTES LABEL PREDICTIONS MEANT FOR USE WITH A DECISION TREE MODEL ON A HELD OUT TEST SET.
+#
+# INPUT         PARAMETERS:
+# ---------------------------------------------------------------------------------------------
+# NAME          TYPE     DEFAULT      MEANING
+# ---------------------------------------------------------------------------------------------
+# X             String   ---          Location to read the test feature matrix X; note that X needs to be both recoded and dummy coded 
+# Y	 		    String   " "		  Location to read the true label matrix Y if requested; note that Y needs to be both recoded and dummy coded
+# R   	  		String   " "	      Location to read matrix R which for each feature in X contains the following information 
+#										- R[,1]: column ids
+#										- R[,2]: start indices 
+#										- R[,3]: end indices
+#									  If R is not provided by default all variables are assumed to be scale
+# M             String 	 ---	   	  Location to read matrix M containing the learned tree in the following format
+#								 		- M[1,j]: id of node j (in a complete binary tree)
+#	 									- M[2,j]: Offset (no. of columns) to left child of j if j is an internal node, otherwise 0
+#	 									- M[3,j]: Feature index of the feature that node j looks at if j is an internal node, otherwise 0
+#	 									- M[4,j]: Type of the feature that node j looks at if j is an internal node: 1 for scale and 2 for categorical features, 
+#		     									  otherwise the label that leaf node j is supposed to predict
+#	 									- M[5,j]: If j is an internal node: 1 if the feature chosen for j is scale, otherwise the size of the subset of values 
+#			 									  stored in rows 6,7,... if j is categorical 
+#						 						  If j is a leaf node: number of misclassified samples reaching at node j 
+#	 									- M[6:,j]: If j is an internal node: Threshold the example's feature value is compared to is stored at M[6,j] 
+#							   					   if the feature chosen for j is scale, otherwise if the feature chosen for j is categorical rows 6,7,... 
+#												   depict the value subset chosen for j
+#	          									   If j is a leaf node 1 if j is impure and the number of samples at j > threshold, otherwise 0
+# P				String   ---		  Location to store the label predictions for X
+# A     		String   " "          Location to write the test accuracy (%) for the prediction if requested
+# CM     		String   " "		  Location to write the confusion matrix if requested 
+# fmt     	    String   "text"       The output format of the output, such as "text" or "csv"
+# ---------------------------------------------------------------------------------------------
+# OUTPUT: 
+#	1- Matrix Y containing the predicted labels for X 
+#   2- Test accuracy if requested
+#   3- Confusion matrix C if requested
+# -------------------------------------------------------------------------------------------
+# HOW TO INVOKE THIS SCRIPT - EXAMPLE:
+# hadoop jar SystemML.jar -f decision-tree-predict.dml -nvargs X=INPUT_DIR/X Y=INPUT_DIR/Y R=INPUT_DIR/R M=INPUT_DIR/model P=OUTPUT_DIR/predictions
+#														A=OUTPUT_DIR/accuracy CM=OUTPUT_DIR/confusion fmt=csv
+
+fileX = $X;
+fileM = $M;
+fileP = $P;
+fileY = ifdef ($Y, " ");
+fileR = ifdef ($R, " ");
+fileCM = ifdef ($CM, " ");
+fileA = ifdef ($A, " ");
+fmtO = ifdef ($fmt, "text");
+X_test = read (fileX);
+M = read (fileM);
+
+num_records = nrow (X_test);
+Y_predicted = matrix (0, rows = num_records, cols = 1);
+
+R_cat = matrix (0, rows = 1, cols = 1);
+R_scale = matrix (0, rows = 1, cols = 1);
+
+if (fileR != " ") {
+	R = read (fileR);
+	dummy_coded = ppred (R[,2], R[,3], "!=");
+	R_scale = removeEmpty (target = R[,2] * (1 - dummy_coded), margin = "rows");
+	R_cat = removeEmpty (target = R[,2:3] * dummy_coded, margin = "rows");
+} else { # only scale features available
+	R_scale = seq (1, ncol (X_test));
+}
+
+parfor (i in 1:num_records, check = 0) {
+	cur_sample = X_test[i,];
+	cur_node_pos = 1;
+	label_found = FALSE;
+	while (!label_found) {
+		cur_feature = as.scalar (M[3,cur_node_pos]);	
+		type_label = as.scalar (M[4,cur_node_pos]);
+		if (cur_feature == 0) { # leaf node
+			label_found = TRUE;
+			Y_predicted[i,] = type_label;
+		} else {
+			# determine type: 1 for scale, 2 for categorical 
+			if (type_label == 1) { # scale feature
+				cur_start_ind = as.scalar (R_scale[cur_feature,]);
+				cur_value = as.scalar (cur_sample[,cur_start_ind]);
+				cur_split = as.scalar (M[6,cur_node_pos]);
+				if (cur_value < cur_split) { # go to left branch
+					cur_node_pos = cur_node_pos + as.scalar (M[2,cur_node_pos]);
+				} else { # go to right branch
+					cur_node_pos = cur_node_pos + as.scalar (M[2,cur_node_pos]) + 1;
+				}
+			} else if (type_label == 2) { # categorical feature				
+				cur_start_ind = as.scalar (R_cat[cur_feature,1]);
+				cur_end_ind = as.scalar (R_cat[cur_feature,2]);					
+				cur_value = as.scalar (rowIndexMax(cur_sample[,cur_start_ind:cur_end_ind])); # as.scalar (cur_sample[,cur_feature]);
+				cur_offset = as.scalar (M[5,cur_node_pos]);
+				value_found = sum (ppred (M[6:(6 + cur_offset - 1),cur_node_pos], cur_value, "=="));
+				if (value_found) { # go to left branch
+					cur_node_pos = cur_node_pos + as.scalar (M[2,cur_node_pos]);
+				} else { # go to right branch
+					cur_node_pos = cur_node_pos + as.scalar (M[2,cur_node_pos]) + 1;
+				}
+}}}}
+
+write (Y_predicted, fileP, format = fmtO);
+
+if (fileY != " ") {
+	Y_test = read (fileY);
+	num_classes = ncol (Y_test);
+	Y_test = rowSums (Y_test * t (seq (1, num_classes)));
+	result = ppred (Y_test, Y_predicted, "==");
+	result = sum (result);
+	accuracy = result / num_records * 100;
+	acc_str = "Accuracy (%): " + accuracy;
+	if (fileA != " ") {
+		write (acc_str, fileA, format = fmtO);
+	} else {
+		print (acc_str);
+	}
+	if (fileCM != " ") {
+		confusion_mat = table(Y_predicted, Y_test, num_classes, num_classes)
+        write(confusion_mat, fileCM, format = fmtO)
+	}
+}


[46/55] [partial] incubator-systemml git commit: [SYSTEMML-482] [SYSTEMML-480] Adding a Git attributes file to enfore Unix-styled line endings, and normalizing all of the line endings.

Posted by du...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/scripts/algorithms/Kmeans.dml
----------------------------------------------------------------------
diff --git a/scripts/algorithms/Kmeans.dml b/scripts/algorithms/Kmeans.dml
index 3ec47a8..2887baa 100644
--- a/scripts/algorithms/Kmeans.dml
+++ b/scripts/algorithms/Kmeans.dml
@@ -1,282 +1,282 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-#
-# Implements the k-Means clustering algorithm
-#
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------
-# NAME  TYPE   DEFAULT  MEANING
-# ----------------------------------------------------------------------------
-# X     String   ---    Location to read matrix X with the input data records
-# k     Int      ---    Number of centroids
-# runs  Int       10    Number of runs (with different initial centroids)
-# maxi  Int     1000    Maximum number of iterations per run
-# tol   Double 0.000001 Tolerance (epsilon) for WCSS change ratio
-# samp  Int       50    Average number of records per centroid in data samples
-# C     String  "C.mtx" Location to store the output matrix with the centroids
-# isY   Int        0    0 = do not write Y,  1 = write Y
-# Y     String  "Y.mtx" Location to store the mapping of records to centroids
-# fmt   String  "text"  Matrix output format, usually "text" or "csv"
-# verb  Int        0    0 = do not print per-iteration stats, 1 = print them
-# ----------------------------------------------------------------------------
-#
-# Example:
-# hadoop jar SystemML.jar -f Kmeans.dml -nvargs X=X.mtx k=5 C=centroids.mtx
-# hadoop jar SystemML.jar -f Kmeans.dml -nvargs X=X.mtx k=5 runs=100 maxi=5000 tol=0.00000001 samp=20 C=centroids.mtx isY=1 Y=clusters.mtx verb=1
-
-fileX = $X;
-fileY = ifdef ($Y, "Y.mtx");
-fileC = ifdef ($C, "C.mtx");
-
-num_centroids = $k;
-num_runs   = ifdef ($runs, 10);      # $runs=10;
-max_iter   = ifdef ($maxi, 1000);    # $maxi=1000;
-eps        = ifdef ($tol, 0.000001); # $tol=0.000001;
-is_write_Y = ifdef ($isY, 0);        # $isY=0;
-is_verbose = ifdef ($verb, 0);       # $verb=0;
-fmtCY      = ifdef ($fmt, "text");   # $fmt="text";
-avg_sample_size_per_centroid = ifdef ($samp, 50);  # $samp=50;
-
-
-print ("BEGIN K-MEANS SCRIPT");
-print ("Reading X...");
-
-# X : matrix of data points as rows
-X = read (fileX);
-num_records   = nrow (X);
-num_features  = ncol (X);
-
-sumXsq = sum (X ^ 2);
-# Remark - A useful rewrite: sum (A %*% B) = sum (t(colSums(A)) * rowSums(B))
-
-# STEP 1: INITIALIZE CENTROIDS FOR ALL RUNS FROM DATA SAMPLES:
-
-print ("Taking data samples for initialization...");
-
-[sample_maps, samples_vs_runs_map, sample_block_size] = 
-    get_sample_maps (num_records, num_runs, num_centroids * avg_sample_size_per_centroid);
-
-is_row_in_samples = rowSums (sample_maps);
-X_samples = sample_maps %*% X;
-X_samples_sq_norms = rowSums (X_samples ^ 2);
-
-print ("Initializing the centroids for all runs...");
-All_Centroids = matrix (0, rows = (num_runs * num_centroids), cols = num_features);
-
-# We select centroids according to the k-Means++ heuristic applied to a sample of X
-# Loop invariant: min_distances ~ sq.distances from X_sample rows to nearest centroids,
-# with the out-of-range X_sample positions in min_distances set to 0.0
-
-min_distances = is_row_in_samples;  # Pick the 1-st centroids uniformly at random
-
-for (i in 1 : num_centroids)
-{
-    # "Matricize" and prefix-sum to compute the cumulative distribution function:
-    min_distances_matrix_form = 
-        matrix (min_distances, rows = sample_block_size, cols = num_runs, byrow = FALSE);
-    cdf_min_distances = cumsum (min_distances_matrix_form);
-    
-    # Select the i-th centroid in each sample as a random sample row id with
-    # probability ~ min_distances:
-    random_row = Rand (rows = 1, cols = num_runs, min = 0.0, max = 1.0);  
-    threshold_matrix = random_row * cdf_min_distances [sample_block_size, ];
-    centroid_ids = t(colSums (ppred (cdf_min_distances, threshold_matrix, "<"))) + 1;
-    
-    # Place the selected centroids together, one per run, into a matrix:
-    centroid_placer = matrix (0, rows = num_runs, cols = (sample_block_size * num_runs));
-    centroid_placer_raw = 
-        table (seq (1, num_runs, 1), sample_block_size * seq (0, num_runs - 1, 1) + centroid_ids);
-    centroid_placer [, 1 : ncol (centroid_placer_raw)] = centroid_placer_raw;
-    centroids = centroid_placer %*% X_samples;
-    
-    # Place the selected centroids into their appropriate slots in All_Centroids:
-    centroid_placer = matrix (0, rows = nrow (All_Centroids), cols = num_runs);
-    centroid_placer_raw = 
-        table (seq (i, num_centroids * (num_runs - 1) + i, num_centroids), seq (1, num_runs, 1));
-    centroid_placer [1 : nrow (centroid_placer_raw), ] = centroid_placer_raw;
-    All_Centroids = All_Centroids + centroid_placer %*% centroids;
-    
-    # Update min_distances to preserve the loop invariant:
-    distances = X_samples_sq_norms + samples_vs_runs_map %*% rowSums (centroids ^ 2)
-              - 2 * rowSums (X_samples * (samples_vs_runs_map %*% centroids));
-    if (i == 1) {
-        min_distances = is_row_in_samples * distances;
-    } else {
-        min_distances = min (min_distances, distances);
-}   }
-
-# STEP 2: PERFORM K-MEANS ITERATIONS FOR ALL RUNS:
-
-termination_code = matrix (0, rows = num_runs, cols = 1);
-final_wcss = matrix (0, rows = num_runs, cols = 1);
-num_iterations = matrix (0, rows = num_runs, cols = 1);
-
-print ("Performing k-means iterations for all runs...");
-
-parfor (run_index in 1 : num_runs, check = 0)
-{
-    C = All_Centroids [(num_centroids * (run_index - 1) + 1) : (num_centroids * run_index), ];
-    C_old = C;
-    iter_count = 0;
-    term_code = 0;
-    wcss = 0;
-
-    while (term_code == 0)
-    {
-        # Compute Euclidean squared distances from records (X rows) to centroids (C rows)
-        # without the C-independent term, then take the minimum for each record
-        D = -2 * (X %*% t(C)) + t(rowSums (C ^ 2));
-        minD = rowMins (D);
-        # Compute the current centroid-based within-cluster sum of squares (WCSS)
-        wcss_old = wcss;
-        wcss = sumXsq + sum (minD);
-        if (is_verbose == 1) {
-            if (iter_count == 0) {
-                print ("Run " + run_index + ", At Start-Up:  Centroid WCSS = " + wcss);
-            } else {
-                print ("Run " + run_index + ", Iteration " + iter_count + ":  Centroid WCSS = " + wcss
-                    + ";  Centroid change (avg.sq.dist.) = " + (sum ((C - C_old) ^ 2) / num_centroids));
-        }   }
-        # Check if convergence or maximum iteration has been reached
-        if (wcss_old - wcss < eps * wcss & iter_count > 0) {
-            term_code = 1;  # Convergence is reached
-        } else {
-            if (iter_count >= max_iter) {
-                term_code = 2;  # Maximum iteration is reached
-            } else {
-                iter_count = iter_count + 1;
-                # Find the closest centroid for each record
-                P = ppred (D, minD, "<=");
-                # If some records belong to multiple centroids, share them equally
-                P = P / rowSums (P);
-                # Compute the column normalization factor for P
-                P_denom = colSums (P);
-                if (sum (ppred (P_denom, 0.0, "<=")) > 0) {
-                    term_code = 3;  # There is a "runaway" centroid with 0.0 denominator
-                } else {
-                    C_old = C;
-                    # Compute new centroids as weighted averages over the records
-                    C = (t(P) %*% X) / t(P_denom);
-    }   }   }   }
-    print ("Run " + run_index + ", Iteration " + iter_count + ":  Terminated with code = " + term_code + ",  Centroid WCSS = " + wcss);
-    All_Centroids [(num_centroids * (run_index - 1) + 1) : (num_centroids * run_index), ] = C;
-    final_wcss [run_index, 1] = wcss;
-    termination_code [run_index, 1] = term_code;
-    num_iterations [run_index, 1] = iter_count;
-}
-
-# STEP 3: SELECT THE RUN WITH BEST CENTROID-WCSS AND OUTPUT ITS CENTROIDS:
-
-termination_bitmap = matrix (0, rows = num_runs, cols = 3);
-termination_bitmap_raw = table (seq (1, num_runs, 1), termination_code);
-termination_bitmap [, 1 : ncol(termination_bitmap_raw)] = termination_bitmap_raw;
-termination_stats = colSums (termination_bitmap);
-print ("Number of successful runs = " + as.integer (castAsScalar (termination_stats [1, 1])));
-print ("Number of incomplete runs = " + as.integer (castAsScalar (termination_stats [1, 2])));
-print ("Number of failed runs (with lost centroids) = " + as.integer (castAsScalar (termination_stats [1, 3])));
-
-num_successful_runs = castAsScalar (termination_stats [1, 1]);
-if (num_successful_runs > 0) {
-    final_wcss_successful = final_wcss * termination_bitmap [, 1];
-    worst_wcss = max (final_wcss_successful);
-    best_wcss = min (final_wcss_successful + (10 * worst_wcss + 10) * (1 - termination_bitmap [, 1]));
-    avg_wcss = sum (final_wcss_successful) / num_successful_runs;
-    best_index_vector = ppred (final_wcss_successful, best_wcss, "==");
-    aggr_best_index_vector = cumsum (best_index_vector);
-    best_index = as.integer (sum (ppred (aggr_best_index_vector, 0, "==")) + 1);
-    print ("Successful runs:  Best run is " + best_index + " with Centroid WCSS = " + best_wcss 
-        + ";  Avg WCSS = " + avg_wcss + ";  Worst WCSS = " + worst_wcss);
-    C = All_Centroids [(num_centroids * (best_index - 1) + 1) : (num_centroids * best_index), ];
-    print ("Writing out the best-WCSS centroids...");
-    write (C, fileC, format=fmtCY);
-    if (is_write_Y == 1) {
-        print ("Writing out the best-WCSS cluster labels...");
-        D =  -2 * (X %*% t(C)) + t(rowSums (C ^ 2));
-        P = ppred (D, rowMins (D), "<=");
-        aggr_P = t(cumsum (t(P)));
-        Y = rowSums (ppred (aggr_P, 0, "==")) + 1
-        write (Y, fileY, format=fmtCY);
-    }
-    print ("DONE.");
-} else {
-    stop ("No output is produced.  Try increasing the number of iterations and/or runs.");
-}
-
-
-
-get_sample_maps = function (int num_records, int num_samples, int approx_sample_size)
-    return (Matrix[double] sample_maps, Matrix[double] sample_col_map, int sample_block_size)
-{
-    if (approx_sample_size < num_records) {
-        # Input value "approx_sample_size" is the average sample size; increase it by ~10 std.dev's
-        # to get the sample block size (to allocate space):
-        sample_block_size = as.integer (approx_sample_size + round (10 * sqrt (approx_sample_size)));
-        num_rows = sample_block_size * num_samples;
-        
-        # Generate all samples in parallel by converting uniform random values into random
-        # integer skip-ahead intervals and prefix-summing them:
-        sample_rec_ids = Rand (rows = sample_block_size, cols = num_samples, min = 0.0, max = 1.0);
-        sample_rec_ids = round (log (sample_rec_ids) / log (1.0 - approx_sample_size / num_records) + 0.5);
-        # Prob [k-1 < log(uniform)/log(1-p) < k] = p*(1-p)^(k-1) = Prob [k-1 zeros before a one]
-        sample_rec_ids = cumsum (sample_rec_ids);  #  (skip to next one) --> (skip to i-th one)
-        
-        # Replace all sample record ids over "num_records" (i.e. out of range) by "num_records + 1":
-        is_sample_rec_id_within_range = ppred (sample_rec_ids, num_records, "<=");
-        sample_rec_ids = sample_rec_ids * is_sample_rec_id_within_range 
-                       + (num_records + 1) * (1 - is_sample_rec_id_within_range);
-        
-        # Rearrange all samples (and their out-of-range indicators) into one column-vector:
-        sample_rec_ids = 
-            matrix (sample_rec_ids, rows = num_rows, cols = 1, byrow = FALSE);
-        is_row_in_samples = 
-            matrix (is_sample_rec_id_within_range, rows = num_rows, cols = 1, byrow = FALSE);
-
-        # Use contingency table to create the "sample_maps" matrix that is a vertical concatenation
-        # of 0-1-matrices, one per sample, each with 1s at (i, sample_record[i]) and 0s elsewhere:
-        sample_maps_raw = table (seq (1, num_rows), sample_rec_ids);
-        max_rec_id = ncol (sample_maps_raw);
-        if (max_rec_id >= num_records) {
-            sample_maps = sample_maps_raw [, 1 : num_records];
-        } else {
-            sample_maps = matrix (0, rows = num_rows, cols = num_records);        
-            sample_maps [, 1 : max_rec_id] = sample_maps_raw;
-        }
-        
-        # Create a 0-1-matrix that maps each sample column ID into all row positions of the
-        # corresponding sample; map out-of-sample-range positions to row id = num_rows + 1:
-        sample_positions = (num_rows + 1) - is_row_in_samples * seq (num_rows, 1, -1);
-        # Column ID positions = 1, 1, ..., 1, 2, 2, ..., 2, . . . , n_c, n_c, ..., n_c:
-        col_positions = round (0.5 + seq (0, num_rows - 1, 1) / sample_block_size);
-        sample_col_map = table (sample_positions, col_positions);
-        # Remove the out-of-sample-range positions by cutting off the last row:
-        sample_col_map = sample_col_map [1 : (num_rows), ];
-        
-    } else {
-        one_per_record = matrix (1, rows = num_records, cols = 1);
-        sample_block_size = num_records;
-        sample_maps    = matrix (0, rows = (num_records * num_samples), cols = num_records);
-        sample_col_map = matrix (0, rows = (num_records * num_samples), cols = num_samples);
-        for (i in 1:num_samples) {
-            sample_maps    [(num_records * (i - 1) + 1) : (num_records * i),  ] = diag (one_per_record);
-            sample_col_map [(num_records * (i - 1) + 1) : (num_records * i), i] = one_per_record;
-}   }   }
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+#
+# Implements the k-Means clustering algorithm
+#
+# INPUT PARAMETERS:
+# ----------------------------------------------------------------------------
+# NAME  TYPE   DEFAULT  MEANING
+# ----------------------------------------------------------------------------
+# X     String   ---    Location to read matrix X with the input data records
+# k     Int      ---    Number of centroids
+# runs  Int       10    Number of runs (with different initial centroids)
+# maxi  Int     1000    Maximum number of iterations per run
+# tol   Double 0.000001 Tolerance (epsilon) for WCSS change ratio
+# samp  Int       50    Average number of records per centroid in data samples
+# C     String  "C.mtx" Location to store the output matrix with the centroids
+# isY   Int        0    0 = do not write Y,  1 = write Y
+# Y     String  "Y.mtx" Location to store the mapping of records to centroids
+# fmt   String  "text"  Matrix output format, usually "text" or "csv"
+# verb  Int        0    0 = do not print per-iteration stats, 1 = print them
+# ----------------------------------------------------------------------------
+#
+# Example:
+# hadoop jar SystemML.jar -f Kmeans.dml -nvargs X=X.mtx k=5 C=centroids.mtx
+# hadoop jar SystemML.jar -f Kmeans.dml -nvargs X=X.mtx k=5 runs=100 maxi=5000 tol=0.00000001 samp=20 C=centroids.mtx isY=1 Y=clusters.mtx verb=1
+
+fileX = $X;
+fileY = ifdef ($Y, "Y.mtx");
+fileC = ifdef ($C, "C.mtx");
+
+num_centroids = $k;
+num_runs   = ifdef ($runs, 10);      # $runs=10;
+max_iter   = ifdef ($maxi, 1000);    # $maxi=1000;
+eps        = ifdef ($tol, 0.000001); # $tol=0.000001;
+is_write_Y = ifdef ($isY, 0);        # $isY=0;
+is_verbose = ifdef ($verb, 0);       # $verb=0;
+fmtCY      = ifdef ($fmt, "text");   # $fmt="text";
+avg_sample_size_per_centroid = ifdef ($samp, 50);  # $samp=50;
+
+
+print ("BEGIN K-MEANS SCRIPT");
+print ("Reading X...");
+
+# X : matrix of data points as rows
+X = read (fileX);
+num_records   = nrow (X);
+num_features  = ncol (X);
+
+sumXsq = sum (X ^ 2);
+# Remark - A useful rewrite: sum (A %*% B) = sum (t(colSums(A)) * rowSums(B))
+
+# STEP 1: INITIALIZE CENTROIDS FOR ALL RUNS FROM DATA SAMPLES:
+
+print ("Taking data samples for initialization...");
+
+[sample_maps, samples_vs_runs_map, sample_block_size] = 
+    get_sample_maps (num_records, num_runs, num_centroids * avg_sample_size_per_centroid);
+
+is_row_in_samples = rowSums (sample_maps);
+X_samples = sample_maps %*% X;
+X_samples_sq_norms = rowSums (X_samples ^ 2);
+
+print ("Initializing the centroids for all runs...");
+All_Centroids = matrix (0, rows = (num_runs * num_centroids), cols = num_features);
+
+# We select centroids according to the k-Means++ heuristic applied to a sample of X
+# Loop invariant: min_distances ~ sq.distances from X_sample rows to nearest centroids,
+# with the out-of-range X_sample positions in min_distances set to 0.0
+
+min_distances = is_row_in_samples;  # Pick the 1-st centroids uniformly at random
+
+for (i in 1 : num_centroids)
+{
+    # "Matricize" and prefix-sum to compute the cumulative distribution function:
+    min_distances_matrix_form = 
+        matrix (min_distances, rows = sample_block_size, cols = num_runs, byrow = FALSE);
+    cdf_min_distances = cumsum (min_distances_matrix_form);
+    
+    # Select the i-th centroid in each sample as a random sample row id with
+    # probability ~ min_distances:
+    random_row = Rand (rows = 1, cols = num_runs, min = 0.0, max = 1.0);  
+    threshold_matrix = random_row * cdf_min_distances [sample_block_size, ];
+    centroid_ids = t(colSums (ppred (cdf_min_distances, threshold_matrix, "<"))) + 1;
+    
+    # Place the selected centroids together, one per run, into a matrix:
+    centroid_placer = matrix (0, rows = num_runs, cols = (sample_block_size * num_runs));
+    centroid_placer_raw = 
+        table (seq (1, num_runs, 1), sample_block_size * seq (0, num_runs - 1, 1) + centroid_ids);
+    centroid_placer [, 1 : ncol (centroid_placer_raw)] = centroid_placer_raw;
+    centroids = centroid_placer %*% X_samples;
+    
+    # Place the selected centroids into their appropriate slots in All_Centroids:
+    centroid_placer = matrix (0, rows = nrow (All_Centroids), cols = num_runs);
+    centroid_placer_raw = 
+        table (seq (i, num_centroids * (num_runs - 1) + i, num_centroids), seq (1, num_runs, 1));
+    centroid_placer [1 : nrow (centroid_placer_raw), ] = centroid_placer_raw;
+    All_Centroids = All_Centroids + centroid_placer %*% centroids;
+    
+    # Update min_distances to preserve the loop invariant:
+    distances = X_samples_sq_norms + samples_vs_runs_map %*% rowSums (centroids ^ 2)
+              - 2 * rowSums (X_samples * (samples_vs_runs_map %*% centroids));
+    if (i == 1) {
+        min_distances = is_row_in_samples * distances;
+    } else {
+        min_distances = min (min_distances, distances);
+}   }
+
+# STEP 2: PERFORM K-MEANS ITERATIONS FOR ALL RUNS:
+
+termination_code = matrix (0, rows = num_runs, cols = 1);
+final_wcss = matrix (0, rows = num_runs, cols = 1);
+num_iterations = matrix (0, rows = num_runs, cols = 1);
+
+print ("Performing k-means iterations for all runs...");
+
+parfor (run_index in 1 : num_runs, check = 0)
+{
+    C = All_Centroids [(num_centroids * (run_index - 1) + 1) : (num_centroids * run_index), ];
+    C_old = C;
+    iter_count = 0;
+    term_code = 0;
+    wcss = 0;
+
+    while (term_code == 0)
+    {
+        # Compute Euclidean squared distances from records (X rows) to centroids (C rows)
+        # without the C-independent term, then take the minimum for each record
+        D = -2 * (X %*% t(C)) + t(rowSums (C ^ 2));
+        minD = rowMins (D);
+        # Compute the current centroid-based within-cluster sum of squares (WCSS)
+        wcss_old = wcss;
+        wcss = sumXsq + sum (minD);
+        if (is_verbose == 1) {
+            if (iter_count == 0) {
+                print ("Run " + run_index + ", At Start-Up:  Centroid WCSS = " + wcss);
+            } else {
+                print ("Run " + run_index + ", Iteration " + iter_count + ":  Centroid WCSS = " + wcss
+                    + ";  Centroid change (avg.sq.dist.) = " + (sum ((C - C_old) ^ 2) / num_centroids));
+        }   }
+        # Check if convergence or maximum iteration has been reached
+        if (wcss_old - wcss < eps * wcss & iter_count > 0) {
+            term_code = 1;  # Convergence is reached
+        } else {
+            if (iter_count >= max_iter) {
+                term_code = 2;  # Maximum iteration is reached
+            } else {
+                iter_count = iter_count + 1;
+                # Find the closest centroid for each record
+                P = ppred (D, minD, "<=");
+                # If some records belong to multiple centroids, share them equally
+                P = P / rowSums (P);
+                # Compute the column normalization factor for P
+                P_denom = colSums (P);
+                if (sum (ppred (P_denom, 0.0, "<=")) > 0) {
+                    term_code = 3;  # There is a "runaway" centroid with 0.0 denominator
+                } else {
+                    C_old = C;
+                    # Compute new centroids as weighted averages over the records
+                    C = (t(P) %*% X) / t(P_denom);
+    }   }   }   }
+    print ("Run " + run_index + ", Iteration " + iter_count + ":  Terminated with code = " + term_code + ",  Centroid WCSS = " + wcss);
+    All_Centroids [(num_centroids * (run_index - 1) + 1) : (num_centroids * run_index), ] = C;
+    final_wcss [run_index, 1] = wcss;
+    termination_code [run_index, 1] = term_code;
+    num_iterations [run_index, 1] = iter_count;
+}
+
+# STEP 3: SELECT THE RUN WITH BEST CENTROID-WCSS AND OUTPUT ITS CENTROIDS:
+
+termination_bitmap = matrix (0, rows = num_runs, cols = 3);
+termination_bitmap_raw = table (seq (1, num_runs, 1), termination_code);
+termination_bitmap [, 1 : ncol(termination_bitmap_raw)] = termination_bitmap_raw;
+termination_stats = colSums (termination_bitmap);
+print ("Number of successful runs = " + as.integer (castAsScalar (termination_stats [1, 1])));
+print ("Number of incomplete runs = " + as.integer (castAsScalar (termination_stats [1, 2])));
+print ("Number of failed runs (with lost centroids) = " + as.integer (castAsScalar (termination_stats [1, 3])));
+
+num_successful_runs = castAsScalar (termination_stats [1, 1]);
+if (num_successful_runs > 0) {
+    final_wcss_successful = final_wcss * termination_bitmap [, 1];
+    worst_wcss = max (final_wcss_successful);
+    best_wcss = min (final_wcss_successful + (10 * worst_wcss + 10) * (1 - termination_bitmap [, 1]));
+    avg_wcss = sum (final_wcss_successful) / num_successful_runs;
+    best_index_vector = ppred (final_wcss_successful, best_wcss, "==");
+    aggr_best_index_vector = cumsum (best_index_vector);
+    best_index = as.integer (sum (ppred (aggr_best_index_vector, 0, "==")) + 1);
+    print ("Successful runs:  Best run is " + best_index + " with Centroid WCSS = " + best_wcss 
+        + ";  Avg WCSS = " + avg_wcss + ";  Worst WCSS = " + worst_wcss);
+    C = All_Centroids [(num_centroids * (best_index - 1) + 1) : (num_centroids * best_index), ];
+    print ("Writing out the best-WCSS centroids...");
+    write (C, fileC, format=fmtCY);
+    if (is_write_Y == 1) {
+        print ("Writing out the best-WCSS cluster labels...");
+        D =  -2 * (X %*% t(C)) + t(rowSums (C ^ 2));
+        P = ppred (D, rowMins (D), "<=");
+        aggr_P = t(cumsum (t(P)));
+        Y = rowSums (ppred (aggr_P, 0, "==")) + 1
+        write (Y, fileY, format=fmtCY);
+    }
+    print ("DONE.");
+} else {
+    stop ("No output is produced.  Try increasing the number of iterations and/or runs.");
+}
+
+# get_sample_maps: draws "num_samples" random samples of record ids 1..num_records (about "approx_sample_size"
+# ids each) and returns their stacked 0-1 row-selection maps, the row-to-sample map, and the rows per sample.
+get_sample_maps = function (int num_records, int num_samples, int approx_sample_size)
+    return (Matrix[double] sample_maps, Matrix[double] sample_col_map, int sample_block_size)
+{
+    if (approx_sample_size < num_records) {
+        # "approx_sample_size" is the average sample size; pad it by 10 * sqrt (approx_sample_size),
+        # i.e. by ~10 std.dev's, to get the number of rows allocated per sample (the block size):
+        sample_block_size = as.integer (approx_sample_size + round (10 * sqrt (approx_sample_size)));
+        num_rows = sample_block_size * num_samples;
+        
+        # Generate all samples at once, one column per sample: convert uniform random values into
+        # random integer skip-ahead gaps, then prefix-sum the gaps to obtain increasing record ids:
+        sample_rec_ids = Rand (rows = sample_block_size, cols = num_samples, min = 0.0, max = 1.0);
+        sample_rec_ids = round (log (sample_rec_ids) / log (1.0 - approx_sample_size / num_records) + 0.5);
+        # With p = approx_sample_size / num_records:  Prob [k-1 < log(uniform)/log(1-p) < k] = p*(1-p)^(k-1)
+        sample_rec_ids = cumsum (sample_rec_ids);  #  (gap to the next pick) --> (record id of the i-th pick)
+        
+        # Replace every sample record id above "num_records" (i.e. out of range) by "num_records + 1":
+        is_sample_rec_id_within_range = ppred (sample_rec_ids, num_records, "<=");
+        sample_rec_ids = sample_rec_ids * is_sample_rec_id_within_range 
+                       + (num_records + 1) * (1 - is_sample_rec_id_within_range);
+        
+        # Stack the sample columns (and their in-range indicators) into one column-vector, sample by sample:
+        sample_rec_ids = 
+            matrix (sample_rec_ids, rows = num_rows, cols = 1, byrow = FALSE);
+        is_row_in_samples = 
+            matrix (is_sample_rec_id_within_range, rows = num_rows, cols = 1, byrow = FALSE);
+
+        # Use a contingency table to build "sample_maps": a vertical concatenation of 0-1-matrices, one per
+        # sample, with a 1 at (i, sample_rec_ids[i]); then drop or pad columns to get exactly num_records:
+        sample_maps_raw = table (seq (1, num_rows), sample_rec_ids);
+        max_rec_id = ncol (sample_maps_raw);
+        if (max_rec_id >= num_records) {
+            sample_maps = sample_maps_raw [, 1 : num_records];
+        } else {
+            sample_maps = matrix (0, rows = num_rows, cols = num_records);        
+            sample_maps [, 1 : max_rec_id] = sample_maps_raw;
+        }
+        
+        # Create a 0-1-matrix that maps each sample (column) to the row positions it occupies;
+        # in-range row i keeps position i, out-of-range rows are all sent to position num_rows + 1:
+        sample_positions = (num_rows + 1) - is_row_in_samples * seq (num_rows, 1, -1);
+        # Sample ids per row = 1, 1, ..., 1, 2, 2, ..., 2, . . . , each repeated sample_block_size times:
+        col_positions = round (0.5 + seq (0, num_rows - 1, 1) / sample_block_size);
+        sample_col_map = table (sample_positions, col_positions);
+        # Keep rows 1..num_rows only, which cuts off the extra row that collects out-of-range positions:
+        sample_col_map = sample_col_map [1 : (num_rows), ];
+        
+    } else {  # approx_sample_size >= num_records: every sample is the entire data set (identity map)
+        one_per_record = matrix (1, rows = num_records, cols = 1);
+        sample_block_size = num_records;
+        sample_maps    = matrix (0, rows = (num_records * num_samples), cols = num_records);
+        sample_col_map = matrix (0, rows = (num_records * num_samples), cols = num_samples);
+        for (i in 1:num_samples) {
+            sample_maps    [(num_records * (i - 1) + 1) : (num_records * i),  ] = diag (one_per_record);
+            sample_col_map [(num_records * (i - 1) + 1) : (num_records * i), i] = one_per_record;
+}   }   }
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/scripts/algorithms/LinearRegCG.dml
----------------------------------------------------------------------
diff --git a/scripts/algorithms/LinearRegCG.dml b/scripts/algorithms/LinearRegCG.dml
index a485d83..ebfa5a4 100644
--- a/scripts/algorithms/LinearRegCG.dml
+++ b/scripts/algorithms/LinearRegCG.dml
@@ -1,286 +1,286 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-#
-# THIS SCRIPT SOLVES LINEAR REGRESSION USING THE CONJUGATE GRADIENT ALGORITHM
-#
-# INPUT PARAMETERS:
-# --------------------------------------------------------------------------------------------
-# NAME  TYPE   DEFAULT  MEANING
-# --------------------------------------------------------------------------------------------
-# X     String  ---     Location (on HDFS) to read the matrix X of feature vectors
-# Y     String  ---     Location (on HDFS) to read the 1-column matrix Y of response values
-# B     String  ---     Location to store estimated regression parameters (the betas)
-# O     String  " "     Location to write the printed statistics; by default is standard output
-# Log   String  " "     Location to write per-iteration variables for log/debugging purposes
-# icpt  Int      0      Intercept presence, shifting and rescaling the columns of X:
-#                       0 = no intercept, no shifting, no rescaling;
-#                       1 = add intercept, but neither shift nor rescale X;
-#                       2 = add intercept, shift & rescale X columns to mean = 0, variance = 1
-# reg   Double 0.000001 Regularization constant (lambda) for L2-regularization; set to nonzero
-#                       for highly dependend/sparse/numerous features
-# tol   Double 0.000001 Tolerance (epsilon); conjugate graduent procedure terminates early if
-#                       L2 norm of the beta-residual is less than tolerance * its initial norm
-# maxi  Int      0      Maximum number of conjugate gradient iterations, 0 = no maximum
-# fmt   String "text"   Matrix output format for B (the betas) only, usually "text" or "csv"
-# --------------------------------------------------------------------------------------------
-# OUTPUT: Matrix of regression parameters (the betas) and its size depend on icpt input value:
-#         OUTPUT SIZE:   OUTPUT CONTENTS:                HOW TO PREDICT Y FROM X AND B:
-# icpt=0: ncol(X)   x 1  Betas for X only                Y ~ X %*% B[1:ncol(X), 1], or just X %*% B
-# icpt=1: ncol(X)+1 x 1  Betas for X and intercept       Y ~ X %*% B[1:ncol(X), 1] + B[ncol(X)+1, 1]
-# icpt=2: ncol(X)+1 x 2  Col.1: betas for X & intercept  Y ~ X %*% B[1:ncol(X), 1] + B[ncol(X)+1, 1]
-#                        Col.2: betas for shifted/rescaled X and intercept
-#
-# In addition, some regression statistics are provided in CSV format, one comma-separated
-# name-value pair per each line, as follows:
-#
-# NAME                  MEANING
-# -------------------------------------------------------------------------------------
-# AVG_TOT_Y             Average of the response value Y
-# STDEV_TOT_Y           Standard Deviation of the response value Y
-# AVG_RES_Y             Average of the residual Y - pred(Y|X), i.e. residual bias
-# STDEV_RES_Y           Standard Deviation of the residual Y - pred(Y|X)
-# DISPERSION            GLM-style dispersion, i.e. residual sum of squares / # deg. fr.
-# PLAIN_R2              Plain R^2 of residual with bias included vs. total average
-# ADJUSTED_R2           Adjusted R^2 of residual with bias included vs. total average
-# PLAIN_R2_NOBIAS       Plain R^2 of residual with bias subtracted vs. total average
-# ADJUSTED_R2_NOBIAS    Adjusted R^2 of residual with bias subtracted vs. total average
-# PLAIN_R2_VS_0         * Plain R^2 of residual with bias included vs. zero constant
-# ADJUSTED_R2_VS_0      * Adjusted R^2 of residual with bias included vs. zero constant
-# -------------------------------------------------------------------------------------
-# * The last two statistics are only printed if there is no intercept (icpt=0)
-#
-# The Log file, when requested, contains the following per-iteration variables in CSV
-# format, each line containing triple (NAME, ITERATION, VALUE) with ITERATION = 0 for
-# initial values:
-#
-# NAME                  MEANING
-# -------------------------------------------------------------------------------------
-# CG_RESIDUAL_NORM      L2-norm of Conj.Grad.residual, which is A %*% beta - t(X) %*% y
-#                           where A = t(X) %*% X + diag (lambda), or a similar quantity
-# CG_RESIDUAL_RATIO     Ratio of current L2-norm of Conj.Grad.residual over the initial
-# -------------------------------------------------------------------------------------
-#
-# HOW TO INVOKE THIS SCRIPT - EXAMPLE:
-# hadoop jar SystemML.jar -f LinearRegCG.dml -nvargs X=INPUT_DIR/X Y=INPUT_DIR/Y B=OUTPUT_DIR/B
-#     O=OUTPUT_DIR/Out icpt=2 reg=1.0 tol=0.001 maxi=100 fmt=csv Log=OUTPUT_DIR/log
-
-fileX = $X;
-fileY = $Y;
-fileB = $B;
-fileO = ifdef ($O, " ");
-fileLog = ifdef ($Log, " ");
-fmtB = ifdef ($fmt, "text");
-
-intercept_status = ifdef ($icpt, 0);     # $icpt=0;
-tolerance = ifdef ($tol, 0.000001);      # $tol=0.000001;
-max_iteration = ifdef ($maxi, 0);        # $maxi=0;
-regularization = ifdef ($reg, 0.000001); # $reg=0.000001;
-
-print ("BEGIN LINEAR REGRESSION SCRIPT");
-print ("Reading X and Y...");
-X = read (fileX);
-y = read (fileY);
-
-n = nrow (X);
-m = ncol (X);
-ones_n = matrix (1, rows = n, cols = 1);
-zero_cell = matrix (0, rows = 1, cols = 1);
-
-# Introduce the intercept, shift and rescale the columns of X if needed
-
-m_ext = m;
-if (intercept_status == 1 | intercept_status == 2)  # add the intercept column
-{
-    X = append (X, ones_n);
-    m_ext = ncol (X);
-}
-
-scale_lambda = matrix (1, rows = m_ext, cols = 1);
-if (intercept_status == 1 | intercept_status == 2)
-{
-    scale_lambda [m_ext, 1] = 0;
-}
-
-if (intercept_status == 2)  # scale-&-shift X columns to mean 0, variance 1
-{                           # Important assumption: X [, m_ext] = ones_n
-    avg_X_cols = t(colSums(X)) / n;
-    var_X_cols = (t(colSums (X ^ 2)) - n * (avg_X_cols ^ 2)) / (n - 1);
-    is_unsafe = ppred (var_X_cols, 0.0, "<=");
-    scale_X = 1.0 / sqrt (var_X_cols * (1 - is_unsafe) + is_unsafe);
-    scale_X [m_ext, 1] = 1;
-    shift_X = - avg_X_cols * scale_X;
-    shift_X [m_ext, 1] = 0;
-} else {
-    scale_X = matrix (1, rows = m_ext, cols = 1);
-    shift_X = matrix (0, rows = m_ext, cols = 1);
-}
-
-# Henceforth, if intercept_status == 2, we use "X %*% (SHIFT/SCALE TRANSFORM)"
-# instead of "X".  However, in order to preserve the sparsity of X,
-# we apply the transform associatively to some other part of the expression
-# in which it occurs.  To avoid materializing a large matrix, we rewrite it:
-#
-# ssX_A  = (SHIFT/SCALE TRANSFORM) %*% A    --- is rewritten as:
-# ssX_A  = diag (scale_X) %*% A;
-# ssX_A [m_ext, ] = ssX_A [m_ext, ] + t(shift_X) %*% A;
-#
-# tssX_A = t(SHIFT/SCALE TRANSFORM) %*% A   --- is rewritten as:
-# tssX_A = diag (scale_X) %*% A + shift_X %*% A [m_ext, ];
-
-lambda = scale_lambda * regularization;
-beta_unscaled = matrix (0, rows = m_ext, cols = 1);
-
-if (max_iteration == 0) {
-    max_iteration = m_ext;
-}
-i = 0;
-
-# BEGIN THE CONJUGATE GRADIENT ALGORITHM
-print ("Running the CG algorithm...");
-
-r = - t(X) %*% y;
-
-if (intercept_status == 2) {
-    r = scale_X * r + shift_X %*% r [m_ext, ];
-}
-
-p = - r;
-norm_r2 = sum (r ^ 2);
-norm_r2_initial = norm_r2;
-norm_r2_target = norm_r2_initial * tolerance ^ 2;
-print ("||r|| initial value = " + sqrt (norm_r2_initial) + ",  target value = " + sqrt (norm_r2_target));
-log_str = "CG_RESIDUAL_NORM,0," + sqrt (norm_r2_initial);
-log_str = append (log_str, "CG_RESIDUAL_RATIO,0,1.0");
-
-while (i < max_iteration & norm_r2 > norm_r2_target)
-{
-    if (intercept_status == 2) {
-        ssX_p = scale_X * p;
-        ssX_p [m_ext, ] = ssX_p [m_ext, ] + t(shift_X) %*% p;
-    } else {
-        ssX_p = p;
-    }
-    
-    q = t(X) %*% (X %*% ssX_p);
-
-    if (intercept_status == 2) {
-        q = scale_X * q + shift_X %*% q [m_ext, ];
-    }
-
-	q = q + lambda * p;
-	a = norm_r2 / sum (p * q);
-	beta_unscaled = beta_unscaled + a * p;
-	r = r + a * q;
-	old_norm_r2 = norm_r2;
-	norm_r2 = sum (r ^ 2);
-	p = -r + (norm_r2 / old_norm_r2) * p;
-	i = i + 1;
-	print ("Iteration " + i + ":  ||r|| / ||r init|| = " + sqrt (norm_r2 / norm_r2_initial));
-	log_str = append (log_str, "CG_RESIDUAL_NORM,"  + i + "," + sqrt (norm_r2));
-    log_str = append (log_str, "CG_RESIDUAL_RATIO," + i + "," + sqrt (norm_r2 / norm_r2_initial));
-}
-
-if (i >= max_iteration) {
-    print ("Warning: the maximum number of iterations has been reached.");
-}
-print ("The CG algorithm is done.");
-# END THE CONJUGATE GRADIENT ALGORITHM
-
-if (intercept_status == 2) {
-    beta = scale_X * beta_unscaled;
-    beta [m_ext, ] = beta [m_ext, ] + t(shift_X) %*% beta_unscaled;
-} else {
-    beta = beta_unscaled;
-}
-
-print ("Computing the statistics...");
-
-avg_tot = sum (y) / n;
-ss_tot = sum (y ^ 2);
-ss_avg_tot = ss_tot - n * avg_tot ^ 2;
-var_tot = ss_avg_tot / (n - 1);
-y_residual = y - X %*% beta;
-avg_res = sum (y_residual) / n;
-ss_res = sum (y_residual ^ 2);
-ss_avg_res = ss_res - n * avg_res ^ 2;
-
-plain_R2 = 1 - ss_res / ss_avg_tot;
-if (n > m_ext) {
-    dispersion  = ss_res / (n - m_ext);
-    adjusted_R2 = 1 - dispersion / (ss_avg_tot / (n - 1));
-} else {
-    dispersion  = 0.0 / 0.0;
-    adjusted_R2 = 0.0 / 0.0;
-}
-
-plain_R2_nobias = 1 - ss_avg_res / ss_avg_tot;
-deg_freedom = n - m - 1;
-if (deg_freedom > 0) {
-    var_res = ss_avg_res / deg_freedom;
-    adjusted_R2_nobias = 1 - var_res / (ss_avg_tot / (n - 1));
-} else {
-    var_res = 0.0 / 0.0;
-    adjusted_R2_nobias = 0.0 / 0.0;
-    print ("Warning: zero or negative number of degrees of freedom.");
-}
-
-plain_R2_vs_0 = 1 - ss_res / ss_tot;
-if (n > m) {
-    adjusted_R2_vs_0 = 1 - (ss_res / (n - m)) / (ss_tot / n);
-} else {
-    adjusted_R2_vs_0 = 0.0 / 0.0;
-}
-
-str = "AVG_TOT_Y," + avg_tot;                                    #  Average of the response value Y
-str = append (str, "STDEV_TOT_Y," + sqrt (var_tot));             #  Standard Deviation of the response value Y
-str = append (str, "AVG_RES_Y," + avg_res);                      #  Average of the residual Y - pred(Y|X), i.e. residual bias
-str = append (str, "STDEV_RES_Y," + sqrt (var_res));             #  Standard Deviation of the residual Y - pred(Y|X)
-str = append (str, "DISPERSION," + dispersion);                  #  GLM-style dispersion, i.e. residual sum of squares / # d.f.
-str = append (str, "PLAIN_R2," + plain_R2);                      #  Plain R^2 of residual with bias included vs. total average
-str = append (str, "ADJUSTED_R2," + adjusted_R2);                #  Adjusted R^2 of residual with bias included vs. total average
-str = append (str, "PLAIN_R2_NOBIAS," + plain_R2_nobias);        #  Plain R^2 of residual with bias subtracted vs. total average
-str = append (str, "ADJUSTED_R2_NOBIAS," + adjusted_R2_nobias);  #  Adjusted R^2 of residual with bias subtracted vs. total average
-if (intercept_status == 0) {
-    str = append (str, "PLAIN_R2_VS_0," + plain_R2_vs_0);        #  Plain R^2 of residual with bias included vs. zero constant
-    str = append (str, "ADJUSTED_R2_VS_0," + adjusted_R2_vs_0);  #  Adjusted R^2 of residual with bias included vs. zero constant
-}
-
-if (fileO != " ") {
-    write (str, fileO);
-} else {
-    print (str);
-}
-
-# Prepare the output matrix
-print ("Writing the output matrix...");
-
-if (intercept_status == 2) {
-    beta_out = append (beta, beta_unscaled);
-} else {
-    beta_out = beta;
-}
-write (beta_out, fileB, format=fmtB);
-
-if (fileLog != " ") {
-    write (log_str, fileLog);
-}
-print ("END LINEAR REGRESSION SCRIPT");
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+#
+# THIS SCRIPT SOLVES LINEAR REGRESSION USING THE CONJUGATE GRADIENT ALGORITHM
+#
+# INPUT PARAMETERS:
+# --------------------------------------------------------------------------------------------
+# NAME  TYPE   DEFAULT  MEANING
+# --------------------------------------------------------------------------------------------
+# X     String  ---     Location (on HDFS) to read the matrix X of feature vectors
+# Y     String  ---     Location (on HDFS) to read the 1-column matrix Y of response values
+# B     String  ---     Location to store estimated regression parameters (the betas)
+# O     String  " "     Location to write the printed statistics; by default is standard output
+# Log   String  " "     Location to write per-iteration variables for log/debugging purposes
+# icpt  Int      0      Intercept presence, shifting and rescaling the columns of X:
+#                       0 = no intercept, no shifting, no rescaling;
+#                       1 = add intercept, but neither shift nor rescale X;
+#                       2 = add intercept, shift & rescale X columns to mean = 0, variance = 1
+# reg   Double 0.000001 Regularization constant (lambda) for L2-regularization; set to nonzero
+#                       for highly dependent/sparse/numerous features
+# tol   Double 0.000001 Tolerance (epsilon); conjugate gradient procedure terminates early if
+#                       L2 norm of the beta-residual is less than tolerance * its initial norm
+# maxi  Int      0      Maximum number of conjugate gradient iterations; 0 = ncol(X), counting the intercept column
+# fmt   String "text"   Matrix output format for B (the betas) only, usually "text" or "csv"
+# --------------------------------------------------------------------------------------------
+# OUTPUT: Matrix of regression parameters (the betas); its size and contents depend on the icpt input value:
+#         OUTPUT SIZE:   OUTPUT CONTENTS:                HOW TO PREDICT Y FROM X AND B:
+# icpt=0: ncol(X)   x 1  Betas for X only                Y ~ X %*% B[1:ncol(X), 1], or just X %*% B
+# icpt=1: ncol(X)+1 x 1  Betas for X and intercept       Y ~ X %*% B[1:ncol(X), 1] + B[ncol(X)+1, 1]
+# icpt=2: ncol(X)+1 x 2  Col.1: betas for X & intercept  Y ~ X %*% B[1:ncol(X), 1] + B[ncol(X)+1, 1]
+#                        Col.2: betas for shifted/rescaled X and intercept
+#
+# In addition, some regression statistics are provided in CSV format, one comma-separated
+# name-value pair per each line, as follows:
+#
+# NAME                  MEANING
+# -------------------------------------------------------------------------------------
+# AVG_TOT_Y             Average of the response value Y
+# STDEV_TOT_Y           Standard Deviation of the response value Y
+# AVG_RES_Y             Average of the residual Y - pred(Y|X), i.e. residual bias
+# STDEV_RES_Y           Standard Deviation of the residual Y - pred(Y|X)
+# DISPERSION            GLM-style dispersion, i.e. residual sum of squares / # deg. fr.
+# PLAIN_R2              Plain R^2 of residual with bias included vs. total average
+# ADJUSTED_R2           Adjusted R^2 of residual with bias included vs. total average
+# PLAIN_R2_NOBIAS       Plain R^2 of residual with bias subtracted vs. total average
+# ADJUSTED_R2_NOBIAS    Adjusted R^2 of residual with bias subtracted vs. total average
+# PLAIN_R2_VS_0         * Plain R^2 of residual with bias included vs. zero constant
+# ADJUSTED_R2_VS_0      * Adjusted R^2 of residual with bias included vs. zero constant
+# -------------------------------------------------------------------------------------
+# * The last two statistics are only printed if there is no intercept (icpt=0)
+#
+# The Log file, when requested, contains the following per-iteration variables in CSV
+# format, each line containing triple (NAME, ITERATION, VALUE) with ITERATION = 0 for
+# initial values:
+#
+# NAME                  MEANING
+# -------------------------------------------------------------------------------------
+# CG_RESIDUAL_NORM      L2-norm of Conj.Grad.residual, which is A %*% beta - t(X) %*% y
+#                           where A = t(X) %*% X + diag (lambda), or a similar quantity
+# CG_RESIDUAL_RATIO     Ratio of current L2-norm of Conj.Grad.residual over the initial
+# -------------------------------------------------------------------------------------
+#
+# HOW TO INVOKE THIS SCRIPT - EXAMPLE:
+# hadoop jar SystemML.jar -f LinearRegCG.dml -nvargs X=INPUT_DIR/X Y=INPUT_DIR/Y B=OUTPUT_DIR/B
+#     O=OUTPUT_DIR/Out icpt=2 reg=1.0 tol=0.001 maxi=100 fmt=csv Log=OUTPUT_DIR/log
+
+fileX = $X;
+fileY = $Y;
+fileB = $B;
+fileO = ifdef ($O, " ");
+fileLog = ifdef ($Log, " ");
+fmtB = ifdef ($fmt, "text");
+
+intercept_status = ifdef ($icpt, 0);     # $icpt=0;
+tolerance = ifdef ($tol, 0.000001);      # $tol=0.000001;
+max_iteration = ifdef ($maxi, 0);        # $maxi=0;
+regularization = ifdef ($reg, 0.000001); # $reg=0.000001;
+
+print ("BEGIN LINEAR REGRESSION SCRIPT");
+print ("Reading X and Y...");
+X = read (fileX);
+y = read (fileY);
+
+n = nrow (X);
+m = ncol (X);
+ones_n = matrix (1, rows = n, cols = 1);
+zero_cell = matrix (0, rows = 1, cols = 1);
+
+# Introduce the intercept, shift and rescale the columns of X if needed
+
+m_ext = m;
+if (intercept_status == 1 | intercept_status == 2)  # add the intercept column
+{
+    X = append (X, ones_n);
+    m_ext = ncol (X);
+}
+
+scale_lambda = matrix (1, rows = m_ext, cols = 1);
+if (intercept_status == 1 | intercept_status == 2)
+{
+    scale_lambda [m_ext, 1] = 0;
+}
+
+if (intercept_status == 2)  # scale-&-shift X columns to mean 0, variance 1
+{                           # Important assumption: X [, m_ext] = ones_n
+    avg_X_cols = t(colSums(X)) / n;
+    var_X_cols = (t(colSums (X ^ 2)) - n * (avg_X_cols ^ 2)) / (n - 1);
+    is_unsafe = ppred (var_X_cols, 0.0, "<=");
+    scale_X = 1.0 / sqrt (var_X_cols * (1 - is_unsafe) + is_unsafe);
+    scale_X [m_ext, 1] = 1;
+    shift_X = - avg_X_cols * scale_X;
+    shift_X [m_ext, 1] = 0;
+} else {
+    scale_X = matrix (1, rows = m_ext, cols = 1);
+    shift_X = matrix (0, rows = m_ext, cols = 1);
+}
+
+# Henceforth, if intercept_status == 2, we use "X %*% (SHIFT/SCALE TRANSFORM)"
+# instead of "X".  However, in order to preserve the sparsity of X,
+# we apply the transform associatively to some other part of the expression
+# in which it occurs.  To avoid materializing a large matrix, we rewrite it:
+#
+# ssX_A  = (SHIFT/SCALE TRANSFORM) %*% A    --- is rewritten as:
+# ssX_A  = diag (scale_X) %*% A;
+# ssX_A [m_ext, ] = ssX_A [m_ext, ] + t(shift_X) %*% A;
+#
+# tssX_A = t(SHIFT/SCALE TRANSFORM) %*% A   --- is rewritten as:
+# tssX_A = diag (scale_X) %*% A + shift_X %*% A [m_ext, ];
+
+lambda = scale_lambda * regularization;
+beta_unscaled = matrix (0, rows = m_ext, cols = 1);
+
+if (max_iteration == 0) {
+    max_iteration = m_ext;
+}
+i = 0;
+
+# BEGIN THE CONJUGATE GRADIENT ALGORITHM
+print ("Running the CG algorithm...");
+
+r = - t(X) %*% y;
+
+if (intercept_status == 2) {
+    r = scale_X * r + shift_X %*% r [m_ext, ];
+}
+
+p = - r;
+norm_r2 = sum (r ^ 2);
+norm_r2_initial = norm_r2;
+norm_r2_target = norm_r2_initial * tolerance ^ 2;
+print ("||r|| initial value = " + sqrt (norm_r2_initial) + ",  target value = " + sqrt (norm_r2_target));
+log_str = "CG_RESIDUAL_NORM,0," + sqrt (norm_r2_initial);
+log_str = append (log_str, "CG_RESIDUAL_RATIO,0,1.0");
+
+while (i < max_iteration & norm_r2 > norm_r2_target)
+{
+    if (intercept_status == 2) {
+        ssX_p = scale_X * p;
+        ssX_p [m_ext, ] = ssX_p [m_ext, ] + t(shift_X) %*% p;
+    } else {
+        ssX_p = p;
+    }
+    
+    q = t(X) %*% (X %*% ssX_p);
+
+    if (intercept_status == 2) {
+        q = scale_X * q + shift_X %*% q [m_ext, ];
+    }
+
+	q = q + lambda * p;
+	a = norm_r2 / sum (p * q);
+	beta_unscaled = beta_unscaled + a * p;
+	r = r + a * q;
+	old_norm_r2 = norm_r2;
+	norm_r2 = sum (r ^ 2);
+	p = -r + (norm_r2 / old_norm_r2) * p;
+	i = i + 1;
+	print ("Iteration " + i + ":  ||r|| / ||r init|| = " + sqrt (norm_r2 / norm_r2_initial));
+	log_str = append (log_str, "CG_RESIDUAL_NORM,"  + i + "," + sqrt (norm_r2));
+    log_str = append (log_str, "CG_RESIDUAL_RATIO," + i + "," + sqrt (norm_r2 / norm_r2_initial));
+}
+
+if (i >= max_iteration) {
+    print ("Warning: the maximum number of iterations has been reached.");
+}
+print ("The CG algorithm is done.");
+# END THE CONJUGATE GRADIENT ALGORITHM
+
+if (intercept_status == 2) {
+    beta = scale_X * beta_unscaled;
+    beta [m_ext, ] = beta [m_ext, ] + t(shift_X) %*% beta_unscaled;
+} else {
+    beta = beta_unscaled;
+}
+
+print ("Computing the statistics...");
+
+avg_tot = sum (y) / n;
+ss_tot = sum (y ^ 2);
+ss_avg_tot = ss_tot - n * avg_tot ^ 2;
+var_tot = ss_avg_tot / (n - 1);
+y_residual = y - X %*% beta;
+avg_res = sum (y_residual) / n;
+ss_res = sum (y_residual ^ 2);
+ss_avg_res = ss_res - n * avg_res ^ 2;
+
+plain_R2 = 1 - ss_res / ss_avg_tot;
+if (n > m_ext) {
+    dispersion  = ss_res / (n - m_ext);
+    adjusted_R2 = 1 - dispersion / (ss_avg_tot / (n - 1));
+} else {
+    dispersion  = 0.0 / 0.0;
+    adjusted_R2 = 0.0 / 0.0;
+}
+
+plain_R2_nobias = 1 - ss_avg_res / ss_avg_tot;
+deg_freedom = n - m - 1;
+if (deg_freedom > 0) {
+    var_res = ss_avg_res / deg_freedom;
+    adjusted_R2_nobias = 1 - var_res / (ss_avg_tot / (n - 1));
+} else {
+    var_res = 0.0 / 0.0;
+    adjusted_R2_nobias = 0.0 / 0.0;
+    print ("Warning: zero or negative number of degrees of freedom.");
+}
+
+plain_R2_vs_0 = 1 - ss_res / ss_tot;
+if (n > m) {
+    adjusted_R2_vs_0 = 1 - (ss_res / (n - m)) / (ss_tot / n);
+} else {
+    adjusted_R2_vs_0 = 0.0 / 0.0;
+}
+
+str = "AVG_TOT_Y," + avg_tot;                                    #  Average of the response value Y
+str = append (str, "STDEV_TOT_Y," + sqrt (var_tot));             #  Standard Deviation of the response value Y
+str = append (str, "AVG_RES_Y," + avg_res);                      #  Average of the residual Y - pred(Y|X), i.e. residual bias
+str = append (str, "STDEV_RES_Y," + sqrt (var_res));             #  Standard Deviation of the residual Y - pred(Y|X)
+str = append (str, "DISPERSION," + dispersion);                  #  GLM-style dispersion, i.e. residual sum of squares / # d.f.
+str = append (str, "PLAIN_R2," + plain_R2);                      #  Plain R^2 of residual with bias included vs. total average
+str = append (str, "ADJUSTED_R2," + adjusted_R2);                #  Adjusted R^2 of residual with bias included vs. total average
+str = append (str, "PLAIN_R2_NOBIAS," + plain_R2_nobias);        #  Plain R^2 of residual with bias subtracted vs. total average
+str = append (str, "ADJUSTED_R2_NOBIAS," + adjusted_R2_nobias);  #  Adjusted R^2 of residual with bias subtracted vs. total average
+if (intercept_status == 0) {
+    str = append (str, "PLAIN_R2_VS_0," + plain_R2_vs_0);        #  Plain R^2 of residual with bias included vs. zero constant
+    str = append (str, "ADJUSTED_R2_VS_0," + adjusted_R2_vs_0);  #  Adjusted R^2 of residual with bias included vs. zero constant
+}
+
+if (fileO != " ") {
+    write (str, fileO);
+} else {
+    print (str);
+}
+
+# Prepare the output matrix
+print ("Writing the output matrix...");
+
+if (intercept_status == 2) {
+    beta_out = append (beta, beta_unscaled);
+} else {
+    beta_out = beta;
+}
+write (beta_out, fileB, format=fmtB);
+
+if (fileLog != " ") {
+    write (log_str, fileLog);
+}
+print ("END LINEAR REGRESSION SCRIPT");

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/scripts/algorithms/LinearRegDS.dml
----------------------------------------------------------------------
diff --git a/scripts/algorithms/LinearRegDS.dml b/scripts/algorithms/LinearRegDS.dml
index 501acc8..0ec663a 100644
--- a/scripts/algorithms/LinearRegDS.dml
+++ b/scripts/algorithms/LinearRegDS.dml
@@ -1,224 +1,224 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-#
-# THIS SCRIPT SOLVES LINEAR REGRESSION USING A DIRECT SOLVER FOR (X^T X + lambda) beta = X^T y
-#
-# INPUT PARAMETERS:
-# --------------------------------------------------------------------------------------------
-# NAME  TYPE   DEFAULT  MEANING
-# --------------------------------------------------------------------------------------------
-# X     String  ---     Location (on HDFS) to read the matrix X of feature vectors
-# Y     String  ---     Location (on HDFS) to read the 1-column matrix Y of response values
-# B     String  ---     Location to store estimated regression parameters (the betas)
-# O     String  " "     Location to write the printed statistics; by default is standard output
-# icpt  Int      0      Intercept presence, shifting and rescaling the columns of X:
-#                       0 = no intercept, no shifting, no rescaling;
-#                       1 = add intercept, but neither shift nor rescale X;
-#                       2 = add intercept, shift & rescale X columns to mean = 0, variance = 1
-# reg   Double 0.000001 Regularization constant (lambda) for L2-regularization; set to nonzero
-#                       for highly dependend/sparse/numerous features
-# fmt   String "text"   Matrix output format for B (the betas) only, usually "text" or "csv"
-# --------------------------------------------------------------------------------------------
-# OUTPUT: Matrix of regression parameters (the betas) and its size depend on icpt input value:
-#         OUTPUT SIZE:   OUTPUT CONTENTS:                HOW TO PREDICT Y FROM X AND B:
-# icpt=0: ncol(X)   x 1  Betas for X only                Y ~ X %*% B[1:ncol(X), 1], or just X %*% B
-# icpt=1: ncol(X)+1 x 1  Betas for X and intercept       Y ~ X %*% B[1:ncol(X), 1] + B[ncol(X)+1, 1]
-# icpt=2: ncol(X)+1 x 2  Col.1: betas for X & intercept  Y ~ X %*% B[1:ncol(X), 1] + B[ncol(X)+1, 1]
-#                        Col.2: betas for shifted/rescaled X and intercept
-#
-# In addition, some regression statistics are provided in CSV format, one comma-separated
-# name-value pair per each line, as follows:
-#
-# NAME                  MEANING
-# -------------------------------------------------------------------------------------
-# AVG_TOT_Y             Average of the response value Y
-# STDEV_TOT_Y           Standard Deviation of the response value Y
-# AVG_RES_Y             Average of the residual Y - pred(Y|X), i.e. residual bias
-# STDEV_RES_Y           Standard Deviation of the residual Y - pred(Y|X)
-# DISPERSION            GLM-style dispersion, i.e. residual sum of squares / # deg. fr.
-# PLAIN_R2              Plain R^2 of residual with bias included vs. total average
-# ADJUSTED_R2           Adjusted R^2 of residual with bias included vs. total average
-# PLAIN_R2_NOBIAS       Plain R^2 of residual with bias subtracted vs. total average
-# ADJUSTED_R2_NOBIAS    Adjusted R^2 of residual with bias subtracted vs. total average
-# PLAIN_R2_VS_0         * Plain R^2 of residual with bias included vs. zero constant
-# ADJUSTED_R2_VS_0      * Adjusted R^2 of residual with bias included vs. zero constant
-# -------------------------------------------------------------------------------------
-# * The last two statistics are only printed if there is no intercept (icpt=0)
-#
-# HOW TO INVOKE THIS SCRIPT - EXAMPLE:
-# hadoop jar SystemML.jar -f LinearRegDS.dml -nvargs X=INPUT_DIR/X Y=INPUT_DIR/Y B=OUTPUT_DIR/B
-#     O=OUTPUT_DIR/Out icpt=2 reg=1.0 fmt=csv
-
-fileX = $X;
-fileY = $Y;
-fileB = $B;
-fileO = ifdef ($O, " ");
-fmtB  = ifdef ($fmt, "text");
-
-intercept_status = ifdef ($icpt, 0);     # $icpt=0;
-regularization = ifdef ($reg, 0.000001); # $reg=0.000001;
-
-print ("BEGIN LINEAR REGRESSION SCRIPT");
-print ("Reading X and Y...");
-X = read (fileX);
-y = read (fileY);
-
-n = nrow (X);
-m = ncol (X);
-ones_n = matrix (1, rows = n, cols = 1);
-zero_cell = matrix (0, rows = 1, cols = 1);
-
-# Introduce the intercept, shift and rescale the columns of X if needed
-
-m_ext = m;
-if (intercept_status == 1 | intercept_status == 2)  # add the intercept column
-{
-    X = append (X, ones_n);
-    m_ext = ncol (X);
-}
-
-scale_lambda = matrix (1, rows = m_ext, cols = 1);
-if (intercept_status == 1 | intercept_status == 2)
-{
-    scale_lambda [m_ext, 1] = 0;
-}
-
-if (intercept_status == 2)  # scale-&-shift X columns to mean 0, variance 1
-{                           # Important assumption: X [, m_ext] = ones_n
-    avg_X_cols = t(colSums(X)) / n;
-    var_X_cols = (t(colSums (X ^ 2)) - n * (avg_X_cols ^ 2)) / (n - 1);
-    is_unsafe = ppred (var_X_cols, 0.0, "<=");
-    scale_X = 1.0 / sqrt (var_X_cols * (1 - is_unsafe) + is_unsafe);
-    scale_X [m_ext, 1] = 1;
-    shift_X = - avg_X_cols * scale_X;
-    shift_X [m_ext, 1] = 0;
-} else {
-    scale_X = matrix (1, rows = m_ext, cols = 1);
-    shift_X = matrix (0, rows = m_ext, cols = 1);
-}
-
-# Henceforth, if intercept_status == 2, we use "X %*% (SHIFT/SCALE TRANSFORM)"
-# instead of "X".  However, in order to preserve the sparsity of X,
-# we apply the transform associatively to some other part of the expression
-# in which it occurs.  To avoid materializing a large matrix, we rewrite it:
-#
-# ssX_A  = (SHIFT/SCALE TRANSFORM) %*% A    --- is rewritten as:
-# ssX_A  = diag (scale_X) %*% A;
-# ssX_A [m_ext, ] = ssX_A [m_ext, ] + t(shift_X) %*% A;
-#
-# tssX_A = t(SHIFT/SCALE TRANSFORM) %*% A   --- is rewritten as:
-# tssX_A = diag (scale_X) %*% A + shift_X %*% A [m_ext, ];
-
-lambda = scale_lambda * regularization;
-
-# BEGIN THE DIRECT SOLVE ALGORITHM (EXTERNAL CALL)
-
-A = t(X) %*% X;
-b = t(X) %*% y;
-if (intercept_status == 2) {
-    A = t(diag (scale_X) %*% A + shift_X %*% A [m_ext, ]);
-    A =   diag (scale_X) %*% A + shift_X %*% A [m_ext, ];
-    b =   diag (scale_X) %*% b + shift_X %*% b [m_ext, ];
-}
-A = A + diag (lambda);
-
-print ("Calling the Direct Solver...");
-
-beta_unscaled = solve (A, b);
-
-# END THE DIRECT SOLVE ALGORITHM
-
-if (intercept_status == 2) {
-    beta = scale_X * beta_unscaled;
-    beta [m_ext, ] = beta [m_ext, ] + t(shift_X) %*% beta_unscaled;
-} else {
-    beta = beta_unscaled;
-}
-
-print ("Computing the statistics...");
-
-avg_tot = sum (y) / n;
-ss_tot = sum (y ^ 2);
-ss_avg_tot = ss_tot - n * avg_tot ^ 2;
-var_tot = ss_avg_tot / (n - 1);
-y_residual = y - X %*% beta;
-avg_res = sum (y_residual) / n;
-ss_res = sum (y_residual ^ 2);
-ss_avg_res = ss_res - n * avg_res ^ 2;
-
-plain_R2 = 1 - ss_res / ss_avg_tot;
-if (n > m_ext) {
-    dispersion  = ss_res / (n - m_ext);
-    adjusted_R2 = 1 - dispersion / (ss_avg_tot / (n - 1));
-} else {
-    dispersion  = 0.0 / 0.0;
-    adjusted_R2 = 0.0 / 0.0;
-}
-
-plain_R2_nobias = 1 - ss_avg_res / ss_avg_tot;
-deg_freedom = n - m - 1;
-if (deg_freedom > 0) {
-    var_res = ss_avg_res / deg_freedom;
-    adjusted_R2_nobias = 1 - var_res / (ss_avg_tot / (n - 1));
-} else {
-    var_res = 0.0 / 0.0;
-    adjusted_R2_nobias = 0.0 / 0.0;
-    print ("Warning: zero or negative number of degrees of freedom.");
-}
-
-plain_R2_vs_0 = 1 - ss_res / ss_tot;
-if (n > m) {
-    adjusted_R2_vs_0 = 1 - (ss_res / (n - m)) / (ss_tot / n);
-} else {
-    adjusted_R2_vs_0 = 0.0 / 0.0;
-}
-
-str = "AVG_TOT_Y," + avg_tot;                                    #  Average of the response value Y
-str = append (str, "STDEV_TOT_Y," + sqrt (var_tot));             #  Standard Deviation of the response value Y
-str = append (str, "AVG_RES_Y," + avg_res);                      #  Average of the residual Y - pred(Y|X), i.e. residual bias
-str = append (str, "STDEV_RES_Y," + sqrt (var_res));             #  Standard Deviation of the residual Y - pred(Y|X)
-str = append (str, "DISPERSION," + dispersion);                  #  GLM-style dispersion, i.e. residual sum of squares / # d.f.
-str = append (str, "PLAIN_R2," + plain_R2);                      #  Plain R^2 of residual with bias included vs. total average
-str = append (str, "ADJUSTED_R2," + adjusted_R2);                #  Adjusted R^2 of residual with bias included vs. total average
-str = append (str, "PLAIN_R2_NOBIAS," + plain_R2_nobias);        #  Plain R^2 of residual with bias subtracted vs. total average
-str = append (str, "ADJUSTED_R2_NOBIAS," + adjusted_R2_nobias);  #  Adjusted R^2 of residual with bias subtracted vs. total average
-if (intercept_status == 0) {
-    str = append (str, "PLAIN_R2_VS_0," + plain_R2_vs_0);        #  Plain R^2 of residual with bias included vs. zero constant
-    str = append (str, "ADJUSTED_R2_VS_0," + adjusted_R2_vs_0);  #  Adjusted R^2 of residual with bias included vs. zero constant
-}
-
-if (fileO != " ") {
-    write (str, fileO);
-} else {
-    print (str);
-}
-
-# Prepare the output matrix
-print ("Writing the output matrix...");
-
-if (intercept_status == 2) {
-    beta_out = append (beta, beta_unscaled);
-} else {
-    beta_out = beta;
-}
-write (beta_out, fileB, format=fmtB);
-print ("END LINEAR REGRESSION SCRIPT");
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+#
+# THIS SCRIPT SOLVES LINEAR REGRESSION USING A DIRECT SOLVER FOR (X^T X + lambda) beta = X^T y
+#
+# INPUT PARAMETERS:
+# --------------------------------------------------------------------------------------------
+# NAME  TYPE   DEFAULT  MEANING
+# --------------------------------------------------------------------------------------------
+# X     String  ---     Location (on HDFS) to read the matrix X of feature vectors
+# Y     String  ---     Location (on HDFS) to read the 1-column matrix Y of response values
+# B     String  ---     Location to store estimated regression parameters (the betas)
+# O     String  " "     Location to write the printed statistics; by default is standard output
+# icpt  Int      0      Intercept presence, shifting and rescaling the columns of X:
+#                       0 = no intercept, no shifting, no rescaling;
+#                       1 = add intercept, but neither shift nor rescale X;
+#                       2 = add intercept, shift & rescale X columns to mean = 0, variance = 1
+# reg   Double 0.000001 Regularization constant (lambda) for L2-regularization; set to nonzero
+#                       for highly dependent/sparse/numerous features
+# fmt   String "text"   Matrix output format for B (the betas) only, usually "text" or "csv"
+# --------------------------------------------------------------------------------------------
+# OUTPUT: Matrix of regression parameters (the betas); its size depends on the icpt input value:
+#         OUTPUT SIZE:   OUTPUT CONTENTS:                HOW TO PREDICT Y FROM X AND B:
+# icpt=0: ncol(X)   x 1  Betas for X only                Y ~ X %*% B[1:ncol(X), 1], or just X %*% B
+# icpt=1: ncol(X)+1 x 1  Betas for X and intercept       Y ~ X %*% B[1:ncol(X), 1] + B[ncol(X)+1, 1]
+# icpt=2: ncol(X)+1 x 2  Col.1: betas for X & intercept  Y ~ X %*% B[1:ncol(X), 1] + B[ncol(X)+1, 1]
+#                        Col.2: betas for shifted/rescaled X and intercept
+#
+# In addition, some regression statistics are provided in CSV format, one comma-separated
+# name-value pair per line, as follows:
+#
+# NAME                  MEANING
+# -------------------------------------------------------------------------------------
+# AVG_TOT_Y             Average of the response value Y
+# STDEV_TOT_Y           Standard Deviation of the response value Y
+# AVG_RES_Y             Average of the residual Y - pred(Y|X), i.e. residual bias
+# STDEV_RES_Y           Standard Deviation of the residual Y - pred(Y|X)
+# DISPERSION            GLM-style dispersion, i.e. residual sum of squares / # deg. fr.
+# PLAIN_R2              Plain R^2 of residual with bias included vs. total average
+# ADJUSTED_R2           Adjusted R^2 of residual with bias included vs. total average
+# PLAIN_R2_NOBIAS       Plain R^2 of residual with bias subtracted vs. total average
+# ADJUSTED_R2_NOBIAS    Adjusted R^2 of residual with bias subtracted vs. total average
+# PLAIN_R2_VS_0         * Plain R^2 of residual with bias included vs. zero constant
+# ADJUSTED_R2_VS_0      * Adjusted R^2 of residual with bias included vs. zero constant
+# -------------------------------------------------------------------------------------
+# * The last two statistics are only printed if there is no intercept (icpt=0)
+#
+# HOW TO INVOKE THIS SCRIPT - EXAMPLE:
+# hadoop jar SystemML.jar -f LinearRegDS.dml -nvargs X=INPUT_DIR/X Y=INPUT_DIR/Y B=OUTPUT_DIR/B
+#     O=OUTPUT_DIR/Out icpt=2 reg=1.0 fmt=csv
+
+fileX = $X;
+fileY = $Y;
+fileB = $B;
+fileO = ifdef ($O, " ");
+fmtB  = ifdef ($fmt, "text");
+
+intercept_status = ifdef ($icpt, 0);     # $icpt=0;
+regularization = ifdef ($reg, 0.000001); # $reg=0.000001;
+
+print ("BEGIN LINEAR REGRESSION SCRIPT");
+print ("Reading X and Y...");
+X = read (fileX);
+y = read (fileY);
+
+n = nrow (X);
+m = ncol (X);
+ones_n = matrix (1, rows = n, cols = 1);
+zero_cell = matrix (0, rows = 1, cols = 1);
+
+# Introduce the intercept, shift and rescale the columns of X if needed
+
+m_ext = m;
+if (intercept_status == 1 | intercept_status == 2)  # add the intercept column
+{
+    X = append (X, ones_n);
+    m_ext = ncol (X);
+}
+
+scale_lambda = matrix (1, rows = m_ext, cols = 1);
+if (intercept_status == 1 | intercept_status == 2)
+{
+    scale_lambda [m_ext, 1] = 0;
+}
+
+if (intercept_status == 2)  # scale-&-shift X columns to mean 0, variance 1
+{                           # Important assumption: X [, m_ext] = ones_n
+    avg_X_cols = t(colSums(X)) / n;
+    var_X_cols = (t(colSums (X ^ 2)) - n * (avg_X_cols ^ 2)) / (n - 1);
+    is_unsafe = ppred (var_X_cols, 0.0, "<=");
+    scale_X = 1.0 / sqrt (var_X_cols * (1 - is_unsafe) + is_unsafe);
+    scale_X [m_ext, 1] = 1;
+    shift_X = - avg_X_cols * scale_X;
+    shift_X [m_ext, 1] = 0;
+} else {
+    scale_X = matrix (1, rows = m_ext, cols = 1);
+    shift_X = matrix (0, rows = m_ext, cols = 1);
+}
+
+# Henceforth, if intercept_status == 2, we use "X %*% (SHIFT/SCALE TRANSFORM)"
+# instead of "X".  However, in order to preserve the sparsity of X,
+# we apply the transform associatively to some other part of the expression
+# in which it occurs.  To avoid materializing a large matrix, we rewrite it:
+#
+# ssX_A  = (SHIFT/SCALE TRANSFORM) %*% A    --- is rewritten as:
+# ssX_A  = diag (scale_X) %*% A;
+# ssX_A [m_ext, ] = ssX_A [m_ext, ] + t(shift_X) %*% A;
+#
+# tssX_A = t(SHIFT/SCALE TRANSFORM) %*% A   --- is rewritten as:
+# tssX_A = diag (scale_X) %*% A + shift_X %*% A [m_ext, ];
+
+lambda = scale_lambda * regularization;
+
+# BEGIN THE DIRECT SOLVE ALGORITHM (EXTERNAL CALL)
+
+A = t(X) %*% X;
+b = t(X) %*% y;
+if (intercept_status == 2) {
+    A = t(diag (scale_X) %*% A + shift_X %*% A [m_ext, ]);
+    A =   diag (scale_X) %*% A + shift_X %*% A [m_ext, ];
+    b =   diag (scale_X) %*% b + shift_X %*% b [m_ext, ];
+}
+A = A + diag (lambda);
+
+print ("Calling the Direct Solver...");
+
+beta_unscaled = solve (A, b);
+
+# END THE DIRECT SOLVE ALGORITHM
+
+if (intercept_status == 2) {
+    beta = scale_X * beta_unscaled;
+    beta [m_ext, ] = beta [m_ext, ] + t(shift_X) %*% beta_unscaled;
+} else {
+    beta = beta_unscaled;
+}
+
+print ("Computing the statistics...");
+
+avg_tot = sum (y) / n;
+ss_tot = sum (y ^ 2);
+ss_avg_tot = ss_tot - n * avg_tot ^ 2;
+var_tot = ss_avg_tot / (n - 1);
+y_residual = y - X %*% beta;
+avg_res = sum (y_residual) / n;
+ss_res = sum (y_residual ^ 2);
+ss_avg_res = ss_res - n * avg_res ^ 2;
+
+plain_R2 = 1 - ss_res / ss_avg_tot;
+if (n > m_ext) {
+    dispersion  = ss_res / (n - m_ext);
+    adjusted_R2 = 1 - dispersion / (ss_avg_tot / (n - 1));
+} else {
+    dispersion  = 0.0 / 0.0;
+    adjusted_R2 = 0.0 / 0.0;
+}
+
+plain_R2_nobias = 1 - ss_avg_res / ss_avg_tot;
+deg_freedom = n - m - 1;
+if (deg_freedom > 0) {
+    var_res = ss_avg_res / deg_freedom;
+    adjusted_R2_nobias = 1 - var_res / (ss_avg_tot / (n - 1));
+} else {
+    var_res = 0.0 / 0.0;
+    adjusted_R2_nobias = 0.0 / 0.0;
+    print ("Warning: zero or negative number of degrees of freedom.");
+}
+
+plain_R2_vs_0 = 1 - ss_res / ss_tot;
+if (n > m) {
+    adjusted_R2_vs_0 = 1 - (ss_res / (n - m)) / (ss_tot / n);
+} else {
+    adjusted_R2_vs_0 = 0.0 / 0.0;
+}
+
+str = "AVG_TOT_Y," + avg_tot;                                    #  Average of the response value Y
+str = append (str, "STDEV_TOT_Y," + sqrt (var_tot));             #  Standard Deviation of the response value Y
+str = append (str, "AVG_RES_Y," + avg_res);                      #  Average of the residual Y - pred(Y|X), i.e. residual bias
+str = append (str, "STDEV_RES_Y," + sqrt (var_res));             #  Standard Deviation of the residual Y - pred(Y|X)
+str = append (str, "DISPERSION," + dispersion);                  #  GLM-style dispersion, i.e. residual sum of squares / # d.f.
+str = append (str, "PLAIN_R2," + plain_R2);                      #  Plain R^2 of residual with bias included vs. total average
+str = append (str, "ADJUSTED_R2," + adjusted_R2);                #  Adjusted R^2 of residual with bias included vs. total average
+str = append (str, "PLAIN_R2_NOBIAS," + plain_R2_nobias);        #  Plain R^2 of residual with bias subtracted vs. total average
+str = append (str, "ADJUSTED_R2_NOBIAS," + adjusted_R2_nobias);  #  Adjusted R^2 of residual with bias subtracted vs. total average
+if (intercept_status == 0) {
+    str = append (str, "PLAIN_R2_VS_0," + plain_R2_vs_0);        #  Plain R^2 of residual with bias included vs. zero constant
+    str = append (str, "ADJUSTED_R2_VS_0," + adjusted_R2_vs_0);  #  Adjusted R^2 of residual with bias included vs. zero constant
+}
+
+if (fileO != " ") {
+    write (str, fileO);
+} else {
+    print (str);
+}
+
+# Prepare the output matrix
+print ("Writing the output matrix...");
+
+if (intercept_status == 2) {
+    beta_out = append (beta, beta_unscaled);
+} else {
+    beta_out = beta;
+}
+write (beta_out, fileB, format=fmtB);
+print ("END LINEAR REGRESSION SCRIPT");


[19/55] [partial] incubator-systemml git commit: [SYSTEMML-482] [SYSTEMML-480] Adding a Git attributes file to enforce Unix-styled line endings, and normalizing all of the line endings.

Posted by du...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/naive-bayes-parfor/naive-bayes.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/naive-bayes-parfor/naive-bayes.dml b/src/test/scripts/applications/naive-bayes-parfor/naive-bayes.dml
index d0ce8d1..77df03b 100644
--- a/src/test/scripts/applications/naive-bayes-parfor/naive-bayes.dml
+++ b/src/test/scripts/applications/naive-bayes-parfor/naive-bayes.dml
@@ -1,78 +1,78 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# Implements multinomial naive Bayes classifier with Laplace correction
-#
-# Example Usage:
-# hadoop jar SystemML.jar -f naive-bayes.dml -nvargs X=<Data> Y=<labels> classes=<Num Classes> laplace=<Laplace Correction> prior=<Model file1> conditionals=<Model file2> accuracy=<accuracy file> fmt="text"
-#
-
-# defaults
-# $laplace = 1
-fmt = ifdef($fmt, "text")
-
-# reading input args
-numClasses = $classes
-D = read($X)
-C = read($Y)
-laplace_correction = ifdef($laplace, 1)
-
-numRows = nrow(D)
-numFeatures = ncol(D)
-
-# Compute conditionals
-
-# Compute the feature counts for each class
-classFeatureCounts = matrix(0, rows=numClasses, cols=numFeatures)
-parfor (i in 1:numFeatures) {
-  Col = D[,i]
-  classFeatureCounts[,i] = aggregate(target=Col, groups=C, fn="sum", ngroups=as.integer(numClasses))
-}
-
-# Compute the total feature count for each class 
-# and add the number of features to this sum
-# for subsequent regularization (Laplace's rule)
-classSums = rowSums(classFeatureCounts) + numFeatures*laplace_correction
-
-# Compute class conditional probabilities
-ones = matrix(1, rows=1, cols=numFeatures)
-repClassSums = classSums %*% ones
-class_conditionals = (classFeatureCounts + laplace_correction) / repClassSums
-
-# Compute class priors
-class_counts = aggregate(target=C, groups=C, fn="count", ngroups=as.integer(numClasses))
-class_prior = class_counts / numRows;
-
-# Compute accuracy on training set
-ones = matrix(1, rows=numRows, cols=1)
-D_w_ones = append(D, ones)
-model = append(class_conditionals, class_prior)
-log_probs = D_w_ones %*% t(log(model))
-pred = rowIndexMax(log_probs)
-acc = sum(ppred(pred, C, "==")) / numRows * 100
-
-acc_str = "Training Accuracy (%): " + acc
-print(acc_str)
-write(acc_str, $accuracy)
-
-# write out the model
-write(class_prior, $prior, format=fmt);
-write(class_conditionals, $conditionals, format=fmt);
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# Implements multinomial naive Bayes classifier with Laplace correction
+#
+# Example Usage:
+# hadoop jar SystemML.jar -f naive-bayes.dml -nvargs X=<Data> Y=<labels> classes=<Num Classes> laplace=<Laplace Correction> prior=<Model file1> conditionals=<Model file2> accuracy=<accuracy file> fmt="text"
+#
+
+# defaults
+# $laplace = 1
+fmt = ifdef($fmt, "text")
+
+# reading input args
+numClasses = $classes
+D = read($X)
+C = read($Y)
+laplace_correction = ifdef($laplace, 1)
+
+numRows = nrow(D)
+numFeatures = ncol(D)
+
+# Compute conditionals
+
+# Compute the feature counts for each class
+classFeatureCounts = matrix(0, rows=numClasses, cols=numFeatures)
+parfor (i in 1:numFeatures) {
+  Col = D[,i]
+  classFeatureCounts[,i] = aggregate(target=Col, groups=C, fn="sum", ngroups=as.integer(numClasses))
+}
+
+# Compute the total feature count for each class 
+# and add the number of features to this sum
+# for subsequent regularization (Laplace's rule)
+classSums = rowSums(classFeatureCounts) + numFeatures*laplace_correction
+
+# Compute class conditional probabilities
+ones = matrix(1, rows=1, cols=numFeatures)
+repClassSums = classSums %*% ones
+class_conditionals = (classFeatureCounts + laplace_correction) / repClassSums
+
+# Compute class priors
+class_counts = aggregate(target=C, groups=C, fn="count", ngroups=as.integer(numClasses))
+class_prior = class_counts / numRows;
+
+# Compute accuracy on training set
+ones = matrix(1, rows=numRows, cols=1)
+D_w_ones = append(D, ones)
+model = append(class_conditionals, class_prior)
+log_probs = D_w_ones %*% t(log(model))
+pred = rowIndexMax(log_probs)
+acc = sum(ppred(pred, C, "==")) / numRows * 100
+
+acc_str = "Training Accuracy (%): " + acc
+print(acc_str)
+write(acc_str, $accuracy)
+
+# write out the model
+write(class_prior, $prior, format=fmt);
+write(class_conditionals, $conditionals, format=fmt);

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/naive-bayes-parfor/naive-bayes.pydml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/naive-bayes-parfor/naive-bayes.pydml b/src/test/scripts/applications/naive-bayes-parfor/naive-bayes.pydml
index 5d84951..462b330 100644
--- a/src/test/scripts/applications/naive-bayes-parfor/naive-bayes.pydml
+++ b/src/test/scripts/applications/naive-bayes-parfor/naive-bayes.pydml
@@ -1,79 +1,79 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# Implements multinomial naive Bayes classifier with Laplace correction
-#
-# Example Usage:
-# hadoop jar SystemML.jar -f naive-bayes.pydml -python -nvargs X=<Data> Y=<labels> classes=<Num Classes> laplace=<Laplace Correction> prior=<Model file1> conditionals=<Model file2> accuracy=<accuracy file> fmt="text"
-#
-
-# defaults
-# $laplace = 1
-fmt = ifdef($fmt, "text")
-
-# reading input args
-numClasses = $classes
-D = load($X)
-C = load($Y)
-laplace_correction = ifdef($laplace, 1)
-
-numRows = nrow(D)
-numFeatures = ncol(D)
-
-# Compute conditionals
-
-# Compute the feature counts for each class
-classFeatureCounts = full(0, rows=numClasses, cols=numFeatures)
-parfor (i in 1:numFeatures):
-    Col = D[,i]
-    classFeatureCounts[,i] = aggregate(target=Col, groups=C, fn="sum", ngroups=numClasses)
-
-# Compute the total feature count for each class 
-# and add the number of features to this sum
-# for subsequent regularization (Laplace's rule)
-classSums = rowSums(classFeatureCounts) + numFeatures*laplace_correction
-
-# Compute class conditional probabilities
-ones = full(1, rows=1, cols=numFeatures)
-repClassSums = dot(classSums, ones)
-class_conditionals = (classFeatureCounts + laplace_correction) / repClassSums
-
-# Compute class priors
-class_counts = aggregate(target=C, groups=C, fn="count", ngroups=numClasses)
-class_prior = class_counts / numRows
-
-# Compute accuracy on training set
-ones = full(1, rows=numRows, cols=1)
-D_w_ones = append(D, ones)
-model = append(class_conditionals, class_prior)
-log_model = log(model)
-transpose_log_model = log_model.transpose()
-log_probs = dot(D_w_ones, transpose_log_model)
-pred = rowIndexMax(log_probs)
-acc = sum(ppred(pred, C, "==")) / numRows * 100
-
-acc_str = "Training Accuracy (%): " + acc
-print(acc_str)
-save(acc_str, $accuracy)
-
-# write out the model
-save(class_prior, $prior, format=fmt)
-save(class_conditionals, $conditionals, format=fmt)
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# Implements multinomial naive Bayes classifier with Laplace correction
+#
+# Example Usage:
+# hadoop jar SystemML.jar -f naive-bayes.pydml -python -nvargs X=<Data> Y=<labels> classes=<Num Classes> laplace=<Laplace Correction> prior=<Model file1> conditionals=<Model file2> accuracy=<accuracy file> fmt="text"
+#
+
+# defaults
+# $laplace = 1
+fmt = ifdef($fmt, "text")
+
+# reading input args
+numClasses = $classes
+D = load($X)
+C = load($Y)
+laplace_correction = ifdef($laplace, 1)
+
+numRows = nrow(D)
+numFeatures = ncol(D)
+
+# Compute conditionals
+
+# Compute the feature counts for each class
+classFeatureCounts = full(0, rows=numClasses, cols=numFeatures)
+parfor (i in 1:numFeatures):
+    Col = D[,i]
+    classFeatureCounts[,i] = aggregate(target=Col, groups=C, fn="sum", ngroups=numClasses)
+
+# Compute the total feature count for each class 
+# and add the number of features to this sum
+# for subsequent regularization (Laplace's rule)
+classSums = rowSums(classFeatureCounts) + numFeatures*laplace_correction
+
+# Compute class conditional probabilities
+ones = full(1, rows=1, cols=numFeatures)
+repClassSums = dot(classSums, ones)
+class_conditionals = (classFeatureCounts + laplace_correction) / repClassSums
+
+# Compute class priors
+class_counts = aggregate(target=C, groups=C, fn="count", ngroups=numClasses)
+class_prior = class_counts / numRows
+
+# Compute accuracy on training set
+ones = full(1, rows=numRows, cols=1)
+D_w_ones = append(D, ones)
+model = append(class_conditionals, class_prior)
+log_model = log(model)
+transpose_log_model = log_model.transpose()
+log_probs = dot(D_w_ones, transpose_log_model)
+pred = rowIndexMax(log_probs)
+acc = sum(ppred(pred, C, "==")) / numRows * 100
+
+acc_str = "Training Accuracy (%): " + acc
+print(acc_str)
+save(acc_str, $accuracy)
+
+# write out the model
+save(class_prior, $prior, format=fmt)
+save(class_conditionals, $conditionals, format=fmt)

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/naive-bayes/naive-bayes.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/naive-bayes/naive-bayes.R b/src/test/scripts/applications/naive-bayes/naive-bayes.R
index dc65b8a..a3ca47a 100644
--- a/src/test/scripts/applications/naive-bayes/naive-bayes.R
+++ b/src/test/scripts/applications/naive-bayes/naive-bayes.R
@@ -1,71 +1,71 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-args <- commandArgs(TRUE)
-
-library("Matrix")
-
-D = as.matrix(readMM(paste(args[1], "X.mtx", sep="")))
-C = as.matrix(readMM(paste(args[1], "Y.mtx", sep="")))
-
-# reading input args
-numClasses = as.integer(args[2]);
-laplace_correction = as.double(args[3]);
-
-numRows = nrow(D)
-numFeatures = ncol(D)
-
-# Compute conditionals
-
-# Compute the feature counts for each class
-classFeatureCounts = matrix(0, numClasses, numFeatures)
-for (i in 1:numFeatures) {
-  Col = D[,i]
-  classFeatureCounts[,i] = aggregate(as.vector(Col), by=list(as.vector(C)), FUN=sum)[,2];
-}
-
-# Compute the total feature count for each class 
-# and add the number of features to this sum
-# for subsequent regularization (Laplace's rule)
-classSums = rowSums(classFeatureCounts) + numFeatures*laplace_correction
-
-# Compute class conditional probabilities
-ones = matrix(1, 1, numFeatures)
-repClassSums = classSums %*% ones;
-class_conditionals = (classFeatureCounts + laplace_correction) / repClassSums;
-
-# Compute class priors
-class_counts = aggregate(as.vector(C), by=list(as.vector(C)), FUN=length)[,2]
-class_prior = class_counts / numRows;
-
-# Compute accuracy on training set
-ones = matrix(1, numRows, 1)
-D_w_ones = cbind(D, ones)
-model = cbind(class_conditionals, class_prior)
-log_probs = D_w_ones %*% t(log(model))
-pred = max.col(log_probs,ties.method="last");
-acc = sum(pred == C) / numRows * 100
-
-print(paste("Training Accuracy (%): ", acc, sep=""))
-
-# write out the model
-writeMM(as(class_prior, "CsparseMatrix"), paste(args[4], "prior", sep=""));
-writeMM(as(class_conditionals, "CsparseMatrix"), paste(args[4], "conditionals", sep=""));
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+args <- commandArgs(TRUE)
+
+library("Matrix")
+
+D = as.matrix(readMM(paste(args[1], "X.mtx", sep="")))
+C = as.matrix(readMM(paste(args[1], "Y.mtx", sep="")))
+
+# reading input args
+numClasses = as.integer(args[2]);
+laplace_correction = as.double(args[3]);
+
+numRows = nrow(D)
+numFeatures = ncol(D)
+
+# Compute conditionals
+
+# Compute the feature counts for each class
+classFeatureCounts = matrix(0, numClasses, numFeatures)
+for (i in 1:numFeatures) {
+  Col = D[,i]
+  classFeatureCounts[,i] = aggregate(as.vector(Col), by=list(as.vector(C)), FUN=sum)[,2];
+}
+
+# Compute the total feature count for each class 
+# and add the number of features to this sum
+# for subsequent regularization (Laplace's rule)
+classSums = rowSums(classFeatureCounts) + numFeatures*laplace_correction
+
+# Compute class conditional probabilities
+ones = matrix(1, 1, numFeatures)
+repClassSums = classSums %*% ones;
+class_conditionals = (classFeatureCounts + laplace_correction) / repClassSums;
+
+# Compute class priors
+class_counts = aggregate(as.vector(C), by=list(as.vector(C)), FUN=length)[,2]
+class_prior = class_counts / numRows;
+
+# Compute accuracy on training set
+ones = matrix(1, numRows, 1)
+D_w_ones = cbind(D, ones)
+model = cbind(class_conditionals, class_prior)
+log_probs = D_w_ones %*% t(log(model))
+pred = max.col(log_probs,ties.method="last");
+acc = sum(pred == C) / numRows * 100
+
+print(paste("Training Accuracy (%): ", acc, sep=""))
+
+# write out the model
+writeMM(as(class_prior, "CsparseMatrix"), paste(args[4], "prior", sep=""));
+writeMM(as(class_conditionals, "CsparseMatrix"), paste(args[4], "conditionals", sep=""));

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/naive-bayes/naive-bayes.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/naive-bayes/naive-bayes.dml b/src/test/scripts/applications/naive-bayes/naive-bayes.dml
index 1fe1bf4..a81edea 100644
--- a/src/test/scripts/applications/naive-bayes/naive-bayes.dml
+++ b/src/test/scripts/applications/naive-bayes/naive-bayes.dml
@@ -1,67 +1,67 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# Implements multinomial naive Bayes classifier with Laplace correction
-#
-# Example Usage:
-# hadoop jar SystemML.jar -f naive-bayes.dml -nvargs X=<Data> Y=<labels> classes=<Num Classes> laplace=<Laplace Correction> prior=<Model file1> conditionals=<Model file2> accuracy=<accuracy file> fmt="text"
-#
-
-# defaults
-# $laplace = 1
-fmt = ifdef($fmt, "text")
-
-# reading input args
-numClasses = $classes
-D = read($X)
-C = read($Y)
-laplaceCorrection = ifdef($laplace, 1)
-
-numRows = nrow(D)
-numFeatures = ncol(D)
-
-# Compute conditionals
-# Compute the feature counts for each class
-classFeatureCounts = aggregate(target=D, groups=C, fn="sum", ngroups=as.integer(numClasses));
-
-# Compute the total feature count for each class 
-# and add the number of features to this sum
-# for subsequent regularization (Laplace's rule)
-classSums = rowSums(classFeatureCounts) + numFeatures*laplaceCorrection
-
-# Compute class conditional probabilities
-classConditionals = (classFeatureCounts + laplaceCorrection) / classSums
-
-# Compute class priors
-classCounts = aggregate(target=C, groups=C, fn="count", ngroups=as.integer(numClasses))
-classPrior = classCounts / numRows;
-
-# Compute accuracy on training set
-logProbs = D %*% t(log(classConditionals)) + t(log(classPrior));
-acc = sum(rowIndexMax(logProbs) == C) / numRows * 100
-
-acc_str = "Training Accuracy (%): " + acc
-print(acc_str)
-write(acc_str, $accuracy)
-
-# write out the model
-write(classPrior, $prior, format=fmt);
-write(classConditionals, $conditionals, format=fmt);
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# Implements multinomial naive Bayes classifier with Laplace correction
+#
+# Example Usage:
+# hadoop jar SystemML.jar -f naive-bayes.dml -nvargs X=<Data> Y=<labels> classes=<Num Classes> laplace=<Laplace Correction> prior=<Model file1> conditionals=<Model file2> accuracy=<accuracy file> fmt="text"
+#
+
+# defaults
+# $laplace = 1
+fmt = ifdef($fmt, "text")
+
+# reading input args
+numClasses = $classes
+D = read($X)
+C = read($Y)
+laplaceCorrection = ifdef($laplace, 1)
+
+numRows = nrow(D)
+numFeatures = ncol(D)
+
+# Compute conditionals
+# Compute the feature counts for each class
+classFeatureCounts = aggregate(target=D, groups=C, fn="sum", ngroups=as.integer(numClasses));
+
+# Compute the total feature count for each class 
+# and add the number of features to this sum
+# for subsequent regularization (Laplace's rule)
+classSums = rowSums(classFeatureCounts) + numFeatures*laplaceCorrection
+
+# Compute class conditional probabilities
+classConditionals = (classFeatureCounts + laplaceCorrection) / classSums
+
+# Compute class priors
+classCounts = aggregate(target=C, groups=C, fn="count", ngroups=as.integer(numClasses))
+classPrior = classCounts / numRows;
+
+# Compute accuracy on training set
+logProbs = D %*% t(log(classConditionals)) + t(log(classPrior));
+acc = sum(rowIndexMax(logProbs) == C) / numRows * 100
+
+acc_str = "Training Accuracy (%): " + acc
+print(acc_str)
+write(acc_str, $accuracy)
+
+# write out the model
+write(classPrior, $prior, format=fmt);
+write(classConditionals, $conditionals, format=fmt);

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/naive-bayes/naive-bayes.pydml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/naive-bayes/naive-bayes.pydml b/src/test/scripts/applications/naive-bayes/naive-bayes.pydml
index 25fbf84..480265d 100644
--- a/src/test/scripts/applications/naive-bayes/naive-bayes.pydml
+++ b/src/test/scripts/applications/naive-bayes/naive-bayes.pydml
@@ -1,71 +1,71 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# Implements multinomial naive Bayes classifier with Laplace correction
-#
-# Example Usage:
-# hadoop jar SystemML.jar -f naive-bayes.pydml -python -nvargs X=<Data> Y=<labels> classes=<Num Classes> laplace=<Laplace Correction> prior=<Model file1> conditionals=<Model file2> accuracy=<accuracy file> fmt="text"
-#
-
-# defaults
-# $laplace = 1
-fmt = ifdef($fmt, "text")
-
-# reading input args
-numClasses = $classes
-D = load($X)
-C = load($Y)
-laplaceCorrection = ifdef($laplace, 1)
-
-numRows = nrow(D)
-numFeatures = ncol(D)
-
-# Compute conditionals
-# Compute the feature counts for each class
-classFeatureCounts = aggregate(target=D, groups=C, fn="sum", ngroups=numClasses);
-
-# Compute the total feature count for each class 
-# and add the number of features to this sum
-# for subsequent regularization (Laplace's rule)
-classSums = rowSums(classFeatureCounts) + numFeatures*laplaceCorrection
-
-# Compute class conditional probabilities
-classConditionals = (classFeatureCounts + laplaceCorrection) / classSums
-
-# Compute class priors
-classCounts = aggregate(target=C, groups=C, fn="count", ngroups=numClasses)
-classPrior = classCounts / numRows
-
-# Compute accuracy on training set
-lmodel1 = log(classConditionals)
-lmodel2 = log(classPrior)
-tlmodel1 = lmodel1.transpose()
-tlmodel2 = lmodel2.transpose()
-logProbs = dot(D, tlmodel1) + tlmodel2
-acc = sum(rowIndexMax(logProbs) == C) / numRows * 100
-
-acc_str = "Training Accuracy (%): " + acc
-print(acc_str)
-save(acc_str, $accuracy)
-
-# write out the model
-save(classPrior, $prior, format=fmt)
-save(classConditionals, $conditionals, format=fmt)
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# Implements multinomial naive Bayes classifier with Laplace correction
+#
+# Example Usage:
+# hadoop jar SystemML.jar -f naive-bayes.pydml -python -nvargs X=<Data> Y=<labels> classes=<Num Classes> laplace=<Laplace Correction> prior=<Model file1> conditionals=<Model file2> accuracy=<accuracy file> fmt="text"
+#
+
+# defaults
+# $laplace = 1
+fmt = ifdef($fmt, "text")
+
+# reading input args
+numClasses = $classes
+D = load($X)
+C = load($Y)
+laplaceCorrection = ifdef($laplace, 1)
+
+numRows = nrow(D)
+numFeatures = ncol(D)
+
+# Compute conditionals
+# Compute the feature counts for each class
+classFeatureCounts = aggregate(target=D, groups=C, fn="sum", ngroups=numClasses);
+
+# Compute the total feature count for each class 
+# and add the number of features to this sum
+# for subsequent regularization (Laplace's rule)
+classSums = rowSums(classFeatureCounts) + numFeatures*laplaceCorrection
+
+# Compute class conditional probabilities
+classConditionals = (classFeatureCounts + laplaceCorrection) / classSums
+
+# Compute class priors
+classCounts = aggregate(target=C, groups=C, fn="count", ngroups=numClasses)
+classPrior = classCounts / numRows
+
+# Compute accuracy on training set
+lmodel1 = log(classConditionals)
+lmodel2 = log(classPrior)
+tlmodel1 = lmodel1.transpose()
+tlmodel2 = lmodel2.transpose()
+logProbs = dot(D, tlmodel1) + tlmodel2
+acc = sum(rowIndexMax(logProbs) == C) / numRows * 100
+
+acc_str = "Training Accuracy (%): " + acc
+print(acc_str)
+save(acc_str, $accuracy)
+
+# write out the model
+save(classPrior, $prior, format=fmt)
+save(classConditionals, $conditionals, format=fmt)

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/parfor/parfor_bivariate.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/parfor/parfor_bivariate.R b/src/test/scripts/applications/parfor/parfor_bivariate.R
index 889b22b..82a5be5 100644
--- a/src/test/scripts/applications/parfor/parfor_bivariate.R
+++ b/src/test/scripts/applications/parfor/parfor_bivariate.R
@@ -1,155 +1,155 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-
-D1 <- readMM(paste(args[1], "D.mtx", sep=""))
-S11 <- readMM(paste(args[1], "S1.mtx", sep=""))
-S21 <- readMM(paste(args[1], "S2.mtx", sep=""))
-K11 <- readMM(paste(args[1], "K1.mtx", sep=""))
-K21 <- readMM(paste(args[1], "K2.mtx", sep=""))
-D <- as.matrix(D1);
-S1 <- as.matrix(S11);
-S2 <- as.matrix(S21);
-K1 <- as.matrix(K11);
-K2 <- as.matrix(K21);
-
-numPairs <- ncol(S1) * ncol(S2); # number of attribute pairs (|S1|*|S2|)
-maxC <- args[2]; # max number of categories in any categorical attribute
-
-s1size <- ncol(S1);
-s2size <- ncol(S2);
-
-# R, chisq, cramers, spearman, eta, anovaf
-numstats <- 8;
-basestats <- array(0,dim=c(numstats,numPairs)); 
-cat_counts <- array(0,dim=c(maxC,numPairs)); 
-cat_means <- array(0,dim=c(maxC,numPairs));
-cat_vars <- array(0,dim=c(maxC,numPairs));
-
-
-for( i in 1:s1size ) { 
-    a1 <- S1[,i];
-    k1 <- K1[1,i];
-    A1 <- as.matrix(D[,a1]);
-
-    for( j in 1:s2size ) {
-        pairID <-(i-1)*s2size+j;
-        a2 <- S2[,j];
-        k2 <- K2[1,j];
-        A2 <- as.matrix(D[,a2]);
-    
-        if (k1 == k2) {
-            if (k1 == 1) {   
-                # scale-scale
-                print("scale-scale");
-                basestats[1,pairID] <- cor(D[,a1], D[,a2]);
-                #basestats[1,pairID] <- cor(A1, A2);
-                
-                print(basestats[1,pairID]);
-            } else {
-                # nominal-nominal or ordinal-ordinal
-                print("categorical-categorical");
-                F <- table(A1,A2);
-                cst <- chisq.test(F);
-                chi_squared <- as.numeric(cst[1]);
-                degFreedom <- (nrow(F)-1)*(ncol(F)-1);
-                pValue <- as.numeric(cst[3]);
-                q <- min(dim(F));
-                W <- sum(F);
-                cramers_v <- sqrt(chi_squared/(W*(q-1)));
-
-                basestats[2,pairID] <- chi_squared;
-                basestats[3,pairID] <- degFreedom;
-                basestats[4,pairID] <- pValue;
-                basestats[5,pairID] <- cramers_v;
-
-                if ( k1 == 3 ) {
-                    # ordinal-ordinal   
-                    print("ordinal-ordinal");
-                    basestats[6,pairID] <- cor(A1,A2, method="spearman");
-                }
-            }
-        } 
-        else {       
-            if (k1 == 1 || k2 == 1) {    
-                # Scale-nominal/ordinal
-                print("scale-categorical");
-                if ( k1 == 1 ) {
-                    Av <- as.matrix(A2); 
-                    Yv <- as.matrix(A1); 
-                }
-                else {
-                    Av <- as.matrix(A1); 
-                    Yv <- as.matrix(A2); 
-                }
-                
-                W <- nrow(Av);
-                my <- mean(Yv); 
-                varY <- var(Yv);
-                
-                CFreqs <- as.matrix(table(Av)); 
-                CMeans <- as.matrix(aggregate(Yv, by=list(Av), "mean")$V1);
-                CVars <- as.matrix(aggregate(Yv, by=list(Av), "var")$V1);
-                R <- nrow(CFreqs);
-              
-                Eta <- sqrt(1 - ( sum((CFreqs-1)*CVars) / ((W-1)*varY) ));
-                anova_num <- sum( (CFreqs*(CMeans-my)^2) )/(R-1);
-                anova_den <- sum( (CFreqs-1)*CVars )/(W-R);
-                ANOVAF <- anova_num/anova_den;
-
-                basestats[7,pairID] <- Eta;
-                basestats[8,pairID] <- ANOVAF;
-
-                cat_counts[ 1:length(CFreqs),pairID] <- CFreqs;
-                cat_means[ 1:length(CMeans),pairID] <- CMeans;
-                cat_vars[ 1:length(CVars),pairID] <- CVars;
-            }
-            else {
-                # nominal-ordinal or ordinal-nominal    
-                print("nomial-ordinal"); #TODO should not be same code            
-                F <- table(A1,A2);
-                cst <- chisq.test(F);
-                chi_squared <- as.numeric(cst[1]);
-                degFreedom <- (nrow(F)-1)*(ncol(F)-1);
-                pValue <- as.numeric(cst[3]);
-                q <- min(dim(F));
-                W <- sum(F);
-                cramers_v <- sqrt(chi_squared/(W*(q-1)));
-                
-                basestats[2,pairID] <- chi_squared;
-                basestats[3,pairID] <- degFreedom;
-                basestats[4,pairID] <- pValue;
-                basestats[5,pairID] <- cramers_v;
-            }
-        }
-    }
-}
-
-writeMM(as(basestats, "CsparseMatrix"), paste(args[3], "bivar.stats", sep=""));
-writeMM(as(cat_counts, "CsparseMatrix"), paste(args[3], "category.counts", sep=""));
-writeMM(as(cat_means, "CsparseMatrix"), paste(args[3], "category.means", sep=""));
-writeMM(as(cat_vars, "CsparseMatrix"), paste(args[3], "category.variances", sep=""));
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+
+D1 <- readMM(paste(args[1], "D.mtx", sep=""))
+S11 <- readMM(paste(args[1], "S1.mtx", sep=""))
+S21 <- readMM(paste(args[1], "S2.mtx", sep=""))
+K11 <- readMM(paste(args[1], "K1.mtx", sep=""))
+K21 <- readMM(paste(args[1], "K2.mtx", sep=""))
+D <- as.matrix(D1);
+S1 <- as.matrix(S11);
+S2 <- as.matrix(S21);
+K1 <- as.matrix(K11);
+K2 <- as.matrix(K21);
+
+numPairs <- ncol(S1) * ncol(S2); # number of attribute pairs (|S1|*|S2|)
+maxC <- args[2]; # max number of categories in any categorical attribute
+
+s1size <- ncol(S1);
+s2size <- ncol(S2);
+
+# R, chisq, cramers, spearman, eta, anovaf
+numstats <- 8;
+basestats <- array(0,dim=c(numstats,numPairs)); 
+cat_counts <- array(0,dim=c(maxC,numPairs)); 
+cat_means <- array(0,dim=c(maxC,numPairs));
+cat_vars <- array(0,dim=c(maxC,numPairs));
+
+
+for( i in 1:s1size ) { 
+    a1 <- S1[,i];
+    k1 <- K1[1,i];
+    A1 <- as.matrix(D[,a1]);
+
+    for( j in 1:s2size ) {
+        pairID <-(i-1)*s2size+j;
+        a2 <- S2[,j];
+        k2 <- K2[1,j];
+        A2 <- as.matrix(D[,a2]);
+    
+        if (k1 == k2) {
+            if (k1 == 1) {   
+                # scale-scale
+                print("scale-scale");
+                basestats[1,pairID] <- cor(D[,a1], D[,a2]);
+                #basestats[1,pairID] <- cor(A1, A2);
+                
+                print(basestats[1,pairID]);
+            } else {
+                # nominal-nominal or ordinal-ordinal
+                print("categorical-categorical");
+                F <- table(A1,A2);
+                cst <- chisq.test(F);
+                chi_squared <- as.numeric(cst[1]);
+                degFreedom <- (nrow(F)-1)*(ncol(F)-1);
+                pValue <- as.numeric(cst[3]);
+                q <- min(dim(F));
+                W <- sum(F);
+                cramers_v <- sqrt(chi_squared/(W*(q-1)));
+
+                basestats[2,pairID] <- chi_squared;
+                basestats[3,pairID] <- degFreedom;
+                basestats[4,pairID] <- pValue;
+                basestats[5,pairID] <- cramers_v;
+
+                if ( k1 == 3 ) {
+                    # ordinal-ordinal   
+                    print("ordinal-ordinal");
+                    basestats[6,pairID] <- cor(A1,A2, method="spearman");
+                }
+            }
+        } 
+        else {       
+            if (k1 == 1 || k2 == 1) {    
+                # Scale-nominal/ordinal
+                print("scale-categorical");
+                if ( k1 == 1 ) {
+                    Av <- as.matrix(A2); 
+                    Yv <- as.matrix(A1); 
+                }
+                else {
+                    Av <- as.matrix(A1); 
+                    Yv <- as.matrix(A2); 
+                }
+                
+                W <- nrow(Av);
+                my <- mean(Yv); 
+                varY <- var(Yv);
+                
+                CFreqs <- as.matrix(table(Av)); 
+                CMeans <- as.matrix(aggregate(Yv, by=list(Av), "mean")$V1);
+                CVars <- as.matrix(aggregate(Yv, by=list(Av), "var")$V1);
+                R <- nrow(CFreqs);
+              
+                Eta <- sqrt(1 - ( sum((CFreqs-1)*CVars) / ((W-1)*varY) ));
+                anova_num <- sum( (CFreqs*(CMeans-my)^2) )/(R-1);
+                anova_den <- sum( (CFreqs-1)*CVars )/(W-R);
+                ANOVAF <- anova_num/anova_den;
+
+                basestats[7,pairID] <- Eta;
+                basestats[8,pairID] <- ANOVAF;
+
+                cat_counts[ 1:length(CFreqs),pairID] <- CFreqs;
+                cat_means[ 1:length(CMeans),pairID] <- CMeans;
+                cat_vars[ 1:length(CVars),pairID] <- CVars;
+            }
+            else {
+                # nominal-ordinal or ordinal-nominal    
+                print("nomial-ordinal"); #TODO should not be same code            
+                F <- table(A1,A2);
+                cst <- chisq.test(F);
+                chi_squared <- as.numeric(cst[1]);
+                degFreedom <- (nrow(F)-1)*(ncol(F)-1);
+                pValue <- as.numeric(cst[3]);
+                q <- min(dim(F));
+                W <- sum(F);
+                cramers_v <- sqrt(chi_squared/(W*(q-1)));
+                
+                basestats[2,pairID] <- chi_squared;
+                basestats[3,pairID] <- degFreedom;
+                basestats[4,pairID] <- pValue;
+                basestats[5,pairID] <- cramers_v;
+            }
+        }
+    }
+}
+
+writeMM(as(basestats, "CsparseMatrix"), paste(args[3], "bivar.stats", sep=""));
+writeMM(as(cat_counts, "CsparseMatrix"), paste(args[3], "category.counts", sep=""));
+writeMM(as(cat_means, "CsparseMatrix"), paste(args[3], "category.means", sep=""));
+writeMM(as(cat_vars, "CsparseMatrix"), paste(args[3], "category.variances", sep=""));
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/parfor/parfor_bivariate0.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/parfor/parfor_bivariate0.dml b/src/test/scripts/applications/parfor/parfor_bivariate0.dml
index ac79abd..341a0b1 100644
--- a/src/test/scripts/applications/parfor/parfor_bivariate0.dml
+++ b/src/test/scripts/applications/parfor/parfor_bivariate0.dml
@@ -1,265 +1,265 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-
-/*
- *
- * For a given pair of attribute sets, compute bivariate statistics between all attribute pairs 
- *   Given, S_1 = {A_11, A_12, ... A_1m} and S_2 = {A_21, A_22, ... A_2n} 
- *          compute bivariate stats for m*n pairs (A_1i, A_2j), (1<= i <=m) and (1<= j <=n)
- *
- * Seven inputs:  
- *    $1) D  - input data
- *    $2) S1 - First attribute set {A_11, A_12, ... A_1m}
- *    $3) S2 - Second attribute set {A_21, A_22, ... A_2n}
- *    $4) K1 - kind for attributes in S1 
- *    $5) K2 - kind for attributes in S2
- *             kind=1 for scale, kind=2 for nominal, kind=3 for ordinal
- *    $6) numPairs - total number of pairs (m*n)
- *    $7) maxC - maximum number of categories in any categorical attribute
- * 
- * One output:    
- *    $6) output directory in which following four statistics files are created
- *        + bivar.stats - matrix with all 8 bivariate statistics computed for different attribute pairs
- *                        (R, (chi-sq, df, pval, cramersv), spearman, Eta, F)
- *        + categorical.counts - 
- *        + categorical.means - 
- *        + categorical.variances - 
- *          -> Values in these three matrices are applicable only for scale-categorical attribute pairs. 
- *          k^th column in these matrices denote the attribute pair (A_1i,A_2j) where i*j = k.
- */
-
-D = read($1, rows=$7, cols=$8);  # input data set
-S1 = read($2, rows=1, cols=$9); # attribute set 1
-S2 = read($3, rows=1, cols=$9); # attribute set 2
-K1 = read($4, rows=1, cols=$9); # kind for attributes in S1
-K2 = read($5, rows=1, cols=$9); # kind for attributes in S2
-numPairs = $10; # number of attribute pairs (|S1|*|S2|)
-maxC = $11;     # max number of categories in any categorical attribute
-
-s1size = ncol(S1);
-s2size = ncol(S2);
-
-# R, chisq, cramers, spearman, eta, anovaf
-numstats = 8;
-basestats = matrix(0, rows=numstats, cols=numPairs);
-cat_counts = matrix(0, rows=maxC, cols=numPairs);
-cat_means = matrix(0, rows=maxC, cols=numPairs);
-cat_vars = matrix(0, rows=maxC, cols=numPairs);
-
-
-for( i in 1:s1size ) {
-    a1 = castAsScalar(S1[,i]);
-    k1 = castAsScalar(K1[1,i]);
-    A1 = D[,a1];
-    #print("a1="+a1);
-
-    for( j in 1:s2size ) {
-        pairID = (i-1)*s2size+j; 
-        #print("ID="+pairID+"(i="+i+",j="+j+")");
-        a2 = castAsScalar(S2[,j]);
-        k2 = castAsScalar(K2[1,j]);
-        A2 = D[,a2];
-        #print("a2="+a2);
-    
-        if (k1 == k2) {
-            if (k1 == 1) {
-                # scale-scale
-                print("[" + i + "," + j + "] scale-scale");
-                r = bivar_ss(A1,A2);   
-                basestats[1,pairID] = r;
-                #print("scale:"+r);
-            } else {
-                # nominal-nominal or ordinal-ordinal
-                print("[" + i + "," + j + "] categorical-categorical");
-                [chisq, df, pval, cramersv]  = bivar_cc(A1,A2);
-                basestats[2,pairID] = chisq;
-                basestats[3,pairID] = df;
-                basestats[4,pairID] = pval;
-                basestats[5,pairID] = cramersv;
-
-                if ( k1 == 3 ) {
-                    # ordinal-ordinal
-                    print("[" + i + "," + j + "] ordinal-ordinal");
-                    sp = bivar_oo(A1, A2);
-                    basestats[6,pairID] = sp;
-                }
-            }
-        } 
-        else {
-            if (k1 == 1 | k2 == 1) {
-                # Scale-nominal/ordinal      TODO MB correctness errors
-                print("[" + i + "," + j + "] scale-categorical");
-                
-               if ( k1 == 1 ) {
-                    [eta,f, counts, means, vars] = bivar_sc(A1,A2);
-                }
-                else {
-                    [eta,f, counts, means, vars] = bivar_sc(A2,A1);
-                }
-                basestats[7,pairID] = eta;
-                basestats[8,pairID] = f;
-                cat_counts[,pairID] = counts;
-                cat_means[,pairID] = means;
-                cat_vars[,pairID] = vars; 
-            }
-            else {
-                # nominal-ordinal or ordinal-nominal
-                print("[" + i + "," + j + "] categorical-categorical");
-                [chisq, df, pval, cramersv]  = bivar_cc(A1,A2);
-                basestats[2,pairID] = chisq;
-                basestats[3,pairID] = df;
-                basestats[4,pairID] = pval;
-                basestats[5,pairID] = cramersv;
-            }
-        }
-    }
-}
-
-write(basestats, $6 + "/bivar.stats");
-write(cat_counts, $6 + "/category.counts");
-write(cat_means, $6 + "/category.means");
-write(cat_vars, $6 + "/category.variances");
-
-
-# -----------------------------------------------------------------------------------------------------------
-
-bivar_cc = function(Matrix[Double] A, Matrix[Double] B) return (Double chisq, Double df, Double pval, Double cramersv) {
-
-    # Contingency Table
-    F = table(A,B);
-
-    # Chi-Squared
-    W = sum(F);
-    r = rowSums(F);
-    c = colSums(F);
-    E = (r %*% c)/W;
-    T = (F-E)^2/E;
-    chi_squared = sum(T);
-
-    # compute p-value
-    degFreedom = (nrow(F)-1)*(ncol(F)-1);
-    pValue = pchisq(target=chi_squared, df=degFreedom, lower.tail=FALSE);
-
-    # Cramer's V
-    R = nrow(F);
-    C = ncol(F);
-    q = min(R,C);
-    cramers_v = sqrt(chi_squared/(W*(q-1)));
-
-    # Assign return values
-    chisq = chi_squared;
-    df = as.double(degFreedom);
-    pval = pValue;
-    cramersv = cramers_v;
-}
-
-# -----------------------------------------------------------------------------------------------------------
-
-bivar_ss = function(Matrix[Double] X, Matrix[Double] Y) return (Double R) {
-
-    # Unweighted co-variance
-    covXY = cov(X,Y);
-
-    # compute standard deviations for both X and Y by computing 2^nd central moment
-    W = nrow(X);
-    m2X = moment(X,2);
-    m2Y = moment(Y,2);
-    sigmaX = sqrt(m2X * (W/(W-1.0)) );
-    sigmaY = sqrt(m2Y * (W/(W-1.0)) );
-
-    # Pearson's R
-    R = covXY / (sigmaX*sigmaY);
-    #print("Rmx="+m2X);
-    #print("Rmy="+m2Y);
-    #print("Rcov="+covXY);
-    #print("Rsx="+sigmaX);
-    #print("Rsy="+sigmaY);
-    #print("R="+R);
-}
-
-# -----------------------------------------------------------------------------------------------------------
-
-# Y points to SCALE variable
-# A points to CATEGORICAL variable
-bivar_sc = function(Matrix[Double] Y, Matrix[Double] A) return (Double Eta, Double AnovaF, Matrix[Double] CFreqs, Matrix[Double] CMeans, Matrix[Double] CVars ) {
-
-    # mean and variance in target variable
-    W = nrow(A);
-    my = mean(Y);
-    varY = moment(Y,2) * W/(W-1.0)
-
-    # category-wise (frequencies, means, variances)
-    CFreqs = aggregate(target=Y, groups=A, fn="count"); 
-    CMeans = aggregate(target=Y, groups=A, fn="mean");
-    CVars =  aggregate(target=Y, groups=A, fn="variance");
-
-    # number of categories
-    R = nrow(CFreqs);
-
-    Eta = sqrt(1 - ( sum((CFreqs-1)*CVars) / ((W-1)*varY) ));
-
-    anova_num = sum( (CFreqs*(CMeans-my)^2) )/(R-1);
-    anova_den = sum( (CFreqs-1)*CVars )/(W-R);
-    AnovaF = anova_num/anova_den;
-}
-
-# -----------------------------------------------------------------------------------------------------------
-# Function to compute ranks
-# takes a column vector as input, and produces a vector of same size in which each cell denotes to the computed score for that category
-computeRanks = function(Matrix[Double] X) return (Matrix[Double] Ranks) {
-    Ranks = cumsum(X) - X/2 + 1/2;
-}
-
-#-------------------------------------------------------------------------
-
-bivar_oo = function(Matrix[Double] A, Matrix[Double] B) return (Double sp) {
-
-    # compute contingency table
-    F = table(A,B);
-
-    catA = nrow(F);  # number of categories in A
-    catB = ncol(F);  # number of categories in B
-
-    # compute category-wise counts for both the attributes
-    R = rowSums(F);
-    S = colSums(F);
-
-    # compute scores, both are column vectors
-    [C] = computeRanks(R);
-    meanX = mean(C,R); 
-
-    columnS = t(S);
-    [D] = computeRanks(columnS);
-
-    # scores (C,D) are individual values, and counts (R,S) act as weights
-    meanY = mean(D,columnS);
-
-    W = sum(F); # total weight, or total #cases
-    varX = moment(C,R,2)*(W/(W-1.0));
-    varY = moment(D,columnS,2)*(W/(W-1.0));
-    covXY = sum( t(F/(W-1) * (C-meanX)) * (D-meanY) );
-
-    sp = covXY/(sqrt(varX)*sqrt(varY));
-}
-
-# -----------------------------------------------------------------------------------------------------------
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+/*
+ *
+ * For a given pair of attribute sets, compute bivariate statistics between all attribute pairs 
+ *   Given, S_1 = {A_11, A_12, ... A_1m} and S_2 = {A_21, A_22, ... A_2n} 
+ *          compute bivariate stats for m*n pairs (A_1i, A_2j), (1<= i <=m) and (1<= j <=n)
+ *
+ * Ten inputs:  
+ *    $1) D  - input data
+ *    $2) S1 - First attribute set {A_11, A_12, ... A_1m}
+ *    $3) S2 - Second attribute set {A_21, A_22, ... A_2n}
+ *    $4) K1 - kind for attributes in S1 
+ *    $5) K2 - kind for attributes in S2
+ *             kind=1 for scale, kind=2 for nominal, kind=3 for ordinal
+ *    $7, $8, $9) number of rows in D, number of columns in D, and number of columns in each of S1, S2, K1, K2
+ *    $10) numPairs - total number of pairs (m*n);  $11) maxC - maximum number of categories in any categorical attribute
+ * 
+ * One output:    
+ *    $6) output directory in which following four statistics files are created
+ *        + bivar.stats - matrix with all 8 bivariate statistics computed for different attribute pairs
+ *                        (R, (chi-sq, df, pval, cramersv), spearman, Eta, F)
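+ *        + category.counts - number of observations in each category
+ *        + category.means - mean of the scale attribute within each category
+ *        + category.variances - variance of the scale attribute within each category
+ *          -> Values in these three matrices are applicable only for scale-categorical attribute pairs. 
+ *          k^th column in these matrices denotes the attribute pair (A_1i,A_2j) where k = (i-1)*n + j.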
+ */
+
+D = read($1, rows=$7, cols=$8);  # input data set
+S1 = read($2, rows=1, cols=$9); # attribute set 1
+S2 = read($3, rows=1, cols=$9); # attribute set 2
+K1 = read($4, rows=1, cols=$9); # kind for attributes in S1
+K2 = read($5, rows=1, cols=$9); # kind for attributes in S2
+numPairs = $10; # number of attribute pairs (|S1|*|S2|)
+maxC = $11;     # max number of categories in any categorical attribute
+
+s1size = ncol(S1);
+s2size = ncol(S2);
+
+# rows of basestats: R, chisq, df, pval, cramersv, spearman, eta, anovaf
+numstats = 8;
+basestats = matrix(0, rows=numstats, cols=numPairs);
+cat_counts = matrix(0, rows=maxC, cols=numPairs);
+cat_means = matrix(0, rows=maxC, cols=numPairs);
+cat_vars = matrix(0, rows=maxC, cols=numPairs);
+
+
+for( i in 1:s1size ) {
+    a1 = castAsScalar(S1[,i]);
+    k1 = castAsScalar(K1[1,i]);
+    A1 = D[,a1];
+    #print("a1="+a1);
+
+    for( j in 1:s2size ) {
+        pairID = (i-1)*s2size+j; 
+        #print("ID="+pairID+"(i="+i+",j="+j+")");
+        a2 = castAsScalar(S2[,j]);
+        k2 = castAsScalar(K2[1,j]);
+        A2 = D[,a2];
+        #print("a2="+a2);
+    
+        if (k1 == k2) {
+            if (k1 == 1) {
+                # scale-scale
+                print("[" + i + "," + j + "] scale-scale");
+                r = bivar_ss(A1,A2);   
+                basestats[1,pairID] = r;
+                #print("scale:"+r);
+            } else {
+                # nominal-nominal or ordinal-ordinal
+                print("[" + i + "," + j + "] categorical-categorical");
+                [chisq, df, pval, cramersv]  = bivar_cc(A1,A2);
+                basestats[2,pairID] = chisq;
+                basestats[3,pairID] = df;
+                basestats[4,pairID] = pval;
+                basestats[5,pairID] = cramersv;
+
+                if ( k1 == 3 ) {
+                    # ordinal-ordinal
+                    print("[" + i + "," + j + "] ordinal-ordinal");
+                    sp = bivar_oo(A1, A2);
+                    basestats[6,pairID] = sp;
+                }
+            }
+        } 
+        else {
+            if (k1 == 1 | k2 == 1) {
+                # Scale-nominal/ordinal      TODO MB correctness errors
+                print("[" + i + "," + j + "] scale-categorical");
+                
+               if ( k1 == 1 ) {
+                    [eta,f, counts, means, vars] = bivar_sc(A1,A2);
+                }
+                else {
+                    [eta,f, counts, means, vars] = bivar_sc(A2,A1);
+                }
+                basestats[7,pairID] = eta;
+                basestats[8,pairID] = f;
+                cat_counts[,pairID] = counts;
+                cat_means[,pairID] = means;
+                cat_vars[,pairID] = vars; 
+            }
+            else {
+                # nominal-ordinal or ordinal-nominal
+                print("[" + i + "," + j + "] categorical-categorical");
+                [chisq, df, pval, cramersv]  = bivar_cc(A1,A2);
+                basestats[2,pairID] = chisq;
+                basestats[3,pairID] = df;
+                basestats[4,pairID] = pval;
+                basestats[5,pairID] = cramersv;
+            }
+        }
+    }
+}
+
+write(basestats, $6 + "/bivar.stats");
+write(cat_counts, $6 + "/category.counts");
+write(cat_means, $6 + "/category.means");
+write(cat_vars, $6 + "/category.variances");
+
+
+# -----------------------------------------------------------------------------------------------------------
+
+bivar_cc = function(Matrix[Double] A, Matrix[Double] B) return (Double chisq, Double df, Double pval, Double cramersv) {
+
+    # Contingency Table
+    F = table(A,B);
+
+    # Chi-Squared
+    W = sum(F);
+    r = rowSums(F);
+    c = colSums(F);
+    E = (r %*% c)/W;
+    T = (F-E)^2/E;
+    chi_squared = sum(T);
+
+    # compute p-value
+    degFreedom = (nrow(F)-1)*(ncol(F)-1);
+    pValue = pchisq(target=chi_squared, df=degFreedom, lower.tail=FALSE);
+
+    # Cramer's V
+    R = nrow(F);
+    C = ncol(F);
+    q = min(R,C);
+    cramers_v = sqrt(chi_squared/(W*(q-1)));
+
+    # Assign return values
+    chisq = chi_squared;
+    df = as.double(degFreedom);
+    pval = pValue;
+    cramersv = cramers_v;
+}
+
+# -----------------------------------------------------------------------------------------------------------
+
+bivar_ss = function(Matrix[Double] X, Matrix[Double] Y) return (Double R) {
+
+    # Unweighted co-variance
+    covXY = cov(X,Y);
+
+    # compute standard deviations for both X and Y by computing 2^nd central moment
+    W = nrow(X);
+    m2X = moment(X,2);
+    m2Y = moment(Y,2);
+    sigmaX = sqrt(m2X * (W/(W-1.0)) );
+    sigmaY = sqrt(m2Y * (W/(W-1.0)) );
+
+    # Pearson's R
+    R = covXY / (sigmaX*sigmaY);
+    #print("Rmx="+m2X);
+    #print("Rmy="+m2Y);
+    #print("Rcov="+covXY);
+    #print("Rsx="+sigmaX);
+    #print("Rsy="+sigmaY);
+    #print("R="+R);
+}
+
+# -----------------------------------------------------------------------------------------------------------
+
+# Y points to SCALE variable
+# A points to CATEGORICAL variable
+bivar_sc = function(Matrix[Double] Y, Matrix[Double] A) return (Double Eta, Double AnovaF, Matrix[Double] CFreqs, Matrix[Double] CMeans, Matrix[Double] CVars ) {
+
+    # mean and variance in target variable
+    W = nrow(A);
+    my = mean(Y);
+    varY = moment(Y,2) * W/(W-1.0)
+
+    # category-wise (frequencies, means, variances)
+    CFreqs = aggregate(target=Y, groups=A, fn="count"); 
+    CMeans = aggregate(target=Y, groups=A, fn="mean");
+    CVars =  aggregate(target=Y, groups=A, fn="variance");
+
+    # number of categories
+    R = nrow(CFreqs);
+
+    Eta = sqrt(1 - ( sum((CFreqs-1)*CVars) / ((W-1)*varY) ));
+
+    anova_num = sum( (CFreqs*(CMeans-my)^2) )/(R-1);
+    anova_den = sum( (CFreqs-1)*CVars )/(W-R);
+    AnovaF = anova_num/anova_den;
+}
+
+# -----------------------------------------------------------------------------------------------------------
+# Function to compute ranks
+# takes a column vector as input, and produces a vector of the same size in which each cell holds the computed score for that category
+computeRanks = function(Matrix[Double] X) return (Matrix[Double] Ranks) {
+    Ranks = cumsum(X) - X/2 + 1/2;
+}
+
+#-------------------------------------------------------------------------
+
+bivar_oo = function(Matrix[Double] A, Matrix[Double] B) return (Double sp) {
+
+    # compute contingency table
+    F = table(A,B);
+
+    catA = nrow(F);  # number of categories in A
+    catB = ncol(F);  # number of categories in B
+
+    # compute category-wise counts for both the attributes
+    R = rowSums(F);
+    S = colSums(F);
+
+    # compute scores, both are column vectors
+    [C] = computeRanks(R);
+    meanX = mean(C,R); 
+
+    columnS = t(S);
+    [D] = computeRanks(columnS);
+
+    # scores (C,D) are individual values, and counts (R,S) act as weights
+    meanY = mean(D,columnS);
+
+    W = sum(F); # total weight, or total #cases
+    varX = moment(C,R,2)*(W/(W-1.0));
+    varY = moment(D,columnS,2)*(W/(W-1.0));
+    covXY = sum( t(F/(W-1) * (C-meanX)) * (D-meanY) );
+
+    sp = covXY/(sqrt(varX)*sqrt(varY));
+}
+
+# -----------------------------------------------------------------------------------------------------------
  
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/parfor/parfor_bivariate1.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/parfor/parfor_bivariate1.dml b/src/test/scripts/applications/parfor/parfor_bivariate1.dml
index 69a7ad6..aa40d7c 100644
--- a/src/test/scripts/applications/parfor/parfor_bivariate1.dml
+++ b/src/test/scripts/applications/parfor/parfor_bivariate1.dml
@@ -1,256 +1,256 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-
-/*
- *
- * For a given pair of attribute sets, compute bivariate statistics between all attribute pairs 
- *   Given, S_1 = {A_11, A_12, ... A_1m} and S_2 = {A_21, A_22, ... A_2n} 
- *          compute bivariate stats for m*n pairs (A_1i, A_2j), (1<= i <=m) and (1<= j <=n)
- *
- * Seven inputs:  
- *    $1) D  - input data
- *    $2) S1 - First attribute set {A_11, A_12, ... A_1m}
- *    $3) S2 - Second attribute set {A_21, A_22, ... A_2n}
- *    $4) K1 - kind for attributes in S1 
- *    $5) K2 - kind for attributes in S2
- *             kind=1 for scale, kind=2 for nominal, kind=3 for ordinal
- *    $6) numPairs - total number of pairs (m*n)
- *    $7) maxC - maximum number of categories in any categorical attribute
- * 
- * One output:    
- *    $6) output directory in which following four statistics files are created
- *        + bivar.stats - matrix with all 8 bivariate statistics computed for different attribute pairs
- *                        (R, (chi-sq, df, pval, cramersv), spearman, Eta, F)
- *        + categorical.counts - 
- *        + categorical.means - 
- *        + categorical.variances - 
- *          -> Values in these three matrices are applicable only for scale-categorical attribute pairs. 
- *          k^th column in these matrices denote the attribute pair (A_1i,A_2j) where i*j = k.
- */
-
-D = read($1, rows=$7, cols=$8);  # input data set
-S1 = read($2, rows=1, cols=$9); # attribute set 1
-S2 = read($3, rows=1, cols=$9); # attribute set 2
-K1 = read($4, rows=1, cols=$9); # kind for attributes in S1
-K2 = read($5, rows=1, cols=$9); # kind for attributes in S2
-numPairs = $10; # number of attribute pairs (|S1|*|S2|)
-maxC = $11;     # max number of categories in any categorical attribute
-
-s1size = ncol(S1);
-s2size = ncol(S2);
-
-# R, chisq, cramers, spearman, eta, anovaf
-numstats = 8;
-basestats = matrix(0, rows=numstats, cols=numPairs);
-cat_counts = matrix(0, rows=maxC, cols=numPairs);
-cat_means = matrix(0, rows=maxC, cols=numPairs);
-cat_vars = matrix(0, rows=maxC, cols=numPairs);
-
-
-parfor( i in 1:s1size, par=4, mode=LOCAL, check=0, opt=NONE) {
-    a1 = castAsScalar(S1[,i]);
-    k1 = castAsScalar(K1[1,i]);
-    A1 = D[,a1];
-
-    parfor( j in 1:s2size, par=4, mode=LOCAL, check=0, opt=NONE) {
-        pairID = (i-1)*s2size+j; 
-        a2 = castAsScalar(S2[,j]);
-        k2 = castAsScalar(K2[1,j]);
-        A2 = D[,a2];
-    
-        if (k1 == k2) {
-            if (k1 == 1) {
-                # scale-scale
-                print("[" + i + "," + j + "] scale-scale");
-                r = bivar_ss(A1,A2);   
-                basestats[1,pairID] = r;
-            } else {
-                # nominal-nominal or ordinal-ordinal
-                print("[" + i + "," + j + "] categorical-categorical");
-                [chisq, df, pval, cramersv]  = bivar_cc(A1,A2);
-                basestats[2,pairID] = chisq;
-                basestats[3,pairID] = df;
-                basestats[4,pairID] = pval;
-                basestats[5,pairID] = cramersv;
-
-                if ( k1 == 3 ) {
-                    # ordinal-ordinal
-                    print("[" + i + "," + j + "] ordinal-ordinal");
-                    sp = bivar_oo(A1, A2);
-                    basestats[6,pairID] = sp;
-                }
-            }
-        } 
-        else {
-            if (k1 == 1 | k2 == 1) {
-                # Scale-nominal/ordinal      TODO MB correctness errors
-                print("[" + i + "," + j + "] scale-categorical");
-                
-               if ( k1 == 1 ) {
-                    [eta,f, counts, means, vars] = bivar_sc(A1,A2);
-                }
-                else {
-                    [eta,f, counts, means, vars] = bivar_sc(A2,A1);
-                }
-                basestats[7,pairID] = eta;
-                basestats[8,pairID] = f;
-                cat_counts[,pairID] = counts;
-                cat_means[,pairID] = means;
-                cat_vars[,pairID] = vars; 
-            }
-            else {
-                # nominal-ordinal or ordinal-nominal
-                print("[" + i + "," + j + "] categorical-categorical");
-                [chisq, df, pval, cramersv]  = bivar_cc(A1,A2);
-                basestats[2,pairID] = chisq;
-                basestats[3,pairID] = df;
-                basestats[4,pairID] = pval;
-                basestats[5,pairID] = cramersv;
-            }
-        }
-    }
-}
-
-write(basestats, $6 + "/bivar.stats");
-write(cat_counts, $6 + "/category.counts");
-write(cat_means, $6 + "/category.means");
-write(cat_vars, $6 + "/category.variances");
-
-
-# -----------------------------------------------------------------------------------------------------------
-
-bivar_cc = function(Matrix[Double] A, Matrix[Double] B) return (Double chisq, Double df, Double pval, Double cramersv) {
-
-    # Contingency Table
-    F = table(A,B);
-
-    # Chi-Squared
-    W = sum(F);
-    r = rowSums(F);
-    c = colSums(F);
-    E = (r %*% c)/W;
-    T = (F-E)^2/E;
-    chi_squared = sum(T);
-
-    # compute p-value
-    degFreedom = (nrow(F)-1)*(ncol(F)-1);
-    pValue = pchisq(target=chi_squared, df=degFreedom, lower.tail=FALSE);
-
-    # Cramer's V
-    R = nrow(F);
-    C = ncol(F);
-    q = min(R,C);
-    cramers_v = sqrt(chi_squared/(W*(q-1)));
-
-    # Assign return values
-    chisq = chi_squared;
-    df = as.double(degFreedom);
-    pval = pValue;
-    cramersv = cramers_v;
-}
-
-# -----------------------------------------------------------------------------------------------------------
-
-bivar_ss = function(Matrix[Double] X, Matrix[Double] Y) return (Double R) {
-
-    # Unweighted co-variance
-    covXY = cov(X,Y);
-
-    # compute standard deviations for both X and Y by computing 2^nd central moment
-    W = nrow(X);
-    m2X = moment(X,2);
-    m2Y = moment(Y,2);
-    sigmaX = sqrt(m2X * (W/(W-1.0)) );
-    sigmaY = sqrt(m2Y * (W/(W-1.0)) );
-
-    # Pearson's R
-    R = covXY / (sigmaX*sigmaY);
-}
-
-# -----------------------------------------------------------------------------------------------------------
-
-# Y points to SCALE variable
-# A points to CATEGORICAL variable
-bivar_sc = function(Matrix[Double] Y, Matrix[Double] A) return (Double Eta, Double AnovaF, Matrix[Double] CFreqs, Matrix[Double] CMeans, Matrix[Double] CVars ) {
-
-    # mean and variance in target variable
-    W = nrow(A);
-    my = mean(Y);
-    varY = moment(Y,2) * W/(W-1.0)
-
-    # category-wise (frequencies, means, variances)
-    CFreqs = aggregate(target=Y, groups=A, fn="count"); 
-    CMeans = aggregate(target=Y, groups=A, fn="mean");
-    CVars =  aggregate(target=Y, groups=A, fn="variance");
-
-    # number of categories
-    R = nrow(CFreqs);
-
-    Eta = sqrt(1 - ( sum((CFreqs-1)*CVars) / ((W-1)*varY) ));
-
-    anova_num = sum( (CFreqs*(CMeans-my)^2) )/(R-1);
-    anova_den = sum( (CFreqs-1)*CVars )/(W-R);
-    AnovaF = anova_num/anova_den;
-}
-
-# -----------------------------------------------------------------------------------------------------------
-# Function to compute ranks
-# takes a column vector as input, and produces a vector of same size in which each cell denotes to the computed score for that category
-computeRanks = function(Matrix[Double] X) return (Matrix[Double] Ranks) {
-    Ranks = cumsum(X) - X/2 + 1/2;
-}
-
-#-------------------------------------------------------------------------
-
-bivar_oo = function(Matrix[Double] A, Matrix[Double] B) return (Double sp) {
-
-    # compute contingency table
-    F = table(A,B);
-
-    catA = nrow(F);  # number of categories in A
-    catB = ncol(F);  # number of categories in B
-
-    # compute category-wise counts for both the attributes
-    R = rowSums(F);
-    S = colSums(F);
-
-    # compute scores, both are column vectors
-    [C] = computeRanks(R);
-    meanX = mean(C,R); 
-
-    columnS = t(S);
-    [D] = computeRanks(columnS);
-
-    # scores (C,D) are individual values, and counts (R,S) act as weights
-    meanY = mean(D,columnS);
-
-    W = sum(F); # total weight, or total #cases
-    varX = moment(C,R,2)*(W/(W-1.0));
-    varY = moment(D,columnS,2)*(W/(W-1.0));
-    covXY = sum( t(F/(W-1) * (C-meanX)) * (D-meanY) );
-
-    sp = covXY/(sqrt(varX)*sqrt(varY));
-}
-
-# -----------------------------------------------------------------------------------------------------------
-
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+/*
+ *
+ * For a given pair of attribute sets, compute bivariate statistics between all attribute pairs 
+ *   Given, S_1 = {A_11, A_12, ... A_1m} and S_2 = {A_21, A_22, ... A_2n} 
+ *          compute bivariate stats for m*n pairs (A_1i, A_2j), (1<= i <=m) and (1<= j <=n)
+ *
+ * Ten inputs:  
+ *    $1) D  - input data
+ *    $2) S1 - First attribute set {A_11, A_12, ... A_1m}
+ *    $3) S2 - Second attribute set {A_21, A_22, ... A_2n}
+ *    $4) K1 - kind for attributes in S1 
+ *    $5) K2 - kind for attributes in S2
+ *             kind=1 for scale, kind=2 for nominal, kind=3 for ordinal
+ *    $7, $8, $9) number of rows in D, number of columns in D, and number of columns in each of S1, S2, K1, K2
+ *    $10) numPairs - total number of pairs (m*n);  $11) maxC - maximum number of categories in any categorical attribute
+ * 
+ * One output:    
+ *    $6) output directory in which following four statistics files are created
+ *        + bivar.stats - matrix with all 8 bivariate statistics computed for different attribute pairs
+ *                        (R, (chi-sq, df, pval, cramersv), spearman, Eta, F)
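+ *        + category.counts - number of observations in each category
+ *        + category.means - mean of the scale attribute within each category
+ *        + category.variances - variance of the scale attribute within each category
+ *          -> Values in these three matrices are applicable only for scale-categorical attribute pairs. 
+ *          k^th column in these matrices denotes the attribute pair (A_1i,A_2j) where k = (i-1)*n + j.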
+ */
+
+D = read($1, rows=$7, cols=$8);  # input data set
+S1 = read($2, rows=1, cols=$9); # attribute set 1
+S2 = read($3, rows=1, cols=$9); # attribute set 2
+K1 = read($4, rows=1, cols=$9); # kind for attributes in S1
+K2 = read($5, rows=1, cols=$9); # kind for attributes in S2
+numPairs = $10; # number of attribute pairs (|S1|*|S2|)
+maxC = $11;     # max number of categories in any categorical attribute
+
+s1size = ncol(S1);
+s2size = ncol(S2);
+
+# rows of basestats: R, chisq, df, pval, cramersv, spearman, eta, anovaf
+numstats = 8;
+basestats = matrix(0, rows=numstats, cols=numPairs);
+cat_counts = matrix(0, rows=maxC, cols=numPairs);
+cat_means = matrix(0, rows=maxC, cols=numPairs);
+cat_vars = matrix(0, rows=maxC, cols=numPairs);
+
+
+parfor( i in 1:s1size, par=4, mode=LOCAL, check=0, opt=NONE) {
+    a1 = castAsScalar(S1[,i]);
+    k1 = castAsScalar(K1[1,i]);
+    A1 = D[,a1];
+
+    parfor( j in 1:s2size, par=4, mode=LOCAL, check=0, opt=NONE) {
+        pairID = (i-1)*s2size+j; 
+        a2 = castAsScalar(S2[,j]);
+        k2 = castAsScalar(K2[1,j]);
+        A2 = D[,a2];
+    
+        if (k1 == k2) {
+            if (k1 == 1) {
+                # scale-scale
+                print("[" + i + "," + j + "] scale-scale");
+                r = bivar_ss(A1,A2);   
+                basestats[1,pairID] = r;
+            } else {
+                # nominal-nominal or ordinal-ordinal
+                print("[" + i + "," + j + "] categorical-categorical");
+                [chisq, df, pval, cramersv]  = bivar_cc(A1,A2);
+                basestats[2,pairID] = chisq;
+                basestats[3,pairID] = df;
+                basestats[4,pairID] = pval;
+                basestats[5,pairID] = cramersv;
+
+                if ( k1 == 3 ) {
+                    # ordinal-ordinal
+                    print("[" + i + "," + j + "] ordinal-ordinal");
+                    sp = bivar_oo(A1, A2);
+                    basestats[6,pairID] = sp;
+                }
+            }
+        } 
+        else {
+            if (k1 == 1 | k2 == 1) {
+                # Scale-nominal/ordinal      TODO MB correctness errors
+                print("[" + i + "," + j + "] scale-categorical");
+                
+               if ( k1 == 1 ) {
+                    [eta,f, counts, means, vars] = bivar_sc(A1,A2);
+                }
+                else {
+                    [eta,f, counts, means, vars] = bivar_sc(A2,A1);
+                }
+                basestats[7,pairID] = eta;
+                basestats[8,pairID] = f;
+                cat_counts[,pairID] = counts;
+                cat_means[,pairID] = means;
+                cat_vars[,pairID] = vars; 
+            }
+            else {
+                # nominal-ordinal or ordinal-nominal
+                print("[" + i + "," + j + "] categorical-categorical");
+                [chisq, df, pval, cramersv]  = bivar_cc(A1,A2);
+                basestats[2,pairID] = chisq;
+                basestats[3,pairID] = df;
+                basestats[4,pairID] = pval;
+                basestats[5,pairID] = cramersv;
+            }
+        }
+    }
+}
+
+write(basestats, $6 + "/bivar.stats");
+write(cat_counts, $6 + "/category.counts");
+write(cat_means, $6 + "/category.means");
+write(cat_vars, $6 + "/category.variances");
+
+
+# -----------------------------------------------------------------------------------------------------------
+
+bivar_cc = function(Matrix[Double] A, Matrix[Double] B) return (Double chisq, Double df, Double pval, Double cramersv) {
+
+    # Contingency Table
+    F = table(A,B);
+
+    # Chi-Squared
+    W = sum(F);
+    r = rowSums(F);
+    c = colSums(F);
+    E = (r %*% c)/W;
+    T = (F-E)^2/E;
+    chi_squared = sum(T);
+
+    # compute p-value
+    degFreedom = (nrow(F)-1)*(ncol(F)-1);
+    pValue = pchisq(target=chi_squared, df=degFreedom, lower.tail=FALSE);
+
+    # Cramer's V
+    R = nrow(F);
+    C = ncol(F);
+    q = min(R,C);
+    cramers_v = sqrt(chi_squared/(W*(q-1)));
+
+    # Assign return values
+    chisq = chi_squared;
+    df = as.double(degFreedom);
+    pval = pValue;
+    cramersv = cramers_v;
+}
+
+# -----------------------------------------------------------------------------------------------------------
+
+bivar_ss = function(Matrix[Double] X, Matrix[Double] Y) return (Double R) {
+
+    # Unweighted co-variance
+    covXY = cov(X,Y);
+
+    # compute standard deviations for both X and Y by computing 2^nd central moment
+    W = nrow(X);
+    m2X = moment(X,2);
+    m2Y = moment(Y,2);
+    sigmaX = sqrt(m2X * (W/(W-1.0)) );
+    sigmaY = sqrt(m2Y * (W/(W-1.0)) );
+
+    # Pearson's R
+    R = covXY / (sigmaX*sigmaY);
+}
+
+# -----------------------------------------------------------------------------------------------------------
+
+# Y points to SCALE variable
+# A points to CATEGORICAL variable
+bivar_sc = function(Matrix[Double] Y, Matrix[Double] A) return (Double Eta, Double AnovaF, Matrix[Double] CFreqs, Matrix[Double] CMeans, Matrix[Double] CVars ) {
+
+    # mean and variance in target variable
+    W = nrow(A);
+    my = mean(Y);
+    varY = moment(Y,2) * W/(W-1.0)
+
+    # category-wise (frequencies, means, variances)
+    CFreqs = aggregate(target=Y, groups=A, fn="count"); 
+    CMeans = aggregate(target=Y, groups=A, fn="mean");
+    CVars =  aggregate(target=Y, groups=A, fn="variance");
+
+    # number of categories
+    R = nrow(CFreqs);
+
+    Eta = sqrt(1 - ( sum((CFreqs-1)*CVars) / ((W-1)*varY) ));
+
+    anova_num = sum( (CFreqs*(CMeans-my)^2) )/(R-1);
+    anova_den = sum( (CFreqs-1)*CVars )/(W-R);
+    AnovaF = anova_num/anova_den;
+}
+
+# -----------------------------------------------------------------------------------------------------------
+# Function to compute ranks
+# takes a column vector of category counts as input, and produces a vector of the same size in which each cell denotes the average rank (score) of the cases in that category
+computeRanks = function(Matrix[Double] X) return (Matrix[Double] Ranks) {
+    Ranks = cumsum(X) - X/2 + 1/2;
+}
+
+#-------------------------------------------------------------------------
+
+bivar_oo = function(Matrix[Double] A, Matrix[Double] B) return (Double sp) {
+
+    # compute contingency table
+    F = table(A,B);
+
+    catA = nrow(F);  # number of categories in A
+    catB = ncol(F);  # number of categories in B
+
+    # compute category-wise counts for both the attributes
+    R = rowSums(F);
+    S = colSums(F);
+
+    # compute scores, both are column vectors
+    [C] = computeRanks(R);
+    meanX = mean(C,R); 
+
+    columnS = t(S);
+    [D] = computeRanks(columnS);
+
+    # scores (C,D) are individual values, and counts (R,S) act as weights
+    meanY = mean(D,columnS);
+
+    W = sum(F); # total weight, or total #cases
+    varX = moment(C,R,2)*(W/(W-1.0));
+    varY = moment(D,columnS,2)*(W/(W-1.0));
+    covXY = sum( t(F/(W-1) * (C-meanX)) * (D-meanY) );
+
+    sp = covXY/(sqrt(varX)*sqrt(varY));
+}
+
+# -----------------------------------------------------------------------------------------------------------
+
+


[17/55] [partial] incubator-systemml git commit: [SYSTEMML-482] [SYSTEMML-480] Adding a Git attributes file to enfore Unix-styled line endings, and normalizing all of the line endings.

Posted by du...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/parfor/parfor_corr_large.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/parfor/parfor_corr_large.R b/src/test/scripts/applications/parfor/parfor_corr_large.R
index 22c8738..98ff436 100644
--- a/src/test/scripts/applications/parfor/parfor_corr_large.R
+++ b/src/test/scripts/applications/parfor/parfor_corr_large.R
@@ -1,48 +1,48 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-V1 <- readMM(paste(args[1], "V.mtx", sep=""))
-V <- as.matrix(V1);
-
-m <- nrow(V);
-n <- ncol(V); 
-W <- m;
-
-R <- array(0,dim=c(n,n))
-
-for( i in 1:8 )
-{
-   X <- V[ ,i];                 
-      
-   for( j in (i+1):(i+9) )  
-   {
-      Y <- V[ ,j];  
-      R[i,j] <- cor(X, Y)  
-      #print(R[i,j]);
-   }
-}   
-
-writeMM(as(R, "CsparseMatrix"), paste(args[2], "Rout", sep=""));
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+V1 <- readMM(paste(args[1], "V.mtx", sep=""))
+V <- as.matrix(V1);
+
+m <- nrow(V);
+n <- ncol(V); 
+W <- m;
+
+R <- array(0,dim=c(n,n))
+
+for( i in 1:8 )
+{
+   X <- V[ ,i];                 
+      
+   for( j in (i+1):(i+9) )  
+   {
+      Y <- V[ ,j];  
+      R[i,j] <- cor(X, Y)  
+      #print(R[i,j]);
+   }
+}   
+
+writeMM(as(R, "CsparseMatrix"), paste(args[2], "Rout", sep=""));

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/parfor/parfor_corr_large1.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/parfor/parfor_corr_large1.dml b/src/test/scripts/applications/parfor/parfor_corr_large1.dml
index fc198d8..556034d 100644
--- a/src/test/scripts/applications/parfor/parfor_corr_large1.dml
+++ b/src/test/scripts/applications/parfor/parfor_corr_large1.dml
@@ -1,51 +1,51 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-V = read($1,rows=$2,cols=$3);
-m = $2;
-n = $3;
-W = m;
-
-R = matrix(0, rows=n,cols=n); 
-
-parfor( i in 1:8, par=2, mode=LOCAL, opt=NONE )
-{
-   X = V[,i];                 
-   m2X = moment(X,2);
-   sigmaX = sqrt(m2X * (W/(W-1.0)) );
-      
-   parfor( j in (i+1):(i+9), par=2, mode=LOCAL, opt=NONE )  
-   {  
-      Y = V[,j];
-
-      #corr computation    
-      m2Y = moment(Y,2);
-      sigmaY = sqrt(m2Y * (W/(W-1.0)) );      
-      covXY = cov(X,Y);      
-      rXY = covXY / (sigmaX*sigmaY); 
-      
-      #print("R[("+i+","+j+")]="+rXY); 
-      R[i,j] = rXY; 
-      
-   }
-}   
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+V = read($1,rows=$2,cols=$3);
+m = $2;
+n = $3;
+W = m;
+
+R = matrix(0, rows=n,cols=n); 
+
+parfor( i in 1:8, par=2, mode=LOCAL, opt=NONE )
+{
+   X = V[,i];                 
+   m2X = moment(X,2);
+   sigmaX = sqrt(m2X * (W/(W-1.0)) );
+      
+   parfor( j in (i+1):(i+9), par=2, mode=LOCAL, opt=NONE )  
+   {  
+      Y = V[,j];
+
+      #corr computation    
+      m2Y = moment(Y,2);
+      sigmaY = sqrt(m2Y * (W/(W-1.0)) );      
+      covXY = cov(X,Y);      
+      rXY = covXY / (sigmaX*sigmaY); 
+      
+      #print("R[("+i+","+j+")]="+rXY); 
+      R[i,j] = rXY; 
+      
+   }
+}   
+
 write(R, $4);       
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/parfor/parfor_corr_large2.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/parfor/parfor_corr_large2.dml b/src/test/scripts/applications/parfor/parfor_corr_large2.dml
index 668d5eb..623eb03 100644
--- a/src/test/scripts/applications/parfor/parfor_corr_large2.dml
+++ b/src/test/scripts/applications/parfor/parfor_corr_large2.dml
@@ -1,51 +1,51 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-V = read($1,rows=$2,cols=$3);
-m = $2;
-n = $3;
-W = m;
-
-R = matrix(0,rows=n,cols=n); 
-
-parfor( i in 1:8, par=2, mode=LOCAL, datapartitioner=LOCAL, opt=NONE )
-{
-   X = V[,i];                 
-   m2X = moment(X,2);
-   sigmaX = sqrt(m2X * (W/(W-1.0)) );
-      
-   parfor( j in (i+1):(i+9), par=2, mode=REMOTE_MR, opt=NONE )  
-   {  
-      Y = V[,j];
-
-      #corr computation    
-      m2Y = moment(Y,2);
-      sigmaY = sqrt(m2Y * (W/(W-1.0)) );      
-      covXY = cov(X,Y);      
-      rXY = covXY / (sigmaX*sigmaY); 
-      
-      print("R[("+i+","+j+")]="+rXY); #test robustness of ProgramConverter
-      R[i,j] = rXY; 
-      
-   }
-}   
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+V = read($1,rows=$2,cols=$3);
+m = $2;
+n = $3;
+W = m;
+
+R = matrix(0,rows=n,cols=n); 
+
+parfor( i in 1:8, par=2, mode=LOCAL, datapartitioner=LOCAL, opt=NONE )
+{
+   X = V[,i];                 
+   m2X = moment(X,2);
+   sigmaX = sqrt(m2X * (W/(W-1.0)) );
+      
+   parfor( j in (i+1):(i+9), par=2, mode=REMOTE_MR, opt=NONE )  
+   {  
+      Y = V[,j];
+
+      #corr computation    
+      m2Y = moment(Y,2);
+      sigmaY = sqrt(m2Y * (W/(W-1.0)) );      
+      covXY = cov(X,Y);      
+      rXY = covXY / (sigmaX*sigmaY); 
+      
+      print("R[("+i+","+j+")]="+rXY); #test robustness of ProgramConverter
+      R[i,j] = rXY; 
+      
+   }
+}   
+
 write(R, $4);     
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/parfor/parfor_corr_large3.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/parfor/parfor_corr_large3.dml b/src/test/scripts/applications/parfor/parfor_corr_large3.dml
index 4b9dfbc..df8e47e 100644
--- a/src/test/scripts/applications/parfor/parfor_corr_large3.dml
+++ b/src/test/scripts/applications/parfor/parfor_corr_large3.dml
@@ -1,51 +1,51 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-V = read($1,rows=$2,cols=$3);
-m = $2;
-n = $3;
-W = m;
-
-R = matrix(0, rows=n,cols=n); 
-
-parfor( i in 1:8, par=2, mode=REMOTE_MR, datapartitioner=LOCAL, opt=NONE )
-{
-   X = V[,i];                 
-   m2X = moment(X,2);
-   sigmaX = sqrt(m2X * (W/(W-1.0)) );
-      
-   parfor( j in (i+1):(i+9), par=2, mode=LOCAL, opt=NONE )  
-   {  
-      Y = V[,j];
-
-      #corr computation    
-      m2Y = moment(Y,2);
-      sigmaY = sqrt(m2Y * (W/(W-1.0)) );      
-      covXY = cov(X,Y);      
-      rXY = covXY / (sigmaX*sigmaY); 
-      
-      print("R[("+i+","+j+")]="+rXY); #test robustness of ProgramConverter
-      R[i,j] = rXY; 
-      
-   }
-}   
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+V = read($1,rows=$2,cols=$3);
+m = $2;
+n = $3;
+W = m;
+
+R = matrix(0, rows=n,cols=n); 
+
+parfor( i in 1:8, par=2, mode=REMOTE_MR, datapartitioner=LOCAL, opt=NONE )
+{
+   X = V[,i];                 
+   m2X = moment(X,2);
+   sigmaX = sqrt(m2X * (W/(W-1.0)) );
+      
+   parfor( j in (i+1):(i+9), par=2, mode=LOCAL, opt=NONE )  
+   {  
+      Y = V[,j];
+
+      #corr computation    
+      m2Y = moment(Y,2);
+      sigmaY = sqrt(m2Y * (W/(W-1.0)) );      
+      covXY = cov(X,Y);      
+      rXY = covXY / (sigmaX*sigmaY); 
+      
+      print("R[("+i+","+j+")]="+rXY); #test robustness of ProgramConverter
+      R[i,j] = rXY; 
+      
+   }
+}   
+
 write(R, $4); 
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/parfor/parfor_corr_large4.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/parfor/parfor_corr_large4.dml b/src/test/scripts/applications/parfor/parfor_corr_large4.dml
index 2df6724..e827fab 100644
--- a/src/test/scripts/applications/parfor/parfor_corr_large4.dml
+++ b/src/test/scripts/applications/parfor/parfor_corr_large4.dml
@@ -1,51 +1,51 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-V = read($1,rows=$2,cols=$3);
-m = $2;
-n = $3;
-W = m;
-
-R = matrix(0, rows=n,cols=n); 
-
-parfor( i in 1:8 )
-{
-   X = V[,i];                 
-   m2X = moment(X,2);
-   sigmaX = sqrt(m2X * (W/(W-1.0)) );
-      
-   parfor( j in (i+1):(i+9) )  
-   {  
-      Y = V[,j];
-
-      #corr computation    
-      m2Y = moment(Y,2);
-      sigmaY = sqrt(m2Y * (W/(W-1.0)) );      
-      covXY = cov(X,Y);      
-      rXY = covXY / (sigmaX*sigmaY); 
-      
-      print("R[("+i+","+j+")]="+rXY); #test robustness of ProgramConverter
-      R[i,j] = rXY; 
-      
-   }
-}   
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+V = read($1,rows=$2,cols=$3);
+m = $2;
+n = $3;
+W = m;
+
+R = matrix(0, rows=n,cols=n); 
+
+parfor( i in 1:8 )
+{
+   X = V[,i];                 
+   m2X = moment(X,2);
+   sigmaX = sqrt(m2X * (W/(W-1.0)) );
+      
+   parfor( j in (i+1):(i+9) )  
+   {  
+      Y = V[,j];
+
+      #corr computation    
+      m2Y = moment(Y,2);
+      sigmaY = sqrt(m2Y * (W/(W-1.0)) );      
+      covXY = cov(X,Y);      
+      rXY = covXY / (sigmaX*sigmaY); 
+      
+      print("R[("+i+","+j+")]="+rXY); #test robustness of ProgramConverter
+      R[i,j] = rXY; 
+      
+   }
+}   
+
 write(R, $4); 
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/parfor/parfor_cv_multiclasssvm.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/parfor/parfor_cv_multiclasssvm.R b/src/test/scripts/applications/parfor/parfor_cv_multiclasssvm.R
index aa3b79d..6f935b7 100644
--- a/src/test/scripts/applications/parfor/parfor_cv_multiclasssvm.R
+++ b/src/test/scripts/applications/parfor/parfor_cv_multiclasssvm.R
@@ -1,291 +1,291 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-library("matrixStats") 
-
-
-
-################################################################################
-
-printFoldStatistics = function(stats)
-{
-   mean_correct_pct = mean( stats[,1])
-
-   #print (" Mean Correct Percentage of the " + nrow( stats) + " Folds: " + mean_correct_pct);
-}
-
-################################################################################
-
-scoreMultiClassSVM = function( X, y, W, intercept) 
-{
-   Nt = nrow(X);
-   num_classes = ncol(W)
-   b = matrix( 0, 1, num_classes)
-   n = ncol(X);
-
-   if (intercept == 1) 
-   {
-      b = W[n+1,]
-   }
-       
-   ones = matrix( 1, Nt, 1 )
-   scores = X %*% W[1:n,] + ones %*% b;                          
-                                 
-   #predicted_y = which(scores == rowMaxs(scores));
-   predicted_y = matrix(0,nrow(scores),1);
-   for( i in 1:nrow(scores) )
-   {      
-      predicted_y[i,1]<-which.max(scores[i,]); 
-   }
-   
-   correct_percentage = sum( ((predicted_y - y)==0.0)) / Nt * 100;
-   out_correct_pct = correct_percentage;
-
-   return (out_correct_pct);
-}
-
-
-################################################################################
-
-multiClassSVM = function (X, Y, intercept, num_classes, epsilon, lambda, max_iterations) 
-{
-   check_X <- sum(X)
-   if(check_X == 0){
-
-     print("X has no non-zeros")
-
-   } else {
-
-      num_samples <- nrow(X)
-      num_features <- ncol(X)
-      
-      if (intercept == 1) {
-        ones <- matrix( 1, num_samples, 1);
-        X <- cbind( X, ones);
-      }
-      
-      iter_class = 1
-      
-      Y_local <- 2 * ( Y == iter_class ) - 1
-      w_class <- matrix( 0, num_features, 1 )
-   
-      if (intercept == 1) {
-         zero_matrix <- matrix( 0, 1, 1 );
-         w_class <- t( cbind( t( w_class), zero_matrix));
-      }
-      
-      g_old <- t(X) %*% Y_local
-      s <- g_old
-      iter <- 0
-      continue <- 1
-   
-      while(continue == 1) {
-        # minimizing primal obj along direction s
-        step_sz <- 0
-        Xd <- X %*% s
-        wd <- lambda * sum(w_class * s)
-        dd <- lambda * sum(s * s)
-        continue1 <- 1
-        
-        while(continue1 == 1){
-         tmp_w <- w_class + step_sz*s
-         out <- 1 - Y_local * (X %*% tmp_w)
-         sv <- (out > 0)
-         out <- out * sv
-         g <- wd + step_sz*dd - sum(out * Y_local * Xd)
-         h <- dd + sum(Xd * sv * Xd)
-         step_sz <- step_sz - g/h
-         if (g*g/h < 0.0000000001){
-          continue1 = 0
-         }
-         
-        }
-       
-        #update weights
-        w_class <- w_class + step_sz*s
-       
-        out <- 1 - Y_local * (X %*% w_class)
-        sv <- (out > 0)
-        out <- sv * out
-        obj <- 0.5 * sum(out * out) + lambda/2 * sum(w_class * w_class)
-        g_new <- t(X) %*% (out * Y_local) - lambda * w_class
-      
-        tmp <- sum(s * g_old)
-        
-        train_acc <- sum( ((Y_local*(X%*%w_class)) >= 0))/num_samples*100
-        #print("For class " + iter_class + " iteration " + iter + " training accuracy: " + train_acc)
-         
-        if((step_sz*tmp < epsilon*obj) | (iter >= max_iterations-1)){
-         continue = 0
-        }
-       
-        #non-linear CG step
-        be <- sum(g_new * g_new)/sum(g_old * g_old)
-        s <- be * s + g_new
-        g_old <- g_new
-      
-        iter <- iter + 1
-       }
-      
-      
-      w <- w_class
-      iter_class <- iter_class + 1
-      
-      while(iter_class <= num_classes){
-       Y_local <- 2 * (Y == iter_class) - 1
-       w_class <- matrix(0, ncol(X), 1)
-       if (intercept == 1) {
-       	zero_matrix <- matrix(0, 1, 1);
-       	w_class <- t(cbind(t(w_class), zero_matrix));
-       }
-       
-       g_old <- t(X) %*% Y_local
-       s <- g_old
-      
-       iter <- 0
-       continue <- 1
-       while(continue == 1)  {
-        # minimizing primal obj along direction s
-        step_sz = 0
-        Xd <- X %*% s
-        wd <- lambda * sum(w_class * s)
-        dd <- lambda * sum(s * s)
-        continue1 = 1
-        while(continue1 == 1){
-         tmp_w <- w_class + step_sz*s
-         out <- 1 - Y_local * (X %*% tmp_w)
-         sv <- (out > 0)
-         out <- out * sv
-         g <- wd + step_sz*dd - sum(out * Y_local * Xd)
-         h <- dd + sum(Xd * sv * Xd)
-         step_sz <- step_sz - g/h
-         if (g*g/h < 0.0000000001){
-          continue1 <- 0
-         }
-        }
-       
-        #update weights
-        w_class <- w_class + step_sz*s
-       
-        out <- 1 - Y_local * (X %*% w_class)
-        sv <- (out > 0)
-        out <- sv * out
-        obj <- 0.5 * sum(out * out) + lambda/2 * sum(w_class * w_class)
-        g_new <- t(X) %*% (out * Y_local) - lambda * w_class
-      
-        tmp <- sum(s * g_old)
-        
-        train_acc <- sum( ((Y_local*(X%*%w_class)) >= 0) )/num_samples*100
-        #print("For class " + iter_class + " iteration " + iter + " training accuracy: " + train_acc)
-         
-        if( ((step_sz*tmp < epsilon*obj) | (iter >= max_iterations-1)) ){
-         continue <- 0
-        }
-       
-        #non-linear CG step
-        be <- sum(g_new * g_new)/sum(g_old * g_old)
-        s <- be * s + g_new
-        g_old <- g_new
-      
-        iter <- iter + 1
-       }
-      
-       w <- cbind(w, w_class) 
-       iter_class <- iter_class + 1 
-      }
-      ret_W <- w
-   }
-   
-   return (ret_W);
-}
-
-
-
-
-X <- as.matrix(readMM(paste(args[1], "X.mtx", sep="")))
-y <- as.matrix(readMM(paste(args[1], "y.mtx", sep="")))
-
-m = nrow(X);
-n = ncol(X);
-
-k = as.numeric(args[2]);
-
-#parameters for model training
-intercept = as.numeric(args[3]);
-num_classes = as.numeric(args[4]);
-epsilon = as.numeric(args[5]);
-lambda = as.numeric(args[6]); 
-maxiter = as.numeric(args[7]);
-
-#CV
-P = as.matrix(readMM(paste(args[1], "P.mtx", sep="")))
-
-ones = matrix(1, 1, n);
-stats = matrix(0, k, 1); #k-folds x 1-stats
-   
-for( i in 1:k )
-{
-   #prepare train/test fold projections
-   vPxi <- (P == i);   #  Select 1/k fraction of the rows   
-   mPxi <- (vPxi %*% ones);       #  for the i-th fold TEST set
-   #nvPxi <- (P != i);
-   #nmPxi <- (nvPxi %*% ones);  #note: inefficient for sparse data  
-
-   #create train/test folds
-   Xi <- X * mPxi;  #  Create the TEST set with 1/k of all the rows
-   yi <- y * vPxi;  #  Create the labels for the TEST set
-   
-   nXi <- X - Xi;   #  Create the TRAINING set with (k-1)/k of the rows
-   nyi <- y - yi;   #  Create the labels for the TRAINING set
-   Xyi <- cbind(Xi,yi); #keep alignment on removeEmpty
-   #Xyi <- removeEmpty( target=Xyi, margin="rows" );
-   Xyi <- Xyi[rowSums((Xyi==0) | is.na(Xyi)) != ncol(Xyi),];
-   Xi <- Xyi[ , 1:n];
-   yi <- Xyi[ , n+1];   
-   
-   nXyi = cbind(nXi,nyi); #keep alignment on removeEmpty
-   #nXyi = removeEmpty( target=nXyi, margin="rows" );
-   nXyi = nXyi[rowSums((nXyi==0) | is.na(nXyi)) != ncol(nXyi),];
-   nXi = nXyi[ , 1:n];
-   nyi = nXyi[ , n+1];
-
-   #train multiclass SVM model per fold, use the TRAINING set
-   wi = multiClassSVM( nXi, nyi, intercept, num_classes, epsilon, lambda, maxiter)
-
-   #score multiclass SVM model per fold, use the TEST set
-   out_correct_pct = scoreMultiClassSVM( Xi, yi, wi, intercept);
-   
-   stats[i,1] <- out_correct_pct;
-}
-
-# print output of stats
-printFoldStatistics( stats );
-
-writeMM(as(stats, "CsparseMatrix"), paste(args[8], "stats", sep=""));
-
-
-
-
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+library("matrixStats") 
+
+
+
+################################################################################
+
+printFoldStatistics = function(stats)
+{
+   mean_correct_pct = mean( stats[,1])
+
+   #print (" Mean Correct Percentage of the " + nrow( stats) + " Folds: " + mean_correct_pct);
+}
+
+################################################################################
+
+scoreMultiClassSVM = function( X, y, W, intercept) 
+{
+   Nt = nrow(X);
+   num_classes = ncol(W)
+   b = matrix( 0, 1, num_classes)
+   n = ncol(X);
+
+   if (intercept == 1) 
+   {
+      b = W[n+1,]
+   }
+       
+   ones = matrix( 1, Nt, 1 )
+   scores = X %*% W[1:n,] + ones %*% b;                          
+                                 
+   #predicted_y = which(scores == rowMaxs(scores));
+   predicted_y = matrix(0,nrow(scores),1);
+   for( i in 1:nrow(scores) )
+   {      
+      predicted_y[i,1]<-which.max(scores[i,]); 
+   }
+   
+   correct_percentage = sum( ((predicted_y - y)==0.0)) / Nt * 100;
+   out_correct_pct = correct_percentage;
+
+   return (out_correct_pct);
+}
+
+
+################################################################################
+
+multiClassSVM = function (X, Y, intercept, num_classes, epsilon, lambda, max_iterations) 
+{
+   check_X <- sum(X)
+   if(check_X == 0){
+
+     print("X has no non-zeros")
+
+   } else {
+
+      num_samples <- nrow(X)
+      num_features <- ncol(X)
+      
+      if (intercept == 1) {
+        ones <- matrix( 1, num_samples, 1);
+        X <- cbind( X, ones);
+      }
+      
+      iter_class = 1
+      
+      Y_local <- 2 * ( Y == iter_class ) - 1
+      w_class <- matrix( 0, num_features, 1 )
+   
+      if (intercept == 1) {
+         zero_matrix <- matrix( 0, 1, 1 );
+         w_class <- t( cbind( t( w_class), zero_matrix));
+      }
+      
+      g_old <- t(X) %*% Y_local
+      s <- g_old
+      iter <- 0
+      continue <- 1
+   
+      while(continue == 1) {
+        # minimizing primal obj along direction s
+        step_sz <- 0
+        Xd <- X %*% s
+        wd <- lambda * sum(w_class * s)
+        dd <- lambda * sum(s * s)
+        continue1 <- 1
+        
+        while(continue1 == 1){
+         tmp_w <- w_class + step_sz*s
+         out <- 1 - Y_local * (X %*% tmp_w)
+         sv <- (out > 0)
+         out <- out * sv
+         g <- wd + step_sz*dd - sum(out * Y_local * Xd)
+         h <- dd + sum(Xd * sv * Xd)
+         step_sz <- step_sz - g/h
+         if (g*g/h < 0.0000000001){
+          continue1 = 0
+         }
+         
+        }
+       
+        #update weights
+        w_class <- w_class + step_sz*s
+       
+        out <- 1 - Y_local * (X %*% w_class)
+        sv <- (out > 0)
+        out <- sv * out
+        obj <- 0.5 * sum(out * out) + lambda/2 * sum(w_class * w_class)
+        g_new <- t(X) %*% (out * Y_local) - lambda * w_class
+      
+        tmp <- sum(s * g_old)
+        
+        train_acc <- sum( ((Y_local*(X%*%w_class)) >= 0))/num_samples*100
+        #print("For class " + iter_class + " iteration " + iter + " training accuracy: " + train_acc)
+         
+        if((step_sz*tmp < epsilon*obj) | (iter >= max_iterations-1)){
+         continue = 0
+        }
+       
+        #non-linear CG step
+        be <- sum(g_new * g_new)/sum(g_old * g_old)
+        s <- be * s + g_new
+        g_old <- g_new
+      
+        iter <- iter + 1
+       }
+      
+      
+      w <- w_class
+      iter_class <- iter_class + 1
+      
+      while(iter_class <= num_classes){
+       Y_local <- 2 * (Y == iter_class) - 1
+       w_class <- matrix(0, ncol(X), 1)
+       if (intercept == 1) {
+       	zero_matrix <- matrix(0, 1, 1);
+       	w_class <- t(cbind(t(w_class), zero_matrix));
+       }
+       
+       g_old <- t(X) %*% Y_local
+       s <- g_old
+      
+       iter <- 0
+       continue <- 1
+       while(continue == 1)  {
+        # minimizing primal obj along direction s
+        step_sz = 0
+        Xd <- X %*% s
+        wd <- lambda * sum(w_class * s)
+        dd <- lambda * sum(s * s)
+        continue1 = 1
+        while(continue1 == 1){
+         tmp_w <- w_class + step_sz*s
+         out <- 1 - Y_local * (X %*% tmp_w)
+         sv <- (out > 0)
+         out <- out * sv
+         g <- wd + step_sz*dd - sum(out * Y_local * Xd)
+         h <- dd + sum(Xd * sv * Xd)
+         step_sz <- step_sz - g/h
+         if (g*g/h < 0.0000000001){
+          continue1 <- 0
+         }
+        }
+       
+        #update weights
+        w_class <- w_class + step_sz*s
+       
+        out <- 1 - Y_local * (X %*% w_class)
+        sv <- (out > 0)
+        out <- sv * out
+        obj <- 0.5 * sum(out * out) + lambda/2 * sum(w_class * w_class)
+        g_new <- t(X) %*% (out * Y_local) - lambda * w_class
+      
+        tmp <- sum(s * g_old)
+        
+        train_acc <- sum( ((Y_local*(X%*%w_class)) >= 0) )/num_samples*100
+        #print("For class " + iter_class + " iteration " + iter + " training accuracy: " + train_acc)
+         
+        if( ((step_sz*tmp < epsilon*obj) | (iter >= max_iterations-1)) ){
+         continue <- 0
+        }
+       
+        #non-linear CG step
+        be <- sum(g_new * g_new)/sum(g_old * g_old)
+        s <- be * s + g_new
+        g_old <- g_new
+      
+        iter <- iter + 1
+       }
+      
+       w <- cbind(w, w_class) 
+       iter_class <- iter_class + 1 
+      }
+      ret_W <- w
+   }
+   
+   return (ret_W);
+}
+
+
+
+
+X <- as.matrix(readMM(paste(args[1], "X.mtx", sep="")))
+y <- as.matrix(readMM(paste(args[1], "y.mtx", sep="")))
+
+m = nrow(X);
+n = ncol(X);
+
+k = as.numeric(args[2]);
+
+#parameters for model training
+intercept = as.numeric(args[3]);
+num_classes = as.numeric(args[4]);
+epsilon = as.numeric(args[5]);
+lambda = as.numeric(args[6]); 
+maxiter = as.numeric(args[7]);
+
+#CV
+P = as.matrix(readMM(paste(args[1], "P.mtx", sep="")))
+
+ones = matrix(1, 1, n);
+stats = matrix(0, k, 1); #k-folds x 1-stats
+   
+for( i in 1:k )
+{
+   #prepare train/test fold projections
+   vPxi <- (P == i);   #  Select 1/k fraction of the rows   
+   mPxi <- (vPxi %*% ones);       #  for the i-th fold TEST set
+   #nvPxi <- (P != i);
+   #nmPxi <- (nvPxi %*% ones);  #note: inefficient for sparse data  
+
+   #create train/test folds
+   Xi <- X * mPxi;  #  Create the TEST set with 1/k of all the rows
+   yi <- y * vPxi;  #  Create the labels for the TEST set
+   
+   nXi <- X - Xi;   #  Create the TRAINING set with (k-1)/k of the rows
+   nyi <- y - yi;   #  Create the labels for the TRAINING set
+   Xyi <- cbind(Xi,yi); #keep alignment on removeEmpty
+   #Xyi <- removeEmpty( target=Xyi, margin="rows" );
+   Xyi <- Xyi[rowSums((Xyi==0) | is.na(Xyi)) != ncol(Xyi),];
+   Xi <- Xyi[ , 1:n];
+   yi <- Xyi[ , n+1];   
+   
+   nXyi = cbind(nXi,nyi); #keep alignment on removeEmpty
+   #nXyi = removeEmpty( target=nXyi, margin="rows" );
+   nXyi = nXyi[rowSums((nXyi==0) | is.na(nXyi)) != ncol(nXyi),];
+   nXi = nXyi[ , 1:n];
+   nyi = nXyi[ , n+1];
+
+   #train multiclass SVM model per fold, use the TRAINING set
+   wi = multiClassSVM( nXi, nyi, intercept, num_classes, epsilon, lambda, maxiter)
+
+   #score multiclass SVM model per fold, use the TEST set
+   out_correct_pct = scoreMultiClassSVM( Xi, yi, wi, intercept);
+   
+   stats[i,1] <- out_correct_pct;
+}
+
+# print output of stats
+printFoldStatistics( stats );
+
+writeMM(as(stats, "CsparseMatrix"), paste(args[8], "stats", sep=""));
+
+
+
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/parfor/parfor_cv_multiclasssvm0.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/parfor/parfor_cv_multiclasssvm0.dml b/src/test/scripts/applications/parfor/parfor_cv_multiclasssvm0.dml
index 02d2958..173826a 100644
--- a/src/test/scripts/applications/parfor/parfor_cv_multiclasssvm0.dml
+++ b/src/test/scripts/applications/parfor/parfor_cv_multiclasssvm0.dml
@@ -1,274 +1,274 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# hadoop jar SystemML.jar -f CV_MultiClassSVM.dml -args demo/svm/X_50k_10 demo/svm/y_50k 4 0 2 0.001 1.0 100 
-
-
-X = read( $1, rows=$3, cols=$4 );
-y = read( $2, rows=$3, cols=1 );
-m = nrow( X );
-n = ncol( X );
-
-k = $5;
-
-#parameters for model training
-intercept = $6
-num_classes = $7
-epsilon = $8
-lambda = $9 
-maxiter = $10
-
-#CV
-#P = Rand(rows=m, cols=1, min=0.0, max=1.0, pdf = "uniform");
-#P = round(0.5+P*k);
-P = read($12, rows=$3, cols=1);
-
-ones = matrix(1, rows=1, cols=n);
-stats = matrix(0, rows=k, cols=1); #k-folds x 1-stats
-   
-for( i in 1:k )
-{
-   #prepare train/test fold projections
-   vPxi = ppred( P, i, "==" );   #  Select 1/k fraction of the rows
-   mPxi = (vPxi %*% ones);       #  for the i-th fold TEST set
-   #nvPxi = ppred( P, i, "!=" );
-   #nmPxi = (nvPxi %*% ones);  #note: inefficient for sparse data  
-
-   #create train/test folds
-   Xi = X * mPxi;  #  Create the TEST set with 1/k of all the rows
-   yi = y * vPxi;  #  Create the labels for the TEST set
-   nXi = X - Xi;   #  Create the TRAINING set with (k-1)/k of the rows
-   nyi = y - yi;   #  Create the labels for the TRAINING set
-   Xyi = append(Xi,yi); #keep alignment on removeEmpty
-   Xyi = removeEmpty( target=Xyi, margin="rows" );
-   Xi = Xyi[ , 1:n];
-   yi = Xyi[ , n+1];   
-   nXyi = append(nXi,nyi); #keep alignment on removeEmpty
-   nXyi = removeEmpty( target=nXyi, margin="rows" );
-   nXi = nXyi[ , 1:n];
-   nyi = nXyi[ , n+1];
-
-   #train multiclass SVM model per fold, use the TRAINING set
-   wi = multiClassSVM( nXi, nyi, intercept, num_classes, epsilon, lambda, maxiter)
-
-   #score multiclass SVM model per fold, use the TEST set
-   out_correct_pct = scoreMultiClassSVM( Xi, yi, wi, intercept);
-   
-   stats[i,1] = out_correct_pct;
-}
-
-# print output of stats
-z = printFoldStatistics( stats );
-
-write( stats, $11 );
-
-################################################################################
-
-printFoldStatistics = function( Matrix[double] stats)
-   return( Integer err)
-{
-   mean_correct_pct = mean( stats[,1])
-
-   print (" Mean Correct Percentage of the " + nrow( stats) + " Folds: " + mean_correct_pct);
-
-   err = 0
-}
-
-################################################################################
-
-scoreMultiClassSVM = function( Matrix[double] X, Matrix[double] y, Matrix[double] W, Integer intercept) 
-   return (Double out_correct_pct)
-{
-   Nt = nrow(X);
-   num_classes = ncol(W)
-   b = matrix( 0, rows=1, cols=num_classes )
-   n = ncol(X);
-
-   if (intercept == 1) 
-   {
-      b = W[n+1,]
-   }
-   
-   ones = matrix( 1, rows=Nt, cols=1 )
-
-   scores = X %*% W[1:n,] + ones %*% b;
-   
-   predicted_y = rowIndexMax( scores);
-   
-   correct_percentage = sum( ppred( predicted_y - y, 0, "==")) / Nt * 100;
-
-   out_correct_pct = correct_percentage;
-
-}
-
-
-################################################################################
-
-multiClassSVM = function (Matrix[double] X, Matrix[double] Y, Integer intercept, Integer num_classes, Double epsilon, Double lambda, Integer max_iterations) 
-   return (Matrix[double] ret_W) 
-{
-   check_X = sum(X)
-   if(check_X == 0){
-
-     print("X has no non-zeros")
-
-   } else {
-
-      num_samples = nrow(X)
-      num_features = ncol(X)
-      
-      if (intercept == 1) {
-        ones = matrix( 1, rows=num_samples, cols=1 );
-        X = append( X, ones);
-      }
-      
-      iter_class = 1
-      
-      Y_local = 2 * ppred( Y, iter_class, "==") - 1
-      w_class = matrix( 0, rows=num_features, cols=1 )
-   
-      if (intercept == 1) {
-         zero_matrix = matrix( 0, rows=1, cols=1 );
-         w_class = t( append( t( w_class), zero_matrix));
-      }
-      
-      g_old = t(X) %*% Y_local
-      s = g_old
-      iter = 0
-      continue = 1
-   
-      while(continue == 1) {
-        # minimizing primal obj along direction s
-        step_sz = 0
-        Xd = X %*% s
-        wd = lambda * sum(w_class * s)
-        dd = lambda * sum(s * s)
-        continue1 = 1
-        while(continue1 == 1){
-         tmp_w = w_class + step_sz*s
-         out = 1 - Y_local * (X %*% tmp_w)
-         sv = ppred(out, 0, ">")
-         out = out * sv
-         g = wd + step_sz*dd - sum(out * Y_local * Xd)
-         h = dd + sum(Xd * sv * Xd)
-         step_sz = step_sz - g/h
-         if (g*g/h < 0.0000000001){
-          continue1 = 0
-         }
-        }
-       
-        #update weights
-        w_class = w_class + step_sz*s
-       
-        out = 1 - Y_local * (X %*% w_class)
-        sv = ppred(out, 0, ">")
-        out = sv * out
-        obj = 0.5 * sum(out * out) + lambda/2 * sum(w_class * w_class)
-        g_new = t(X) %*% (out * Y_local) - lambda * w_class
-      
-        tmp = sum(s * g_old)
-        
-        train_acc = sum(ppred(Y_local*(X%*%w_class), 0, ">="))/num_samples*100
-        #print("For class " + iter_class + " iteration " + iter + " training accuracy: " + train_acc)
-         
-        if((step_sz*tmp < epsilon*obj) | (iter >= max_iterations-1)){
-         continue = 0
-        }
-       
-        #non-linear CG step
-        be = sum(g_new * g_new)/sum(g_old * g_old)
-        s = be * s + g_new
-        g_old = g_new
-      
-        iter = iter + 1
-       }
-      
-      
-      w = w_class
-      iter_class = iter_class + 1
-      
-      while(iter_class <= num_classes){
-       Y_local = 2 * ppred(Y, iter_class, "==") - 1
-       w_class = matrix(0, rows=ncol(X), cols=1)
-       if (intercept == 1) {
-       	zero_matrix = matrix(0, rows=1, cols=1);
-       	w_class = t(append(t(w_class), zero_matrix));
-       }
-       
-       g_old = t(X) %*% Y_local
-       s = g_old
-      
-       iter = 0
-       continue = 1
-       while(continue == 1)  {
-        # minimizing primal obj along direction s
-        step_sz = 0
-        Xd = X %*% s
-        wd = lambda * sum(w_class * s)
-        dd = lambda * sum(s * s)
-        continue1 = 1
-        while(continue1 == 1){
-         tmp_w = w_class + step_sz*s
-         out = 1 - Y_local * (X %*% tmp_w)
-         sv = ppred(out, 0, ">")
-         out = out * sv
-         g = wd + step_sz*dd - sum(out * Y_local * Xd)
-         h = dd + sum(Xd * sv * Xd)
-         step_sz = step_sz - g/h
-         if (g*g/h < 0.0000000001){
-          continue1 = 0
-         }
-        }
-       
-        #update weights
-        w_class = w_class + step_sz*s
-       
-        out = 1 - Y_local * (X %*% w_class)
-        sv = ppred(out, 0, ">")
-        out = sv * out
-        obj = 0.5 * sum(out * out) + lambda/2 * sum(w_class * w_class)
-        g_new = t(X) %*% (out * Y_local) - lambda * w_class
-      
-        tmp = sum(s * g_old)
-        
-        train_acc = sum(ppred(Y_local*(X%*%w_class), 0, ">="))/num_samples*100
-        #print("For class " + iter_class + " iteration " + iter + " training accuracy: " + train_acc)
-         
-        if((step_sz*tmp < epsilon*obj) | (iter >= max_iterations-1)){
-         continue = 0
-        }
-       
-        #non-linear CG step
-        be = sum(g_new * g_new)/sum(g_old * g_old)
-        s = be * s + g_new
-        g_old = g_new
-      
-        iter = iter + 1
-       }
-      
-       w = append(w, w_class) 
-       iter_class = iter_class + 1
-      }
-      ret_W = w
-   }
-}
-
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# hadoop jar SystemML.jar -f CV_MultiClassSVM.dml -args demo/svm/X_50k_10 demo/svm/y_50k 4 0 2 0.001 1.0 100 
+
+
+X = read( $1, rows=$3, cols=$4 );
+y = read( $2, rows=$3, cols=1 );
+m = nrow( X );
+n = ncol( X );
+
+k = $5;
+
+#parameters for model training
+intercept = $6
+num_classes = $7
+epsilon = $8
+lambda = $9 
+maxiter = $10
+
+#CV
+#P = Rand(rows=m, cols=1, min=0.0, max=1.0, pdf = "uniform");
+#P = round(0.5+P*k);
+P = read($12, rows=$3, cols=1);
+
+ones = matrix(1, rows=1, cols=n);
+stats = matrix(0, rows=k, cols=1); #k-folds x 1-stats
+   
+for( i in 1:k )
+{
+   #prepare train/test fold projections
+   vPxi = ppred( P, i, "==" );   #  Select 1/k fraction of the rows
+   mPxi = (vPxi %*% ones);       #  for the i-th fold TEST set
+   #nvPxi = ppred( P, i, "!=" );
+   #nmPxi = (nvPxi %*% ones);  #note: inefficient for sparse data  
+
+   #create train/test folds
+   Xi = X * mPxi;  #  Create the TEST set with 1/k of all the rows
+   yi = y * vPxi;  #  Create the labels for the TEST set
+   nXi = X - Xi;   #  Create the TRAINING set with (k-1)/k of the rows
+   nyi = y - yi;   #  Create the labels for the TRAINING set
+   Xyi = append(Xi,yi); #keep alignment on removeEmpty
+   Xyi = removeEmpty( target=Xyi, margin="rows" );
+   Xi = Xyi[ , 1:n];
+   yi = Xyi[ , n+1];   
+   nXyi = append(nXi,nyi); #keep alignment on removeEmpty
+   nXyi = removeEmpty( target=nXyi, margin="rows" );
+   nXi = nXyi[ , 1:n];
+   nyi = nXyi[ , n+1];
+
+   #train multiclass SVM model per fold, use the TRAINING set
+   wi = multiClassSVM( nXi, nyi, intercept, num_classes, epsilon, lambda, maxiter)
+
+   #score multiclass SVM model per fold, use the TEST set
+   out_correct_pct = scoreMultiClassSVM( Xi, yi, wi, intercept);
+   
+   stats[i,1] = out_correct_pct;
+}
+
+# print output of stats
+z = printFoldStatistics( stats );
+
+write( stats, $11 );
+
+################################################################################
+
+printFoldStatistics = function( Matrix[double] stats)
+   return( Integer err)
+{
+   mean_correct_pct = mean( stats[,1])
+
+   print (" Mean Correct Percentage of the " + nrow( stats) + " Folds: " + mean_correct_pct);
+
+   err = 0
+}
+
+################################################################################
+
+scoreMultiClassSVM = function( Matrix[double] X, Matrix[double] y, Matrix[double] W, Integer intercept) 
+   return (Double out_correct_pct)
+{
+   Nt = nrow(X);
+   num_classes = ncol(W)
+   b = matrix( 0, rows=1, cols=num_classes )
+   n = ncol(X);
+
+   if (intercept == 1) 
+   {
+      b = W[n+1,]
+   }
+   
+   ones = matrix( 1, rows=Nt, cols=1 )
+
+   scores = X %*% W[1:n,] + ones %*% b;
+   
+   predicted_y = rowIndexMax( scores);
+   
+   correct_percentage = sum( ppred( predicted_y - y, 0, "==")) / Nt * 100;
+
+   out_correct_pct = correct_percentage;
+
+}
+
+
+################################################################################
+
+multiClassSVM = function (Matrix[double] X, Matrix[double] Y, Integer intercept, Integer num_classes, Double epsilon, Double lambda, Integer max_iterations) 
+   return (Matrix[double] ret_W) 
+{
+   check_X = sum(X)
+   if(check_X == 0){
+
+     print("X has no non-zeros")
+
+   } else {
+
+      num_samples = nrow(X)
+      num_features = ncol(X)
+      
+      if (intercept == 1) {
+        ones = matrix( 1, rows=num_samples, cols=1 );
+        X = append( X, ones);
+      }
+      
+      iter_class = 1
+      
+      Y_local = 2 * ppred( Y, iter_class, "==") - 1
+      w_class = matrix( 0, rows=num_features, cols=1 )
+   
+      if (intercept == 1) {
+         zero_matrix = matrix( 0, rows=1, cols=1 );
+         w_class = t( append( t( w_class), zero_matrix));
+      }
+      
+      g_old = t(X) %*% Y_local
+      s = g_old
+      iter = 0
+      continue = 1
+   
+      while(continue == 1) {
+        # minimizing primal obj along direction s
+        step_sz = 0
+        Xd = X %*% s
+        wd = lambda * sum(w_class * s)
+        dd = lambda * sum(s * s)
+        continue1 = 1
+        while(continue1 == 1){
+         tmp_w = w_class + step_sz*s
+         out = 1 - Y_local * (X %*% tmp_w)
+         sv = ppred(out, 0, ">")
+         out = out * sv
+         g = wd + step_sz*dd - sum(out * Y_local * Xd)
+         h = dd + sum(Xd * sv * Xd)
+         step_sz = step_sz - g/h
+         if (g*g/h < 0.0000000001){
+          continue1 = 0
+         }
+        }
+       
+        #update weights
+        w_class = w_class + step_sz*s
+       
+        out = 1 - Y_local * (X %*% w_class)
+        sv = ppred(out, 0, ">")
+        out = sv * out
+        obj = 0.5 * sum(out * out) + lambda/2 * sum(w_class * w_class)
+        g_new = t(X) %*% (out * Y_local) - lambda * w_class
+      
+        tmp = sum(s * g_old)
+        
+        train_acc = sum(ppred(Y_local*(X%*%w_class), 0, ">="))/num_samples*100
+        #print("For class " + iter_class + " iteration " + iter + " training accuracy: " + train_acc)
+         
+        if((step_sz*tmp < epsilon*obj) | (iter >= max_iterations-1)){
+         continue = 0
+        }
+       
+        #non-linear CG step
+        be = sum(g_new * g_new)/sum(g_old * g_old)
+        s = be * s + g_new
+        g_old = g_new
+      
+        iter = iter + 1
+       }
+      
+      
+      w = w_class
+      iter_class = iter_class + 1
+      
+      while(iter_class <= num_classes){
+       Y_local = 2 * ppred(Y, iter_class, "==") - 1
+       w_class = matrix(0, rows=ncol(X), cols=1)
+       if (intercept == 1) {
+       	zero_matrix = matrix(0, rows=1, cols=1);
+       	w_class = t(append(t(w_class), zero_matrix));
+       }
+       
+       g_old = t(X) %*% Y_local
+       s = g_old
+      
+       iter = 0
+       continue = 1
+       while(continue == 1)  {
+        # minimizing primal obj along direction s
+        step_sz = 0
+        Xd = X %*% s
+        wd = lambda * sum(w_class * s)
+        dd = lambda * sum(s * s)
+        continue1 = 1
+        while(continue1 == 1){
+         tmp_w = w_class + step_sz*s
+         out = 1 - Y_local * (X %*% tmp_w)
+         sv = ppred(out, 0, ">")
+         out = out * sv
+         g = wd + step_sz*dd - sum(out * Y_local * Xd)
+         h = dd + sum(Xd * sv * Xd)
+         step_sz = step_sz - g/h
+         if (g*g/h < 0.0000000001){
+          continue1 = 0
+         }
+        }
+       
+        #update weights
+        w_class = w_class + step_sz*s
+       
+        out = 1 - Y_local * (X %*% w_class)
+        sv = ppred(out, 0, ">")
+        out = sv * out
+        obj = 0.5 * sum(out * out) + lambda/2 * sum(w_class * w_class)
+        g_new = t(X) %*% (out * Y_local) - lambda * w_class
+      
+        tmp = sum(s * g_old)
+        
+        train_acc = sum(ppred(Y_local*(X%*%w_class), 0, ">="))/num_samples*100
+        #print("For class " + iter_class + " iteration " + iter + " training accuracy: " + train_acc)
+         
+        if((step_sz*tmp < epsilon*obj) | (iter >= max_iterations-1)){
+         continue = 0
+        }
+       
+        #non-linear CG step
+        be = sum(g_new * g_new)/sum(g_old * g_old)
+        s = be * s + g_new
+        g_old = g_new
+      
+        iter = iter + 1
+       }
+      
+       w = append(w, w_class) 
+       iter_class = iter_class + 1
+      }
+      ret_W = w
+   }
+}
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/parfor/parfor_cv_multiclasssvm1.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/parfor/parfor_cv_multiclasssvm1.dml b/src/test/scripts/applications/parfor/parfor_cv_multiclasssvm1.dml
index 06b1318..d8b2218 100644
--- a/src/test/scripts/applications/parfor/parfor_cv_multiclasssvm1.dml
+++ b/src/test/scripts/applications/parfor/parfor_cv_multiclasssvm1.dml
@@ -1,274 +1,274 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# hadoop jar SystemML.jar -f CV_MultiClassSVM.dml -args demo/svm/X_50k_10 demo/svm/y_50k 4 0 2 0.001 1.0 100 
-
-
-X = read( $1, rows=$3, cols=$4 );
-y = read( $2, rows=$3, cols=1 );
-m = nrow( X );
-n = ncol( X );
-
-k = $5;
-
-#parameters for model training
-intercept = $6
-num_classes = $7
-epsilon = $8
-lambda = $9 
-maxiter = $10
-
-#CV
-#P = Rand(rows=m, cols=1, min=0.0, max=1.0, pdf = "uniform");
-#P = round(0.5+P*k);
-P = read($12, rows=$3, cols=1);
-
-ones = matrix(1, rows=1, cols=n);
-stats = matrix(0, rows=k, cols=1); #k-folds x 1-stats
-   
-parfor( i in 1:k, par=4, mode=LOCAL, opt=NONE )
-{
-   #prepare train/test fold projections
-   vPxi = ppred( P, i, "==" );   #  Select 1/k fraction of the rows
-   mPxi = (vPxi %*% ones);       #  for the i-th fold TEST set
-   #nvPxi = ppred( P, i, "!=" );
-   #nmPxi = (nvPxi %*% ones);  #note: inefficient for sparse data  
-
-   #create train/test folds
-   Xi = X * mPxi;  #  Create the TEST set with 1/k of all the rows
-   yi = y * vPxi;  #  Create the labels for the TEST set
-   nXi = X - Xi;   #  Create the TRAINING set with (k-1)/k of the rows
-   nyi = y - yi;   #  Create the labels for the TRAINING set
-   Xyi = append(Xi,yi); #keep alignment on removeEmpty
-   Xyi = removeEmpty( target=Xyi, margin="rows" );
-   Xi = Xyi[ , 1:n];
-   yi = Xyi[ , n+1];   
-   nXyi = append(nXi,nyi); #keep alignment on removeEmpty
-   nXyi = removeEmpty( target=nXyi, margin="rows" );
-   nXi = nXyi[ , 1:n];
-   nyi = nXyi[ , n+1];
-
-   #train multiclass SVM model per fold, use the TRAINING set
-   wi = multiClassSVM( nXi, nyi, intercept, num_classes, epsilon, lambda, maxiter)
-
-   #score multiclass SVM model per fold, use the TEST set
-   out_correct_pct = scoreMultiClassSVM( Xi, yi, wi, intercept);
-   
-   stats[i,1] = out_correct_pct;
-}
-
-# print output of stats
-z = printFoldStatistics( stats );
-
-write( stats, $11 );
-
-################################################################################
-
-printFoldStatistics = function( Matrix[double] stats)
-   return( Integer err)
-{
-   mean_correct_pct = mean( stats[,1])
-
-   print (" Mean Correct Percentage of the " + nrow( stats) + " Folds: " + mean_correct_pct);
-
-   err = 0
-}
-
-################################################################################
-
-scoreMultiClassSVM = function( Matrix[double] X, Matrix[double] y, Matrix[double] W, Integer intercept) 
-   return (Double out_correct_pct)
-{
-   Nt = nrow(X);
-   num_classes = ncol(W)
-   b = matrix( 0, rows=1, cols=num_classes )
-   n = ncol(X);
-
-   if (intercept == 1) 
-   {
-      b = W[n+1,]
-   }
-   
-   ones = matrix( 1, rows=Nt, cols=1 )
-
-   scores = X %*% W[1:n,] + ones %*% b;
-   
-   predicted_y = rowIndexMax( scores);
-   
-   correct_percentage = sum( ppred( predicted_y - y, 0, "==")) / Nt * 100;
-
-   out_correct_pct = correct_percentage;
-
-}
-
-
-################################################################################
-
-multiClassSVM = function (Matrix[double] X, Matrix[double] Y, Integer intercept, Integer num_classes, Double epsilon, Double lambda, Integer max_iterations) 
-   return (Matrix[double] ret_W) 
-{
-   check_X = sum(X)
-   if(check_X == 0){
-
-     print("X has no non-zeros")
-
-   } else {
-
-      num_samples = nrow(X)
-      num_features = ncol(X)
-      
-      if (intercept == 1) {
-        ones = matrix( 1, rows=num_samples, cols=1 );
-        X = append( X, ones);
-      }
-      
-      iter_class = 1
-      
-      Y_local = 2 * ppred( Y, iter_class, "==") - 1
-      w_class = matrix( 0, rows=num_features, cols=1 )
-   
-      if (intercept == 1) {
-         zero_matrix = matrix( 0, rows=1, cols=1 );
-         w_class = t( append( t( w_class), zero_matrix));
-      }
-      
-      g_old = t(X) %*% Y_local
-      s = g_old
-      iter = 0
-      continue = 1
-   
-      while(continue == 1) {
-        # minimizing primal obj along direction s
-        step_sz = 0
-        Xd = X %*% s
-        wd = lambda * sum(w_class * s)
-        dd = lambda * sum(s * s)
-        continue1 = 1
-        while(continue1 == 1){
-         tmp_w = w_class + step_sz*s
-         out = 1 - Y_local * (X %*% tmp_w)
-         sv = ppred(out, 0, ">")
-         out = out * sv
-         g = wd + step_sz*dd - sum(out * Y_local * Xd)
-         h = dd + sum(Xd * sv * Xd)
-         step_sz = step_sz - g/h
-         if (g*g/h < 0.0000000001){
-          continue1 = 0
-         }
-        }
-       
-        #update weights
-        w_class = w_class + step_sz*s
-       
-        out = 1 - Y_local * (X %*% w_class)
-        sv = ppred(out, 0, ">")
-        out = sv * out
-        obj = 0.5 * sum(out * out) + lambda/2 * sum(w_class * w_class)
-        g_new = t(X) %*% (out * Y_local) - lambda * w_class
-      
-        tmp = sum(s * g_old)
-        
-        train_acc = sum(ppred(Y_local*(X%*%w_class), 0, ">="))/num_samples*100
-        #print("For class " + iter_class + " iteration " + iter + " training accuracy: " + train_acc)
-         
-        if((step_sz*tmp < epsilon*obj) | (iter >= max_iterations-1)){
-         continue = 0
-        }
-       
-        #non-linear CG step
-        be = sum(g_new * g_new)/sum(g_old * g_old)
-        s = be * s + g_new
-        g_old = g_new
-      
-        iter = iter + 1
-       }
-      
-      
-      w = w_class
-      iter_class = iter_class + 1
-      
-      while(iter_class <= num_classes){
-       Y_local = 2 * ppred(Y, iter_class, "==") - 1
-       w_class = matrix(0, rows=ncol(X), cols=1)
-       if (intercept == 1) {
-       	zero_matrix = matrix(0, rows=1, cols=1);
-       	w_class = t(append(t(w_class), zero_matrix));
-       }
-       
-       g_old = t(X) %*% Y_local
-       s = g_old
-      
-       iter = 0
-       continue = 1
-       while(continue == 1)  {
-        # minimizing primal obj along direction s
-        step_sz = 0
-        Xd = X %*% s
-        wd = lambda * sum(w_class * s)
-        dd = lambda * sum(s * s)
-        continue1 = 1
-        while(continue1 == 1){
-         tmp_w = w_class + step_sz*s
-         out = 1 - Y_local * (X %*% tmp_w)
-         sv = ppred(out, 0, ">")
-         out = out * sv
-         g = wd + step_sz*dd - sum(out * Y_local * Xd)
-         h = dd + sum(Xd * sv * Xd)
-         step_sz = step_sz - g/h
-         if (g*g/h < 0.0000000001){
-          continue1 = 0
-         }
-        }
-       
-        #update weights
-        w_class = w_class + step_sz*s
-       
-        out = 1 - Y_local * (X %*% w_class)
-        sv = ppred(out, 0, ">")
-        out = sv * out
-        obj = 0.5 * sum(out * out) + lambda/2 * sum(w_class * w_class)
-        g_new = t(X) %*% (out * Y_local) - lambda * w_class
-      
-        tmp = sum(s * g_old)
-        
-        train_acc = sum(ppred(Y_local*(X%*%w_class), 0, ">="))/num_samples*100
-        #print("For class " + iter_class + " iteration " + iter + " training accuracy: " + train_acc)
-         
-        if((step_sz*tmp < epsilon*obj) | (iter >= max_iterations-1)){
-         continue = 0
-        }
-       
-        #non-linear CG step
-        be = sum(g_new * g_new)/sum(g_old * g_old)
-        s = be * s + g_new
-        g_old = g_new
-      
-        iter = iter + 1
-       }
-      
-       w = append(w, w_class) 
-       iter_class = iter_class + 1
-      }
-      ret_W = w
-   }
-}
-
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# hadoop jar SystemML.jar -f CV_MultiClassSVM.dml -args demo/svm/X_50k_10 demo/svm/y_50k 4 0 2 0.001 1.0 100 
+
+
+X = read( $1, rows=$3, cols=$4 );
+y = read( $2, rows=$3, cols=1 );
+m = nrow( X );
+n = ncol( X );
+
+k = $5;
+
+#parameters for model training
+intercept = $6
+num_classes = $7
+epsilon = $8
+lambda = $9 
+maxiter = $10
+
+#CV
+#P = Rand(rows=m, cols=1, min=0.0, max=1.0, pdf = "uniform");
+#P = round(0.5+P*k);
+P = read($12, rows=$3, cols=1);
+
+ones = matrix(1, rows=1, cols=n);
+stats = matrix(0, rows=k, cols=1); #k-folds x 1-stats
+   
+parfor( i in 1:k, par=4, mode=LOCAL, opt=NONE )
+{
+   #prepare train/test fold projections
+   vPxi = ppred( P, i, "==" );   #  Select 1/k fraction of the rows
+   mPxi = (vPxi %*% ones);       #  for the i-th fold TEST set
+   #nvPxi = ppred( P, i, "!=" );
+   #nmPxi = (nvPxi %*% ones);  #note: inefficient for sparse data  
+
+   #create train/test folds
+   Xi = X * mPxi;  #  Create the TEST set with 1/k of all the rows
+   yi = y * vPxi;  #  Create the labels for the TEST set
+   nXi = X - Xi;   #  Create the TRAINING set with (k-1)/k of the rows
+   nyi = y - yi;   #  Create the labels for the TRAINING set
+   Xyi = append(Xi,yi); #keep alignment on removeEmpty
+   Xyi = removeEmpty( target=Xyi, margin="rows" );
+   Xi = Xyi[ , 1:n];
+   yi = Xyi[ , n+1];   
+   nXyi = append(nXi,nyi); #keep alignment on removeEmpty
+   nXyi = removeEmpty( target=nXyi, margin="rows" );
+   nXi = nXyi[ , 1:n];
+   nyi = nXyi[ , n+1];
+
+   #train multiclass SVM model per fold, use the TRAINING set
+   wi = multiClassSVM( nXi, nyi, intercept, num_classes, epsilon, lambda, maxiter)
+
+   #score multiclass SVM model per fold, use the TEST set
+   out_correct_pct = scoreMultiClassSVM( Xi, yi, wi, intercept);
+   
+   stats[i,1] = out_correct_pct;
+}
+
+# print output of stats
+z = printFoldStatistics( stats );
+
+write( stats, $11 );
+
+################################################################################
+
+printFoldStatistics = function( Matrix[double] stats)
+   return( Integer err)
+{
+   mean_correct_pct = mean( stats[,1])
+
+   print (" Mean Correct Percentage of the " + nrow( stats) + " Folds: " + mean_correct_pct);
+
+   err = 0
+}
+
+################################################################################
+
+scoreMultiClassSVM = function( Matrix[double] X, Matrix[double] y, Matrix[double] W, Integer intercept) 
+   return (Double out_correct_pct)
+{
+   Nt = nrow(X);
+   num_classes = ncol(W)
+   b = matrix( 0, rows=1, cols=num_classes )
+   n = ncol(X);
+
+   if (intercept == 1) 
+   {
+      b = W[n+1,]
+   }
+   
+   ones = matrix( 1, rows=Nt, cols=1 )
+
+   scores = X %*% W[1:n,] + ones %*% b;
+   
+   predicted_y = rowIndexMax( scores);
+   
+   correct_percentage = sum( ppred( predicted_y - y, 0, "==")) / Nt * 100;
+
+   out_correct_pct = correct_percentage;
+
+}
+
+
+################################################################################
+
+multiClassSVM = function (Matrix[double] X, Matrix[double] Y, Integer intercept, Integer num_classes, Double epsilon, Double lambda, Integer max_iterations) 
+   return (Matrix[double] ret_W) 
+{
+   check_X = sum(X)
+   if(check_X == 0){
+
+     print("X has no non-zeros")
+
+   } else {
+
+      num_samples = nrow(X)
+      num_features = ncol(X)
+      
+      if (intercept == 1) {
+        ones = matrix( 1, rows=num_samples, cols=1 );
+        X = append( X, ones);
+      }
+      
+      iter_class = 1
+      
+      Y_local = 2 * ppred( Y, iter_class, "==") - 1
+      w_class = matrix( 0, rows=num_features, cols=1 )
+   
+      if (intercept == 1) {
+         zero_matrix = matrix( 0, rows=1, cols=1 );
+         w_class = t( append( t( w_class), zero_matrix));
+      }
+      
+      g_old = t(X) %*% Y_local
+      s = g_old
+      iter = 0
+      continue = 1
+   
+      while(continue == 1) {
+        # minimizing primal obj along direction s
+        step_sz = 0
+        Xd = X %*% s
+        wd = lambda * sum(w_class * s)
+        dd = lambda * sum(s * s)
+        continue1 = 1
+        while(continue1 == 1){
+         tmp_w = w_class + step_sz*s
+         out = 1 - Y_local * (X %*% tmp_w)
+         sv = ppred(out, 0, ">")
+         out = out * sv
+         g = wd + step_sz*dd - sum(out * Y_local * Xd)
+         h = dd + sum(Xd * sv * Xd)
+         step_sz = step_sz - g/h
+         if (g*g/h < 0.0000000001){
+          continue1 = 0
+         }
+        }
+       
+        #update weights
+        w_class = w_class + step_sz*s
+       
+        out = 1 - Y_local * (X %*% w_class)
+        sv = ppred(out, 0, ">")
+        out = sv * out
+        obj = 0.5 * sum(out * out) + lambda/2 * sum(w_class * w_class)
+        g_new = t(X) %*% (out * Y_local) - lambda * w_class
+      
+        tmp = sum(s * g_old)
+        
+        train_acc = sum(ppred(Y_local*(X%*%w_class), 0, ">="))/num_samples*100
+        #print("For class " + iter_class + " iteration " + iter + " training accuracy: " + train_acc)
+         
+        if((step_sz*tmp < epsilon*obj) | (iter >= max_iterations-1)){
+         continue = 0
+        }
+       
+        #non-linear CG step
+        be = sum(g_new * g_new)/sum(g_old * g_old)
+        s = be * s + g_new
+        g_old = g_new
+      
+        iter = iter + 1
+       }
+      
+      
+      w = w_class
+      iter_class = iter_class + 1
+      
+      while(iter_class <= num_classes){
+       Y_local = 2 * ppred(Y, iter_class, "==") - 1
+       w_class = matrix(0, rows=ncol(X), cols=1)
+       if (intercept == 1) {
+       	zero_matrix = matrix(0, rows=1, cols=1);
+       	w_class = t(append(t(w_class), zero_matrix));
+       }
+       
+       g_old = t(X) %*% Y_local
+       s = g_old
+      
+       iter = 0
+       continue = 1
+       while(continue == 1)  {
+        # minimizing primal obj along direction s
+        step_sz = 0
+        Xd = X %*% s
+        wd = lambda * sum(w_class * s)
+        dd = lambda * sum(s * s)
+        continue1 = 1
+        while(continue1 == 1){
+         tmp_w = w_class + step_sz*s
+         out = 1 - Y_local * (X %*% tmp_w)
+         sv = ppred(out, 0, ">")
+         out = out * sv
+         g = wd + step_sz*dd - sum(out * Y_local * Xd)
+         h = dd + sum(Xd * sv * Xd)
+         step_sz = step_sz - g/h
+         if (g*g/h < 0.0000000001){
+          continue1 = 0
+         }
+        }
+       
+        #update weights
+        w_class = w_class + step_sz*s
+       
+        out = 1 - Y_local * (X %*% w_class)
+        sv = ppred(out, 0, ">")
+        out = sv * out
+        obj = 0.5 * sum(out * out) + lambda/2 * sum(w_class * w_class)
+        g_new = t(X) %*% (out * Y_local) - lambda * w_class
+      
+        tmp = sum(s * g_old)
+        
+        train_acc = sum(ppred(Y_local*(X%*%w_class), 0, ">="))/num_samples*100
+        #print("For class " + iter_class + " iteration " + iter + " training accuracy: " + train_acc)
+         
+        if((step_sz*tmp < epsilon*obj) | (iter >= max_iterations-1)){
+         continue = 0
+        }
+       
+        #non-linear CG step
+        be = sum(g_new * g_new)/sum(g_old * g_old)
+        s = be * s + g_new
+        g_old = g_new
+      
+        iter = iter + 1
+       }
+      
+       w = append(w, w_class) 
+       iter_class = iter_class + 1
+      }
+      ret_W = w
+   }
+}
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/parfor/parfor_cv_multiclasssvm4.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/parfor/parfor_cv_multiclasssvm4.dml b/src/test/scripts/applications/parfor/parfor_cv_multiclasssvm4.dml
index a2a162c..8e6e4f1 100644
--- a/src/test/scripts/applications/parfor/parfor_cv_multiclasssvm4.dml
+++ b/src/test/scripts/applications/parfor/parfor_cv_multiclasssvm4.dml
@@ -1,274 +1,274 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# hadoop jar SystemML.jar -f CV_MultiClassSVM.dml -args demo/svm/X_50k_10 demo/svm/y_50k 4 0 2 0.001 1.0 100 
-
-
-X = read( $1, rows=$3, cols=$4 );
-y = read( $2, rows=$3, cols=1 );
-m = nrow( X );
-n = ncol( X );
-
-k = $5;
-
-#parameters for model training
-intercept = $6
-num_classes = $7
-epsilon = $8
-lambda = $9 
-maxiter = $10
-
-#CV
-#P = Rand(rows=m, cols=1, min=0.0, max=1.0, pdf = "uniform");
-#P = round(0.5+P*k);
-P = read($12, rows=$3, cols=1);
-
-ones = matrix(1, rows=1, cols=n);
-stats = matrix(0, rows=k, cols=1); #k-folds x 1-stats
-   
-parfor( i in 1:k )
-{
-   #prepare train/test fold projections
-   vPxi = ppred( P, i, "==" );   #  Select 1/k fraction of the rows
-   mPxi = (vPxi %*% ones);       #  for the i-th fold TEST set
-   #nvPxi = ppred( P, i, "!=" );
-   #nmPxi = (nvPxi %*% ones);  #note: inefficient for sparse data  
-
-   #create train/test folds
-   Xi = X * mPxi;  #  Create the TEST set with 1/k of all the rows
-   yi = y * vPxi;  #  Create the labels for the TEST set
-   nXi = X - Xi;   #  Create the TRAINING set with (k-1)/k of the rows
-   nyi = y - yi;   #  Create the labels for the TRAINING set
-   Xyi = append(Xi,yi); #keep alignment on removeEmpty
-   Xyi = removeEmpty( target=Xyi, margin="rows" );
-   Xi = Xyi[ , 1:n];
-   yi = Xyi[ , n+1];   
-   nXyi = append(nXi,nyi); #keep alignment on removeEmpty
-   nXyi = removeEmpty( target=nXyi, margin="rows" );
-   nXi = nXyi[ , 1:n];
-   nyi = nXyi[ , n+1];
-
-   #train multiclass SVM model per fold, use the TRAINING set
-   wi = multiClassSVM( nXi, nyi, intercept, num_classes, epsilon, lambda, maxiter)
-
-   #score multiclass SVM model per fold, use the TEST set
-   out_correct_pct = scoreMultiClassSVM( Xi, yi, wi, intercept);
-   
-   stats[i,1] = out_correct_pct;
-}
-
-# print output of stats
-z = printFoldStatistics( stats );
-
-write( stats, $11 );
-
-################################################################################
-
-printFoldStatistics = function( Matrix[double] stats)
-   return( Integer err)
-{
-   mean_correct_pct = mean( stats[,1])
-
-   print (" Mean Correct Percentage of the " + nrow( stats) + " Folds: " + mean_correct_pct);
-
-   err = 0
-}
-
-################################################################################
-
-scoreMultiClassSVM = function( Matrix[double] X, Matrix[double] y, Matrix[double] W, Integer intercept) 
-   return (Double out_correct_pct)
-{
-   Nt = nrow(X);
-   num_classes = ncol(W)
-   b = matrix( 0, rows=1, cols=num_classes )
-   n = ncol(X);
-
-   if (intercept == 1) 
-   {
-      b = W[n+1,]
-   }
-   
-   ones = matrix( 1, rows=Nt, cols=1 )
-
-   scores = X %*% W[1:n,] + ones %*% b;
-   
-   predicted_y = rowIndexMax( scores);
-   
-   correct_percentage = sum( ppred( predicted_y - y, 0, "==")) / Nt * 100;
-
-   out_correct_pct = correct_percentage;
-
-}
-
-
-################################################################################
-
-multiClassSVM = function (Matrix[double] X, Matrix[double] Y, Integer intercept, Integer num_classes, Double epsilon, Double lambda, Integer max_iterations) 
-   return (Matrix[double] ret_W) 
-{
-   check_X = sum(X)
-   if(check_X == 0){
-
-     print("X has no non-zeros")
-
-   } else {
-
-      num_samples = nrow(X)
-      num_features = ncol(X)
-      
-      if (intercept == 1) {
-        ones = matrix( 1, rows=num_samples, cols=1 );
-        X = append( X, ones);
-      }
-      
-      iter_class = 1
-      
-      Y_local = 2 * ppred( Y, iter_class, "==") - 1
-      w_class = matrix( 0, rows=num_features, cols=1 )
-   
-      if (intercept == 1) {
-         zero_matrix = matrix( 0, rows=1, cols=1 );
-         w_class = t( append( t( w_class), zero_matrix));
-      }
-      
-      g_old = t(X) %*% Y_local
-      s = g_old
-      iter = 0
-      continue = 1
-   
-      while(continue == 1) {
-        # minimizing primal obj along direction s
-        step_sz = 0
-        Xd = X %*% s
-        wd = lambda * sum(w_class * s)
-        dd = lambda * sum(s * s)
-        continue1 = 1
-        while(continue1 == 1){
-         tmp_w = w_class + step_sz*s
-         out = 1 - Y_local * (X %*% tmp_w)
-         sv = ppred(out, 0, ">")
-         out = out * sv
-         g = wd + step_sz*dd - sum(out * Y_local * Xd)
-         h = dd + sum(Xd * sv * Xd)
-         step_sz = step_sz - g/h
-         if (g*g/h < 0.0000000001){
-          continue1 = 0
-         }
-        }
-       
-        #update weights
-        w_class = w_class + step_sz*s
-       
-        out = 1 - Y_local * (X %*% w_class)
-        sv = ppred(out, 0, ">")
-        out = sv * out
-        obj = 0.5 * sum(out * out) + lambda/2 * sum(w_class * w_class)
-        g_new = t(X) %*% (out * Y_local) - lambda * w_class
-      
-        tmp = sum(s * g_old)
-        
-        train_acc = sum(ppred(Y_local*(X%*%w_class), 0, ">="))/num_samples*100
-        #print("For class " + iter_class + " iteration " + iter + " training accuracy: " + train_acc)
-         
-        if((step_sz*tmp < epsilon*obj) | (iter >= max_iterations-1)){
-         continue = 0
-        }
-       
-        #non-linear CG step
-        be = sum(g_new * g_new)/sum(g_old * g_old)
-        s = be * s + g_new
-        g_old = g_new
-      
-        iter = iter + 1
-       }
-      
-      
-      w = w_class
-      iter_class = iter_class + 1
-      
-      while(iter_class <= num_classes){
-       Y_local = 2 * ppred(Y, iter_class, "==") - 1
-       w_class = matrix(0, rows=ncol(X), cols=1)
-       if (intercept == 1) {
-       	zero_matrix = matrix(0, rows=1, cols=1);
-       	w_class = t(append(t(w_class), zero_matrix));
-       }
-       
-       g_old = t(X) %*% Y_local
-       s = g_old
-      
-       iter = 0
-       continue = 1
-       while(continue == 1)  {
-        # minimizing primal obj along direction s
-        step_sz = 0
-        Xd = X %*% s
-        wd = lambda * sum(w_class * s)
-        dd = lambda * sum(s * s)
-        continue1 = 1
-        while(continue1 == 1){
-         tmp_w = w_class + step_sz*s
-         out = 1 - Y_local * (X %*% tmp_w)
-         sv = ppred(out, 0, ">")
-         out = out * sv
-         g = wd + step_sz*dd - sum(out * Y_local * Xd)
-         h = dd + sum(Xd * sv * Xd)
-         step_sz = step_sz - g/h
-         if (g*g/h < 0.0000000001){
-          continue1 = 0
-         }
-        }
-       
-        #update weights
-        w_class = w_class + step_sz*s
-       
-        out = 1 - Y_local * (X %*% w_class)
-        sv = ppred(out, 0, ">")
-        out = sv * out
-        obj = 0.5 * sum(out * out) + lambda/2 * sum(w_class * w_class)
-        g_new = t(X) %*% (out * Y_local) - lambda * w_class
-      
-        tmp = sum(s * g_old)
-        
-        train_acc = sum(ppred(Y_local*(X%*%w_class), 0, ">="))/num_samples*100
-        #print("For class " + iter_class + " iteration " + iter + " training accuracy: " + train_acc)
-         
-        if((step_sz*tmp < epsilon*obj) | (iter >= max_iterations-1)){
-         continue = 0
-        }
-       
-        #non-linear CG step
-        be = sum(g_new * g_new)/sum(g_old * g_old)
-        s = be * s + g_new
-        g_old = g_new
-      
-        iter = iter + 1
-       }
-      
-       w = append(w, w_class) 
-       iter_class = iter_class + 1
-      }
-      ret_W = w
-   }
-}
-
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# hadoop jar SystemML.jar -f CV_MultiClassSVM.dml -args demo/svm/X_50k_10 demo/svm/y_50k 4 0 2 0.001 1.0 100 
+
+
+X = read( $1, rows=$3, cols=$4 );
+y = read( $2, rows=$3, cols=1 );
+m = nrow( X );
+n = ncol( X );
+
+k = $5;
+
+#parameters for model training
+intercept = $6
+num_classes = $7
+epsilon = $8
+lambda = $9 
+maxiter = $10
+
+#CV
+#P = Rand(rows=m, cols=1, min=0.0, max=1.0, pdf = "uniform");
+#P = round(0.5+P*k);
+P = read($12, rows=$3, cols=1);
+
+ones = matrix(1, rows=1, cols=n);
+stats = matrix(0, rows=k, cols=1); #k-folds x 1-stats
+   
+parfor( i in 1:k )
+{
+   #prepare train/test fold projections
+   vPxi = ppred( P, i, "==" );   #  Select 1/k fraction of the rows
+   mPxi = (vPxi %*% ones);       #  for the i-th fold TEST set
+   #nvPxi = ppred( P, i, "!=" );
+   #nmPxi = (nvPxi %*% ones);  #note: inefficient for sparse data  
+
+   #create train/test folds
+   Xi = X * mPxi;  #  Create the TEST set with 1/k of all the rows
+   yi = y * vPxi;  #  Create the labels for the TEST set
+   nXi = X - Xi;   #  Create the TRAINING set with (k-1)/k of the rows
+   nyi = y - yi;   #  Create the labels for the TRAINING set
+   Xyi = append(Xi,yi); #keep alignment on removeEmpty
+   Xyi = removeEmpty( target=Xyi, margin="rows" );
+   Xi = Xyi[ , 1:n];
+   yi = Xyi[ , n+1];   
+   nXyi = append(nXi,nyi); #keep alignment on removeEmpty
+   nXyi = removeEmpty( target=nXyi, margin="rows" );
+   nXi = nXyi[ , 1:n];
+   nyi = nXyi[ , n+1];
+
+   #train multiclass SVM model per fold, use the TRAINING set
+   wi = multiClassSVM( nXi, nyi, intercept, num_classes, epsilon, lambda, maxiter)
+
+   #score multiclass SVM model per fold, use the TEST set
+   out_correct_pct = scoreMultiClassSVM( Xi, yi, wi, intercept);
+   
+   stats[i,1] = out_correct_pct;
+}
+
+# print output of stats
+z = printFoldStatistics( stats );
+
+write( stats, $11 );
+
+################################################################################
+
+printFoldStatistics = function( Matrix[double] stats)
+   return( Integer err)
+{
+   mean_correct_pct = mean( stats[,1])
+
+   print (" Mean Correct Percentage of the " + nrow( stats) + " Folds: " + mean_correct_pct);
+
+   err = 0
+}
+
+################################################################################
+
+scoreMultiClassSVM = function( Matrix[double] X, Matrix[double] y, Matrix[double] W, Integer intercept) 
+   return (Double out_correct_pct)
+{
+   Nt = nrow(X);
+   num_classes = ncol(W)
+   b = matrix( 0, rows=1, cols=num_classes )
+   n = ncol(X);
+
+   if (intercept == 1) 
+   {
+      b = W[n+1,]
+   }
+   
+   ones = matrix( 1, rows=Nt, cols=1 )
+
+   scores = X %*% W[1:n,] + ones %*% b;
+   
+   predicted_y = rowIndexMax( scores);
+   
+   correct_percentage = sum( ppred( predicted_y - y, 0, "==")) / Nt * 100;
+
+   out_correct_pct = correct_percentage;
+
+}
+
+
+################################################################################
+
+multiClassSVM = function (Matrix[double] X, Matrix[double] Y, Integer intercept, Integer num_classes, Double epsilon, Double lambda, Integer max_iterations) 
+   return (Matrix[double] ret_W) 
+{
+   check_X = sum(X)
+   if(check_X == 0){
+
+     print("X has no non-zeros")
+
+   } else {
+
+      num_samples = nrow(X)
+      num_features = ncol(X)
+      
+      if (intercept == 1) {
+        ones = matrix( 1, rows=num_samples, cols=1 );
+        X = append( X, ones);
+      }
+      
+      iter_class = 1
+      
+      Y_local = 2 * ppred( Y, iter_class, "==") - 1
+      w_class = matrix( 0, rows=num_features, cols=1 )
+   
+      if (intercept == 1) {
+         zero_matrix = matrix( 0, rows=1, cols=1 );
+         w_class = t( append( t( w_class), zero_matrix));
+      }
+      
+      g_old = t(X) %*% Y_local
+      s = g_old
+      iter = 0
+      continue = 1
+   
+      while(continue == 1) {
+        # minimizing primal obj along direction s
+        step_sz = 0
+        Xd = X %*% s
+        wd = lambda * sum(w_class * s)
+        dd = lambda * sum(s * s)
+        continue1 = 1
+        while(continue1 == 1){
+         tmp_w = w_class + step_sz*s
+         out = 1 - Y_local * (X %*% tmp_w)
+         sv = ppred(out, 0, ">")
+         out = out * sv
+         g = wd + step_sz*dd - sum(out * Y_local * Xd)
+         h = dd + sum(Xd * sv * Xd)
+         step_sz = step_sz - g/h
+         if (g*g/h < 0.0000000001){
+          continue1 = 0
+         }
+        }
+       
+        #update weights
+        w_class = w_class + step_sz*s
+       
+        out = 1 - Y_local * (X %*% w_class)
+        sv = ppred(out, 0, ">")
+        out = sv * out
+        obj = 0.5 * sum(out * out) + lambda/2 * sum(w_class * w_class)
+        g_new = t(X) %*% (out * Y_local) - lambda * w_class
+      
+        tmp = sum(s * g_old)
+        
+        train_acc = sum(ppred(Y_local*(X%*%w_class), 0, ">="))/num_samples*100
+        #print("For class " + iter_class + " iteration " + iter + " training accuracy: " + train_acc)
+         
+        if((step_sz*tmp < epsilon*obj) | (iter >= max_iterations-1)){
+         continue = 0
+        }
+       
+        #non-linear CG step
+        be = sum(g_new * g_new)/sum(g_old * g_old)
+        s = be * s + g_new
+        g_old = g_new
+      
+        iter = iter + 1
+       }
+      
+      
+      w = w_class
+      iter_class = iter_class + 1
+      
+      while(iter_class <= num_classes){
+       Y_local = 2 * ppred(Y, iter_class, "==") - 1
+       w_class = matrix(0, rows=ncol(X), cols=1)
+       if (intercept == 1) {
+       	zero_matrix = matrix(0, rows=1, cols=1);
+       	w_class = t(append(t(w_class), zero_matrix));
+       }
+       
+       g_old = t(X) %*% Y_local
+       s = g_old
+      
+       iter = 0
+       continue = 1
+       while(continue == 1)  {
+        # minimizing primal obj along direction s
+        step_sz = 0
+        Xd = X %*% s
+        wd = lambda * sum(w_class * s)
+        dd = lambda * sum(s * s)
+        continue1 = 1
+        while(continue1 == 1){
+         tmp_w = w_class + step_sz*s
+         out = 1 - Y_local * (X %*% tmp_w)
+         sv = ppred(out, 0, ">")
+         out = out * sv
+         g = wd + step_sz*dd - sum(out * Y_local * Xd)
+         h = dd + sum(Xd * sv * Xd)
+         step_sz = step_sz - g/h
+         if (g*g/h < 0.0000000001){
+          continue1 = 0
+         }
+        }
+       
+        #update weights
+        w_class = w_class + step_sz*s
+       
+        out = 1 - Y_local * (X %*% w_class)
+        sv = ppred(out, 0, ">")
+        out = sv * out
+        obj = 0.5 * sum(out * out) + lambda/2 * sum(w_class * w_class)
+        g_new = t(X) %*% (out * Y_local) - lambda * w_class
+      
+        tmp = sum(s * g_old)
+        
+        train_acc = sum(ppred(Y_local*(X%*%w_class), 0, ">="))/num_samples*100
+        #print("For class " + iter_class + " iteration " + iter + " training accuracy: " + train_acc)
+         
+        if((step_sz*tmp < epsilon*obj) | (iter >= max_iterations-1)){
+         continue = 0
+        }
+       
+        #non-linear CG step
+        be = sum(g_new * g_new)/sum(g_old * g_old)
+        s = be * s + g_new
+        g_old = g_new
+      
+        iter = iter + 1
+       }
+      
+       w = append(w, w_class) 
+       iter_class = iter_class + 1
+      }
+      ret_W = w
+   }
+}
+
+


[11/55] [partial] incubator-systemml git commit: [SYSTEMML-482] [SYSTEMML-480] Adding a Git attributes file to enfore Unix-styled line endings, and normalizing all of the line endings.

Posted by du...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/io/csv/in/transfusion_1.data.single
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/io/csv/in/transfusion_1.data.single b/src/test/scripts/functions/io/csv/in/transfusion_1.data.single
index 1ec0901..1a35bfe 100644
--- a/src/test/scripts/functions/io/csv/in/transfusion_1.data.single
+++ b/src/test/scripts/functions/io/csv/in/transfusion_1.data.single
@@ -1,749 +1,749 @@
-Recency (months),Frequency (times),Monetary (c.c. blood),Time (months),"whether he/she donated blood in March 2007"
-2 ,50,12500,98 ,1
-0 ,13,3250,28 ,1
-1 ,16,4000,35 ,1
-2 ,20,5000,45 ,1
-1 ,24,6000,77 ,0
-4 ,4,1000,4 ,0
-2 ,7,1750,14 ,1
-1 ,12,3000,35 ,0
-2 ,9,2250,22 ,1
-5 ,46,11500,98 ,1
-4 ,23,5750,58 ,0
-0 ,3,750,4 ,0
-2 ,10,2500,28 ,1
-1 ,13,3250,47 ,0
-2 ,6,1500,15 ,1
-2 ,5,1250,11 ,1
-2 ,14,3500,48 ,1
-2 ,15,3750,49 ,1
-2 ,6,1500,15 ,1
-2 ,3,750,4 ,1
-2 ,3,750,4 ,1
-4 ,11,2750,28 ,0
-2 ,6,1500,16 ,1
-2 ,6,1500,16 ,1
-9 ,9,2250,16 ,0
-4 ,14,3500,40 ,0
-4 ,6,1500,14 ,0
-4 ,12,3000,34 ,1
-4 ,5,1250,11 ,1
-4 ,8,2000,21 ,0
-1 ,14,3500,58 ,0
-4 ,10,2500,28 ,1
-4 ,10,2500,28 ,1
-4 ,9,2250,26 ,1
-2 ,16,4000,64 ,0
-2 ,8,2000,28 ,1
-2 ,12,3000,47 ,1
-4 ,6,1500,16 ,1
-2 ,14,3500,57 ,1
-4 ,7,1750,22 ,1
-2 ,13,3250,53 ,1
-2 ,5,1250,16 ,0
-2 ,5,1250,16 ,1
-2 ,5,1250,16 ,0
-4 ,20,5000,69 ,1
-4 ,9,2250,28 ,1
-2 ,9,2250,36 ,0
-2 ,2,500,2 ,0
-2 ,2,500,2 ,0
-2 ,2,500,2 ,0
-2 ,11,2750,46 ,0
-2 ,11,2750,46 ,1
-2 ,6,1500,22 ,0
-2 ,12,3000,52 ,0
-4 ,5,1250,14 ,1
-4 ,19,4750,69 ,1
-4 ,8,2000,26 ,1
-2 ,7,1750,28 ,1
-2 ,16,4000,81 ,0
-3 ,6,1500,21 ,0
-2 ,7,1750,29 ,0
-2 ,8,2000,35 ,1
-2 ,10,2500,49 ,0
-4 ,5,1250,16 ,1
-2 ,3,750,9 ,1
-3 ,16,4000,74 ,0
-2 ,4,1000,14 ,1
-0 ,2,500,4 ,0
-4 ,7,1750,25 ,0
-1 ,9,2250,51 ,0
-2 ,4,1000,16 ,0
-2 ,4,1000,16 ,0
-4 ,17,4250,71 ,1
-2 ,2,500,4 ,0
-2 ,2,500,4 ,1
-2 ,2,500,4 ,1
-2 ,4,1000,16 ,1
-2 ,2,500,4 ,0
-2 ,2,500,4 ,0
-2 ,2,500,4 ,0
-4 ,6,1500,23 ,1
-2 ,4,1000,16 ,0
-2 ,4,1000,16 ,0
-2 ,4,1000,16 ,0
-2 ,6,1500,28 ,1
-2 ,6,1500,28 ,0
-4 ,2,500,4 ,0
-4 ,2,500,4 ,0
-4 ,2,500,4 ,0
-2 ,7,1750,35 ,1
-4 ,2,500,4 ,1
-4 ,2,500,4 ,0
-4 ,2,500,4 ,0
-4 ,2,500,4 ,0
-12 ,11,2750,23 ,0
-4 ,7,1750,28 ,0
-3 ,17,4250,86 ,0
-4 ,9,2250,38 ,1
-4 ,4,1000,14 ,1
-5 ,7,1750,26 ,1
-4 ,8,2000,34 ,1
-2 ,13,3250,76 ,1
-4 ,9,2250,40 ,0
-2 ,5,1250,26 ,0
-2 ,5,1250,26 ,0
-6 ,17,4250,70 ,0
-0 ,8,2000,59 ,0
-3 ,5,1250,26 ,0
-2 ,3,750,14 ,0
-2 ,10,2500,64 ,0
-4 ,5,1250,23 ,1
-4 ,9,2250,46 ,0
-4 ,5,1250,23 ,0
-4 ,8,2000,40 ,1
-2 ,12,3000,82 ,0
-11 ,24,6000,64 ,0
-2 ,7,1750,46 ,1
-4 ,11,2750,61 ,0
-1 ,7,1750,57 ,0
-2 ,11,2750,79 ,1
-2 ,3,750,16 ,1
-4 ,5,1250,26 ,1
-2 ,6,1500,41 ,1
-2 ,5,1250,33 ,1
-2 ,4,1000,26 ,0
-2 ,5,1250,34 ,0
-4 ,8,2000,46 ,1
-2 ,4,1000,26 ,0
-4 ,8,2000,48 ,1
-2 ,2,500,10 ,1
-4 ,5,1250,28 ,0
-2 ,12,3000,95 ,0
-2 ,2,500,10 ,0
-4 ,6,1500,35 ,0
-2 ,11,2750,88 ,0
-2 ,3,750,19 ,0
-2 ,5,1250,37 ,0
-2 ,12,3000,98 ,0
-9 ,5,1250,19 ,0
-2 ,2,500,11 ,0
-2 ,9,2250,74 ,0
-5 ,14,3500,86 ,0
-4 ,3,750,16 ,0
-4 ,3,750,16 ,0
-4 ,2,500,9 ,1
-4 ,3,750,16 ,1
-6 ,3,750,14 ,0
-2 ,2,500,11 ,0
-2 ,2,500,11 ,1
-2 ,2,500,11 ,0
-2 ,7,1750,58 ,1
-4 ,6,1500,39 ,0
-4 ,11,2750,78 ,0
-2 ,1,250,2 ,1
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,1
-2 ,1,250,2 ,1
-2 ,1,250,2 ,1
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-11 ,10,2500,35 ,0
-11 ,4,1000,16 ,1
-4 ,5,1250,33 ,1
-4 ,6,1500,41 ,1
-2 ,3,750,22 ,0
-4 ,4,1000,26 ,1
-10 ,4,1000,16 ,0
-2 ,4,1000,35 ,0
-4 ,12,3000,88 ,0
-13 ,8,2000,26 ,0
-11 ,9,2250,33 ,0
-4 ,5,1250,34 ,0
-4 ,4,1000,26 ,0
-8 ,15,3750,77 ,0
-4 ,5,1250,35 ,1
-4 ,7,1750,52 ,0
-4 ,7,1750,52 ,0
-2 ,4,1000,35 ,0
-11 ,11,2750,42 ,0
-2 ,2,500,14 ,0
-2 ,5,1250,47 ,1
-9 ,8,2000,38 ,1
-4 ,6,1500,47 ,0
-11 ,7,1750,29 ,0
-9 ,9,2250,45 ,0
-4 ,6,1500,52 ,0
-4 ,7,1750,58 ,0
-6 ,2,500,11 ,1
-4 ,7,1750,58 ,0
-11 ,9,2250,38 ,0
-11 ,6,1500,26 ,0
-2 ,2,500,16 ,0
-2 ,7,1750,76 ,0
-11 ,6,1500,27 ,0
-11 ,3,750,14 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,1
-4 ,1,250,4 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,1
-4 ,1,250,4 ,1
-4 ,1,250,4 ,0
-4 ,1,250,4 ,1
-4 ,1,250,4 ,1
-4 ,1,250,4 ,0
-4 ,3,750,24 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,1
-4 ,1,250,4 ,0
-10 ,8,2000,39 ,0
-14 ,7,1750,26 ,0
-8 ,10,2500,63 ,0
-11 ,3,750,15 ,0
-4 ,2,500,14 ,0
-2 ,4,1000,43 ,0
-8 ,9,2250,58 ,0
-8 ,8,2000,52 ,1
-11 ,22,5500,98 ,0
-4 ,3,750,25 ,1
-11 ,17,4250,79 ,1
-9 ,2,500,11 ,0
-4 ,5,1250,46 ,0
-11 ,12,3000,58 ,0
-7 ,12,3000,86 ,0
-11 ,2,500,11 ,0
-11 ,2,500,11 ,0
-11 ,2,500,11 ,0
-2 ,6,1500,75 ,0
-11 ,8,2000,41 ,1
-11 ,3,750,16 ,1
-12 ,13,3250,59 ,0
-2 ,3,750,35 ,0
-16 ,8,2000,28 ,0
-11 ,7,1750,37 ,0
-4 ,3,750,28 ,0
-12 ,12,3000,58 ,0
-4 ,4,1000,41 ,0
-11 ,14,3500,73 ,1
-2 ,2,500,23 ,0
-2 ,3,750,38 ,1
-4 ,5,1250,58 ,0
-4 ,4,1000,43 ,1
-3 ,2,500,23 ,0
-11 ,8,2000,46 ,0
-4 ,7,1750,82 ,0
-13 ,4,1000,21 ,0
-16 ,11,2750,40 ,0
-16 ,7,1750,28 ,0
-7 ,2,500,16 ,0
-4 ,5,1250,58 ,0
-4 ,5,1250,58 ,0
-4 ,4,1000,46 ,0
-14 ,13,3250,57 ,0
-4 ,3,750,34 ,0
-14 ,18,4500,78 ,0
-11 ,8,2000,48 ,0
-14 ,16,4000,70 ,0
-14 ,4,1000,22 ,1
-14 ,5,1250,26 ,0
-8 ,2,500,16 ,0
-11 ,5,1250,33 ,0
-11 ,2,500,14 ,0
-4 ,2,500,23 ,0
-9 ,2,500,16 ,1
-14 ,5,1250,28 ,1
-14 ,3,750,19 ,1
-14 ,4,1000,23 ,1
-16 ,12,3000,50 ,0
-11 ,4,1000,28 ,0
-11 ,5,1250,35 ,0
-11 ,5,1250,35 ,0
-2 ,4,1000,70 ,0
-14 ,5,1250,28 ,0
-14 ,2,500,14 ,0
-14 ,2,500,14 ,0
-14 ,2,500,14 ,0
-14 ,2,500,14 ,0
-14 ,2,500,14 ,0
-14 ,2,500,14 ,0
-2 ,3,750,52 ,0
-14 ,6,1500,34 ,0
-11 ,5,1250,37 ,1
-4 ,5,1250,74 ,0
-11 ,3,750,23 ,0
-16 ,4,1000,23 ,0
-16 ,3,750,19 ,0
-11 ,5,1250,38 ,0
-11 ,2,500,16 ,0
-12 ,9,2250,60 ,0
-9 ,1,250,9 ,0
-9 ,1,250,9 ,0
-4 ,2,500,29 ,0
-11 ,2,500,17 ,0
-14 ,4,1000,26 ,0
-11 ,9,2250,72 ,1
-11 ,5,1250,41 ,0
-15 ,16,4000,82 ,0
-9 ,5,1250,51 ,1
-11 ,4,1000,34 ,0
-14 ,8,2000,50 ,1
-16 ,7,1750,38 ,0
-14 ,2,500,16 ,0
-2 ,2,500,41 ,0
-14 ,16,4000,98 ,0
-14 ,4,1000,28 ,1
-16 ,7,1750,39 ,0
-14 ,7,1750,47 ,0
-16 ,6,1500,35 ,0
-16 ,6,1500,35 ,1
-11 ,7,1750,62 ,1
-16 ,2,500,16 ,0
-16 ,3,750,21 ,1
-11 ,3,750,28 ,0
-11 ,7,1750,64 ,0
-11 ,1,250,11 ,1
-9 ,3,750,34 ,0
-14 ,4,1000,30 ,0
-23 ,38,9500,98 ,0
-11 ,6,1500,58 ,0
-11 ,1,250,11 ,0
-11 ,1,250,11 ,0
-11 ,1,250,11 ,0
-11 ,1,250,11 ,0
-11 ,1,250,11 ,0
-11 ,1,250,11 ,0
-11 ,1,250,11 ,0
-11 ,1,250,11 ,0
-11 ,2,500,21 ,0
-11 ,5,1250,50 ,0
-11 ,2,500,21 ,0
-16 ,4,1000,28 ,0
-4 ,2,500,41 ,0
-16 ,6,1500,40 ,0
-14 ,3,750,26 ,0
-9 ,2,500,26 ,0
-21 ,16,4000,64 ,0
-14 ,6,1500,51 ,0
-11 ,2,500,24 ,0
-4 ,3,750,71 ,0
-21 ,13,3250,57 ,0
-11 ,6,1500,71 ,0
-14 ,2,500,21 ,1
-23 ,15,3750,57 ,0
-14 ,4,1000,38 ,0
-11 ,2,500,26 ,0
-16 ,5,1250,40 ,1
-4 ,2,500,51 ,1
-14 ,3,750,31 ,0
-4 ,2,500,52 ,0
-9 ,4,1000,65 ,0
-14 ,4,1000,40 ,0
-11 ,3,750,40 ,1
-14 ,5,1250,50 ,0
-14 ,1,250,14 ,0
-14 ,1,250,14 ,0
-14 ,1,250,14 ,0
-14 ,1,250,14 ,0
-14 ,1,250,14 ,0
-14 ,1,250,14 ,0
-14 ,1,250,14 ,0
-14 ,1,250,14 ,0
-14 ,7,1750,72 ,0
-14 ,1,250,14 ,0
-14 ,1,250,14 ,0
-9 ,3,750,52 ,0
-14 ,7,1750,73 ,0
-11 ,4,1000,58 ,0
-11 ,4,1000,59 ,0
-4 ,2,500,59 ,0
-11 ,4,1000,61 ,0
-16 ,4,1000,40 ,0
-16 ,10,2500,89 ,0
-21 ,2,500,21 ,1
-21 ,3,750,26 ,0
-16 ,8,2000,76 ,0
-21 ,3,750,26 ,1
-18 ,2,500,23 ,0
-23 ,5,1250,33 ,0
-23 ,8,2000,46 ,0
-16 ,3,750,34 ,0
-14 ,5,1250,64 ,0
-14 ,3,750,41 ,0
-16 ,1,250,16 ,0
-16 ,1,250,16 ,0
-16 ,1,250,16 ,0
-16 ,1,250,16 ,0
-16 ,1,250,16 ,0
-16 ,1,250,16 ,0
-16 ,1,250,16 ,0
-16 ,4,1000,45 ,0
-16 ,1,250,16 ,0
-16 ,1,250,16 ,0
-16 ,1,250,16 ,0
-16 ,1,250,16 ,0
-16 ,1,250,16 ,0
-16 ,2,500,26 ,0
-21 ,2,500,23 ,0
-16 ,2,500,27 ,0
-21 ,2,500,23 ,0
-21 ,2,500,23 ,0
-14 ,4,1000,57 ,0
-16 ,5,1250,60 ,0
-23 ,2,500,23 ,0
-14 ,5,1250,74 ,0
-23 ,3,750,28 ,0
-16 ,3,750,40 ,0
-9 ,2,500,52 ,0
-9 ,2,500,52 ,0
-16 ,7,1750,87 ,1
-14 ,4,1000,64 ,0
-14 ,2,500,35 ,0
-16 ,7,1750,93 ,0
-21 ,2,500,25 ,0
-14 ,3,750,52 ,0
-23 ,14,3500,93 ,0
-18 ,8,2000,95 ,0
-16 ,3,750,46 ,0
-11 ,3,750,76 ,0
-11 ,2,500,52 ,0
-11 ,3,750,76 ,0
-23 ,12,3000,86 ,0
-21 ,3,750,35 ,0
-23 ,2,500,26 ,0
-23 ,2,500,26 ,0
-23 ,8,2000,64 ,0
-16 ,3,750,50 ,0
-23 ,3,750,33 ,0
-21 ,3,750,38 ,0
-23 ,2,500,28 ,0
-21 ,1,250,21 ,0
-21 ,1,250,21 ,0
-21 ,1,250,21 ,0
-21 ,1,250,21 ,0
-21 ,1,250,21 ,0
-21 ,1,250,21 ,0
-21 ,1,250,21 ,0
-21 ,1,250,21 ,0
-21 ,1,250,21 ,0
-21 ,1,250,21 ,1
-21 ,1,250,21 ,0
-21 ,1,250,21 ,0
-21 ,5,1250,60 ,0
-23 ,4,1000,45 ,0
-21 ,4,1000,52 ,0
-22 ,1,250,22 ,1
-11 ,2,500,70 ,0
-23 ,5,1250,58 ,0
-23 ,3,750,40 ,0
-23 ,3,750,41 ,0
-14 ,3,750,83 ,0
-21 ,2,500,35 ,0
-26 ,5,1250,49 ,1
-23 ,6,1500,70 ,0
-23 ,1,250,23 ,0
-23 ,1,250,23 ,0
-23 ,1,250,23 ,0
-23 ,1,250,23 ,0
-23 ,1,250,23 ,0
-23 ,1,250,23 ,0
-23 ,1,250,23 ,0
-23 ,1,250,23 ,0
-23 ,4,1000,53 ,0
-21 ,6,1500,86 ,0
-23 ,3,750,48 ,0
-21 ,2,500,41 ,0
-21 ,3,750,64 ,0
-16 ,2,500,70 ,0
-21 ,3,750,70 ,0
-23 ,4,1000,87 ,0
-23 ,3,750,89 ,0
-23 ,2,500,87 ,0
-35 ,3,750,64 ,0
-38 ,1,250,38 ,0
-38 ,1,250,38 ,0
-40 ,1,250,40 ,0
-74 ,1,250,74 ,0
-2 ,43,10750,86 ,1
-6 ,22,5500,28 ,1
-2 ,34,8500,77 ,1
-2 ,44,11000,98 ,0
-0 ,26,6500,76 ,1
-2 ,41,10250,98 ,1
-3 ,21,5250,42 ,1
-2 ,11,2750,23 ,0
-2 ,21,5250,52 ,1
-2 ,13,3250,32 ,1
-4 ,4,1000,4 ,1
-2 ,11,2750,26 ,0
-2 ,11,2750,28 ,0
-3 ,14,3500,35 ,0
-4 ,16,4000,38 ,1
-4 ,6,1500,14 ,0
-3 ,5,1250,12 ,1
-4 ,33,8250,98 ,1
-3 ,10,2500,33 ,1
-4 ,10,2500,28 ,1
-2 ,11,2750,40 ,1
-2 ,11,2750,41 ,1
-4 ,13,3250,39 ,1
-1 ,10,2500,43 ,1
-4 ,9,2250,28 ,0
-2 ,4,1000,11 ,0
-2 ,5,1250,16 ,1
-2 ,15,3750,64 ,0
-5 ,24,6000,79 ,0
-2 ,6,1500,22 ,1
-4 ,5,1250,16 ,1
-2 ,4,1000,14 ,1
-4 ,8,2000,28 ,0
-2 ,4,1000,14 ,0
-2 ,6,1500,26 ,0
-4 ,5,1250,16 ,1
-2 ,7,1750,32 ,1
-2 ,6,1500,26 ,1
-2 ,8,2000,38 ,1
-2 ,2,500,4 ,1
-2 ,6,1500,28 ,1
-2 ,10,2500,52 ,0
-4 ,16,4000,70 ,1
-4 ,2,500,4 ,1
-1 ,14,3500,95 ,0
-4 ,2,500,4 ,1
-7 ,14,3500,48 ,0
-2 ,3,750,11 ,0
-2 ,12,3000,70 ,1
-4 ,7,1750,32 ,1
-4 ,4,1000,16 ,0
-2 ,6,1500,35 ,1
-4 ,6,1500,28 ,1
-2 ,3,750,14 ,0
-2 ,4,1000,23 ,0
-4 ,4,1000,18 ,0
-5 ,6,1500,28 ,0
-4 ,6,1500,30 ,0
-14 ,5,1250,14 ,0
-3 ,8,2000,50 ,0
-4 ,11,2750,64 ,1
-4 ,9,2250,52 ,0
-4 ,16,4000,98 ,1
-7 ,10,2500,47 ,0
-4 ,14,3500,86 ,0
-2 ,9,2250,75 ,0
-4 ,6,1500,35 ,0
-4 ,9,2250,55 ,0
-4 ,6,1500,35 ,1
-2 ,6,1500,45 ,0
-2 ,6,1500,47 ,0
-4 ,2,500,9 ,0
-2 ,2,500,11 ,1
-2 ,2,500,11 ,0
-2 ,2,500,11 ,1
-4 ,6,1500,38 ,1
-3 ,4,1000,29 ,1
-9 ,9,2250,38 ,0
-11 ,5,1250,18 ,0
-2 ,3,750,21 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,1
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,1
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-11 ,11,2750,38 ,0
-2 ,3,750,22 ,0
-9 ,11,2750,49 ,1
-5 ,11,2750,75 ,0
-3 ,5,1250,38 ,0
-3 ,1,250,3 ,1
-4 ,6,1500,43 ,0
-2 ,3,750,24 ,0
-12 ,11,2750,39 ,0
-2 ,2,500,14 ,0
-4 ,6,1500,46 ,0
-9 ,3,750,14 ,0
-14 ,8,2000,26 ,0
-4 ,2,500,13 ,0
-4 ,11,2750,95 ,0
-2 ,7,1750,77 ,0
-2 ,7,1750,77 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,1
-4 ,1,250,4 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,1
-4 ,1,250,4 ,0
-4 ,7,1750,62 ,0
-4 ,1,250,4 ,0
-4 ,4,1000,34 ,1
-11 ,6,1500,28 ,0
-13 ,3,750,14 ,1
-7 ,5,1250,35 ,0
-9 ,9,2250,54 ,0
-11 ,2,500,11 ,0
-2 ,5,1250,63 ,0
-7 ,11,2750,89 ,0
-8 ,9,2250,64 ,0
-2 ,2,500,22 ,0
-6 ,3,750,26 ,0
-12 ,15,3750,71 ,0
-13 ,3,750,16 ,0
-11 ,16,4000,89 ,0
-4 ,5,1250,58 ,0
-14 ,7,1750,35 ,0
-11 ,4,1000,27 ,0
-7 ,9,2250,89 ,1
-11 ,8,2000,52 ,1
-7 ,5,1250,52 ,0
-11 ,6,1500,41 ,0
-10 ,5,1250,38 ,0
-14 ,2,500,14 ,1
-14 ,2,500,14 ,0
-14 ,2,500,14 ,0
-2 ,2,500,33 ,0
-11 ,3,750,23 ,0
-14 ,8,2000,46 ,0
-9 ,1,250,9 ,0
-16 ,5,1250,27 ,0
-14 ,4,1000,26 ,0
-4 ,2,500,30 ,0
-14 ,3,750,21 ,0
-16 ,16,4000,77 ,0
-4 ,2,500,31 ,0
-14 ,8,2000,50 ,0
-11 ,3,750,26 ,0
-14 ,7,1750,45 ,0
-15 ,5,1250,33 ,0
-16 ,2,500,16 ,0
-16 ,3,750,21 ,0
-11 ,8,2000,72 ,0
-11 ,1,250,11 ,0
-11 ,1,250,11 ,0
-11 ,1,250,11 ,0
-11 ,1,250,11 ,1
-11 ,1,250,11 ,0
-2 ,3,750,75 ,1
-2 ,3,750,77 ,0
-16 ,4,1000,28 ,0
-16 ,15,3750,87 ,0
-16 ,14,3500,83 ,0
-16 ,10,2500,62 ,0
-16 ,3,750,23 ,0
-14 ,3,750,26 ,0
-23 ,19,4750,62 ,0
-11 ,7,1750,75 ,0
-14 ,3,750,28 ,0
-20 ,14,3500,69 ,1
-4 ,2,500,46 ,0
-11 ,2,500,25 ,0
-11 ,3,750,37 ,0
-16 ,4,1000,33 ,0
-21 ,7,1750,38 ,0
-13 ,7,1750,76 ,0
-16 ,6,1500,50 ,0
-14 ,3,750,33 ,0
-14 ,1,250,14 ,0
-14 ,1,250,14 ,0
-14 ,1,250,14 ,0
-14 ,1,250,14 ,0
-14 ,1,250,14 ,0
-14 ,1,250,14 ,0
-17 ,7,1750,58 ,1
-14 ,3,750,35 ,0
-14 ,3,750,35 ,0
-16 ,7,1750,64 ,0
-21 ,2,500,21 ,0
-16 ,3,750,35 ,0
-16 ,1,250,16 ,0
-16 ,1,250,16 ,0
-16 ,1,250,16 ,0
-16 ,1,250,16 ,0
-16 ,1,250,16 ,0
-14 ,2,500,29 ,0
-11 ,4,1000,74 ,0
-11 ,2,500,38 ,1
-21 ,6,1500,48 ,0
-23 ,2,500,23 ,0
-23 ,6,1500,45 ,0
-14 ,2,500,35 ,1
-16 ,6,1500,81 ,0
-16 ,4,1000,58 ,0
-16 ,5,1250,71 ,0
-21 ,2,500,26 ,0
-21 ,3,750,35 ,0
-21 ,3,750,35 ,0
-23 ,8,2000,69 ,0
-21 ,3,750,38 ,0
-23 ,3,750,35 ,0
-21 ,3,750,40 ,0
-23 ,2,500,28 ,0
-21 ,1,250,21 ,0
-21 ,1,250,21 ,0
-25 ,6,1500,50 ,0
-21 ,1,250,21 ,0
-21 ,1,250,21 ,0
-23 ,3,750,39 ,0
-21 ,2,500,33 ,0
-14 ,3,750,79 ,0
-23 ,1,250,23 ,1
-23 ,1,250,23 ,0
-23 ,1,250,23 ,0
-23 ,1,250,23 ,0
-23 ,1,250,23 ,0
-23 ,1,250,23 ,0
-23 ,1,250,23 ,0
-23 ,4,1000,52 ,0
-23 ,1,250,23 ,0
-23 ,7,1750,88 ,0
-16 ,3,750,86 ,0
-23 ,2,500,38 ,0
-21 ,2,500,52 ,0
-23 ,3,750,62 ,0
-39 ,1,250,39 ,0
+Recency (months),Frequency (times),Monetary (c.c. blood),Time (months),"whether he/she donated blood in March 2007"
+2 ,50,12500,98 ,1
+0 ,13,3250,28 ,1
+1 ,16,4000,35 ,1
+2 ,20,5000,45 ,1
+1 ,24,6000,77 ,0
+4 ,4,1000,4 ,0
+2 ,7,1750,14 ,1
+1 ,12,3000,35 ,0
+2 ,9,2250,22 ,1
+5 ,46,11500,98 ,1
+4 ,23,5750,58 ,0
+0 ,3,750,4 ,0
+2 ,10,2500,28 ,1
+1 ,13,3250,47 ,0
+2 ,6,1500,15 ,1
+2 ,5,1250,11 ,1
+2 ,14,3500,48 ,1
+2 ,15,3750,49 ,1
+2 ,6,1500,15 ,1
+2 ,3,750,4 ,1
+2 ,3,750,4 ,1
+4 ,11,2750,28 ,0
+2 ,6,1500,16 ,1
+2 ,6,1500,16 ,1
+9 ,9,2250,16 ,0
+4 ,14,3500,40 ,0
+4 ,6,1500,14 ,0
+4 ,12,3000,34 ,1
+4 ,5,1250,11 ,1
+4 ,8,2000,21 ,0
+1 ,14,3500,58 ,0
+4 ,10,2500,28 ,1
+4 ,10,2500,28 ,1
+4 ,9,2250,26 ,1
+2 ,16,4000,64 ,0
+2 ,8,2000,28 ,1
+2 ,12,3000,47 ,1
+4 ,6,1500,16 ,1
+2 ,14,3500,57 ,1
+4 ,7,1750,22 ,1
+2 ,13,3250,53 ,1
+2 ,5,1250,16 ,0
+2 ,5,1250,16 ,1
+2 ,5,1250,16 ,0
+4 ,20,5000,69 ,1
+4 ,9,2250,28 ,1
+2 ,9,2250,36 ,0
+2 ,2,500,2 ,0
+2 ,2,500,2 ,0
+2 ,2,500,2 ,0
+2 ,11,2750,46 ,0
+2 ,11,2750,46 ,1
+2 ,6,1500,22 ,0
+2 ,12,3000,52 ,0
+4 ,5,1250,14 ,1
+4 ,19,4750,69 ,1
+4 ,8,2000,26 ,1
+2 ,7,1750,28 ,1
+2 ,16,4000,81 ,0
+3 ,6,1500,21 ,0
+2 ,7,1750,29 ,0
+2 ,8,2000,35 ,1
+2 ,10,2500,49 ,0
+4 ,5,1250,16 ,1
+2 ,3,750,9 ,1
+3 ,16,4000,74 ,0
+2 ,4,1000,14 ,1
+0 ,2,500,4 ,0
+4 ,7,1750,25 ,0
+1 ,9,2250,51 ,0
+2 ,4,1000,16 ,0
+2 ,4,1000,16 ,0
+4 ,17,4250,71 ,1
+2 ,2,500,4 ,0
+2 ,2,500,4 ,1
+2 ,2,500,4 ,1
+2 ,4,1000,16 ,1
+2 ,2,500,4 ,0
+2 ,2,500,4 ,0
+2 ,2,500,4 ,0
+4 ,6,1500,23 ,1
+2 ,4,1000,16 ,0
+2 ,4,1000,16 ,0
+2 ,4,1000,16 ,0
+2 ,6,1500,28 ,1
+2 ,6,1500,28 ,0
+4 ,2,500,4 ,0
+4 ,2,500,4 ,0
+4 ,2,500,4 ,0
+2 ,7,1750,35 ,1
+4 ,2,500,4 ,1
+4 ,2,500,4 ,0
+4 ,2,500,4 ,0
+4 ,2,500,4 ,0
+12 ,11,2750,23 ,0
+4 ,7,1750,28 ,0
+3 ,17,4250,86 ,0
+4 ,9,2250,38 ,1
+4 ,4,1000,14 ,1
+5 ,7,1750,26 ,1
+4 ,8,2000,34 ,1
+2 ,13,3250,76 ,1
+4 ,9,2250,40 ,0
+2 ,5,1250,26 ,0
+2 ,5,1250,26 ,0
+6 ,17,4250,70 ,0
+0 ,8,2000,59 ,0
+3 ,5,1250,26 ,0
+2 ,3,750,14 ,0
+2 ,10,2500,64 ,0
+4 ,5,1250,23 ,1
+4 ,9,2250,46 ,0
+4 ,5,1250,23 ,0
+4 ,8,2000,40 ,1
+2 ,12,3000,82 ,0
+11 ,24,6000,64 ,0
+2 ,7,1750,46 ,1
+4 ,11,2750,61 ,0
+1 ,7,1750,57 ,0
+2 ,11,2750,79 ,1
+2 ,3,750,16 ,1
+4 ,5,1250,26 ,1
+2 ,6,1500,41 ,1
+2 ,5,1250,33 ,1
+2 ,4,1000,26 ,0
+2 ,5,1250,34 ,0
+4 ,8,2000,46 ,1
+2 ,4,1000,26 ,0
+4 ,8,2000,48 ,1
+2 ,2,500,10 ,1
+4 ,5,1250,28 ,0
+2 ,12,3000,95 ,0
+2 ,2,500,10 ,0
+4 ,6,1500,35 ,0
+2 ,11,2750,88 ,0
+2 ,3,750,19 ,0
+2 ,5,1250,37 ,0
+2 ,12,3000,98 ,0
+9 ,5,1250,19 ,0
+2 ,2,500,11 ,0
+2 ,9,2250,74 ,0
+5 ,14,3500,86 ,0
+4 ,3,750,16 ,0
+4 ,3,750,16 ,0
+4 ,2,500,9 ,1
+4 ,3,750,16 ,1
+6 ,3,750,14 ,0
+2 ,2,500,11 ,0
+2 ,2,500,11 ,1
+2 ,2,500,11 ,0
+2 ,7,1750,58 ,1
+4 ,6,1500,39 ,0
+4 ,11,2750,78 ,0
+2 ,1,250,2 ,1
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,1
+2 ,1,250,2 ,1
+2 ,1,250,2 ,1
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+11 ,10,2500,35 ,0
+11 ,4,1000,16 ,1
+4 ,5,1250,33 ,1
+4 ,6,1500,41 ,1
+2 ,3,750,22 ,0
+4 ,4,1000,26 ,1
+10 ,4,1000,16 ,0
+2 ,4,1000,35 ,0
+4 ,12,3000,88 ,0
+13 ,8,2000,26 ,0
+11 ,9,2250,33 ,0
+4 ,5,1250,34 ,0
+4 ,4,1000,26 ,0
+8 ,15,3750,77 ,0
+4 ,5,1250,35 ,1
+4 ,7,1750,52 ,0
+4 ,7,1750,52 ,0
+2 ,4,1000,35 ,0
+11 ,11,2750,42 ,0
+2 ,2,500,14 ,0
+2 ,5,1250,47 ,1
+9 ,8,2000,38 ,1
+4 ,6,1500,47 ,0
+11 ,7,1750,29 ,0
+9 ,9,2250,45 ,0
+4 ,6,1500,52 ,0
+4 ,7,1750,58 ,0
+6 ,2,500,11 ,1
+4 ,7,1750,58 ,0
+11 ,9,2250,38 ,0
+11 ,6,1500,26 ,0
+2 ,2,500,16 ,0
+2 ,7,1750,76 ,0
+11 ,6,1500,27 ,0
+11 ,3,750,14 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,1
+4 ,1,250,4 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,1
+4 ,1,250,4 ,1
+4 ,1,250,4 ,0
+4 ,1,250,4 ,1
+4 ,1,250,4 ,1
+4 ,1,250,4 ,0
+4 ,3,750,24 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,1
+4 ,1,250,4 ,0
+10 ,8,2000,39 ,0
+14 ,7,1750,26 ,0
+8 ,10,2500,63 ,0
+11 ,3,750,15 ,0
+4 ,2,500,14 ,0
+2 ,4,1000,43 ,0
+8 ,9,2250,58 ,0
+8 ,8,2000,52 ,1
+11 ,22,5500,98 ,0
+4 ,3,750,25 ,1
+11 ,17,4250,79 ,1
+9 ,2,500,11 ,0
+4 ,5,1250,46 ,0
+11 ,12,3000,58 ,0
+7 ,12,3000,86 ,0
+11 ,2,500,11 ,0
+11 ,2,500,11 ,0
+11 ,2,500,11 ,0
+2 ,6,1500,75 ,0
+11 ,8,2000,41 ,1
+11 ,3,750,16 ,1
+12 ,13,3250,59 ,0
+2 ,3,750,35 ,0
+16 ,8,2000,28 ,0
+11 ,7,1750,37 ,0
+4 ,3,750,28 ,0
+12 ,12,3000,58 ,0
+4 ,4,1000,41 ,0
+11 ,14,3500,73 ,1
+2 ,2,500,23 ,0
+2 ,3,750,38 ,1
+4 ,5,1250,58 ,0
+4 ,4,1000,43 ,1
+3 ,2,500,23 ,0
+11 ,8,2000,46 ,0
+4 ,7,1750,82 ,0
+13 ,4,1000,21 ,0
+16 ,11,2750,40 ,0
+16 ,7,1750,28 ,0
+7 ,2,500,16 ,0
+4 ,5,1250,58 ,0
+4 ,5,1250,58 ,0
+4 ,4,1000,46 ,0
+14 ,13,3250,57 ,0
+4 ,3,750,34 ,0
+14 ,18,4500,78 ,0
+11 ,8,2000,48 ,0
+14 ,16,4000,70 ,0
+14 ,4,1000,22 ,1
+14 ,5,1250,26 ,0
+8 ,2,500,16 ,0
+11 ,5,1250,33 ,0
+11 ,2,500,14 ,0
+4 ,2,500,23 ,0
+9 ,2,500,16 ,1
+14 ,5,1250,28 ,1
+14 ,3,750,19 ,1
+14 ,4,1000,23 ,1
+16 ,12,3000,50 ,0
+11 ,4,1000,28 ,0
+11 ,5,1250,35 ,0
+11 ,5,1250,35 ,0
+2 ,4,1000,70 ,0
+14 ,5,1250,28 ,0
+14 ,2,500,14 ,0
+14 ,2,500,14 ,0
+14 ,2,500,14 ,0
+14 ,2,500,14 ,0
+14 ,2,500,14 ,0
+14 ,2,500,14 ,0
+2 ,3,750,52 ,0
+14 ,6,1500,34 ,0
+11 ,5,1250,37 ,1
+4 ,5,1250,74 ,0
+11 ,3,750,23 ,0
+16 ,4,1000,23 ,0
+16 ,3,750,19 ,0
+11 ,5,1250,38 ,0
+11 ,2,500,16 ,0
+12 ,9,2250,60 ,0
+9 ,1,250,9 ,0
+9 ,1,250,9 ,0
+4 ,2,500,29 ,0
+11 ,2,500,17 ,0
+14 ,4,1000,26 ,0
+11 ,9,2250,72 ,1
+11 ,5,1250,41 ,0
+15 ,16,4000,82 ,0
+9 ,5,1250,51 ,1
+11 ,4,1000,34 ,0
+14 ,8,2000,50 ,1
+16 ,7,1750,38 ,0
+14 ,2,500,16 ,0
+2 ,2,500,41 ,0
+14 ,16,4000,98 ,0
+14 ,4,1000,28 ,1
+16 ,7,1750,39 ,0
+14 ,7,1750,47 ,0
+16 ,6,1500,35 ,0
+16 ,6,1500,35 ,1
+11 ,7,1750,62 ,1
+16 ,2,500,16 ,0
+16 ,3,750,21 ,1
+11 ,3,750,28 ,0
+11 ,7,1750,64 ,0
+11 ,1,250,11 ,1
+9 ,3,750,34 ,0
+14 ,4,1000,30 ,0
+23 ,38,9500,98 ,0
+11 ,6,1500,58 ,0
+11 ,1,250,11 ,0
+11 ,1,250,11 ,0
+11 ,1,250,11 ,0
+11 ,1,250,11 ,0
+11 ,1,250,11 ,0
+11 ,1,250,11 ,0
+11 ,1,250,11 ,0
+11 ,1,250,11 ,0
+11 ,2,500,21 ,0
+11 ,5,1250,50 ,0
+11 ,2,500,21 ,0
+16 ,4,1000,28 ,0
+4 ,2,500,41 ,0
+16 ,6,1500,40 ,0
+14 ,3,750,26 ,0
+9 ,2,500,26 ,0
+21 ,16,4000,64 ,0
+14 ,6,1500,51 ,0
+11 ,2,500,24 ,0
+4 ,3,750,71 ,0
+21 ,13,3250,57 ,0
+11 ,6,1500,71 ,0
+14 ,2,500,21 ,1
+23 ,15,3750,57 ,0
+14 ,4,1000,38 ,0
+11 ,2,500,26 ,0
+16 ,5,1250,40 ,1
+4 ,2,500,51 ,1
+14 ,3,750,31 ,0
+4 ,2,500,52 ,0
+9 ,4,1000,65 ,0
+14 ,4,1000,40 ,0
+11 ,3,750,40 ,1
+14 ,5,1250,50 ,0
+14 ,1,250,14 ,0
+14 ,1,250,14 ,0
+14 ,1,250,14 ,0
+14 ,1,250,14 ,0
+14 ,1,250,14 ,0
+14 ,1,250,14 ,0
+14 ,1,250,14 ,0
+14 ,1,250,14 ,0
+14 ,7,1750,72 ,0
+14 ,1,250,14 ,0
+14 ,1,250,14 ,0
+9 ,3,750,52 ,0
+14 ,7,1750,73 ,0
+11 ,4,1000,58 ,0
+11 ,4,1000,59 ,0
+4 ,2,500,59 ,0
+11 ,4,1000,61 ,0
+16 ,4,1000,40 ,0
+16 ,10,2500,89 ,0
+21 ,2,500,21 ,1
+21 ,3,750,26 ,0
+16 ,8,2000,76 ,0
+21 ,3,750,26 ,1
+18 ,2,500,23 ,0
+23 ,5,1250,33 ,0
+23 ,8,2000,46 ,0
+16 ,3,750,34 ,0
+14 ,5,1250,64 ,0
+14 ,3,750,41 ,0
+16 ,1,250,16 ,0
+16 ,1,250,16 ,0
+16 ,1,250,16 ,0
+16 ,1,250,16 ,0
+16 ,1,250,16 ,0
+16 ,1,250,16 ,0
+16 ,1,250,16 ,0
+16 ,4,1000,45 ,0
+16 ,1,250,16 ,0
+16 ,1,250,16 ,0
+16 ,1,250,16 ,0
+16 ,1,250,16 ,0
+16 ,1,250,16 ,0
+16 ,2,500,26 ,0
+21 ,2,500,23 ,0
+16 ,2,500,27 ,0
+21 ,2,500,23 ,0
+21 ,2,500,23 ,0
+14 ,4,1000,57 ,0
+16 ,5,1250,60 ,0
+23 ,2,500,23 ,0
+14 ,5,1250,74 ,0
+23 ,3,750,28 ,0
+16 ,3,750,40 ,0
+9 ,2,500,52 ,0
+9 ,2,500,52 ,0
+16 ,7,1750,87 ,1
+14 ,4,1000,64 ,0
+14 ,2,500,35 ,0
+16 ,7,1750,93 ,0
+21 ,2,500,25 ,0
+14 ,3,750,52 ,0
+23 ,14,3500,93 ,0
+18 ,8,2000,95 ,0
+16 ,3,750,46 ,0
+11 ,3,750,76 ,0
+11 ,2,500,52 ,0
+11 ,3,750,76 ,0
+23 ,12,3000,86 ,0
+21 ,3,750,35 ,0
+23 ,2,500,26 ,0
+23 ,2,500,26 ,0
+23 ,8,2000,64 ,0
+16 ,3,750,50 ,0
+23 ,3,750,33 ,0
+21 ,3,750,38 ,0
+23 ,2,500,28 ,0
+21 ,1,250,21 ,0
+21 ,1,250,21 ,0
+21 ,1,250,21 ,0
+21 ,1,250,21 ,0
+21 ,1,250,21 ,0
+21 ,1,250,21 ,0
+21 ,1,250,21 ,0
+21 ,1,250,21 ,0
+21 ,1,250,21 ,0
+21 ,1,250,21 ,1
+21 ,1,250,21 ,0
+21 ,1,250,21 ,0
+21 ,5,1250,60 ,0
+23 ,4,1000,45 ,0
+21 ,4,1000,52 ,0
+22 ,1,250,22 ,1
+11 ,2,500,70 ,0
+23 ,5,1250,58 ,0
+23 ,3,750,40 ,0
+23 ,3,750,41 ,0
+14 ,3,750,83 ,0
+21 ,2,500,35 ,0
+26 ,5,1250,49 ,1
+23 ,6,1500,70 ,0
+23 ,1,250,23 ,0
+23 ,1,250,23 ,0
+23 ,1,250,23 ,0
+23 ,1,250,23 ,0
+23 ,1,250,23 ,0
+23 ,1,250,23 ,0
+23 ,1,250,23 ,0
+23 ,1,250,23 ,0
+23 ,4,1000,53 ,0
+21 ,6,1500,86 ,0
+23 ,3,750,48 ,0
+21 ,2,500,41 ,0
+21 ,3,750,64 ,0
+16 ,2,500,70 ,0
+21 ,3,750,70 ,0
+23 ,4,1000,87 ,0
+23 ,3,750,89 ,0
+23 ,2,500,87 ,0
+35 ,3,750,64 ,0
+38 ,1,250,38 ,0
+38 ,1,250,38 ,0
+40 ,1,250,40 ,0
+74 ,1,250,74 ,0
+2 ,43,10750,86 ,1
+6 ,22,5500,28 ,1
+2 ,34,8500,77 ,1
+2 ,44,11000,98 ,0
+0 ,26,6500,76 ,1
+2 ,41,10250,98 ,1
+3 ,21,5250,42 ,1
+2 ,11,2750,23 ,0
+2 ,21,5250,52 ,1
+2 ,13,3250,32 ,1
+4 ,4,1000,4 ,1
+2 ,11,2750,26 ,0
+2 ,11,2750,28 ,0
+3 ,14,3500,35 ,0
+4 ,16,4000,38 ,1
+4 ,6,1500,14 ,0
+3 ,5,1250,12 ,1
+4 ,33,8250,98 ,1
+3 ,10,2500,33 ,1
+4 ,10,2500,28 ,1
+2 ,11,2750,40 ,1
+2 ,11,2750,41 ,1
+4 ,13,3250,39 ,1
+1 ,10,2500,43 ,1
+4 ,9,2250,28 ,0
+2 ,4,1000,11 ,0
+2 ,5,1250,16 ,1
+2 ,15,3750,64 ,0
+5 ,24,6000,79 ,0
+2 ,6,1500,22 ,1
+4 ,5,1250,16 ,1
+2 ,4,1000,14 ,1
+4 ,8,2000,28 ,0
+2 ,4,1000,14 ,0
+2 ,6,1500,26 ,0
+4 ,5,1250,16 ,1
+2 ,7,1750,32 ,1
+2 ,6,1500,26 ,1
+2 ,8,2000,38 ,1
+2 ,2,500,4 ,1
+2 ,6,1500,28 ,1
+2 ,10,2500,52 ,0
+4 ,16,4000,70 ,1
+4 ,2,500,4 ,1
+1 ,14,3500,95 ,0
+4 ,2,500,4 ,1
+7 ,14,3500,48 ,0
+2 ,3,750,11 ,0
+2 ,12,3000,70 ,1
+4 ,7,1750,32 ,1
+4 ,4,1000,16 ,0
+2 ,6,1500,35 ,1
+4 ,6,1500,28 ,1
+2 ,3,750,14 ,0
+2 ,4,1000,23 ,0
+4 ,4,1000,18 ,0
+5 ,6,1500,28 ,0
+4 ,6,1500,30 ,0
+14 ,5,1250,14 ,0
+3 ,8,2000,50 ,0
+4 ,11,2750,64 ,1
+4 ,9,2250,52 ,0
+4 ,16,4000,98 ,1
+7 ,10,2500,47 ,0
+4 ,14,3500,86 ,0
+2 ,9,2250,75 ,0
+4 ,6,1500,35 ,0
+4 ,9,2250,55 ,0
+4 ,6,1500,35 ,1
+2 ,6,1500,45 ,0
+2 ,6,1500,47 ,0
+4 ,2,500,9 ,0
+2 ,2,500,11 ,1
+2 ,2,500,11 ,0
+2 ,2,500,11 ,1
+4 ,6,1500,38 ,1
+3 ,4,1000,29 ,1
+9 ,9,2250,38 ,0
+11 ,5,1250,18 ,0
+2 ,3,750,21 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,1
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,1
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+11 ,11,2750,38 ,0
+2 ,3,750,22 ,0
+9 ,11,2750,49 ,1
+5 ,11,2750,75 ,0
+3 ,5,1250,38 ,0
+3 ,1,250,3 ,1
+4 ,6,1500,43 ,0
+2 ,3,750,24 ,0
+12 ,11,2750,39 ,0
+2 ,2,500,14 ,0
+4 ,6,1500,46 ,0
+9 ,3,750,14 ,0
+14 ,8,2000,26 ,0
+4 ,2,500,13 ,0
+4 ,11,2750,95 ,0
+2 ,7,1750,77 ,0
+2 ,7,1750,77 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,1
+4 ,1,250,4 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,1
+4 ,1,250,4 ,0
+4 ,7,1750,62 ,0
+4 ,1,250,4 ,0
+4 ,4,1000,34 ,1
+11 ,6,1500,28 ,0
+13 ,3,750,14 ,1
+7 ,5,1250,35 ,0
+9 ,9,2250,54 ,0
+11 ,2,500,11 ,0
+2 ,5,1250,63 ,0
+7 ,11,2750,89 ,0
+8 ,9,2250,64 ,0
+2 ,2,500,22 ,0
+6 ,3,750,26 ,0
+12 ,15,3750,71 ,0
+13 ,3,750,16 ,0
+11 ,16,4000,89 ,0
+4 ,5,1250,58 ,0
+14 ,7,1750,35 ,0
+11 ,4,1000,27 ,0
+7 ,9,2250,89 ,1
+11 ,8,2000,52 ,1
+7 ,5,1250,52 ,0
+11 ,6,1500,41 ,0
+10 ,5,1250,38 ,0
+14 ,2,500,14 ,1
+14 ,2,500,14 ,0
+14 ,2,500,14 ,0
+2 ,2,500,33 ,0
+11 ,3,750,23 ,0
+14 ,8,2000,46 ,0
+9 ,1,250,9 ,0
+16 ,5,1250,27 ,0
+14 ,4,1000,26 ,0
+4 ,2,500,30 ,0
+14 ,3,750,21 ,0
+16 ,16,4000,77 ,0
+4 ,2,500,31 ,0
+14 ,8,2000,50 ,0
+11 ,3,750,26 ,0
+14 ,7,1750,45 ,0
+15 ,5,1250,33 ,0
+16 ,2,500,16 ,0
+16 ,3,750,21 ,0
+11 ,8,2000,72 ,0
+11 ,1,250,11 ,0
+11 ,1,250,11 ,0
+11 ,1,250,11 ,0
+11 ,1,250,11 ,1
+11 ,1,250,11 ,0
+2 ,3,750,75 ,1
+2 ,3,750,77 ,0
+16 ,4,1000,28 ,0
+16 ,15,3750,87 ,0
+16 ,14,3500,83 ,0
+16 ,10,2500,62 ,0
+16 ,3,750,23 ,0
+14 ,3,750,26 ,0
+23 ,19,4750,62 ,0
+11 ,7,1750,75 ,0
+14 ,3,750,28 ,0
+20 ,14,3500,69 ,1
+4 ,2,500,46 ,0
+11 ,2,500,25 ,0
+11 ,3,750,37 ,0
+16 ,4,1000,33 ,0
+21 ,7,1750,38 ,0
+13 ,7,1750,76 ,0
+16 ,6,1500,50 ,0
+14 ,3,750,33 ,0
+14 ,1,250,14 ,0
+14 ,1,250,14 ,0
+14 ,1,250,14 ,0
+14 ,1,250,14 ,0
+14 ,1,250,14 ,0
+14 ,1,250,14 ,0
+17 ,7,1750,58 ,1
+14 ,3,750,35 ,0
+14 ,3,750,35 ,0
+16 ,7,1750,64 ,0
+21 ,2,500,21 ,0
+16 ,3,750,35 ,0
+16 ,1,250,16 ,0
+16 ,1,250,16 ,0
+16 ,1,250,16 ,0
+16 ,1,250,16 ,0
+16 ,1,250,16 ,0
+14 ,2,500,29 ,0
+11 ,4,1000,74 ,0
+11 ,2,500,38 ,1
+21 ,6,1500,48 ,0
+23 ,2,500,23 ,0
+23 ,6,1500,45 ,0
+14 ,2,500,35 ,1
+16 ,6,1500,81 ,0
+16 ,4,1000,58 ,0
+16 ,5,1250,71 ,0
+21 ,2,500,26 ,0
+21 ,3,750,35 ,0
+21 ,3,750,35 ,0
+23 ,8,2000,69 ,0
+21 ,3,750,38 ,0
+23 ,3,750,35 ,0
+21 ,3,750,40 ,0
+23 ,2,500,28 ,0
+21 ,1,250,21 ,0
+21 ,1,250,21 ,0
+25 ,6,1500,50 ,0
+21 ,1,250,21 ,0
+21 ,1,250,21 ,0
+23 ,3,750,39 ,0
+21 ,2,500,33 ,0
+14 ,3,750,79 ,0
+23 ,1,250,23 ,1
+23 ,1,250,23 ,0
+23 ,1,250,23 ,0
+23 ,1,250,23 ,0
+23 ,1,250,23 ,0
+23 ,1,250,23 ,0
+23 ,1,250,23 ,0
+23 ,4,1000,52 ,0
+23 ,1,250,23 ,0
+23 ,7,1750,88 ,0
+16 ,3,750,86 ,0
+23 ,2,500,38 ,0
+21 ,2,500,52 ,0
+23 ,3,750,62 ,0
+39 ,1,250,39 ,0
 72 ,1,250,72 ,0
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/io/csv/in/transfusion_2.data.single
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/io/csv/in/transfusion_2.data.single b/src/test/scripts/functions/io/csv/in/transfusion_2.data.single
index 1ec0901..1a35bfe 100644
--- a/src/test/scripts/functions/io/csv/in/transfusion_2.data.single
+++ b/src/test/scripts/functions/io/csv/in/transfusion_2.data.single
@@ -1,749 +1,749 @@
-Recency (months),Frequency (times),Monetary (c.c. blood),Time (months),"whether he/she donated blood in March 2007"
-2 ,50,12500,98 ,1
-0 ,13,3250,28 ,1
-1 ,16,4000,35 ,1
-2 ,20,5000,45 ,1
-1 ,24,6000,77 ,0
-4 ,4,1000,4 ,0
-2 ,7,1750,14 ,1
-1 ,12,3000,35 ,0
-2 ,9,2250,22 ,1
-5 ,46,11500,98 ,1
-4 ,23,5750,58 ,0
-0 ,3,750,4 ,0
-2 ,10,2500,28 ,1
-1 ,13,3250,47 ,0
-2 ,6,1500,15 ,1
-2 ,5,1250,11 ,1
-2 ,14,3500,48 ,1
-2 ,15,3750,49 ,1
-2 ,6,1500,15 ,1
-2 ,3,750,4 ,1
-2 ,3,750,4 ,1
-4 ,11,2750,28 ,0
-2 ,6,1500,16 ,1
-2 ,6,1500,16 ,1
-9 ,9,2250,16 ,0
-4 ,14,3500,40 ,0
-4 ,6,1500,14 ,0
-4 ,12,3000,34 ,1
-4 ,5,1250,11 ,1
-4 ,8,2000,21 ,0
-1 ,14,3500,58 ,0
-4 ,10,2500,28 ,1
-4 ,10,2500,28 ,1
-4 ,9,2250,26 ,1
-2 ,16,4000,64 ,0
-2 ,8,2000,28 ,1
-2 ,12,3000,47 ,1
-4 ,6,1500,16 ,1
-2 ,14,3500,57 ,1
-4 ,7,1750,22 ,1
-2 ,13,3250,53 ,1
-2 ,5,1250,16 ,0
-2 ,5,1250,16 ,1
-2 ,5,1250,16 ,0
-4 ,20,5000,69 ,1
-4 ,9,2250,28 ,1
-2 ,9,2250,36 ,0
-2 ,2,500,2 ,0
-2 ,2,500,2 ,0
-2 ,2,500,2 ,0
-2 ,11,2750,46 ,0
-2 ,11,2750,46 ,1
-2 ,6,1500,22 ,0
-2 ,12,3000,52 ,0
-4 ,5,1250,14 ,1
-4 ,19,4750,69 ,1
-4 ,8,2000,26 ,1
-2 ,7,1750,28 ,1
-2 ,16,4000,81 ,0
-3 ,6,1500,21 ,0
-2 ,7,1750,29 ,0
-2 ,8,2000,35 ,1
-2 ,10,2500,49 ,0
-4 ,5,1250,16 ,1
-2 ,3,750,9 ,1
-3 ,16,4000,74 ,0
-2 ,4,1000,14 ,1
-0 ,2,500,4 ,0
-4 ,7,1750,25 ,0
-1 ,9,2250,51 ,0
-2 ,4,1000,16 ,0
-2 ,4,1000,16 ,0
-4 ,17,4250,71 ,1
-2 ,2,500,4 ,0
-2 ,2,500,4 ,1
-2 ,2,500,4 ,1
-2 ,4,1000,16 ,1
-2 ,2,500,4 ,0
-2 ,2,500,4 ,0
-2 ,2,500,4 ,0
-4 ,6,1500,23 ,1
-2 ,4,1000,16 ,0
-2 ,4,1000,16 ,0
-2 ,4,1000,16 ,0
-2 ,6,1500,28 ,1
-2 ,6,1500,28 ,0
-4 ,2,500,4 ,0
-4 ,2,500,4 ,0
-4 ,2,500,4 ,0
-2 ,7,1750,35 ,1
-4 ,2,500,4 ,1
-4 ,2,500,4 ,0
-4 ,2,500,4 ,0
-4 ,2,500,4 ,0
-12 ,11,2750,23 ,0
-4 ,7,1750,28 ,0
-3 ,17,4250,86 ,0
-4 ,9,2250,38 ,1
-4 ,4,1000,14 ,1
-5 ,7,1750,26 ,1
-4 ,8,2000,34 ,1
-2 ,13,3250,76 ,1
-4 ,9,2250,40 ,0
-2 ,5,1250,26 ,0
-2 ,5,1250,26 ,0
-6 ,17,4250,70 ,0
-0 ,8,2000,59 ,0
-3 ,5,1250,26 ,0
-2 ,3,750,14 ,0
-2 ,10,2500,64 ,0
-4 ,5,1250,23 ,1
-4 ,9,2250,46 ,0
-4 ,5,1250,23 ,0
-4 ,8,2000,40 ,1
-2 ,12,3000,82 ,0
-11 ,24,6000,64 ,0
-2 ,7,1750,46 ,1
-4 ,11,2750,61 ,0
-1 ,7,1750,57 ,0
-2 ,11,2750,79 ,1
-2 ,3,750,16 ,1
-4 ,5,1250,26 ,1
-2 ,6,1500,41 ,1
-2 ,5,1250,33 ,1
-2 ,4,1000,26 ,0
-2 ,5,1250,34 ,0
-4 ,8,2000,46 ,1
-2 ,4,1000,26 ,0
-4 ,8,2000,48 ,1
-2 ,2,500,10 ,1
-4 ,5,1250,28 ,0
-2 ,12,3000,95 ,0
-2 ,2,500,10 ,0
-4 ,6,1500,35 ,0
-2 ,11,2750,88 ,0
-2 ,3,750,19 ,0
-2 ,5,1250,37 ,0
-2 ,12,3000,98 ,0
-9 ,5,1250,19 ,0
-2 ,2,500,11 ,0
-2 ,9,2250,74 ,0
-5 ,14,3500,86 ,0
-4 ,3,750,16 ,0
-4 ,3,750,16 ,0
-4 ,2,500,9 ,1
-4 ,3,750,16 ,1
-6 ,3,750,14 ,0
-2 ,2,500,11 ,0
-2 ,2,500,11 ,1
-2 ,2,500,11 ,0
-2 ,7,1750,58 ,1
-4 ,6,1500,39 ,0
-4 ,11,2750,78 ,0
-2 ,1,250,2 ,1
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,1
-2 ,1,250,2 ,1
-2 ,1,250,2 ,1
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-11 ,10,2500,35 ,0
-11 ,4,1000,16 ,1
-4 ,5,1250,33 ,1
-4 ,6,1500,41 ,1
-2 ,3,750,22 ,0
-4 ,4,1000,26 ,1
-10 ,4,1000,16 ,0
-2 ,4,1000,35 ,0
-4 ,12,3000,88 ,0
-13 ,8,2000,26 ,0
-11 ,9,2250,33 ,0
-4 ,5,1250,34 ,0
-4 ,4,1000,26 ,0
-8 ,15,3750,77 ,0
-4 ,5,1250,35 ,1
-4 ,7,1750,52 ,0
-4 ,7,1750,52 ,0
-2 ,4,1000,35 ,0
-11 ,11,2750,42 ,0
-2 ,2,500,14 ,0
-2 ,5,1250,47 ,1
-9 ,8,2000,38 ,1
-4 ,6,1500,47 ,0
-11 ,7,1750,29 ,0
-9 ,9,2250,45 ,0
-4 ,6,1500,52 ,0
-4 ,7,1750,58 ,0
-6 ,2,500,11 ,1
-4 ,7,1750,58 ,0
-11 ,9,2250,38 ,0
-11 ,6,1500,26 ,0
-2 ,2,500,16 ,0
-2 ,7,1750,76 ,0
-11 ,6,1500,27 ,0
-11 ,3,750,14 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,1
-4 ,1,250,4 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,1
-4 ,1,250,4 ,1
-4 ,1,250,4 ,0
-4 ,1,250,4 ,1
-4 ,1,250,4 ,1
-4 ,1,250,4 ,0
-4 ,3,750,24 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,1
-4 ,1,250,4 ,0
-10 ,8,2000,39 ,0
-14 ,7,1750,26 ,0
-8 ,10,2500,63 ,0
-11 ,3,750,15 ,0
-4 ,2,500,14 ,0
-2 ,4,1000,43 ,0
-8 ,9,2250,58 ,0
-8 ,8,2000,52 ,1
-11 ,22,5500,98 ,0
-4 ,3,750,25 ,1
-11 ,17,4250,79 ,1
-9 ,2,500,11 ,0
-4 ,5,1250,46 ,0
-11 ,12,3000,58 ,0
-7 ,12,3000,86 ,0
-11 ,2,500,11 ,0
-11 ,2,500,11 ,0
-11 ,2,500,11 ,0
-2 ,6,1500,75 ,0
-11 ,8,2000,41 ,1
-11 ,3,750,16 ,1
-12 ,13,3250,59 ,0
-2 ,3,750,35 ,0
-16 ,8,2000,28 ,0
-11 ,7,1750,37 ,0
-4 ,3,750,28 ,0
-12 ,12,3000,58 ,0
-4 ,4,1000,41 ,0
-11 ,14,3500,73 ,1
-2 ,2,500,23 ,0
-2 ,3,750,38 ,1
-4 ,5,1250,58 ,0
-4 ,4,1000,43 ,1
-3 ,2,500,23 ,0
-11 ,8,2000,46 ,0
-4 ,7,1750,82 ,0
-13 ,4,1000,21 ,0
-16 ,11,2750,40 ,0
-16 ,7,1750,28 ,0
-7 ,2,500,16 ,0
-4 ,5,1250,58 ,0
-4 ,5,1250,58 ,0
-4 ,4,1000,46 ,0
-14 ,13,3250,57 ,0
-4 ,3,750,34 ,0
-14 ,18,4500,78 ,0
-11 ,8,2000,48 ,0
-14 ,16,4000,70 ,0
-14 ,4,1000,22 ,1
-14 ,5,1250,26 ,0
-8 ,2,500,16 ,0
-11 ,5,1250,33 ,0
-11 ,2,500,14 ,0
-4 ,2,500,23 ,0
-9 ,2,500,16 ,1
-14 ,5,1250,28 ,1
-14 ,3,750,19 ,1
-14 ,4,1000,23 ,1
-16 ,12,3000,50 ,0
-11 ,4,1000,28 ,0
-11 ,5,1250,35 ,0
-11 ,5,1250,35 ,0
-2 ,4,1000,70 ,0
-14 ,5,1250,28 ,0
-14 ,2,500,14 ,0
-14 ,2,500,14 ,0
-14 ,2,500,14 ,0
-14 ,2,500,14 ,0
-14 ,2,500,14 ,0
-14 ,2,500,14 ,0
-2 ,3,750,52 ,0
-14 ,6,1500,34 ,0
-11 ,5,1250,37 ,1
-4 ,5,1250,74 ,0
-11 ,3,750,23 ,0
-16 ,4,1000,23 ,0
-16 ,3,750,19 ,0
-11 ,5,1250,38 ,0
-11 ,2,500,16 ,0
-12 ,9,2250,60 ,0
-9 ,1,250,9 ,0
-9 ,1,250,9 ,0
-4 ,2,500,29 ,0
-11 ,2,500,17 ,0
-14 ,4,1000,26 ,0
-11 ,9,2250,72 ,1
-11 ,5,1250,41 ,0
-15 ,16,4000,82 ,0
-9 ,5,1250,51 ,1
-11 ,4,1000,34 ,0
-14 ,8,2000,50 ,1
-16 ,7,1750,38 ,0
-14 ,2,500,16 ,0
-2 ,2,500,41 ,0
-14 ,16,4000,98 ,0
-14 ,4,1000,28 ,1
-16 ,7,1750,39 ,0
-14 ,7,1750,47 ,0
-16 ,6,1500,35 ,0
-16 ,6,1500,35 ,1
-11 ,7,1750,62 ,1
-16 ,2,500,16 ,0
-16 ,3,750,21 ,1
-11 ,3,750,28 ,0
-11 ,7,1750,64 ,0
-11 ,1,250,11 ,1
-9 ,3,750,34 ,0
-14 ,4,1000,30 ,0
-23 ,38,9500,98 ,0
-11 ,6,1500,58 ,0
-11 ,1,250,11 ,0
-11 ,1,250,11 ,0
-11 ,1,250,11 ,0
-11 ,1,250,11 ,0
-11 ,1,250,11 ,0
-11 ,1,250,11 ,0
-11 ,1,250,11 ,0
-11 ,1,250,11 ,0
-11 ,2,500,21 ,0
-11 ,5,1250,50 ,0
-11 ,2,500,21 ,0
-16 ,4,1000,28 ,0
-4 ,2,500,41 ,0
-16 ,6,1500,40 ,0
-14 ,3,750,26 ,0
-9 ,2,500,26 ,0
-21 ,16,4000,64 ,0
-14 ,6,1500,51 ,0
-11 ,2,500,24 ,0
-4 ,3,750,71 ,0
-21 ,13,3250,57 ,0
-11 ,6,1500,71 ,0
-14 ,2,500,21 ,1
-23 ,15,3750,57 ,0
-14 ,4,1000,38 ,0
-11 ,2,500,26 ,0
-16 ,5,1250,40 ,1
-4 ,2,500,51 ,1
-14 ,3,750,31 ,0
-4 ,2,500,52 ,0
-9 ,4,1000,65 ,0
-14 ,4,1000,40 ,0
-11 ,3,750,40 ,1
-14 ,5,1250,50 ,0
-14 ,1,250,14 ,0
-14 ,1,250,14 ,0
-14 ,1,250,14 ,0
-14 ,1,250,14 ,0
-14 ,1,250,14 ,0
-14 ,1,250,14 ,0
-14 ,1,250,14 ,0
-14 ,1,250,14 ,0
-14 ,7,1750,72 ,0
-14 ,1,250,14 ,0
-14 ,1,250,14 ,0
-9 ,3,750,52 ,0
-14 ,7,1750,73 ,0
-11 ,4,1000,58 ,0
-11 ,4,1000,59 ,0
-4 ,2,500,59 ,0
-11 ,4,1000,61 ,0
-16 ,4,1000,40 ,0
-16 ,10,2500,89 ,0
-21 ,2,500,21 ,1
-21 ,3,750,26 ,0
-16 ,8,2000,76 ,0
-21 ,3,750,26 ,1
-18 ,2,500,23 ,0
-23 ,5,1250,33 ,0
-23 ,8,2000,46 ,0
-16 ,3,750,34 ,0
-14 ,5,1250,64 ,0
-14 ,3,750,41 ,0
-16 ,1,250,16 ,0
-16 ,1,250,16 ,0
-16 ,1,250,16 ,0
-16 ,1,250,16 ,0
-16 ,1,250,16 ,0
-16 ,1,250,16 ,0
-16 ,1,250,16 ,0
-16 ,4,1000,45 ,0
-16 ,1,250,16 ,0
-16 ,1,250,16 ,0
-16 ,1,250,16 ,0
-16 ,1,250,16 ,0
-16 ,1,250,16 ,0
-16 ,2,500,26 ,0
-21 ,2,500,23 ,0
-16 ,2,500,27 ,0
-21 ,2,500,23 ,0
-21 ,2,500,23 ,0
-14 ,4,1000,57 ,0
-16 ,5,1250,60 ,0
-23 ,2,500,23 ,0
-14 ,5,1250,74 ,0
-23 ,3,750,28 ,0
-16 ,3,750,40 ,0
-9 ,2,500,52 ,0
-9 ,2,500,52 ,0
-16 ,7,1750,87 ,1
-14 ,4,1000,64 ,0
-14 ,2,500,35 ,0
-16 ,7,1750,93 ,0
-21 ,2,500,25 ,0
-14 ,3,750,52 ,0
-23 ,14,3500,93 ,0
-18 ,8,2000,95 ,0
-16 ,3,750,46 ,0
-11 ,3,750,76 ,0
-11 ,2,500,52 ,0
-11 ,3,750,76 ,0
-23 ,12,3000,86 ,0
-21 ,3,750,35 ,0
-23 ,2,500,26 ,0
-23 ,2,500,26 ,0
-23 ,8,2000,64 ,0
-16 ,3,750,50 ,0
-23 ,3,750,33 ,0
-21 ,3,750,38 ,0
-23 ,2,500,28 ,0
-21 ,1,250,21 ,0
-21 ,1,250,21 ,0
-21 ,1,250,21 ,0
-21 ,1,250,21 ,0
-21 ,1,250,21 ,0
-21 ,1,250,21 ,0
-21 ,1,250,21 ,0
-21 ,1,250,21 ,0
-21 ,1,250,21 ,0
-21 ,1,250,21 ,1
-21 ,1,250,21 ,0
-21 ,1,250,21 ,0
-21 ,5,1250,60 ,0
-23 ,4,1000,45 ,0
-21 ,4,1000,52 ,0
-22 ,1,250,22 ,1
-11 ,2,500,70 ,0
-23 ,5,1250,58 ,0
-23 ,3,750,40 ,0
-23 ,3,750,41 ,0
-14 ,3,750,83 ,0
-21 ,2,500,35 ,0
-26 ,5,1250,49 ,1
-23 ,6,1500,70 ,0
-23 ,1,250,23 ,0
-23 ,1,250,23 ,0
-23 ,1,250,23 ,0
-23 ,1,250,23 ,0
-23 ,1,250,23 ,0
-23 ,1,250,23 ,0
-23 ,1,250,23 ,0
-23 ,1,250,23 ,0
-23 ,4,1000,53 ,0
-21 ,6,1500,86 ,0
-23 ,3,750,48 ,0
-21 ,2,500,41 ,0
-21 ,3,750,64 ,0
-16 ,2,500,70 ,0
-21 ,3,750,70 ,0
-23 ,4,1000,87 ,0
-23 ,3,750,89 ,0
-23 ,2,500,87 ,0
-35 ,3,750,64 ,0
-38 ,1,250,38 ,0
-38 ,1,250,38 ,0
-40 ,1,250,40 ,0
-74 ,1,250,74 ,0
-2 ,43,10750,86 ,1
-6 ,22,5500,28 ,1
-2 ,34,8500,77 ,1
-2 ,44,11000,98 ,0
-0 ,26,6500,76 ,1
-2 ,41,10250,98 ,1
-3 ,21,5250,42 ,1
-2 ,11,2750,23 ,0
-2 ,21,5250,52 ,1
-2 ,13,3250,32 ,1
-4 ,4,1000,4 ,1
-2 ,11,2750,26 ,0
-2 ,11,2750,28 ,0
-3 ,14,3500,35 ,0
-4 ,16,4000,38 ,1
-4 ,6,1500,14 ,0
-3 ,5,1250,12 ,1
-4 ,33,8250,98 ,1
-3 ,10,2500,33 ,1
-4 ,10,2500,28 ,1
-2 ,11,2750,40 ,1
-2 ,11,2750,41 ,1
-4 ,13,3250,39 ,1
-1 ,10,2500,43 ,1
-4 ,9,2250,28 ,0
-2 ,4,1000,11 ,0
-2 ,5,1250,16 ,1
-2 ,15,3750,64 ,0
-5 ,24,6000,79 ,0
-2 ,6,1500,22 ,1
-4 ,5,1250,16 ,1
-2 ,4,1000,14 ,1
-4 ,8,2000,28 ,0
-2 ,4,1000,14 ,0
-2 ,6,1500,26 ,0
-4 ,5,1250,16 ,1
-2 ,7,1750,32 ,1
-2 ,6,1500,26 ,1
-2 ,8,2000,38 ,1
-2 ,2,500,4 ,1
-2 ,6,1500,28 ,1
-2 ,10,2500,52 ,0
-4 ,16,4000,70 ,1
-4 ,2,500,4 ,1
-1 ,14,3500,95 ,0
-4 ,2,500,4 ,1
-7 ,14,3500,48 ,0
-2 ,3,750,11 ,0
-2 ,12,3000,70 ,1
-4 ,7,1750,32 ,1
-4 ,4,1000,16 ,0
-2 ,6,1500,35 ,1
-4 ,6,1500,28 ,1
-2 ,3,750,14 ,0
-2 ,4,1000,23 ,0
-4 ,4,1000,18 ,0
-5 ,6,1500,28 ,0
-4 ,6,1500,30 ,0
-14 ,5,1250,14 ,0
-3 ,8,2000,50 ,0
-4 ,11,2750,64 ,1
-4 ,9,2250,52 ,0
-4 ,16,4000,98 ,1
-7 ,10,2500,47 ,0
-4 ,14,3500,86 ,0
-2 ,9,2250,75 ,0
-4 ,6,1500,35 ,0
-4 ,9,2250,55 ,0
-4 ,6,1500,35 ,1
-2 ,6,1500,45 ,0
-2 ,6,1500,47 ,0
-4 ,2,500,9 ,0
-2 ,2,500,11 ,1
-2 ,2,500,11 ,0
-2 ,2,500,11 ,1
-4 ,6,1500,38 ,1
-3 ,4,1000,29 ,1
-9 ,9,2250,38 ,0
-11 ,5,1250,18 ,0
-2 ,3,750,21 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,1
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,1
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-11 ,11,2750,38 ,0
-2 ,3,750,22 ,0
-9 ,11,2750,49 ,1
-5 ,11,2750,75 ,0
-3 ,5,1250,38 ,0
-3 ,1,250,3 ,1
-4 ,6,1500,43 ,0
-2 ,3,750,24 ,0
-12 ,11,2750,39 ,0
-2 ,2,500,14 ,0
-4 ,6,1500,46 ,0
-9 ,3,750,14 ,0
-14 ,8,2000,26 ,0
-4 ,2,500,13 ,0
-4 ,11,2750,95 ,0
-2 ,7,1750,77 ,0
-2 ,7,1750,77 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,1
-4 ,1,250,4 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,1
-4 ,1,250,4 ,0
-4 ,7,1750,62 ,0
-4 ,1,250,4 ,0
-4 ,4,1000,34 ,1
-11 ,6,1500,28 ,0
-13 ,3,750,14 ,1
-7 ,5,1250,35 ,0
-9 ,9,2250,54 ,0
-11 ,2,500,11 ,0
-2 ,5,1250,63 ,0
-7 ,11,2750,89 ,0
-8 ,9,2250,64 ,0
-2 ,2,500,22 ,0
-6 ,3,750,26 ,0
-12 ,15,3750,71 ,0
-13 ,3,750,16 ,0
-11 ,16,4000,89 ,0
-4 ,5,1250,58 ,0
-14 ,7,1750,35 ,0
-11 ,4,1000,27 ,0
-7 ,9,2250,89 ,1
-11 ,8,2000,52 ,1
-7 ,5,1250,52 ,0
-11 ,6,1500,41 ,0
-10 ,5,1250,38 ,0
-14 ,2,500,14 ,1
-14 ,2,500,14 ,0
-14 ,2,500,14 ,0
-2 ,2,500,33 ,0
-11 ,3,750,23 ,0
-14 ,8,2000,46 ,0
-9 ,1,250,9 ,0
-16 ,5,1250,27 ,0
-14 ,4,1000,26 ,0
-4 ,2,500,30 ,0
-14 ,3,750,21 ,0
-16 ,16,4000,77 ,0
-4 ,2,500,31 ,0
-14 ,8,2000,50 ,0
-11 ,3,750,26 ,0
-14 ,7,1750,45 ,0
-15 ,5,1250,33 ,0
-16 ,2,500,16 ,0
-16 ,3,750,21 ,0
-11 ,8,2000,72 ,0
-11 ,1,250,11 ,0
-11 ,1,250,11 ,0
-11 ,1,250,11 ,0
-11 ,1,250,11 ,1
-11 ,1,250,11 ,0
-2 ,3,750,75 ,1
-2 ,3,750,77 ,0
-16 ,4,1000,28 ,0
-16 ,15,3750,87 ,0
-16 ,14,3500,83 ,0
-16 ,10,2500,62 ,0
-16 ,3,750,23 ,0
-14 ,3,750,26 ,0
-23 ,19,4750,62 ,0
-11 ,7,1750,75 ,0
-14 ,3,750,28 ,0
-20 ,14,3500,69 ,1
-4 ,2,500,46 ,0
-11 ,2,500,25 ,0
-11 ,3,750,37 ,0
-16 ,4,1000,33 ,0
-21 ,7,1750,38 ,0
-13 ,7,1750,76 ,0
-16 ,6,1500,50 ,0
-14 ,3,750,33 ,0
-14 ,1,250,14 ,0
-14 ,1,250,14 ,0
-14 ,1,250,14 ,0
-14 ,1,250,14 ,0
-14 ,1,250,14 ,0
-14 ,1,250,14 ,0
-17 ,7,1750,58 ,1
-14 ,3,750,35 ,0
-14 ,3,750,35 ,0
-16 ,7,1750,64 ,0
-21 ,2,500,21 ,0
-16 ,3,750,35 ,0
-16 ,1,250,16 ,0
-16 ,1,250,16 ,0
-16 ,1,250,16 ,0
-16 ,1,250,16 ,0
-16 ,1,250,16 ,0
-14 ,2,500,29 ,0
-11 ,4,1000,74 ,0
-11 ,2,500,38 ,1
-21 ,6,1500,48 ,0
-23 ,2,500,23 ,0
-23 ,6,1500,45 ,0
-14 ,2,500,35 ,1
-16 ,6,1500,81 ,0
-16 ,4,1000,58 ,0
-16 ,5,1250,71 ,0
-21 ,2,500,26 ,0
-21 ,3,750,35 ,0
-21 ,3,750,35 ,0
-23 ,8,2000,69 ,0
-21 ,3,750,38 ,0
-23 ,3,750,35 ,0
-21 ,3,750,40 ,0
-23 ,2,500,28 ,0
-21 ,1,250,21 ,0
-21 ,1,250,21 ,0
-25 ,6,1500,50 ,0
-21 ,1,250,21 ,0
-21 ,1,250,21 ,0
-23 ,3,750,39 ,0
-21 ,2,500,33 ,0
-14 ,3,750,79 ,0
-23 ,1,250,23 ,1
-23 ,1,250,23 ,0
-23 ,1,250,23 ,0
-23 ,1,250,23 ,0
-23 ,1,250,23 ,0
-23 ,1,250,23 ,0
-23 ,1,250,23 ,0
-23 ,4,1000,52 ,0
-23 ,1,250,23 ,0
-23 ,7,1750,88 ,0
-16 ,3,750,86 ,0
-23 ,2,500,38 ,0
-21 ,2,500,52 ,0
-23 ,3,750,62 ,0
-39 ,1,250,39 ,0
+Recency (months),Frequency (times),Monetary (c.c. blood),Time (months),"whether he/she donated blood in March 2007"
+2 ,50,12500,98 ,1
+0 ,13,3250,28 ,1
+1 ,16,4000,35 ,1
+2 ,20,5000,45 ,1
+1 ,24,6000,77 ,0
+4 ,4,1000,4 ,0
+2 ,7,1750,14 ,1
+1 ,12,3000,35 ,0
+2 ,9,2250,22 ,1
+5 ,46,11500,98 ,1
+4 ,23,5750,58 ,0
+0 ,3,750,4 ,0
+2 ,10,2500,28 ,1
+1 ,13,3250,47 ,0
+2 ,6,1500,15 ,1
+2 ,5,1250,11 ,1
+2 ,14,3500,48 ,1
+2 ,15,3750,49 ,1
+2 ,6,1500,15 ,1
+2 ,3,750,4 ,1
+2 ,3,750,4 ,1
+4 ,11,2750,28 ,0
+2 ,6,1500,16 ,1
+2 ,6,1500,16 ,1
+9 ,9,2250,16 ,0
+4 ,14,3500,40 ,0
+4 ,6,1500,14 ,0
+4 ,12,3000,34 ,1
+4 ,5,1250,11 ,1
+4 ,8,2000,21 ,0
+1 ,14,3500,58 ,0
+4 ,10,2500,28 ,1
+4 ,10,2500,28 ,1
+4 ,9,2250,26 ,1
+2 ,16,4000,64 ,0
+2 ,8,2000,28 ,1
+2 ,12,3000,47 ,1
+4 ,6,1500,16 ,1
+2 ,14,3500,57 ,1
+4 ,7,1750,22 ,1
+2 ,13,3250,53 ,1
+2 ,5,1250,16 ,0
+2 ,5,1250,16 ,1
+2 ,5,1250,16 ,0
+4 ,20,5000,69 ,1
+4 ,9,2250,28 ,1
+2 ,9,2250,36 ,0
+2 ,2,500,2 ,0
+2 ,2,500,2 ,0
+2 ,2,500,2 ,0
+2 ,11,2750,46 ,0
+2 ,11,2750,46 ,1
+2 ,6,1500,22 ,0
+2 ,12,3000,52 ,0
+4 ,5,1250,14 ,1
+4 ,19,4750,69 ,1
+4 ,8,2000,26 ,1
+2 ,7,1750,28 ,1
+2 ,16,4000,81 ,0
+3 ,6,1500,21 ,0
+2 ,7,1750,29 ,0
+2 ,8,2000,35 ,1
+2 ,10,2500,49 ,0
+4 ,5,1250,16 ,1
+2 ,3,750,9 ,1
+3 ,16,4000,74 ,0
+2 ,4,1000,14 ,1
+0 ,2,500,4 ,0
+4 ,7,1750,25 ,0
+1 ,9,2250,51 ,0
+2 ,4,1000,16 ,0
+2 ,4,1000,16 ,0
+4 ,17,4250,71 ,1
+2 ,2,500,4 ,0
+2 ,2,500,4 ,1
+2 ,2,500,4 ,1
+2 ,4,1000,16 ,1
+2 ,2,500,4 ,0
+2 ,2,500,4 ,0
+2 ,2,500,4 ,0
+4 ,6,1500,23 ,1
+2 ,4,1000,16 ,0
+2 ,4,1000,16 ,0
+2 ,4,1000,16 ,0
+2 ,6,1500,28 ,1
+2 ,6,1500,28 ,0
+4 ,2,500,4 ,0
+4 ,2,500,4 ,0
+4 ,2,500,4 ,0
+2 ,7,1750,35 ,1
+4 ,2,500,4 ,1
+4 ,2,500,4 ,0
+4 ,2,500,4 ,0
+4 ,2,500,4 ,0
+12 ,11,2750,23 ,0
+4 ,7,1750,28 ,0
+3 ,17,4250,86 ,0
+4 ,9,2250,38 ,1
+4 ,4,1000,14 ,1
+5 ,7,1750,26 ,1
+4 ,8,2000,34 ,1
+2 ,13,3250,76 ,1
+4 ,9,2250,40 ,0
+2 ,5,1250,26 ,0
+2 ,5,1250,26 ,0
+6 ,17,4250,70 ,0
+0 ,8,2000,59 ,0
+3 ,5,1250,26 ,0
+2 ,3,750,14 ,0
+2 ,10,2500,64 ,0
+4 ,5,1250,23 ,1
+4 ,9,2250,46 ,0
+4 ,5,1250,23 ,0
+4 ,8,2000,40 ,1
+2 ,12,3000,82 ,0
+11 ,24,6000,64 ,0
+2 ,7,1750,46 ,1
+4 ,11,2750,61 ,0
+1 ,7,1750,57 ,0
+2 ,11,2750,79 ,1
+2 ,3,750,16 ,1
+4 ,5,1250,26 ,1
+2 ,6,1500,41 ,1
+2 ,5,1250,33 ,1
+2 ,4,1000,26 ,0
+2 ,5,1250,34 ,0
+4 ,8,2000,46 ,1
+2 ,4,1000,26 ,0
+4 ,8,2000,48 ,1
+2 ,2,500,10 ,1
+4 ,5,1250,28 ,0
+2 ,12,3000,95 ,0
+2 ,2,500,10 ,0
+4 ,6,1500,35 ,0
+2 ,11,2750,88 ,0
+2 ,3,750,19 ,0
+2 ,5,1250,37 ,0
+2 ,12,3000,98 ,0
+9 ,5,1250,19 ,0
+2 ,2,500,11 ,0
+2 ,9,2250,74 ,0
+5 ,14,3500,86 ,0
+4 ,3,750,16 ,0
+4 ,3,750,16 ,0
+4 ,2,500,9 ,1
+4 ,3,750,16 ,1
+6 ,3,750,14 ,0
+2 ,2,500,11 ,0
+2 ,2,500,11 ,1
+2 ,2,500,11 ,0
+2 ,7,1750,58 ,1
+4 ,6,1500,39 ,0
+4 ,11,2750,78 ,0
+2 ,1,250,2 ,1
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,1
+2 ,1,250,2 ,1
+2 ,1,250,2 ,1
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+11 ,10,2500,35 ,0
+11 ,4,1000,16 ,1
+4 ,5,1250,33 ,1
+4 ,6,1500,41 ,1
+2 ,3,750,22 ,0
+4 ,4,1000,26 ,1
+10 ,4,1000,16 ,0
+2 ,4,1000,35 ,0
+4 ,12,3000,88 ,0
+13 ,8,2000,26 ,0
+11 ,9,2250,33 ,0
+4 ,5,1250,34 ,0
+4 ,4,1000,26 ,0
+8 ,15,3750,77 ,0
+4 ,5,1250,35 ,1
+4 ,7,1750,52 ,0
+4 ,7,1750,52 ,0
+2 ,4,1000,35 ,0
+11 ,11,2750,42 ,0
+2 ,2,500,14 ,0
+2 ,5,1250,47 ,1
+9 ,8,2000,38 ,1
+4 ,6,1500,47 ,0
+11 ,7,1750,29 ,0
+9 ,9,2250,45 ,0
+4 ,6,1500,52 ,0
+4 ,7,1750,58 ,0
+6 ,2,500,11 ,1
+4 ,7,1750,58 ,0
+11 ,9,2250,38 ,0
+11 ,6,1500,26 ,0
+2 ,2,500,16 ,0
+2 ,7,1750,76 ,0
+11 ,6,1500,27 ,0
+11 ,3,750,14 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,1
+4 ,1,250,4 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,1
+4 ,1,250,4 ,1
+4 ,1,250,4 ,0
+4 ,1,250,4 ,1
+4 ,1,250,4 ,1
+4 ,1,250,4 ,0
+4 ,3,750,24 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,1
+4 ,1,250,4 ,0
+10 ,8,2000,39 ,0
+14 ,7,1750,26 ,0
+8 ,10,2500,63 ,0
+11 ,3,750,15 ,0
+4 ,2,500,14 ,0
+2 ,4,1000,43 ,0
+8 ,9,2250,58 ,0
+8 ,8,2000,52 ,1
+11 ,22,5500,98 ,0
+4 ,3,750,25 ,1
+11 ,17,4250,79 ,1
+9 ,2,500,11 ,0
+4 ,5,1250,46 ,0
+11 ,12,3000,58 ,0
+7 ,12,3000,86 ,0
+11 ,2,500,11 ,0
+11 ,2,500,11 ,0
+11 ,2,500,11 ,0
+2 ,6,1500,75 ,0
+11 ,8,2000,41 ,1
+11 ,3,750,16 ,1
+12 ,13,3250,59 ,0
+2 ,3,750,35 ,0
+16 ,8,2000,28 ,0
+11 ,7,1750,37 ,0
+4 ,3,750,28 ,0
+12 ,12,3000,58 ,0
+4 ,4,1000,41 ,0
+11 ,14,3500,73 ,1
+2 ,2,500,23 ,0
+2 ,3,750,38 ,1
+4 ,5,1250,58 ,0
+4 ,4,1000,43 ,1
+3 ,2,500,23 ,0
+11 ,8,2000,46 ,0
+4 ,7,1750,82 ,0
+13 ,4,1000,21 ,0
+16 ,11,2750,40 ,0
+16 ,7,1750,28 ,0
+7 ,2,500,16 ,0
+4 ,5,1250,58 ,0
+4 ,5,1250,58 ,0
+4 ,4,1000,46 ,0
+14 ,13,3250,57 ,0
+4 ,3,750,34 ,0
+14 ,18,4500,78 ,0
+11 ,8,2000,48 ,0
+14 ,16,4000,70 ,0
+14 ,4,1000,22 ,1
+14 ,5,1250,26 ,0
+8 ,2,500,16 ,0
+11 ,5,1250,33 ,0
+11 ,2,500,14 ,0
+4 ,2,500,23 ,0
+9 ,2,500,16 ,1
+14 ,5,1250,28 ,1
+14 ,3,750,19 ,1
+14 ,4,1000,23 ,1
+16 ,12,3000,50 ,0
+11 ,4,1000,28 ,0
+11 ,5,1250,35 ,0
+11 ,5,1250,35 ,0
+2 ,4,1000,70 ,0
+14 ,5,1250,28 ,0
+14 ,2,500,14 ,0
+14 ,2,500,14 ,0
+14 ,2,500,14 ,0
+14 ,2,500,14 ,0
+14 ,2,500,14 ,0
+14 ,2,500,14 ,0
+2 ,3,750,52 ,0
+14 ,6,1500,34 ,0
+11 ,5,1250,37 ,1
+4 ,5,1250,74 ,0
+11 ,3,750,23 ,0
+16 ,4,1000,23 ,0
+16 ,3,750,19 ,0
+11 ,5,1250,38 ,0
+11 ,2,500,16 ,0
+12 ,9,2250,60 ,0
+9 ,1,250,9 ,0
+9 ,1,250,9 ,0
+4 ,2,500,29 ,0
+11 ,2,500,17 ,0
+14 ,4,1000,26 ,0
+11 ,9,2250,72 ,1
+11 ,5,1250,41 ,0
+15 ,16,4000,82 ,0
+9 ,5,1250,51 ,1
+11 ,4,1000,34 ,0
+14 ,8,2000,50 ,1
+16 ,7,1750,38 ,0
+14 ,2,500,16 ,0
+2 ,2,500,41 ,0
+14 ,16,4000,98 ,0
+14 ,4,1000,28 ,1
+16 ,7,1750,39 ,0
+14 ,7,1750,47 ,0
+16 ,6,1500,35 ,0
+16 ,6,1500,35 ,1
+11 ,7,1750,62 ,1
+16 ,2,500,16 ,0
+16 ,3,750,21 ,1
+11 ,3,750,28 ,0
+11 ,7,1750,64 ,0
+11 ,1,250,11 ,1
+9 ,3,750,34 ,0
+14 ,4,1000,30 ,0
+23 ,38,9500,98 ,0
+11 ,6,1500,58 ,0
+11 ,1,250,11 ,0
+11 ,1,250,11 ,0
+11 ,1,250,11 ,0
+11 ,1,250,11 ,0
+11 ,1,250,11 ,0
+11 ,1,250,11 ,0
+11 ,1,250,11 ,0
+11 ,1,250,11 ,0
+11 ,2,500,21 ,0
+11 ,5,1250,50 ,0
+11 ,2,500,21 ,0
+16 ,4,1000,28 ,0
+4 ,2,500,41 ,0
+16 ,6,1500,40 ,0
+14 ,3,750,26 ,0
+9 ,2,500,26 ,0
+21 ,16,4000,64 ,0
+14 ,6,1500,51 ,0
+11 ,2,500,24 ,0
+4 ,3,750,71 ,0
+21 ,13,3250,57 ,0
+11 ,6,1500,71 ,0
+14 ,2,500,21 ,1
+23 ,15,3750,57 ,0
+14 ,4,1000,38 ,0
+11 ,2,500,26 ,0
+16 ,5,1250,40 ,1
+4 ,2,500,51 ,1
+14 ,3,750,31 ,0
+4 ,2,500,52 ,0
+9 ,4,1000,65 ,0
+14 ,4,1000,40 ,0
+11 ,3,750,40 ,1
+14 ,5,1250,50 ,0
+14 ,1,250,14 ,0
+14 ,1,250,14 ,0
+14 ,1,250,14 ,0
+14 ,1,250,14 ,0
+14 ,1,250,14 ,0
+14 ,1,250,14 ,0
+14 ,1,250,14 ,0
+14 ,1,250,14 ,0
+14 ,7,1750,72 ,0
+14 ,1,250,14 ,0
+14 ,1,250,14 ,0
+9 ,3,750,52 ,0
+14 ,7,1750,73 ,0
+11 ,4,1000,58 ,0
+11 ,4,1000,59 ,0
+4 ,2,500,59 ,0
+11 ,4,1000,61 ,0
+16 ,4,1000,40 ,0
+16 ,10,2500,89 ,0
+21 ,2,500,21 ,1
+21 ,3,750,26 ,0
+16 ,8,2000,76 ,0
+21 ,3,750,26 ,1
+18 ,2,500,23 ,0
+23 ,5,1250,33 ,0
+23 ,8,2000,46 ,0
+16 ,3,750,34 ,0
+14 ,5,1250,64 ,0
+14 ,3,750,41 ,0
+16 ,1,250,16 ,0
+16 ,1,250,16 ,0
+16 ,1,250,16 ,0
+16 ,1,250,16 ,0
+16 ,1,250,16 ,0
+16 ,1,250,16 ,0
+16 ,1,250,16 ,0
+16 ,4,1000,45 ,0
+16 ,1,250,16 ,0
+16 ,1,250,16 ,0
+16 ,1,250,16 ,0
+16 ,1,250,16 ,0
+16 ,1,250,16 ,0
+16 ,2,500,26 ,0
+21 ,2,500,23 ,0
+16 ,2,500,27 ,0
+21 ,2,500,23 ,0
+21 ,2,500,23 ,0
+14 ,4,1000,57 ,0
+16 ,5,1250,60 ,0
+23 ,2,500,23 ,0
+14 ,5,1250,74 ,0
+23 ,3,750,28 ,0
+16 ,3,750,40 ,0
+9 ,2,500,52 ,0
+9 ,2,500,52 ,0
+16 ,7,1750,87 ,1
+14 ,4,1000,64 ,0
+14 ,2,500,35 ,0
+16 ,7,1750,93 ,0
+21 ,2,500,25 ,0
+14 ,3,750,52 ,0
+23 ,14,3500,93 ,0
+18 ,8,2000,95 ,0
+16 ,3,750,46 ,0
+11 ,3,750,76 ,0
+11 ,2,500,52 ,0
+11 ,3,750,76 ,0
+23 ,12,3000,86 ,0
+21 ,3,750,35 ,0
+23 ,2,500,26 ,0
+23 ,2,500,26 ,0
+23 ,8,2000,64 ,0
+16 ,3,750,50 ,0
+23 ,3,750,33 ,0
+21 ,3,750,38 ,0
+23 ,2,500,28 ,0
+21 ,1,250,21 ,0
+21 ,1,250,21 ,0
+21 ,1,250,21 ,0
+21 ,1,250,21 ,0
+21 ,1,250,21 ,0
+21 ,1,250,21 ,0
+21 ,1,250,21 ,0
+21 ,1,250,21 ,0
+21 ,1,250,21 ,0
+21 ,1,250,21 ,1
+21 ,1,250,21 ,0
+21 ,1,250,21 ,0
+21 ,5,1250,60 ,0
+23 ,4,1000,45 ,0
+21 ,4,1000,52 ,0
+22 ,1,250,22 ,1
+11 ,2,500,70 ,0
+23 ,5,1250,58 ,0
+23 ,3,750,40 ,0
+23 ,3,750,41 ,0
+14 ,3,750,83 ,0
+21 ,2,500,35 ,0
+26 ,5,1250,49 ,1
+23 ,6,1500,70 ,0
+23 ,1,250,23 ,0
+23 ,1,250,23 ,0
+23 ,1,250,23 ,0
+23 ,1,250,23 ,0
+23 ,1,250,23 ,0
+23 ,1,250,23 ,0
+23 ,1,250,23 ,0
+23 ,1,250,23 ,0
+23 ,4,1000,53 ,0
+21 ,6,1500,86 ,0
+23 ,3,750,48 ,0
+21 ,2,500,41 ,0
+21 ,3,750,64 ,0
+16 ,2,500,70 ,0
+21 ,3,750,70 ,0
+23 ,4,1000,87 ,0
+23 ,3,750,89 ,0
+23 ,2,500,87 ,0
+35 ,3,750,64 ,0
+38 ,1,250,38 ,0
+38 ,1,250,38 ,0
+40 ,1,250,40 ,0
+74 ,1,250,74 ,0
+2 ,43,10750,86 ,1
+6 ,22,5500,28 ,1
+2 ,34,8500,77 ,1
+2 ,44,11000,98 ,0
+0 ,26,6500,76 ,1
+2 ,41,10250,98 ,1
+3 ,21,5250,42 ,1
+2 ,11,2750,23 ,0
+2 ,21,5250,52 ,1
+2 ,13,3250,32 ,1
+4 ,4,1000,4 ,1
+2 ,11,2750,26 ,0
+2 ,11,2750,28 ,0
+3 ,14,3500,35 ,0
+4 ,16,4000,38 ,1
+4 ,6,1500,14 ,0
+3 ,5,1250,12 ,1
+4 ,33,8250,98 ,1
+3 ,10,2500,33 ,1
+4 ,10,2500,28 ,1
+2 ,11,2750,40 ,1
+2 ,11,2750,41 ,1
+4 ,13,3250,39 ,1
+1 ,10,2500,43 ,1
+4 ,9,2250,28 ,0
+2 ,4,1000,11 ,0
+2 ,5,1250,16 ,1
+2 ,15,3750,64 ,0
+5 ,24,6000,79 ,0
+2 ,6,1500,22 ,1
+4 ,5,1250,16 ,1
+2 ,4,1000,14 ,1
+4 ,8,2000,28 ,0
+2 ,4,1000,14 ,0
+2 ,6,1500,26 ,0
+4 ,5,1250,16 ,1
+2 ,7,1750,32 ,1
+2 ,6,1500,26 ,1
+2 ,8,2000,38 ,1
+2 ,2,500,4 ,1
+2 ,6,1500,28 ,1
+2 ,10,2500,52 ,0
+4 ,16,4000,70 ,1
+4 ,2,500,4 ,1
+1 ,14,3500,95 ,0
+4 ,2,500,4 ,1
+7 ,14,3500,48 ,0
+2 ,3,750,11 ,0
+2 ,12,3000,70 ,1
+4 ,7,1750,32 ,1
+4 ,4,1000,16 ,0
+2 ,6,1500,35 ,1
+4 ,6,1500,28 ,1
+2 ,3,750,14 ,0
+2 ,4,1000,23 ,0
+4 ,4,1000,18 ,0
+5 ,6,1500,28 ,0
+4 ,6,1500,30 ,0
+14 ,5,1250,14 ,0
+3 ,8,2000,50 ,0
+4 ,11,2750,64 ,1
+4 ,9,2250,52 ,0
+4 ,16,4000,98 ,1
+7 ,10,2500,47 ,0
+4 ,14,3500,86 ,0
+2 ,9,2250,75 ,0
+4 ,6,1500,35 ,0
+4 ,9,2250,55 ,0
+4 ,6,1500,35 ,1
+2 ,6,1500,45 ,0
+2 ,6,1500,47 ,0
+4 ,2,500,9 ,0
+2 ,2,500,11 ,1
+2 ,2,500,11 ,0
+2 ,2,500,11 ,1
+4 ,6,1500,38 ,1
+3 ,4,1000,29 ,1
+9 ,9,2250,38 ,0
+11 ,5,1250,18 ,0
+2 ,3,750,21 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,1
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,1
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+11 ,11,2750,38 ,0
+2 ,3,750,22 ,0
+9 ,11,2750,49 ,1
+5 ,11,2750,75 ,0
+3 ,5,1250,38 ,0
+3 ,1,250,3 ,1
+4 ,6,1500,43 ,0
+2 ,3,750,24 ,0
+12 ,11,2750,39 ,0
+2 ,2,500,14 ,0
+4 ,6,1500,46 ,0
+9 ,3,750,14 ,0
+14 ,8,2000,26 ,0
+4 ,2,500,13 ,0
+4 ,11,2750,95 ,0
+2 ,7,1750,77 ,0
+2 ,7,1750,77 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,1
+4 ,1,250,4 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,1
+4 ,1,250,4 ,0
+4 ,7,1750,62 ,0
+4 ,1,250,4 ,0
+4 ,4,1000,34 ,1
+11 ,6,1500,28 ,0
+13 ,3,750,14 ,1
+7 ,5,1250,35 ,0
+9 ,9,2250,54 ,0
+11 ,2,500,11 ,0
+2 ,5,1250,63 ,0
+7 ,11,2750,89 ,0
+8 ,9,2250,64 ,0
+2 ,2,500,22 ,0
+6 ,3,750,26 ,0
+12 ,15,3750,71 ,0
+13 ,3,750,16 ,0
+11 ,16,4000,89 ,0
+4 ,5,1250,58 ,0
+14 ,7,1750,35 ,0
+11 ,4,1000,27 ,0
+7 ,9,2250,89 ,1
+11 ,8,2000,52 ,1
+7 ,5,1250,52 ,0
+11 ,6,1500,41 ,0
+10 ,5,1250,38 ,0
+14 ,2,500,14 ,1
+14 ,2,500,14 ,0
+14 ,2,500,14 ,0
+2 ,2,500,33 ,0
+11 ,3,750,23 ,0
+14 ,8,2000,46 ,0
+9 ,1,250,9 ,0
+16 ,5,1250,27 ,0
+14 ,4,1000,26 ,0
+4 ,2,500,30 ,0
+14 ,3,750,21 ,0
+16 ,16,4000,77 ,0
+4 ,2,500,31 ,0
+14 ,8,2000,50 ,0
+11 ,3,750,26 ,0
+14 ,7,1750,45 ,0
+15 ,5,1250,33 ,0
+16 ,2,500,16 ,0
+16 ,3,750,21 ,0
+11 ,8,2000,72 ,0
+11 ,1,250,11 ,0
+11 ,1,250,11 ,0
+11 ,1,250,11 ,0
+11 ,1,250,11 ,1
+11 ,1,250,11 ,0
+2 ,3,750,75 ,1
+2 ,3,750,77 ,0
+16 ,4,1000,28 ,0
+16 ,15,3750,87 ,0
+16 ,14,3500,83 ,0
+16 ,10,2500,62 ,0
+16 ,3,750,23 ,0
+14 ,3,750,26 ,0
+23 ,19,4750,62 ,0
+11 ,7,1750,75 ,0
+14 ,3,750,28 ,0
+20 ,14,3500,69 ,1
+4 ,2,500,46 ,0
+11 ,2,500,25 ,0
+11 ,3,750,37 ,0
+16 ,4,1000,33 ,0
+21 ,7,1750,38 ,0
+13 ,7,1750,76 ,0
+16 ,6,1500,50 ,0
+14 ,3,750,33 ,0
+14 ,1,250,14 ,0
+14 ,1,250,14 ,0
+14 ,1,250,14 ,0
+14 ,1,250,14 ,0
+14 ,1,250,14 ,0
+14 ,1,250,14 ,0
+17 ,7,1750,58 ,1
+14 ,3,750,35 ,0
+14 ,3,750,35 ,0
+16 ,7,1750,64 ,0
+21 ,2,500,21 ,0
+16 ,3,750,35 ,0
+16 ,1,250,16 ,0
+16 ,1,250,16 ,0
+16 ,1,250,16 ,0
+16 ,1,250,16 ,0
+16 ,1,250,16 ,0
+14 ,2,500,29 ,0
+11 ,4,1000,74 ,0
+11 ,2,500,38 ,1
+21 ,6,1500,48 ,0
+23 ,2,500,23 ,0
+23 ,6,1500,45 ,0
+14 ,2,500,35 ,1
+16 ,6,1500,81 ,0
+16 ,4,1000,58 ,0
+16 ,5,1250,71 ,0
+21 ,2,500,26 ,0
+21 ,3,750,35 ,0
+21 ,3,750,35 ,0
+23 ,8,2000,69 ,0
+21 ,3,750,38 ,0
+23 ,3,750,35 ,0
+21 ,3,750,40 ,0
+23 ,2,500,28 ,0
+21 ,1,250,21 ,0
+21 ,1,250,21 ,0
+25 ,6,1500,50 ,0
+21 ,1,250,21 ,0
+21 ,1,250,21 ,0
+23 ,3,750,39 ,0
+21 ,2,500,33 ,0
+14 ,3,750,79 ,0
+23 ,1,250,23 ,1
+23 ,1,250,23 ,0
+23 ,1,250,23 ,0
+23 ,1,250,23 ,0
+23 ,1,250,23 ,0
+23 ,1,250,23 ,0
+23 ,1,250,23 ,0
+23 ,4,1000,52 ,0
+23 ,1,250,23 ,0
+23 ,7,1750,88 ,0
+16 ,3,750,86 ,0
+23 ,2,500,38 ,0
+21 ,2,500,52 ,0
+23 ,3,750,62 ,0
+39 ,1,250,39 ,0
 72 ,1,250,72 ,0
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/io/csv/in/transfusion_2.data/part-0
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/io/csv/in/transfusion_2.data/part-0 b/src/test/scripts/functions/io/csv/in/transfusion_2.data/part-0
index 0d37198..4eb1f4b 100644
--- a/src/test/scripts/functions/io/csv/in/transfusion_2.data/part-0
+++ b/src/test/scripts/functions/io/csv/in/transfusion_2.data/part-0
@@ -1,592 +1,592 @@
-Recency (months),Frequency (times),Monetary (c.c. blood),Time (months),"whether he/she donated blood in March 2007"
-2 ,50,12500,98 ,1
-0 ,13,3250,28 ,1
-1 ,16,4000,35 ,1
-2 ,20,5000,45 ,1
-1 ,24,6000,77 ,0
-4 ,4,1000,4 ,0
-2 ,7,1750,14 ,1
-1 ,12,3000,35 ,0
-2 ,9,2250,22 ,1
-5 ,46,11500,98 ,1
-4 ,23,5750,58 ,0
-0 ,3,750,4 ,0
-2 ,10,2500,28 ,1
-1 ,13,3250,47 ,0
-2 ,6,1500,15 ,1
-2 ,5,1250,11 ,1
-2 ,14,3500,48 ,1
-2 ,15,3750,49 ,1
-2 ,6,1500,15 ,1
-2 ,3,750,4 ,1
-2 ,3,750,4 ,1
-4 ,11,2750,28 ,0
-2 ,6,1500,16 ,1
-2 ,6,1500,16 ,1
-9 ,9,2250,16 ,0
-4 ,14,3500,40 ,0
-4 ,6,1500,14 ,0
-4 ,12,3000,34 ,1
-4 ,5,1250,11 ,1
-4 ,8,2000,21 ,0
-1 ,14,3500,58 ,0
-4 ,10,2500,28 ,1
-4 ,10,2500,28 ,1
-4 ,9,2250,26 ,1
-2 ,16,4000,64 ,0
-2 ,8,2000,28 ,1
-2 ,12,3000,47 ,1
-4 ,6,1500,16 ,1
-2 ,14,3500,57 ,1
-4 ,7,1750,22 ,1
-2 ,13,3250,53 ,1
-2 ,5,1250,16 ,0
-2 ,5,1250,16 ,1
-2 ,5,1250,16 ,0
-4 ,20,5000,69 ,1
-4 ,9,2250,28 ,1
-2 ,9,2250,36 ,0
-2 ,2,500,2 ,0
-2 ,2,500,2 ,0
-2 ,2,500,2 ,0
-2 ,11,2750,46 ,0
-2 ,11,2750,46 ,1
-2 ,6,1500,22 ,0
-2 ,12,3000,52 ,0
-4 ,5,1250,14 ,1
-4 ,19,4750,69 ,1
-4 ,8,2000,26 ,1
-2 ,7,1750,28 ,1
-2 ,16,4000,81 ,0
-3 ,6,1500,21 ,0
-2 ,7,1750,29 ,0
-2 ,8,2000,35 ,1
-2 ,10,2500,49 ,0
-4 ,5,1250,16 ,1
-2 ,3,750,9 ,1
-3 ,16,4000,74 ,0
-2 ,4,1000,14 ,1
-0 ,2,500,4 ,0
-4 ,7,1750,25 ,0
-1 ,9,2250,51 ,0
-2 ,4,1000,16 ,0
-2 ,4,1000,16 ,0
-4 ,17,4250,71 ,1
-2 ,2,500,4 ,0
-2 ,2,500,4 ,1
-2 ,2,500,4 ,1
-2 ,4,1000,16 ,1
-2 ,2,500,4 ,0
-2 ,2,500,4 ,0
-2 ,2,500,4 ,0
-4 ,6,1500,23 ,1
-2 ,4,1000,16 ,0
-2 ,4,1000,16 ,0
-2 ,4,1000,16 ,0
-2 ,6,1500,28 ,1
-2 ,6,1500,28 ,0
-4 ,2,500,4 ,0
-4 ,2,500,4 ,0
-4 ,2,500,4 ,0
-2 ,7,1750,35 ,1
-4 ,2,500,4 ,1
-4 ,2,500,4 ,0
-4 ,2,500,4 ,0
-4 ,2,500,4 ,0
-12 ,11,2750,23 ,0
-4 ,7,1750,28 ,0
-3 ,17,4250,86 ,0
-4 ,9,2250,38 ,1
-4 ,4,1000,14 ,1
-5 ,7,1750,26 ,1
-4 ,8,2000,34 ,1
-2 ,13,3250,76 ,1
-4 ,9,2250,40 ,0
-2 ,5,1250,26 ,0
-2 ,5,1250,26 ,0
-6 ,17,4250,70 ,0
-0 ,8,2000,59 ,0
-3 ,5,1250,26 ,0
-2 ,3,750,14 ,0
-2 ,10,2500,64 ,0
-4 ,5,1250,23 ,1
-4 ,9,2250,46 ,0
-4 ,5,1250,23 ,0
-4 ,8,2000,40 ,1
-2 ,12,3000,82 ,0
-11 ,24,6000,64 ,0
-2 ,7,1750,46 ,1
-4 ,11,2750,61 ,0
-1 ,7,1750,57 ,0
-2 ,11,2750,79 ,1
-2 ,3,750,16 ,1
-4 ,5,1250,26 ,1
-2 ,6,1500,41 ,1
-2 ,5,1250,33 ,1
-2 ,4,1000,26 ,0
-2 ,5,1250,34 ,0
-4 ,8,2000,46 ,1
-2 ,4,1000,26 ,0
-4 ,8,2000,48 ,1
-2 ,2,500,10 ,1
-4 ,5,1250,28 ,0
-2 ,12,3000,95 ,0
-2 ,2,500,10 ,0
-4 ,6,1500,35 ,0
-2 ,11,2750,88 ,0
-2 ,3,750,19 ,0
-2 ,5,1250,37 ,0
-2 ,12,3000,98 ,0
-9 ,5,1250,19 ,0
-2 ,2,500,11 ,0
-2 ,9,2250,74 ,0
-5 ,14,3500,86 ,0
-4 ,3,750,16 ,0
-4 ,3,750,16 ,0
-4 ,2,500,9 ,1
-4 ,3,750,16 ,1
-6 ,3,750,14 ,0
-2 ,2,500,11 ,0
-2 ,2,500,11 ,1
-2 ,2,500,11 ,0
-2 ,7,1750,58 ,1
-4 ,6,1500,39 ,0
-4 ,11,2750,78 ,0
-2 ,1,250,2 ,1
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,1
-2 ,1,250,2 ,1
-2 ,1,250,2 ,1
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-11 ,10,2500,35 ,0
-11 ,4,1000,16 ,1
-4 ,5,1250,33 ,1
-4 ,6,1500,41 ,1
-2 ,3,750,22 ,0
-4 ,4,1000,26 ,1
-10 ,4,1000,16 ,0
-2 ,4,1000,35 ,0
-4 ,12,3000,88 ,0
-13 ,8,2000,26 ,0
-11 ,9,2250,33 ,0
-4 ,5,1250,34 ,0
-4 ,4,1000,26 ,0
-8 ,15,3750,77 ,0
-4 ,5,1250,35 ,1
-4 ,7,1750,52 ,0
-4 ,7,1750,52 ,0
-2 ,4,1000,35 ,0
-11 ,11,2750,42 ,0
-2 ,2,500,14 ,0
-2 ,5,1250,47 ,1
-9 ,8,2000,38 ,1
-4 ,6,1500,47 ,0
-11 ,7,1750,29 ,0
-9 ,9,2250,45 ,0
-4 ,6,1500,52 ,0
-4 ,7,1750,58 ,0
-6 ,2,500,11 ,1
-4 ,7,1750,58 ,0
-11 ,9,2250,38 ,0
-11 ,6,1500,26 ,0
-2 ,2,500,16 ,0
-2 ,7,1750,76 ,0
-11 ,6,1500,27 ,0
-11 ,3,750,14 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,1
-4 ,1,250,4 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,1
-4 ,1,250,4 ,1
-4 ,1,250,4 ,0
-4 ,1,250,4 ,1
-4 ,1,250,4 ,1
-4 ,1,250,4 ,0
-4 ,3,750,24 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,1
-4 ,1,250,4 ,0
-10 ,8,2000,39 ,0
-14 ,7,1750,26 ,0
-8 ,10,2500,63 ,0
-11 ,3,750,15 ,0
-4 ,2,500,14 ,0
-2 ,4,1000,43 ,0
-8 ,9,2250,58 ,0
-8 ,8,2000,52 ,1
-11 ,22,5500,98 ,0
-4 ,3,750,25 ,1
-11 ,17,4250,79 ,1
-9 ,2,500,11 ,0
-4 ,5,1250,46 ,0
-11 ,12,3000,58 ,0
-7 ,12,3000,86 ,0
-11 ,2,500,11 ,0
-11 ,2,500,11 ,0
-11 ,2,500,11 ,0
-2 ,6,1500,75 ,0
-11 ,8,2000,41 ,1
-11 ,3,750,16 ,1
-12 ,13,3250,59 ,0
-2 ,3,750,35 ,0
-16 ,8,2000,28 ,0
-11 ,7,1750,37 ,0
-4 ,3,750,28 ,0
-12 ,12,3000,58 ,0
-4 ,4,1000,41 ,0
-11 ,14,3500,73 ,1
-2 ,2,500,23 ,0
-2 ,3,750,38 ,1
-4 ,5,1250,58 ,0
-4 ,4,1000,43 ,1
-3 ,2,500,23 ,0
-11 ,8,2000,46 ,0
-4 ,7,1750,82 ,0
-13 ,4,1000,21 ,0
-16 ,11,2750,40 ,0
-16 ,7,1750,28 ,0
-7 ,2,500,16 ,0
-4 ,5,1250,58 ,0
-4 ,5,1250,58 ,0
-4 ,4,1000,46 ,0
-14 ,13,3250,57 ,0
-4 ,3,750,34 ,0
-14 ,18,4500,78 ,0
-11 ,8,2000,48 ,0
-14 ,16,4000,70 ,0
-14 ,4,1000,22 ,1
-14 ,5,1250,26 ,0
-8 ,2,500,16 ,0
-11 ,5,1250,33 ,0
-11 ,2,500,14 ,0
-4 ,2,500,23 ,0
-9 ,2,500,16 ,1
-14 ,5,1250,28 ,1
-14 ,3,750,19 ,1
-14 ,4,1000,23 ,1
-16 ,12,3000,50 ,0
-11 ,4,1000,28 ,0
-11 ,5,1250,35 ,0
-11 ,5,1250,35 ,0
-2 ,4,1000,70 ,0
-14 ,5,1250,28 ,0
-14 ,2,500,14 ,0
-14 ,2,500,14 ,0
-14 ,2,500,14 ,0
-14 ,2,500,14 ,0
-14 ,2,500,14 ,0
-14 ,2,500,14 ,0
-2 ,3,750,52 ,0
-14 ,6,1500,34 ,0
-11 ,5,1250,37 ,1
-4 ,5,1250,74 ,0
-11 ,3,750,23 ,0
-16 ,4,1000,23 ,0
-16 ,3,750,19 ,0
-11 ,5,1250,38 ,0
-11 ,2,500,16 ,0
-12 ,9,2250,60 ,0
-9 ,1,250,9 ,0
-9 ,1,250,9 ,0
-4 ,2,500,29 ,0
-11 ,2,500,17 ,0
-14 ,4,1000,26 ,0
-11 ,9,2250,72 ,1
-11 ,5,1250,41 ,0
-15 ,16,4000,82 ,0
-9 ,5,1250,51 ,1
-11 ,4,1000,34 ,0
-14 ,8,2000,50 ,1
-16 ,7,1750,38 ,0
-14 ,2,500,16 ,0
-2 ,2,500,41 ,0
-14 ,16,4000,98 ,0
-14 ,4,1000,28 ,1
-16 ,7,1750,39 ,0
-14 ,7,1750,47 ,0
-16 ,6,1500,35 ,0
-16 ,6,1500,35 ,1
-11 ,7,1750,62 ,1
-16 ,2,500,16 ,0
-16 ,3,750,21 ,1
-11 ,3,750,28 ,0
-11 ,7,1750,64 ,0
-11 ,1,250,11 ,1
-9 ,3,750,34 ,0
-14 ,4,1000,30 ,0
-23 ,38,9500,98 ,0
-11 ,6,1500,58 ,0
-11 ,1,250,11 ,0
-11 ,1,250,11 ,0
-11 ,1,250,11 ,0
-11 ,1,250,11 ,0
-11 ,1,250,11 ,0
-11 ,1,250,11 ,0
-11 ,1,250,11 ,0
-11 ,1,250,11 ,0
-11 ,2,500,21 ,0
-11 ,5,1250,50 ,0
-11 ,2,500,21 ,0
-16 ,4,1000,28 ,0
-4 ,2,500,41 ,0
-16 ,6,1500,40 ,0
-14 ,3,750,26 ,0
-9 ,2,500,26 ,0
-21 ,16,4000,64 ,0
-14 ,6,1500,51 ,0
-11 ,2,500,24 ,0
-4 ,3,750,71 ,0
-21 ,13,3250,57 ,0
-11 ,6,1500,71 ,0
-14 ,2,500,21 ,1
-23 ,15,3750,57 ,0
-14 ,4,1000,38 ,0
-11 ,2,500,26 ,0
-16 ,5,1250,40 ,1
-4 ,2,500,51 ,1
-14 ,3,750,31 ,0
-4 ,2,500,52 ,0
-9 ,4,1000,65 ,0
-14 ,4,1000,40 ,0
-11 ,3,750,40 ,1
-14 ,5,1250,50 ,0
-14 ,1,250,14 ,0
-14 ,1,250,14 ,0
-14 ,1,250,14 ,0
-14 ,1,250,14 ,0
-14 ,1,250,14 ,0
-14 ,1,250,14 ,0
-14 ,1,250,14 ,0
-14 ,1,250,14 ,0
-14 ,7,1750,72 ,0
-14 ,1,250,14 ,0
-14 ,1,250,14 ,0
-9 ,3,750,52 ,0
-14 ,7,1750,73 ,0
-11 ,4,1000,58 ,0
-11 ,4,1000,59 ,0
-4 ,2,500,59 ,0
-11 ,4,1000,61 ,0
-16 ,4,1000,40 ,0
-16 ,10,2500,89 ,0
-21 ,2,500,21 ,1
-21 ,3,750,26 ,0
-16 ,8,2000,76 ,0
-21 ,3,750,26 ,1
-18 ,2,500,23 ,0
-23 ,5,1250,33 ,0
-23 ,8,2000,46 ,0
-16 ,3,750,34 ,0
-14 ,5,1250,64 ,0
-14 ,3,750,41 ,0
-16 ,1,250,16 ,0
-16 ,1,250,16 ,0
-16 ,1,250,16 ,0
-16 ,1,250,16 ,0
-16 ,1,250,16 ,0
-16 ,1,250,16 ,0
-16 ,1,250,16 ,0
-16 ,4,1000,45 ,0
-16 ,1,250,16 ,0
-16 ,1,250,16 ,0
-16 ,1,250,16 ,0
-16 ,1,250,16 ,0
-16 ,1,250,16 ,0
-16 ,2,500,26 ,0
-21 ,2,500,23 ,0
-16 ,2,500,27 ,0
-21 ,2,500,23 ,0
-21 ,2,500,23 ,0
-14 ,4,1000,57 ,0
-16 ,5,1250,60 ,0
-23 ,2,500,23 ,0
-14 ,5,1250,74 ,0
-23 ,3,750,28 ,0
-16 ,3,750,40 ,0
-9 ,2,500,52 ,0
-9 ,2,500,52 ,0
-16 ,7,1750,87 ,1
-14 ,4,1000,64 ,0
-14 ,2,500,35 ,0
-16 ,7,1750,93 ,0
-21 ,2,500,25 ,0
-14 ,3,750,52 ,0
-23 ,14,3500,93 ,0
-18 ,8,2000,95 ,0
-16 ,3,750,46 ,0
-11 ,3,750,76 ,0
-11 ,2,500,52 ,0
-11 ,3,750,76 ,0
-23 ,12,3000,86 ,0
-21 ,3,750,35 ,0
-23 ,2,500,26 ,0
-23 ,2,500,26 ,0
-23 ,8,2000,64 ,0
-16 ,3,750,50 ,0
-23 ,3,750,33 ,0
-21 ,3,750,38 ,0
-23 ,2,500,28 ,0
-21 ,1,250,21 ,0
-21 ,1,250,21 ,0
-21 ,1,250,21 ,0
-21 ,1,250,21 ,0
-21 ,1,250,21 ,0
-21 ,1,250,21 ,0
-21 ,1,250,21 ,0
-21 ,1,250,21 ,0
-21 ,1,250,21 ,0
-21 ,1,250,21 ,1
-21 ,1,250,21 ,0
-21 ,1,250,21 ,0
-21 ,5,1250,60 ,0
-23 ,4,1000,45 ,0
-21 ,4,1000,52 ,0
-22 ,1,250,22 ,1
-11 ,2,500,70 ,0
-23 ,5,1250,58 ,0
-23 ,3,750,40 ,0
-23 ,3,750,41 ,0
-14 ,3,750,83 ,0
-21 ,2,500,35 ,0
-26 ,5,1250,49 ,1
-23 ,6,1500,70 ,0
-23 ,1,250,23 ,0
-23 ,1,250,23 ,0
-23 ,1,250,23 ,0
-23 ,1,250,23 ,0
-23 ,1,250,23 ,0
-23 ,1,250,23 ,0
-23 ,1,250,23 ,0
-23 ,1,250,23 ,0
-23 ,4,1000,53 ,0
-21 ,6,1500,86 ,0
-23 ,3,750,48 ,0
-21 ,2,500,41 ,0
-21 ,3,750,64 ,0
-16 ,2,500,70 ,0
-21 ,3,750,70 ,0
-23 ,4,1000,87 ,0
-23 ,3,750,89 ,0
-23 ,2,500,87 ,0
-35 ,3,750,64 ,0
-38 ,1,250,38 ,0
-38 ,1,250,38 ,0
-40 ,1,250,40 ,0
-74 ,1,250,74 ,0
-2 ,43,10750,86 ,1
-6 ,22,5500,28 ,1
-2 ,34,8500,77 ,1
-2 ,44,11000,98 ,0
-0 ,26,6500,76 ,1
-2 ,41,10250,98 ,1
-3 ,21,5250,42 ,1
-2 ,11,2750,23 ,0
-2 ,21,5250,52 ,1
-2 ,13,3250,32 ,1
-4 ,4,1000,4 ,1
-2 ,11,2750,26 ,0
-2 ,11,2750,28 ,0
-3 ,14,3500,35 ,0
-4 ,16,4000,38 ,1
-4 ,6,1500,14 ,0
-3 ,5,1250,12 ,1
-4 ,33,8250,98 ,1
-3 ,10,2500,33 ,1
-4 ,10,2500,28 ,1
-2 ,11,2750,40 ,1
-2 ,11,2750,41 ,1
-4 ,13,3250,39 ,1
-1 ,10,2500,43 ,1
-4 ,9,2250,28 ,0
-2 ,4,1000,11 ,0
-2 ,5,1250,16 ,1
-2 ,15,3750,64 ,0
-5 ,24,6000,79 ,0
-2 ,6,1500,22 ,1
-4 ,5,1250,16 ,1
-2 ,4,1000,14 ,1
-4 ,8,2000,28 ,0
-2 ,4,1000,14 ,0
-2 ,6,1500,26 ,0
-4 ,5,1250,16 ,1
-2 ,7,1750,32 ,1
-2 ,6,1500,26 ,1
-2 ,8,2000,38 ,1
-2 ,2,500,4 ,1
-2 ,6,1500,28 ,1
-2 ,10,2500,52 ,0
-4 ,16,4000,70 ,1
-4 ,2,500,4 ,1
-1 ,14,3500,95 ,0
-4 ,2,500,4 ,1
-7 ,14,3500,48 ,0
-2 ,3,750,11 ,0
-2 ,12,3000,70 ,1
-4 ,7,1750,32 ,1
-4 ,4,1000,16 ,0
-2 ,6,1500,35 ,1
-4 ,6,1500,28 ,1
-2 ,3,750,14 ,0
-2 ,4,1000,23 ,0
-4 ,4,1000,18 ,0
-5 ,6,1500,28 ,0
-4 ,6,1500,30 ,0
-14 ,5,1250,14 ,0
-3 ,8,2000,50 ,0
-4 ,11,2750,64 ,1
-4 ,9,2250,52 ,0
-4 ,16,4000,98 ,1
-7 ,10,2500,47 ,0
-4 ,14,3500,86 ,0
-2 ,9,2250,75 ,0
-4 ,6,1500,35 ,0
-4 ,9,2250,55 ,0
-4 ,6,1500,35 ,1
-2 ,6,1500,45 ,0
-2 ,6,1500,47 ,0
-4 ,2,500,9 ,0
-2 ,2,500,11 ,1
-2 ,2,500,11 ,0
-2 ,2,500,11 ,1
-4 ,6,1500,38 ,1
-3 ,4,1000,29 ,1
-9 ,9,2250,38 ,0
-11 ,5,1250,18 ,0
-2 ,3,750,21 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,1
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,1
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
+Recency (months),Frequency (times),Monetary (c.c. blood),Time (months),"whether he/she donated blood in March 2007"
+2 ,50,12500,98 ,1
+0 ,13,3250,28 ,1
+1 ,16,4000,35 ,1
+2 ,20,5000,45 ,1
+1 ,24,6000,77 ,0
+4 ,4,1000,4 ,0
+2 ,7,1750,14 ,1
+1 ,12,3000,35 ,0
+2 ,9,2250,22 ,1
+5 ,46,11500,98 ,1
+4 ,23,5750,58 ,0
+0 ,3,750,4 ,0
+2 ,10,2500,28 ,1
+1 ,13,3250,47 ,0
+2 ,6,1500,15 ,1
+2 ,5,1250,11 ,1
+2 ,14,3500,48 ,1
+2 ,15,3750,49 ,1
+2 ,6,1500,15 ,1
+2 ,3,750,4 ,1
+2 ,3,750,4 ,1
+4 ,11,2750,28 ,0
+2 ,6,1500,16 ,1
+2 ,6,1500,16 ,1
+9 ,9,2250,16 ,0
+4 ,14,3500,40 ,0
+4 ,6,1500,14 ,0
+4 ,12,3000,34 ,1
+4 ,5,1250,11 ,1
+4 ,8,2000,21 ,0
+1 ,14,3500,58 ,0
+4 ,10,2500,28 ,1
+4 ,10,2500,28 ,1
+4 ,9,2250,26 ,1
+2 ,16,4000,64 ,0
+2 ,8,2000,28 ,1
+2 ,12,3000,47 ,1
+4 ,6,1500,16 ,1
+2 ,14,3500,57 ,1
+4 ,7,1750,22 ,1
+2 ,13,3250,53 ,1
+2 ,5,1250,16 ,0
+2 ,5,1250,16 ,1
+2 ,5,1250,16 ,0
+4 ,20,5000,69 ,1
+4 ,9,2250,28 ,1
+2 ,9,2250,36 ,0
+2 ,2,500,2 ,0
+2 ,2,500,2 ,0
+2 ,2,500,2 ,0
+2 ,11,2750,46 ,0
+2 ,11,2750,46 ,1
+2 ,6,1500,22 ,0
+2 ,12,3000,52 ,0
+4 ,5,1250,14 ,1
+4 ,19,4750,69 ,1
+4 ,8,2000,26 ,1
+2 ,7,1750,28 ,1
+2 ,16,4000,81 ,0
+3 ,6,1500,21 ,0
+2 ,7,1750,29 ,0
+2 ,8,2000,35 ,1
+2 ,10,2500,49 ,0
+4 ,5,1250,16 ,1
+2 ,3,750,9 ,1
+3 ,16,4000,74 ,0
+2 ,4,1000,14 ,1
+0 ,2,500,4 ,0
+4 ,7,1750,25 ,0
+1 ,9,2250,51 ,0
+2 ,4,1000,16 ,0
+2 ,4,1000,16 ,0
+4 ,17,4250,71 ,1
+2 ,2,500,4 ,0
+2 ,2,500,4 ,1
+2 ,2,500,4 ,1
+2 ,4,1000,16 ,1
+2 ,2,500,4 ,0
+2 ,2,500,4 ,0
+2 ,2,500,4 ,0
+4 ,6,1500,23 ,1
+2 ,4,1000,16 ,0
+2 ,4,1000,16 ,0
+2 ,4,1000,16 ,0
+2 ,6,1500,28 ,1
+2 ,6,1500,28 ,0
+4 ,2,500,4 ,0
+4 ,2,500,4 ,0
+4 ,2,500,4 ,0
+2 ,7,1750,35 ,1
+4 ,2,500,4 ,1
+4 ,2,500,4 ,0
+4 ,2,500,4 ,0
+4 ,2,500,4 ,0
+12 ,11,2750,23 ,0
+4 ,7,1750,28 ,0
+3 ,17,4250,86 ,0
+4 ,9,2250,38 ,1
+4 ,4,1000,14 ,1
+5 ,7,1750,26 ,1
+4 ,8,2000,34 ,1
+2 ,13,3250,76 ,1
+4 ,9,2250,40 ,0
+2 ,5,1250,26 ,0
+2 ,5,1250,26 ,0
+6 ,17,4250,70 ,0
+0 ,8,2000,59 ,0
+3 ,5,1250,26 ,0
+2 ,3,750,14 ,0
+2 ,10,2500,64 ,0
+4 ,5,1250,23 ,1
+4 ,9,2250,46 ,0
+4 ,5,1250,23 ,0
+4 ,8,2000,40 ,1
+2 ,12,3000,82 ,0
+11 ,24,6000,64 ,0
+2 ,7,1750,46 ,1
+4 ,11,2750,61 ,0
+1 ,7,1750,57 ,0
+2 ,11,2750,79 ,1
+2 ,3,750,16 ,1
+4 ,5,1250,26 ,1
+2 ,6,1500,41 ,1
+2 ,5,1250,33 ,1
+2 ,4,1000,26 ,0
+2 ,5,1250,34 ,0
+4 ,8,2000,46 ,1
+2 ,4,1000,26 ,0
+4 ,8,2000,48 ,1
+2 ,2,500,10 ,1
+4 ,5,1250,28 ,0
+2 ,12,3000,95 ,0
+2 ,2,500,10 ,0
+4 ,6,1500,35 ,0
+2 ,11,2750,88 ,0
+2 ,3,750,19 ,0
+2 ,5,1250,37 ,0
+2 ,12,3000,98 ,0
+9 ,5,1250,19 ,0
+2 ,2,500,11 ,0
+2 ,9,2250,74 ,0
+5 ,14,3500,86 ,0
+4 ,3,750,16 ,0
+4 ,3,750,16 ,0
+4 ,2,500,9 ,1
+4 ,3,750,16 ,1
+6 ,3,750,14 ,0
+2 ,2,500,11 ,0
+2 ,2,500,11 ,1
+2 ,2,500,11 ,0
+2 ,7,1750,58 ,1
+4 ,6,1500,39 ,0
+4 ,11,2750,78 ,0
+2 ,1,250,2 ,1
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,1
+2 ,1,250,2 ,1
+2 ,1,250,2 ,1
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+11 ,10,2500,35 ,0
+11 ,4,1000,16 ,1
+4 ,5,1250,33 ,1
+4 ,6,1500,41 ,1
+2 ,3,750,22 ,0
+4 ,4,1000,26 ,1
+10 ,4,1000,16 ,0
+2 ,4,1000,35 ,0
+4 ,12,3000,88 ,0
+13 ,8,2000,26 ,0
+11 ,9,2250,33 ,0
+4 ,5,1250,34 ,0
+4 ,4,1000,26 ,0
+8 ,15,3750,77 ,0
+4 ,5,1250,35 ,1
+4 ,7,1750,52 ,0
+4 ,7,1750,52 ,0
+2 ,4,1000,35 ,0
+11 ,11,2750,42 ,0
+2 ,2,500,14 ,0
+2 ,5,1250,47 ,1
+9 ,8,2000,38 ,1
+4 ,6,1500,47 ,0
+11 ,7,1750,29 ,0
+9 ,9,2250,45 ,0
+4 ,6,1500,52 ,0
+4 ,7,1750,58 ,0
+6 ,2,500,11 ,1
+4 ,7,1750,58 ,0
+11 ,9,2250,38 ,0
+11 ,6,1500,26 ,0
+2 ,2,500,16 ,0
+2 ,7,1750,76 ,0
+11 ,6,1500,27 ,0
+11 ,3,750,14 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,1
+4 ,1,250,4 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,1
+4 ,1,250,4 ,1
+4 ,1,250,4 ,0
+4 ,1,250,4 ,1
+4 ,1,250,4 ,1
+4 ,1,250,4 ,0
+4 ,3,750,24 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,1
+4 ,1,250,4 ,0
+10 ,8,2000,39 ,0
+14 ,7,1750,26 ,0
+8 ,10,2500,63 ,0
+11 ,3,750,15 ,0
+4 ,2,500,14 ,0
+2 ,4,1000,43 ,0
+8 ,9,2250,58 ,0
+8 ,8,2000,52 ,1
+11 ,22,5500,98 ,0
+4 ,3,750,25 ,1
+11 ,17,4250,79 ,1
+9 ,2,500,11 ,0
+4 ,5,1250,46 ,0
+11 ,12,3000,58 ,0
+7 ,12,3000,86 ,0
+11 ,2,500,11 ,0
+11 ,2,500,11 ,0
+11 ,2,500,11 ,0
+2 ,6,1500,75 ,0
+11 ,8,2000,41 ,1
+11 ,3,750,16 ,1
+12 ,13,3250,59 ,0
+2 ,3,750,35 ,0
+16 ,8,2000,28 ,0
+11 ,7,1750,37 ,0
+4 ,3,750,28 ,0
+12 ,12,3000,58 ,0
+4 ,4,1000,41 ,0
+11 ,14,3500,73 ,1
+2 ,2,500,23 ,0
+2 ,3,750,38 ,1
+4 ,5,1250,58 ,0
+4 ,4,1000,43 ,1
+3 ,2,500,23 ,0
+11 ,8,2000,46 ,0
+4 ,7,1750,82 ,0
+13 ,4,1000,21 ,0
+16 ,11,2750,40 ,0
+16 ,7,1750,28 ,0
+7 ,2,500,16 ,0
+4 ,5,1250,58 ,0
+4 ,5,1250,58 ,0
+4 ,4,1000,46 ,0
+14 ,13,3250,57 ,0
+4 ,3,750,34 ,0
+14 ,18,4500,78 ,0
+11 ,8,2000,48 ,0
+14 ,16,4000,70 ,0
+14 ,4,1000,22 ,1
+14 ,5,1250,26 ,0
+8 ,2,500,16 ,0
+11 ,5,1250,33 ,0
+11 ,2,500,14 ,0
+4 ,2,500,23 ,0
+9 ,2,500,16 ,1
+14 ,5,1250,28 ,1
+14 ,3,750,19 ,1
+14 ,4,1000,23 ,1
+16 ,12,3000,50 ,0
+11 ,4,1000,28 ,0
+11 ,5,1250,35 ,0
+11 ,5,1250,35 ,0
+2 ,4,1000,70 ,0
+14 ,5,1250,28 ,0
+14 ,2,500,14 ,0
+14 ,2,500,14 ,0
+14 ,2,500,14 ,0
+14 ,2,500,14 ,0
+14 ,2,500,14 ,0
+14 ,2,500,14 ,0
+2 ,3,750,52 ,0
+14 ,6,1500,34 ,0
+11 ,5,1250,37 ,1
+4 ,5,1250,74 ,0
+11 ,3,750,23 ,0
+16 ,4,1000,23 ,0
+16 ,3,750,19 ,0
+11 ,5,1250,38 ,0
+11 ,2,500,16 ,0
+12 ,9,2250,60 ,0
+9 ,1,250,9 ,0
+9 ,1,250,9 ,0
+4 ,2,500,29 ,0
+11 ,2,500,17 ,0
+14 ,4,1000,26 ,0
+11 ,9,2250,72 ,1
+11 ,5,1250,41 ,0
+15 ,16,4000,82 ,0
+9 ,5,1250,51 ,1
+11 ,4,1000,34 ,0
+14 ,8,2000,50 ,1
+16 ,7,1750,38 ,0
+14 ,2,500,16 ,0
+2 ,2,500,41 ,0
+14 ,16,4000,98 ,0
+14 ,4,1000,28 ,1
+16 ,7,1750,39 ,0
+14 ,7,1750,47 ,0
+16 ,6,1500,35 ,0
+16 ,6,1500,35 ,1
+11 ,7,1750,62 ,1
+16 ,2,500,16 ,0
+16 ,3,750,21 ,1
+11 ,3,750,28 ,0
+11 ,7,1750,64 ,0
+11 ,1,250,11 ,1
+9 ,3,750,34 ,0
+14 ,4,1000,30 ,0
+23 ,38,9500,98 ,0
+11 ,6,1500,58 ,0
+11 ,1,250,11 ,0
+11 ,1,250,11 ,0
+11 ,1,250,11 ,0
+11 ,1,250,11 ,0
+11 ,1,250,11 ,0
+11 ,1,250,11 ,0
+11 ,1,250,11 ,0
+11 ,1,250,11 ,0
+11 ,2,500,21 ,0
+11 ,5,1250,50 ,0
+11 ,2,500,21 ,0
+16 ,4,1000,28 ,0
+4 ,2,500,41 ,0
+16 ,6,1500,40 ,0
+14 ,3,750,26 ,0
+9 ,2,500,26 ,0
+21 ,16,4000,64 ,0
+14 ,6,1500,51 ,0
+11 ,2,500,24 ,0
+4 ,3,750,71 ,0
+21 ,13,3250,57 ,0
+11 ,6,1500,71 ,0
+14 ,2,500,21 ,1
+23 ,15,3750,57 ,0
+14 ,4,1000,38 ,0
+11 ,2,500,26 ,0
+16 ,5,1250,40 ,1
+4 ,2,500,51 ,1
+14 ,3,750,31 ,0
+4 ,2,500,52 ,0
+9 ,4,1000,65 ,0
+14 ,4,1000,40 ,0
+11 ,3,750,40 ,1
+14 ,5,1250,50 ,0
+14 ,1,250,14 ,0
+14 ,1,250,14 ,0
+14 ,1,250,14 ,0
+14 ,1,250,14 ,0
+14 ,1,250,14 ,0
+14 ,1,250,14 ,0
+14 ,1,250,14 ,0
+14 ,1,250,14 ,0
+14 ,7,1750,72 ,0
+14 ,1,250,14 ,0
+14 ,1,250,14 ,0
+9 ,3,750,52 ,0
+14 ,7,1750,73 ,0
+11 ,4,1000,58 ,0
+11 ,4,1000,59 ,0
+4 ,2,500,59 ,0
+11 ,4,1000,61 ,0
+16 ,4,1000,40 ,0
+16 ,10,2500,89 ,0
+21 ,2,500,21 ,1
+21 ,3,750,26 ,0
+16 ,8,2000,76 ,0
+21 ,3,750,26 ,1
+18 ,2,500,23 ,0
+23 ,5,1250,33 ,0
+23 ,8,2000,46 ,0
+16 ,3,750,34 ,0
+14 ,5,1250,64 ,0
+14 ,3,750,41 ,0
+16 ,1,250,16 ,0
+16 ,1,250,16 ,0
+16 ,1,250,16 ,0
+16 ,1,250,16 ,0
+16 ,1,250,16 ,0
+16 ,1,250,16 ,0
+16 ,1,250,16 ,0
+16 ,4,1000,45 ,0
+16 ,1,250,16 ,0
+16 ,1,250,16 ,0
+16 ,1,250,16 ,0
+16 ,1,250,16 ,0
+16 ,1,250,16 ,0
+16 ,2,500,26 ,0
+21 ,2,500,23 ,0
+16 ,2,500,27 ,0
+21 ,2,500,23 ,0
+21 ,2,500,23 ,0
+14 ,4,1000,57 ,0
+16 ,5,1250,60 ,0
+23 ,2,500,23 ,0
+14 ,5,1250,74 ,0
+23 ,3,750,28 ,0
+16 ,3,750,40 ,0
+9 ,2,500,52 ,0
+9 ,2,500,52 ,0
+16 ,7,1750,87 ,1
+14 ,4,1000,64 ,0
+14 ,2,500,35 ,0
+16 ,7,1750,93 ,0
+21 ,2,500,25 ,0
+14 ,3,750,52 ,0
+23 ,14,3500,93 ,0
+18 ,8,2000,95 ,0
+16 ,3,750,46 ,0
+11 ,3,750,76 ,0
+11 ,2,500,52 ,0
+11 ,3,750,76 ,0
+23 ,12,3000,86 ,0
+21 ,3,750,35 ,0
+23 ,2,500,26 ,0
+23 ,2,500,26 ,0
+23 ,8,2000,64 ,0
+16 ,3,750,50 ,0
+23 ,3,750,33 ,0
+21 ,3,750,38 ,0
+23 ,2,500,28 ,0
+21 ,1,250,21 ,0
+21 ,1,250,21 ,0
+21 ,1,250,21 ,0
+21 ,1,250,21 ,0
+21 ,1,250,21 ,0
+21 ,1,250,21 ,0
+21 ,1,250,21 ,0
+21 ,1,250,21 ,0
+21 ,1,250,21 ,0
+21 ,1,250,21 ,1
+21 ,1,250,21 ,0
+21 ,1,250,21 ,0
+21 ,5,1250,60 ,0
+23 ,4,1000,45 ,0
+21 ,4,1000,52 ,0
+22 ,1,250,22 ,1
+11 ,2,500,70 ,0
+23 ,5,1250,58 ,0
+23 ,3,750,40 ,0
+23 ,3,750,41 ,0
+14 ,3,750,83 ,0
+21 ,2,500,35 ,0
+26 ,5,1250,49 ,1
+23 ,6,1500,70 ,0
+23 ,1,250,23 ,0
+23 ,1,250,23 ,0
+23 ,1,250,23 ,0
+23 ,1,250,23 ,0
+23 ,1,250,23 ,0
+23 ,1,250,23 ,0
+23 ,1,250,23 ,0
+23 ,1,250,23 ,0
+23 ,4,1000,53 ,0
+21 ,6,1500,86 ,0
+23 ,3,750,48 ,0
+21 ,2,500,41 ,0
+21 ,3,750,64 ,0
+16 ,2,500,70 ,0
+21 ,3,750,70 ,0
+23 ,4,1000,87 ,0
+23 ,3,750,89 ,0
+23 ,2,500,87 ,0
+35 ,3,750,64 ,0
+38 ,1,250,38 ,0
+38 ,1,250,38 ,0
+40 ,1,250,40 ,0
+74 ,1,250,74 ,0
+2 ,43,10750,86 ,1
+6 ,22,5500,28 ,1
+2 ,34,8500,77 ,1
+2 ,44,11000,98 ,0
+0 ,26,6500,76 ,1
+2 ,41,10250,98 ,1
+3 ,21,5250,42 ,1
+2 ,11,2750,23 ,0
+2 ,21,5250,52 ,1
+2 ,13,3250,32 ,1
+4 ,4,1000,4 ,1
+2 ,11,2750,26 ,0
+2 ,11,2750,28 ,0
+3 ,14,3500,35 ,0
+4 ,16,4000,38 ,1
+4 ,6,1500,14 ,0
+3 ,5,1250,12 ,1
+4 ,33,8250,98 ,1
+3 ,10,2500,33 ,1
+4 ,10,2500,28 ,1
+2 ,11,2750,40 ,1
+2 ,11,2750,41 ,1
+4 ,13,3250,39 ,1
+1 ,10,2500,43 ,1
+4 ,9,2250,28 ,0
+2 ,4,1000,11 ,0
+2 ,5,1250,16 ,1
+2 ,15,3750,64 ,0
+5 ,24,6000,79 ,0
+2 ,6,1500,22 ,1
+4 ,5,1250,16 ,1
+2 ,4,1000,14 ,1
+4 ,8,2000,28 ,0
+2 ,4,1000,14 ,0
+2 ,6,1500,26 ,0
+4 ,5,1250,16 ,1
+2 ,7,1750,32 ,1
+2 ,6,1500,26 ,1
+2 ,8,2000,38 ,1
+2 ,2,500,4 ,1
+2 ,6,1500,28 ,1
+2 ,10,2500,52 ,0
+4 ,16,4000,70 ,1
+4 ,2,500,4 ,1
+1 ,14,3500,95 ,0
+4 ,2,500,4 ,1
+7 ,14,3500,48 ,0
+2 ,3,750,11 ,0
+2 ,12,3000,70 ,1
+4 ,7,1750,32 ,1
+4 ,4,1000,16 ,0
+2 ,6,1500,35 ,1
+4 ,6,1500,28 ,1
+2 ,3,750,14 ,0
+2 ,4,1000,23 ,0
+4 ,4,1000,18 ,0
+5 ,6,1500,28 ,0
+4 ,6,1500,30 ,0
+14 ,5,1250,14 ,0
+3 ,8,2000,50 ,0
+4 ,11,2750,64 ,1
+4 ,9,2250,52 ,0
+4 ,16,4000,98 ,1
+7 ,10,2500,47 ,0
+4 ,14,3500,86 ,0
+2 ,9,2250,75 ,0
+4 ,6,1500,35 ,0
+4 ,9,2250,55 ,0
+4 ,6,1500,35 ,1
+2 ,6,1500,45 ,0
+2 ,6,1500,47 ,0
+4 ,2,500,9 ,0
+2 ,2,500,11 ,1
+2 ,2,500,11 ,0
+2 ,2,500,11 ,1
+4 ,6,1500,38 ,1
+3 ,4,1000,29 ,1
+9 ,9,2250,38 ,0
+11 ,5,1250,18 ,0
+2 ,3,750,21 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,1
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,1
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
 2 ,1,250,2 ,0
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/io/csv/in/transfusion_2.data/part-1
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/io/csv/in/transfusion_2.data/part-1 b/src/test/scripts/functions/io/csv/in/transfusion_2.data/part-1
index fb20283..fd61f79 100644
--- a/src/test/scripts/functions/io/csv/in/transfusion_2.data/part-1
+++ b/src/test/scripts/functions/io/csv/in/transfusion_2.data/part-1
@@ -1,157 +1,157 @@
-11 ,11,2750,38 ,0
-2 ,3,750,22 ,0
-9 ,11,2750,49 ,1
-5 ,11,2750,75 ,0
-3 ,5,1250,38 ,0
-3 ,1,250,3 ,1
-4 ,6,1500,43 ,0
-2 ,3,750,24 ,0
-12 ,11,2750,39 ,0
-2 ,2,500,14 ,0
-4 ,6,1500,46 ,0
-9 ,3,750,14 ,0
-14 ,8,2000,26 ,0
-4 ,2,500,13 ,0
-4 ,11,2750,95 ,0
-2 ,7,1750,77 ,0
-2 ,7,1750,77 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,1
-4 ,1,250,4 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,1
-4 ,1,250,4 ,0
-4 ,7,1750,62 ,0
-4 ,1,250,4 ,0
-4 ,4,1000,34 ,1
-11 ,6,1500,28 ,0
-13 ,3,750,14 ,1
-7 ,5,1250,35 ,0
-9 ,9,2250,54 ,0
-11 ,2,500,11 ,0
-2 ,5,1250,63 ,0
-7 ,11,2750,89 ,0
-8 ,9,2250,64 ,0
-2 ,2,500,22 ,0
-6 ,3,750,26 ,0
-12 ,15,3750,71 ,0
-13 ,3,750,16 ,0
-11 ,16,4000,89 ,0
-4 ,5,1250,58 ,0
-14 ,7,1750,35 ,0
-11 ,4,1000,27 ,0
-7 ,9,2250,89 ,1
-11 ,8,2000,52 ,1
-7 ,5,1250,52 ,0
-11 ,6,1500,41 ,0
-10 ,5,1250,38 ,0
-14 ,2,500,14 ,1
-14 ,2,500,14 ,0
-14 ,2,500,14 ,0
-2 ,2,500,33 ,0
-11 ,3,750,23 ,0
-14 ,8,2000,46 ,0
-9 ,1,250,9 ,0
-16 ,5,1250,27 ,0
-14 ,4,1000,26 ,0
-4 ,2,500,30 ,0
-14 ,3,750,21 ,0
-16 ,16,4000,77 ,0
-4 ,2,500,31 ,0
-14 ,8,2000,50 ,0
-11 ,3,750,26 ,0
-14 ,7,1750,45 ,0
-15 ,5,1250,33 ,0
-16 ,2,500,16 ,0
-16 ,3,750,21 ,0
-11 ,8,2000,72 ,0
-11 ,1,250,11 ,0
-11 ,1,250,11 ,0
-11 ,1,250,11 ,0
-11 ,1,250,11 ,1
-11 ,1,250,11 ,0
-2 ,3,750,75 ,1
-2 ,3,750,77 ,0
-16 ,4,1000,28 ,0
-16 ,15,3750,87 ,0
-16 ,14,3500,83 ,0
-16 ,10,2500,62 ,0
-16 ,3,750,23 ,0
-14 ,3,750,26 ,0
-23 ,19,4750,62 ,0
-11 ,7,1750,75 ,0
-14 ,3,750,28 ,0
-20 ,14,3500,69 ,1
-4 ,2,500,46 ,0
-11 ,2,500,25 ,0
-11 ,3,750,37 ,0
-16 ,4,1000,33 ,0
-21 ,7,1750,38 ,0
-13 ,7,1750,76 ,0
-16 ,6,1500,50 ,0
-14 ,3,750,33 ,0
-14 ,1,250,14 ,0
-14 ,1,250,14 ,0
-14 ,1,250,14 ,0
-14 ,1,250,14 ,0
-14 ,1,250,14 ,0
-14 ,1,250,14 ,0
-17 ,7,1750,58 ,1
-14 ,3,750,35 ,0
-14 ,3,750,35 ,0
-16 ,7,1750,64 ,0
-21 ,2,500,21 ,0
-16 ,3,750,35 ,0
-16 ,1,250,16 ,0
-16 ,1,250,16 ,0
-16 ,1,250,16 ,0
-16 ,1,250,16 ,0
-16 ,1,250,16 ,0
-14 ,2,500,29 ,0
-11 ,4,1000,74 ,0
-11 ,2,500,38 ,1
-21 ,6,1500,48 ,0
-23 ,2,500,23 ,0
-23 ,6,1500,45 ,0
-14 ,2,500,35 ,1
-16 ,6,1500,81 ,0
-16 ,4,1000,58 ,0
-16 ,5,1250,71 ,0
-21 ,2,500,26 ,0
-21 ,3,750,35 ,0
-21 ,3,750,35 ,0
-23 ,8,2000,69 ,0
-21 ,3,750,38 ,0
-23 ,3,750,35 ,0
-21 ,3,750,40 ,0
-23 ,2,500,28 ,0
-21 ,1,250,21 ,0
-21 ,1,250,21 ,0
-25 ,6,1500,50 ,0
-21 ,1,250,21 ,0
-21 ,1,250,21 ,0
-23 ,3,750,39 ,0
-21 ,2,500,33 ,0
-14 ,3,750,79 ,0
-23 ,1,250,23 ,1
-23 ,1,250,23 ,0
-23 ,1,250,23 ,0
-23 ,1,250,23 ,0
-23 ,1,250,23 ,0
-23 ,1,250,23 ,0
-23 ,1,250,23 ,0
-23 ,4,1000,52 ,0
-23 ,1,250,23 ,0
-23 ,7,1750,88 ,0
-16 ,3,750,86 ,0
-23 ,2,500,38 ,0
-21 ,2,500,52 ,0
-23 ,3,750,62 ,0
-39 ,1,250,39 ,0
+11 ,11,2750,38 ,0
+2 ,3,750,22 ,0
+9 ,11,2750,49 ,1
+5 ,11,2750,75 ,0
+3 ,5,1250,38 ,0
+3 ,1,250,3 ,1
+4 ,6,1500,43 ,0
+2 ,3,750,24 ,0
+12 ,11,2750,39 ,0
+2 ,2,500,14 ,0
+4 ,6,1500,46 ,0
+9 ,3,750,14 ,0
+14 ,8,2000,26 ,0
+4 ,2,500,13 ,0
+4 ,11,2750,95 ,0
+2 ,7,1750,77 ,0
+2 ,7,1750,77 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,1
+4 ,1,250,4 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,1
+4 ,1,250,4 ,0
+4 ,7,1750,62 ,0
+4 ,1,250,4 ,0
+4 ,4,1000,34 ,1
+11 ,6,1500,28 ,0
+13 ,3,750,14 ,1
+7 ,5,1250,35 ,0
+9 ,9,2250,54 ,0
+11 ,2,500,11 ,0
+2 ,5,1250,63 ,0
+7 ,11,2750,89 ,0
+8 ,9,2250,64 ,0
+2 ,2,500,22 ,0
+6 ,3,750,26 ,0
+12 ,15,3750,71 ,0
+13 ,3,750,16 ,0
+11 ,16,4000,89 ,0
+4 ,5,1250,58 ,0
+14 ,7,1750,35 ,0
+11 ,4,1000,27 ,0
+7 ,9,2250,89 ,1
+11 ,8,2000,52 ,1
+7 ,5,1250,52 ,0
+11 ,6,1500,41 ,0
+10 ,5,1250,38 ,0
+14 ,2,500,14 ,1
+14 ,2,500,14 ,0
+14 ,2,500,14 ,0
+2 ,2,500,33 ,0
+11 ,3,750,23 ,0
+14 ,8,2000,46 ,0
+9 ,1,250,9 ,0
+16 ,5,1250,27 ,0
+14 ,4,1000,26 ,0
+4 ,2,500,30 ,0
+14 ,3,750,21 ,0
+16 ,16,4000,77 ,0
+4 ,2,500,31 ,0
+14 ,8,2000,50 ,0
+11 ,3,750,26 ,0
+14 ,7,1750,45 ,0
+15 ,5,1250,33 ,0
+16 ,2,500,16 ,0
+16 ,3,750,21 ,0
+11 ,8,2000,72 ,0
+11 ,1,250,11 ,0
+11 ,1,250,11 ,0
+11 ,1,250,11 ,0
+11 ,1,250,11 ,1
+11 ,1,250,11 ,0
+2 ,3,750,75 ,1
+2 ,3,750,77 ,0
+16 ,4,1000,28 ,0
+16 ,15,3750,87 ,0
+16 ,14,3500,83 ,0
+16 ,10,2500,62 ,0
+16 ,3,750,23 ,0
+14 ,3,750,26 ,0
+23 ,19,4750,62 ,0
+11 ,7,1750,75 ,0
+14 ,3,750,28 ,0
+20 ,14,3500,69 ,1
+4 ,2,500,46 ,0
+11 ,2,500,25 ,0
+11 ,3,750,37 ,0
+16 ,4,1000,33 ,0
+21 ,7,1750,38 ,0
+13 ,7,1750,76 ,0
+16 ,6,1500,50 ,0
+14 ,3,750,33 ,0
+14 ,1,250,14 ,0
+14 ,1,250,14 ,0
+14 ,1,250,14 ,0
+14 ,1,250,14 ,0
+14 ,1,250,14 ,0
+14 ,1,250,14 ,0
+17 ,7,1750,58 ,1
+14 ,3,750,35 ,0
+14 ,3,750,35 ,0
+16 ,7,1750,64 ,0
+21 ,2,500,21 ,0
+16 ,3,750,35 ,0
+16 ,1,250,16 ,0
+16 ,1,250,16 ,0
+16 ,1,250,16 ,0
+16 ,1,250,16 ,0
+16 ,1,250,16 ,0
+14 ,2,500,29 ,0
+11 ,4,1000,74 ,0
+11 ,2,500,38 ,1
+21 ,6,1500,48 ,0
+23 ,2,500,23 ,0
+23 ,6,1500,45 ,0
+14 ,2,500,35 ,1
+16 ,6,1500,81 ,0
+16 ,4,1000,58 ,0
+16 ,5,1250,71 ,0
+21 ,2,500,26 ,0
+21 ,3,750,35 ,0
+21 ,3,750,35 ,0
+23 ,8,2000,69 ,0
+21 ,3,750,38 ,0
+23 ,3,750,35 ,0
+21 ,3,750,40 ,0
+23 ,2,500,28 ,0
+21 ,1,250,21 ,0
+21 ,1,250,21 ,0
+25 ,6,1500,50 ,0
+21 ,1,250,21 ,0
+21 ,1,250,21 ,0
+23 ,3,750,39 ,0
+21 ,2,500,33 ,0
+14 ,3,750,79 ,0
+23 ,1,250,23 ,1
+23 ,1,250,23 ,0
+23 ,1,250,23 ,0
+23 ,1,250,23 ,0
+23 ,1,250,23 ,0
+23 ,1,250,23 ,0
+23 ,1,250,23 ,0
+23 ,4,1000,52 ,0
+23 ,1,250,23 ,0
+23 ,7,1750,88 ,0
+16 ,3,750,86 ,0
+23 ,2,500,38 ,0
+21 ,2,500,52 ,0
+23 ,3,750,62 ,0
+39 ,1,250,39 ,0
 72 ,1,250,72 ,0
\ No newline at end of file


[06/55] [partial] incubator-systemml git commit: [SYSTEMML-482] [SYSTEMML-480] Adding a Git attributes file to enfore Unix-styled line endings, and normalizing all of the line endings.

Posted by du...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor_literals3.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor_literals3.dml b/src/test/scripts/functions/parfor/parfor_literals3.dml
index feabb20..98a9328 100644
--- a/src/test/scripts/functions/parfor/parfor_literals3.dml
+++ b/src/test/scripts/functions/parfor/parfor_literals3.dml
@@ -19,15 +19,15 @@
 #
 #-------------------------------------------------------------
 
-
-A = read($1, rows=$2, cols=$3, format="text");   
-
-parfor( i in 1:1, mode=REMOTE_MR, opt=NONE ) 
-{
-   print("{"); #level in
-   print("}"); #level out
-   print(","); #instruction
-   print(";"); #component
-}  
-      
+
+A = read($1, rows=$2, cols=$3, format="text");   
+
+parfor( i in 1:1, mode=REMOTE_MR, opt=NONE ) 
+{
+   print("{"); #level in
+   print("}"); #level out
+   print(","); #instruction
+   print(";"); #component
+}  
+      
 write(A, $4);      
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor_literals4a.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor_literals4a.dml b/src/test/scripts/functions/parfor/parfor_literals4a.dml
index ad00243..e6da682 100644
--- a/src/test/scripts/functions/parfor/parfor_literals4a.dml
+++ b/src/test/scripts/functions/parfor/parfor_literals4a.dml
@@ -19,15 +19,15 @@
 #
 #-------------------------------------------------------------
 
-
-
-A = read($1, rows=$2, cols=$3, format="text");   
-a_t0 = matrix(0, rows=nrow(A),cols=ncol(A));
-
-parfor( i in 1:1, mode=LOCAL, opt=NONE, check=0 ) 
-{
-   a_t0[1:nrow(A),1:ncol(A)]=A+0; 
-}  
-
-write(a_t0, $4);
+
+
+A = read($1, rows=$2, cols=$3, format="text");   
+a_t0 = matrix(0, rows=nrow(A),cols=ncol(A));
+
+parfor( i in 1:1, mode=LOCAL, opt=NONE, check=0 ) 
+{
+   a_t0[1:nrow(A),1:ncol(A)]=A+0; 
+}  
+
+write(a_t0, $4);
       
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor_literals4b.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor_literals4b.dml b/src/test/scripts/functions/parfor/parfor_literals4b.dml
index cf72593..8fc7402 100644
--- a/src/test/scripts/functions/parfor/parfor_literals4b.dml
+++ b/src/test/scripts/functions/parfor/parfor_literals4b.dml
@@ -19,15 +19,15 @@
 #
 #-------------------------------------------------------------
 
-
-
-A = read($1, rows=$2, cols=$3, format="text");   
-a_t0 = matrix(0, rows=nrow(A),cols=ncol(A));
-
-parfor( i in 1:1, mode=REMOTE_MR, opt=NONE, check=0 ) 
-{
-   a_t0[1:nrow(A),1:ncol(A)]=A+0; 
-}  
-
-write(a_t0, $4);
+
+
+A = read($1, rows=$2, cols=$3, format="text");   
+a_t0 = matrix(0, rows=nrow(A),cols=ncol(A));
+
+parfor( i in 1:1, mode=REMOTE_MR, opt=NONE, check=0 ) 
+{
+   a_t0[1:nrow(A),1:ncol(A)]=A+0; 
+}  
+
+write(a_t0, $4);
       
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor_mdatapartitioning.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor_mdatapartitioning.R b/src/test/scripts/functions/parfor/parfor_mdatapartitioning.R
index ccb97fb..a719f72 100644
--- a/src/test/scripts/functions/parfor/parfor_mdatapartitioning.R
+++ b/src/test/scripts/functions/parfor/parfor_mdatapartitioning.R
@@ -19,40 +19,40 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-V1 <- readMM(paste(args[1], "V.mtx", sep=""))
-V <- as.matrix(V1);
-n <- ncol(V); 
-
-R1 <- array(0,dim=c(1,n))
-R2 <- array(0,dim=c(1,n))
-
-for( i in 1:n )
-{
-   X <- V[ ,i];                 
-   R1[1,i] <- sum(X);
-}   
-
-if( args[3]==1 )
-{  
-  for( i in 1:n )
-  {
-     X1 <- V[i,]; 
-     X2 <- V[i,];                 
-     R2[1,i] <- R1[1,i] + sum(X1)+sum(X2);
-  }   
-} else {
-  for( i in 1:n )
-  {
-     X1 <- V[i,]; 
-     X2 <- V[,i];                 
-     R2[1,i] <- R1[1,i] + sum(X1)+sum(X2);
-  }  
-}
-
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+V1 <- readMM(paste(args[1], "V.mtx", sep=""))
+V <- as.matrix(V1);
+n <- ncol(V); 
+
+R1 <- array(0,dim=c(1,n))
+R2 <- array(0,dim=c(1,n))
+
+for( i in 1:n )
+{
+   X <- V[ ,i];                 
+   R1[1,i] <- sum(X);
+}   
+
+if( args[3]==1 )
+{  
+  for( i in 1:n )
+  {
+     X1 <- V[i,]; 
+     X2 <- V[i,];                 
+     R2[1,i] <- R1[1,i] + sum(X1)+sum(X2);
+  }   
+} else {
+  for( i in 1:n )
+  {
+     X1 <- V[i,]; 
+     X2 <- V[,i];                 
+     R2[1,i] <- R1[1,i] + sum(X1)+sum(X2);
+  }  
+}
+
 writeMM(as(R2, "CsparseMatrix"), paste(args[2], "Rout", sep="")); 
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor_mdatapartitioning1.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor_mdatapartitioning1.dml b/src/test/scripts/functions/parfor/parfor_mdatapartitioning1.dml
index 5bdfcc1..ea46c94 100644
--- a/src/test/scripts/functions/parfor/parfor_mdatapartitioning1.dml
+++ b/src/test/scripts/functions/parfor/parfor_mdatapartitioning1.dml
@@ -19,29 +19,29 @@
 #
 #-------------------------------------------------------------
 
-
-V = read($1,rows=$2,cols=$3);
-n = $3;
-
-R1 = matrix(0, rows=1,cols=n); 
-R2 = matrix(0, rows=1,cols=n); 
-dummy = matrix(1, rows=1, cols=1);
-
-parfor( i in 1:n, par=4, mode=LOCAL, datapartitioner=LOCAL, opt=NONE) 
-{
-   X = V[,i];                 
-   sX = sum(X);
-   R1[1,i] = dummy * sX; 
-}
-
-parfor( i in 1:n, par=4, mode=LOCAL, datapartitioner=LOCAL, opt=NONE) 
-{
-   X1 = V[i,];
-   X2 = V[i,];                 
-   sX1 = sum(X1);
-   sX2 = sum(X2);
-   R2[1,i] = R1[1,i]+sX1+sX2; 
-} 
-  
-
+
+V = read($1,rows=$2,cols=$3);
+n = $3;
+
+R1 = matrix(0, rows=1,cols=n); 
+R2 = matrix(0, rows=1,cols=n); 
+dummy = matrix(1, rows=1, cols=1);
+
+parfor( i in 1:n, par=4, mode=LOCAL, datapartitioner=LOCAL, opt=NONE) 
+{
+   X = V[,i];                 
+   sX = sum(X);
+   R1[1,i] = dummy * sX; 
+}
+
+parfor( i in 1:n, par=4, mode=LOCAL, datapartitioner=LOCAL, opt=NONE) 
+{
+   X1 = V[i,];
+   X2 = V[i,];                 
+   sX1 = sum(X1);
+   sX2 = sum(X2);
+   R2[1,i] = R1[1,i]+sX1+sX2; 
+} 
+  
+
 write(R2, $4);       
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor_mdatapartitioning2.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor_mdatapartitioning2.dml b/src/test/scripts/functions/parfor/parfor_mdatapartitioning2.dml
index 4f02441..99ef171 100644
--- a/src/test/scripts/functions/parfor/parfor_mdatapartitioning2.dml
+++ b/src/test/scripts/functions/parfor/parfor_mdatapartitioning2.dml
@@ -19,28 +19,28 @@
 #
 #-------------------------------------------------------------
 
-
-V = read($1,rows=$2,cols=$3);
-n = $3;
-
-R1 = matrix(0,rows=1,cols=n); 
-R2 = matrix(0,rows=1,cols=n); 
-dummy = matrix(1,rows=1, cols=1);
-
-parfor( i in 1:n, par=4, mode=LOCAL, datapartitioner=LOCAL, opt=NONE )
-{
-   X = V[,i];                 
-   sX = sum(X);
-   R1[1,i] = dummy * sX; 
-}
-
-parfor( i in 1:n, par=4, mode=LOCAL, datapartitioner=LOCAL, opt=NONE )
-{
-   X1 = V[i,];
-   X2 = V[,i];                 
-   sX1 = sum(X1);
-   sX2 = sum(X2);
-   R2[1,i] = R1[1,i]+sX1+sX2; 
-}   
-
+
+V = read($1,rows=$2,cols=$3);
+n = $3;
+
+R1 = matrix(0,rows=1,cols=n); 
+R2 = matrix(0,rows=1,cols=n); 
+dummy = matrix(1,rows=1, cols=1);
+
+parfor( i in 1:n, par=4, mode=LOCAL, datapartitioner=LOCAL, opt=NONE )
+{
+   X = V[,i];                 
+   sX = sum(X);
+   R1[1,i] = dummy * sX; 
+}
+
+parfor( i in 1:n, par=4, mode=LOCAL, datapartitioner=LOCAL, opt=NONE )
+{
+   X1 = V[i,];
+   X2 = V[,i];                 
+   sX1 = sum(X1);
+   sX2 = sum(X2);
+   R2[1,i] = R1[1,i]+sX1+sX2; 
+}   
+
 write(R2, $4);       
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor_optimizer1.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor_optimizer1.R b/src/test/scripts/functions/parfor/parfor_optimizer1.R
index a75cf9c..5044b71 100644
--- a/src/test/scripts/functions/parfor/parfor_optimizer1.R
+++ b/src/test/scripts/functions/parfor/parfor_optimizer1.R
@@ -19,37 +19,37 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-#NOTES MB: readMM returns an obj inherited from matrix
-# (it seams like it internally uses lists, which makes
-# is very slow if there are multiple passes over the data). 
-# adding 'V <- as.matrix(V1)' by more than a factor of 10.
-# However, this will always result in a dense matrix. 
-
-V1 <- readMM(paste(args[1], "V.mtx", sep=""))
-V <- as.matrix(V1);
-
-m <- nrow(V);
-n <- ncol(V); 
-W <- m;
-
-R <- array(0,dim=c(n,n))
-
-for( i in 1:(n-1) )
-{
-   X <- V[ ,i];                 
-      
-   for( j in (i+1):n )  
-   {
-      Y <- V[ ,j];  
-      R[i,j] <- cor(X, Y)  
-#      print(R[i,j]);
-   }
-}   
-
-writeMM(as(R, "CsparseMatrix"), paste(args[2], "Rout", sep=""));
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+#NOTES MB: readMM returns an obj inherited from matrix
+# (it seams like it internally uses lists, which makes
+# is very slow if there are multiple passes over the data). 
+# adding 'V <- as.matrix(V1)' by more than a factor of 10.
+# However, this will always result in a dense matrix. 
+
+V1 <- readMM(paste(args[1], "V.mtx", sep=""))
+V <- as.matrix(V1);
+
+m <- nrow(V);
+n <- ncol(V); 
+W <- m;
+
+R <- array(0,dim=c(n,n))
+
+for( i in 1:(n-1) )
+{
+   X <- V[ ,i];                 
+      
+   for( j in (i+1):n )  
+   {
+      Y <- V[ ,j];  
+      R[i,j] <- cor(X, Y)  
+#      print(R[i,j]);
+   }
+}   
+
+writeMM(as(R, "CsparseMatrix"), paste(args[2], "Rout", sep=""));

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor_optimizer1.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor_optimizer1.dml b/src/test/scripts/functions/parfor/parfor_optimizer1.dml
index 36c3fcf..ddb7531 100644
--- a/src/test/scripts/functions/parfor/parfor_optimizer1.dml
+++ b/src/test/scripts/functions/parfor/parfor_optimizer1.dml
@@ -19,35 +19,35 @@
 #
 #-------------------------------------------------------------
 
-
-V = read($1,rows=$2,cols=$3);
-m = $2;
-n = $3;
-W = m;
-
-R = matrix(0, rows=n,cols=n); 
-dummy = matrix(1, rows=1, cols=1);
-
-parfor( i in 1:(n-1), opt=RULEBASED )
-{
-   X = V[,i];                 
-   m2X = moment(X,2);
-   sigmaX = sqrt(m2X * (W/(W-1.0)) );
-      
-   parfor( j in (i+1):n )  
-   {  
-      Y = V[,j];
-
-      #corr computation    
-      m2Y = moment(Y,2);
-      sigmaY = sqrt(m2Y * (W/(W-1.0)) );      
-      covXY = cov(X,Y);      
-      rXY = covXY / (sigmaX*sigmaY); 
-      
-      #print("R[("+i+","+j+")]="+rXY); 
-      R[i,j] = dummy * rXY; 
-      
-   }
-}   
-
+
+V = read($1,rows=$2,cols=$3);
+m = $2;
+n = $3;
+W = m;
+
+R = matrix(0, rows=n,cols=n); 
+dummy = matrix(1, rows=1, cols=1);
+
+parfor( i in 1:(n-1), opt=RULEBASED )
+{
+   X = V[,i];                 
+   m2X = moment(X,2);
+   sigmaX = sqrt(m2X * (W/(W-1.0)) );
+      
+   parfor( j in (i+1):n )  
+   {  
+      Y = V[,j];
+
+      #corr computation    
+      m2Y = moment(Y,2);
+      sigmaY = sqrt(m2Y * (W/(W-1.0)) );      
+      covXY = cov(X,Y);      
+      rXY = covXY / (sigmaX*sigmaY); 
+      
+      #print("R[("+i+","+j+")]="+rXY); 
+      R[i,j] = dummy * rXY; 
+      
+   }
+}   
+
 write(R, $4);       
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor_optimizer2.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor_optimizer2.R b/src/test/scripts/functions/parfor/parfor_optimizer2.R
index b6ef0e9..f31c0b5 100644
--- a/src/test/scripts/functions/parfor/parfor_optimizer2.R
+++ b/src/test/scripts/functions/parfor/parfor_optimizer2.R
@@ -19,138 +19,138 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-
-D1 <- readMM(paste(args[1], "D.mtx", sep=""))
-S11 <- readMM(paste(args[1], "S1.mtx", sep=""))
-S21 <- readMM(paste(args[1], "S2.mtx", sep=""))
-K11 <- readMM(paste(args[1], "K1.mtx", sep=""))
-K21 <- readMM(paste(args[1], "K2.mtx", sep=""))
-D <- as.matrix(D1);
-S1 <- as.matrix(S11);
-S2 <- as.matrix(S21);
-K1 <- as.matrix(K11);
-K2 <- as.matrix(K21);
-
-numPairs <- ncol(S1) * ncol(S2); # number of attribute pairs (|S1|*|S2|)
-maxC <- args[2]; # max number of categories in any categorical attribute
-
-s1size <- ncol(S1);
-s2size <- ncol(S2);
-
-# R, chisq, cramers, spearman, eta, anovaf
-numstats <- 8;
-basestats <- array(0,dim=c(numstats,numPairs)); 
-cat_counts <- array(0,dim=c(maxC,numPairs)); 
-cat_means <- array(0,dim=c(maxC,numPairs));
-cat_vars <- array(0,dim=c(maxC,numPairs));
-
-
-for( i in 1:s1size ) { 
-    a1 <- S1[,i];
-    k1 <- K1[1,i];
-    A1 <- as.matrix(D[,a1]);
-
-    for( j in 1:s2size ) {
-        pairID <-(i-1)*s2size+j;
-        a2 <- S2[,j];
-        k2 <- K2[1,j];
-        A2 <- as.matrix(D[,a2]);
-    
-        if (k1 == k2) {
-            if (k1 == 1) {   
-                # scale-scale
-                print("scale-scale");
-                basestats[1,pairID] <- cor(D[,a1], D[,a2]);
-                #basestats[1,pairID] <- cor(A1, A2);
-                
-                print(basestats[1,pairID]);
-            } else {
-                # nominal-nominal or ordinal-ordinal
-                print("categorical-categorical");
-                F <- table(A1,A2);
-                cst <- chisq.test(F);
-                chi_squared <- as.numeric(cst[1]);
-                degFreedom <- (nrow(F)-1)*(ncol(F)-1);
-                pValue <- as.numeric(cst[3]);
-                q <- min(dim(F));
-                W <- sum(F);
-                cramers_v <- sqrt(chi_squared/(W*(q-1)));
-
-                basestats[2,pairID] <- chi_squared;
-                basestats[3,pairID] <- degFreedom;
-                basestats[4,pairID] <- pValue;
-                basestats[5,pairID] <- cramers_v;
-
-                if ( k1 == 3 ) {
-                    # ordinal-ordinal   
-                    print("ordinal-ordinal");
-                    basestats[6,pairID] <- cor(A1,A2, method="spearman");
-                }
-            }
-        } 
-        else {       
-            if (k1 == 1 || k2 == 1) {    
-                # Scale-nominal/ordinal
-                print("scale-categorical");
-                if ( k1 == 1 ) {
-                    Av <- as.matrix(A2); 
-                    Yv <- as.matrix(A1); 
-                }
-                else {
-                    Av <- as.matrix(A1); 
-                    Yv <- as.matrix(A2); 
-                }
-                
-                W <- nrow(Av);
-                my <- mean(Yv); 
-                varY <- var(Yv);
-                
-                CFreqs <- as.matrix(table(Av)); 
-                CMeans <- as.matrix(aggregate(Yv, by=list(Av), "mean")$V1);
-                CVars <- as.matrix(aggregate(Yv, by=list(Av), "var")$V1);
-                R <- nrow(CFreqs);
-              
-                Eta <- sqrt(1 - ( sum((CFreqs-1)*CVars) / ((W-1)*varY) ));
-                anova_num <- sum( (CFreqs*(CMeans-my)^2) )/(R-1);
-                anova_den <- sum( (CFreqs-1)*CVars )/(W-R);
-                ANOVAF <- anova_num/anova_den;
-
-                basestats[7,pairID] <- Eta;
-                basestats[8,pairID] <- ANOVAF;
-
-                cat_counts[ 1:length(CFreqs),pairID] <- CFreqs;
-                cat_means[ 1:length(CMeans),pairID] <- CMeans;
-                cat_vars[ 1:length(CVars),pairID] <- CVars;
-            }
-            else {
-                # nominal-ordinal or ordinal-nominal    
-                print("nomial-ordinal"); #TODO should not be same code            
-                F <- table(A1,A2);
-                cst <- chisq.test(F);
-                chi_squared <- as.numeric(cst[1]);
-                degFreedom <- (nrow(F)-1)*(ncol(F)-1);
-                pValue <- as.numeric(cst[3]);
-                q <- min(dim(F));
-                W <- sum(F);
-                cramers_v <- sqrt(chi_squared/(W*(q-1)));
-                
-                basestats[2,pairID] <- chi_squared;
-                basestats[3,pairID] <- degFreedom;
-                basestats[4,pairID] <- pValue;
-                basestats[5,pairID] <- cramers_v;
-            }
-        }
-    }
-}
-
-writeMM(as(basestats, "CsparseMatrix"), paste(args[3], "bivar.stats", sep=""));
-writeMM(as(cat_counts, "CsparseMatrix"), paste(args[3], "category.counts", sep=""));
-writeMM(as(cat_means, "CsparseMatrix"), paste(args[3], "category.means", sep=""));
-writeMM(as(cat_vars, "CsparseMatrix"), paste(args[3], "category.variances", sep=""));
-
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+
+D1 <- readMM(paste(args[1], "D.mtx", sep=""))
+S11 <- readMM(paste(args[1], "S1.mtx", sep=""))
+S21 <- readMM(paste(args[1], "S2.mtx", sep=""))
+K11 <- readMM(paste(args[1], "K1.mtx", sep=""))
+K21 <- readMM(paste(args[1], "K2.mtx", sep=""))
+D <- as.matrix(D1);
+S1 <- as.matrix(S11);
+S2 <- as.matrix(S21);
+K1 <- as.matrix(K11);
+K2 <- as.matrix(K21);
+
+numPairs <- ncol(S1) * ncol(S2); # number of attribute pairs (|S1|*|S2|)
+maxC <- args[2]; # max number of categories in any categorical attribute
+
+s1size <- ncol(S1);
+s2size <- ncol(S2);
+
+# R, chisq, cramers, spearman, eta, anovaf
+numstats <- 8;
+basestats <- array(0,dim=c(numstats,numPairs)); 
+cat_counts <- array(0,dim=c(maxC,numPairs)); 
+cat_means <- array(0,dim=c(maxC,numPairs));
+cat_vars <- array(0,dim=c(maxC,numPairs));
+
+
+for( i in 1:s1size ) { 
+    a1 <- S1[,i];
+    k1 <- K1[1,i];
+    A1 <- as.matrix(D[,a1]);
+
+    for( j in 1:s2size ) {
+        pairID <-(i-1)*s2size+j;
+        a2 <- S2[,j];
+        k2 <- K2[1,j];
+        A2 <- as.matrix(D[,a2]);
+    
+        if (k1 == k2) {
+            if (k1 == 1) {   
+                # scale-scale
+                print("scale-scale");
+                basestats[1,pairID] <- cor(D[,a1], D[,a2]);
+                #basestats[1,pairID] <- cor(A1, A2);
+                
+                print(basestats[1,pairID]);
+            } else {
+                # nominal-nominal or ordinal-ordinal
+                print("categorical-categorical");
+                F <- table(A1,A2);
+                cst <- chisq.test(F);
+                chi_squared <- as.numeric(cst[1]);
+                degFreedom <- (nrow(F)-1)*(ncol(F)-1);
+                pValue <- as.numeric(cst[3]);
+                q <- min(dim(F));
+                W <- sum(F);
+                cramers_v <- sqrt(chi_squared/(W*(q-1)));
+
+                basestats[2,pairID] <- chi_squared;
+                basestats[3,pairID] <- degFreedom;
+                basestats[4,pairID] <- pValue;
+                basestats[5,pairID] <- cramers_v;
+
+                if ( k1 == 3 ) {
+                    # ordinal-ordinal   
+                    print("ordinal-ordinal");
+                    basestats[6,pairID] <- cor(A1,A2, method="spearman");
+                }
+            }
+        } 
+        else {       
+            if (k1 == 1 || k2 == 1) {    
+                # Scale-nominal/ordinal
+                print("scale-categorical");
+                if ( k1 == 1 ) {
+                    Av <- as.matrix(A2); 
+                    Yv <- as.matrix(A1); 
+                }
+                else {
+                    Av <- as.matrix(A1); 
+                    Yv <- as.matrix(A2); 
+                }
+                
+                W <- nrow(Av);
+                my <- mean(Yv); 
+                varY <- var(Yv);
+                
+                CFreqs <- as.matrix(table(Av)); 
+                CMeans <- as.matrix(aggregate(Yv, by=list(Av), "mean")$V1);
+                CVars <- as.matrix(aggregate(Yv, by=list(Av), "var")$V1);
+                R <- nrow(CFreqs);
+              
+                Eta <- sqrt(1 - ( sum((CFreqs-1)*CVars) / ((W-1)*varY) ));
+                anova_num <- sum( (CFreqs*(CMeans-my)^2) )/(R-1);
+                anova_den <- sum( (CFreqs-1)*CVars )/(W-R);
+                ANOVAF <- anova_num/anova_den;
+
+                basestats[7,pairID] <- Eta;
+                basestats[8,pairID] <- ANOVAF;
+
+                cat_counts[ 1:length(CFreqs),pairID] <- CFreqs;
+                cat_means[ 1:length(CMeans),pairID] <- CMeans;
+                cat_vars[ 1:length(CVars),pairID] <- CVars;
+            }
+            else {
+                # nominal-ordinal or ordinal-nominal    
+                print("nomial-ordinal"); #TODO should not be same code            
+                F <- table(A1,A2);
+                cst <- chisq.test(F);
+                chi_squared <- as.numeric(cst[1]);
+                degFreedom <- (nrow(F)-1)*(ncol(F)-1);
+                pValue <- as.numeric(cst[3]);
+                q <- min(dim(F));
+                W <- sum(F);
+                cramers_v <- sqrt(chi_squared/(W*(q-1)));
+                
+                basestats[2,pairID] <- chi_squared;
+                basestats[3,pairID] <- degFreedom;
+                basestats[4,pairID] <- pValue;
+                basestats[5,pairID] <- cramers_v;
+            }
+        }
+    }
+}
+
+writeMM(as(basestats, "CsparseMatrix"), paste(args[3], "bivar.stats", sep=""));
+writeMM(as(cat_counts, "CsparseMatrix"), paste(args[3], "category.counts", sep=""));
+writeMM(as(cat_means, "CsparseMatrix"), paste(args[3], "category.means", sep=""));
+writeMM(as(cat_vars, "CsparseMatrix"), paste(args[3], "category.variances", sep=""));
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor_optimizer2.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor_optimizer2.dml b/src/test/scripts/functions/parfor/parfor_optimizer2.dml
index baf8792..e6007af 100644
--- a/src/test/scripts/functions/parfor/parfor_optimizer2.dml
+++ b/src/test/scripts/functions/parfor/parfor_optimizer2.dml
@@ -19,259 +19,259 @@
 #
 #-------------------------------------------------------------
 
-
-
-/*
- *
- * For a given pair of attribute sets, compute bivariate statistics between all attribute pairs 
- *   Given, S_1 = {A_11, A_12, ... A_1m} and S_2 = {A_21, A_22, ... A_2n} 
- *          compute bivariate stats for m*n pairs (A_1i, A_2j), (1<= i <=m) and (1<= j <=n)
- *
- * Seven inputs:  
- *    $1) D  - input data
- *    $2) S1 - First attribute set {A_11, A_12, ... A_1m}
- *    $3) S2 - Second attribute set {A_21, A_22, ... A_2n}
- *    $4) K1 - kind for attributes in S1 
- *    $5) K2 - kind for attributes in S2
- *             kind=1 for scale, kind=2 for nominal, kind=3 for ordinal
- *    $6) numPairs - total number of pairs (m*n)
- *    $7) maxC - maximum number of categories in any categorical attribute
- * 
- * One output:    
- *    $6) output directory in which following four statistics files are created
- *        + bivar.stats - matrix with all 8 bivariate statistics computed for different attribute pairs
- *                        (R, (chi-sq, df, pval, cramersv), spearman, Eta, F)
- *        + categorical.counts - 
- *        + categorical.means - 
- *        + categorical.variances - 
- *          -> Values in these three matrices are applicable only for scale-categorical attribute pairs. 
- *          k^th column in these matrices denote the attribute pair (A_1i,A_2j) where i*j = k.
- */
-
-D = read($1, rows=$7, cols=$8);  # input data set
-S1 = read($2, rows=1, cols=$9); # attribute set 1
-S2 = read($3, rows=1, cols=$9); # attribute set 2
-K1 = read($4, rows=1, cols=$9); # kind for attributes in S1
-K2 = read($5, rows=1, cols=$9); # kind for attributes in S2
-numPairs = $10; # number of attribute pairs (|S1|*|S2|)
-maxC = $11;     # max number of categories in any categorical attribute
-
-s1size = ncol(S1);
-s2size = ncol(S2);
-
-#numpairs = s1size * s2size;
-#print(s1size + ", " + s2size + ", " + numpairs);
-
-# R, chisq, cramers, spearman, eta, anovaf
-numstats = 8;
-basestats = matrix(0, rows=numstats, cols=numPairs);
-cat_counts = matrix(0, rows=maxC, cols=numPairs);
-cat_means = matrix(0, rows=maxC, cols=numPairs);
-cat_vars = matrix(0, rows=maxC, cols=numPairs);
-
-dummy = matrix(1, rows=1, cols=1);
-
-
-parfor( i in 1:s1size, check=0, opt=RULEBASED) {
-    a1 = castAsScalar(S1[,i]);
-    k1 = castAsScalar(K1[1,i]);
-    A1 = D[,a1];
-
-    parfor( j in 1:s2size, check=0) {
-        pairID = (i-1)*s2size+j; 
-        a2 = castAsScalar(S2[,j]);
-        k2 = castAsScalar(K2[1,j]);
-        A2 = D[,a2];
-    
-        if (k1 == k2) {
-            if (k1 == 1) {
-                # scale-scale
-                print("[" + i + "," + j + "] scale-scale");
-                r = bivar_ss(A1,A2);   
-                basestats[1,pairID] = dummy*r;
-            } else {
-                # nominal-nominal or ordinal-ordinal
-                print("[" + i + "," + j + "] categorical-categorical");
-                [chisq, df, pval, cramersv]  = bivar_cc(A1,A2);
-                basestats[2,pairID] = dummy*chisq;
-                basestats[3,pairID] = dummy*df;
-                basestats[4,pairID] = dummy*pval;
-                basestats[5,pairID] = dummy*cramersv;
-
-                if ( k1 == 3 ) {
-                    # ordinal-ordinal
-                    print("[" + i + "," + j + "] ordinal-ordinal");
-                    sp = bivar_oo(A1, A2);
-                    basestats[6,pairID] = dummy*sp;
-                }
-            }
-        } 
-        else {
-            if (k1 == 1 | k2 == 1) {
-                # Scale-nominal/ordinal   
-                print("[" + i + "," + j + "] scale-categorical");
-                
-               if ( k1 == 1 ) {
-                    [eta,f, counts, means, vars] = bivar_sc(A1,A2);
-                }
-                else {
-                    [eta,f, counts, means, vars] = bivar_sc(A2,A1);
-                }
-                basestats[7,pairID] = dummy*eta;
-                basestats[8,pairID] = dummy*f;
-                cat_counts[,pairID] = counts;
-                cat_means[,pairID] = means;
-                cat_vars[,pairID] = vars; 
-            }
-            else {
-                # nominal-ordinal or ordinal-nominal
-                print("[" + i + "," + j + "] categorical-categorical");
-                [chisq, df, pval, cramersv]  = bivar_cc(A1,A2);
-                basestats[2,pairID] = dummy*chisq;
-                basestats[3,pairID] = dummy*df;
-                basestats[4,pairID] = dummy*pval;
-                basestats[5,pairID] = dummy*cramersv;
-            }
-        }
-    }
-}
-
-write(basestats, $6 + "/bivar.stats");
-write(cat_counts, $6 + "/category.counts");
-write(cat_means, $6 + "/category.means");
-write(cat_vars, $6 + "/category.variances");
-
-
-# -----------------------------------------------------------------------------------------------------------
-
-bivar_cc = function(Matrix[Double] A, Matrix[Double] B) return (Double chisq, Double df, Double pval, Double cramersv) {
-
-    # Contingency Table
-    F = table(A,B);
-
-    # Chi-Squared
-    W = sum(F);
-    r = rowSums(F);
-    c = colSums(F);
-    E = (r %*% c)/W;
-    T = (F-E)^2/E;
-    chi_squared = sum(T);
-
-    # compute p-value
-    degFreedom = (nrow(F)-1)*(ncol(F)-1);
-    pValue = pchisq(target=chi_squared, df=degFreedom, lower.tail=FALSE);
-
-    # Cramer's V
-    R = nrow(F);
-    C = ncol(F);
-    q = min(R,C);
-    cramers_v = sqrt(chi_squared/(W*(q-1)));
-
-    # Assign return values
-    chisq = chi_squared;
-    df = degFreedom;
-    pval = pValue;
-    cramersv = cramers_v;
-}
-
-# -----------------------------------------------------------------------------------------------------------
-
-bivar_ss = function(Matrix[Double] X, Matrix[Double] Y) return (Double R) {
-
-    # Unweighted co-variance
-    covXY = cov(X,Y);
-
-    # compute standard deviations for both X and Y by computing 2^nd central moment
-    W = nrow(X);
-    m2X = moment(X,2);
-    m2Y = moment(Y,2);
-    sigmaX = sqrt(m2X * (W/(W-1.0)) );
-    sigmaY = sqrt(m2Y * (W/(W-1.0)) );
-
-    # Pearson's R
-    R = covXY / (sigmaX*sigmaY);
-}
-
-# -----------------------------------------------------------------------------------------------------------
-
-# Y points to SCALE variable
-# A points to CATEGORICAL variable
-bivar_sc = function(Matrix[Double] Y, Matrix[Double] A) return (Double Eta, Double AnovaF, Matrix[Double] CFreqs, Matrix[Double] CMeans, Matrix[Double] CVars ) {
-
-    # mean and variance in target variable
-    W = nrow(A);
-    my = mean(Y);
-    varY = moment(Y,2) * W/(W-1.0)
-
-    # category-wise (frequencies, means, variances)
-    CFreqs = aggregate(target=Y, groups=A, fn="count"); 
-    CMeans = aggregate(target=Y, groups=A, fn="mean");
-    CVars =  aggregate(target=Y, groups=A, fn="variance");
-
-    # number of categories
-    R = nrow(CFreqs);
-
-    Eta = sqrt(1 - ( sum((CFreqs-1)*CVars) / ((W-1)*varY) ));
-
-    anova_num = sum( (CFreqs*(CMeans-my)^2) )/(R-1);
-    anova_den = sum( (CFreqs-1)*CVars )/(W-R);
-    AnovaF = anova_num/anova_den;
-}
-
-# -----------------------------------------------------------------------------------------------------------
-
-
-# -----------------------------------------------------------------------------------------------------------
-# Function to compute ranks
-# takes a column vector as input, and produces a vector of same size in which each cell denotes to the computed score for that category
-computeRanks = function(Matrix[Double] X) return (Matrix[Double] Ranks) {
-    dummy = matrix(1, rows=1, cols=1);
-    Rks = X;
-    size = nrow(X);
-    for(i in 1:size) {
-        prefixSum = 0.0;
-        if( i>1 ){
-           prefixSum = sum(X[1:(i-1),1]);
-        } 
-        Rks[i,1] = dummy * (prefixSum + ((castAsScalar(X[i,1])+1)/2));
-    }
-    Ranks = Rks;
-}
-
-#-------------------------------------------------------------------------
-
-bivar_oo = function(Matrix[Double] A, Matrix[Double] B) return (Double sp) {
-
-    # compute contingency table
-    F = table(A,B);
-
-    catA = nrow(F);  # number of categories in A
-    catB = ncol(F);  # number of categories in B
-
-    # compute category-wise counts for both the attributes
-    R = rowSums(F);
-    S = colSums(F);
-
-    # compute scores, both are column vectors
-    [C] = computeRanks(R);
-    meanX = mean(C,R); 
-
-    columnS = t(S);
-    [D] = computeRanks(columnS);
-
-    # scores (C,D) are individual values, and counts (R,S) act as weights
-    meanY = mean(D,columnS);
-
-    W = sum(F); # total weight, or total #cases
-    varX = moment(C,R,2)*(W/(W-1.0));
-    varY = moment(D,columnS,2)*(W/(W-1.0));
-
-    covXY = 0.0;
-    for(i in 1:catA) {
-        covXY = covXY + sum((F[i,]/(W-1)) * (castAsScalar(C[i,1])-meanX) * (t(D[,1])-meanY));
-    }
-
-    sp = covXY/(sqrt(varX)*sqrt(varY));
-}
-
-# -----------------------------------------------------------------------------------------------------------
+
+
+/*
+ *
+ * For a given pair of attribute sets, compute bivariate statistics between all attribute pairs 
+ *   Given, S_1 = {A_11, A_12, ... A_1m} and S_2 = {A_21, A_22, ... A_2n} 
+ *          compute bivariate stats for m*n pairs (A_1i, A_2j), (1<= i <=m) and (1<= j <=n)
+ *
+ * Seven inputs:  
+ *    $1) D  - input data
+ *    $2) S1 - First attribute set {A_11, A_12, ... A_1m}
+ *    $3) S2 - Second attribute set {A_21, A_22, ... A_2n}
+ *    $4) K1 - kind for attributes in S1 
+ *    $5) K2 - kind for attributes in S2
+ *             kind=1 for scale, kind=2 for nominal, kind=3 for ordinal
+ *    $6) numPairs - total number of pairs (m*n)
+ *    $7) maxC - maximum number of categories in any categorical attribute
+ * 
+ * One output:    
+ *    $6) output directory in which following four statistics files are created
+ *        + bivar.stats - matrix with all 8 bivariate statistics computed for different attribute pairs
+ *                        (R, (chi-sq, df, pval, cramersv), spearman, Eta, F)
+ *        + categorical.counts - 
+ *        + categorical.means - 
+ *        + categorical.variances - 
+ *          -> Values in these three matrices are applicable only for scale-categorical attribute pairs. 
+ *          k^th column in these matrices denote the attribute pair (A_1i,A_2j) where i*j = k.
+ */
+
+D = read($1, rows=$7, cols=$8);  # input data set
+S1 = read($2, rows=1, cols=$9); # attribute set 1
+S2 = read($3, rows=1, cols=$9); # attribute set 2
+K1 = read($4, rows=1, cols=$9); # kind for attributes in S1
+K2 = read($5, rows=1, cols=$9); # kind for attributes in S2
+numPairs = $10; # number of attribute pairs (|S1|*|S2|)
+maxC = $11;     # max number of categories in any categorical attribute
+
+s1size = ncol(S1);
+s2size = ncol(S2);
+
+#numpairs = s1size * s2size;
+#print(s1size + ", " + s2size + ", " + numpairs);
+
+# R, chisq, cramers, spearman, eta, anovaf
+numstats = 8;
+basestats = matrix(0, rows=numstats, cols=numPairs);
+cat_counts = matrix(0, rows=maxC, cols=numPairs);
+cat_means = matrix(0, rows=maxC, cols=numPairs);
+cat_vars = matrix(0, rows=maxC, cols=numPairs);
+
+dummy = matrix(1, rows=1, cols=1);
+
+
+parfor( i in 1:s1size, check=0, opt=RULEBASED) {
+    a1 = castAsScalar(S1[,i]);
+    k1 = castAsScalar(K1[1,i]);
+    A1 = D[,a1];
+
+    parfor( j in 1:s2size, check=0) {
+        pairID = (i-1)*s2size+j; 
+        a2 = castAsScalar(S2[,j]);
+        k2 = castAsScalar(K2[1,j]);
+        A2 = D[,a2];
+    
+        if (k1 == k2) {
+            if (k1 == 1) {
+                # scale-scale
+                print("[" + i + "," + j + "] scale-scale");
+                r = bivar_ss(A1,A2);   
+                basestats[1,pairID] = dummy*r;
+            } else {
+                # nominal-nominal or ordinal-ordinal
+                print("[" + i + "," + j + "] categorical-categorical");
+                [chisq, df, pval, cramersv]  = bivar_cc(A1,A2);
+                basestats[2,pairID] = dummy*chisq;
+                basestats[3,pairID] = dummy*df;
+                basestats[4,pairID] = dummy*pval;
+                basestats[5,pairID] = dummy*cramersv;
+
+                if ( k1 == 3 ) {
+                    # ordinal-ordinal
+                    print("[" + i + "," + j + "] ordinal-ordinal");
+                    sp = bivar_oo(A1, A2);
+                    basestats[6,pairID] = dummy*sp;
+                }
+            }
+        } 
+        else {
+            if (k1 == 1 | k2 == 1) {
+                # Scale-nominal/ordinal   
+                print("[" + i + "," + j + "] scale-categorical");
+                
+               if ( k1 == 1 ) {
+                    [eta,f, counts, means, vars] = bivar_sc(A1,A2);
+                }
+                else {
+                    [eta,f, counts, means, vars] = bivar_sc(A2,A1);
+                }
+                basestats[7,pairID] = dummy*eta;
+                basestats[8,pairID] = dummy*f;
+                cat_counts[,pairID] = counts;
+                cat_means[,pairID] = means;
+                cat_vars[,pairID] = vars; 
+            }
+            else {
+                # nominal-ordinal or ordinal-nominal
+                print("[" + i + "," + j + "] categorical-categorical");
+                [chisq, df, pval, cramersv]  = bivar_cc(A1,A2);
+                basestats[2,pairID] = dummy*chisq;
+                basestats[3,pairID] = dummy*df;
+                basestats[4,pairID] = dummy*pval;
+                basestats[5,pairID] = dummy*cramersv;
+            }
+        }
+    }
+}
+
+write(basestats, $6 + "/bivar.stats");
+write(cat_counts, $6 + "/category.counts");
+write(cat_means, $6 + "/category.means");
+write(cat_vars, $6 + "/category.variances");
+
+
+# -----------------------------------------------------------------------------------------------------------
+
+bivar_cc = function(Matrix[Double] A, Matrix[Double] B) return (Double chisq, Double df, Double pval, Double cramersv) {
+
+    # Contingency Table
+    F = table(A,B);
+
+    # Chi-Squared
+    W = sum(F);
+    r = rowSums(F);
+    c = colSums(F);
+    E = (r %*% c)/W;
+    T = (F-E)^2/E;
+    chi_squared = sum(T);
+
+    # compute p-value
+    degFreedom = (nrow(F)-1)*(ncol(F)-1);
+    pValue = pchisq(target=chi_squared, df=degFreedom, lower.tail=FALSE);
+
+    # Cramer's V
+    R = nrow(F);
+    C = ncol(F);
+    q = min(R,C);
+    cramers_v = sqrt(chi_squared/(W*(q-1)));
+
+    # Assign return values
+    chisq = chi_squared;
+    df = degFreedom;
+    pval = pValue;
+    cramersv = cramers_v;
+}
+
+# -----------------------------------------------------------------------------------------------------------
+
+bivar_ss = function(Matrix[Double] X, Matrix[Double] Y) return (Double R) {
+
+    # Unweighted co-variance
+    covXY = cov(X,Y);
+
+    # compute standard deviations for both X and Y by computing 2^nd central moment
+    W = nrow(X);
+    m2X = moment(X,2);
+    m2Y = moment(Y,2);
+    sigmaX = sqrt(m2X * (W/(W-1.0)) );
+    sigmaY = sqrt(m2Y * (W/(W-1.0)) );
+
+    # Pearson's R
+    R = covXY / (sigmaX*sigmaY);
+}
+
+# -----------------------------------------------------------------------------------------------------------
+
+# Y points to SCALE variable
+# A points to CATEGORICAL variable
+bivar_sc = function(Matrix[Double] Y, Matrix[Double] A) return (Double Eta, Double AnovaF, Matrix[Double] CFreqs, Matrix[Double] CMeans, Matrix[Double] CVars ) {
+
+    # mean and variance in target variable
+    W = nrow(A);
+    my = mean(Y);
+    varY = moment(Y,2) * W/(W-1.0)
+
+    # category-wise (frequencies, means, variances)
+    CFreqs = aggregate(target=Y, groups=A, fn="count"); 
+    CMeans = aggregate(target=Y, groups=A, fn="mean");
+    CVars =  aggregate(target=Y, groups=A, fn="variance");
+
+    # number of categories
+    R = nrow(CFreqs);
+
+    Eta = sqrt(1 - ( sum((CFreqs-1)*CVars) / ((W-1)*varY) ));
+
+    anova_num = sum( (CFreqs*(CMeans-my)^2) )/(R-1);
+    anova_den = sum( (CFreqs-1)*CVars )/(W-R);
+    AnovaF = anova_num/anova_den;
+}
+
+# -----------------------------------------------------------------------------------------------------------
+
+
+# -----------------------------------------------------------------------------------------------------------
+# Function to compute ranks
+# takes a column vector as input, and produces a vector of same size in which each cell denotes to the computed score for that category
+computeRanks = function(Matrix[Double] X) return (Matrix[Double] Ranks) {
+    dummy = matrix(1, rows=1, cols=1);
+    Rks = X;
+    size = nrow(X);
+    for(i in 1:size) {
+        prefixSum = 0.0;
+        if( i>1 ){
+           prefixSum = sum(X[1:(i-1),1]);
+        } 
+        Rks[i,1] = dummy * (prefixSum + ((castAsScalar(X[i,1])+1)/2));
+    }
+    Ranks = Rks;
+}
+
+#-------------------------------------------------------------------------
+
+bivar_oo = function(Matrix[Double] A, Matrix[Double] B) return (Double sp) {
+
+    # compute contingency table
+    F = table(A,B);
+
+    catA = nrow(F);  # number of categories in A
+    catB = ncol(F);  # number of categories in B
+
+    # compute category-wise counts for both the attributes
+    R = rowSums(F);
+    S = colSums(F);
+
+    # compute scores, both are column vectors
+    [C] = computeRanks(R);
+    meanX = mean(C,R); 
+
+    columnS = t(S);
+    [D] = computeRanks(columnS);
+
+    # scores (C,D) are individual values, and counts (R,S) act as weights
+    meanY = mean(D,columnS);
+
+    W = sum(F); # total weight, or total #cases
+    varX = moment(C,R,2)*(W/(W-1.0));
+    varY = moment(D,columnS,2)*(W/(W-1.0));
+
+    covXY = 0.0;
+    for(i in 1:catA) {
+        covXY = covXY + sum((F[i,]/(W-1)) * (castAsScalar(C[i,1])-meanX) * (t(D[,1])-meanY));
+    }
+
+    sp = covXY/(sqrt(varX)*sqrt(varY));
+}
+
+# -----------------------------------------------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor_optimizer3.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor_optimizer3.R b/src/test/scripts/functions/parfor/parfor_optimizer3.R
index 1924f77..b1f991c 100644
--- a/src/test/scripts/functions/parfor/parfor_optimizer3.R
+++ b/src/test/scripts/functions/parfor/parfor_optimizer3.R
@@ -19,24 +19,24 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-V1 <- readMM(paste(args[1], "V.mtx", sep=""))
-V <- as.matrix(V1);
-n <- ncol(V); 
-n2 <- n/2;
-
-R <- array(0,dim=c(1,n2))
-
-for( i in 1:n2 )
-{
-   X <- V[,i];                 
-   Y <- V[,n-i+1];                
-   R[1,i] <- sum(X)+sum(Y);
-}   
-
-writeMM(as(R, "CsparseMatrix"), paste(args[2], "Rout", sep="")); 
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+V1 <- readMM(paste(args[1], "V.mtx", sep=""))
+V <- as.matrix(V1);
+n <- ncol(V); 
+n2 <- n/2;
+
+R <- array(0,dim=c(1,n2))
+
+for( i in 1:n2 )
+{
+   X <- V[,i];                 
+   Y <- V[,n-i+1];                
+   R[1,i] <- sum(X)+sum(Y);
+}   
+
+writeMM(as(R, "CsparseMatrix"), paste(args[2], "Rout", sep="")); 

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor_optimizer3.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor_optimizer3.dml b/src/test/scripts/functions/parfor/parfor_optimizer3.dml
index cb594cb..95742c4 100644
--- a/src/test/scripts/functions/parfor/parfor_optimizer3.dml
+++ b/src/test/scripts/functions/parfor/parfor_optimizer3.dml
@@ -19,34 +19,34 @@
 #
 #-------------------------------------------------------------
 
-
-V = read($1,rows=$2,cols=$3);
-n = $3;
-nd = $4;
-
-R = matrix(0, rows=1,cols=nd); 
-dummy = matrix(1, rows=1, cols=1);
-
-parfor( i in 1:(n/2), opt=RULEBASED )
-{
-   X = V[ ,i];                 
-   Y = V[ ,n-i+1];                 
-   sx = execSum(X);
-   sy = execSum(Y);
-   R[1,i] = dummy*( sx+sy ); 
-}   
-
-write(R, $5);       
-
-
-execSum = function(Matrix[Double] X) return (Double sx) 
-{
-   if( ncol(X) > 0 )
-   {
-      sx = sum(X);    
-   }
-   else
-   {
-      sx = sum(X);
-   }
+
+V = read($1,rows=$2,cols=$3);
+n = $3;
+nd = $4;
+
+R = matrix(0, rows=1,cols=nd); 
+dummy = matrix(1, rows=1, cols=1);
+
+parfor( i in 1:(n/2), opt=RULEBASED )
+{
+   X = V[ ,i];                 
+   Y = V[ ,n-i+1];                 
+   sx = execSum(X);
+   sy = execSum(Y);
+   R[1,i] = dummy*( sx+sy ); 
+}   
+
+write(R, $5);       
+
+
+execSum = function(Matrix[Double] X) return (Double sx) 
+{
+   if( ncol(X) > 0 )
+   {
+      sx = sum(X);    
+   }
+   else
+   {
+      sx = sum(X);
+   }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor_pr_resultmerge1a.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor_pr_resultmerge1a.R b/src/test/scripts/functions/parfor/parfor_pr_resultmerge1a.R
index 3bdaf5b..0a66359 100644
--- a/src/test/scripts/functions/parfor/parfor_pr_resultmerge1a.R
+++ b/src/test/scripts/functions/parfor/parfor_pr_resultmerge1a.R
@@ -19,24 +19,24 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-V1 <- readMM(paste(args[1], "V.mtx", sep=""))
-V <- as.matrix(V1);
-m <- nrow(V); 
-n <- ncol(V); 
-
-R1 <- matrix(0,m,n);
-
-for( i in 1:(n-7) )
-{
-   X <- V[,i];
-   R1[,i] <- X;
-}   
-
-R <- R1 + R1; 
-writeMM(as(R, "CsparseMatrix"), paste(args[2], "Rout", sep=""));
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+V1 <- readMM(paste(args[1], "V.mtx", sep=""))
+V <- as.matrix(V1);
+m <- nrow(V); 
+n <- ncol(V); 
+
+R1 <- matrix(0,m,n);
+
+for( i in 1:(n-7) )
+{
+   X <- V[,i];
+   R1[,i] <- X;
+}   
+
+R <- R1 + R1; 
+writeMM(as(R, "CsparseMatrix"), paste(args[2], "Rout", sep=""));

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor_pr_resultmerge1a.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor_pr_resultmerge1a.dml b/src/test/scripts/functions/parfor/parfor_pr_resultmerge1a.dml
index d134f89..e840d2d 100644
--- a/src/test/scripts/functions/parfor/parfor_pr_resultmerge1a.dml
+++ b/src/test/scripts/functions/parfor/parfor_pr_resultmerge1a.dml
@@ -19,17 +19,17 @@
 #
 #-------------------------------------------------------------
 
-
-V = read($1,rows=$2,cols=$3);
-m = $2;
-n = $3;
-
-R1 = matrix(0,rows=m,cols=n);
-parfor( i in 1:(n-7), par=8, mode=REMOTE_MR, resultmerge=REMOTE_MR, taskpartitioner=FACTORING, opt=NONE )
-{
-   X = V[,i];
-   R1[,i] = X;
-}   
-
-R = R1 + R1; 
+
+V = read($1,rows=$2,cols=$3);
+m = $2;
+n = $3;
+
+R1 = matrix(0,rows=m,cols=n);
+parfor( i in 1:(n-7), par=8, mode=REMOTE_MR, resultmerge=REMOTE_MR, taskpartitioner=FACTORING, opt=NONE )
+{
+   X = V[,i];
+   R1[,i] = X;
+}   
+
+R = R1 + R1; 
 write(R, $4);       
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor_pr_resultmerge1b.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor_pr_resultmerge1b.R b/src/test/scripts/functions/parfor/parfor_pr_resultmerge1b.R
index 3c9bb40..466eb81 100644
--- a/src/test/scripts/functions/parfor/parfor_pr_resultmerge1b.R
+++ b/src/test/scripts/functions/parfor/parfor_pr_resultmerge1b.R
@@ -19,24 +19,24 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-V1 <- readMM(paste(args[1], "V.mtx", sep=""))
-V <- as.matrix(V1);
-m <- nrow(V); 
-n <- ncol(V); 
-
-R1 <- matrix(1,m,n);
-
-for( i in 1:(n-7) )
-{
-   X <- V[,i];
-   R1[,i] <- X;
-}   
-
-R <- R1 + R1; 
-writeMM(as(R, "CsparseMatrix"), paste(args[2], "Rout", sep=""));
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+V1 <- readMM(paste(args[1], "V.mtx", sep=""))
+V <- as.matrix(V1);
+m <- nrow(V); 
+n <- ncol(V); 
+
+R1 <- matrix(1,m,n);
+
+for( i in 1:(n-7) )
+{
+   X <- V[,i];
+   R1[,i] <- X;
+}   
+
+R <- R1 + R1; 
+writeMM(as(R, "CsparseMatrix"), paste(args[2], "Rout", sep=""));

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor_pr_resultmerge1b.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor_pr_resultmerge1b.dml b/src/test/scripts/functions/parfor/parfor_pr_resultmerge1b.dml
index b5c533b..d83465e 100644
--- a/src/test/scripts/functions/parfor/parfor_pr_resultmerge1b.dml
+++ b/src/test/scripts/functions/parfor/parfor_pr_resultmerge1b.dml
@@ -19,17 +19,17 @@
 #
 #-------------------------------------------------------------
 
-
-V = read($1,rows=$2,cols=$3);
-m = $2;
-n = $3;
-
-R1 = matrix(1,rows=m,cols=n);
-parfor( i in 1:(n-7), par=8, mode=REMOTE_MR, resultmerge=REMOTE_MR, taskpartitioner=FACTORING, opt=NONE )
-{
-   X = V[,i];
-   R1[,i] = X;
-}   
-
-R = R1 + R1; 
+
+V = read($1,rows=$2,cols=$3);
+m = $2;
+n = $3;
+
+R1 = matrix(1,rows=m,cols=n);
+parfor( i in 1:(n-7), par=8, mode=REMOTE_MR, resultmerge=REMOTE_MR, taskpartitioner=FACTORING, opt=NONE )
+{
+   X = V[,i];
+   R1[,i] = X;
+}   
+
+R = R1 + R1; 
 write(R, $4);       
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor_pr_resultmerge1c.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor_pr_resultmerge1c.R b/src/test/scripts/functions/parfor/parfor_pr_resultmerge1c.R
index 3bdaf5b..0a66359 100644
--- a/src/test/scripts/functions/parfor/parfor_pr_resultmerge1c.R
+++ b/src/test/scripts/functions/parfor/parfor_pr_resultmerge1c.R
@@ -19,24 +19,24 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-V1 <- readMM(paste(args[1], "V.mtx", sep=""))
-V <- as.matrix(V1);
-m <- nrow(V); 
-n <- ncol(V); 
-
-R1 <- matrix(0,m,n);
-
-for( i in 1:(n-7) )
-{
-   X <- V[,i];
-   R1[,i] <- X;
-}   
-
-R <- R1 + R1; 
-writeMM(as(R, "CsparseMatrix"), paste(args[2], "Rout", sep=""));
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+V1 <- readMM(paste(args[1], "V.mtx", sep=""))
+V <- as.matrix(V1);
+m <- nrow(V); 
+n <- ncol(V); 
+
+R1 <- matrix(0,m,n);
+
+for( i in 1:(n-7) )
+{
+   X <- V[,i];
+   R1[,i] <- X;
+}   
+
+R <- R1 + R1; 
+writeMM(as(R, "CsparseMatrix"), paste(args[2], "Rout", sep=""));

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor_pr_resultmerge1c.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor_pr_resultmerge1c.dml b/src/test/scripts/functions/parfor/parfor_pr_resultmerge1c.dml
index 7992f8d..fa01bfe 100644
--- a/src/test/scripts/functions/parfor/parfor_pr_resultmerge1c.dml
+++ b/src/test/scripts/functions/parfor/parfor_pr_resultmerge1c.dml
@@ -19,17 +19,17 @@
 #
 #-------------------------------------------------------------
 
-
-V = read($1,rows=$2,cols=$3);
-m = $2;
-n = $3;
-
-R1 = matrix(0,rows=m,cols=n);
-parfor( i in 1:(n-7), par=8, mode=REMOTE_SPARK, resultmerge=REMOTE_SPARK, taskpartitioner=FACTORING, opt=NONE )
-{
-   X = V[,i];
-   R1[,i] = X;
-}   
-
-R = R1 + R1; 
+
+V = read($1,rows=$2,cols=$3);
+m = $2;
+n = $3;
+
+R1 = matrix(0,rows=m,cols=n);
+parfor( i in 1:(n-7), par=8, mode=REMOTE_SPARK, resultmerge=REMOTE_SPARK, taskpartitioner=FACTORING, opt=NONE )
+{
+   X = V[,i];
+   R1[,i] = X;
+}   
+
+R = R1 + R1; 
 write(R, $4);       
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor_pr_resultmerge1d.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor_pr_resultmerge1d.R b/src/test/scripts/functions/parfor/parfor_pr_resultmerge1d.R
index 3c9bb40..466eb81 100644
--- a/src/test/scripts/functions/parfor/parfor_pr_resultmerge1d.R
+++ b/src/test/scripts/functions/parfor/parfor_pr_resultmerge1d.R
@@ -19,24 +19,24 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-V1 <- readMM(paste(args[1], "V.mtx", sep=""))
-V <- as.matrix(V1);
-m <- nrow(V); 
-n <- ncol(V); 
-
-R1 <- matrix(1,m,n);
-
-for( i in 1:(n-7) )
-{
-   X <- V[,i];
-   R1[,i] <- X;
-}   
-
-R <- R1 + R1; 
-writeMM(as(R, "CsparseMatrix"), paste(args[2], "Rout", sep=""));
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+V1 <- readMM(paste(args[1], "V.mtx", sep=""))
+V <- as.matrix(V1);
+m <- nrow(V); 
+n <- ncol(V); 
+
+R1 <- matrix(1,m,n);
+
+for( i in 1:(n-7) )
+{
+   X <- V[,i];
+   R1[,i] <- X;
+}   
+
+R <- R1 + R1; 
+writeMM(as(R, "CsparseMatrix"), paste(args[2], "Rout", sep=""));

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor_pr_resultmerge1d.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor_pr_resultmerge1d.dml b/src/test/scripts/functions/parfor/parfor_pr_resultmerge1d.dml
index bbb7bef..02e1657 100644
--- a/src/test/scripts/functions/parfor/parfor_pr_resultmerge1d.dml
+++ b/src/test/scripts/functions/parfor/parfor_pr_resultmerge1d.dml
@@ -19,17 +19,17 @@
 #
 #-------------------------------------------------------------
 
-
-V = read($1,rows=$2,cols=$3);
-m = $2;
-n = $3;
-
-R1 = matrix(1,rows=m,cols=n);
-parfor( i in 1:(n-7), par=8, mode=REMOTE_SPARK, resultmerge=REMOTE_SPARK, taskpartitioner=FACTORING, opt=NONE )
-{
-   X = V[,i];
-   R1[,i] = X;
-}   
-
-R = R1 + R1; 
+
+V = read($1,rows=$2,cols=$3);
+m = $2;
+n = $3;
+
+R1 = matrix(1,rows=m,cols=n);
+parfor( i in 1:(n-7), par=8, mode=REMOTE_SPARK, resultmerge=REMOTE_SPARK, taskpartitioner=FACTORING, opt=NONE )
+{
+   X = V[,i];
+   R1[,i] = X;
+}   
+
+R = R1 + R1; 
 write(R, $4);       
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor_pr_resultmerge2.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor_pr_resultmerge2.R b/src/test/scripts/functions/parfor/parfor_pr_resultmerge2.R
index 3248ec1..7068fd3 100644
--- a/src/test/scripts/functions/parfor/parfor_pr_resultmerge2.R
+++ b/src/test/scripts/functions/parfor/parfor_pr_resultmerge2.R
@@ -19,26 +19,26 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-V1 <- readMM(paste(args[1], "V.mtx", sep=""))
-V <- as.matrix(V1);
-m <- nrow(V); 
-n <- ncol(V); 
-
-R1 <- matrix(0,m,n);
-R2 <- matrix(0,m,n);
-
-for( i in 1:n )
-{
-   X <- V[,i];
-   R1[,i] <- X;
-   R2[,i] <- X;
-}   
-
-R <- R1 + R2; 
-writeMM(as(R, "CsparseMatrix"), paste(args[2], "Rout", sep=""));
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+V1 <- readMM(paste(args[1], "V.mtx", sep=""))
+V <- as.matrix(V1);
+m <- nrow(V); 
+n <- ncol(V); 
+
+R1 <- matrix(0,m,n);
+R2 <- matrix(0,m,n);
+
+for( i in 1:n )
+{
+   X <- V[,i];
+   R1[,i] <- X;
+   R2[,i] <- X;
+}   
+
+R <- R1 + R2; 
+writeMM(as(R, "CsparseMatrix"), paste(args[2], "Rout", sep=""));

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor_pr_resultmerge2.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor_pr_resultmerge2.dml b/src/test/scripts/functions/parfor/parfor_pr_resultmerge2.dml
index 00dc45c..5d55730 100644
--- a/src/test/scripts/functions/parfor/parfor_pr_resultmerge2.dml
+++ b/src/test/scripts/functions/parfor/parfor_pr_resultmerge2.dml
@@ -19,19 +19,19 @@
 #
 #-------------------------------------------------------------
 
-
-V = read($1,rows=$2,cols=$3);
-m = $2;
-n = $3;
-
-R1 = matrix(0,rows=m,cols=n);
-R2 = matrix(0,rows=m,cols=n);
-parfor( i in 1:n, par=8, mode=REMOTE_MR, resultmerge=REMOTE_MR, taskpartitioner=FACTORING, opt=NONE )
-{
-   X = V[,i];
-   R1[,i] = X;
-   R2[,i] = X;                 
-}   
-
-R = R1 + R2; 
+
+V = read($1,rows=$2,cols=$3);
+m = $2;
+n = $3;
+
+R1 = matrix(0,rows=m,cols=n);
+R2 = matrix(0,rows=m,cols=n);
+parfor( i in 1:n, par=8, mode=REMOTE_MR, resultmerge=REMOTE_MR, taskpartitioner=FACTORING, opt=NONE )
+{
+   X = V[,i];
+   R1[,i] = X;
+   R2[,i] = X;                 
+}   
+
+R = R1 + R2; 
 write(R, $4);       
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor_pr_resultmerge32.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor_pr_resultmerge32.R b/src/test/scripts/functions/parfor/parfor_pr_resultmerge32.R
index 1347335..f1e9dd0 100644
--- a/src/test/scripts/functions/parfor/parfor_pr_resultmerge32.R
+++ b/src/test/scripts/functions/parfor/parfor_pr_resultmerge32.R
@@ -19,86 +19,86 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-V1 <- readMM(paste(args[1], "V.mtx", sep=""))
-V <- as.matrix(V1);
-m <- nrow(V); 
-n <- ncol(V); 
-
-R1 <- matrix(0,m,n);
-R2 <- matrix(0,m,n);
-R3 <- matrix(0,m,n);
-R4 <- matrix(0,m,n);
-R5 <- matrix(0,m,n);
-R6 <- matrix(0,m,n);
-R7 <- matrix(0,m,n);
-R8 <- matrix(0,m,n);
-R9 <- matrix(0,m,n);
-R10 <- matrix(0,m,n);
-R11 <- matrix(0,m,n);
-R12 <- matrix(0,m,n);
-R13 <- matrix(0,m,n);
-R14 <- matrix(0,m,n);
-R15 <- matrix(0,m,n);
-R16 <- matrix(0,m,n);
-R17 <- matrix(0,m,n);
-R18 <- matrix(0,m,n);
-R19 <- matrix(0,m,n);
-R20 <- matrix(0,m,n);
-R21 <- matrix(0,m,n);
-R22 <- matrix(0,m,n);
-R23 <- matrix(0,m,n);
-R24 <- matrix(0,m,n);
-R25 <- matrix(0,m,n);
-R26 <- matrix(0,m,n);
-R27 <- matrix(0,m,n);
-R28 <- matrix(0,m,n);
-R29 <- matrix(0,m,n);
-R30 <- matrix(0,m,n);
-R31 <- matrix(0,m,n);
-R32 <- matrix(0,m,n);
-
-for( i in 1:n )
-{
-   X <- V[,i];
-   R1[,i] <- X;
-   R2[,i] <- X;
-   R3[,i] <- X;
-   R4[,i] <- X;
-   R5[,i] <- X;
-   R6[,i] <- X;
-   R7[,i] <- X;
-   R8[,i] <- X;
-   R9[,i] <- X;
-   R10[,i] <- X;
-   R11[,i] <- X;
-   R12[,i] <- X;
-   R13[,i] <- X;
-   R14[,i] <- X;
-   R15[,i] <- X;
-   R16[,i] <- X;
-   R17[,i] <- X;
-   R18[,i] <- X;
-   R19[,i] <- X;
-   R20[,i] <- X;
-   R21[,i] <- X;
-   R22[,i] <- X;
-   R23[,i] <- X;
-   R24[,i] <- X;
-   R25[,i] <- X;
-   R26[,i] <- X;
-   R27[,i] <- X;
-   R28[,i] <- X;
-   R29[,i] <- X;
-   R30[,i] <- X;
-   R31[,i] <- X;
-   R32[,i] <- X;
-}   
-
-R <- R1 + R2 + R3 + R4 + R5 + R6 + R7 + R8 + R9 + R10 + R11 + R12 + R13 + R14 + R15 + R16 + R17 + R18 + R19 + R20 + R21 + R22 + R23 + R24 + R25 + R26 + R27 + R28 + R29 + R30 + R31 + R32; 
-writeMM(as(R, "CsparseMatrix"), paste(args[2], "Rout", sep=""));
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+V1 <- readMM(paste(args[1], "V.mtx", sep=""))
+V <- as.matrix(V1);
+m <- nrow(V); 
+n <- ncol(V); 
+
+R1 <- matrix(0,m,n);
+R2 <- matrix(0,m,n);
+R3 <- matrix(0,m,n);
+R4 <- matrix(0,m,n);
+R5 <- matrix(0,m,n);
+R6 <- matrix(0,m,n);
+R7 <- matrix(0,m,n);
+R8 <- matrix(0,m,n);
+R9 <- matrix(0,m,n);
+R10 <- matrix(0,m,n);
+R11 <- matrix(0,m,n);
+R12 <- matrix(0,m,n);
+R13 <- matrix(0,m,n);
+R14 <- matrix(0,m,n);
+R15 <- matrix(0,m,n);
+R16 <- matrix(0,m,n);
+R17 <- matrix(0,m,n);
+R18 <- matrix(0,m,n);
+R19 <- matrix(0,m,n);
+R20 <- matrix(0,m,n);
+R21 <- matrix(0,m,n);
+R22 <- matrix(0,m,n);
+R23 <- matrix(0,m,n);
+R24 <- matrix(0,m,n);
+R25 <- matrix(0,m,n);
+R26 <- matrix(0,m,n);
+R27 <- matrix(0,m,n);
+R28 <- matrix(0,m,n);
+R29 <- matrix(0,m,n);
+R30 <- matrix(0,m,n);
+R31 <- matrix(0,m,n);
+R32 <- matrix(0,m,n);
+
+for( i in 1:n )
+{
+   X <- V[,i];
+   R1[,i] <- X;
+   R2[,i] <- X;
+   R3[,i] <- X;
+   R4[,i] <- X;
+   R5[,i] <- X;
+   R6[,i] <- X;
+   R7[,i] <- X;
+   R8[,i] <- X;
+   R9[,i] <- X;
+   R10[,i] <- X;
+   R11[,i] <- X;
+   R12[,i] <- X;
+   R13[,i] <- X;
+   R14[,i] <- X;
+   R15[,i] <- X;
+   R16[,i] <- X;
+   R17[,i] <- X;
+   R18[,i] <- X;
+   R19[,i] <- X;
+   R20[,i] <- X;
+   R21[,i] <- X;
+   R22[,i] <- X;
+   R23[,i] <- X;
+   R24[,i] <- X;
+   R25[,i] <- X;
+   R26[,i] <- X;
+   R27[,i] <- X;
+   R28[,i] <- X;
+   R29[,i] <- X;
+   R30[,i] <- X;
+   R31[,i] <- X;
+   R32[,i] <- X;
+}   
+
+R <- R1 + R2 + R3 + R4 + R5 + R6 + R7 + R8 + R9 + R10 + R11 + R12 + R13 + R14 + R15 + R16 + R17 + R18 + R19 + R20 + R21 + R22 + R23 + R24 + R25 + R26 + R27 + R28 + R29 + R30 + R31 + R32; 
+writeMM(as(R, "CsparseMatrix"), paste(args[2], "Rout", sep=""));

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor_pr_resultmerge32.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor_pr_resultmerge32.dml b/src/test/scripts/functions/parfor/parfor_pr_resultmerge32.dml
index 7d8d486..f1d8dd9 100644
--- a/src/test/scripts/functions/parfor/parfor_pr_resultmerge32.dml
+++ b/src/test/scripts/functions/parfor/parfor_pr_resultmerge32.dml
@@ -19,79 +19,79 @@
 #
 #-------------------------------------------------------------
 
-
-V = read($1,rows=$2,cols=$3);
-m = $2;
-n = $3;
-
-R1 = matrix(0,rows=m,cols=n);
-R2 = matrix(0,rows=m,cols=n);
-R3 = matrix(0,rows=m,cols=n);
-R4 = matrix(0,rows=m,cols=n);
-R5 = matrix(0,rows=m,cols=n);
-R6 = matrix(0,rows=m,cols=n);
-R7 = matrix(0,rows=m,cols=n);
-R8 = matrix(0,rows=m,cols=n);
-R9 = matrix(0,rows=m,cols=n);
-R10 = matrix(0,rows=m,cols=n);
-R11 = matrix(0,rows=m,cols=n);
-R12 = matrix(0,rows=m,cols=n);
-R13 = matrix(0,rows=m,cols=n);
-R14 = matrix(0,rows=m,cols=n);
-R15 = matrix(0,rows=m,cols=n);
-R16 = matrix(0,rows=m,cols=n);
-R17 = matrix(0,rows=m,cols=n);
-R18 = matrix(0,rows=m,cols=n);
-R19 = matrix(0,rows=m,cols=n);
-R20 = matrix(0,rows=m,cols=n);
-R21 = matrix(0,rows=m,cols=n);
-R22 = matrix(0,rows=m,cols=n);
-R23 = matrix(0,rows=m,cols=n);
-R24 = matrix(0,rows=m,cols=n);
-R25 = matrix(0,rows=m,cols=n);
-R26 = matrix(0,rows=m,cols=n);
-R27 = matrix(0,rows=m,cols=n);
-R28 = matrix(0,rows=m,cols=n);
-R29 = matrix(0,rows=m,cols=n);
-R30 = matrix(0,rows=m,cols=n);
-R31 = matrix(0,rows=m,cols=n);
-R32 = matrix(0,rows=m,cols=n);
-parfor( i in 1:n, par=8, mode=REMOTE_MR, resultmerge=REMOTE_MR, taskpartitioner=FACTORING, opt=NONE )
-{
-   X = V[,i];
-   R1[,i] = X;
-   R2[,i] = X;
-   R3[,i] = X;
-   R4[,i] = X;
-   R5[,i] = X;
-   R6[,i] = X;
-   R7[,i] = X;
-   R8[,i] = X;
-   R9[,i] = X;
-   R10[,i] = X;
-   R11[,i] = X;
-   R12[,i] = X;
-   R13[,i] = X;
-   R14[,i] = X;
-   R15[,i] = X;
-   R16[,i] = X;
-   R17[,i] = X;
-   R18[,i] = X;
-   R19[,i] = X;
-   R20[,i] = X;
-   R21[,i] = X;
-   R22[,i] = X;
-   R23[,i] = X;
-   R24[,i] = X;
-   R25[,i] = X;
-   R26[,i] = X;
-   R27[,i] = X;
-   R28[,i] = X;
-   R29[,i] = X;
-   R30[,i] = X;
-   R31[,i] = X;
-   R32[,i] = X;                 
-}   
-
-R = R1 + R2 + R3 + R4 + R5 + R6 + R7 + R8 + R9 + R10 + R11 + R12 + R13 + R14 + R15 + R16 + R17 + R18 + R19 + R20 + R21 + R22 + R23 + R24 + R25 + R26 + R27 + R28 + R29 + R30 + R31 + R32; 
-write(R, $4);       
+
+V = read($1,rows=$2,cols=$3);
+m = $2;
+n = $3;
+
+R1 = matrix(0,rows=m,cols=n);
+R2 = matrix(0,rows=m,cols=n);
+R3 = matrix(0,rows=m,cols=n);
+R4 = matrix(0,rows=m,cols=n);
+R5 = matrix(0,rows=m,cols=n);
+R6 = matrix(0,rows=m,cols=n);
+R7 = matrix(0,rows=m,cols=n);
+R8 = matrix(0,rows=m,cols=n);
+R9 = matrix(0,rows=m,cols=n);
+R10 = matrix(0,rows=m,cols=n);
+R11 = matrix(0,rows=m,cols=n);
+R12 = matrix(0,rows=m,cols=n);
+R13 = matrix(0,rows=m,cols=n);
+R14 = matrix(0,rows=m,cols=n);
+R15 = matrix(0,rows=m,cols=n);
+R16 = matrix(0,rows=m,cols=n);
+R17 = matrix(0,rows=m,cols=n);
+R18 = matrix(0,rows=m,cols=n);
+R19 = matrix(0,rows=m,cols=n);
+R20 = matrix(0,rows=m,cols=n);
+R21 = matrix(0,rows=m,cols=n);
+R22 = matrix(0,rows=m,cols=n);
+R23 = matrix(0,rows=m,cols=n);
+R24 = matrix(0,rows=m,cols=n);
+R25 = matrix(0,rows=m,cols=n);
+R26 = matrix(0,rows=m,cols=n);
+R27 = matrix(0,rows=m,cols=n);
+R28 = matrix(0,rows=m,cols=n);
+R29 = matrix(0,rows=m,cols=n);
+R30 = matrix(0,rows=m,cols=n);
+R31 = matrix(0,rows=m,cols=n);
+R32 = matrix(0,rows=m,cols=n);
+parfor( i in 1:n, par=8, mode=REMOTE_MR, resultmerge=REMOTE_MR, taskpartitioner=FACTORING, opt=NONE )
+{
+   X = V[,i];
+   R1[,i] = X;
+   R2[,i] = X;
+   R3[,i] = X;
+   R4[,i] = X;
+   R5[,i] = X;
+   R6[,i] = X;
+   R7[,i] = X;
+   R8[,i] = X;
+   R9[,i] = X;
+   R10[,i] = X;
+   R11[,i] = X;
+   R12[,i] = X;
+   R13[,i] = X;
+   R14[,i] = X;
+   R15[,i] = X;
+   R16[,i] = X;
+   R17[,i] = X;
+   R18[,i] = X;
+   R19[,i] = X;
+   R20[,i] = X;
+   R21[,i] = X;
+   R22[,i] = X;
+   R23[,i] = X;
+   R24[,i] = X;
+   R25[,i] = X;
+   R26[,i] = X;
+   R27[,i] = X;
+   R28[,i] = X;
+   R29[,i] = X;
+   R30[,i] = X;
+   R31[,i] = X;
+   R32[,i] = X;                 
+}   
+
+R = R1 + R2 + R3 + R4 + R5 + R6 + R7 + R8 + R9 + R10 + R11 + R12 + R13 + R14 + R15 + R16 + R17 + R18 + R19 + R20 + R21 + R22 + R23 + R24 + R25 + R26 + R27 + R28 + R29 + R30 + R31 + R32; 
+write(R, $4);       

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor_rdatapartition_leftindexing.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor_rdatapartition_leftindexing.dml b/src/test/scripts/functions/parfor/parfor_rdatapartition_leftindexing.dml
index 46180f9..b3fed2d 100644
--- a/src/test/scripts/functions/parfor/parfor_rdatapartition_leftindexing.dml
+++ b/src/test/scripts/functions/parfor/parfor_rdatapartition_leftindexing.dml
@@ -19,19 +19,19 @@
 #
 #-------------------------------------------------------------
 
-
-V = read($1);
-m = nrow(V);
-n = ncol(V);
-
-R = matrix(0,rows=m,cols=n); 
-
-#parfor( i in 1:m, par=4, mode=LOCAL, datapartitioner=REMOTE_MR, resultmerge=REMOTE_MR, opt=NONE )
-parfor( i in 1:m )
-{
-   col = V[i,];
-   if(1==1){}
-   R[i,] = col; 
-}   
-
+
+V = read($1);
+m = nrow(V);
+n = ncol(V);
+
+R = matrix(0,rows=m,cols=n); 
+
+#parfor( i in 1:m, par=4, mode=LOCAL, datapartitioner=REMOTE_MR, resultmerge=REMOTE_MR, opt=NONE )
+parfor( i in 1:m )
+{
+   col = V[i,];
+   if(1==1){}
+   R[i,] = col; 
+}   
+
 write(R, $2);       
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor_rdatapartitioning.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor_rdatapartitioning.R b/src/test/scripts/functions/parfor/parfor_rdatapartitioning.R
index 295e490..2326b0b 100644
--- a/src/test/scripts/functions/parfor/parfor_rdatapartitioning.R
+++ b/src/test/scripts/functions/parfor/parfor_rdatapartitioning.R
@@ -19,22 +19,22 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-V1 <- readMM(paste(args[1], "V.mtx", sep=""))
-V <- as.matrix(V1);
-n <- nrow(V); 
-
-R <- array(0,dim=c(1,n))
-
-for( i in 1:n )
-{
-   X <- V[i,];                 
-   R[1,i] <- sum(X);
-}   
-
-writeMM(as(R, "CsparseMatrix"), paste(args[2], "Rout", sep=""));
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+V1 <- readMM(paste(args[1], "V.mtx", sep=""))
+V <- as.matrix(V1);
+n <- nrow(V); 
+
+R <- array(0,dim=c(1,n))
+
+for( i in 1:n )
+{
+   X <- V[i,];                 
+   R[1,i] <- sum(X);
+}   
+
+writeMM(as(R, "CsparseMatrix"), paste(args[2], "Rout", sep=""));

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor_rdatapartitioning1.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor_rdatapartitioning1.dml b/src/test/scripts/functions/parfor/parfor_rdatapartitioning1.dml
index 013b086..9c3188b 100644
--- a/src/test/scripts/functions/parfor/parfor_rdatapartitioning1.dml
+++ b/src/test/scripts/functions/parfor/parfor_rdatapartitioning1.dml
@@ -19,18 +19,18 @@
 #
 #-------------------------------------------------------------
 
-
-V = read($1,rows=$2,cols=$3);
-n = $2;
-
-R = matrix(0,rows=1,cols=n); 
-dummy = matrix(1, rows=1, cols=1);
-
-parfor( i in 1:n, par=4, mode=LOCAL, datapartitioner=NONE,  taskpartitioner=FACTORING, opt=NONE )
-{
-   X = V[i,];                 
-   sX = sum(X);
-   R[1,i] = dummy * sX; 
-}   
-
+
+V = read($1,rows=$2,cols=$3);
+n = $2;
+
+R = matrix(0,rows=1,cols=n); 
+dummy = matrix(1, rows=1, cols=1);
+
+parfor( i in 1:n, par=4, mode=LOCAL, datapartitioner=NONE,  taskpartitioner=FACTORING, opt=NONE )
+{
+   X = V[i,];                 
+   sX = sum(X);
+   R[1,i] = dummy * sX; 
+}   
+
 write(R, $4);       
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor_rdatapartitioning2.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor_rdatapartitioning2.dml b/src/test/scripts/functions/parfor/parfor_rdatapartitioning2.dml
index 757fedd..116013f 100644
--- a/src/test/scripts/functions/parfor/parfor_rdatapartitioning2.dml
+++ b/src/test/scripts/functions/parfor/parfor_rdatapartitioning2.dml
@@ -19,18 +19,18 @@
 #
 #-------------------------------------------------------------
 
-
-V = read($1,rows=$2,cols=$3);
-n = $2;
-
-R = matrix(0,rows=1,cols=n); 
-dummy = matrix(1,rows=1, cols=1);
-
-parfor( i in 1:n, par=4, mode=LOCAL, datapartitioner=LOCAL, taskpartitioner=FACTORING, opt=NONE )
-{
-   X = V[i,];                 
-   sX = sum(X);
-   R[1,i] = dummy * sX; 
-}   
-
+
+V = read($1,rows=$2,cols=$3);
+n = $2;
+
+R = matrix(0,rows=1,cols=n); 
+dummy = matrix(1,rows=1, cols=1);
+
+parfor( i in 1:n, par=4, mode=LOCAL, datapartitioner=LOCAL, taskpartitioner=FACTORING, opt=NONE )
+{
+   X = V[i,];                 
+   sX = sum(X);
+   R[1,i] = dummy * sX; 
+}   
+
 write(R, $4);       
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor_rdatapartitioning3.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor_rdatapartitioning3.dml b/src/test/scripts/functions/parfor/parfor_rdatapartitioning3.dml
index c38dd13..42bc04e 100644
--- a/src/test/scripts/functions/parfor/parfor_rdatapartitioning3.dml
+++ b/src/test/scripts/functions/parfor/parfor_rdatapartitioning3.dml
@@ -19,18 +19,18 @@
 #
 #-------------------------------------------------------------
 
-
-V = read($1,rows=$2,cols=$3);
-n = $2;
-
-R = matrix(0, rows=1,cols=n); 
-dummy = matrix(1, rows=1, cols=1);
-
-parfor( i in 1:n, par=4, mode=REMOTE_MR,datapartitioner=LOCAL, taskpartitioner=FACTORING, opt=NONE )
-{
-   X = V[i,];                 
-   sX = sum(X);
-   R[1,i] = dummy * sX; 
-}   
-
+
+V = read($1,rows=$2,cols=$3);
+n = $2;
+
+R = matrix(0, rows=1,cols=n); 
+dummy = matrix(1, rows=1, cols=1);
+
+parfor( i in 1:n, par=4, mode=REMOTE_MR,datapartitioner=LOCAL, taskpartitioner=FACTORING, opt=NONE )
+{
+   X = V[i,];                 
+   sX = sum(X);
+   R[1,i] = dummy * sX; 
+}   
+
 write(R, $4);       
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor_rdatapartitioning4.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor_rdatapartitioning4.dml b/src/test/scripts/functions/parfor/parfor_rdatapartitioning4.dml
index 2e5897d..15753b2 100644
--- a/src/test/scripts/functions/parfor/parfor_rdatapartitioning4.dml
+++ b/src/test/scripts/functions/parfor/parfor_rdatapartitioning4.dml
@@ -19,18 +19,18 @@
 #
 #-------------------------------------------------------------
 
-
-V = read($1,rows=$2,cols=$3);
-n = $2;
-
-R = matrix(0,rows=1,cols=n); 
-dummy = matrix(1,rows=1, cols=1);
-
-parfor( i in 1:n, par=4, mode=LOCAL, datapartitioner=REMOTE_MR, taskpartitioner=FACTORING, opt=NONE )
-{
-   X = V[i,];                 
-   sX = sum(X);
-   R[1,i] = dummy * sX; 
-}   
-
+
+V = read($1,rows=$2,cols=$3);
+n = $2;
+
+R = matrix(0,rows=1,cols=n); 
+dummy = matrix(1,rows=1, cols=1);
+
+parfor( i in 1:n, par=4, mode=LOCAL, datapartitioner=REMOTE_MR, taskpartitioner=FACTORING, opt=NONE )
+{
+   X = V[i,];                 
+   sX = sum(X);
+   R[1,i] = dummy * sX; 
+}   
+
 write(R, $4);       
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor_rdatapartitioning5.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor_rdatapartitioning5.dml b/src/test/scripts/functions/parfor/parfor_rdatapartitioning5.dml
index 1442806..a6ba3a6 100644
--- a/src/test/scripts/functions/parfor/parfor_rdatapartitioning5.dml
+++ b/src/test/scripts/functions/parfor/parfor_rdatapartitioning5.dml
@@ -19,18 +19,18 @@
 #
 #-------------------------------------------------------------
 
-
-V = read($1,rows=$2,cols=$3);
-n = $2;
-
-R = matrix(0,rows=1,cols=n); 
-dummy = matrix(1,rows=1, cols=1);
-
-parfor( i in 1:n, par=4, mode=REMOTE_MR,datapartitioner=REMOTE_MR,  taskpartitioner=FACTORING, opt=NONE )
-{
-   X = V[i,];                 
-   sX = sum(X);
-   R[1,i] = dummy * sX; 
-}   
-
+
+V = read($1,rows=$2,cols=$3);
+n = $2;
+
+R = matrix(0,rows=1,cols=n); 
+dummy = matrix(1,rows=1, cols=1);
+
+parfor( i in 1:n, par=4, mode=REMOTE_MR,datapartitioner=REMOTE_MR,  taskpartitioner=FACTORING, opt=NONE )
+{
+   X = V[i,];                 
+   sX = sum(X);
+   R[1,i] = dummy * sX; 
+}   
+
 write(R, $4);       
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor_rdatapartitioning6.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor_rdatapartitioning6.R b/src/test/scripts/functions/parfor/parfor_rdatapartitioning6.R
index 7492569..89b4bd4 100644
--- a/src/test/scripts/functions/parfor/parfor_rdatapartitioning6.R
+++ b/src/test/scripts/functions/parfor/parfor_rdatapartitioning6.R
@@ -19,22 +19,22 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-V1 <- readMM(paste(args[1], "V.mtx", sep=""))
-V <- as.matrix(V1);
-n <- nrow(V); 
-
-R <- array(0,dim=c(1,n))
-
-for( i in 1:n-1 )
-{
-   X <- V[i:(i+1),];                 
-   R[1,i] <- sum(X);
-}   
-
-writeMM(as(R, "CsparseMatrix"), paste(args[2], "Rout", sep=""));
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+V1 <- readMM(paste(args[1], "V.mtx", sep=""))
+V <- as.matrix(V1);
+n <- nrow(V); 
+
+R <- array(0,dim=c(1,n))
+
+for( i in 1:n-1 )
+{
+   X <- V[i:(i+1),];                 
+   R[1,i] <- sum(X);
+}   
+
+writeMM(as(R, "CsparseMatrix"), paste(args[2], "Rout", sep=""));

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor_rdatapartitioning6.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor_rdatapartitioning6.dml b/src/test/scripts/functions/parfor/parfor_rdatapartitioning6.dml
index ff53054..18f78b0 100644
--- a/src/test/scripts/functions/parfor/parfor_rdatapartitioning6.dml
+++ b/src/test/scripts/functions/parfor/parfor_rdatapartitioning6.dml
@@ -19,18 +19,18 @@
 #
 #-------------------------------------------------------------
 
-
-V = read($1,rows=$2,cols=$3);
-n = $2;
-
-R = matrix(0,rows=1,cols=n); 
-dummy = matrix(1,rows=1, cols=1);
-
-parfor( i in 1:n-1, par=4, mode=LOCAL, datapartitioner=REMOTE_MR,  taskpartitioner=FACTORING, opt=NONE )
-{
-   X = V[i:(i+1),];                 
-   sX = sum(X);
-   R[1,i] = dummy * sX; 
-}   
-
+
+V = read($1,rows=$2,cols=$3);
+n = $2;
+
+R = matrix(0,rows=1,cols=n); 
+dummy = matrix(1,rows=1, cols=1);
+
+parfor( i in 1:n-1, par=4, mode=LOCAL, datapartitioner=REMOTE_MR,  taskpartitioner=FACTORING, opt=NONE )
+{
+   X = V[i:(i+1),];                 
+   sX = sum(X);
+   R[1,i] = dummy * sX; 
+}   
+
 write(R, $4);       
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor_rdatapartitioning7.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor_rdatapartitioning7.dml b/src/test/scripts/functions/parfor/parfor_rdatapartitioning7.dml
index 79354dd..494e2b0 100644
--- a/src/test/scripts/functions/parfor/parfor_rdatapartitioning7.dml
+++ b/src/test/scripts/functions/parfor/parfor_rdatapartitioning7.dml
@@ -19,18 +19,18 @@
 #
 #-------------------------------------------------------------
 
-
-V = read($1,rows=$2,cols=$3);
-n = $2;
-
-R = matrix(0,rows=1,cols=n); 
-dummy = matrix(1,rows=1, cols=1);
-
-parfor( i in 1:n, par=4, mode=LOCAL, datapartitioner=REMOTE_SPARK, taskpartitioner=FACTORING, opt=NONE )
-{
-   X = V[i,];                 
-   sX = sum(X);
-   R[1,i] = dummy * sX; 
-}   
-
+
+V = read($1,rows=$2,cols=$3);
+n = $2;
+
+R = matrix(0,rows=1,cols=n); 
+dummy = matrix(1,rows=1, cols=1);
+
+parfor( i in 1:n, par=4, mode=LOCAL, datapartitioner=REMOTE_SPARK, taskpartitioner=FACTORING, opt=NONE )
+{
+   X = V[i,];                 
+   sX = sum(X);
+   R[1,i] = dummy * sX; 
+}   
+
 write(R, $4);       
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor_rdatapartitioning8.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor_rdatapartitioning8.dml b/src/test/scripts/functions/parfor/parfor_rdatapartitioning8.dml
index 516a56a..ac77777 100644
--- a/src/test/scripts/functions/parfor/parfor_rdatapartitioning8.dml
+++ b/src/test/scripts/functions/parfor/parfor_rdatapartitioning8.dml
@@ -19,18 +19,18 @@
 #
 #-------------------------------------------------------------
 
-
-V = read($1,rows=$2,cols=$3);
-n = $2;
-
-R = matrix(0,rows=1,cols=n); 
-dummy = matrix(1,rows=1, cols=1);
-
-parfor( i in 1:n, par=4, mode=REMOTE_SPARK,datapartitioner=REMOTE_SPARK,  taskpartitioner=FACTORING, opt=NONE )
-{
-   X = V[i,];                 
-   sX = sum(X);
-   R[1,i] = dummy * sX; 
-}   
-
+
+V = read($1,rows=$2,cols=$3);
+n = $2;
+
+R = matrix(0,rows=1,cols=n); 
+dummy = matrix(1,rows=1, cols=1);
+
+parfor( i in 1:n, par=4, mode=REMOTE_SPARK,datapartitioner=REMOTE_SPARK,  taskpartitioner=FACTORING, opt=NONE )
+{
+   X = V[i,];                 
+   sX = sum(X);
+   R[1,i] = dummy * sX; 
+}   
+
 write(R, $4);       
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor_rdatapartitioning9.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor_rdatapartitioning9.dml b/src/test/scripts/functions/parfor/parfor_rdatapartitioning9.dml
index 89254c5..cea1134 100644
--- a/src/test/scripts/functions/parfor/parfor_rdatapartitioning9.dml
+++ b/src/test/scripts/functions/parfor/parfor_rdatapartitioning9.dml
@@ -19,18 +19,18 @@
 #
 #-------------------------------------------------------------
 
-
-V = read($1,rows=$2,cols=$3);
-n = $2;
-
-R = matrix(0,rows=1,cols=n); 
-dummy = matrix(1,rows=1, cols=1);
-
-parfor( i in 1:n-1, par=4, mode=LOCAL, datapartitioner=REMOTE_SPARK,  taskpartitioner=FACTORING, opt=NONE )
-{
-   X = V[i:(i+1),];                 
-   sX = sum(X);
-   R[1,i] = dummy * sX; 
-}   
-
+
+V = read($1,rows=$2,cols=$3);
+n = $2;
+
+R = matrix(0,rows=1,cols=n); 
+dummy = matrix(1,rows=1, cols=1);
+
+parfor( i in 1:n-1, par=4, mode=LOCAL, datapartitioner=REMOTE_SPARK,  taskpartitioner=FACTORING, opt=NONE )
+{
+   X = V[i:(i+1),];                 
+   sX = sum(X);
+   R[1,i] = dummy * sX; 
+}   
+
 write(R, $4);       
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor_repeatedopt1.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor_repeatedopt1.R b/src/test/scripts/functions/parfor/parfor_repeatedopt1.R
index 4cf2166..f4c459b 100644
--- a/src/test/scripts/functions/parfor/parfor_repeatedopt1.R
+++ b/src/test/scripts/functions/parfor/parfor_repeatedopt1.R
@@ -19,26 +19,26 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-V = as.matrix(readMM(paste(args[1], "V.mtx", sep="")))
-n = ncol(V); 
-R = matrix(0, 1, n);
-
-iter = 1;
-while( iter <= 3 )
-{
-   for( i in 1:ncol(V) )
-   {
-      Xi = V[,i];
-      R[1,i] = R[1,i] + sum(Xi);
-   }
-   
-   iter = iter+1;
-}
-
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+V = as.matrix(readMM(paste(args[1], "V.mtx", sep="")))
+n = ncol(V); 
+R = matrix(0, 1, n);
+
+iter = 1;
+while( iter <= 3 )
+{
+   for( i in 1:ncol(V) )
+   {
+      Xi = V[,i];
+      R[1,i] = R[1,i] + sum(Xi);
+   }
+   
+   iter = iter+1;
+}
+
 writeMM(as(R, "CsparseMatrix"), paste(args[2], "R", sep="")); 
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor_repeatedopt1.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor_repeatedopt1.dml b/src/test/scripts/functions/parfor/parfor_repeatedopt1.dml
index a804d9e..724dc4b 100644
--- a/src/test/scripts/functions/parfor/parfor_repeatedopt1.dml
+++ b/src/test/scripts/functions/parfor/parfor_repeatedopt1.dml
@@ -19,23 +19,23 @@
 #
 #-------------------------------------------------------------
 
-
-V = read($1,rows=$2,cols=$3);
-n = $3;
-
-R = matrix(0, rows=1,cols=n); 
-
-iter = 1;
-while( iter <= 3 )
-{
-   #repeated opt for each while iteration
-   parfor( i in 1:ncol(V), log=DEBUG )
-   {
-      Xi = V[,i];
-      R[1,i] = R[1,i] + sum(Xi);
-   }
-   
-   iter = iter+1;
-}
-
+
+V = read($1,rows=$2,cols=$3);
+n = $3;
+
+R = matrix(0, rows=1,cols=n); 
+
+iter = 1;
+while( iter <= 3 )
+{
+   #repeated opt for each while iteration
+   parfor( i in 1:ncol(V), log=DEBUG )
+   {
+      Xi = V[,i];
+      R[1,i] = R[1,i] + sum(Xi);
+   }
+   
+   iter = iter+1;
+}
+
 write(R, $4);       
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor_repeatedopt2.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor_repeatedopt2.R b/src/test/scripts/functions/parfor/parfor_repeatedopt2.R
index 3093697..0858862 100644
--- a/src/test/scripts/functions/parfor/parfor_repeatedopt2.R
+++ b/src/test/scripts/functions/parfor/parfor_repeatedopt2.R
@@ -19,31 +19,31 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-V = as.matrix(readMM(paste(args[1], "V.mtx", sep="")))
-n = ncol(V); 
-R = matrix(0, 1, n);
-
-iter = 1;
-while( iter <= 3 )
-{
-   if( as.integer(args[3])==1 )
-   {
-      V = V * iter;
-   }
-   
-   for( i in 1:ncol(V) )
-   {
-      Xi = V[,i];
-      R[1,i] = R[1,i] + sum(Xi);
-   }
-   
-   iter = iter+1;
-}
-
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+V = as.matrix(readMM(paste(args[1], "V.mtx", sep="")))
+n = ncol(V); 
+R = matrix(0, 1, n);
+
+iter = 1;
+while( iter <= 3 )
+{
+   if( as.integer(args[3])==1 )
+   {
+      V = V * iter;
+   }
+   
+   for( i in 1:ncol(V) )
+   {
+      Xi = V[,i];
+      R[1,i] = R[1,i] + sum(Xi);
+   }
+   
+   iter = iter+1;
+}
+
 writeMM(as(R, "CsparseMatrix"), paste(args[2], "R", sep="")); 
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor_repeatedopt2.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor_repeatedopt2.dml b/src/test/scripts/functions/parfor/parfor_repeatedopt2.dml
index c7f141c..c61761f 100644
--- a/src/test/scripts/functions/parfor/parfor_repeatedopt2.dml
+++ b/src/test/scripts/functions/parfor/parfor_repeatedopt2.dml
@@ -19,28 +19,28 @@
 #
 #-------------------------------------------------------------
 
-
-V = read($1,rows=$2,cols=$3);
-n = $3;
-
-R = matrix(0, rows=1,cols=n); 
-
-iter = 1;
-while( iter <= 3 )
-{
-   if( $5==1 )
-   {
-      V = V * iter;
-   }
-
-   #repeated opt for each while iteration
-   parfor( i in 1:ncol(V), log=DEBUG )
-   {
-      Xi = V[,i];
-      R[1,i] = R[1,i] + sum(Xi);
-   }
-   
-   iter = iter+1;
-}
-
+
+V = read($1,rows=$2,cols=$3);
+n = $3;
+
+R = matrix(0, rows=1,cols=n); 
+
+iter = 1;
+while( iter <= 3 )
+{
+   if( $5==1 )
+   {
+      V = V * iter;
+   }
+
+   #repeated opt for each while iteration
+   parfor( i in 1:ncol(V), log=DEBUG )
+   {
+      Xi = V[,i];
+      R[1,i] = R[1,i] + sum(Xi);
+   }
+   
+   iter = iter+1;
+}
+
 write(R, $4);       
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor_repeatedopt3.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor_repeatedopt3.R b/src/test/scripts/functions/parfor/parfor_repeatedopt3.R
index 17d532b..889d692 100644
--- a/src/test/scripts/functions/parfor/parfor_repeatedopt3.R
+++ b/src/test/scripts/functions/parfor/parfor_repeatedopt3.R
@@ -19,34 +19,34 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-V = as.matrix(readMM(paste(args[1], "V.mtx", sep="")))
-n = ncol(V); 
-R = matrix(0, 1, n);
-
-iter = 1;
-while( iter <= 3 )
-{
-   if( as.integer(args[3])==1 )
-   {
-      vx = matrix(1,nrow(V),1)*iter;
-      V = cbind(V, vx);
-      rx = matrix(0,1,1);
-      R = cbind(R, rx);
-   }
-   
-   for( i in 1:ncol(V) )
-   {
-      Xi = V[,i];
-      R[1,i] = R[1,i] + sum(Xi);
-   }
-   
-   iter = iter+1;
-}
-
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+V = as.matrix(readMM(paste(args[1], "V.mtx", sep="")))
+n = ncol(V); 
+R = matrix(0, 1, n);
+
+iter = 1;
+while( iter <= 3 )
+{
+   if( as.integer(args[3])==1 )
+   {
+      vx = matrix(1,nrow(V),1)*iter;
+      V = cbind(V, vx);
+      rx = matrix(0,1,1);
+      R = cbind(R, rx);
+   }
+   
+   for( i in 1:ncol(V) )
+   {
+      Xi = V[,i];
+      R[1,i] = R[1,i] + sum(Xi);
+   }
+   
+   iter = iter+1;
+}
+
 writeMM(as(R, "CsparseMatrix"), paste(args[2], "R", sep="")); 
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/parfor/parfor_repeatedopt3.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor_repeatedopt3.dml b/src/test/scripts/functions/parfor/parfor_repeatedopt3.dml
index 0f1c313..8254388 100644
--- a/src/test/scripts/functions/parfor/parfor_repeatedopt3.dml
+++ b/src/test/scripts/functions/parfor/parfor_repeatedopt3.dml
@@ -19,31 +19,31 @@
 #
 #-------------------------------------------------------------
 
-
-V = read($1,rows=$2,cols=$3);
-n = $3;
-
-R = matrix(0, rows=1,cols=n); 
-
-iter = 1;
-while( iter <= 3 )
-{
-   if( $5==1 )
-   {
-      vx = matrix(1,rows=nrow(V),cols=1)*iter;
-      V = append(V, vx);
-      rx = matrix(0,rows=1,cols=1);
-      R = append(R, rx);
-   }
-
-   #repeated opt for each while iteration
-   parfor( i in 1:ncol(V), log=DEBUG )
-   {
-      Xi = V[,i];
-      R[1,i] = R[1,i] + sum(Xi);
-   }
-   
-   iter = iter+1;
-}
-
+
+V = read($1,rows=$2,cols=$3);
+n = $3;
+
+R = matrix(0, rows=1,cols=n); 
+
+iter = 1;
+while( iter <= 3 )
+{
+   if( $5==1 )
+   {
+      vx = matrix(1,rows=nrow(V),cols=1)*iter;
+      V = append(V, vx);
+      rx = matrix(0,rows=1,cols=1);
+      R = append(R, rx);
+   }
+
+   #repeated opt for each while iteration
+   parfor( i in 1:ncol(V), log=DEBUG )
+   {
+      Xi = V[,i];
+      R[1,i] = R[1,i] + sum(Xi);
+   }
+   
+   iter = iter+1;
+}
+
 write(R, $4);       
\ No newline at end of file



[55/55] incubator-systemml git commit: [SYSTEMML-484] Create javadoc jar during build

Posted by du...@apache.org.
[SYSTEMML-484] Create javadoc jar during build

Exclude selected packages from javadocs.
Ignore doclint warnings for javadocs on Java 8 until warnings fixed.

Closes #52.


Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/ffcdf6ea
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/ffcdf6ea
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/ffcdf6ea

Branch: refs/heads/branch-0.9
Commit: ffcdf6ea37f0bc193397c78837eb3703b4258b4a
Parents: d766fbf
Author: Deron Eriksson <de...@us.ibm.com>
Authored: Mon Jan 25 16:38:15 2016 -0800
Committer: Deron Eriksson <de...@us.ibm.com>
Committed: Mon Jan 25 16:38:15 2016 -0800

----------------------------------------------------------------------
 pom.xml | 29 ++++++++++++++++++++++++++++-
 1 file changed, 28 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ffcdf6ea/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index ae654dd..4b7061c 100644
--- a/pom.xml
+++ b/pom.xml
@@ -504,6 +504,17 @@
 		</profile>
 
 		<profile>
+			<!-- Need to ignore doclint warnings for javadocs generated on Java 8 until warnings are fixed -->
+			<id>ignore-doclint-warnings-for-javadocs-on-java-8</id>
+			<activation>
+				<jdk>[1.8,)</jdk>
+			</activation>
+			<properties>
+				<javadoc.opts>-Xdoclint:none</javadoc.opts>
+			</properties>
+		</profile>
+
+		<profile>
 			<!-- Profile to create binary distributions.
 				Execute with `mvn clean package -P distribution` -->
 			<id>distribution</id>
@@ -637,8 +648,24 @@
 						<artifactId>maven-javadoc-plugin</artifactId>
 						<version>2.10.3</version>
 						<configuration>
-							<maxmemory>1024m</maxmemory>
+							<!-- Need to include the following packages, so exclude others:
+								org.apache.sysml.api
+								org.apache.sysml.runtime.instructions.spark.utils (for RDDConverterUtils, etc)
+								org.apache.sysml.runtime.matrix (for MatrixCharacteristics, etc)
+								org.apache.sysml.runtime.matrix.data (for MatrixIndexes, MatrixBlock, etc)
+								org.apache.sysml.udf
+							-->
+							<excludePackageNames>org.apache.sysml.conf:org.apache.sysml.debug:org.apache.sysml.hops:org.apache.sysml.lops:org.apache.sysml.parser:org.apache.sysml.runtime.controlprogram:org.apache.sysml.runtime.functionobjects:org.apache.sysml.runtime.instructions.cp:org.apache.sysml.runtime.instructions.cpfile:org.apache.sysml.runtime.instructions.mr:org.apache.sysml.runtime.instructions.spark.data:org.apache.sysml.runtime.instructions.spark.functions:org.apache.sysml.runtime.io:org.apache.sysml.runtime.matrix.data.hadoopfix:org.apache.sysml.runtime.matrix.mapred:org.apache.sysml.runtime.matrix.operators:org.apache.sysml.runtime.matrix.sort:org.apache.sysml.runtime.transform:org.apache.sysml.runtime.util:org.apache.sysml.utils:org.apache.sysml.yarn</excludePackageNames>
+							<additionalparam>${javadoc.opts}</additionalparam>
 						</configuration>
+						<executions>
+							<execution>
+								<id>attach-javadocs</id>
+								<goals>
+									<goal>jar</goal>
+								</goals>
+							</execution>
+						</executions>
 					</plugin>
 				</plugins>
 			</build>


[12/55] [partial] incubator-systemml git commit: [SYSTEMML-482] [SYSTEMML-480] Adding a Git attributes file to enforce Unix-styled line endings, and normalizing all of the line endings.

Posted by du...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/gdfo/LinregDSsimpl.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/gdfo/LinregDSsimpl.R b/src/test/scripts/functions/gdfo/LinregDSsimpl.R
index 5d42300..5b90875 100644
--- a/src/test/scripts/functions/gdfo/LinregDSsimpl.R
+++ b/src/test/scripts/functions/gdfo/LinregDSsimpl.R
@@ -19,17 +19,17 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-library("Matrix")
-
-X = as.matrix(readMM(paste(args[1], "X.mtx", sep="")))
-y = as.matrix(readMM(paste(args[1], "y.mtx", sep="")))
-I = as.vector(matrix(1, ncol(X), 1));
-lambda = as.double(args[3]);
-A = t(X) %*% X + diag(I)*lambda;
-b = t(X) %*% y;
-beta = solve(A, b);
-
+
+args <- commandArgs(TRUE)
+options(digits=22)
+library("Matrix")
+
+X = as.matrix(readMM(paste(args[1], "X.mtx", sep="")))
+y = as.matrix(readMM(paste(args[1], "y.mtx", sep="")))
+I = as.vector(matrix(1, ncol(X), 1));
+lambda = as.double(args[3]);
+A = t(X) %*% X + diag(I)*lambda;
+b = t(X) %*% y;
+beta = solve(A, b);
+
 writeMM(as(beta,"CsparseMatrix"), paste(args[4], "B", sep=""))
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/gdfo/LinregDSsimpl.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/gdfo/LinregDSsimpl.dml b/src/test/scripts/functions/gdfo/LinregDSsimpl.dml
index 61849e2..576d1ce 100644
--- a/src/test/scripts/functions/gdfo/LinregDSsimpl.dml
+++ b/src/test/scripts/functions/gdfo/LinregDSsimpl.dml
@@ -19,13 +19,13 @@
 #
 #-------------------------------------------------------------
 
-
-X = read($1);
-y = read($2);
-I = matrix(1, ncol(X), 1);
-lambda = $4;
-A = t(X) %*% X + diag(I)*lambda;
-b = t(X) %*% y;
-beta = solve(A, b);
-
-write(beta, $5);
+
+X = read($1);
+y = read($2);
+I = matrix(1, ncol(X), 1);
+lambda = $4;
+A = t(X) %*% X + diag(I)*lambda;
+b = t(X) %*% y;
+beta = solve(A, b);
+
+write(beta, $5);

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/gdfo/MMChainLoop.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/gdfo/MMChainLoop.R b/src/test/scripts/functions/gdfo/MMChainLoop.R
index 68aca3a..e2177f7 100644
--- a/src/test/scripts/functions/gdfo/MMChainLoop.R
+++ b/src/test/scripts/functions/gdfo/MMChainLoop.R
@@ -19,19 +19,19 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-library("Matrix")
-
-X = readMM(paste(args[1], "X.mtx", sep=""))
-v = readMM(paste(args[1], "v.mtx", sep=""))
-maxiter = as.double(args[2]);
-
-i = 0;
-while(i < maxiter) {
-	v = t(X) %*% (X %*% v);
-	i = i + 1;
-}
-
-writeMM(as(v,"CsparseMatrix"), paste(args[3], "w", sep=""))
+
+args <- commandArgs(TRUE)
+options(digits=22)
+library("Matrix")
+
+X = readMM(paste(args[1], "X.mtx", sep=""))
+v = readMM(paste(args[1], "v.mtx", sep=""))
+maxiter = as.double(args[2]);
+
+i = 0;
+while(i < maxiter) {
+	v = t(X) %*% (X %*% v);
+	i = i + 1;
+}
+
+writeMM(as(v,"CsparseMatrix"), paste(args[3], "w", sep=""))

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/gdfo/MMChainLoop.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/gdfo/MMChainLoop.dml b/src/test/scripts/functions/gdfo/MMChainLoop.dml
index c3d151d..a68c79b 100644
--- a/src/test/scripts/functions/gdfo/MMChainLoop.dml
+++ b/src/test/scripts/functions/gdfo/MMChainLoop.dml
@@ -19,19 +19,19 @@
 #
 #-------------------------------------------------------------
 
-
-X = read($1);
-v = read($2);
-maxiter = $3;
-
-i = 0;
-while(i < maxiter) {
-	v = t(X) %*% (X %*% v);
-	i = i + 1;
-}
-
-write(v, $4);
-
-
-
-
+
+X = read($1);
+v = read($2);
+maxiter = $3;
+
+i = 0;
+while(i < maxiter) {
+	v = t(X) %*% (X %*% v);
+	i = i + 1;
+}
+
+write(v, $4);
+
+
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/indexing/Jdk7IssueTest.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/indexing/Jdk7IssueTest.R b/src/test/scripts/functions/indexing/Jdk7IssueTest.R
index 56b6e7e..532300b 100644
--- a/src/test/scripts/functions/indexing/Jdk7IssueTest.R
+++ b/src/test/scripts/functions/indexing/Jdk7IssueTest.R
@@ -19,17 +19,17 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-library("Matrix")
-
-M = as.matrix(readMM(paste(args[1], "M.mtx", sep="")))
-
-R1 = matrix(0, nrow(M), ncol(M));
-for( i in 1:10 ) {
-   R1[,i] = M[,i]; 
-}
-
-R = t(colSums(R1));
+
+args <- commandArgs(TRUE)
+options(digits=22)
+library("Matrix")
+
+M = as.matrix(readMM(paste(args[1], "M.mtx", sep="")))
+
+R1 = matrix(0, nrow(M), ncol(M));
+for( i in 1:10 ) {
+   R1[,i] = M[,i]; 
+}
+
+R = t(colSums(R1));
 writeMM(as(R,"CsparseMatrix"), paste(args[2], "R", sep=""))
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/indexing/Jdk7IssueTest.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/indexing/Jdk7IssueTest.dml b/src/test/scripts/functions/indexing/Jdk7IssueTest.dml
index 132de6e..87eadf9 100644
--- a/src/test/scripts/functions/indexing/Jdk7IssueTest.dml
+++ b/src/test/scripts/functions/indexing/Jdk7IssueTest.dml
@@ -19,14 +19,14 @@
 #
 #-------------------------------------------------------------
 
-
-M = read($1)
-R1 = matrix(0, rows=nrow(M), cols=ncol(M));
-
-for( i in 1:10 ) {
-   #M[,i] fails on i=5
-   R1[,i] = M[,i]; 
-}
-
-R = colSums(R1);
-write(R, $2, format="text")
+
+M = read($1)
+R1 = matrix(0, rows=nrow(M), cols=ncol(M));
+
+for( i in 1:10 ) {
+   #M[,i] fails on i=5
+   R1[,i] = M[,i]; 
+}
+
+R = colSums(R1);
+write(R, $2, format="text")

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/indexing/LeftIndexingScalarTest.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/indexing/LeftIndexingScalarTest.R b/src/test/scripts/functions/indexing/LeftIndexingScalarTest.R
index 247aa0b..ee607bc 100644
--- a/src/test/scripts/functions/indexing/LeftIndexingScalarTest.R
+++ b/src/test/scripts/functions/indexing/LeftIndexingScalarTest.R
@@ -19,17 +19,17 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-library("Matrix")
-
-A1=readMM(paste(args[1], "A.mtx", sep=""))
-A=as.matrix(A1);
-
-A[13:13,1026:1026] = 7;
-A[14:14,1027:1027] = 7*7;
-A[1013,26] = 7;
-A[1014,27] = 7*7;
-
+
+args <- commandArgs(TRUE)
+options(digits=22)
+library("Matrix")
+
+A1=readMM(paste(args[1], "A.mtx", sep=""))
+A=as.matrix(A1);
+
+A[13:13,1026:1026] = 7;
+A[14:14,1027:1027] = 7*7;
+A[1013,26] = 7;
+A[1014,27] = 7*7;
+
 writeMM(as(A,"CsparseMatrix"), paste(args[2], "A", sep=""), format="text")
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/indexing/LeftIndexingScalarTest.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/indexing/LeftIndexingScalarTest.dml b/src/test/scripts/functions/indexing/LeftIndexingScalarTest.dml
index 428f7a5..2bcfb6d 100644
--- a/src/test/scripts/functions/indexing/LeftIndexingScalarTest.dml
+++ b/src/test/scripts/functions/indexing/LeftIndexingScalarTest.dml
@@ -19,12 +19,12 @@
 #
 #-------------------------------------------------------------
 
-
-A=read($1, rows=$2, cols=$3, format="text")
-
-A[13:13,1026:1026] = 7;
-A[14:14,1027:1027] = 7*7;
-A[1013,26] = 7;
-A[1014,27] = 7*7;
-
-write(A, $4, format="text")
+
+A=read($1, rows=$2, cols=$3, format="text")
+
+A[13:13,1026:1026] = 7;
+A[14:14,1027:1027] = 7*7;
+A[1013,26] = 7;
+A[1014,27] = 7*7;
+
+write(A, $4, format="text")

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/indexing/LeftIndexingSparseDenseTest.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/indexing/LeftIndexingSparseDenseTest.R b/src/test/scripts/functions/indexing/LeftIndexingSparseDenseTest.R
index fd785b3..513adda 100644
--- a/src/test/scripts/functions/indexing/LeftIndexingSparseDenseTest.R
+++ b/src/test/scripts/functions/indexing/LeftIndexingSparseDenseTest.R
@@ -19,17 +19,17 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-library("Matrix")
-
-A = as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
-B = as.matrix(readMM(paste(args[1], "B.mtx", sep="")))
-cl = as.integer(args[2]);
-cu = as.integer(args[3]);
-
-R = A;
-R[,cl:cu] = B;
-
+
+args <- commandArgs(TRUE)
+options(digits=22)
+library("Matrix")
+
+A = as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
+B = as.matrix(readMM(paste(args[1], "B.mtx", sep="")))
+cl = as.integer(args[2]);
+cu = as.integer(args[3]);
+
+R = A;
+R[,cl:cu] = B;
+
 writeMM(as(R,"CsparseMatrix"), paste(args[4], "R", sep=""))
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/indexing/LeftIndexingSparseDenseTest.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/indexing/LeftIndexingSparseDenseTest.dml b/src/test/scripts/functions/indexing/LeftIndexingSparseDenseTest.dml
index 80bc3db..a900831 100644
--- a/src/test/scripts/functions/indexing/LeftIndexingSparseDenseTest.dml
+++ b/src/test/scripts/functions/indexing/LeftIndexingSparseDenseTest.dml
@@ -19,11 +19,11 @@
 #
 #-------------------------------------------------------------
 
-
-A = read($1);
-B = read($2);
-
-R = A;
-R[,$3:$4] = B;
-
+
+A = read($1);
+B = read($2);
+
+R = A;
+R[,$3:$4] = B;
+
 write(R, $5, format="text");
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/indexing/LeftIndexingSparseSparseTest.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/indexing/LeftIndexingSparseSparseTest.R b/src/test/scripts/functions/indexing/LeftIndexingSparseSparseTest.R
index fd785b3..513adda 100644
--- a/src/test/scripts/functions/indexing/LeftIndexingSparseSparseTest.R
+++ b/src/test/scripts/functions/indexing/LeftIndexingSparseSparseTest.R
@@ -19,17 +19,17 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-library("Matrix")
-
-A = as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
-B = as.matrix(readMM(paste(args[1], "B.mtx", sep="")))
-cl = as.integer(args[2]);
-cu = as.integer(args[3]);
-
-R = A;
-R[,cl:cu] = B;
-
+
+args <- commandArgs(TRUE)
+options(digits=22)
+library("Matrix")
+
+A = as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
+B = as.matrix(readMM(paste(args[1], "B.mtx", sep="")))
+cl = as.integer(args[2]);
+cu = as.integer(args[3]);
+
+R = A;
+R[,cl:cu] = B;
+
 writeMM(as(R,"CsparseMatrix"), paste(args[4], "R", sep=""))
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/indexing/LeftIndexingSparseSparseTest.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/indexing/LeftIndexingSparseSparseTest.dml b/src/test/scripts/functions/indexing/LeftIndexingSparseSparseTest.dml
index 80bc3db..a900831 100644
--- a/src/test/scripts/functions/indexing/LeftIndexingSparseSparseTest.dml
+++ b/src/test/scripts/functions/indexing/LeftIndexingSparseSparseTest.dml
@@ -19,11 +19,11 @@
 #
 #-------------------------------------------------------------
 
-
-A = read($1);
-B = read($2);
-
-R = A;
-R[,$3:$4] = B;
-
+
+A = read($1);
+B = read($2);
+
+R = A;
+R[,$3:$4] = B;
+
 write(R, $5, format="text");
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/indexing/LeftIndexingTest.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/indexing/LeftIndexingTest.R b/src/test/scripts/functions/indexing/LeftIndexingTest.R
index 79ced75..d8a07ba 100644
--- a/src/test/scripts/functions/indexing/LeftIndexingTest.R
+++ b/src/test/scripts/functions/indexing/LeftIndexingTest.R
@@ -19,26 +19,26 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-library("Matrix")
-
-A1=readMM(paste(args[1], "A.mtx", sep=""))
-B1=readMM(paste(args[1], "B.mtx", sep=""))
-C1=readMM(paste(args[1], "C.mtx", sep=""))
-D1=readMM(paste(args[1], "D.mtx", sep=""))
-A=as.matrix(A1);
-B=as.matrix(B1);
-C=as.matrix(C1);
-D=as.matrix(D1);
-
-A[args[2]:args[3],args[4]:args[5]]=0
-A[args[2]:args[3],args[4]:args[5]]=B
-writeMM(as(A,"CsparseMatrix"), paste(args[6], "AB", sep=""), format="text")
-A[1:args[3],args[4]:ncol(A)]=0
-A[1:args[3],args[4]:ncol(A)]=C
-writeMM(as(A,"CsparseMatrix"), paste(args[6], "AC", sep=""), format="text")
-A[,args[4]:args[5]]=0
-A[,args[4]:args[5]]=D
+
+args <- commandArgs(TRUE)
+options(digits=22)
+library("Matrix")
+
+A1=readMM(paste(args[1], "A.mtx", sep=""))
+B1=readMM(paste(args[1], "B.mtx", sep=""))
+C1=readMM(paste(args[1], "C.mtx", sep=""))
+D1=readMM(paste(args[1], "D.mtx", sep=""))
+A=as.matrix(A1);
+B=as.matrix(B1);
+C=as.matrix(C1);
+D=as.matrix(D1);
+
+A[args[2]:args[3],args[4]:args[5]]=0
+A[args[2]:args[3],args[4]:args[5]]=B
+writeMM(as(A,"CsparseMatrix"), paste(args[6], "AB", sep=""), format="text")
+A[1:args[3],args[4]:ncol(A)]=0
+A[1:args[3],args[4]:ncol(A)]=C
+writeMM(as(A,"CsparseMatrix"), paste(args[6], "AC", sep=""), format="text")
+A[,args[4]:args[5]]=0
+A[,args[4]:args[5]]=D
 writeMM(as(A,"CsparseMatrix"), paste(args[6], "AD", sep=""), format="text")
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/indexing/LeftIndexingTest.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/indexing/LeftIndexingTest.dml b/src/test/scripts/functions/indexing/LeftIndexingTest.dml
index d40b68d..abef026 100644
--- a/src/test/scripts/functions/indexing/LeftIndexingTest.dml
+++ b/src/test/scripts/functions/indexing/LeftIndexingTest.dml
@@ -19,14 +19,14 @@
 #
 #-------------------------------------------------------------
 
-
-A=read($1, rows=$2, cols=$3, format="text")
-B=read($11, rows=$14, cols=$15, format="text")
-C=read($12, rows=$5, cols=$16, format="text")
-D=read($13, rows=$2, cols=$15, format="text")
-A[$4:$5,$6:$7]=B
-write(A, $8, format="text")
-A[1:$5,$6:ncol(A)]=C
-write(A, $9, format="text")
-A[,$6:$7]=D
+
+A=read($1, rows=$2, cols=$3, format="text")
+B=read($11, rows=$14, cols=$15, format="text")
+C=read($12, rows=$5, cols=$16, format="text")
+D=read($13, rows=$2, cols=$15, format="text")
+A[$4:$5,$6:$7]=B
+write(A, $8, format="text")
+A[1:$5,$6:ncol(A)]=C
+write(A, $9, format="text")
+A[,$6:$7]=D
 write(A, $10, format="text")
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/indexing/RightIndexingMatrixTest.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/indexing/RightIndexingMatrixTest.R b/src/test/scripts/functions/indexing/RightIndexingMatrixTest.R
index e6b32ab..62009d7 100644
--- a/src/test/scripts/functions/indexing/RightIndexingMatrixTest.R
+++ b/src/test/scripts/functions/indexing/RightIndexingMatrixTest.R
@@ -19,17 +19,17 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-library("Matrix")
-
-A1=readMM(paste(args[1], "A.mtx", sep=""))
-A = as.matrix(A1);
-
-B=A[args[2]:args[3],args[4]:args[5]]
-C=A[1:args[3],args[4]:ncol(A)]
-D=A[,args[4]:args[5]]
-writeMM(as(B,"CsparseMatrix"), paste(args[6], "B", sep=""), format="text")
-writeMM(as(C,"CsparseMatrix"), paste(args[6], "C", sep=""), format="text")
-writeMM(as(D,"CsparseMatrix"), paste(args[6], "D", sep=""), format="text")
+
+args <- commandArgs(TRUE)
+options(digits=22)
+library("Matrix")
+
+A1=readMM(paste(args[1], "A.mtx", sep=""))
+A = as.matrix(A1);
+
+B=A[args[2]:args[3],args[4]:args[5]]
+C=A[1:args[3],args[4]:ncol(A)]
+D=A[,args[4]:args[5]]
+writeMM(as(B,"CsparseMatrix"), paste(args[6], "B", sep=""), format="text")
+writeMM(as(C,"CsparseMatrix"), paste(args[6], "C", sep=""), format="text")
+writeMM(as(D,"CsparseMatrix"), paste(args[6], "D", sep=""), format="text")

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/indexing/RightIndexingMatrixTest.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/indexing/RightIndexingMatrixTest.dml b/src/test/scripts/functions/indexing/RightIndexingMatrixTest.dml
index f6248ea..a69c235 100644
--- a/src/test/scripts/functions/indexing/RightIndexingMatrixTest.dml
+++ b/src/test/scripts/functions/indexing/RightIndexingMatrixTest.dml
@@ -19,11 +19,11 @@
 #
 #-------------------------------------------------------------
 
-
-A=read($1, rows=$2, cols=$3, format="text")
-B=A[$4:$5,$6:$7]
-C=A[1:$5,$6:ncol(A)]
-D=A[,$6:$7]
-write(B, $8, format="text")
-write(C, $9, format="text")
-write(D, $10, format="text")
+
+A=read($1, rows=$2, cols=$3, format="text")
+B=A[$4:$5,$6:$7]
+C=A[1:$5,$6:ncol(A)]
+D=A[,$6:$7]
+write(B, $8, format="text")
+write(C, $9, format="text")
+write(D, $10, format="text")

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/indexing/RightIndexingVectorTest.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/indexing/RightIndexingVectorTest.R b/src/test/scripts/functions/indexing/RightIndexingVectorTest.R
index 916b8ff..a481a79 100644
--- a/src/test/scripts/functions/indexing/RightIndexingVectorTest.R
+++ b/src/test/scripts/functions/indexing/RightIndexingVectorTest.R
@@ -19,18 +19,18 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-library("Matrix")
-
-A1=readMM(paste(args[1], "A.mtx", sep=""))
-A = as.matrix(A1);
-
-B=A[1:(nrow(A)-10),7]
-C=A[ ,7]
-D=t(A[7, ]) #R outputs col vector
-
-writeMM(as(B,"CsparseMatrix"), paste(args[2], "B", sep=""), format="text")
-writeMM(as(C,"CsparseMatrix"), paste(args[2], "C", sep=""), format="text")
-writeMM(as(D,"CsparseMatrix"), paste(args[2], "D", sep=""), format="text")
+
+args <- commandArgs(TRUE)
+options(digits=22)
+library("Matrix")
+
+A1=readMM(paste(args[1], "A.mtx", sep=""))
+A = as.matrix(A1);
+
+B=A[1:(nrow(A)-10),7]
+C=A[ ,7]
+D=t(A[7, ]) #R outputs col vector
+
+writeMM(as(B,"CsparseMatrix"), paste(args[2], "B", sep=""), format="text")
+writeMM(as(C,"CsparseMatrix"), paste(args[2], "C", sep=""), format="text")
+writeMM(as(D,"CsparseMatrix"), paste(args[2], "D", sep=""), format="text")

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/indexing/RightIndexingVectorTest.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/indexing/RightIndexingVectorTest.dml b/src/test/scripts/functions/indexing/RightIndexingVectorTest.dml
index 977210c..cd25d00 100644
--- a/src/test/scripts/functions/indexing/RightIndexingVectorTest.dml
+++ b/src/test/scripts/functions/indexing/RightIndexingVectorTest.dml
@@ -19,11 +19,11 @@
 #
 #-------------------------------------------------------------
 
-
-A=read($1, rows=$2, cols=$3, format="text")
-B=A[1:(nrow(A)-10),7] #not vrix
-C=A[ ,7] #vrix col
-D=A[7, ] #vrix row
-write(B, $4, format="text")
-write(C, $5, format="text")
-write(D, $6, format="text")
+
+A=read($1, rows=$2, cols=$3, format="text")
+B=A[1:(nrow(A)-10),7] #not vrix
+C=A[ ,7] #vrix col
+D=A[7, ] #vrix row
+write(B, $4, format="text")
+write(C, $5, format="text")
+write(D, $6, format="text")

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/io/ScalarComputeWrite.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/io/ScalarComputeWrite.dml b/src/test/scripts/functions/io/ScalarComputeWrite.dml
index 4f3b799..314ecaf 100644
--- a/src/test/scripts/functions/io/ScalarComputeWrite.dml
+++ b/src/test/scripts/functions/io/ScalarComputeWrite.dml
@@ -1,32 +1,32 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-/*
- * DML script to test the scalar write where the scalar is the result of some computation
- * $1 - input value
- * $2 - filename to which scalr needs to be written out
- */
-
-r = $1;
-X = matrix(1.0, rows=10, cols=1);
-X = X*r;
-m = max(X);
-write(m, $2);
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+/*
+ * DML script to test the scalar write where the scalar is the result of some computation
+ * $1 - input value
+ * $2 - filename to which scalr needs to be written out
+ */
+
+r = $1;
+X = matrix(1.0, rows=10, cols=1);
+X = X*r;
+m = max(X);
+write(m, $2);

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/io/ScalarRead.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/io/ScalarRead.dml b/src/test/scripts/functions/io/ScalarRead.dml
index 8834894..edf5bcc 100644
--- a/src/test/scripts/functions/io/ScalarRead.dml
+++ b/src/test/scripts/functions/io/ScalarRead.dml
@@ -1,23 +1,23 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-s = read($1, value_type=$2);
-print(s);
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+s = read($1, value_type=$2);
+print(s);

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/io/ScalarWrite.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/io/ScalarWrite.dml b/src/test/scripts/functions/io/ScalarWrite.dml
index fd405be..f5a7454 100644
--- a/src/test/scripts/functions/io/ScalarWrite.dml
+++ b/src/test/scripts/functions/io/ScalarWrite.dml
@@ -19,11 +19,11 @@
 #
 #-------------------------------------------------------------
 
-
-/*
- * DML script to test the scalar write:
- * $1 - input scalar value
- * $2 - filename to which scalar needs to be written out
- */
-x = $1
-write(x, $2);
+
+/*
+ * DML script to test the scalar write:
+ * $1 - input scalar value
+ * $2 - filename to which scalar needs to be written out
+ */
+x = $1
+write(x, $2);

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/io/SeqParReadTest.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/io/SeqParReadTest.dml b/src/test/scripts/functions/io/SeqParReadTest.dml
index 1a31c7b..5a054a7 100644
--- a/src/test/scripts/functions/io/SeqParReadTest.dml
+++ b/src/test/scripts/functions/io/SeqParReadTest.dml
@@ -19,9 +19,9 @@
 #
 #-------------------------------------------------------------
 
-
-A = read($1);
-x = sum(A);
-write(x, $2);
-
-
+
+A = read($1);
+x = sum(A);
+write(x, $2);
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/io/csv/ReadCSVTest_1.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/io/csv/ReadCSVTest_1.dml b/src/test/scripts/functions/io/csv/ReadCSVTest_1.dml
index 3659180..8b29c64 100644
--- a/src/test/scripts/functions/io/csv/ReadCSVTest_1.dml
+++ b/src/test/scripts/functions/io/csv/ReadCSVTest_1.dml
@@ -1,27 +1,27 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# DML script that tests read csv
-
-A = read($1);
-x = sum(A);
-write(x, $2);
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# DML script that tests read csv
+
+A = read($1);
+x = sum(A);
+write(x, $2);
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/io/csv/ReadCSVTest_2.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/io/csv/ReadCSVTest_2.dml b/src/test/scripts/functions/io/csv/ReadCSVTest_2.dml
index af0de14..92e182c 100644
--- a/src/test/scripts/functions/io/csv/ReadCSVTest_2.dml
+++ b/src/test/scripts/functions/io/csv/ReadCSVTest_2.dml
@@ -1,27 +1,27 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# DML script that tests read csv
-
-A = read($1, format="csv", header=TRUE);
-x = sum(A);
-write(x, $2);
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# DML script that tests read csv
+
+A = read($1, format="csv", header=TRUE);
+x = sum(A);
+write(x, $2);
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/io/csv/ReadCSVTest_3.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/io/csv/ReadCSVTest_3.dml b/src/test/scripts/functions/io/csv/ReadCSVTest_3.dml
index 3659180..8b29c64 100644
--- a/src/test/scripts/functions/io/csv/ReadCSVTest_3.dml
+++ b/src/test/scripts/functions/io/csv/ReadCSVTest_3.dml
@@ -1,27 +1,27 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# DML script that tests read csv
-
-A = read($1);
-x = sum(A);
-write(x, $2);
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# DML script that tests read csv
+
+A = read($1);
+x = sum(A);
+write(x, $2);
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/io/csv/WriteCSVTest.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/io/csv/WriteCSVTest.dml b/src/test/scripts/functions/io/csv/WriteCSVTest.dml
index 60eddd0..f40ed9b 100644
--- a/src/test/scripts/functions/io/csv/WriteCSVTest.dml
+++ b/src/test/scripts/functions/io/csv/WriteCSVTest.dml
@@ -1,28 +1,28 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# DML script that tests read/write csv
-
-A = read($1+".data");
-x = sum(A);
-write(x, $2);
-
-write(A, $3, format="csv", header=$4, sep=$5, sparse=$6);
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# DML script that tests read/write csv
+
+A = read($1+".data");
+x = sum(A);
+write(x, $2);
+
+write(A, $3, format="csv", header=$4, sep=$5, sparse=$6);

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/io/csv/csv_test.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/io/csv/csv_test.dml b/src/test/scripts/functions/io/csv/csv_test.dml
index 0656bf7..aa1d61f 100644
--- a/src/test/scripts/functions/io/csv/csv_test.dml
+++ b/src/test/scripts/functions/io/csv/csv_test.dml
@@ -19,10 +19,10 @@
 #
 #-------------------------------------------------------------
 
-
-
-# test for reading and writing in CSV format
-
-A = read($1, format=$2);
-write(A, $3, format=$4);
-
+
+
+# test for reading and writing in CSV format
+
+A = read($1, format=$2);
+write(A, $3, format=$4);
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/io/csv/csv_verify.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/io/csv/csv_verify.R b/src/test/scripts/functions/io/csv/csv_verify.R
index c5fc6c8..69bed6e 100644
--- a/src/test/scripts/functions/io/csv/csv_verify.R
+++ b/src/test/scripts/functions/io/csv/csv_verify.R
@@ -19,17 +19,17 @@
 #
 #-------------------------------------------------------------
 
-
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library(Matrix);
-
-# R interprets "0" as a categorical ("factor") value, so we need to read the
-# file in as strings and convert everything to numeric explicitly.
-A = read.csv(args[1], header=FALSE, stringsAsFactors=FALSE);
-A = sapply(A, as.numeric);
-x = sum(A);
-write(x, args[2]);
-
+
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library(Matrix);
+
+# R interprets "0" as a categorical ("factor") value, so we need to read the
+# file in as strings and convert everything to numeric explicitly.
+A = read.csv(args[1], header=FALSE, stringsAsFactors=FALSE);
+A = sapply(A, as.numeric);
+x = sum(A);
+write(x, args[2]);
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/io/csv/csv_verify.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/io/csv/csv_verify.dml b/src/test/scripts/functions/io/csv/csv_verify.dml
index 3526fbc..190d359 100644
--- a/src/test/scripts/functions/io/csv/csv_verify.dml
+++ b/src/test/scripts/functions/io/csv/csv_verify.dml
@@ -19,10 +19,10 @@
 #
 #-------------------------------------------------------------
 
-
-
-A = read($1, rows=$2, cols=$3, format=$4);
-x = sum(A);
-write(x, $5);
-
-
+
+
+A = read($1, rows=$2, cols=$3, format=$4);
+x = sum(A);
+write(x, $5);
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/io/csv/csv_verify2.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/io/csv/csv_verify2.R b/src/test/scripts/functions/io/csv/csv_verify2.R
index b2ce726..a5af6f8 100644
--- a/src/test/scripts/functions/io/csv/csv_verify2.R
+++ b/src/test/scripts/functions/io/csv/csv_verify2.R
@@ -19,14 +19,14 @@
 #
 #-------------------------------------------------------------
 
-
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library(Matrix);
-
-A = read.csv(args[1]);
-x = sum(A);
-write(x, args[2]);
-
+
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library(Matrix);
+
+A = read.csv(args[1]);
+x = sum(A);
+write(x, args[2]);
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/io/csv/csvprop_read.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/io/csv/csvprop_read.dml b/src/test/scripts/functions/io/csv/csvprop_read.dml
index 11ca30e..a86a3ab 100644
--- a/src/test/scripts/functions/io/csv/csvprop_read.dml
+++ b/src/test/scripts/functions/io/csv/csvprop_read.dml
@@ -19,17 +19,17 @@
 #
 #-------------------------------------------------------------
 
-
-
-# test for reading and writing in CSV format
-
-Atxt = read($1);
-Acsv = read($2, format="csv", header=$3, sep=$4, fill=$5, default=$6);
-
-Diff = Atxt - Acsv;
-
-s = sum(Diff);
-
-write(s, $7);
-
-
+
+
+# test for reading and writing in CSV format
+
+Atxt = read($1);
+Acsv = read($2, format="csv", header=$3, sep=$4, fill=$5, default=$6);
+
+Diff = Atxt - Acsv;
+
+s = sum(Diff);
+
+write(s, $7);
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/io/csv/csvprop_write.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/io/csv/csvprop_write.dml b/src/test/scripts/functions/io/csv/csvprop_write.dml
index 8b9b7ed..4baa586 100644
--- a/src/test/scripts/functions/io/csv/csvprop_write.dml
+++ b/src/test/scripts/functions/io/csv/csvprop_write.dml
@@ -19,10 +19,10 @@
 #
 #-------------------------------------------------------------
 
-
-
-# test for reading and writing in CSV format
-
-A = read($1);
-write(A, $2, format="csv", header=$3, sep=$4, sparse=$5);
-
+
+
+# test for reading and writing in CSV format
+
+A = read($1);
+write(A, $2, format="csv", header=$3, sep=$4, sparse=$5);
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/io/csv/in/transfusion_1.data
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/io/csv/in/transfusion_1.data b/src/test/scripts/functions/io/csv/in/transfusion_1.data
index 1ec0901..1a35bfe 100644
--- a/src/test/scripts/functions/io/csv/in/transfusion_1.data
+++ b/src/test/scripts/functions/io/csv/in/transfusion_1.data
@@ -1,749 +1,749 @@
-Recency (months),Frequency (times),Monetary (c.c. blood),Time (months),"whether he/she donated blood in March 2007"
-2 ,50,12500,98 ,1
-0 ,13,3250,28 ,1
-1 ,16,4000,35 ,1
-2 ,20,5000,45 ,1
-1 ,24,6000,77 ,0
-4 ,4,1000,4 ,0
-2 ,7,1750,14 ,1
-1 ,12,3000,35 ,0
-2 ,9,2250,22 ,1
-5 ,46,11500,98 ,1
-4 ,23,5750,58 ,0
-0 ,3,750,4 ,0
-2 ,10,2500,28 ,1
-1 ,13,3250,47 ,0
-2 ,6,1500,15 ,1
-2 ,5,1250,11 ,1
-2 ,14,3500,48 ,1
-2 ,15,3750,49 ,1
-2 ,6,1500,15 ,1
-2 ,3,750,4 ,1
-2 ,3,750,4 ,1
-4 ,11,2750,28 ,0
-2 ,6,1500,16 ,1
-2 ,6,1500,16 ,1
-9 ,9,2250,16 ,0
-4 ,14,3500,40 ,0
-4 ,6,1500,14 ,0
-4 ,12,3000,34 ,1
-4 ,5,1250,11 ,1
-4 ,8,2000,21 ,0
-1 ,14,3500,58 ,0
-4 ,10,2500,28 ,1
-4 ,10,2500,28 ,1
-4 ,9,2250,26 ,1
-2 ,16,4000,64 ,0
-2 ,8,2000,28 ,1
-2 ,12,3000,47 ,1
-4 ,6,1500,16 ,1
-2 ,14,3500,57 ,1
-4 ,7,1750,22 ,1
-2 ,13,3250,53 ,1
-2 ,5,1250,16 ,0
-2 ,5,1250,16 ,1
-2 ,5,1250,16 ,0
-4 ,20,5000,69 ,1
-4 ,9,2250,28 ,1
-2 ,9,2250,36 ,0
-2 ,2,500,2 ,0
-2 ,2,500,2 ,0
-2 ,2,500,2 ,0
-2 ,11,2750,46 ,0
-2 ,11,2750,46 ,1
-2 ,6,1500,22 ,0
-2 ,12,3000,52 ,0
-4 ,5,1250,14 ,1
-4 ,19,4750,69 ,1
-4 ,8,2000,26 ,1
-2 ,7,1750,28 ,1
-2 ,16,4000,81 ,0
-3 ,6,1500,21 ,0
-2 ,7,1750,29 ,0
-2 ,8,2000,35 ,1
-2 ,10,2500,49 ,0
-4 ,5,1250,16 ,1
-2 ,3,750,9 ,1
-3 ,16,4000,74 ,0
-2 ,4,1000,14 ,1
-0 ,2,500,4 ,0
-4 ,7,1750,25 ,0
-1 ,9,2250,51 ,0
-2 ,4,1000,16 ,0
-2 ,4,1000,16 ,0
-4 ,17,4250,71 ,1
-2 ,2,500,4 ,0
-2 ,2,500,4 ,1
-2 ,2,500,4 ,1
-2 ,4,1000,16 ,1
-2 ,2,500,4 ,0
-2 ,2,500,4 ,0
-2 ,2,500,4 ,0
-4 ,6,1500,23 ,1
-2 ,4,1000,16 ,0
-2 ,4,1000,16 ,0
-2 ,4,1000,16 ,0
-2 ,6,1500,28 ,1
-2 ,6,1500,28 ,0
-4 ,2,500,4 ,0
-4 ,2,500,4 ,0
-4 ,2,500,4 ,0
-2 ,7,1750,35 ,1
-4 ,2,500,4 ,1
-4 ,2,500,4 ,0
-4 ,2,500,4 ,0
-4 ,2,500,4 ,0
-12 ,11,2750,23 ,0
-4 ,7,1750,28 ,0
-3 ,17,4250,86 ,0
-4 ,9,2250,38 ,1
-4 ,4,1000,14 ,1
-5 ,7,1750,26 ,1
-4 ,8,2000,34 ,1
-2 ,13,3250,76 ,1
-4 ,9,2250,40 ,0
-2 ,5,1250,26 ,0
-2 ,5,1250,26 ,0
-6 ,17,4250,70 ,0
-0 ,8,2000,59 ,0
-3 ,5,1250,26 ,0
-2 ,3,750,14 ,0
-2 ,10,2500,64 ,0
-4 ,5,1250,23 ,1
-4 ,9,2250,46 ,0
-4 ,5,1250,23 ,0
-4 ,8,2000,40 ,1
-2 ,12,3000,82 ,0
-11 ,24,6000,64 ,0
-2 ,7,1750,46 ,1
-4 ,11,2750,61 ,0
-1 ,7,1750,57 ,0
-2 ,11,2750,79 ,1
-2 ,3,750,16 ,1
-4 ,5,1250,26 ,1
-2 ,6,1500,41 ,1
-2 ,5,1250,33 ,1
-2 ,4,1000,26 ,0
-2 ,5,1250,34 ,0
-4 ,8,2000,46 ,1
-2 ,4,1000,26 ,0
-4 ,8,2000,48 ,1
-2 ,2,500,10 ,1
-4 ,5,1250,28 ,0
-2 ,12,3000,95 ,0
-2 ,2,500,10 ,0
-4 ,6,1500,35 ,0
-2 ,11,2750,88 ,0
-2 ,3,750,19 ,0
-2 ,5,1250,37 ,0
-2 ,12,3000,98 ,0
-9 ,5,1250,19 ,0
-2 ,2,500,11 ,0
-2 ,9,2250,74 ,0
-5 ,14,3500,86 ,0
-4 ,3,750,16 ,0
-4 ,3,750,16 ,0
-4 ,2,500,9 ,1
-4 ,3,750,16 ,1
-6 ,3,750,14 ,0
-2 ,2,500,11 ,0
-2 ,2,500,11 ,1
-2 ,2,500,11 ,0
-2 ,7,1750,58 ,1
-4 ,6,1500,39 ,0
-4 ,11,2750,78 ,0
-2 ,1,250,2 ,1
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,1
-2 ,1,250,2 ,1
-2 ,1,250,2 ,1
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-11 ,10,2500,35 ,0
-11 ,4,1000,16 ,1
-4 ,5,1250,33 ,1
-4 ,6,1500,41 ,1
-2 ,3,750,22 ,0
-4 ,4,1000,26 ,1
-10 ,4,1000,16 ,0
-2 ,4,1000,35 ,0
-4 ,12,3000,88 ,0
-13 ,8,2000,26 ,0
-11 ,9,2250,33 ,0
-4 ,5,1250,34 ,0
-4 ,4,1000,26 ,0
-8 ,15,3750,77 ,0
-4 ,5,1250,35 ,1
-4 ,7,1750,52 ,0
-4 ,7,1750,52 ,0
-2 ,4,1000,35 ,0
-11 ,11,2750,42 ,0
-2 ,2,500,14 ,0
-2 ,5,1250,47 ,1
-9 ,8,2000,38 ,1
-4 ,6,1500,47 ,0
-11 ,7,1750,29 ,0
-9 ,9,2250,45 ,0
-4 ,6,1500,52 ,0
-4 ,7,1750,58 ,0
-6 ,2,500,11 ,1
-4 ,7,1750,58 ,0
-11 ,9,2250,38 ,0
-11 ,6,1500,26 ,0
-2 ,2,500,16 ,0
-2 ,7,1750,76 ,0
-11 ,6,1500,27 ,0
-11 ,3,750,14 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,1
-4 ,1,250,4 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,1
-4 ,1,250,4 ,1
-4 ,1,250,4 ,0
-4 ,1,250,4 ,1
-4 ,1,250,4 ,1
-4 ,1,250,4 ,0
-4 ,3,750,24 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,1
-4 ,1,250,4 ,0
-10 ,8,2000,39 ,0
-14 ,7,1750,26 ,0
-8 ,10,2500,63 ,0
-11 ,3,750,15 ,0
-4 ,2,500,14 ,0
-2 ,4,1000,43 ,0
-8 ,9,2250,58 ,0
-8 ,8,2000,52 ,1
-11 ,22,5500,98 ,0
-4 ,3,750,25 ,1
-11 ,17,4250,79 ,1
-9 ,2,500,11 ,0
-4 ,5,1250,46 ,0
-11 ,12,3000,58 ,0
-7 ,12,3000,86 ,0
-11 ,2,500,11 ,0
-11 ,2,500,11 ,0
-11 ,2,500,11 ,0
-2 ,6,1500,75 ,0
-11 ,8,2000,41 ,1
-11 ,3,750,16 ,1
-12 ,13,3250,59 ,0
-2 ,3,750,35 ,0
-16 ,8,2000,28 ,0
-11 ,7,1750,37 ,0
-4 ,3,750,28 ,0
-12 ,12,3000,58 ,0
-4 ,4,1000,41 ,0
-11 ,14,3500,73 ,1
-2 ,2,500,23 ,0
-2 ,3,750,38 ,1
-4 ,5,1250,58 ,0
-4 ,4,1000,43 ,1
-3 ,2,500,23 ,0
-11 ,8,2000,46 ,0
-4 ,7,1750,82 ,0
-13 ,4,1000,21 ,0
-16 ,11,2750,40 ,0
-16 ,7,1750,28 ,0
-7 ,2,500,16 ,0
-4 ,5,1250,58 ,0
-4 ,5,1250,58 ,0
-4 ,4,1000,46 ,0
-14 ,13,3250,57 ,0
-4 ,3,750,34 ,0
-14 ,18,4500,78 ,0
-11 ,8,2000,48 ,0
-14 ,16,4000,70 ,0
-14 ,4,1000,22 ,1
-14 ,5,1250,26 ,0
-8 ,2,500,16 ,0
-11 ,5,1250,33 ,0
-11 ,2,500,14 ,0
-4 ,2,500,23 ,0
-9 ,2,500,16 ,1
-14 ,5,1250,28 ,1
-14 ,3,750,19 ,1
-14 ,4,1000,23 ,1
-16 ,12,3000,50 ,0
-11 ,4,1000,28 ,0
-11 ,5,1250,35 ,0
-11 ,5,1250,35 ,0
-2 ,4,1000,70 ,0
-14 ,5,1250,28 ,0
-14 ,2,500,14 ,0
-14 ,2,500,14 ,0
-14 ,2,500,14 ,0
-14 ,2,500,14 ,0
-14 ,2,500,14 ,0
-14 ,2,500,14 ,0
-2 ,3,750,52 ,0
-14 ,6,1500,34 ,0
-11 ,5,1250,37 ,1
-4 ,5,1250,74 ,0
-11 ,3,750,23 ,0
-16 ,4,1000,23 ,0
-16 ,3,750,19 ,0
-11 ,5,1250,38 ,0
-11 ,2,500,16 ,0
-12 ,9,2250,60 ,0
-9 ,1,250,9 ,0
-9 ,1,250,9 ,0
-4 ,2,500,29 ,0
-11 ,2,500,17 ,0
-14 ,4,1000,26 ,0
-11 ,9,2250,72 ,1
-11 ,5,1250,41 ,0
-15 ,16,4000,82 ,0
-9 ,5,1250,51 ,1
-11 ,4,1000,34 ,0
-14 ,8,2000,50 ,1
-16 ,7,1750,38 ,0
-14 ,2,500,16 ,0
-2 ,2,500,41 ,0
-14 ,16,4000,98 ,0
-14 ,4,1000,28 ,1
-16 ,7,1750,39 ,0
-14 ,7,1750,47 ,0
-16 ,6,1500,35 ,0
-16 ,6,1500,35 ,1
-11 ,7,1750,62 ,1
-16 ,2,500,16 ,0
-16 ,3,750,21 ,1
-11 ,3,750,28 ,0
-11 ,7,1750,64 ,0
-11 ,1,250,11 ,1
-9 ,3,750,34 ,0
-14 ,4,1000,30 ,0
-23 ,38,9500,98 ,0
-11 ,6,1500,58 ,0
-11 ,1,250,11 ,0
-11 ,1,250,11 ,0
-11 ,1,250,11 ,0
-11 ,1,250,11 ,0
-11 ,1,250,11 ,0
-11 ,1,250,11 ,0
-11 ,1,250,11 ,0
-11 ,1,250,11 ,0
-11 ,2,500,21 ,0
-11 ,5,1250,50 ,0
-11 ,2,500,21 ,0
-16 ,4,1000,28 ,0
-4 ,2,500,41 ,0
-16 ,6,1500,40 ,0
-14 ,3,750,26 ,0
-9 ,2,500,26 ,0
-21 ,16,4000,64 ,0
-14 ,6,1500,51 ,0
-11 ,2,500,24 ,0
-4 ,3,750,71 ,0
-21 ,13,3250,57 ,0
-11 ,6,1500,71 ,0
-14 ,2,500,21 ,1
-23 ,15,3750,57 ,0
-14 ,4,1000,38 ,0
-11 ,2,500,26 ,0
-16 ,5,1250,40 ,1
-4 ,2,500,51 ,1
-14 ,3,750,31 ,0
-4 ,2,500,52 ,0
-9 ,4,1000,65 ,0
-14 ,4,1000,40 ,0
-11 ,3,750,40 ,1
-14 ,5,1250,50 ,0
-14 ,1,250,14 ,0
-14 ,1,250,14 ,0
-14 ,1,250,14 ,0
-14 ,1,250,14 ,0
-14 ,1,250,14 ,0
-14 ,1,250,14 ,0
-14 ,1,250,14 ,0
-14 ,1,250,14 ,0
-14 ,7,1750,72 ,0
-14 ,1,250,14 ,0
-14 ,1,250,14 ,0
-9 ,3,750,52 ,0
-14 ,7,1750,73 ,0
-11 ,4,1000,58 ,0
-11 ,4,1000,59 ,0
-4 ,2,500,59 ,0
-11 ,4,1000,61 ,0
-16 ,4,1000,40 ,0
-16 ,10,2500,89 ,0
-21 ,2,500,21 ,1
-21 ,3,750,26 ,0
-16 ,8,2000,76 ,0
-21 ,3,750,26 ,1
-18 ,2,500,23 ,0
-23 ,5,1250,33 ,0
-23 ,8,2000,46 ,0
-16 ,3,750,34 ,0
-14 ,5,1250,64 ,0
-14 ,3,750,41 ,0
-16 ,1,250,16 ,0
-16 ,1,250,16 ,0
-16 ,1,250,16 ,0
-16 ,1,250,16 ,0
-16 ,1,250,16 ,0
-16 ,1,250,16 ,0
-16 ,1,250,16 ,0
-16 ,4,1000,45 ,0
-16 ,1,250,16 ,0
-16 ,1,250,16 ,0
-16 ,1,250,16 ,0
-16 ,1,250,16 ,0
-16 ,1,250,16 ,0
-16 ,2,500,26 ,0
-21 ,2,500,23 ,0
-16 ,2,500,27 ,0
-21 ,2,500,23 ,0
-21 ,2,500,23 ,0
-14 ,4,1000,57 ,0
-16 ,5,1250,60 ,0
-23 ,2,500,23 ,0
-14 ,5,1250,74 ,0
-23 ,3,750,28 ,0
-16 ,3,750,40 ,0
-9 ,2,500,52 ,0
-9 ,2,500,52 ,0
-16 ,7,1750,87 ,1
-14 ,4,1000,64 ,0
-14 ,2,500,35 ,0
-16 ,7,1750,93 ,0
-21 ,2,500,25 ,0
-14 ,3,750,52 ,0
-23 ,14,3500,93 ,0
-18 ,8,2000,95 ,0
-16 ,3,750,46 ,0
-11 ,3,750,76 ,0
-11 ,2,500,52 ,0
-11 ,3,750,76 ,0
-23 ,12,3000,86 ,0
-21 ,3,750,35 ,0
-23 ,2,500,26 ,0
-23 ,2,500,26 ,0
-23 ,8,2000,64 ,0
-16 ,3,750,50 ,0
-23 ,3,750,33 ,0
-21 ,3,750,38 ,0
-23 ,2,500,28 ,0
-21 ,1,250,21 ,0
-21 ,1,250,21 ,0
-21 ,1,250,21 ,0
-21 ,1,250,21 ,0
-21 ,1,250,21 ,0
-21 ,1,250,21 ,0
-21 ,1,250,21 ,0
-21 ,1,250,21 ,0
-21 ,1,250,21 ,0
-21 ,1,250,21 ,1
-21 ,1,250,21 ,0
-21 ,1,250,21 ,0
-21 ,5,1250,60 ,0
-23 ,4,1000,45 ,0
-21 ,4,1000,52 ,0
-22 ,1,250,22 ,1
-11 ,2,500,70 ,0
-23 ,5,1250,58 ,0
-23 ,3,750,40 ,0
-23 ,3,750,41 ,0
-14 ,3,750,83 ,0
-21 ,2,500,35 ,0
-26 ,5,1250,49 ,1
-23 ,6,1500,70 ,0
-23 ,1,250,23 ,0
-23 ,1,250,23 ,0
-23 ,1,250,23 ,0
-23 ,1,250,23 ,0
-23 ,1,250,23 ,0
-23 ,1,250,23 ,0
-23 ,1,250,23 ,0
-23 ,1,250,23 ,0
-23 ,4,1000,53 ,0
-21 ,6,1500,86 ,0
-23 ,3,750,48 ,0
-21 ,2,500,41 ,0
-21 ,3,750,64 ,0
-16 ,2,500,70 ,0
-21 ,3,750,70 ,0
-23 ,4,1000,87 ,0
-23 ,3,750,89 ,0
-23 ,2,500,87 ,0
-35 ,3,750,64 ,0
-38 ,1,250,38 ,0
-38 ,1,250,38 ,0
-40 ,1,250,40 ,0
-74 ,1,250,74 ,0
-2 ,43,10750,86 ,1
-6 ,22,5500,28 ,1
-2 ,34,8500,77 ,1
-2 ,44,11000,98 ,0
-0 ,26,6500,76 ,1
-2 ,41,10250,98 ,1
-3 ,21,5250,42 ,1
-2 ,11,2750,23 ,0
-2 ,21,5250,52 ,1
-2 ,13,3250,32 ,1
-4 ,4,1000,4 ,1
-2 ,11,2750,26 ,0
-2 ,11,2750,28 ,0
-3 ,14,3500,35 ,0
-4 ,16,4000,38 ,1
-4 ,6,1500,14 ,0
-3 ,5,1250,12 ,1
-4 ,33,8250,98 ,1
-3 ,10,2500,33 ,1
-4 ,10,2500,28 ,1
-2 ,11,2750,40 ,1
-2 ,11,2750,41 ,1
-4 ,13,3250,39 ,1
-1 ,10,2500,43 ,1
-4 ,9,2250,28 ,0
-2 ,4,1000,11 ,0
-2 ,5,1250,16 ,1
-2 ,15,3750,64 ,0
-5 ,24,6000,79 ,0
-2 ,6,1500,22 ,1
-4 ,5,1250,16 ,1
-2 ,4,1000,14 ,1
-4 ,8,2000,28 ,0
-2 ,4,1000,14 ,0
-2 ,6,1500,26 ,0
-4 ,5,1250,16 ,1
-2 ,7,1750,32 ,1
-2 ,6,1500,26 ,1
-2 ,8,2000,38 ,1
-2 ,2,500,4 ,1
-2 ,6,1500,28 ,1
-2 ,10,2500,52 ,0
-4 ,16,4000,70 ,1
-4 ,2,500,4 ,1
-1 ,14,3500,95 ,0
-4 ,2,500,4 ,1
-7 ,14,3500,48 ,0
-2 ,3,750,11 ,0
-2 ,12,3000,70 ,1
-4 ,7,1750,32 ,1
-4 ,4,1000,16 ,0
-2 ,6,1500,35 ,1
-4 ,6,1500,28 ,1
-2 ,3,750,14 ,0
-2 ,4,1000,23 ,0
-4 ,4,1000,18 ,0
-5 ,6,1500,28 ,0
-4 ,6,1500,30 ,0
-14 ,5,1250,14 ,0
-3 ,8,2000,50 ,0
-4 ,11,2750,64 ,1
-4 ,9,2250,52 ,0
-4 ,16,4000,98 ,1
-7 ,10,2500,47 ,0
-4 ,14,3500,86 ,0
-2 ,9,2250,75 ,0
-4 ,6,1500,35 ,0
-4 ,9,2250,55 ,0
-4 ,6,1500,35 ,1
-2 ,6,1500,45 ,0
-2 ,6,1500,47 ,0
-4 ,2,500,9 ,0
-2 ,2,500,11 ,1
-2 ,2,500,11 ,0
-2 ,2,500,11 ,1
-4 ,6,1500,38 ,1
-3 ,4,1000,29 ,1
-9 ,9,2250,38 ,0
-11 ,5,1250,18 ,0
-2 ,3,750,21 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,1
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,1
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-2 ,1,250,2 ,0
-11 ,11,2750,38 ,0
-2 ,3,750,22 ,0
-9 ,11,2750,49 ,1
-5 ,11,2750,75 ,0
-3 ,5,1250,38 ,0
-3 ,1,250,3 ,1
-4 ,6,1500,43 ,0
-2 ,3,750,24 ,0
-12 ,11,2750,39 ,0
-2 ,2,500,14 ,0
-4 ,6,1500,46 ,0
-9 ,3,750,14 ,0
-14 ,8,2000,26 ,0
-4 ,2,500,13 ,0
-4 ,11,2750,95 ,0
-2 ,7,1750,77 ,0
-2 ,7,1750,77 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,1
-4 ,1,250,4 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,0
-4 ,1,250,4 ,1
-4 ,1,250,4 ,0
-4 ,7,1750,62 ,0
-4 ,1,250,4 ,0
-4 ,4,1000,34 ,1
-11 ,6,1500,28 ,0
-13 ,3,750,14 ,1
-7 ,5,1250,35 ,0
-9 ,9,2250,54 ,0
-11 ,2,500,11 ,0
-2 ,5,1250,63 ,0
-7 ,11,2750,89 ,0
-8 ,9,2250,64 ,0
-2 ,2,500,22 ,0
-6 ,3,750,26 ,0
-12 ,15,3750,71 ,0
-13 ,3,750,16 ,0
-11 ,16,4000,89 ,0
-4 ,5,1250,58 ,0
-14 ,7,1750,35 ,0
-11 ,4,1000,27 ,0
-7 ,9,2250,89 ,1
-11 ,8,2000,52 ,1
-7 ,5,1250,52 ,0
-11 ,6,1500,41 ,0
-10 ,5,1250,38 ,0
-14 ,2,500,14 ,1
-14 ,2,500,14 ,0
-14 ,2,500,14 ,0
-2 ,2,500,33 ,0
-11 ,3,750,23 ,0
-14 ,8,2000,46 ,0
-9 ,1,250,9 ,0
-16 ,5,1250,27 ,0
-14 ,4,1000,26 ,0
-4 ,2,500,30 ,0
-14 ,3,750,21 ,0
-16 ,16,4000,77 ,0
-4 ,2,500,31 ,0
-14 ,8,2000,50 ,0
-11 ,3,750,26 ,0
-14 ,7,1750,45 ,0
-15 ,5,1250,33 ,0
-16 ,2,500,16 ,0
-16 ,3,750,21 ,0
-11 ,8,2000,72 ,0
-11 ,1,250,11 ,0
-11 ,1,250,11 ,0
-11 ,1,250,11 ,0
-11 ,1,250,11 ,1
-11 ,1,250,11 ,0
-2 ,3,750,75 ,1
-2 ,3,750,77 ,0
-16 ,4,1000,28 ,0
-16 ,15,3750,87 ,0
-16 ,14,3500,83 ,0
-16 ,10,2500,62 ,0
-16 ,3,750,23 ,0
-14 ,3,750,26 ,0
-23 ,19,4750,62 ,0
-11 ,7,1750,75 ,0
-14 ,3,750,28 ,0
-20 ,14,3500,69 ,1
-4 ,2,500,46 ,0
-11 ,2,500,25 ,0
-11 ,3,750,37 ,0
-16 ,4,1000,33 ,0
-21 ,7,1750,38 ,0
-13 ,7,1750,76 ,0
-16 ,6,1500,50 ,0
-14 ,3,750,33 ,0
-14 ,1,250,14 ,0
-14 ,1,250,14 ,0
-14 ,1,250,14 ,0
-14 ,1,250,14 ,0
-14 ,1,250,14 ,0
-14 ,1,250,14 ,0
-17 ,7,1750,58 ,1
-14 ,3,750,35 ,0
-14 ,3,750,35 ,0
-16 ,7,1750,64 ,0
-21 ,2,500,21 ,0
-16 ,3,750,35 ,0
-16 ,1,250,16 ,0
-16 ,1,250,16 ,0
-16 ,1,250,16 ,0
-16 ,1,250,16 ,0
-16 ,1,250,16 ,0
-14 ,2,500,29 ,0
-11 ,4,1000,74 ,0
-11 ,2,500,38 ,1
-21 ,6,1500,48 ,0
-23 ,2,500,23 ,0
-23 ,6,1500,45 ,0
-14 ,2,500,35 ,1
-16 ,6,1500,81 ,0
-16 ,4,1000,58 ,0
-16 ,5,1250,71 ,0
-21 ,2,500,26 ,0
-21 ,3,750,35 ,0
-21 ,3,750,35 ,0
-23 ,8,2000,69 ,0
-21 ,3,750,38 ,0
-23 ,3,750,35 ,0
-21 ,3,750,40 ,0
-23 ,2,500,28 ,0
-21 ,1,250,21 ,0
-21 ,1,250,21 ,0
-25 ,6,1500,50 ,0
-21 ,1,250,21 ,0
-21 ,1,250,21 ,0
-23 ,3,750,39 ,0
-21 ,2,500,33 ,0
-14 ,3,750,79 ,0
-23 ,1,250,23 ,1
-23 ,1,250,23 ,0
-23 ,1,250,23 ,0
-23 ,1,250,23 ,0
-23 ,1,250,23 ,0
-23 ,1,250,23 ,0
-23 ,1,250,23 ,0
-23 ,4,1000,52 ,0
-23 ,1,250,23 ,0
-23 ,7,1750,88 ,0
-16 ,3,750,86 ,0
-23 ,2,500,38 ,0
-21 ,2,500,52 ,0
-23 ,3,750,62 ,0
-39 ,1,250,39 ,0
+Recency (months),Frequency (times),Monetary (c.c. blood),Time (months),"whether he/she donated blood in March 2007"
+2 ,50,12500,98 ,1
+0 ,13,3250,28 ,1
+1 ,16,4000,35 ,1
+2 ,20,5000,45 ,1
+1 ,24,6000,77 ,0
+4 ,4,1000,4 ,0
+2 ,7,1750,14 ,1
+1 ,12,3000,35 ,0
+2 ,9,2250,22 ,1
+5 ,46,11500,98 ,1
+4 ,23,5750,58 ,0
+0 ,3,750,4 ,0
+2 ,10,2500,28 ,1
+1 ,13,3250,47 ,0
+2 ,6,1500,15 ,1
+2 ,5,1250,11 ,1
+2 ,14,3500,48 ,1
+2 ,15,3750,49 ,1
+2 ,6,1500,15 ,1
+2 ,3,750,4 ,1
+2 ,3,750,4 ,1
+4 ,11,2750,28 ,0
+2 ,6,1500,16 ,1
+2 ,6,1500,16 ,1
+9 ,9,2250,16 ,0
+4 ,14,3500,40 ,0
+4 ,6,1500,14 ,0
+4 ,12,3000,34 ,1
+4 ,5,1250,11 ,1
+4 ,8,2000,21 ,0
+1 ,14,3500,58 ,0
+4 ,10,2500,28 ,1
+4 ,10,2500,28 ,1
+4 ,9,2250,26 ,1
+2 ,16,4000,64 ,0
+2 ,8,2000,28 ,1
+2 ,12,3000,47 ,1
+4 ,6,1500,16 ,1
+2 ,14,3500,57 ,1
+4 ,7,1750,22 ,1
+2 ,13,3250,53 ,1
+2 ,5,1250,16 ,0
+2 ,5,1250,16 ,1
+2 ,5,1250,16 ,0
+4 ,20,5000,69 ,1
+4 ,9,2250,28 ,1
+2 ,9,2250,36 ,0
+2 ,2,500,2 ,0
+2 ,2,500,2 ,0
+2 ,2,500,2 ,0
+2 ,11,2750,46 ,0
+2 ,11,2750,46 ,1
+2 ,6,1500,22 ,0
+2 ,12,3000,52 ,0
+4 ,5,1250,14 ,1
+4 ,19,4750,69 ,1
+4 ,8,2000,26 ,1
+2 ,7,1750,28 ,1
+2 ,16,4000,81 ,0
+3 ,6,1500,21 ,0
+2 ,7,1750,29 ,0
+2 ,8,2000,35 ,1
+2 ,10,2500,49 ,0
+4 ,5,1250,16 ,1
+2 ,3,750,9 ,1
+3 ,16,4000,74 ,0
+2 ,4,1000,14 ,1
+0 ,2,500,4 ,0
+4 ,7,1750,25 ,0
+1 ,9,2250,51 ,0
+2 ,4,1000,16 ,0
+2 ,4,1000,16 ,0
+4 ,17,4250,71 ,1
+2 ,2,500,4 ,0
+2 ,2,500,4 ,1
+2 ,2,500,4 ,1
+2 ,4,1000,16 ,1
+2 ,2,500,4 ,0
+2 ,2,500,4 ,0
+2 ,2,500,4 ,0
+4 ,6,1500,23 ,1
+2 ,4,1000,16 ,0
+2 ,4,1000,16 ,0
+2 ,4,1000,16 ,0
+2 ,6,1500,28 ,1
+2 ,6,1500,28 ,0
+4 ,2,500,4 ,0
+4 ,2,500,4 ,0
+4 ,2,500,4 ,0
+2 ,7,1750,35 ,1
+4 ,2,500,4 ,1
+4 ,2,500,4 ,0
+4 ,2,500,4 ,0
+4 ,2,500,4 ,0
+12 ,11,2750,23 ,0
+4 ,7,1750,28 ,0
+3 ,17,4250,86 ,0
+4 ,9,2250,38 ,1
+4 ,4,1000,14 ,1
+5 ,7,1750,26 ,1
+4 ,8,2000,34 ,1
+2 ,13,3250,76 ,1
+4 ,9,2250,40 ,0
+2 ,5,1250,26 ,0
+2 ,5,1250,26 ,0
+6 ,17,4250,70 ,0
+0 ,8,2000,59 ,0
+3 ,5,1250,26 ,0
+2 ,3,750,14 ,0
+2 ,10,2500,64 ,0
+4 ,5,1250,23 ,1
+4 ,9,2250,46 ,0
+4 ,5,1250,23 ,0
+4 ,8,2000,40 ,1
+2 ,12,3000,82 ,0
+11 ,24,6000,64 ,0
+2 ,7,1750,46 ,1
+4 ,11,2750,61 ,0
+1 ,7,1750,57 ,0
+2 ,11,2750,79 ,1
+2 ,3,750,16 ,1
+4 ,5,1250,26 ,1
+2 ,6,1500,41 ,1
+2 ,5,1250,33 ,1
+2 ,4,1000,26 ,0
+2 ,5,1250,34 ,0
+4 ,8,2000,46 ,1
+2 ,4,1000,26 ,0
+4 ,8,2000,48 ,1
+2 ,2,500,10 ,1
+4 ,5,1250,28 ,0
+2 ,12,3000,95 ,0
+2 ,2,500,10 ,0
+4 ,6,1500,35 ,0
+2 ,11,2750,88 ,0
+2 ,3,750,19 ,0
+2 ,5,1250,37 ,0
+2 ,12,3000,98 ,0
+9 ,5,1250,19 ,0
+2 ,2,500,11 ,0
+2 ,9,2250,74 ,0
+5 ,14,3500,86 ,0
+4 ,3,750,16 ,0
+4 ,3,750,16 ,0
+4 ,2,500,9 ,1
+4 ,3,750,16 ,1
+6 ,3,750,14 ,0
+2 ,2,500,11 ,0
+2 ,2,500,11 ,1
+2 ,2,500,11 ,0
+2 ,7,1750,58 ,1
+4 ,6,1500,39 ,0
+4 ,11,2750,78 ,0
+2 ,1,250,2 ,1
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,1
+2 ,1,250,2 ,1
+2 ,1,250,2 ,1
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+11 ,10,2500,35 ,0
+11 ,4,1000,16 ,1
+4 ,5,1250,33 ,1
+4 ,6,1500,41 ,1
+2 ,3,750,22 ,0
+4 ,4,1000,26 ,1
+10 ,4,1000,16 ,0
+2 ,4,1000,35 ,0
+4 ,12,3000,88 ,0
+13 ,8,2000,26 ,0
+11 ,9,2250,33 ,0
+4 ,5,1250,34 ,0
+4 ,4,1000,26 ,0
+8 ,15,3750,77 ,0
+4 ,5,1250,35 ,1
+4 ,7,1750,52 ,0
+4 ,7,1750,52 ,0
+2 ,4,1000,35 ,0
+11 ,11,2750,42 ,0
+2 ,2,500,14 ,0
+2 ,5,1250,47 ,1
+9 ,8,2000,38 ,1
+4 ,6,1500,47 ,0
+11 ,7,1750,29 ,0
+9 ,9,2250,45 ,0
+4 ,6,1500,52 ,0
+4 ,7,1750,58 ,0
+6 ,2,500,11 ,1
+4 ,7,1750,58 ,0
+11 ,9,2250,38 ,0
+11 ,6,1500,26 ,0
+2 ,2,500,16 ,0
+2 ,7,1750,76 ,0
+11 ,6,1500,27 ,0
+11 ,3,750,14 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,1
+4 ,1,250,4 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,1
+4 ,1,250,4 ,1
+4 ,1,250,4 ,0
+4 ,1,250,4 ,1
+4 ,1,250,4 ,1
+4 ,1,250,4 ,0
+4 ,3,750,24 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,1
+4 ,1,250,4 ,0
+10 ,8,2000,39 ,0
+14 ,7,1750,26 ,0
+8 ,10,2500,63 ,0
+11 ,3,750,15 ,0
+4 ,2,500,14 ,0
+2 ,4,1000,43 ,0
+8 ,9,2250,58 ,0
+8 ,8,2000,52 ,1
+11 ,22,5500,98 ,0
+4 ,3,750,25 ,1
+11 ,17,4250,79 ,1
+9 ,2,500,11 ,0
+4 ,5,1250,46 ,0
+11 ,12,3000,58 ,0
+7 ,12,3000,86 ,0
+11 ,2,500,11 ,0
+11 ,2,500,11 ,0
+11 ,2,500,11 ,0
+2 ,6,1500,75 ,0
+11 ,8,2000,41 ,1
+11 ,3,750,16 ,1
+12 ,13,3250,59 ,0
+2 ,3,750,35 ,0
+16 ,8,2000,28 ,0
+11 ,7,1750,37 ,0
+4 ,3,750,28 ,0
+12 ,12,3000,58 ,0
+4 ,4,1000,41 ,0
+11 ,14,3500,73 ,1
+2 ,2,500,23 ,0
+2 ,3,750,38 ,1
+4 ,5,1250,58 ,0
+4 ,4,1000,43 ,1
+3 ,2,500,23 ,0
+11 ,8,2000,46 ,0
+4 ,7,1750,82 ,0
+13 ,4,1000,21 ,0
+16 ,11,2750,40 ,0
+16 ,7,1750,28 ,0
+7 ,2,500,16 ,0
+4 ,5,1250,58 ,0
+4 ,5,1250,58 ,0
+4 ,4,1000,46 ,0
+14 ,13,3250,57 ,0
+4 ,3,750,34 ,0
+14 ,18,4500,78 ,0
+11 ,8,2000,48 ,0
+14 ,16,4000,70 ,0
+14 ,4,1000,22 ,1
+14 ,5,1250,26 ,0
+8 ,2,500,16 ,0
+11 ,5,1250,33 ,0
+11 ,2,500,14 ,0
+4 ,2,500,23 ,0
+9 ,2,500,16 ,1
+14 ,5,1250,28 ,1
+14 ,3,750,19 ,1
+14 ,4,1000,23 ,1
+16 ,12,3000,50 ,0
+11 ,4,1000,28 ,0
+11 ,5,1250,35 ,0
+11 ,5,1250,35 ,0
+2 ,4,1000,70 ,0
+14 ,5,1250,28 ,0
+14 ,2,500,14 ,0
+14 ,2,500,14 ,0
+14 ,2,500,14 ,0
+14 ,2,500,14 ,0
+14 ,2,500,14 ,0
+14 ,2,500,14 ,0
+2 ,3,750,52 ,0
+14 ,6,1500,34 ,0
+11 ,5,1250,37 ,1
+4 ,5,1250,74 ,0
+11 ,3,750,23 ,0
+16 ,4,1000,23 ,0
+16 ,3,750,19 ,0
+11 ,5,1250,38 ,0
+11 ,2,500,16 ,0
+12 ,9,2250,60 ,0
+9 ,1,250,9 ,0
+9 ,1,250,9 ,0
+4 ,2,500,29 ,0
+11 ,2,500,17 ,0
+14 ,4,1000,26 ,0
+11 ,9,2250,72 ,1
+11 ,5,1250,41 ,0
+15 ,16,4000,82 ,0
+9 ,5,1250,51 ,1
+11 ,4,1000,34 ,0
+14 ,8,2000,50 ,1
+16 ,7,1750,38 ,0
+14 ,2,500,16 ,0
+2 ,2,500,41 ,0
+14 ,16,4000,98 ,0
+14 ,4,1000,28 ,1
+16 ,7,1750,39 ,0
+14 ,7,1750,47 ,0
+16 ,6,1500,35 ,0
+16 ,6,1500,35 ,1
+11 ,7,1750,62 ,1
+16 ,2,500,16 ,0
+16 ,3,750,21 ,1
+11 ,3,750,28 ,0
+11 ,7,1750,64 ,0
+11 ,1,250,11 ,1
+9 ,3,750,34 ,0
+14 ,4,1000,30 ,0
+23 ,38,9500,98 ,0
+11 ,6,1500,58 ,0
+11 ,1,250,11 ,0
+11 ,1,250,11 ,0
+11 ,1,250,11 ,0
+11 ,1,250,11 ,0
+11 ,1,250,11 ,0
+11 ,1,250,11 ,0
+11 ,1,250,11 ,0
+11 ,1,250,11 ,0
+11 ,2,500,21 ,0
+11 ,5,1250,50 ,0
+11 ,2,500,21 ,0
+16 ,4,1000,28 ,0
+4 ,2,500,41 ,0
+16 ,6,1500,40 ,0
+14 ,3,750,26 ,0
+9 ,2,500,26 ,0
+21 ,16,4000,64 ,0
+14 ,6,1500,51 ,0
+11 ,2,500,24 ,0
+4 ,3,750,71 ,0
+21 ,13,3250,57 ,0
+11 ,6,1500,71 ,0
+14 ,2,500,21 ,1
+23 ,15,3750,57 ,0
+14 ,4,1000,38 ,0
+11 ,2,500,26 ,0
+16 ,5,1250,40 ,1
+4 ,2,500,51 ,1
+14 ,3,750,31 ,0
+4 ,2,500,52 ,0
+9 ,4,1000,65 ,0
+14 ,4,1000,40 ,0
+11 ,3,750,40 ,1
+14 ,5,1250,50 ,0
+14 ,1,250,14 ,0
+14 ,1,250,14 ,0
+14 ,1,250,14 ,0
+14 ,1,250,14 ,0
+14 ,1,250,14 ,0
+14 ,1,250,14 ,0
+14 ,1,250,14 ,0
+14 ,1,250,14 ,0
+14 ,7,1750,72 ,0
+14 ,1,250,14 ,0
+14 ,1,250,14 ,0
+9 ,3,750,52 ,0
+14 ,7,1750,73 ,0
+11 ,4,1000,58 ,0
+11 ,4,1000,59 ,0
+4 ,2,500,59 ,0
+11 ,4,1000,61 ,0
+16 ,4,1000,40 ,0
+16 ,10,2500,89 ,0
+21 ,2,500,21 ,1
+21 ,3,750,26 ,0
+16 ,8,2000,76 ,0
+21 ,3,750,26 ,1
+18 ,2,500,23 ,0
+23 ,5,1250,33 ,0
+23 ,8,2000,46 ,0
+16 ,3,750,34 ,0
+14 ,5,1250,64 ,0
+14 ,3,750,41 ,0
+16 ,1,250,16 ,0
+16 ,1,250,16 ,0
+16 ,1,250,16 ,0
+16 ,1,250,16 ,0
+16 ,1,250,16 ,0
+16 ,1,250,16 ,0
+16 ,1,250,16 ,0
+16 ,4,1000,45 ,0
+16 ,1,250,16 ,0
+16 ,1,250,16 ,0
+16 ,1,250,16 ,0
+16 ,1,250,16 ,0
+16 ,1,250,16 ,0
+16 ,2,500,26 ,0
+21 ,2,500,23 ,0
+16 ,2,500,27 ,0
+21 ,2,500,23 ,0
+21 ,2,500,23 ,0
+14 ,4,1000,57 ,0
+16 ,5,1250,60 ,0
+23 ,2,500,23 ,0
+14 ,5,1250,74 ,0
+23 ,3,750,28 ,0
+16 ,3,750,40 ,0
+9 ,2,500,52 ,0
+9 ,2,500,52 ,0
+16 ,7,1750,87 ,1
+14 ,4,1000,64 ,0
+14 ,2,500,35 ,0
+16 ,7,1750,93 ,0
+21 ,2,500,25 ,0
+14 ,3,750,52 ,0
+23 ,14,3500,93 ,0
+18 ,8,2000,95 ,0
+16 ,3,750,46 ,0
+11 ,3,750,76 ,0
+11 ,2,500,52 ,0
+11 ,3,750,76 ,0
+23 ,12,3000,86 ,0
+21 ,3,750,35 ,0
+23 ,2,500,26 ,0
+23 ,2,500,26 ,0
+23 ,8,2000,64 ,0
+16 ,3,750,50 ,0
+23 ,3,750,33 ,0
+21 ,3,750,38 ,0
+23 ,2,500,28 ,0
+21 ,1,250,21 ,0
+21 ,1,250,21 ,0
+21 ,1,250,21 ,0
+21 ,1,250,21 ,0
+21 ,1,250,21 ,0
+21 ,1,250,21 ,0
+21 ,1,250,21 ,0
+21 ,1,250,21 ,0
+21 ,1,250,21 ,0
+21 ,1,250,21 ,1
+21 ,1,250,21 ,0
+21 ,1,250,21 ,0
+21 ,5,1250,60 ,0
+23 ,4,1000,45 ,0
+21 ,4,1000,52 ,0
+22 ,1,250,22 ,1
+11 ,2,500,70 ,0
+23 ,5,1250,58 ,0
+23 ,3,750,40 ,0
+23 ,3,750,41 ,0
+14 ,3,750,83 ,0
+21 ,2,500,35 ,0
+26 ,5,1250,49 ,1
+23 ,6,1500,70 ,0
+23 ,1,250,23 ,0
+23 ,1,250,23 ,0
+23 ,1,250,23 ,0
+23 ,1,250,23 ,0
+23 ,1,250,23 ,0
+23 ,1,250,23 ,0
+23 ,1,250,23 ,0
+23 ,1,250,23 ,0
+23 ,4,1000,53 ,0
+21 ,6,1500,86 ,0
+23 ,3,750,48 ,0
+21 ,2,500,41 ,0
+21 ,3,750,64 ,0
+16 ,2,500,70 ,0
+21 ,3,750,70 ,0
+23 ,4,1000,87 ,0
+23 ,3,750,89 ,0
+23 ,2,500,87 ,0
+35 ,3,750,64 ,0
+38 ,1,250,38 ,0
+38 ,1,250,38 ,0
+40 ,1,250,40 ,0
+74 ,1,250,74 ,0
+2 ,43,10750,86 ,1
+6 ,22,5500,28 ,1
+2 ,34,8500,77 ,1
+2 ,44,11000,98 ,0
+0 ,26,6500,76 ,1
+2 ,41,10250,98 ,1
+3 ,21,5250,42 ,1
+2 ,11,2750,23 ,0
+2 ,21,5250,52 ,1
+2 ,13,3250,32 ,1
+4 ,4,1000,4 ,1
+2 ,11,2750,26 ,0
+2 ,11,2750,28 ,0
+3 ,14,3500,35 ,0
+4 ,16,4000,38 ,1
+4 ,6,1500,14 ,0
+3 ,5,1250,12 ,1
+4 ,33,8250,98 ,1
+3 ,10,2500,33 ,1
+4 ,10,2500,28 ,1
+2 ,11,2750,40 ,1
+2 ,11,2750,41 ,1
+4 ,13,3250,39 ,1
+1 ,10,2500,43 ,1
+4 ,9,2250,28 ,0
+2 ,4,1000,11 ,0
+2 ,5,1250,16 ,1
+2 ,15,3750,64 ,0
+5 ,24,6000,79 ,0
+2 ,6,1500,22 ,1
+4 ,5,1250,16 ,1
+2 ,4,1000,14 ,1
+4 ,8,2000,28 ,0
+2 ,4,1000,14 ,0
+2 ,6,1500,26 ,0
+4 ,5,1250,16 ,1
+2 ,7,1750,32 ,1
+2 ,6,1500,26 ,1
+2 ,8,2000,38 ,1
+2 ,2,500,4 ,1
+2 ,6,1500,28 ,1
+2 ,10,2500,52 ,0
+4 ,16,4000,70 ,1
+4 ,2,500,4 ,1
+1 ,14,3500,95 ,0
+4 ,2,500,4 ,1
+7 ,14,3500,48 ,0
+2 ,3,750,11 ,0
+2 ,12,3000,70 ,1
+4 ,7,1750,32 ,1
+4 ,4,1000,16 ,0
+2 ,6,1500,35 ,1
+4 ,6,1500,28 ,1
+2 ,3,750,14 ,0
+2 ,4,1000,23 ,0
+4 ,4,1000,18 ,0
+5 ,6,1500,28 ,0
+4 ,6,1500,30 ,0
+14 ,5,1250,14 ,0
+3 ,8,2000,50 ,0
+4 ,11,2750,64 ,1
+4 ,9,2250,52 ,0
+4 ,16,4000,98 ,1
+7 ,10,2500,47 ,0
+4 ,14,3500,86 ,0
+2 ,9,2250,75 ,0
+4 ,6,1500,35 ,0
+4 ,9,2250,55 ,0
+4 ,6,1500,35 ,1
+2 ,6,1500,45 ,0
+2 ,6,1500,47 ,0
+4 ,2,500,9 ,0
+2 ,2,500,11 ,1
+2 ,2,500,11 ,0
+2 ,2,500,11 ,1
+4 ,6,1500,38 ,1
+3 ,4,1000,29 ,1
+9 ,9,2250,38 ,0
+11 ,5,1250,18 ,0
+2 ,3,750,21 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,1
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,1
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+2 ,1,250,2 ,0
+11 ,11,2750,38 ,0
+2 ,3,750,22 ,0
+9 ,11,2750,49 ,1
+5 ,11,2750,75 ,0
+3 ,5,1250,38 ,0
+3 ,1,250,3 ,1
+4 ,6,1500,43 ,0
+2 ,3,750,24 ,0
+12 ,11,2750,39 ,0
+2 ,2,500,14 ,0
+4 ,6,1500,46 ,0
+9 ,3,750,14 ,0
+14 ,8,2000,26 ,0
+4 ,2,500,13 ,0
+4 ,11,2750,95 ,0
+2 ,7,1750,77 ,0
+2 ,7,1750,77 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,1
+4 ,1,250,4 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,0
+4 ,1,250,4 ,1
+4 ,1,250,4 ,0
+4 ,7,1750,62 ,0
+4 ,1,250,4 ,0
+4 ,4,1000,34 ,1
+11 ,6,1500,28 ,0
+13 ,3,750,14 ,1
+7 ,5,1250,35 ,0
+9 ,9,2250,54 ,0
+11 ,2,500,11 ,0
+2 ,5,1250,63 ,0
+7 ,11,2750,89 ,0
+8 ,9,2250,64 ,0
+2 ,2,500,22 ,0
+6 ,3,750,26 ,0
+12 ,15,3750,71 ,0
+13 ,3,750,16 ,0
+11 ,16,4000,89 ,0
+4 ,5,1250,58 ,0
+14 ,7,1750,35 ,0
+11 ,4,1000,27 ,0
+7 ,9,2250,89 ,1
+11 ,8,2000,52 ,1
+7 ,5,1250,52 ,0
+11 ,6,1500,41 ,0
+10 ,5,1250,38 ,0
+14 ,2,500,14 ,1
+14 ,2,500,14 ,0
+14 ,2,500,14 ,0
+2 ,2,500,33 ,0
+11 ,3,750,23 ,0
+14 ,8,2000,46 ,0
+9 ,1,250,9 ,0
+16 ,5,1250,27 ,0
+14 ,4,1000,26 ,0
+4 ,2,500,30 ,0
+14 ,3,750,21 ,0
+16 ,16,4000,77 ,0
+4 ,2,500,31 ,0
+14 ,8,2000,50 ,0
+11 ,3,750,26 ,0
+14 ,7,1750,45 ,0
+15 ,5,1250,33 ,0
+16 ,2,500,16 ,0
+16 ,3,750,21 ,0
+11 ,8,2000,72 ,0
+11 ,1,250,11 ,0
+11 ,1,250,11 ,0
+11 ,1,250,11 ,0
+11 ,1,250,11 ,1
+11 ,1,250,11 ,0
+2 ,3,750,75 ,1
+2 ,3,750,77 ,0
+16 ,4,1000,28 ,0
+16 ,15,3750,87 ,0
+16 ,14,3500,83 ,0
+16 ,10,2500,62 ,0
+16 ,3,750,23 ,0
+14 ,3,750,26 ,0
+23 ,19,4750,62 ,0
+11 ,7,1750,75 ,0
+14 ,3,750,28 ,0
+20 ,14,3500,69 ,1
+4 ,2,500,46 ,0
+11 ,2,500,25 ,0
+11 ,3,750,37 ,0
+16 ,4,1000,33 ,0
+21 ,7,1750,38 ,0
+13 ,7,1750,76 ,0
+16 ,6,1500,50 ,0
+14 ,3,750,33 ,0
+14 ,1,250,14 ,0
+14 ,1,250,14 ,0
+14 ,1,250,14 ,0
+14 ,1,250,14 ,0
+14 ,1,250,14 ,0
+14 ,1,250,14 ,0
+17 ,7,1750,58 ,1
+14 ,3,750,35 ,0
+14 ,3,750,35 ,0
+16 ,7,1750,64 ,0
+21 ,2,500,21 ,0
+16 ,3,750,35 ,0
+16 ,1,250,16 ,0
+16 ,1,250,16 ,0
+16 ,1,250,16 ,0
+16 ,1,250,16 ,0
+16 ,1,250,16 ,0
+14 ,2,500,29 ,0
+11 ,4,1000,74 ,0
+11 ,2,500,38 ,1
+21 ,6,1500,48 ,0
+23 ,2,500,23 ,0
+23 ,6,1500,45 ,0
+14 ,2,500,35 ,1
+16 ,6,1500,81 ,0
+16 ,4,1000,58 ,0
+16 ,5,1250,71 ,0
+21 ,2,500,26 ,0
+21 ,3,750,35 ,0
+21 ,3,750,35 ,0
+23 ,8,2000,69 ,0
+21 ,3,750,38 ,0
+23 ,3,750,35 ,0
+21 ,3,750,40 ,0
+23 ,2,500,28 ,0
+21 ,1,250,21 ,0
+21 ,1,250,21 ,0
+25 ,6,1500,50 ,0
+21 ,1,250,21 ,0
+21 ,1,250,21 ,0
+23 ,3,750,39 ,0
+21 ,2,500,33 ,0
+14 ,3,750,79 ,0
+23 ,1,250,23 ,1
+23 ,1,250,23 ,0
+23 ,1,250,23 ,0
+23 ,1,250,23 ,0
+23 ,1,250,23 ,0
+23 ,1,250,23 ,0
+23 ,1,250,23 ,0
+23 ,4,1000,52 ,0
+23 ,1,250,23 ,0
+23 ,7,1750,88 ,0
+16 ,3,750,86 ,0
+23 ,2,500,38 ,0
+21 ,2,500,52 ,0
+23 ,3,750,62 ,0
+39 ,1,250,39 ,0
 72 ,1,250,72 ,0
\ No newline at end of file


[50/55] [partial] incubator-systemml git commit: [SYSTEMML-482] [SYSTEMML-480] Adding a Git attributes file to enforce Unix-styled line endings, and normalizing all of the line endings.

Posted by du...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/scripts/algorithms/Cox-predict.dml
----------------------------------------------------------------------
diff --git a/scripts/algorithms/Cox-predict.dml b/scripts/algorithms/Cox-predict.dml
index 6f2bec0..7d444fd 100644
--- a/scripts/algorithms/Cox-predict.dml
+++ b/scripts/algorithms/Cox-predict.dml
@@ -1,181 +1,181 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-#  
-# THIS SCRIPT APPLIES THE ESTIMATED PARAMETERS OF A COX PROPORTIONAL HAZARD REGRESSION MODEL TO A NEW (TEST) DATASET
-# 
-# INPUT   PARAMETERS:
-# ---------------------------------------------------------------------------------------------
-# NAME    TYPE     DEFAULT      MEANING
-# ---------------------------------------------------------------------------------------------
-# X       String   ---          Location to read the input matrix containing the survival data with the following schema:
-# 								 - X[,1]: timestamps 
-#								 - X[,2]: whether an event occurred (1) or data is censored (0)
-#								 - X[,3:]: feature vectors (excluding the baseline columns) used for model fitting	 
-# RT      String   ---			Location to read column matrix RT containing the (order preserving) recoded timestamps from X 	
-# M       String   ---			Location to read matrix M containing the fitted Cox model with the following schema:
-#								 - M[,1]: betas	
-#								 - M[,2]: exp(betas)
-#								 - M[,3]: standard error of betas
-#								 - M[,4]: Z 
-#								 - M[,5]: p-value
-#								 - M[,6]: lower 100*(1-alpha)% confidence interval of betas
-#								 - M[,7]: upper 100*(1-alpha)% confidence interval of betas
-# Y       String   --- 			Location to read matrix Y used for prediction  
-# COV	  String   ---			Location to read the variance-covariance matrix of the betas	
-# MF      String   ---          Location to read column indices of X excluding the baseline factors if available
-# P       String   ---          Location to store matrix P containing the results of prediction
-# fmt     String   "text"       Matrix output format, usually "text" or "csv" (for matrices only)
-# ---------------------------------------------------------------------------------------------
-# OUTPUT: 
-# 1- A matrix P with the following schema:
-#	P[,1]: linear predictors relative to a baseline which contains the mean values for each feature
-#	   	   i.e., (Y[3:] - colMeans (X[3:])) %*% b
-#	P[,2]: standard error of linear predictors
-#	P[,3]: risk relative to a baseline which contains the mean values for each feature
-#		   i.e., exp ((Y[3:] - colMeans (X[3:])) %*% b)
-#	P[,4]: standard error of risk 
-#	P[,5]: estimates of cumulative hazard
-#	P[,6]: standard error of the estimates of cumulative hazard
-# -------------------------------------------------------------------------------------------
-# HOW TO INVOKE THIS SCRIPT - EXAMPLE:
-# hadoop jar SystemML.jar -f cox-predict.dml -nvargs X=INPUT_DIR/X RT=INPUT_DIR/RT M=INPUT_DIR/M Y=INPUT_DIR/Y 
-#											  COV=INTPUT_DIR/COV MF=INPUT_DIR/MF P=OUTPUT_DIR/P fmt=csv
-
-fileX = $X;
-fileRT = $RT;
-fileMF = $MF;
-fileY = $Y; 
-fileM = $M;
-fileCOV = $COV;
-fileP = $P; 
-
-# Default values of some parameters
-fmtO = ifdef ($fmt, "text");       # $fmt="text" 
-
-X_orig = read (fileX);
-RT_X = read (fileRT); 
-Y_orig = read (fileY);
-M = read (fileM);
-b = M[,1];
-COV = read (fileCOV);
-
-col_ind = read (fileMF);
-tab = table (col_ind, seq (1, nrow (col_ind)), ncol (Y_orig), nrow (col_ind));
-Y_orig = Y_orig %*% tab;
-
-
-# Y and X have the same dimensions and schema
-if (ncol (Y_orig) != ncol (X_orig)) { 
-	stop ("Y has a wrong number of columns!");
-}
-
-X = X_orig[,3:ncol (X_orig)];
-T_X = X_orig[,1]; 
-E_X = X_orig[,2];
-D = ncol (X);
-N = nrow (X);
-Y_orig = order (target = Y_orig, by = 1);
-Y = Y_orig[,3:ncol (X_orig)];
-T_Y = Y_orig[,1];
-
-col_means = colMeans (X);
-ones = matrix (1, rows = nrow (Y), cols = 1);
-Y_rel = Y - (ones %*% col_means);
-
-##### compute linear predictors
-LP = Y_rel %*% b; 
-# compute standard error of linear predictors using the Delta method
-se_LP = diag(sqrt (Y_rel %*% COV %*% t(Y_rel)));
-
-##### compute risk
-R = exp (Y_rel %*% b);
-# compute standard error of risk using the Delta method
-se_R = diag(sqrt ((Y_rel * R) %*% COV %*% t(Y_rel * R))) / sqrt (exp (LP));
-
-##### compute estimates of cumulative hazard together with their standard errors: 
-# 1. col contains cumulative hazard estimates
-# 2. col contains standard errors for cumulative hazard estimates
- 
-d_r = aggregate (target = E_X, groups = RT_X, fn = "sum"); 
-e_r = aggregate (target = RT_X, groups = RT_X, fn = "count");
-Idx = cumsum (e_r); 
-all_times = table (seq (1, nrow (Idx), 1), Idx) %*% T_X; # distinct event times 
-
-event_times = removeEmpty (target = ppred (d_r, 0, ">") * all_times, margin = "rows");
-num_distinct_event = nrow (event_times);
-
-num_distinct = nrow (all_times); # no. of distinct timestamps censored or uncensored
-I_rev = table (seq (1, num_distinct, 1), seq (num_distinct, 1, -1));
-e_r_rev_agg = cumsum (I_rev %*% e_r);
-select = t (colSums (table (seq (1, num_distinct), e_r_rev_agg))); 
-
-min_event_time = min (event_times);
-max_event_time = max (event_times);
-T_Y = T_Y + (min_event_time * ppred (T_Y, min_event_time, "<"));
-T_Y = T_Y + (max_event_time * ppred (T_Y, max_event_time, ">"));
-
-Ind = outer (T_Y, t (event_times), ">=");
-Ind = table (seq (1, nrow (T_Y)), rowIndexMax (Ind), nrow (T_Y), num_distinct_event);
-
-exp_Xb = exp (X %*% b);
-exp_Xb_agg = aggregate (target = exp_Xb, groups = RT_X, fn = "sum");
-exp_Xb_cum = I_rev %*% cumsum (I_rev %*% exp_Xb_agg);
-
-H0 = cumsum (removeEmpty (target = d_r / exp_Xb_cum, margin = "rows"));
-P1 = cumsum (removeEmpty (target = d_r / exp_Xb_cum ^ 2, margin = "rows"));
-X_exp_Xb = X * exp (X %*% b);
-
-I_rev_all = table (seq (1, N, 1), seq (N, 1, -1));
-X_exp_Xb_rev_agg = cumsum (I_rev_all %*% X_exp_Xb);
-X_exp_Xb_rev_agg = removeEmpty (target = X_exp_Xb_rev_agg * select, margin = "rows"); 
-X_exp_Xb_cum = I_rev %*% X_exp_Xb_rev_agg; 
-P2 = cumsum (removeEmpty (target = (X_exp_Xb_cum * d_r) / exp_Xb_cum ^ 2, margin = "rows"));
-
-exp_Yb = exp (Y %*% b);	
-exp_Yb_2 = exp_Yb ^ 2;
-Y_exp_Yb = Y * exp (Y %*% b);
-
-# estimates of cumulative hazard
-H = exp_Yb * (Ind %*% H0);
-
-# term1
-term1 = exp_Yb_2 * (Ind %*% P1);
-
-# term2
-P3 = cumsum (removeEmpty (target = (exp_Xb_cum * d_r) / exp_Xb_cum ^ 2, margin = "rows"));
-P4 = (Ind %*% P2) * exp_Yb;
-P5 = Y_exp_Yb * (Ind %*% P3);
-term2 =  P4 - P5; 
-
-# standard error of the estimates of cumulative hazard
-se_H = sqrt (term1 + rowSums((term2 %*% COV) * term2));
-
-# prepare output matrix
-P = matrix (0, rows = nrow (Y), cols = 6);
-P[,1] = LP;
-P[,2] = se_LP;
-P[,3] = R; 
-P[,4] = se_R;
-P[,5] = H;
-P[,6] = se_H;
-write (P, fileP, format=fmtO);
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+#  
+# THIS SCRIPT APPLIES THE ESTIMATED PARAMETERS OF A COX PROPORTIONAL HAZARD REGRESSION MODEL TO A NEW (TEST) DATASET
+# 
+# INPUT   PARAMETERS:
+# ---------------------------------------------------------------------------------------------
+# NAME    TYPE     DEFAULT      MEANING
+# ---------------------------------------------------------------------------------------------
+# X       String   ---          Location to read the input matrix containing the survival data with the following schema:
+# 								 - X[,1]: timestamps 
+#								 - X[,2]: whether an event occurred (1) or data is censored (0)
+#								 - X[,3:]: feature vectors (excluding the baseline columns) used for model fitting	 
+# RT      String   ---			Location to read column matrix RT containing the (order preserving) recoded timestamps from X 	
+# M       String   ---			Location to read matrix M containing the fitted Cox model with the following schema:
+#								 - M[,1]: betas	
+#								 - M[,2]: exp(betas)
+#								 - M[,3]: standard error of betas
+#								 - M[,4]: Z 
+#								 - M[,5]: p-value
+#								 - M[,6]: lower 100*(1-alpha)% confidence interval of betas
+#								 - M[,7]: upper 100*(1-alpha)% confidence interval of betas
+# Y       String   --- 			Location to read matrix Y used for prediction  
+# COV	  String   ---			Location to read the variance-covariance matrix of the betas	
+# MF      String   ---          Location to read column indices of X excluding the baseline factors if available
+# P       String   ---          Location to store matrix P containing the results of prediction
+# fmt     String   "text"       Matrix output format, usually "text" or "csv" (for matrices only)
+# ---------------------------------------------------------------------------------------------
+# OUTPUT: 
+# 1- A matrix P with the following schema:
+#	P[,1]: linear predictors relative to a baseline which contains the mean values for each feature
+#	   	   i.e., (Y[3:] - colMeans (X[3:])) %*% b
+#	P[,2]: standard error of linear predictors
+#	P[,3]: risk relative to a baseline which contains the mean values for each feature
+#		   i.e., exp ((Y[3:] - colMeans (X[3:])) %*% b)
+#	P[,4]: standard error of risk 
+#	P[,5]: estimates of cumulative hazard
+#	P[,6]: standard error of the estimates of cumulative hazard
+# -------------------------------------------------------------------------------------------
+# HOW TO INVOKE THIS SCRIPT - EXAMPLE:
+# hadoop jar SystemML.jar -f cox-predict.dml -nvargs X=INPUT_DIR/X RT=INPUT_DIR/RT M=INPUT_DIR/M Y=INPUT_DIR/Y 
+#											  COV=INPUT_DIR/COV MF=INPUT_DIR/MF P=OUTPUT_DIR/P fmt=csv
+
+fileX = $X;
+fileRT = $RT;
+fileMF = $MF;
+fileY = $Y; 
+fileM = $M;
+fileCOV = $COV;
+fileP = $P; 
+
+# Default values of some parameters
+fmtO = ifdef ($fmt, "text");       # $fmt="text" 
+
+X_orig = read (fileX);
+RT_X = read (fileRT); 
+Y_orig = read (fileY);
+M = read (fileM);
+b = M[,1];
+COV = read (fileCOV);
+
+col_ind = read (fileMF);
+tab = table (col_ind, seq (1, nrow (col_ind)), ncol (Y_orig), nrow (col_ind));
+Y_orig = Y_orig %*% tab;
+
+
+# Y and X must have the same number of columns and the same schema
+if (ncol (Y_orig) != ncol (X_orig)) { 
+	stop ("Y has a wrong number of columns!");
+}
+
+X = X_orig[,3:ncol (X_orig)];
+T_X = X_orig[,1]; 
+E_X = X_orig[,2];
+D = ncol (X);
+N = nrow (X);
+Y_orig = order (target = Y_orig, by = 1);
+Y = Y_orig[,3:ncol (X_orig)];
+T_Y = Y_orig[,1];
+
+col_means = colMeans (X);
+ones = matrix (1, rows = nrow (Y), cols = 1);
+Y_rel = Y - (ones %*% col_means);
+
+##### compute linear predictors
+LP = Y_rel %*% b; 
+# compute standard error of linear predictors using the Delta method
+se_LP = diag(sqrt (Y_rel %*% COV %*% t(Y_rel)));
+
+##### compute risk
+R = exp (Y_rel %*% b);
+# compute standard error of risk using the Delta method
+se_R = diag(sqrt ((Y_rel * R) %*% COV %*% t(Y_rel * R))) / sqrt (exp (LP));
+
+##### compute estimates of cumulative hazard together with their standard errors: 
+# 1. col contains cumulative hazard estimates
+# 2. col contains standard errors for cumulative hazard estimates
+ 
+d_r = aggregate (target = E_X, groups = RT_X, fn = "sum"); 
+e_r = aggregate (target = RT_X, groups = RT_X, fn = "count");
+Idx = cumsum (e_r); 
+all_times = table (seq (1, nrow (Idx), 1), Idx) %*% T_X; # distinct event times 
+
+event_times = removeEmpty (target = ppred (d_r, 0, ">") * all_times, margin = "rows");
+num_distinct_event = nrow (event_times);
+
+num_distinct = nrow (all_times); # no. of distinct timestamps censored or uncensored
+I_rev = table (seq (1, num_distinct, 1), seq (num_distinct, 1, -1));
+e_r_rev_agg = cumsum (I_rev %*% e_r);
+select = t (colSums (table (seq (1, num_distinct), e_r_rev_agg))); 
+
+min_event_time = min (event_times);
+max_event_time = max (event_times);
+T_Y = T_Y + (min_event_time * ppred (T_Y, min_event_time, "<"));
+T_Y = T_Y + (max_event_time * ppred (T_Y, max_event_time, ">"));
+
+Ind = outer (T_Y, t (event_times), ">=");
+Ind = table (seq (1, nrow (T_Y)), rowIndexMax (Ind), nrow (T_Y), num_distinct_event);
+
+exp_Xb = exp (X %*% b);
+exp_Xb_agg = aggregate (target = exp_Xb, groups = RT_X, fn = "sum");
+exp_Xb_cum = I_rev %*% cumsum (I_rev %*% exp_Xb_agg);
+
+H0 = cumsum (removeEmpty (target = d_r / exp_Xb_cum, margin = "rows"));
+P1 = cumsum (removeEmpty (target = d_r / exp_Xb_cum ^ 2, margin = "rows"));
+X_exp_Xb = X * exp (X %*% b);
+
+I_rev_all = table (seq (1, N, 1), seq (N, 1, -1));
+X_exp_Xb_rev_agg = cumsum (I_rev_all %*% X_exp_Xb);
+X_exp_Xb_rev_agg = removeEmpty (target = X_exp_Xb_rev_agg * select, margin = "rows"); 
+X_exp_Xb_cum = I_rev %*% X_exp_Xb_rev_agg; 
+P2 = cumsum (removeEmpty (target = (X_exp_Xb_cum * d_r) / exp_Xb_cum ^ 2, margin = "rows"));
+
+exp_Yb = exp (Y %*% b);	
+exp_Yb_2 = exp_Yb ^ 2;
+Y_exp_Yb = Y * exp (Y %*% b);
+
+# estimates of cumulative hazard
+H = exp_Yb * (Ind %*% H0);
+
+# term1
+term1 = exp_Yb_2 * (Ind %*% P1);
+
+# term2
+P3 = cumsum (removeEmpty (target = (exp_Xb_cum * d_r) / exp_Xb_cum ^ 2, margin = "rows"));
+P4 = (Ind %*% P2) * exp_Yb;
+P5 = Y_exp_Yb * (Ind %*% P3);
+term2 =  P4 - P5; 
+
+# standard error of the estimates of cumulative hazard
+se_H = sqrt (term1 + rowSums((term2 %*% COV) * term2));
+
+# prepare output matrix
+P = matrix (0, rows = nrow (Y), cols = 6);
+P[,1] = LP;
+P[,2] = se_LP;
+P[,3] = R; 
+P[,4] = se_R;
+P[,5] = H;
+P[,6] = se_H;
+write (P, fileP, format=fmtO);
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/scripts/algorithms/Cox.dml
----------------------------------------------------------------------
diff --git a/scripts/algorithms/Cox.dml b/scripts/algorithms/Cox.dml
index 3cab74d..6da22ce 100644
--- a/scripts/algorithms/Cox.dml
+++ b/scripts/algorithms/Cox.dml
@@ -1,502 +1,502 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-#  
-# THIS SCRIPT FITS A COX PROPORTIONAL HAZARD REGRESSION MODEL.
-# The Breslow method is used for handling ties and the regression parameters 
-# are computed using trust region newton method with conjugate gradient 
-# 
-# INPUT   PARAMETERS:
-# ---------------------------------------------------------------------------------------------
-# NAME    TYPE     DEFAULT      MEANING
-# ---------------------------------------------------------------------------------------------
-# X       String   ---          Location to read the input matrix X containing the survival data containing the following information
-# 								 - 1: timestamps 
-#								 - 2: whether an event occurred (1) or data is censored (0)
-#								 - 3: feature vectors 	
-# TE	  String   ---          Column indices of X as a column vector which contain timestamp (first row) and event information (second row)
-# F 	  String   " "			Column indices of X as a column vector which are to be used for fitting the Cox model
-# R   	  String   " "	        If factors (categorical variables) are available in the input matrix X, location to read matrix R containing 
-#								the start and end indices of the factors in X
-#								 - R[,1]: start indices
-#								 - R[,2]: end indices	
-#								Alternatively, user can specify the indices of the baseline level of each factor which needs to be removed from X; 
-#								in this case the start and end indices corresponding to the baseline level need to be the same;
-#								if R is not provided by default all variables are considered to be continuous 
-# M       String   ---          Location to store the results of Cox regression analysis including estimated regression parameters of the fitted 
-#								Cox model (the betas), their standard errors, confidence intervals, and P-values 
-# S       String   " "          Location to store a summary of some statistics of the fitted cox proportional hazard model including			
-#								no. of records, no. of events, log-likelihood, AIC, Rsquare (Cox & Snell), and max possible Rsquare; 
-#								by default is standard output  
-# T       String   " "          Location to store the results of Likelihood ratio test, Wald test, and Score (log-rank) test of the fitted model;
-#								by default is standard output 
-# COV	  String   ---			Location to store the variance-covariance matrix of the betas
-# RT      String   ---			Location to store matrix RT containing the order-preserving recoded timestamps from X 
-# XO      String   ---			Location to store sorted input matrix by the timestamps 
-# MF      String   ---          Location to store column indices of X excluding the baseline factors if available
-# alpha   Double   0.05         Parameter to compute a 100*(1-alpha)% confidence interval for the betas  
-# tol     Double   0.000001     Tolerance ("epsilon")
-# moi     Int      100     		Max. number of outer (Newton) iterations
-# mii     Int      0      		Max. number of inner (conjugate gradient) iterations, 0 = no max   
-# fmt     String   "text"       Matrix output format, usually "text" or "csv" (for matrices only)
-# ---------------------------------------------------------------------------------------------
-# OUTPUT: 
-# 1- A D x 7 matrix M, where D denotes the number of covariates, with the following schema:
-#	M[,1]: betas	
-#	M[,2]: exp(betas)
-#	M[,3]: standard error of betas
-#	M[,4]: Z 
-#	M[,5]: P-value
-#	M[,6]: lower 100*(1-alpha)% confidence interval of betas
-#	M[,7]: upper 100*(1-alpha)% confidence interval of betas
-#
-# Two log files containing a summary of some statistics of the fitted model:
-# 1- File S with the following format 
-#	- line 1: no. of observations
-#	- line 2: no. of events
-#   - line 3: log-likelihood 
-#	- line 4: AIC
-#	- line 5: Rsquare (Cox & Snell)
-#	- line 6: max possible Rsquare
-# 2- File T with the following format
-#	- line 1: Likelihood ratio test statistic, degree of freedom, P-value
-#	- line 2: Wald test statistic, degree of freedom, P-value
-#	- line 3: Score (log-rank) test statistic, degree of freedom, P-value
-# 
-# Additionally, the following matrices are stored (needed for prediction)
-# 1- A column matrix RT that contains the order-preserving recoded timestamps from X 
-# 2- Matrix XO which is matrix X with sorted timestamps  
-# 3- Variance-covariance matrix of the betas COV
-# 4- A column matrix MF that contains the column indices of X with the baseline factors removed (if available)
-# -------------------------------------------------------------------------------------------
-# HOW TO INVOKE THIS SCRIPT - EXAMPLE:
-# hadoop jar SystemML.jar -f Cox.dml -nvargs X=INPUT_DIR/X TE=INPUT_DIR/TE F=INTPUT_DIR/F R=INTPUT_DIR/R 
-#									  M=OUTPUT_DIR/M S=OUTPUT_DIR/S T=OUTPUT_DIR/T COV=OUTPUT_DIR/COV RT=OUTPUT_DIR/RT 
-#									  XO=OUTPUT_DIR/XO MF=OUTPUT/MF alpha=0.05 tol=0.000001 moi=100 mii=20 fmt=csv
-
-fileX = $X;
-fileTE = $TE;
-fileRT = $RT;
-fileMF = $MF;
-fileM = $M;
-fileXO = $XO;
-fileCOV = $COV;
-
-# Default values of some parameters
-fileF = ifdef ($F, " ");			 # $F=" "
-fileR = ifdef ($R, " ");         	 # $R=" " 
-fileS = ifdef ($S, " ");         	 # $S=" " 
-fileT = ifdef ($T, " ");         	 # $T=" " 
-fmtO = ifdef ($fmt, "text");         # $fmt="text" 
-alpha = ifdef ($alpha, 0.05);        # $alpha=0.05
-tol = ifdef ($tol, 0.000001);   	 # $tol=0.000001;
-maxiter = ifdef ($moi, 100);    	 # $moi=100;
-maxinneriter = ifdef ($mii, 0);      # $mii=0;
-
-X_orig = read (fileX);
-
-TE = read (fileTE);
-if (fileF != " ") {
-	F = read (fileF);
-}
-
-######## CHECK FOR FACTORS AND REMOVE THE BASELINE OF EACH FACTOR FROM THE DATASET
-
-if (fileR != " ") { # factors available
-	R = read (fileR);
-	if (ncol (R) != 2) {
-		stop ("Matrix R has wrong dimensions!");
-	}
-	print ("REMOVING BASLINE LEVEL OF EACH FACTOR...");	
-	# identify baseline columns to be removed from X_orig
-	col_sum = colSums (X_orig);
-	col_seq = t (seq(1, ncol (X_orig)));
-	parfor (i in 1:nrow (R), check = 0) {
-		start_ind = as.scalar (R[i,1]);
-		end_ind = as.scalar (R[i,2]);
-		baseline_ind = as.scalar (rowIndexMax (col_sum[1, start_ind:end_ind])) + start_ind - 1;
-		col_seq[,baseline_ind] = 0;
-	}
-	ones = matrix (1, rows = nrow (F), cols = 1);
-	F_filter = table (ones, F, 1, ncol (X_orig));
-	F_filter = removeEmpty (target = F_filter * col_seq, margin = "cols");
-	TE_F = t(append (t (TE), F_filter));
-} else if (fileF != " ") { # all features scale
-	TE_F = t(append (t (TE), t(F)));
-} else { # no features available 
-	TE_F = TE;
-}
-
-write (TE_F, fileMF, format = fmtO);
-
-X_orig = X_orig %*% table (TE_F, seq (1, nrow (TE_F)), ncol (X_orig), nrow (TE_F));
-
-######## RECODING TIMESTAMPS PRESERVING THE ORDER
-print ("RECODING TIMESTAMPS...");
-
-N = nrow (X_orig);
-X_orig = order (target = X_orig, by = 1);
-Idx = matrix (1, rows = N, cols = 1);
-num_timestamps = 1;
-if (N == 1) {
-	RT = matrix (1, rows = 1, cols = 1);
-} else {
-	Idx[2:N,1] = ppred (X_orig[1:(N - 1),1], X_orig[2:N,1], "!=");
-	num_timestamps = sum (Idx);
-	A = removeEmpty (target = diag (Idx), margin = "cols");
-	if (ncol (A) > 1) {
-		A[,1:(ncol (A) - 1)] = A[,1:(ncol (A) - 1)] - A[,2:ncol (A)];
-		B = cumsum (A);
-		RT = B %*% seq(1, ncol(B));
-	} else { # there is only one group
-		RT = matrix (1, rows = N, cols = 1);
-	}
-}
-E = X_orig[,2]; 
-
-print ("BEGIN COX PROPORTIONAL HAZARD SCRIPT");
-
-######## PARAMETERS OF THE TRUST REGION NEWTON METHOD WITH CONJUGATE GRADIENT
-#  b: the regression betas
-#  o: loss function value
-#  g: loss function gradient
-#  H: loss function Hessian
-# sb: shift of b in one iteration
-# so: shift of o in one iteration
-#  r: CG residual = H %*% sb + g
-#  d: CG direction vector
-# Hd: = H %*% d
-#  c: scalar coefficient in CG
-# delta: trust region size
-# tol: tolerance value
-#  i: outer (Newton) iteration count
-#  j: inner (CG) iteration count
-
-# computing initial coefficients b (all initialized to 0)
-if (ncol (X_orig) > 2) {
-	X = X_orig[,3:ncol(X_orig)];
-	D = ncol (X);
-	zeros_D = matrix (0, rows = D, cols = 1);
-	b = zeros_D; 
-}
-d_r = aggregate (target = E, groups = RT, fn = "sum"); 
-e_r = aggregate (target = RT, groups = RT, fn = "count");
-
-# computing initial loss function value o
-num_distinct = nrow (d_r); # no. of distinct timestamps
-e_r_rev_agg = cumsum (rev(e_r));
-d_r_rev = rev(d_r);
-o = sum (d_r_rev * log (e_r_rev_agg));
-o_init = o;
-if (ncol (X_orig) < 3) {
-	loglik = -o;
-	S_str = "no. of records " + N + " loglik " + loglik;
-	if (fileS != " ") {
-		write (S_str, fileS, format = fmtO);
-	} else {
-		print (S_str);
-	}
-	stop ("No features are selected!");
-}
-
-# computing initial gradient g 
-# part 1 g0_1
-g0_1 = - t (colSums (X * E)); # g_1
-# part 2 g0_2
-X_rev_agg = cumsum (rev(X));
-select = table (seq (1, num_distinct), e_r_rev_agg);
-X_agg = select %*% X_rev_agg;
-g0_2 = t (colSums ((X_agg * d_r_rev)/ e_r_rev_agg));
-#
-g0 = g0_1 + g0_2;
-g = g0; 
-
-# initialization for trust region Newton method
-delta = 0.5 * sqrt (D) / max (sqrt (rowSums (X ^ 2))); 
-initial_g2 = sum (g ^ 2);
-exit_g2 = initial_g2 * tol ^ 2;
-maxiter = 100; 
-maxinneriter = min (D, 100); 
-i = 0;
-sum_g2 = sum (g ^ 2);
-while (sum_g2 > exit_g2 & i < maxiter) {  
-	i = i + 1;
-    sb = zeros_D;  
-    r = g;  
-    r2 = sum (r ^ 2);  
-    exit_r2 = 0.01 * r2;
-    d = - r;  
-    trust_bound_reached = FALSE;  
-    j = 0;
-
-	exp_Xb = exp (X %*% b);
-	exp_Xb_agg = aggregate (target = exp_Xb, groups = RT, fn = "sum");
-	D_r_rev = cumsum (rev(exp_Xb_agg)); # denominator
-	X_exp_Xb = X * exp_Xb;
-	X_exp_Xb_rev_agg = cumsum (rev(X_exp_Xb));
-	X_exp_Xb_rev_agg = select %*% X_exp_Xb_rev_agg;
-
-    while (r2 > exit_r2 & (! trust_bound_reached) & j < maxinneriter) { 
-        j = j + 1;
-		# computing Hessian times d (Hd)
-		# part 1 Hd_1
-		Xd = X %*% d;
-		X_Xd_exp_Xb = X * (Xd * exp_Xb);
-		X_Xd_exp_Xb_rev_agg = cumsum (rev(X_Xd_exp_Xb));
-		X_Xd_exp_Xb_rev_agg = select %*% X_Xd_exp_Xb_rev_agg;
-		
-		Hd_1 = X_Xd_exp_Xb_rev_agg / D_r_rev;
-		# part 2 Hd_2
-		
-		Xd_exp_Xb = Xd * exp_Xb;
-		Xd_exp_Xb_rev_agg = cumsum (rev(Xd_exp_Xb));
-		Xd_exp_Xb_rev_agg = select %*% Xd_exp_Xb_rev_agg;
-		
-		Hd_2_num = X_exp_Xb_rev_agg * Xd_exp_Xb_rev_agg; # numerator
-		Hd_2 = Hd_2_num / (D_r_rev ^ 2);
-		
-		Hd = t (colSums ((Hd_1 - Hd_2) * d_r_rev));
-
-		c = r2 / sum (d * Hd);
-        [c, trust_bound_reached] = ensure_trust_bound (c, sum(d ^ 2), 2 * sum(sb * d), sum(sb ^ 2) - delta ^ 2);
-        sb = sb + c * d;  
-        r = r + c * Hd;  
-        r2_new = sum (r ^ 2);
-        d = - r + (r2_new / r2) * d;  
-        r2 = r2_new;
-    }
-
-    # computing loss change in 1 iteration (so)
-	# part 1 so_1
-	so_1 = - as.scalar (colSums (X * E) %*% (b + sb)); 
-	# part 2 so_2
-	exp_Xbsb = exp (X %*% (b + sb));
-	exp_Xbsb_agg = aggregate (target = exp_Xbsb, groups = RT, fn = "sum");
-	so_2 = sum (d_r_rev * log (cumsum (rev(exp_Xbsb_agg))));
-	#
-	so = so_1 + so_2;
-	so = so - o; 
-	
-	delta = update_trust_bound (delta, sqrt (sum (sb ^ 2)), so, sum (sb * g), 0.5 * sum (sb * (r + g)));
-    if (so < 0) {
-        b = b + sb;  
-        o = o + so;  
-		# compute new gradient g
-		exp_Xb = exp (X %*% b);
-		exp_Xb_agg = aggregate (target = exp_Xb, groups = RT, fn = "sum");
-		X_exp_Xb = X * exp_Xb;
-		X_exp_Xb_rev_agg = cumsum (rev(X_exp_Xb));
-		X_exp_Xb_rev_agg = select %*% X_exp_Xb_rev_agg;
-		
-		D_r_rev = cumsum (rev(exp_Xb_agg)); # denominator
-		g_2 = t (colSums ((X_exp_Xb_rev_agg / D_r_rev) * d_r_rev));
-		g = g0_1 + g_2;
-		sum_g2 = sum (g ^ 2);
-    }
-}
-
-if (sum_g2 > exit_g2 & i >= maxiter) {
-	print ("Trust region Newton method did not converge!");
-}
-
-
-print ("COMPUTING HESSIAN...");
-
-H0 = matrix (0, rows = D, cols = D);
-H = matrix (0, rows = D, cols = D);
-
-X_exp_Xb_rev_2 = rev(X_exp_Xb);
-X_rev_2 = rev(X);
-
-X_exp_Xb_rev_agg = cumsum (rev(X_exp_Xb));
-X_exp_Xb_rev_agg = select %*% X_exp_Xb_rev_agg; 
-
-parfor (i in 1:D, check = 0) {
-	Xi = X[,i];
-	Xi_rev = rev(Xi);
-
-	## ----------Start calculating H0--------------	
-	# part 1 H0_1
-	Xi_X = X_rev_2[,i:D] * Xi_rev;
-	Xi_X_rev_agg = cumsum (Xi_X);
-	Xi_X_rev_agg = select %*% Xi_X_rev_agg;
-	H0_1 = Xi_X_rev_agg / e_r_rev_agg;
-
-	# part 2 H0_2
-	Xi_agg = aggregate (target = Xi, groups = RT, fn = "sum");
-	Xi_agg_rev_agg = cumsum (rev(Xi_agg));	
-	H0_2_num = X_agg[,i:D] * Xi_agg_rev_agg; # numerator
-	H0_2 = H0_2_num / (e_r_rev_agg ^ 2);
-
-	H0[i,i:D] = colSums ((H0_1 - H0_2) * d_r_rev);
-	#-----------End calculating H0--------------------
-		
-	## ----------Start calculating H--------------
-	# part 1 H_1
-	Xi_X_exp_Xb = X_exp_Xb_rev_2[,i:D] * Xi_rev;
-	Xi_X_exp_Xb_rev_agg = cumsum (Xi_X_exp_Xb);
-	Xi_X_exp_Xb_rev_agg = select %*% Xi_X_exp_Xb_rev_agg;
-	H_1 = Xi_X_exp_Xb_rev_agg / D_r_rev;
-
-	# part 2 H_2
-	Xi_exp_Xb = exp_Xb * Xi;
-	Xi_exp_Xb_agg = aggregate (target = Xi_exp_Xb, groups = RT, fn = "sum"); 
-	
-	Xi_exp_Xb_agg_rev_agg = cumsum (rev(Xi_exp_Xb_agg));
-	H_2_num = X_exp_Xb_rev_agg[,i:D] * Xi_exp_Xb_agg_rev_agg; # numerator
-	H_2 = H_2_num / (D_r_rev ^ 2);
-	H[i,i:D] = colSums ((H_1 - H_2) * d_r_rev);
-	#-----------End calculating H--------------------
-}
-H = H + t(H) - diag( diag (H));
-H0 = H0 + t(H0) - diag( diag (H0));
-
-
-# compute standard error for betas
-H_inv = inv (H);
-se_b = sqrt (diag (H_inv));
-
-# compute exp(b), Z, Pr[>|Z|]
-exp_b = exp (b);
-Z = b / se_b;
-P = matrix (0, rows = D, cols = 1);
-parfor (i in 1:D) {
-	P[i,1] = 2 - 2 * (cdf (target = abs (as.scalar (Z[i,1])), dist = "normal"));
-}
-
-# compute confidence intervals for b
-z_alpha_2 = icdf (target = 1 - alpha / 2, dist = "normal");
-CI_l = b - se_b * z_alpha_2;
-CI_r = b - se_b + z_alpha_2;
-
-######## SOME STATISTICS AND TESTS
-# no. of records
-S_str = "no. of records " + N;
-
-# no.of events
-S_str = append (S_str, "no. of events " + sum (E));
-
-# log-likelihood
-loglik = -o;
-S_str = append (S_str, "loglik " + loglik + " ");
-
-# AIC = -2 * loglik + 2 * D
-AIC = -2 * loglik + 2 * D;
-S_str = append (S_str, "AIC " + AIC + " ");
-
-# Wald test
-wald_t = as.scalar (t(b) %*% H %*% b);
-wald_p = 1 - cdf (target = wald_t, dist = "chisq", df = D);
-T_str = "Wald test = " + wald_t + " on " + D + " df, p = " + wald_p + " ";
-
-# Likelihood ratio test
-lratio_t = 2 * o_init - 2 * o;
-lratio_p = 1 - cdf (target = lratio_t, dist = "chisq", df = D);
-T_str = append (T_str, "Likelihood ratio test = " + lratio_t + " on " + D + " df, p = " + lratio_p + " ");
-
-
-H0_inv = inv (H0);
-score_t = as.scalar (t (g0) %*% H0_inv %*% g0);
-score_p = 1 - cdf (target = score_t, dist = "chisq", df = D);
-T_str = append (T_str, "Score (logrank) test = " + score_t + " on " + D + " df, p = " + score_p + " ");
-
-# Rsquare (Cox & Snell)
-Rsquare = 1 - exp (-lratio_t / N);  
-Rsquare_max = 1 - exp (-2 * o_init / N);
-S_str = append (S_str, "Rsquare (Cox & Snell): " + Rsquare + " ");
-S_str = append (S_str, "max possible Rsquare: " + Rsquare_max);
-
-M = matrix (0, rows = D, cols = 7);
-M[,1] = b;
-M[,2] = exp_b;
-M[,3] = se_b;
-M[,4] = Z;
-M[,5] = P;
-M[,6] = CI_l;
-M[,7] = CI_r;
-
-write (M, fileM, format = fmtO);
-if (fileS != " ") {
-	write (S_str, fileS, format = fmtO);
-} else {
-	print (S_str);
-}
-if (fileT != " ") {
-	write (T_str, fileT, format = fmtO);
-} else {
-	print (T_str);
-}
-# needed for prediction
-write (RT, fileRT, format = fmtO);
-write (H_inv, fileCOV, format = fmtO);
-write (X_orig, fileXO, format = fmtO);
-
-
-####### UDFS FOR TRUST REGION NEWTON METHOD
-
-ensure_trust_bound =
-    function (double x, double a, double b, double c)
-    return (double x_new, boolean is_violated)
-{
-    if (a * x^2 + b * x + c > 0)
-    {
-        is_violated = TRUE;
-        rad = sqrt (b ^ 2 - 4 * a * c);
-        if (b >= 0) {
-            x_new = - (2 * c) / (b + rad);
-        } else {
-            x_new = - (b - rad) / (2 * a);
-        }
-    } else {
-        is_violated = FALSE;
-        x_new = x;
-    }
-}
-
-update_trust_bound =
-    function (double delta,
-              double sb_distance,
-              double so_exact,
-              double so_linear_approx,
-              double so_quadratic_approx)
-    return   (double delta)
-{
-    sigma1 = 0.25;
-    sigma2 = 0.5;
-    sigma3 = 4.0;
-
-    if (so_exact <= so_linear_approx) {
-       alpha = sigma3;
-    } else {
-       alpha = max (sigma1, - 0.5 * so_linear_approx / (so_exact - so_linear_approx));
-    }
-
-    rho = so_exact / so_quadratic_approx;
-    if (rho < 0.0001) {
-        delta = min (max (alpha, sigma1) * sb_distance, sigma2 * delta);
-    } else { if (rho < 0.25) {
-        delta = max (sigma1 * delta, min (alpha * sb_distance, sigma2 * delta));
-    } else { if (rho < 0.75) {
-        delta = max (sigma1 * delta, min (alpha * sb_distance, sigma3 * delta));
-    } else {
-        delta = max (delta, min (alpha * sb_distance, sigma3 * delta));
-    }}} 
-}
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+#  
+# THIS SCRIPT FITS A COX PROPORTIONAL HAZARD REGRESSION MODEL.
+# The Breslow method is used for handling ties, and the regression parameters 
+# are computed using the trust region Newton method with conjugate gradient. 
+# 
+# INPUT   PARAMETERS:
+# ---------------------------------------------------------------------------------------------
+# NAME    TYPE     DEFAULT      MEANING
+# ---------------------------------------------------------------------------------------------
+# X       String   ---          Location to read the input matrix X containing the survival data with the following information:
+# 								 - 1: timestamps 
+#								 - 2: whether an event occurred (1) or data is censored (0)
+#								 - 3: feature vectors 	
+# TE	  String   ---          Location to read column vector TE containing the column indices of X that hold the timestamps (first row) and the event information (second row)
+# F 	  String   " "			Location to read column vector F containing the column indices of X which are to be used for fitting the Cox model
+# R   	  String   " "	        If factors (categorical variables) are available in the input matrix X, location to read matrix R containing 
+#								the start and end indices of the factors in X
+#								 - R[,1]: start indices
+#								 - R[,2]: end indices	
+#								Alternatively, user can specify the indices of the baseline level of each factor which needs to be removed from X; 
+#								in this case the start and end indices corresponding to the baseline level need to be the same;
+#								if R is not provided by default all variables are considered to be continuous 
+# M       String   ---          Location to store the results of Cox regression analysis including estimated regression parameters of the fitted 
+#								Cox model (the betas), their standard errors, confidence intervals, and P-values 
+# S       String   " "          Location to store a summary of some statistics of the fitted cox proportional hazard model including			
+#								no. of records, no. of events, log-likelihood, AIC, Rsquare (Cox & Snell), and max possible Rsquare; 
+#								by default is standard output  
+# T       String   " "          Location to store the results of Likelihood ratio test, Wald test, and Score (log-rank) test of the fitted model;
+#								by default is standard output 
+# COV	  String   ---			Location to store the variance-covariance matrix of the betas
+# RT      String   ---			Location to store matrix RT containing the order-preserving recoded timestamps from X 
+# XO      String   ---			Location to store sorted input matrix by the timestamps 
+# MF      String   ---          Location to store column indices of X excluding the baseline factors if available
+# alpha   Double   0.05         Parameter to compute a 100*(1-alpha)% confidence interval for the betas  
+# tol     Double   0.000001     Tolerance ("epsilon")
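+# moi     Int      100     		Max. number of outer (Newton) iterations (NOTE: currently ignored, the script resets it to 100 before the Newton loop)
+# mii     Int      0      		Max. number of inner (conjugate gradient) iterations, 0 = no max (NOTE: currently ignored, the script resets it to min (D, 100))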
+# fmt     String   "text"       Matrix output format, usually "text" or "csv" (for matrices only)
+# ---------------------------------------------------------------------------------------------
+# OUTPUT: 
+# 1- A D x 7 matrix M, where D denotes the number of covariates, with the following schema:
+#	M[,1]: betas	
+#	M[,2]: exp(betas)
+#	M[,3]: standard error of betas
+#	M[,4]: Z 
+#	M[,5]: P-value
+#	M[,6]: lower 100*(1-alpha)% confidence interval of betas
+#	M[,7]: upper 100*(1-alpha)% confidence interval of betas
+#
+# Two log files containing a summary of some statistics of the fitted model:
+# 1- File S with the following format 
+#	- line 1: no. of observations
+#	- line 2: no. of events
+#   - line 3: log-likelihood 
+#	- line 4: AIC
+#	- line 5: Rsquare (Cox & Snell)
+#	- line 6: max possible Rsquare
+# 2- File T with the following format
+#	- line 1: Wald test statistic, degree of freedom, P-value
+#	- line 2: Likelihood ratio test statistic, degree of freedom, P-value
+#	- line 3: Score (log-rank) test statistic, degree of freedom, P-value
+# 
+# Additionally, the following matrices are stored (needed for prediction)
+# 1- A column matrix RT that contains the order-preserving recoded timestamps from X 
+# 2- Matrix XO which is matrix X with sorted timestamps  
+# 3- Variance-covariance matrix of the betas COV
+# 4- A column matrix MF that contains the column indices of X with the baseline factors removed (if available)
+# -------------------------------------------------------------------------------------------
+# HOW TO INVOKE THIS SCRIPT - EXAMPLE:
+# hadoop jar SystemML.jar -f Cox.dml -nvargs X=INPUT_DIR/X TE=INPUT_DIR/TE F=INPUT_DIR/F R=INPUT_DIR/R 
+#									  M=OUTPUT_DIR/M S=OUTPUT_DIR/S T=OUTPUT_DIR/T COV=OUTPUT_DIR/COV RT=OUTPUT_DIR/RT 
+#									  XO=OUTPUT_DIR/XO MF=OUTPUT_DIR/MF alpha=0.05 tol=0.000001 moi=100 mii=20 fmt=csv
+
+fileX = $X;
+fileTE = $TE;
+fileRT = $RT;
+fileMF = $MF;
+fileM = $M;
+fileXO = $XO;
+fileCOV = $COV;
+
+# Default values of some parameters
+fileF = ifdef ($F, " ");			 # $F=" "
+fileR = ifdef ($R, " ");         	 # $R=" " 
+fileS = ifdef ($S, " ");         	 # $S=" " 
+fileT = ifdef ($T, " ");         	 # $T=" " 
+fmtO = ifdef ($fmt, "text");         # $fmt="text" 
+alpha = ifdef ($alpha, 0.05);        # $alpha=0.05
+tol = ifdef ($tol, 0.000001);   	 # $tol=0.000001;
+maxiter = ifdef ($moi, 100);    	 # $moi=100;
+maxinneriter = ifdef ($mii, 0);      # $mii=0;
+
+X_orig = read (fileX);
+
+TE = read (fileTE);
+if (fileF != " ") {
+	F = read (fileF);
+}
+
+######## CHECK FOR FACTORS AND REMOVE THE BASELINE OF EACH FACTOR FROM THE DATASET
+
+if (fileR != " ") { # factors available
+	R = read (fileR);
+	if (ncol (R) != 2) {
+		stop ("Matrix R has wrong dimensions!");
+	}
+	print ("REMOVING BASLINE LEVEL OF EACH FACTOR...");	
+	# identify baseline columns to be removed from X_orig
+	col_sum = colSums (X_orig);
+	col_seq = t (seq(1, ncol (X_orig)));
+	parfor (i in 1:nrow (R), check = 0) {
+		start_ind = as.scalar (R[i,1]);
+		end_ind = as.scalar (R[i,2]);
+		baseline_ind = as.scalar (rowIndexMax (col_sum[1, start_ind:end_ind])) + start_ind - 1;
+		col_seq[,baseline_ind] = 0;
+	}
+	ones = matrix (1, rows = nrow (F), cols = 1);
+	F_filter = table (ones, F, 1, ncol (X_orig));
+	F_filter = removeEmpty (target = F_filter * col_seq, margin = "cols");
+	TE_F = t(append (t (TE), F_filter));
+} else if (fileF != " ") { # all features scale
+	TE_F = t(append (t (TE), t(F)));
+} else { # no features available 
+	TE_F = TE;
+}
+
+write (TE_F, fileMF, format = fmtO);
+
+X_orig = X_orig %*% table (TE_F, seq (1, nrow (TE_F)), ncol (X_orig), nrow (TE_F));
+
+######## RECODING TIMESTAMPS PRESERVING THE ORDER
+print ("RECODING TIMESTAMPS...");
+
+N = nrow (X_orig);
+X_orig = order (target = X_orig, by = 1);
+Idx = matrix (1, rows = N, cols = 1);
+num_timestamps = 1;
+if (N == 1) {
+	RT = matrix (1, rows = 1, cols = 1);
+} else {
+	Idx[2:N,1] = ppred (X_orig[1:(N - 1),1], X_orig[2:N,1], "!=");
+	num_timestamps = sum (Idx);
+	A = removeEmpty (target = diag (Idx), margin = "cols");
+	if (ncol (A) > 1) {
+		A[,1:(ncol (A) - 1)] = A[,1:(ncol (A) - 1)] - A[,2:ncol (A)];
+		B = cumsum (A);
+		RT = B %*% seq(1, ncol(B));
+	} else { # there is only one group
+		RT = matrix (1, rows = N, cols = 1);
+	}
+}
+E = X_orig[,2]; 
+
+print ("BEGIN COX PROPORTIONAL HAZARD SCRIPT");
+
+######## PARAMETERS OF THE TRUST REGION NEWTON METHOD WITH CONJUGATE GRADIENT
+#  b: the regression betas
+#  o: loss function value
+#  g: loss function gradient
+#  H: loss function Hessian
+# sb: shift of b in one iteration
+# so: shift of o in one iteration
+#  r: CG residual = H %*% sb + g
+#  d: CG direction vector
+# Hd: = H %*% d
+#  c: scalar coefficient in CG
+# delta: trust region size
+# tol: tolerance value
+#  i: outer (Newton) iteration count
+#  j: inner (CG) iteration count
+
+# computing initial coefficients b (all initialized to 0)
+if (ncol (X_orig) > 2) {
+	X = X_orig[,3:ncol(X_orig)];
+	D = ncol (X);
+	zeros_D = matrix (0, rows = D, cols = 1);
+	b = zeros_D; 
+}
+d_r = aggregate (target = E, groups = RT, fn = "sum"); 
+e_r = aggregate (target = RT, groups = RT, fn = "count");
+
+# computing initial loss function value o
+num_distinct = nrow (d_r); # no. of distinct timestamps
+e_r_rev_agg = cumsum (rev(e_r));
+d_r_rev = rev(d_r);
+o = sum (d_r_rev * log (e_r_rev_agg));
+o_init = o;
+if (ncol (X_orig) < 3) {
+	loglik = -o;
+	S_str = "no. of records " + N + " loglik " + loglik;
+	if (fileS != " ") {
+		write (S_str, fileS, format = fmtO);
+	} else {
+		print (S_str);
+	}
+	stop ("No features are selected!");
+}
+
+# computing initial gradient g 
+# part 1 g0_1
+g0_1 = - t (colSums (X * E)); # g_1
+# part 2 g0_2
+X_rev_agg = cumsum (rev(X));
+select = table (seq (1, num_distinct), e_r_rev_agg);
+X_agg = select %*% X_rev_agg;
+g0_2 = t (colSums ((X_agg * d_r_rev)/ e_r_rev_agg));
+#
+g0 = g0_1 + g0_2;
+g = g0; 
+
+# initialization for trust region Newton method
+delta = 0.5 * sqrt (D) / max (sqrt (rowSums (X ^ 2))); 
+initial_g2 = sum (g ^ 2);
+exit_g2 = initial_g2 * tol ^ 2;
+maxiter = 100; 
+maxinneriter = min (D, 100); 
+i = 0;
+sum_g2 = sum (g ^ 2);
+while (sum_g2 > exit_g2 & i < maxiter) {  
+	i = i + 1;
+    sb = zeros_D;  
+    r = g;  
+    r2 = sum (r ^ 2);  
+    exit_r2 = 0.01 * r2;
+    d = - r;  
+    trust_bound_reached = FALSE;  
+    j = 0;
+
+	exp_Xb = exp (X %*% b);
+	exp_Xb_agg = aggregate (target = exp_Xb, groups = RT, fn = "sum");
+	D_r_rev = cumsum (rev(exp_Xb_agg)); # denominator
+	X_exp_Xb = X * exp_Xb;
+	X_exp_Xb_rev_agg = cumsum (rev(X_exp_Xb));
+	X_exp_Xb_rev_agg = select %*% X_exp_Xb_rev_agg;
+
+    while (r2 > exit_r2 & (! trust_bound_reached) & j < maxinneriter) { 
+        j = j + 1;
+		# computing Hessian times d (Hd)
+		# part 1 Hd_1
+		Xd = X %*% d;
+		X_Xd_exp_Xb = X * (Xd * exp_Xb);
+		X_Xd_exp_Xb_rev_agg = cumsum (rev(X_Xd_exp_Xb));
+		X_Xd_exp_Xb_rev_agg = select %*% X_Xd_exp_Xb_rev_agg;
+		
+		Hd_1 = X_Xd_exp_Xb_rev_agg / D_r_rev;
+		# part 2 Hd_2
+		
+		Xd_exp_Xb = Xd * exp_Xb;
+		Xd_exp_Xb_rev_agg = cumsum (rev(Xd_exp_Xb));
+		Xd_exp_Xb_rev_agg = select %*% Xd_exp_Xb_rev_agg;
+		
+		Hd_2_num = X_exp_Xb_rev_agg * Xd_exp_Xb_rev_agg; # numerator
+		Hd_2 = Hd_2_num / (D_r_rev ^ 2);
+		
+		Hd = t (colSums ((Hd_1 - Hd_2) * d_r_rev));
+
+		c = r2 / sum (d * Hd);
+        [c, trust_bound_reached] = ensure_trust_bound (c, sum(d ^ 2), 2 * sum(sb * d), sum(sb ^ 2) - delta ^ 2);
+        sb = sb + c * d;  
+        r = r + c * Hd;  
+        r2_new = sum (r ^ 2);
+        d = - r + (r2_new / r2) * d;  
+        r2 = r2_new;
+    }
+
+    # computing loss change in 1 iteration (so)
+	# part 1 so_1
+	so_1 = - as.scalar (colSums (X * E) %*% (b + sb)); 
+	# part 2 so_2
+	exp_Xbsb = exp (X %*% (b + sb));
+	exp_Xbsb_agg = aggregate (target = exp_Xbsb, groups = RT, fn = "sum");
+	so_2 = sum (d_r_rev * log (cumsum (rev(exp_Xbsb_agg))));
+	#
+	so = so_1 + so_2;
+	so = so - o; 
+	
+	delta = update_trust_bound (delta, sqrt (sum (sb ^ 2)), so, sum (sb * g), 0.5 * sum (sb * (r + g)));
+    if (so < 0) {
+        b = b + sb;  
+        o = o + so;  
+		# compute new gradient g
+		exp_Xb = exp (X %*% b);
+		exp_Xb_agg = aggregate (target = exp_Xb, groups = RT, fn = "sum");
+		X_exp_Xb = X * exp_Xb;
+		X_exp_Xb_rev_agg = cumsum (rev(X_exp_Xb));
+		X_exp_Xb_rev_agg = select %*% X_exp_Xb_rev_agg;
+		
+		D_r_rev = cumsum (rev(exp_Xb_agg)); # denominator
+		g_2 = t (colSums ((X_exp_Xb_rev_agg / D_r_rev) * d_r_rev));
+		g = g0_1 + g_2;
+		sum_g2 = sum (g ^ 2);
+    }
+}
+
+if (sum_g2 > exit_g2 & i >= maxiter) {
+	print ("Trust region Newton method did not converge!");
+}
+
+
+print ("COMPUTING HESSIAN...");
+
+H0 = matrix (0, rows = D, cols = D);
+H = matrix (0, rows = D, cols = D);
+
+X_exp_Xb_rev_2 = rev(X_exp_Xb);
+X_rev_2 = rev(X);
+
+X_exp_Xb_rev_agg = cumsum (rev(X_exp_Xb));
+X_exp_Xb_rev_agg = select %*% X_exp_Xb_rev_agg; 
+
+parfor (i in 1:D, check = 0) {
+	Xi = X[,i];
+	Xi_rev = rev(Xi);
+
+	## ----------Start calculating H0--------------	
+	# part 1 H0_1
+	Xi_X = X_rev_2[,i:D] * Xi_rev;
+	Xi_X_rev_agg = cumsum (Xi_X);
+	Xi_X_rev_agg = select %*% Xi_X_rev_agg;
+	H0_1 = Xi_X_rev_agg / e_r_rev_agg;
+
+	# part 2 H0_2
+	Xi_agg = aggregate (target = Xi, groups = RT, fn = "sum");
+	Xi_agg_rev_agg = cumsum (rev(Xi_agg));	
+	H0_2_num = X_agg[,i:D] * Xi_agg_rev_agg; # numerator
+	H0_2 = H0_2_num / (e_r_rev_agg ^ 2);
+
+	H0[i,i:D] = colSums ((H0_1 - H0_2) * d_r_rev);
+	#-----------End calculating H0--------------------
+		
+	## ----------Start calculating H--------------
+	# part 1 H_1
+	Xi_X_exp_Xb = X_exp_Xb_rev_2[,i:D] * Xi_rev;
+	Xi_X_exp_Xb_rev_agg = cumsum (Xi_X_exp_Xb);
+	Xi_X_exp_Xb_rev_agg = select %*% Xi_X_exp_Xb_rev_agg;
+	H_1 = Xi_X_exp_Xb_rev_agg / D_r_rev;
+
+	# part 2 H_2
+	Xi_exp_Xb = exp_Xb * Xi;
+	Xi_exp_Xb_agg = aggregate (target = Xi_exp_Xb, groups = RT, fn = "sum"); 
+	
+	Xi_exp_Xb_agg_rev_agg = cumsum (rev(Xi_exp_Xb_agg));
+	H_2_num = X_exp_Xb_rev_agg[,i:D] * Xi_exp_Xb_agg_rev_agg; # numerator
+	H_2 = H_2_num / (D_r_rev ^ 2);
+	H[i,i:D] = colSums ((H_1 - H_2) * d_r_rev);
+	#-----------End calculating H--------------------
+}
+H = H + t(H) - diag( diag (H));
+H0 = H0 + t(H0) - diag( diag (H0));
+
+
+# compute standard error for betas
+H_inv = inv (H);
+se_b = sqrt (diag (H_inv));
+
+# compute exp(b), Z, Pr[>|Z|]
+exp_b = exp (b);
+Z = b / se_b;
+P = matrix (0, rows = D, cols = 1);
+parfor (i in 1:D) {
+	P[i,1] = 2 - 2 * (cdf (target = abs (as.scalar (Z[i,1])), dist = "normal"));
+}
+
+# compute the two-sided (1 - alpha) confidence interval for each beta: b -/+ z_{alpha/2} * se_b
+z_alpha_2 = icdf (target = 1 - alpha / 2, dist = "normal");
+CI_l = b - se_b * z_alpha_2;
+CI_r = b + se_b * z_alpha_2; # upper bound mirrors CI_l (was "b - se_b + z_alpha_2", which is not an interval bound)
+
+######## SOME STATISTICS AND TESTS
+# no. of records
+S_str = "no. of records " + N;
+
+# no.of events
+S_str = append (S_str, "no. of events " + sum (E));
+
+# log-likelihood
+loglik = -o;
+S_str = append (S_str, "loglik " + loglik + " ");
+
+# AIC = -2 * loglik + 2 * D
+AIC = -2 * loglik + 2 * D;
+S_str = append (S_str, "AIC " + AIC + " ");
+
+# Wald test
+wald_t = as.scalar (t(b) %*% H %*% b);
+wald_p = 1 - cdf (target = wald_t, dist = "chisq", df = D);
+T_str = "Wald test = " + wald_t + " on " + D + " df, p = " + wald_p + " ";
+
+# Likelihood ratio test
+lratio_t = 2 * o_init - 2 * o;
+lratio_p = 1 - cdf (target = lratio_t, dist = "chisq", df = D);
+T_str = append (T_str, "Likelihood ratio test = " + lratio_t + " on " + D + " df, p = " + lratio_p + " ");
+
+
+H0_inv = inv (H0);
+score_t = as.scalar (t (g0) %*% H0_inv %*% g0);
+score_p = 1 - cdf (target = score_t, dist = "chisq", df = D);
+T_str = append (T_str, "Score (logrank) test = " + score_t + " on " + D + " df, p = " + score_p + " ");
+
+# Rsquare (Cox & Snell)
+Rsquare = 1 - exp (-lratio_t / N);  
+Rsquare_max = 1 - exp (-2 * o_init / N);
+S_str = append (S_str, "Rsquare (Cox & Snell): " + Rsquare + " ");
+S_str = append (S_str, "max possible Rsquare: " + Rsquare_max);
+
+M = matrix (0, rows = D, cols = 7);
+M[,1] = b;
+M[,2] = exp_b;
+M[,3] = se_b;
+M[,4] = Z;
+M[,5] = P;
+M[,6] = CI_l;
+M[,7] = CI_r;
+
+write (M, fileM, format = fmtO);
+if (fileS != " ") {
+	write (S_str, fileS, format = fmtO);
+} else {
+	print (S_str);
+}
+if (fileT != " ") {
+	write (T_str, fileT, format = fmtO);
+} else {
+	print (T_str);
+}
+# needed for prediction
+write (RT, fileRT, format = fmtO);
+write (H_inv, fileCOV, format = fmtO);
+write (X_orig, fileXO, format = fmtO);
+
+
+####### UDFS FOR TRUST REGION NEWTON METHOD
+
+ensure_trust_bound =
+    function (double x, double a, double b, double c)
+    return (double x_new, boolean is_violated)
+{ # returns x unchanged if a*x^2 + b*x + c <= 0, otherwise a root of that quadratic plus is_violated = TRUE
+    if (a * x^2 + b * x + c > 0) # the quadratic is positive at x: the bound is violated
+    {
+        is_violated = TRUE;
+        rad = sqrt (b ^ 2 - 4 * a * c); # square root of the discriminant
+        if (b >= 0) {
+            x_new = - (2 * c) / (b + rad); # algebraically the same root as (-b + rad) / (2a), written without the subtraction -b + rad (presumably to avoid cancellation)
+        } else {
+            x_new = - (b - rad) / (2 * a); # the root (-b + rad) / (2a)
+        }
+    } else {
+        is_violated = FALSE;
+        x_new = x; # x already satisfies the bound; pass it through
+    }
+}
+
+update_trust_bound =
+    function (double delta,
+              double sb_distance,
+              double so_exact,
+              double so_linear_approx,
+              double so_quadratic_approx)
+    return   (double delta)
+{ # returns the resized delta, chosen from how so_exact compares with the linear and quadratic approximations
+    sigma1 = 0.25; # smallest scaling factor used below
+    sigma2 = 0.5; # intermediate scaling factor used below
+    sigma3 = 4.0; # largest scaling factor used below
+
+    if (so_exact <= so_linear_approx) { # exact change is at or below the linear approximation
+       alpha = sigma3;
+    } else {
+       alpha = max (sigma1, - 0.5 * so_linear_approx / (so_exact - so_linear_approx)); # floored at sigma1
+    }
+
+    rho = so_exact / so_quadratic_approx; # ratio of the exact change to the quadratic-model change
+    if (rho < 0.0001) {
+        delta = min (max (alpha, sigma1) * sb_distance, sigma2 * delta); # result is at most sigma2 * delta
+    } else { if (rho < 0.25) {
+        delta = max (sigma1 * delta, min (alpha * sb_distance, sigma2 * delta)); # result lies in [sigma1, sigma2] * delta
+    } else { if (rho < 0.75) {
+        delta = max (sigma1 * delta, min (alpha * sb_distance, sigma3 * delta)); # result lies in [sigma1, sigma3] * delta
+    } else {
+        delta = max (delta, min (alpha * sb_distance, sigma3 * delta)); # never shrinks: result lies in [1, sigma3] * delta
+    }}} 
+}


[28/55] [partial] incubator-systemml git commit: [SYSTEMML-482] [SYSTEMML-480] Adding a Git attributes file to enfore Unix-styled line endings, and normalizing all of the line endings.

Posted by du...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/descriptivestats/ScaleScalePearsonRWithWeightsTest.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/descriptivestats/ScaleScalePearsonRWithWeightsTest.R b/src/test/scripts/applications/descriptivestats/ScaleScalePearsonRWithWeightsTest.R
index fa3b66a..1375435 100644
--- a/src/test/scripts/applications/descriptivestats/ScaleScalePearsonRWithWeightsTest.R
+++ b/src/test/scripts/applications/descriptivestats/ScaleScalePearsonRWithWeightsTest.R
@@ -1,45 +1,45 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# JUnit test class: dml.test.integration.descriptivestats.CategoricalCategoricalTest.java
-# command line invocation assuming $SS_HOME is set to the home of the R script
-# Rscript $SS_HOME/ScaleScalePearsonRWithWeightsTest.R $SS_HOME/in/ $SS_HOME/expected/
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-library("boot")
-# Usage: R --vanilla -args Xfile X < ScaleScaleTest.R
-
-#parseCommandArgs()
-######################
-
-X = readMM(paste(args[1], "X.mtx", sep=""))
-Y = readMM(paste(args[1], "Y.mtx", sep=""))
-WM = readMM(paste(args[1],"WM.mtx", sep=""))
-
-# create a matrix from X and Y vectors
-mat = cbind(X[,1], Y[,1]);
-
-# corr is a function in "boot" package
-R = corr(mat, WM[,1]);
-
-write(R, paste(args[2], "PearsonR", sep=""));
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# JUnit test class: dml.test.integration.descriptivestats.CategoricalCategoricalTest.java
+# command line invocation assuming $SS_HOME is set to the home of the R script
+# Rscript $SS_HOME/ScaleScalePearsonRWithWeightsTest.R $SS_HOME/in/ $SS_HOME/expected/
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+library("boot")
+# Usage: R --vanilla -args Xfile X < ScaleScaleTest.R
+
+#parseCommandArgs()
+######################
+
+X = readMM(paste(args[1], "X.mtx", sep=""))
+Y = readMM(paste(args[1], "Y.mtx", sep=""))
+WM = readMM(paste(args[1],"WM.mtx", sep=""))
+
+# create a matrix from X and Y vectors
+mat = cbind(X[,1], Y[,1]);
+
+# corr is a function in "boot" package
+R = corr(mat, WM[,1]);
+
+write(R, paste(args[2], "PearsonR", sep=""));

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/descriptivestats/ScaleScalePearsonRWithWeightsTest.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/descriptivestats/ScaleScalePearsonRWithWeightsTest.dml b/src/test/scripts/applications/descriptivestats/ScaleScalePearsonRWithWeightsTest.dml
index 82ec837..a3bcf33 100644
--- a/src/test/scripts/applications/descriptivestats/ScaleScalePearsonRWithWeightsTest.dml
+++ b/src/test/scripts/applications/descriptivestats/ScaleScalePearsonRWithWeightsTest.dml
@@ -1,51 +1,51 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# Note this script is externalized to customers, please do not change w/o consulting component owner.
-# How to invoke this dml script ScaleScale.dml?
-# Assume $SS_HOME is set to the home of the dml script
-# Assume input and output directories are on hdfs as INPUT_DIR and OUTPUT_DIR
-# Assume X and Y are scale variables and both have 100000 rows
-# hadoop jar SystemML.jar -f $SS_HOME/ScaleScale.dml -args "$INPUT_DIR/X" 100000 "$INPUT_DIR/Y" "$INPUT_DIR/WM" "$OUPUT_DIR/PearsonR"
-
-#X <- scale variable
-#Y <- scale variable
-#WM <- weights
-
-X = read($1, rows=$2, cols=1, format="text");
-Y = read($3, rows=$2, cols=1, format="text");
-WM = read($4, rows=$2, cols=1, format="text");
-
-W = sum(WM);
-
-# weighted co-variance
-covXY = cov(X,Y,WM);
-
-# compute standard deviations for both X and Y by computing 2^nd central moment
-m2X = moment(X,WM,2);
-m2Y = moment(Y,WM,2);
-sigmaX = sqrt(m2X * (W/(W-1.0)) );
-sigmaY = sqrt(m2Y * (W/(W-1.0)) );
-
-# Pearson's R
-R = covXY / (sigmaX*sigmaY);
-
-write(R, $5);
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# Note this script is externalized to customers, please do not change w/o consulting component owner.
+# How to invoke this dml script ScaleScale.dml?
+# Assume $SS_HOME is set to the home of the dml script
+# Assume input and output directories are on hdfs as INPUT_DIR and OUTPUT_DIR
+# Assume X and Y are scale variables and both have 100000 rows
+# hadoop jar SystemML.jar -f $SS_HOME/ScaleScale.dml -args "$INPUT_DIR/X" 100000 "$INPUT_DIR/Y" "$INPUT_DIR/WM" "$OUTPUT_DIR/PearsonR"
+
+#X <- scale variable
+#Y <- scale variable
+#WM <- weights
+
+X = read($1, rows=$2, cols=1, format="text");
+Y = read($3, rows=$2, cols=1, format="text");
+WM = read($4, rows=$2, cols=1, format="text");
+
+W = sum(WM);
+
+# weighted co-variance
+covXY = cov(X,Y,WM);
+
+# compute standard deviations for both X and Y by computing 2^nd central moment
+m2X = moment(X,WM,2);
+m2Y = moment(Y,WM,2);
+sigmaX = sqrt(m2X * (W/(W-1.0)) );
+sigmaY = sqrt(m2Y * (W/(W-1.0)) );
+
+# Pearson's R
+R = covXY / (sigmaX*sigmaY);
+
+write(R, $5);

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/descriptivestats/SimpleQuantileTest.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/descriptivestats/SimpleQuantileTest.dml b/src/test/scripts/applications/descriptivestats/SimpleQuantileTest.dml
index 53aea26..9f7a9be 100644
--- a/src/test/scripts/applications/descriptivestats/SimpleQuantileTest.dml
+++ b/src/test/scripts/applications/descriptivestats/SimpleQuantileTest.dml
@@ -1,35 +1,35 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-$$readhelper$$
-
-V = read("$$indir$$vector", rows=$$rows$$, cols=1, format="text")
-W = read("$$indir$$weight", rows=$$rows$$, cols=1, format="text")
-
-# median
-md = median(V) #quantile(V, 0.5)
-medianHelper1 = md * Helper;
-write(medianHelper1, "$$outdir$$median", format="text");
-
-# weighted median
-wmd = median(V,W) #quantile(V, W, 0.5)
-medianHelper2 = wmd * Helper;
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+$$readhelper$$
+
+V = read("$$indir$$vector", rows=$$rows$$, cols=1, format="text")
+W = read("$$indir$$weight", rows=$$rows$$, cols=1, format="text")
+
+# median
+md = median(V) #quantile(V, 0.5)
+medianHelper1 = md * Helper;
+write(medianHelper1, "$$outdir$$median", format="text");
+
+# weighted median
+wmd = median(V,W) #quantile(V, W, 0.5)
+medianHelper2 = wmd * Helper;
 write(medianHelper2, "$$outdir$$weighted_median", format="text");
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/descriptivestats/WeightedCategoricalTest.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/descriptivestats/WeightedCategoricalTest.R b/src/test/scripts/applications/descriptivestats/WeightedCategoricalTest.R
index a5986db..c7b8e8f 100644
--- a/src/test/scripts/applications/descriptivestats/WeightedCategoricalTest.R
+++ b/src/test/scripts/applications/descriptivestats/WeightedCategoricalTest.R
@@ -1,63 +1,63 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# JUnit test class: dml.test.integration.descriptivestats.UnivariateStatsTest.java
-# command line invocation assuming $C_HOME is set to the home of the R script
-# Rscript $C_HOME/Categorical.R $C_HOME/in/ $C_HOME/expected/
-args <- commandArgs(TRUE)
-options(digits=22)
-
-#library("batch")
-library("Matrix")
-# Usage: R --vanilla -args Xfile X < DescriptiveStatistics.R
-
-#parseCommandArgs()
-######################
-
-V = readMM(paste(args[1], "vector.mtx", sep=""))
-W = readMM(paste(args[1], "weight.mtx", sep=""))
-
-tab = table(rep(V[,1],W[,1]))
-cat = t(as.numeric(names(tab)))
-Nc = t(as.vector(tab))
-
-# the number of categories of a categorical variable
-R = length(Nc)
-
-# total count
-s = sum(Nc)
-
-# percentage values of each categorical compare to the total case number
-Pc = Nc / s
-
-# all categorical values of a categorical variable
-#C = t(as.matrix(as.numeric(Nc > 0)))
-C= (Nc > 0)
-
-# mode
-mx = max(Nc)
-Mode = (Nc == mx)
-
-writeMM(as(t(Nc),"CsparseMatrix"), paste(args[2], "Nc", sep=""), format="text");
-write(R, paste(args[2], "R", sep=""));
-writeMM(as(t(Pc),"CsparseMatrix"), paste(args[2], "Pc", sep=""), format="text");
-writeMM(as(t(C),"CsparseMatrix"), paste(args[2], "C", sep=""), format="text");
-writeMM(as(t(Mode),"CsparseMatrix"), paste(args[2], "Mode", sep=""), format="text");
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# JUnit test class: dml.test.integration.descriptivestats.UnivariateStatsTest.java
+# command line invocation assuming $C_HOME is set to the home of the R script
+# Rscript $C_HOME/Categorical.R $C_HOME/in/ $C_HOME/expected/
+args <- commandArgs(TRUE)
+options(digits=22)
+
+#library("batch")
+library("Matrix")
+# Usage: R --vanilla -args Xfile X < DescriptiveStatistics.R
+
+#parseCommandArgs()
+######################
+
+V = readMM(paste(args[1], "vector.mtx", sep=""))
+W = readMM(paste(args[1], "weight.mtx", sep=""))
+
+tab = table(rep(V[,1],W[,1]))
+cat = t(as.numeric(names(tab)))
+Nc = t(as.vector(tab))
+
+# the number of categories of a categorical variable
+R = length(Nc)
+
+# total count
+s = sum(Nc)
+
+# percentage values of each categorical compare to the total case number
+Pc = Nc / s
+
+# all categorical values of a categorical variable
+#C = t(as.matrix(as.numeric(Nc > 0)))
+C= (Nc > 0)
+
+# mode
+mx = max(Nc)
+Mode = (Nc == mx)
+
+writeMM(as(t(Nc),"CsparseMatrix"), paste(args[2], "Nc", sep=""), format="text");
+write(R, paste(args[2], "R", sep=""));
+writeMM(as(t(Pc),"CsparseMatrix"), paste(args[2], "Pc", sep=""), format="text");
+writeMM(as(t(C),"CsparseMatrix"), paste(args[2], "C", sep=""), format="text");
+writeMM(as(t(Mode),"CsparseMatrix"), paste(args[2], "Mode", sep=""), format="text");

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/descriptivestats/WeightedCategoricalTest.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/descriptivestats/WeightedCategoricalTest.dml b/src/test/scripts/applications/descriptivestats/WeightedCategoricalTest.dml
index d838cff..cacc311 100644
--- a/src/test/scripts/applications/descriptivestats/WeightedCategoricalTest.dml
+++ b/src/test/scripts/applications/descriptivestats/WeightedCategoricalTest.dml
@@ -1,56 +1,56 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# Note this script is externalized to customers, please do not change w/o consulting component owner.
-# How to invoke this dml script Categorical.dml?
-# Assume C_HOME is set to the home of the dml script
-# Assume input and output directories are on hdfs as INPUT_DIR and OUTPUT_DIR
-# Assume rows = 10000 for vector
-# hadoop jar SystemML.jar -f $C_HOME/Categorical.dml -args "$INPUT_DIR/vector" 10000 "$INPUT_DIR/W" "$OUTPUT_DIR/Nc" "$OUPUT_DIR/R" "$OUTPUT_DIR/Pc" "$OUTPUT_DIR/C" "$OUTPUT_DIR/Mode"
-
-V = read($1, rows=$2, cols=1, format="text")
-W = read($3, rows=$2, cols=1, format="text")
-
-# a set of number of values specify the number of cases of each categorical
-Nc = table(V, 1, W);
-
-# the number of categories of a categorical variable
-R = nrow(Nc)
-
-# total count
-s = sum(Nc)
-
-# percentage values of each categorical compare to the total case number
-Pc = Nc / s
-
-# all categorical values of a categorical variable
-C = ppred(Nc, 0, ">")
-
-# mode
-mx = max(Nc)
-Mode =  ppred(Nc, mx, "==")
-
-write(Nc, $4, format="text")
-write(R, $5)
-write(Pc, $6, format="text")
-write(C, $7, format="text")
-write(Mode, $8, format="text")
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# Note this script is externalized to customers, please do not change w/o consulting component owner.
+# How to invoke this dml script Categorical.dml?
+# Assume C_HOME is set to the home of the dml script
+# Assume input and output directories are on hdfs as INPUT_DIR and OUTPUT_DIR
+# Assume rows = 10000 for vector
+# hadoop jar SystemML.jar -f $C_HOME/Categorical.dml -args "$INPUT_DIR/vector" 10000 "$INPUT_DIR/W" "$OUTPUT_DIR/Nc" "$OUTPUT_DIR/R" "$OUTPUT_DIR/Pc" "$OUTPUT_DIR/C" "$OUTPUT_DIR/Mode"
+
+V = read($1, rows=$2, cols=1, format="text")
+W = read($3, rows=$2, cols=1, format="text")
+
+# a set of number of values specify the number of cases of each categorical
+Nc = table(V, 1, W);
+
+# the number of categories of a categorical variable
+R = nrow(Nc)
+
+# total count
+s = sum(Nc)
+
+# percentage values of each categorical compare to the total case number
+Pc = Nc / s
+
+# all categorical values of a categorical variable
+C = ppred(Nc, 0, ">")
+
+# mode
+mx = max(Nc)
+Mode =  ppred(Nc, mx, "==")
+
+write(Nc, $4, format="text")
+write(R, $5)
+write(Pc, $6, format="text")
+write(C, $7, format="text")
+write(Mode, $8, format="text")
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/descriptivestats/WeightedScaleTest.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/descriptivestats/WeightedScaleTest.R b/src/test/scripts/applications/descriptivestats/WeightedScaleTest.R
index 866ca02..eba3d1c 100644
--- a/src/test/scripts/applications/descriptivestats/WeightedScaleTest.R
+++ b/src/test/scripts/applications/descriptivestats/WeightedScaleTest.R
@@ -1,155 +1,155 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# JUnit test class: dml.test.integration.descriptivestats.UnivariateStatsTest.java
-# command line invocation assuming $S_HOME is set to the home of the R script
-# Rscript $S_HOME/WeightedScaleTest.R $S_HOME/in/ $S_HOME/expected/
-args <- commandArgs(TRUE)
-options(digits=22)
-
-options(repos="http://cran.stat.ucla.edu/") 
-is.installed <- function(mypkg) is.element(mypkg, installed.packages()[,1])
-
-is_plotrix = is.installed("plotrix");
-if ( !is_plotrix ) {
-install.packages("plotrix");
-} 
-library("plotrix");
-
-is_psych = is.installed("psych");
-if ( !is_psych ) {
-install.packages("psych");
-} 
-library("psych")
-
-is_moments = is.installed("moments");
-if( !is_moments){
-install.packages("moments");
-}
-library("moments")
-
-#library("batch")
-library("Matrix")
-
-# Usage: R --vanilla -args Xfile X < DescriptiveStatistics.R
-
-#parseCommandArgs()
-######################
-
-Temp = readMM(paste(args[1], "vector.mtx", sep=""))
-W = readMM(paste(args[1], "weight.mtx", sep=""))
-P = readMM(paste(args[1], "prob.mtx", sep=""))
-
-W = round(W)
-
-V=rep(Temp[,1],W[,1])
-
-n = sum(W)
-
-# sum
-s1 = sum(V)
-
-# mean
-mu = s1/n
-
-# variances
-var = var(V)
-
-# standard deviations
-std_dev = sd(V, na.rm = FALSE)
-
-# standard errors of mean
-SE = std.error(V, na.rm)
-
-# coefficients of variation
-cv = std_dev/mu
-
-# harmonic means (note: may generate out of memory for large sparse matrices becauses of NaNs)
-#har_mu = harmonic.mean(V)
-
-# geometric means is not currently supported.
-#geom_mu = geometric.mean(V)
-
-# min and max
-mn=min(V)
-mx=max(V)
-
-# range
-rng = mx - mn
-
-# Skewness
-g1 = n^2*moment(V, order=3, central=TRUE)/((n-1)*(n-2)*std_dev^3)
-
-# standard error of skewness (not sure how it is defined without the weight)
-se_g1=sqrt( 6*n*(n-1.0) / ((n-2.0)*(n+1.0)*(n+3.0)) )
-
-m2 = moment(V, order=2, central=TRUE)
-m4 = moment(V, order=4, central=TRUE)
-
-# Kurtosis (using binomial formula)
-g2 = (n^2*(n+1)*m4-3*m2^2*n^2*(n-1))/((n-1)*(n-2)*(n-3)*var^2)
-
-# Standard error of Kurtosis (not sure how it is defined without the weight)
-se_g2= sqrt( (4*(n^2-1)*se_g1^2)/((n+5)*(n-3)) )
-
-# median
-md = median(V) #quantile(V, 0.5, type = 1)
-
-# quantile
-Q = t(quantile(V, P[,1], type = 1))
-
-# inter-quartile mean
-S=c(sort(V))
-
-q25d=n*0.25
-q75d=n*0.75
-q25i=ceiling(q25d)
-q75i=ceiling(q75d)
-
-iqm = sum(S[(q25i+1):q75i])
-iqm = iqm + (q25i-q25d)*S[q25i] - (q75i-q75d)*S[q75i]
-iqm = iqm/(n*0.5)
-
-#print(paste("IQM ", iqm));
-
-# outliers use ppred to describe it
-out_minus = t(as.numeric(Temp < mu-5*std_dev)*Temp) 
-out_plus = t(as.numeric(Temp > mu+5*std_dev)*Temp)
-
-write(mu, paste(args[2], "mean", sep=""));
-write(std_dev, paste(args[2], "std", sep=""));
-write(SE, paste(args[2], "se", sep=""));
-write(var, paste(args[2], "var", sep=""));
-write(cv, paste(args[2], "cv", sep=""));
-# write(har_mu),paste(args[2], "har", sep=""));
-# write(geom_mu, paste(args[2], "geom", sep=""));
-write(mn, paste(args[2], "min", sep=""));
-write(mx, paste(args[2], "max", sep=""));
-write(rng, paste(args[2], "rng", sep=""));
-write(g1, paste(args[2], "g1", sep=""));
-write(se_g1, paste(args[2], "se_g1", sep=""));
-write(g2, paste(args[2], "g2", sep=""));
-write(se_g2, paste(args[2], "se_g2", sep=""));
-write(md, paste(args[2], "median", sep=""));
-write(iqm, paste(args[2], "iqm", sep=""));
-writeMM(as(t(out_minus),"CsparseMatrix"), paste(args[2], "out_minus", sep=""), format="text");
-writeMM(as(t(out_plus),"CsparseMatrix"), paste(args[2], "out_plus", sep=""), format="text");
-writeMM(as(t(Q),"CsparseMatrix"), paste(args[2], "quantile", sep=""), format="text");
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# JUnit test class: dml.test.integration.descriptivestats.UnivariateStatsTest.java
+# command line invocation assuming $S_HOME is set to the home of the R script
+# Rscript $S_HOME/WeightedScaleTest.R $S_HOME/in/ $S_HOME/expected/
+args <- commandArgs(TRUE)
+options(digits=22)
+
+options(repos="http://cran.stat.ucla.edu/") 
+is.installed <- function(mypkg) is.element(mypkg, installed.packages()[,1])
+
+is_plotrix = is.installed("plotrix");
+if ( !is_plotrix ) {
+install.packages("plotrix");
+} 
+library("plotrix");
+
+is_psych = is.installed("psych");
+if ( !is_psych ) {
+install.packages("psych");
+} 
+library("psych")
+
+is_moments = is.installed("moments");
+if( !is_moments){
+install.packages("moments");
+}
+library("moments")
+
+#library("batch")
+library("Matrix")
+
+# Usage: R --vanilla -args Xfile X < DescriptiveStatistics.R
+
+#parseCommandArgs()
+######################
+
+Temp = readMM(paste(args[1], "vector.mtx", sep=""))
+W = readMM(paste(args[1], "weight.mtx", sep=""))
+P = readMM(paste(args[1], "prob.mtx", sep=""))
+
+W = round(W)
+
+V=rep(Temp[,1],W[,1])
+
+n = sum(W)
+
+# sum
+s1 = sum(V)
+
+# mean
+mu = s1/n
+
+# variances
+var = var(V)
+
+# standard deviations
+std_dev = sd(V, na.rm = FALSE)
+
+# standard errors of mean
+SE = std.error(V, na.rm)
+
+# coefficients of variation
+cv = std_dev/mu
+
+# harmonic means (note: may generate out of memory for large sparse matrices because of NaNs)
+#har_mu = harmonic.mean(V)
+
+# geometric mean is not currently supported.
+#geom_mu = geometric.mean(V)
+
+# min and max
+mn=min(V)
+mx=max(V)
+
+# range
+rng = mx - mn
+
+# Skewness
+g1 = n^2*moment(V, order=3, central=TRUE)/((n-1)*(n-2)*std_dev^3)
+
+# standard error of skewness (not sure how it is defined without the weight)
+se_g1=sqrt( 6*n*(n-1.0) / ((n-2.0)*(n+1.0)*(n+3.0)) )
+
+m2 = moment(V, order=2, central=TRUE)
+m4 = moment(V, order=4, central=TRUE)
+
+# Kurtosis (using binomial formula)
+g2 = (n^2*(n+1)*m4-3*m2^2*n^2*(n-1))/((n-1)*(n-2)*(n-3)*var^2)
+
+# Standard error of Kurtosis (not sure how it is defined without the weight)
+se_g2= sqrt( (4*(n^2-1)*se_g1^2)/((n+5)*(n-3)) )
+
+# median
+md = median(V) #quantile(V, 0.5, type = 1)
+
+# quantile
+Q = t(quantile(V, P[,1], type = 1))
+
+# inter-quartile mean
+S=c(sort(V))
+
+q25d=n*0.25
+q75d=n*0.75
+q25i=ceiling(q25d)
+q75i=ceiling(q75d)
+
+iqm = sum(S[(q25i+1):q75i])
+iqm = iqm + (q25i-q25d)*S[q25i] - (q75i-q75d)*S[q75i]
+iqm = iqm/(n*0.5)
+
+#print(paste("IQM ", iqm));
+
+# outliers: keep only the values more than 5 standard deviations below/above the mean (all other entries become 0)
+out_minus = t(as.numeric(Temp < mu-5*std_dev)*Temp) 
+out_plus = t(as.numeric(Temp > mu+5*std_dev)*Temp)
+
+write(mu, paste(args[2], "mean", sep=""));
+write(std_dev, paste(args[2], "std", sep=""));
+write(SE, paste(args[2], "se", sep=""));
+write(var, paste(args[2], "var", sep=""));
+write(cv, paste(args[2], "cv", sep=""));
+# write(har_mu, paste(args[2], "har", sep=""));
+# write(geom_mu, paste(args[2], "geom", sep=""));
+write(mn, paste(args[2], "min", sep=""));
+write(mx, paste(args[2], "max", sep=""));
+write(rng, paste(args[2], "rng", sep=""));
+write(g1, paste(args[2], "g1", sep=""));
+write(se_g1, paste(args[2], "se_g1", sep=""));
+write(g2, paste(args[2], "g2", sep=""));
+write(se_g2, paste(args[2], "se_g2", sep=""));
+write(md, paste(args[2], "median", sep=""));
+write(iqm, paste(args[2], "iqm", sep=""));
+writeMM(as(t(out_minus),"CsparseMatrix"), paste(args[2], "out_minus", sep=""), format="text");
+writeMM(as(t(out_plus),"CsparseMatrix"), paste(args[2], "out_plus", sep=""), format="text");
+writeMM(as(t(Q),"CsparseMatrix"), paste(args[2], "quantile", sep=""), format="text");

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/descriptivestats/WeightedScaleTest.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/descriptivestats/WeightedScaleTest.dml b/src/test/scripts/applications/descriptivestats/WeightedScaleTest.dml
index 6968c1e..aac58de 100644
--- a/src/test/scripts/applications/descriptivestats/WeightedScaleTest.dml
+++ b/src/test/scripts/applications/descriptivestats/WeightedScaleTest.dml
@@ -1,125 +1,125 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# Note this script is externalized to customers, please do not change w/o consulting component owner.
-# How to invoke this dml script Scale.dml?
-# Assume S_HOME is set to the home of the dml script
-# Assume input and output directories are on hdfs as INPUT_DIR and OUTPUT_DIR
-# Assume rows = 10000 for V and rows = 5 for P
-# hadoop jar SystemML.jar -f $S_HOME/Scale.dml -args "$INPUT_DIR/vector" 10000 "$INPUT_DIR/weight "$INPUT_DIR/prob 5
-# "$OUTPUT_DIR/mean" "$OUTPUT_DIR/std" "$OUTPUT_DIR/se" "$OUTPUT_DIR/var" "$OUTPUT_DIR/cv"
-# "$OUTPUT_DIR/min" "$OUTPUT_DIR/max" "$OUTPUT_DIR/rng" 
-# "$OUTPUT_DIR/g1" "$OUTPUT_DIR/se_g1" "$OUTPUT_DIR/g2" "$OUTPUT_DIR/se_g2" 
-# "$OUTPUT_DIR/median" "$OUTPUT_DIR/iqm"
-# "OUTPUT_DIR/out_minus" "$OUTPUT_DIR/out_plus" "$OUTPUT_DIR/quantile" 
-
-V = read($1, rows=$2, cols=1, format="text")
-W = read($3, rows=$2, cols=1, format="text")
-P = read($4, rows=$5, cols=1, format="text")
-
-W = round(W)
-
-n = nrow(V)
-
-wt = sum(W)
-
-# sum
-s1 = sum(V*W)
-
-# 2nd central moment
-m2 = moment(V, W, 2)
-
-# 3rd central moment
-m3 = moment(V, W, 3)
-
-# 4th central moment
-m4 = moment(V, W, 4)
-
-# mean
-mu = mean(V, W)
-
-# variances
-var = m2*wt/(wt-1.0)
-
-# standard deviations
-std_dev = sqrt(var)
-
-# standard errors of mean
-SE = std_dev/sqrt(wt)
-
-# coefficients of variation
-cv = std_dev/mu
-
-# harmonic means (note: may generate out of memory for large sparse matrices becauses of NaNs)
-#har_mu = wt/(sum((1.0/V)*W))
-
-# geometric means is not currently supported.
-#geom_mu = wt*exp(sum(log(V)*W)/wt)
-
-# min and max
-mn=min(V)
-mx=max(V)
-
-# range
-rng = mx - mn
-
-# Skewness
-g1 = wt^2*m3/((wt-1)*(wt-2)*std_dev^3)
-
-# standard error of skewness
-se_g1=sqrt( 6*wt*(wt-1) / ((wt-2)*(wt+1)*(wt+3)) )
-
-# Kurtosis (using binomial formula)
-g2 = (wt^2*(wt+1)*m4-3*m2^2*wt^2*(wt-1))/((wt-1)*(wt-2)*(wt-3)*std_dev^4)
-
-# Standard error of Kurtosis
-se_g2= sqrt( (4*(wt^2-1)*se_g1^2)/((wt+5)*(wt-3)) )
-
-# outliers use ppred to describe it
-out_minus = ppred(V, mu-5*std_dev, "<")*V 
-out_plus = ppred(V, mu+5*std_dev, ">")*V
-
-# median
-md = median(V,W); #quantile(V, W, 0.5)
-
-# quantile
-Q = quantile(V, W, P)
-
-# inter-quartile mean
-iqm = interQuartileMean(V, W)
-
-write(mu, $6);
-write(std_dev, $7);
-write(SE, $8);
-write(var, $9);
-write(cv, $10);
-write(mn, $11);
-write(mx, $12);
-write(rng, $13);
-write(g1, $14);
-write(se_g1, $15);
-write(g2, $16);
-write(se_g2, $17);
-write(md, $18);
-write(iqm, $19);
-write(out_minus, $20, format="text");
-write(out_plus, $21, format="text");
-write(Q, $22, format="text");
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# Note this script is externalized to customers, please do not change w/o consulting component owner.
+# How to invoke this dml script Scale.dml?
+# Assume S_HOME is set to the home of the dml script
+# Assume input and output directories are on hdfs as INPUT_DIR and OUTPUT_DIR
+# Assume rows = 10000 for V and rows = 5 for P
+# hadoop jar SystemML.jar -f $S_HOME/Scale.dml -args "$INPUT_DIR/vector" 10000 "$INPUT_DIR/weight" "$INPUT_DIR/prob" 5
+# "$OUTPUT_DIR/mean" "$OUTPUT_DIR/std" "$OUTPUT_DIR/se" "$OUTPUT_DIR/var" "$OUTPUT_DIR/cv"
+# "$OUTPUT_DIR/min" "$OUTPUT_DIR/max" "$OUTPUT_DIR/rng" 
+# "$OUTPUT_DIR/g1" "$OUTPUT_DIR/se_g1" "$OUTPUT_DIR/g2" "$OUTPUT_DIR/se_g2" 
+# "$OUTPUT_DIR/median" "$OUTPUT_DIR/iqm"
+# "$OUTPUT_DIR/out_minus" "$OUTPUT_DIR/out_plus" "$OUTPUT_DIR/quantile" 
+
+V = read($1, rows=$2, cols=1, format="text")
+W = read($3, rows=$2, cols=1, format="text")
+P = read($4, rows=$5, cols=1, format="text")
+
+W = round(W)
+
+n = nrow(V)
+
+wt = sum(W)
+
+# sum
+s1 = sum(V*W)
+
+# 2nd central moment
+m2 = moment(V, W, 2)
+
+# 3rd central moment
+m3 = moment(V, W, 3)
+
+# 4th central moment
+m4 = moment(V, W, 4)
+
+# mean
+mu = mean(V, W)
+
+# variances
+var = m2*wt/(wt-1.0)
+
+# standard deviations
+std_dev = sqrt(var)
+
+# standard errors of mean
+SE = std_dev/sqrt(wt)
+
+# coefficients of variation
+cv = std_dev/mu
+
+# harmonic means (note: may generate out of memory for large sparse matrices because of NaNs)
+#har_mu = wt/(sum((1.0/V)*W))
+
+# geometric mean is not currently supported.
+#geom_mu = wt*exp(sum(log(V)*W)/wt)
+
+# min and max
+mn=min(V)
+mx=max(V)
+
+# range
+rng = mx - mn
+
+# Skewness
+g1 = wt^2*m3/((wt-1)*(wt-2)*std_dev^3)
+
+# standard error of skewness
+se_g1=sqrt( 6*wt*(wt-1) / ((wt-2)*(wt+1)*(wt+3)) )
+
+# Kurtosis (using binomial formula)
+g2 = (wt^2*(wt+1)*m4-3*m2^2*wt^2*(wt-1))/((wt-1)*(wt-2)*(wt-3)*std_dev^4)
+
+# Standard error of Kurtosis
+se_g2= sqrt( (4*(wt^2-1)*se_g1^2)/((wt+5)*(wt-3)) )
+
+# outliers: use ppred to keep only the values more than 5 standard deviations below/above the mean (all other entries become 0)
+out_minus = ppred(V, mu-5*std_dev, "<")*V 
+out_plus = ppred(V, mu+5*std_dev, ">")*V
+
+# median
+md = median(V,W); #quantile(V, W, 0.5)
+
+# quantile
+Q = quantile(V, W, P)
+
+# inter-quartile mean
+iqm = interQuartileMean(V, W)
+
+write(mu, $6);
+write(std_dev, $7);
+write(SE, $8);
+write(var, $9);
+write(cv, $10);
+write(mn, $11);
+write(mx, $12);
+write(rng, $13);
+write(g1, $14);
+write(se_g1, $15);
+write(g2, $16);
+write(se_g2, $17);
+write(md, $18);
+write(iqm, $19);
+write(out_minus, $20, format="text");
+write(out_plus, $21, format="text");
+write(Q, $22, format="text");

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/glm/GLM.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/glm/GLM.R b/src/test/scripts/applications/glm/GLM.R
index d569319..32b51f9 100644
--- a/src/test/scripts/applications/glm/GLM.R
+++ b/src/test/scripts/applications/glm/GLM.R
@@ -1,141 +1,141 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# JUnit test class: dml.test.integration.applications.GLMTest.java
-# Intended to solve GLM Regression using R, in order to compare against the DML implementation
-# INPUT 1: Matrix X [rows, columns]
-# INPUT 2: Matrix y [rows, 1]
-# INPUT 3-6: Distribution family and link, see below:
-# ---------------------------------------------
-#   Dst Var Lnk Lnk   Distribution       Cano-
-#   typ pow typ pow   Family.link        nical?
-# ---------------------------------------------
-#    1  0.0  1 -1.0   Gaussian.inverse
-#    1  0.0  1  0.0   Gaussian.log
-#    1  0.0  1  1.0   Gaussian.id         Yes
-#    1  1.0  1  0.0   Poisson.log         Yes
-#    1  1.0  1  0.5   Poisson.sqrt
-#    1  1.0  1  1.0   Poisson.id
-#    1  2.0  1 -1.0   Gamma.inverse       Yes
-#    1  2.0  1  0.0   Gamma.log
-#    1  2.0  1  1.0   Gamma.id
-#    1  3.0  1 -2.0   InvGaussian.1/mu^2  Yes
-#    1  3.0  1 -1.0   InvGaussian.inverse
-#    1  3.0  1  0.0   InvGaussian.log
-#    1  3.0  1  1.0   InvGaussian.id
-#    1   *   1   *    AnyVariance.AnyLink
-# ---------------------------------------------
-#    2 -1.0  *   *    Binomial {-1, 1}
-#    2  0.0  *   *    Binomial { 0, 1}
-#    2  1.0  *   *    Binomial two-column
-#    2   *   1  0.0   Binomial.log
-#    2   *   2   *    Binomial.logit      Yes
-#    2   *   3   *    Binomial.probit
-#    2   *   4   *    Binomial.cloglog
-#    2   *   5   *    Binomial.cauchit
-# ---------------------------------------------
-# INPUT 3: (int) Distribution type
-# INPUT 4: (double) For Power families: Variance power of the mean
-# INPUT 5: (int) Link function type
-# INPUT 6: (double) Link as power of the mean
-# INPUT 7: (int) Intercept: 0 = no, 1 = yes
-# INPUT 8: (double) tolerance (epsilon)
-# INPUT 9: the regression coefficients output file
-# OUTPUT : Matrix beta [columns, 1]
-#
-# Assume that $GLMR_HOME is set to the home of the R script
-# Assume input and output directories are $GLMR_HOME/in/ and $GLMR_HOME/expected/
-# Rscript $GLMR_HOME/GLM.R $GLMR_HOME/in/X.mtx $GLMR_HOME/in/y.mtx 2 0.0 2 0.0 1 0.00000001 $GLMR_HOME/expected/w.mtx
-
-args <- commandArgs (TRUE);
-
-library ("Matrix");
-# library ("batch");
-
-options (warn = -1);
-
-X_here <- readMM (args[1]);  # (paste (args[1], "X.mtx", sep=""));
-y_here <- readMM (args[2]);  # (paste (args[1], "y.mtx", sep=""));
-
-num_records  <- nrow (X_here);
-num_features <- ncol (X_here);
-dist_type  <- as.integer (args[3]);
-dist_param <- as.numeric (args[4]);
-link_type  <- as.integer (args[5]);
-link_power <- as.numeric (args[6]);
-icept <- as.integer (args[7]);
-eps_n <- as.numeric (args[8]);
-
-f_ly <- gaussian ();
-var_power <- dist_param;
-
-if (dist_type == 1 & var_power == 0.0 & link_type == 1 & link_power ==  1.0) { f_ly <- gaussian (link = "identity");         } else
-if (dist_type == 1 & var_power == 0.0 & link_type == 1 & link_power == -1.0) { f_ly <- gaussian (link = "inverse");          } else
-if (dist_type == 1 & var_power == 0.0 & link_type == 1 & link_power ==  0.0) { f_ly <- gaussian (link = "log");              } else
-if (dist_type == 1 & var_power == 1.0 & link_type == 1 & link_power ==  1.0) { f_ly <-  poisson (link = "identity");         } else
-if (dist_type == 1 & var_power == 1.0 & link_type == 1 & link_power ==  0.0) { f_ly <-  poisson (link = "log");              } else
-if (dist_type == 1 & var_power == 1.0 & link_type == 1 & link_power ==  0.5) { f_ly <-  poisson (link = "sqrt");             } else
-if (dist_type == 1 & var_power == 2.0 & link_type == 1 & link_power ==  1.0) { f_ly <-    Gamma (link = "identity");         } else
-if (dist_type == 1 & var_power == 2.0 & link_type == 1 & link_power == -1.0) { f_ly <-    Gamma (link = "inverse");          } else
-if (dist_type == 1 & var_power == 2.0 & link_type == 1 & link_power ==  0.0) { f_ly <-    Gamma (link = "log");              } else
-if (dist_type == 1 & var_power == 3.0 & link_type == 1 & link_power ==  1.0) { f_ly <- inverse.gaussian (link = "identity"); } else
-if (dist_type == 1 & var_power == 3.0 & link_type == 1 & link_power == -1.0) { f_ly <- inverse.gaussian (link = "inverse");  } else
-if (dist_type == 1 & var_power == 3.0 & link_type == 1 & link_power ==  0.0) { f_ly <- inverse.gaussian (link = "log");      } else
-if (dist_type == 1 & var_power == 3.0 & link_type == 1 & link_power == -2.0) { f_ly <- inverse.gaussian (link = "1/mu^2");   } else
-if (dist_type == 2                    & link_type == 1 & link_power ==  0.0) { f_ly <- binomial (link = "log");              } else
-if (dist_type == 2                    & link_type == 1 & link_power ==  1.0) { f_ly <- binomial (link = "identity");         } else
-if (dist_type == 2                    & link_type == 1 & link_power ==  0.5) { f_ly <- binomial (link = "sqrt");             } else
-if (dist_type == 2                    & link_type == 2                     ) { f_ly <- binomial (link = "logit");            } else
-if (dist_type == 2                    & link_type == 3                     ) { f_ly <- binomial (link = "probit");           } else
-if (dist_type == 2                    & link_type == 4                     ) { f_ly <- binomial (link = "cloglog");          } else
-if (dist_type == 2                    & link_type == 5                     ) { f_ly <- binomial (link = "cauchit");          }
-
-# quasi(link = "identity", variance = "constant")
-# quasibinomial(link = "logit")
-# quasipoisson(link = "log")
-
-if (dist_type == 2 & dist_param != 1.0) {
-    y_here <- (y_here - dist_param) / (1.0 - dist_param);
-}
-
-# epsilon 	tolerance: the iterations converge when |dev - devold|/(|dev| + 0.1) < epsilon.
-# maxit 	integer giving the maximal number of IWLS iterations.
-# trace 	logical indicating if output should be produced for each iteration.
-#
-c_rol <- glm.control (epsilon = eps_n, maxit = 100, trace = FALSE);
-
-X_matrix = as.matrix (X_here);
-y_matrix = as.matrix (y_here);
-
-if (icept == 0) {
-    glmOut <- glm (y_matrix ~ X_matrix - 1, family = f_ly, control = c_rol);
-    betas <- coef (glmOut);
-} else {
-    glmOut <- glm (y_matrix ~ X_matrix    , family = f_ly, control = c_rol);
-    betas <- coef (glmOut);
-    beta_intercept = betas [1];
-    betas [1 : num_features] = betas [2 : (num_features + 1)];
-    betas [num_features + 1] = beta_intercept;
-}
-
-print (c("Deviance", glmOut$deviance));
-writeMM (as (betas, "CsparseMatrix"), args[9], format = "text");
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# JUnit test class: dml.test.integration.applications.GLMTest.java
+# Intended to solve GLM Regression using R, in order to compare against the DML implementation
+# INPUT 1: Matrix X [rows, columns]
+# INPUT 2: Matrix y [rows, 1]
+# INPUT 3-6: Distribution family and link, see below:
+# ---------------------------------------------
+#   Dst Var Lnk Lnk   Distribution       Cano-
+#   typ pow typ pow   Family.link        nical?
+# ---------------------------------------------
+#    1  0.0  1 -1.0   Gaussian.inverse
+#    1  0.0  1  0.0   Gaussian.log
+#    1  0.0  1  1.0   Gaussian.id         Yes
+#    1  1.0  1  0.0   Poisson.log         Yes
+#    1  1.0  1  0.5   Poisson.sqrt
+#    1  1.0  1  1.0   Poisson.id
+#    1  2.0  1 -1.0   Gamma.inverse       Yes
+#    1  2.0  1  0.0   Gamma.log
+#    1  2.0  1  1.0   Gamma.id
+#    1  3.0  1 -2.0   InvGaussian.1/mu^2  Yes
+#    1  3.0  1 -1.0   InvGaussian.inverse
+#    1  3.0  1  0.0   InvGaussian.log
+#    1  3.0  1  1.0   InvGaussian.id
+#    1   *   1   *    AnyVariance.AnyLink
+# ---------------------------------------------
+#    2 -1.0  *   *    Binomial {-1, 1}
+#    2  0.0  *   *    Binomial { 0, 1}
+#    2  1.0  *   *    Binomial two-column
+#    2   *   1  0.0   Binomial.log
+#    2   *   2   *    Binomial.logit      Yes
+#    2   *   3   *    Binomial.probit
+#    2   *   4   *    Binomial.cloglog
+#    2   *   5   *    Binomial.cauchit
+# ---------------------------------------------
+# INPUT 3: (int) Distribution type
+# INPUT 4: (double) For Power families: Variance power of the mean
+# INPUT 5: (int) Link function type
+# INPUT 6: (double) Link as power of the mean
+# INPUT 7: (int) Intercept: 0 = no, 1 = yes
+# INPUT 8: (double) tolerance (epsilon)
+# INPUT 9: the regression coefficients output file
+# OUTPUT : Matrix beta [columns, 1]
+#
+# Assume that $GLMR_HOME is set to the home of the R script
+# Assume input and output directories are $GLMR_HOME/in/ and $GLMR_HOME/expected/
+# Rscript $GLMR_HOME/GLM.R $GLMR_HOME/in/X.mtx $GLMR_HOME/in/y.mtx 2 0.0 2 0.0 1 0.00000001 $GLMR_HOME/expected/w.mtx
+
+args <- commandArgs (TRUE);
+
+library ("Matrix");
+# library ("batch");
+
+options (warn = -1);
+
+X_here <- readMM (args[1]);  # (paste (args[1], "X.mtx", sep=""));
+y_here <- readMM (args[2]);  # (paste (args[1], "y.mtx", sep=""));
+
+num_records  <- nrow (X_here);
+num_features <- ncol (X_here);
+dist_type  <- as.integer (args[3]);
+dist_param <- as.numeric (args[4]);
+link_type  <- as.integer (args[5]);
+link_power <- as.numeric (args[6]);
+icept <- as.integer (args[7]);
+eps_n <- as.numeric (args[8]);
+
+f_ly <- gaussian ();
+var_power <- dist_param;
+
+if (dist_type == 1 & var_power == 0.0 & link_type == 1 & link_power ==  1.0) { f_ly <- gaussian (link = "identity");         } else
+if (dist_type == 1 & var_power == 0.0 & link_type == 1 & link_power == -1.0) { f_ly <- gaussian (link = "inverse");          } else
+if (dist_type == 1 & var_power == 0.0 & link_type == 1 & link_power ==  0.0) { f_ly <- gaussian (link = "log");              } else
+if (dist_type == 1 & var_power == 1.0 & link_type == 1 & link_power ==  1.0) { f_ly <-  poisson (link = "identity");         } else
+if (dist_type == 1 & var_power == 1.0 & link_type == 1 & link_power ==  0.0) { f_ly <-  poisson (link = "log");              } else
+if (dist_type == 1 & var_power == 1.0 & link_type == 1 & link_power ==  0.5) { f_ly <-  poisson (link = "sqrt");             } else
+if (dist_type == 1 & var_power == 2.0 & link_type == 1 & link_power ==  1.0) { f_ly <-    Gamma (link = "identity");         } else
+if (dist_type == 1 & var_power == 2.0 & link_type == 1 & link_power == -1.0) { f_ly <-    Gamma (link = "inverse");          } else
+if (dist_type == 1 & var_power == 2.0 & link_type == 1 & link_power ==  0.0) { f_ly <-    Gamma (link = "log");              } else
+if (dist_type == 1 & var_power == 3.0 & link_type == 1 & link_power ==  1.0) { f_ly <- inverse.gaussian (link = "identity"); } else
+if (dist_type == 1 & var_power == 3.0 & link_type == 1 & link_power == -1.0) { f_ly <- inverse.gaussian (link = "inverse");  } else
+if (dist_type == 1 & var_power == 3.0 & link_type == 1 & link_power ==  0.0) { f_ly <- inverse.gaussian (link = "log");      } else
+if (dist_type == 1 & var_power == 3.0 & link_type == 1 & link_power == -2.0) { f_ly <- inverse.gaussian (link = "1/mu^2");   } else
+if (dist_type == 2                    & link_type == 1 & link_power ==  0.0) { f_ly <- binomial (link = "log");              } else
+if (dist_type == 2                    & link_type == 1 & link_power ==  1.0) { f_ly <- binomial (link = "identity");         } else
+if (dist_type == 2                    & link_type == 1 & link_power ==  0.5) { f_ly <- binomial (link = "sqrt");             } else
+if (dist_type == 2                    & link_type == 2                     ) { f_ly <- binomial (link = "logit");            } else
+if (dist_type == 2                    & link_type == 3                     ) { f_ly <- binomial (link = "probit");           } else
+if (dist_type == 2                    & link_type == 4                     ) { f_ly <- binomial (link = "cloglog");          } else
+if (dist_type == 2                    & link_type == 5                     ) { f_ly <- binomial (link = "cauchit");          }
+
+# quasi(link = "identity", variance = "constant")
+# quasibinomial(link = "logit")
+# quasipoisson(link = "log")
+
+if (dist_type == 2 & dist_param != 1.0) {
+    y_here <- (y_here - dist_param) / (1.0 - dist_param);
+}
+
+# epsilon 	tolerance: the iterations converge when |dev - devold|/(|dev| + 0.1) < epsilon.
+# maxit 	integer giving the maximal number of IWLS iterations.
+# trace 	logical indicating if output should be produced for each iteration.
+#
+c_rol <- glm.control (epsilon = eps_n, maxit = 100, trace = FALSE);
+
+X_matrix = as.matrix (X_here);
+y_matrix = as.matrix (y_here);
+
+if (icept == 0) {
+    glmOut <- glm (y_matrix ~ X_matrix - 1, family = f_ly, control = c_rol);
+    betas <- coef (glmOut);
+} else {
+    glmOut <- glm (y_matrix ~ X_matrix    , family = f_ly, control = c_rol);
+    betas <- coef (glmOut);
+    beta_intercept = betas [1];
+    betas [1 : num_features] = betas [2 : (num_features + 1)];
+    betas [num_features + 1] = beta_intercept;
+}
+
+print (c("Deviance", glmOut$deviance));
+writeMM (as (betas, "CsparseMatrix"), args[9], format = "text");
+


[16/55] [partial] incubator-systemml git commit: [SYSTEMML-482] [SYSTEMML-480] Adding a Git attributes file to enfore Unix-styled line endings, and normalizing all of the line endings.

Posted by du...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/parfor/parfor_naive-bayes.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/parfor/parfor_naive-bayes.R b/src/test/scripts/applications/parfor/parfor_naive-bayes.R
index f455c2c..cb0d00f 100644
--- a/src/test/scripts/applications/parfor/parfor_naive-bayes.R
+++ b/src/test/scripts/applications/parfor/parfor_naive-bayes.R
@@ -1,61 +1,61 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-D = as.matrix(readMM(paste(args[1], "D.mtx", sep="")))
-C = as.matrix(readMM(paste(args[1], "C.mtx", sep="")))
-
-# reading input args
-numClasses = as.integer(args[2]);
-laplace_correction = 1
-
-numRows = nrow(D)
-numFeatures = ncol(D)
-
-# Compute conditionals
-
-# Compute the feature counts for each class
-classFeatureCounts = matrix(0, numClasses, numFeatures)
-for (i in 1:numFeatures) {
-  Col = D[,i]
-  classFeatureCounts[,i] = aggregate(as.vector(Col), by=list(as.vector(C)), FUN=sum)[,2];
-}
-
-# Compute the total feature count for each class 
-# and add the number of features to this sum
-# for subsequent regularization (Laplace's rule)
-classSums = rowSums(classFeatureCounts) + numFeatures*laplace_correction
-
-# Compute class conditional probabilities
-repClassSums = classSums %*% matrix(1,1,numFeatures);
-class_conditionals = (classFeatureCounts + laplace_correction) / repClassSums;
-
-# Compute class priors
-class_counts = aggregate(as.vector(C), by=list(as.vector(C)), FUN=length)[,2]
-class_prior = class_counts / numRows;
-
-# write out the model
-writeMM(as(class_prior, "CsparseMatrix"), paste(args[3], "class_prior", sep=""));
-writeMM(as(class_conditionals, "CsparseMatrix"), paste(args[3], "class_conditionals", sep=""));
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+D = as.matrix(readMM(paste(args[1], "D.mtx", sep="")))
+C = as.matrix(readMM(paste(args[1], "C.mtx", sep="")))
+
+# reading input args
+numClasses = as.integer(args[2]);
+laplace_correction = 1
+
+numRows = nrow(D)
+numFeatures = ncol(D)
+
+# Compute conditionals
+
+# Compute the feature counts for each class
+classFeatureCounts = matrix(0, numClasses, numFeatures)
+for (i in 1:numFeatures) {
+  Col = D[,i]
+  classFeatureCounts[,i] = aggregate(as.vector(Col), by=list(as.vector(C)), FUN=sum)[,2];
+}
+
+# Compute the total feature count for each class 
+# and add the number of features to this sum
+# for subsequent regularization (Laplace's rule)
+classSums = rowSums(classFeatureCounts) + numFeatures*laplace_correction
+
+# Compute class conditional probabilities
+repClassSums = classSums %*% matrix(1,1,numFeatures);
+class_conditionals = (classFeatureCounts + laplace_correction) / repClassSums;
+
+# Compute class priors
+class_counts = aggregate(as.vector(C), by=list(as.vector(C)), FUN=length)[,2]
+class_prior = class_counts / numRows;
+
+# write out the model
+writeMM(as(class_prior, "CsparseMatrix"), paste(args[3], "class_prior", sep=""));
+writeMM(as(class_conditionals, "CsparseMatrix"), paste(args[3], "class_conditionals", sep=""));

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/parfor/parfor_univariate.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/parfor/parfor_univariate.R b/src/test/scripts/applications/parfor/parfor_univariate.R
index cb5dfb1..14f9f95 100644
--- a/src/test/scripts/applications/parfor/parfor_univariate.R
+++ b/src/test/scripts/applications/parfor/parfor_univariate.R
@@ -1,155 +1,155 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-library("moments")
-
-A1 <- readMM(paste(args[1], "D.mtx", sep=""))
-K1 <- readMM(paste(args[1], "K.mtx", sep=""))
-A <- as.matrix(A1);
-K <- as.matrix(K1);
-maxC = args[2];  
-
-
-# number of features/attributes
-n = ncol(A);
-
-# number of data records
-m = nrow(A);
-
-# number of statistics
-numBaseStats = 17; # (14 scale stats, 3 categorical stats)
-
-max_kind = max(K);
-  
-# matrices to store computed statistics
-baseStats = array(0,dim=c(numBaseStats,n)); 
-
-if (maxC > 0) {
-  countsArray = array(0,dim=c(maxC,n)); 
-}
-  
-for(i in 1:n) {
-
-	# project out the i^th column
-	F = as.matrix(A[,i]);
-
-	kind = K[1,i];
-
-	if ( kind == 1 ) {
-		print("scale");
-		# compute SCALE statistics on the projected column
-		minimum = min(F);
-		maximum = max(F);
-		rng = maximum - minimum;
-
-		mu = mean(F);
-		m2 = moment(F, order=2, central=TRUE);
-		m3 = moment(F, order=3, central=TRUE);
-		m4 = moment(F, order=4, central=TRUE);
-
-		var = m/(m-1.0)*m2;
-    
-		std_dev = sqrt(var);
-		se = std_dev/sqrt(m);
-		cv = std_dev/mu;
-
-		g1 = m3/(std_dev^3);
-		g2 = m4/(std_dev^4) - 3;
-		#se_g1=sqrt( 6*m*(m-1.0) / ((m-2.0)*(m+1.0)*(m+3.0)) ); 
-		se_g1=sqrt( (6/(m-2.0)) * (m/(m+1.0)) * ((m-1.0)/(m+3.0)) ); 
-
-		#se_g2= sqrt( (4*(m^2-1)*se_g1^2)/((m+5.0)*(m-3.0)) );  
-		se_g2=sqrt( (4/(m+5.0)) * ((m^2-1)/(m-3.0)) * se_g1^2 ); 
-
-		md = median(F); #quantile(F, 0.5, type = 1);
-
-		S = sort(F)
-		q25d=m*0.25
-		q75d=m*0.75
-		q25i=ceiling(q25d)
-		q75i=ceiling(q75d)
-
-		iqm = sum(S[(q25i+1):q75i])
-		iqm = iqm + (q25i-q25d)*S[q25i] - (q75i-q75d)*S[q75i]
-		iqm = iqm/(m*0.5)
-
-		#iqm = mean( subset(F, F>quantile(F,1/4,type = 1) & F<=quantile(F,3/4,type = 1) ) )
-    
-		# place the computed statistics in output matrices
-		baseStats[1,i] = minimum;
-		baseStats[2,i] = maximum;
-		baseStats[3,i] = rng;
-
-		baseStats[4,i] = mu;
-		baseStats[5,i] = var;
-		baseStats[6,i] = std_dev;
-		baseStats[7,i] = se;
-		baseStats[8,i] = cv;
-
-		baseStats[9,i] = g1;
-		baseStats[10,i] = g2;
-		baseStats[11,i] = se_g1;
-		baseStats[12,i] = se_g2;
-
-		baseStats[13,i] = md;
-		baseStats[14,i] = iqm;
-	}
-	else {
-		if (kind == 2 | kind == 3) {
-			print("categorical");
-			
-			# check if the categorical column has valid values
-			minF = min(F);
-			if (minF <=0) {
-				print("ERROR: Categorical attributes can only take values starting from 1.");
-			}
-			else {
-				# compute CATEGORICAL statistics on the projected column
-				cat_counts = table(F);  # counts for each category
-				num_cat = nrow(cat_counts); # number of categories
-
-        mx = max(t(as.vector(cat_counts)))
-        mode = which(cat_counts == mx)    
-        
-      	numModes = length(cat_counts[ cat_counts==mx ]);
-
-				# place the computed statistics in output matrices
-				baseStats[15,i] = num_cat;
-				baseStats[16,i] = mode;
-				baseStats[17,i] = numModes;
-
-        if (max_kind > 1) {
-				  countsArray[1:length(cat_counts),i] = cat_counts;
-				}
-			}
-		}
-	}
-}
-
-writeMM(as(baseStats, "CsparseMatrix"), paste(args[3], "base.stats", sep=""));
-if (max_kind > 1) {
-  writeMM(as(countsArray, "CsparseMatrix"), paste(args[3], "categorical.counts", sep=""));
-}
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+library("moments")
+
+A1 <- readMM(paste(args[1], "D.mtx", sep=""))
+K1 <- readMM(paste(args[1], "K.mtx", sep=""))
+A <- as.matrix(A1);
+K <- as.matrix(K1);
+maxC = args[2];  
+
+
+# number of features/attributes
+n = ncol(A);
+
+# number of data records
+m = nrow(A);
+
+# number of statistics
+numBaseStats = 17; # (14 scale stats, 3 categorical stats)
+
+max_kind = max(K);
+  
+# matrices to store computed statistics
+baseStats = array(0,dim=c(numBaseStats,n)); 
+
+if (maxC > 0) {
+  countsArray = array(0,dim=c(maxC,n)); 
+}
+  
+for(i in 1:n) {
+
+	# project out the i^th column
+	F = as.matrix(A[,i]);
+
+	kind = K[1,i];
+
+	if ( kind == 1 ) {
+		print("scale");
+		# compute SCALE statistics on the projected column
+		minimum = min(F);
+		maximum = max(F);
+		rng = maximum - minimum;
+
+		mu = mean(F);
+		m2 = moment(F, order=2, central=TRUE);
+		m3 = moment(F, order=3, central=TRUE);
+		m4 = moment(F, order=4, central=TRUE);
+
+		var = m/(m-1.0)*m2;
+    
+		std_dev = sqrt(var);
+		se = std_dev/sqrt(m);
+		cv = std_dev/mu;
+
+		g1 = m3/(std_dev^3);
+		g2 = m4/(std_dev^4) - 3;
+		#se_g1=sqrt( 6*m*(m-1.0) / ((m-2.0)*(m+1.0)*(m+3.0)) ); 
+		se_g1=sqrt( (6/(m-2.0)) * (m/(m+1.0)) * ((m-1.0)/(m+3.0)) ); 
+
+		#se_g2= sqrt( (4*(m^2-1)*se_g1^2)/((m+5.0)*(m-3.0)) );  
+		se_g2=sqrt( (4/(m+5.0)) * ((m^2-1)/(m-3.0)) * se_g1^2 ); 
+
+		md = median(F); #quantile(F, 0.5, type = 1);
+
+		S = sort(F)
+		q25d=m*0.25
+		q75d=m*0.75
+		q25i=ceiling(q25d)
+		q75i=ceiling(q75d)
+
+		iqm = sum(S[(q25i+1):q75i])
+		iqm = iqm + (q25i-q25d)*S[q25i] - (q75i-q75d)*S[q75i]
+		iqm = iqm/(m*0.5)
+
+		#iqm = mean( subset(F, F>quantile(F,1/4,type = 1) & F<=quantile(F,3/4,type = 1) ) )
+    
+		# place the computed statistics in output matrices
+		baseStats[1,i] = minimum;
+		baseStats[2,i] = maximum;
+		baseStats[3,i] = rng;
+
+		baseStats[4,i] = mu;
+		baseStats[5,i] = var;
+		baseStats[6,i] = std_dev;
+		baseStats[7,i] = se;
+		baseStats[8,i] = cv;
+
+		baseStats[9,i] = g1;
+		baseStats[10,i] = g2;
+		baseStats[11,i] = se_g1;
+		baseStats[12,i] = se_g2;
+
+		baseStats[13,i] = md;
+		baseStats[14,i] = iqm;
+	}
+	else {
+		if (kind == 2 | kind == 3) {
+			print("categorical");
+			
+			# check if the categorical column has valid values
+			minF = min(F);
+			if (minF <=0) {
+				print("ERROR: Categorical attributes can only take values starting from 1.");
+			}
+			else {
+				# compute CATEGORICAL statistics on the projected column
+				cat_counts = table(F);  # counts for each category
+				num_cat = nrow(cat_counts); # number of categories
+
+        mx = max(t(as.vector(cat_counts)))
+        mode = which(cat_counts == mx)    
+        
+      	numModes = length(cat_counts[ cat_counts==mx ]);
+
+				# place the computed statistics in output matrices
+				baseStats[15,i] = num_cat;
+				baseStats[16,i] = mode;
+				baseStats[17,i] = numModes;
+
+        if (max_kind > 1) {
+				  countsArray[1:length(cat_counts),i] = cat_counts;
+				}
+			}
+		}
+	}
+}
+
+writeMM(as(baseStats, "CsparseMatrix"), paste(args[3], "base.stats", sep=""));
+if (max_kind > 1) {
+  writeMM(as(countsArray, "CsparseMatrix"), paste(args[3], "categorical.counts", sep=""));
+}
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/parfor/parfor_univariate0.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/parfor/parfor_univariate0.dml b/src/test/scripts/applications/parfor/parfor_univariate0.dml
index 061d4a0..2a6a9c5 100644
--- a/src/test/scripts/applications/parfor/parfor_univariate0.dml
+++ b/src/test/scripts/applications/parfor/parfor_univariate0.dml
@@ -1,166 +1,166 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-#
-# DML Script to compute univariate statistics for all attributes 
-# in a given data set
-#
-# Three inputs:
-#     $1) A - input data
-#     $2) K - row matrix that denotes the "kind" for each 
-#              attribute
-#             kind=1 for scale, kind=2 for nominal,
-#             kind=3 for ordinal
-#     $3) maxC - maximum number of categories in any categorical 
-#         attribute
-#
-# One output:
-#     $4) output directory in which following three statistics 
-#         files are created
-#         + base.stats - matrix with all 17 statistics (14 scale, 
-#         3 categorical) computed for all attributes
-#         + categorical.counts - matrix in which each column 
-#         gives the category-wise counts for all categories in 
-#         that attribute
-#
-#
-
-A = read($1); # data file
-K = read($2); # attribute kind file
-maxC = $3;  # max number of categories in any categorical attribute
-
-
-if (maxC < 0) {
-	print("ERROR: maximum number maxC of categories must be a positve value.");
-}
-else {
-	
-	
-	# number of features/attributes
-	n = ncol(A);
-
-	# number of data records
-	m = nrow(A);
-
-	# number of statistics
-	numBaseStats = 17; # (14 scale stats, 3 categorical stats)
-
-    max_kind = max(K);
-    
-	# matrices to store computed statistics
-	baseStats = matrix(0, rows=numBaseStats, cols=n);
-	
-	if (maxC > 0) {
-	  countsArray = matrix(0, rows=maxC, cols=n);
-    }
-    	
-	for(i in 1:n) {
-
-		# project out the i^th column
-		F = A[,i];
-
-		kind = castAsScalar(K[1,i]);
-
-		if ( kind == 1 ) {
-			print("[" + i + "] Scale");
-			# compute SCALE statistics on the projected column
-			minimum = min(F);
-			maximum = max(F);
-			rng = maximum - minimum;
-
-			mu = mean(F);
-			m2 = moment(F, 2);
-			m3 = moment(F, 3);
-			m4 = moment(F, 4);
-
-			var = m/(m-1.0)*m2;
-			std_dev = sqrt(var);
-			se = std_dev/sqrt(m);
-			cv = std_dev/mu;
-
-			g1 = m3/(std_dev^3);
-			g2 = m4/(std_dev^4) - 3;
-			#se_g1=sqrt( 6*m*(m-1.0) / ((m-2.0)*(m+1.0)*(m+3.0)) ); 
-			se_g1=sqrt( (6/(m-2.0)) * (m/(m+1.0)) * ((m-1.0)/(m+3.0)) ); 
-
-			#se_g2= sqrt( (4*(m^2-1)*se_g1^2)/((m+5.0)*(m-3.0)) );  
-			se_g2=sqrt( (4/(m+5.0)) * ((m^2-1)/(m-3.0)) * se_g1^2 ); 
-
-			md = median(F); #quantile(F, 0.5);
-			iqm = interQuartileMean(F);
-
-			# place the computed statistics in output matrices
-			baseStats[1,i] = minimum;
-			baseStats[2,i] = maximum;
-			baseStats[3,i] = rng;
-
-			baseStats[4,i] = mu;
-			baseStats[5,i] = var;
-			baseStats[6,i] = std_dev;
-			baseStats[7,i] = se;
-			baseStats[8,i] = cv;
-
-			baseStats[9,i] = g1;
-			baseStats[10,i] = g2;
-			baseStats[11,i] = se_g1;
-			baseStats[12,i] = se_g2;
-
-			baseStats[13,i] = md;
-			baseStats[14,i] = iqm;
-		}
-		else {
-			if (kind == 2 | kind == 3) {
-				print("[" + i + "] Categorical");
-				
-				# check if the categorical column has valid values
-				minF = min(F);
-				if (minF <=0) {
-					print("ERROR: Categorical attributes can only take values starting from 1. Encountered a value " + minF + " in attribute " + i);
-				}
-				else {
-					# compute CATEGORICAL statistics on the projected column
-					cat_counts = table(F,1);  # counts for each category
-					num_cat = nrow(cat_counts); # number of categories
-
-					mode = rowIndexMax(t(cat_counts));
-					mx = max(cat_counts)
-					modeArr =  ppred(cat_counts, mx, "==")
-					numModes = sum(modeArr);
-
-					# place the computed statistics in output matrices
-					baseStats[15,i] = num_cat;
-					baseStats[16,i] = mode;
-					baseStats[17,i] = numModes;
-
-          if (max_kind > 1) {
-					  countsArray[,i] = cat_counts;
-					}
-				}
-			}
-		}
-	}
-
-	write(baseStats, $4+"/base.stats");
-	if (max_kind > 1) {
-		write(countsArray, $4+"/categorical.counts");
-	}
-
-}
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+#
+# DML Script to compute univariate statistics for all attributes 
+# in a given data set
+#
+# Three inputs:
+#     $1) A - input data
+#     $2) K - row matrix that denotes the "kind" for each 
+#              attribute
+#             kind=1 for scale, kind=2 for nominal,
+#             kind=3 for ordinal
+#     $3) maxC - maximum number of categories in any categorical 
+#         attribute
+#
+# One output:
+#     $4) output directory in which following three statistics 
+#         files are created
+#         + base.stats - matrix with all 17 statistics (14 scale, 
+#         3 categorical) computed for all attributes
+#         + categorical.counts - matrix in which each column 
+#         gives the category-wise counts for all categories in 
+#         that attribute
+#
+#
+
+A = read($1); # data file
+K = read($2); # attribute kind file
+maxC = $3;  # max number of categories in any categorical attribute
+
+
+if (maxC < 0) {
+	print("ERROR: maximum number maxC of categories must be a positve value.");
+}
+else {
+	
+	
+	# number of features/attributes
+	n = ncol(A);
+
+	# number of data records
+	m = nrow(A);
+
+	# number of statistics
+	numBaseStats = 17; # (14 scale stats, 3 categorical stats)
+
+    max_kind = max(K);
+    
+	# matrices to store computed statistics
+	baseStats = matrix(0, rows=numBaseStats, cols=n);
+	
+	if (maxC > 0) {
+	  countsArray = matrix(0, rows=maxC, cols=n);
+    }
+    	
+	for(i in 1:n) {
+
+		# project out the i^th column
+		F = A[,i];
+
+		kind = castAsScalar(K[1,i]);
+
+		if ( kind == 1 ) {
+			print("[" + i + "] Scale");
+			# compute SCALE statistics on the projected column
+			minimum = min(F);
+			maximum = max(F);
+			rng = maximum - minimum;
+
+			mu = mean(F);
+			m2 = moment(F, 2);
+			m3 = moment(F, 3);
+			m4 = moment(F, 4);
+
+			var = m/(m-1.0)*m2;
+			std_dev = sqrt(var);
+			se = std_dev/sqrt(m);
+			cv = std_dev/mu;
+
+			g1 = m3/(std_dev^3);
+			g2 = m4/(std_dev^4) - 3;
+			#se_g1=sqrt( 6*m*(m-1.0) / ((m-2.0)*(m+1.0)*(m+3.0)) ); 
+			se_g1=sqrt( (6/(m-2.0)) * (m/(m+1.0)) * ((m-1.0)/(m+3.0)) ); 
+
+			#se_g2= sqrt( (4*(m^2-1)*se_g1^2)/((m+5.0)*(m-3.0)) );  
+			se_g2=sqrt( (4/(m+5.0)) * ((m^2-1)/(m-3.0)) * se_g1^2 ); 
+
+			md = median(F); #quantile(F, 0.5);
+			iqm = interQuartileMean(F);
+
+			# place the computed statistics in output matrices
+			baseStats[1,i] = minimum;
+			baseStats[2,i] = maximum;
+			baseStats[3,i] = rng;
+
+			baseStats[4,i] = mu;
+			baseStats[5,i] = var;
+			baseStats[6,i] = std_dev;
+			baseStats[7,i] = se;
+			baseStats[8,i] = cv;
+
+			baseStats[9,i] = g1;
+			baseStats[10,i] = g2;
+			baseStats[11,i] = se_g1;
+			baseStats[12,i] = se_g2;
+
+			baseStats[13,i] = md;
+			baseStats[14,i] = iqm;
+		}
+		else {
+			if (kind == 2 | kind == 3) {
+				print("[" + i + "] Categorical");
+				
+				# check if the categorical column has valid values
+				minF = min(F);
+				if (minF <=0) {
+					print("ERROR: Categorical attributes can only take values starting from 1. Encountered a value " + minF + " in attribute " + i);
+				}
+				else {
+					# compute CATEGORICAL statistics on the projected column
+					cat_counts = table(F,1);  # counts for each category
+					num_cat = nrow(cat_counts); # number of categories
+
+					mode = rowIndexMax(t(cat_counts));
+					mx = max(cat_counts)
+					modeArr =  ppred(cat_counts, mx, "==")
+					numModes = sum(modeArr);
+
+					# place the computed statistics in output matrices
+					baseStats[15,i] = num_cat;
+					baseStats[16,i] = mode;
+					baseStats[17,i] = numModes;
+
+          if (max_kind > 1) {
+					  countsArray[,i] = cat_counts;
+					}
+				}
+			}
+		}
+	}
+
+	write(baseStats, $4+"/base.stats");
+	if (max_kind > 1) {
+		write(countsArray, $4+"/categorical.counts");
+	}
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/parfor/parfor_univariate1.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/parfor/parfor_univariate1.dml b/src/test/scripts/applications/parfor/parfor_univariate1.dml
index e22fd86..1f120ef 100644
--- a/src/test/scripts/applications/parfor/parfor_univariate1.dml
+++ b/src/test/scripts/applications/parfor/parfor_univariate1.dml
@@ -1,166 +1,166 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-#
-# DML Script to compute univariate statistics for all attributes 
-# in a given data set
-#
-# Three inputs:
-#     $1) A - input data
-#     $2) K - row matrix that denotes the "kind" for each 
-#              attribute
-#             kind=1 for scale, kind=2 for nominal,
-#             kind=3 for ordinal
-#     $3) maxC - maximum number of categories in any categorical 
-#         attribute
-#
-# One output:
-#     $4) output directory in which following three statistics 
-#         files are created
-#         + base.stats - matrix with all 17 statistics (14 scale, 
-#         3 categorical) computed for all attributes
-#         + categorical.counts - matrix in which each column 
-#         gives the category-wise counts for all categories in 
-#         that attribute
-#
-#
-
-A = read($1); # data file
-K = read($2); # attribute kind file
-maxC = $3;  # max number of categories in any categorical attribute
-
-
-if (maxC < 0) {
-	print("ERROR: maximum number maxC of categories must be a positve value.");
-}
-else {
-	
-	
-	# number of features/attributes
-	n = ncol(A);
-
-	# number of data records
-	m = nrow(A);
-
-	# number of statistics
-	numBaseStats = 17; # (14 scale stats, 3 categorical stats)
-
-    max_kind = max(K);
-    
-	# matrices to store computed statistics
-	baseStats = matrix(0, rows=numBaseStats, cols=n);
-	
-	if (maxC > 0) {
-	  countsArray = matrix(0, rows=maxC, cols=n);
-    }
-	
-	parfor(i in 1:n, par=4, mode=LOCAL, check=0, opt=NONE) {
-
-		# project out the i^th column
-		F = A[,i];
-
-		kind = castAsScalar(K[1,i]);
-
-		if ( kind == 1 ) {
-			print("[" + i + "] Scale");
-			# compute SCALE statistics on the projected column
-			minimum = min(F);
-			maximum = max(F);
-			rng = maximum - minimum;
-
-			mu = mean(F);
-			m2 = moment(F, 2);
-			m3 = moment(F, 3);
-			m4 = moment(F, 4);
-
-			var = m/(m-1.0)*m2;
-			std_dev = sqrt(var);
-			se = std_dev/sqrt(m);
-			cv = std_dev/mu;
-
-			g1 = m3/(std_dev^3);
-			g2 = m4/(std_dev^4) - 3;
-			#se_g1=sqrt( 6*m*(m-1.0) / ((m-2.0)*(m+1.0)*(m+3.0)) ); 
-			se_g1=sqrt( (6/(m-2.0)) * (m/(m+1.0)) * ((m-1.0)/(m+3.0)) ); 
-
-			#se_g2= sqrt( (4*(m^2-1)*se_g1^2)/((m+5.0)*(m-3.0)) );  
-			se_g2=sqrt( (4/(m+5.0)) * ((m^2-1)/(m-3.0)) * se_g1^2 ); 
-
-			md = median(F); #quantile(F, 0.5);
-			iqm = interQuartileMean(F);
-
-			# place the computed statistics in output matrices
-			baseStats[1,i] = minimum;
-			baseStats[2,i] = maximum;
-			baseStats[3,i] = rng;
-
-			baseStats[4,i] = mu;
-			baseStats[5,i] = var;
-			baseStats[6,i] = std_dev;
-			baseStats[7,i] = se;
-			baseStats[8,i] = cv;
-
-			baseStats[9,i] = g1;
-			baseStats[10,i] = g2;
-			baseStats[11,i] = se_g1;
-			baseStats[12,i] = se_g2;
-
-			baseStats[13,i] = md;
-			baseStats[14,i] = iqm;
-		}
-		else {
-			if (kind == 2 | kind == 3) {
-				print("[" + i + "] Categorical");
-				
-				# check if the categorical column has valid values
-				minF = min(F);
-				if (minF <=0) {
-					print("ERROR: Categorical attributes can only take values starting from 1. Encountered a value " + minF + " in attribute " + i);
-				}
-				else {
-					# compute CATEGORICAL statistics on the projected column
-					cat_counts = table(F,1);  # counts for each category
-					num_cat = nrow(cat_counts); # number of categories
-
-					mode = rowIndexMax(t(cat_counts));
-					mx = max(cat_counts)
-					modeArr =  ppred(cat_counts, mx, "==")
-					numModes = sum(modeArr);
-
-					# place the computed statistics in output matrices
-					baseStats[15,i] = num_cat;
-					baseStats[16,i] = mode;
-					baseStats[17,i] = numModes;
-
-          if (max_kind > 1) {
-					  countsArray[,i] = cat_counts;
-					}
-				}
-			}
-		}
-	}
-
-	write(baseStats, $4+"/base.stats");
-	if (max_kind > 1) {
-		write(countsArray, $4+"/categorical.counts");
-	}
-
-}
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+#
+# DML Script to compute univariate statistics for all attributes 
+# in a given data set
+#
+# Three inputs:
+#     $1) A - input data
+#     $2) K - row matrix that denotes the "kind" for each 
+#              attribute
+#             kind=1 for scale, kind=2 for nominal,
+#             kind=3 for ordinal
+#     $3) maxC - maximum number of categories in any categorical 
+#         attribute
+#
+# One output:
+#     $4) output directory in which following three statistics 
+#         files are created
+#         + base.stats - matrix with all 17 statistics (14 scale, 
+#         3 categorical) computed for all attributes
+#         + categorical.counts - matrix in which each column 
+#         gives the category-wise counts for all categories in 
+#         that attribute
+#
+#
+
+A = read($1); # data file
+K = read($2); # attribute kind file
+maxC = $3;  # max number of categories in any categorical attribute
+
+
+if (maxC < 0) {
+	print("ERROR: maximum number maxC of categories must be a positve value.");
+}
+else {
+	
+	
+	# number of features/attributes
+	n = ncol(A);
+
+	# number of data records
+	m = nrow(A);
+
+	# number of statistics
+	numBaseStats = 17; # (14 scale stats, 3 categorical stats)
+
+    max_kind = max(K);
+    
+	# matrices to store computed statistics
+	baseStats = matrix(0, rows=numBaseStats, cols=n);
+	
+	if (maxC > 0) {
+	  countsArray = matrix(0, rows=maxC, cols=n);
+    }
+	
+	parfor(i in 1:n, par=4, mode=LOCAL, check=0, opt=NONE) {
+
+		# project out the i^th column
+		F = A[,i];
+
+		kind = castAsScalar(K[1,i]);
+
+		if ( kind == 1 ) {
+			print("[" + i + "] Scale");
+			# compute SCALE statistics on the projected column
+			minimum = min(F);
+			maximum = max(F);
+			rng = maximum - minimum;
+
+			mu = mean(F);
+			m2 = moment(F, 2);
+			m3 = moment(F, 3);
+			m4 = moment(F, 4);
+
+			var = m/(m-1.0)*m2;
+			std_dev = sqrt(var);
+			se = std_dev/sqrt(m);
+			cv = std_dev/mu;
+
+			g1 = m3/(std_dev^3);
+			g2 = m4/(std_dev^4) - 3;
+			#se_g1=sqrt( 6*m*(m-1.0) / ((m-2.0)*(m+1.0)*(m+3.0)) ); 
+			se_g1=sqrt( (6/(m-2.0)) * (m/(m+1.0)) * ((m-1.0)/(m+3.0)) ); 
+
+			#se_g2= sqrt( (4*(m^2-1)*se_g1^2)/((m+5.0)*(m-3.0)) );  
+			se_g2=sqrt( (4/(m+5.0)) * ((m^2-1)/(m-3.0)) * se_g1^2 ); 
+
+			md = median(F); #quantile(F, 0.5);
+			iqm = interQuartileMean(F);
+
+			# place the computed statistics in output matrices
+			baseStats[1,i] = minimum;
+			baseStats[2,i] = maximum;
+			baseStats[3,i] = rng;
+
+			baseStats[4,i] = mu;
+			baseStats[5,i] = var;
+			baseStats[6,i] = std_dev;
+			baseStats[7,i] = se;
+			baseStats[8,i] = cv;
+
+			baseStats[9,i] = g1;
+			baseStats[10,i] = g2;
+			baseStats[11,i] = se_g1;
+			baseStats[12,i] = se_g2;
+
+			baseStats[13,i] = md;
+			baseStats[14,i] = iqm;
+		}
+		else {
+			if (kind == 2 | kind == 3) {
+				print("[" + i + "] Categorical");
+				
+				# check if the categorical column has valid values
+				minF = min(F);
+				if (minF <=0) {
+					print("ERROR: Categorical attributes can only take values starting from 1. Encountered a value " + minF + " in attribute " + i);
+				}
+				else {
+					# compute CATEGORICAL statistics on the projected column
+					cat_counts = table(F,1);  # counts for each category
+					num_cat = nrow(cat_counts); # number of categories
+
+					mode = rowIndexMax(t(cat_counts));
+					mx = max(cat_counts)
+					modeArr =  ppred(cat_counts, mx, "==")
+					numModes = sum(modeArr);
+
+					# place the computed statistics in output matrices
+					baseStats[15,i] = num_cat;
+					baseStats[16,i] = mode;
+					baseStats[17,i] = numModes;
+
+          if (max_kind > 1) {
+					  countsArray[,i] = cat_counts;
+					}
+				}
+			}
+		}
+	}
+
+	write(baseStats, $4+"/base.stats");
+	if (max_kind > 1) {
+		write(countsArray, $4+"/categorical.counts");
+	}
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/parfor/parfor_univariate4.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/parfor/parfor_univariate4.dml b/src/test/scripts/applications/parfor/parfor_univariate4.dml
index 1ebfcbd..8953c64 100644
--- a/src/test/scripts/applications/parfor/parfor_univariate4.dml
+++ b/src/test/scripts/applications/parfor/parfor_univariate4.dml
@@ -1,166 +1,166 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-#
-# DML Script to compute univariate statistics for all attributes 
-# in a given data set
-#
-# Three inputs:
-#     $1) A - input data
-#     $2) K - row matrix that denotes the "kind" for each 
-#              attribute
-#             kind=1 for scale, kind=2 for nominal,
-#             kind=3 for ordinal
-#     $3) maxC - maximum number of categories in any categorical 
-#         attribute
-#
-# One output:
-#     $4) output directory in which following three statistics 
-#         files are created
-#         + base.stats - matrix with all 17 statistics (14 scale, 
-#         3 categorical) computed for all attributes
-#         + categorical.counts - matrix in which each column 
-#         gives the category-wise counts for all categories in 
-#         that attribute
-#
-#
-
-A = read($1); # data file
-K = read($2); # attribute kind file
-maxC = $3;  # max number of categories in any categorical attribute
-
-
-if (maxC < 0) {
-	print("ERROR: maximum number maxC of categories must be a positve value.");
-}
-else {
-	
-	
-	# number of features/attributes
-	n = ncol(A);
-
-	# number of data records
-	m = nrow(A);
-
-	# number of statistics
-	numBaseStats = 17; # (14 scale stats, 3 categorical stats)
-
-    max_kind = max(K);
-    
-	# matrices to store computed statistics
-	baseStats = matrix(0, rows=numBaseStats, cols=n);
-	
-	if (maxC > 0) {
-	  countsArray = matrix(0, rows=maxC, cols=n);
-    }
-    
-	parfor(i in 1:n, check=0) {
-
-		# project out the i^th column
-		F = A[,i];
-
-		kind = castAsScalar(K[1,i]);
-
-		if ( kind == 1 ) {
-			print("[" + i + "] Scale");
-			# compute SCALE statistics on the projected column
-			minimum = min(F);
-			maximum = max(F);
-			rng = maximum - minimum;
-
-			mu = mean(F);
-			m2 = moment(F, 2);
-			m3 = moment(F, 3);
-			m4 = moment(F, 4);
-
-			var = m/(m-1.0)*m2;
-			std_dev = sqrt(var);
-			se = std_dev/sqrt(m);
-			cv = std_dev/mu;
-
-			g1 = m3/(std_dev^3);
-			g2 = m4/(std_dev^4) - 3;
-			#se_g1=sqrt( 6*m*(m-1.0) / ((m-2.0)*(m+1.0)*(m+3.0)) ); 
-			se_g1=sqrt( (6/(m-2.0)) * (m/(m+1.0)) * ((m-1.0)/(m+3.0)) ); 
-
-			#se_g2= sqrt( (4*(m^2-1)*se_g1^2)/((m+5.0)*(m-3.0)) );  
-			se_g2=sqrt( (4/(m+5.0)) * ((m^2-1)/(m-3.0)) * se_g1^2 ); 
-
-			md = median(F) #quantile(F, 0.5);
-			iqm = interQuartileMean(F);
-
-			# place the computed statistics in output matrices
-			baseStats[1,i] = minimum;
-			baseStats[2,i] = maximum;
-			baseStats[3,i] = rng;
-
-			baseStats[4,i] = mu;
-			baseStats[5,i] = var;
-			baseStats[6,i] = std_dev;
-			baseStats[7,i] = se;
-			baseStats[8,i] = cv;
-
-			baseStats[9,i] = g1;
-			baseStats[10,i] = g2;
-			baseStats[11,i] = se_g1;
-			baseStats[12,i] = se_g2;
-
-			baseStats[13,i] = md;
-			baseStats[14,i] = iqm;
-		}
-		else {
-			if (kind == 2 | kind == 3) {
-				print("[" + i + "] Categorical");
-				
-				# check if the categorical column has valid values
-				minF = min(F);
-				if (minF <=0) {
-					print("ERROR: Categorical attributes can only take values starting from 1. Encountered a value " + minF + " in attribute " + i);
-				}
-				else {
-					# compute CATEGORICAL statistics on the projected column
-					cat_counts = table(F,1);  # counts for each category
-					num_cat = nrow(cat_counts); # number of categories
-
-					mode = rowIndexMax(t(cat_counts));
-					mx = max(cat_counts)
-					modeArr =  ppred(cat_counts, mx, "==")
-					numModes = sum(modeArr);
-
-					# place the computed statistics in output matrices
-					baseStats[15,i] = num_cat;
-					baseStats[16,i] = mode;
-					baseStats[17,i] = numModes;
-
-          if (max_kind > 1) {
-					  countsArray[,i] = cat_counts;
-					}
-				}
-			}
-		}
-	}
-
-	write(baseStats, $4+"/base.stats");
-	if (max_kind > 1) {
-		write(countsArray, $4+"/categorical.counts");
-	}
-
-}
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+#
+# DML Script to compute univariate statistics for all attributes 
+# in a given data set
+#
+# Three inputs:
+#     $1) A - input data
+#     $2) K - row matrix that denotes the "kind" for each 
+#              attribute
+#             kind=1 for scale, kind=2 for nominal,
+#             kind=3 for ordinal
+#     $3) maxC - maximum number of categories in any categorical 
+#         attribute
+#
+# One output:
+#     $4) output directory in which following three statistics 
+#         files are created
+#         + base.stats - matrix with all 17 statistics (14 scale, 
+#         3 categorical) computed for all attributes
+#         + categorical.counts - matrix in which each column 
+#         gives the category-wise counts for all categories in 
+#         that attribute
+#
+#
+
+A = read($1); # data file
+K = read($2); # attribute kind file
+maxC = $3;  # max number of categories in any categorical attribute
+
+
+if (maxC < 0) {
+	print("ERROR: maximum number maxC of categories must be a positve value.");
+}
+else {
+	
+	
+	# number of features/attributes
+	n = ncol(A);
+
+	# number of data records
+	m = nrow(A);
+
+	# number of statistics
+	numBaseStats = 17; # (14 scale stats, 3 categorical stats)
+
+    max_kind = max(K);
+    
+	# matrices to store computed statistics
+	baseStats = matrix(0, rows=numBaseStats, cols=n);
+	
+	if (maxC > 0) {
+	  countsArray = matrix(0, rows=maxC, cols=n);
+    }
+    
+	parfor(i in 1:n, check=0) {
+
+		# project out the i^th column
+		F = A[,i];
+
+		kind = castAsScalar(K[1,i]);
+
+		if ( kind == 1 ) {
+			print("[" + i + "] Scale");
+			# compute SCALE statistics on the projected column
+			minimum = min(F);
+			maximum = max(F);
+			rng = maximum - minimum;
+
+			mu = mean(F);
+			m2 = moment(F, 2);
+			m3 = moment(F, 3);
+			m4 = moment(F, 4);
+
+			var = m/(m-1.0)*m2;
+			std_dev = sqrt(var);
+			se = std_dev/sqrt(m);
+			cv = std_dev/mu;
+
+			g1 = m3/(std_dev^3);
+			g2 = m4/(std_dev^4) - 3;
+			#se_g1=sqrt( 6*m*(m-1.0) / ((m-2.0)*(m+1.0)*(m+3.0)) ); 
+			se_g1=sqrt( (6/(m-2.0)) * (m/(m+1.0)) * ((m-1.0)/(m+3.0)) ); 
+
+			#se_g2= sqrt( (4*(m^2-1)*se_g1^2)/((m+5.0)*(m-3.0)) );  
+			se_g2=sqrt( (4/(m+5.0)) * ((m^2-1)/(m-3.0)) * se_g1^2 ); 
+
+			md = median(F) #quantile(F, 0.5);
+			iqm = interQuartileMean(F);
+
+			# place the computed statistics in output matrices
+			baseStats[1,i] = minimum;
+			baseStats[2,i] = maximum;
+			baseStats[3,i] = rng;
+
+			baseStats[4,i] = mu;
+			baseStats[5,i] = var;
+			baseStats[6,i] = std_dev;
+			baseStats[7,i] = se;
+			baseStats[8,i] = cv;
+
+			baseStats[9,i] = g1;
+			baseStats[10,i] = g2;
+			baseStats[11,i] = se_g1;
+			baseStats[12,i] = se_g2;
+
+			baseStats[13,i] = md;
+			baseStats[14,i] = iqm;
+		}
+		else {
+			if (kind == 2 | kind == 3) {
+				print("[" + i + "] Categorical");
+				
+				# check if the categorical column has valid values
+				minF = min(F);
+				if (minF <=0) {
+					print("ERROR: Categorical attributes can only take values starting from 1. Encountered a value " + minF + " in attribute " + i);
+				}
+				else {
+					# compute CATEGORICAL statistics on the projected column
+					cat_counts = table(F,1);  # counts for each category
+					num_cat = nrow(cat_counts); # number of categories
+
+					mode = rowIndexMax(t(cat_counts));
+					mx = max(cat_counts)
+					modeArr =  ppred(cat_counts, mx, "==")
+					numModes = sum(modeArr);
+
+					# place the computed statistics in output matrices
+					baseStats[15,i] = num_cat;
+					baseStats[16,i] = mode;
+					baseStats[17,i] = numModes;
+
+          if (max_kind > 1) {
+					  countsArray[,i] = cat_counts;
+					}
+				}
+			}
+		}
+	}
+
+	write(baseStats, $4+"/base.stats");
+	if (max_kind > 1) {
+		write(countsArray, $4+"/categorical.counts");
+	}
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/validation/LinearLogisticRegression.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/validation/LinearLogisticRegression.dml b/src/test/scripts/applications/validation/LinearLogisticRegression.dml
index b5d3955..473b2dc 100644
--- a/src/test/scripts/applications/validation/LinearLogisticRegression.dml
+++ b/src/test/scripts/applications/validation/LinearLogisticRegression.dml
@@ -1,246 +1,246 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# Solves Linear Logistic Regression using Trust Region methods. 
-# Can be adapted for L2-SVMs and more general unconstrained optimization problems also
-# setup optimization parameters (See: Trust Region Newton Method for Logistic Regression, Lin, Weng and Keerthi, JMLR 9 (2008) 627-650)
-# The parameter C is the weight that the algorithm puts on the loss function, instead of the regularizer.
-# if intercept = 1, then w has one extra value than the dimensions of X. Predictions are computed as X*w[1:n-1,1] + w[n,1]
-# Arguments: 1.X 2.y 3.intercept 4.max_iteration 5.C 6.w
-
-# 100K dataset
-# hadoop jar SystemML.jar -f LinearLogisticRegression.dml -args itau/logreg/X_100k_500 itau/logreg/y_100k 0 50 0.001 itau/logreg/w_100k_1
-
-# 1M dataset
-# hadoop jar SystemML.jar -f LinearLogisticRegression.dml -args itau/logreg/X_100m_5k itau/logreg/y_100m_1 0 50 0.001 itau/demo/logreg/w_100m_1
-
-
-# internal parameters
-tol = 0.001
-eta0 = 0.0001
-eta1 = 0.25
-eta2 = 0.75
-sigma1 = 0.25
-sigma2 = 0.5
-sigma3 = 4.0
-psi = 0.1 
-
-# read training data files
-X = read($1)
-intercept = $3
-
-D = ncol(X)
-#initialize w
-w = Rand(rows=D, cols=1, min=0.0, max=0.0);
-zeros_D = Rand(rows = D, cols = 1, min = 0.0, max = 0.0);
-
-if (intercept == 1) {
- num_samples = nrow(X);
- ones  = Rand(rows=num_samples, cols=1, min=1, max=1, pdf="uniform");
- X = append(X, ones);
- zero_matrix = Rand(rows=1, cols=1, min=0.0, max=0.0);
- w = t(append(t(w), zero_matrix));
- zeros_D = t(append(t(zeros_D), zero_matrix));
-}
-
-N = nrow(X)
-
-# read (training and test) labels
-y = read($2)
-
-maxiter = $4
-maxinneriter = 1000
-
-C = $5
-
-e = Rand(rows=1, cols=1, min=1.0, max=1.0); 
-o = X %*% w
-logistic = 1.0/(1.0 + exp( -y * o))
-
-obj = 0.5 * t(w) %*% w + C*sum(-log(logistic))
-grad = w + C*t(X) %*% ((logistic - 1)*y)
-logisticD = logistic*(1-logistic)
-delta = sqrt(sum(grad*grad))
-
-# number of iterations
-iter = 0
-
-# starting point for CG
-
-# VS: change
-zeros_N = Rand(rows = N, cols = 1, min = 0.0, max = 0.0);
-
-# boolean for convergence check
-
-converge = (delta < tol) | (iter > maxiter)
-norm_r2 = sum(grad*grad)
-
-# VS: change
-norm_grad = sqrt(norm_r2)
-norm_grad_initial = norm_grad
-
-alpha = t(w) %*% w
-alpha2 = alpha
-
-while(!converge) {
- 
- norm_grad = sqrt(sum(grad*grad))
- 
- print("-- Outer Iteration = " + iter)
- objScalar = castAsScalar(obj)
- print("     Iterations = " + iter + ", Objective = " + objScalar + ", Gradient Norm = " + norm_grad)
- 
- # SOLVE TRUST REGION SUB-PROBLEM
- s = zeros_D
- os = zeros_N
- r = -grad
- d = r
- inneriter = 0
- innerconverge = ( sqrt(sum(r*r)) <= psi * norm_grad) 
- while (!innerconverge) {
-     inneriter = inneriter + 1
-  norm_r2 = sum(r*r)
-  od = X %*% d
-  Hd = d + C*(t(X) %*% (logisticD*od))
-  alpha_deno = t(d) %*% Hd 
-  alpha = norm_r2 / alpha_deno
- 
-  s = s + castAsScalar(alpha) * d
-  os = os + castAsScalar(alpha) * od
-
-  sts = t(s) %*% s
-  delta2 = delta*delta 
-  stsScalar = castAsScalar(sts)
-  
-  shouldBreak = FALSE;  # to mimic "break" in the following 'if' condition
-  if (stsScalar > delta2) {
-      print("      --- cg reaches trust region boundary")
-   s = s - castAsScalar(alpha) * d
-   os = os - castAsScalar(alpha) * od
-   std = t(s) %*% d
-   dtd = t(d) %*% d
-   sts = t(s) %*% s
-   rad = sqrt(std*std + dtd*(delta2 - sts))
-   stdScalar = castAsScalar(std)
-   if(stdScalar >= 0) {
-    tau = (delta2 - sts)/(std + rad)
-   } 
-   else {
-    tau = (rad - std)/dtd
-   }
-      
-   s = s + castAsScalar(tau) * d
-   os = os + castAsScalar(tau) * od
-   r = r - castAsScalar(tau) * Hd
-   
-   #break
-   shouldBreak = TRUE;
-   innerconverge = TRUE;
-  
-  } 
-  
-  if (!shouldBreak) {
-   r = r - castAsScalar(alpha) * Hd
-   old_norm_r2 = norm_r2 
-   norm_r2 = sum(r*r)
-   beta = norm_r2/old_norm_r2
-   d = r + beta*d
-   innerconverge = (sqrt(norm_r2) <= psi * norm_grad) | (inneriter > maxinneriter)
-  }
- }  
- 
- print("      --- Inner CG Iteration =  " + inneriter)
- # END TRUST REGION SUB-PROBLEM
- # compute rho, update w, obtain delta
- gs = t(s) %*% grad
- qk = -0.5*(gs - (t(s) %*% r))
- 
- wnew = w + s 
- onew = o + os
- logisticnew = 1.0/(1.0 + exp(-y * onew ))
- objnew = 0.5 * t(wnew) %*% wnew + C * sum(-log(logisticnew))
- 
- actred = (obj - objnew)
- actredScalar = castAsScalar(actred)
- rho = actred / qk
- qkScalar = castAsScalar(qk)
- rhoScalar = castAsScalar(rho);
- snorm = sqrt(sum( s * s ))
-
- print("     Actual    = " + actredScalar)
- print("     Predicted = " + qkScalar)
- 
- if (iter==0) {
-    delta = min(delta, snorm)
- }
- alpha2 = objnew - obj - gs
- alpha2Scalar = castAsScalar(alpha2)
- if (alpha2Scalar <= 0) {
-    alpha = sigma3*e
- } 
- else {
-    ascalar = max(sigma1, -0.5*castAsScalar(gs)/alpha2Scalar)  
-    alpha = ascalar*e
- }
-
- if (rhoScalar > eta0) {
-  
-  w = wnew
-  o = onew
-  grad = w + C*t(X) %*% ((logisticnew - 1) * y )
-  norm_grad = sqrt(sum(grad*grad))
-  logisticD = logisticnew * (1 - logisticnew)
-  obj = objnew 
- } 
-
- alphaScalar = castAsScalar(alpha)
- if (rhoScalar < eta0){
-  delta = min(max( alphaScalar , sigma1) * snorm, sigma2 * delta )
- }
- else {
-  if (rhoScalar < eta1){
-   delta = max(sigma1 * delta, min( alphaScalar  * snorm, sigma2 * delta))
-  }
-  else { 
-   if (rhoScalar < eta2) {
-    delta = max(sigma1 * delta, min( alphaScalar * snorm, sigma3 * delta))
-   }
-   else {
-    delta = max(delta, min( alphaScalar * snorm, sigma3 * delta))
-   }
-  }
- } 
- 
- o2 = y * o
- correct = sum(ppred(o2, 0, ">"))
- accuracy = correct*100.0/N 
- iter = iter + 1
- #converge = (norm_grad < (tol * norm_grad_initial)) | (iter > maxiter)
- converge = (norm_grad < tol) | (iter > maxiter)
-
- print("     Delta =  " + delta)
- print("     Training Accuracy =  " +  accuracy)
- print("     Correct =  " + correct)
- print("     OuterIter =  " + iter)
- print("     Converge =  " + converge)
-} 
-
-write(w, $6, format="text");
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# Solves Linear Logistic Regression using Trust Region methods. 
+# Can be adapted for L2-SVMs and more general unconstrained optimization problems also
+# setup optimization parameters (See: Trust Region Newton Method for Logistic Regression, Lin, Weng and Keerthi, JMLR 9 (2008) 627-650)
+# The parameter C is the weight that the algorithm puts on the loss function, instead of the regularizer.
+# if intercept = 1, then w has one extra value than the dimensions of X. Predictions are computed as X*w[1:n-1,1] + w[n,1]
+# Arguments: 1.X 2.y 3.intercept 4.max_iteration 5.C 6.w
+
+# 100K dataset
+# hadoop jar SystemML.jar -f LinearLogisticRegression.dml -args itau/logreg/X_100k_500 itau/logreg/y_100k 0 50 0.001 itau/logreg/w_100k_1
+
+# 1M dataset
+# hadoop jar SystemML.jar -f LinearLogisticRegression.dml -args itau/logreg/X_100m_5k itau/logreg/y_100m_1 0 50 0.001 itau/demo/logreg/w_100m_1
+
+
+# internal parameters
+tol = 0.001
+eta0 = 0.0001
+eta1 = 0.25
+eta2 = 0.75
+sigma1 = 0.25
+sigma2 = 0.5
+sigma3 = 4.0
+psi = 0.1 
+
+# read training data files
+X = read($1)
+intercept = $3
+
+D = ncol(X)
+#initialize w
+w = Rand(rows=D, cols=1, min=0.0, max=0.0);
+zeros_D = Rand(rows = D, cols = 1, min = 0.0, max = 0.0);
+
+if (intercept == 1) {
+ num_samples = nrow(X);
+ ones  = Rand(rows=num_samples, cols=1, min=1, max=1, pdf="uniform");
+ X = append(X, ones);
+ zero_matrix = Rand(rows=1, cols=1, min=0.0, max=0.0);
+ w = t(append(t(w), zero_matrix));
+ zeros_D = t(append(t(zeros_D), zero_matrix));
+}
+
+N = nrow(X)
+
+# read (training and test) labels
+y = read($2)
+
+maxiter = $4
+maxinneriter = 1000
+
+C = $5
+
+e = Rand(rows=1, cols=1, min=1.0, max=1.0); 
+o = X %*% w
+logistic = 1.0/(1.0 + exp( -y * o))
+
+obj = 0.5 * t(w) %*% w + C*sum(-log(logistic))
+grad = w + C*t(X) %*% ((logistic - 1)*y)
+logisticD = logistic*(1-logistic)
+delta = sqrt(sum(grad*grad))
+
+# number of iterations
+iter = 0
+
+# starting point for CG
+
+# VS: change
+zeros_N = Rand(rows = N, cols = 1, min = 0.0, max = 0.0);
+
+# boolean for convergence check
+
+converge = (delta < tol) | (iter > maxiter)
+norm_r2 = sum(grad*grad)
+
+# VS: change
+norm_grad = sqrt(norm_r2)
+norm_grad_initial = norm_grad
+
+alpha = t(w) %*% w
+alpha2 = alpha
+
+while(!converge) {
+ 
+ norm_grad = sqrt(sum(grad*grad))
+ 
+ print("-- Outer Iteration = " + iter)
+ objScalar = castAsScalar(obj)
+ print("     Iterations = " + iter + ", Objective = " + objScalar + ", Gradient Norm = " + norm_grad)
+ 
+ # SOLVE TRUST REGION SUB-PROBLEM
+ s = zeros_D
+ os = zeros_N
+ r = -grad
+ d = r
+ inneriter = 0
+ innerconverge = ( sqrt(sum(r*r)) <= psi * norm_grad) 
+ while (!innerconverge) {
+     inneriter = inneriter + 1
+  norm_r2 = sum(r*r)
+  od = X %*% d
+  Hd = d + C*(t(X) %*% (logisticD*od))
+  alpha_deno = t(d) %*% Hd 
+  alpha = norm_r2 / alpha_deno
+ 
+  s = s + castAsScalar(alpha) * d
+  os = os + castAsScalar(alpha) * od
+
+  sts = t(s) %*% s
+  delta2 = delta*delta 
+  stsScalar = castAsScalar(sts)
+  
+  shouldBreak = FALSE;  # to mimic "break" in the following 'if' condition
+  if (stsScalar > delta2) {
+      print("      --- cg reaches trust region boundary")
+   s = s - castAsScalar(alpha) * d
+   os = os - castAsScalar(alpha) * od
+   std = t(s) %*% d
+   dtd = t(d) %*% d
+   sts = t(s) %*% s
+   rad = sqrt(std*std + dtd*(delta2 - sts))
+   stdScalar = castAsScalar(std)
+   if(stdScalar >= 0) {
+    tau = (delta2 - sts)/(std + rad)
+   } 
+   else {
+    tau = (rad - std)/dtd
+   }
+      
+   s = s + castAsScalar(tau) * d
+   os = os + castAsScalar(tau) * od
+   r = r - castAsScalar(tau) * Hd
+   
+   #break
+   shouldBreak = TRUE;
+   innerconverge = TRUE;
+  
+  } 
+  
+  if (!shouldBreak) {
+   r = r - castAsScalar(alpha) * Hd
+   old_norm_r2 = norm_r2 
+   norm_r2 = sum(r*r)
+   beta = norm_r2/old_norm_r2
+   d = r + beta*d
+   innerconverge = (sqrt(norm_r2) <= psi * norm_grad) | (inneriter > maxinneriter)
+  }
+ }  
+ 
+ print("      --- Inner CG Iteration =  " + inneriter)
+ # END TRUST REGION SUB-PROBLEM
+ # compute rho, update w, obtain delta
+ gs = t(s) %*% grad
+ qk = -0.5*(gs - (t(s) %*% r))
+ 
+ wnew = w + s 
+ onew = o + os
+ logisticnew = 1.0/(1.0 + exp(-y * onew ))
+ objnew = 0.5 * t(wnew) %*% wnew + C * sum(-log(logisticnew))
+ 
+ actred = (obj - objnew)
+ actredScalar = castAsScalar(actred)
+ rho = actred / qk
+ qkScalar = castAsScalar(qk)
+ rhoScalar = castAsScalar(rho);
+ snorm = sqrt(sum( s * s ))
+
+ print("     Actual    = " + actredScalar)
+ print("     Predicted = " + qkScalar)
+ 
+ if (iter==0) {
+    delta = min(delta, snorm)
+ }
+ alpha2 = objnew - obj - gs
+ alpha2Scalar = castAsScalar(alpha2)
+ if (alpha2Scalar <= 0) {
+    alpha = sigma3*e
+ } 
+ else {
+    ascalar = max(sigma1, -0.5*castAsScalar(gs)/alpha2Scalar)  
+    alpha = ascalar*e
+ }
+
+ if (rhoScalar > eta0) {
+  
+  w = wnew
+  o = onew
+  grad = w + C*t(X) %*% ((logisticnew - 1) * y )
+  norm_grad = sqrt(sum(grad*grad))
+  logisticD = logisticnew * (1 - logisticnew)
+  obj = objnew 
+ } 
+
+ alphaScalar = castAsScalar(alpha)
+ if (rhoScalar < eta0){
+  delta = min(max( alphaScalar , sigma1) * snorm, sigma2 * delta )
+ }
+ else {
+  if (rhoScalar < eta1){
+   delta = max(sigma1 * delta, min( alphaScalar  * snorm, sigma2 * delta))
+  }
+  else { 
+   if (rhoScalar < eta2) {
+    delta = max(sigma1 * delta, min( alphaScalar * snorm, sigma3 * delta))
+   }
+   else {
+    delta = max(delta, min( alphaScalar * snorm, sigma3 * delta))
+   }
+  }
+ } 
+ 
+ o2 = y * o
+ correct = sum(ppred(o2, 0, ">"))
+ accuracy = correct*100.0/N 
+ iter = iter + 1
+ #converge = (norm_grad < (tol * norm_grad_initial)) | (iter > maxiter)
+ converge = (norm_grad < tol) | (iter > maxiter)
+
+ print("     Delta =  " + delta)
+ print("     Training Accuracy =  " +  accuracy)
+ print("     Correct =  " + correct)
+ print("     OuterIter =  " + iter)
+ print("     Converge =  " + converge)
+} 
+
+write(w, $6, format="text");

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/validation/genRandData4LogisticRegression.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/validation/genRandData4LogisticRegression.dml b/src/test/scripts/applications/validation/genRandData4LogisticRegression.dml
index d06de67..b42a315 100644
--- a/src/test/scripts/applications/validation/genRandData4LogisticRegression.dml
+++ b/src/test/scripts/applications/validation/genRandData4LogisticRegression.dml
@@ -1,122 +1,122 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# generates random data to test linear logistic regression
-
-# 100K dataset
-# hadoop jar SystemML.jar -f genRandData4LogisticRegression.dml -args 100000 500 0.0 5.0 itau/logreg/w_100k itau/logreg/X_100k_500 itau/logreg/y_100k 0 0 0.01
-
-# 1M dataset
-# hadoop jar SystemML.jar -f genRandData4LogisticRegression.dml -args 1000000 1000 0.0 5.0 itau/logreg/w_1m itau/logreg/X_1m_1k /logreg/y_1m 0 0 0.0001
-
-# $1 is number of samples
-# $2 is number of features (independent variables)
-# $3 is the mean of the linear form (w^T X)
-# $4 is the st.dev. of the linear form (w^T X)
-# $5 is location to store generated weights
-# $6 is location to store generated data
-# $7 is location to store generated labels
-# $8 addNoise. if 0 then no noise is added, to add noise set this to 1
-# $9 is 0 if no intercept and 1 if there is intercept
-# $10 controls sparsity in the generated data
-
-numSamples = $1
-numFeatures = $2
-meanLF = $3
-sigmaLF = $4
-addNoise = $8
-b = $9
-
-X = Rand (rows=numSamples, cols=numFeatures, min=-1, max=2, pdf="uniform", seed=0, sparsity=$10);
-w = Rand (rows=numFeatures, cols=1, min=-1, max=1, pdf="uniform", seed=0)
-
-if (b != 0) {
-	b_mat = Rand (rows=numSamples, cols=1, min=1, max=1);
-    X = append (X, b_mat);
-    numFeatures_plus_one = numFeatures + 1;
-    w = Rand (rows=numFeatures_plus_one, cols=1, min=-1, max=1, pdf="uniform", seed=0);
-}
-
-[w, new_sigmaLF] = scaleWeights (X, w, meanLF, sigmaLF);
-if (sigmaLF != new_sigmaLF) {
-    print ("The standard deviation requirement on the linear form is TOO TIGHT!");
-    print ("We relaxed sigmaLF from " + sigmaLF + " to " + new_sigmaLF + "."); 
-}
-ot = X %*% w;
-
-if (b != 0) {
-    X = X [, 1:numFeatures];
-}
-
-emp_meanLF = sum (ot) / numSamples;
-emp_sigmaLF = sqrt (sum (ot * ot) / numSamples - emp_meanLF * emp_meanLF);
-print ("Empirical meanLF = " + emp_meanLF + ";   Empirical sigmaLF = " + emp_sigmaLF);
-
-prob = 1 / (1 + exp (- ot));
-
-if(addNoise == 1){
-	r = Rand(rows=numSamples, cols=1, min=0, max=1, pdf="uniform", seed=0)
-}else{
-	print("this data generator generates the same dataset for both noise=0 and noise=1")
-	r = Rand(rows=numSamples, cols=1, min=0, max=1, pdf="uniform", seed=0)
-	#r = Rand(rows=numSamples, cols=1, min=0.5, max=0.5, pdf="uniform")
-}
-
-print ("nrow(prob) = " + nrow(prob) + ", ncol(prob) = " + ncol(prob) + ";  nrow(r) = " + nrow(r) + ", ncol(r) = " + ncol(r));
-
-Y = 1 - 2*ppred(prob, r, "<")
-
-write (w, $5, format="text");
-write (X, $6, format="binary");
-write (Y, $7, format="binary");
-
-
-# Shifts and scales the weights to ensure the desired statistics for Linear Form = w^T X
-# Used in data and/or weight generation in the testing of GLM, Logistic Regression etc.
-# new_sigmaLF == sigmaLF if successful, new_sigmaLF > sigmaLF if had to relax this constraint
-scaleWeights = 
-    function (Matrix[double] X_data, Matrix[double] w_unscaled, double meanLF, double sigmaLF)
-    return (Matrix[double] w_scaled, double new_sigmaLF)
-{
-    numFeatures = nrow (w_unscaled);
-    W_ext = Rand (rows = numFeatures, cols = 2, min = 1, max = 1);
-    W_ext [, 1] = w_unscaled;
-    S1 = colSums (X_data %*% W_ext);
-    TF = Rand (rows = 2, cols = 2, min = 1, max = 1);
-    TF [1, 1] = S1 [1, 1] * meanLF * nrow (X_data) / castAsScalar (S1 %*% t(S1));
-    TF [1, 2] = S1 [1, 2];
-    TF [2, 1] = S1 [1, 2] * meanLF * nrow (X_data) / castAsScalar (S1 %*% t(S1));
-    TF [2, 2] = - S1 [1, 1];
-    TF = W_ext %*% TF;
-    Q = t(TF) %*% t(X_data) %*% X_data %*% TF;
-    Q [1, 1] = Q [1, 1] - nrow (X_data) * meanLF * meanLF;
-    new_sigmaLF = sigmaLF;
-    discr = castAsScalar (Q [1, 1] * Q [2, 2] - Q [1, 2] * Q [2, 1] - nrow (X_data) * Q [2, 2] * sigmaLF * sigmaLF);
-    if (discr > 0.0) {
-        new_sigmaLF = sqrt (castAsScalar ((Q [1, 1] * Q [2, 2] - Q [1, 2] * Q [2, 1]) / (nrow (X_data) * Q [2, 2])));
-        discr = -0.0;
-    }
-    t = Rand (rows = 2, cols = 1, min = 1, max = 1);
-    t [2, 1] = (- Q [1, 2] + sqrt (- discr)) / Q [2, 2];
-    w_scaled = TF %*% t;
-}
-
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# generates random data to test linear logistic regression
+
+# 100K dataset
+# hadoop jar SystemML.jar -f genRandData4LogisticRegression.dml -args 100000 500 0.0 5.0 itau/logreg/w_100k itau/logreg/X_100k_500 itau/logreg/y_100k 0 0 0.01
+
+# 1M dataset
+# hadoop jar SystemML.jar -f genRandData4LogisticRegression.dml -args 1000000 1000 0.0 5.0 itau/logreg/w_1m itau/logreg/X_1m_1k /logreg/y_1m 0 0 0.0001
+
+# $1 is number of samples
+# $2 is number of features (independent variables)
+# $3 is the mean of the linear form (w^T X)
+# $4 is the st.dev. of the linear form (w^T X)
+# $5 is location to store generated weights
+# $6 is location to store generated data
+# $7 is location to store generated labels
+# $8 addNoise. if 0 then no noise is added, to add noise set this to 1
+# $9 is 0 if no intercept and 1 if there is intercept
+# $10 controls sparsity in the generated data
+
+numSamples = $1
+numFeatures = $2
+meanLF = $3
+sigmaLF = $4
+addNoise = $8
+b = $9
+
+X = Rand (rows=numSamples, cols=numFeatures, min=-1, max=2, pdf="uniform", seed=0, sparsity=$10);
+w = Rand (rows=numFeatures, cols=1, min=-1, max=1, pdf="uniform", seed=0)
+
+if (b != 0) {
+	b_mat = Rand (rows=numSamples, cols=1, min=1, max=1);
+    X = append (X, b_mat);
+    numFeatures_plus_one = numFeatures + 1;
+    w = Rand (rows=numFeatures_plus_one, cols=1, min=-1, max=1, pdf="uniform", seed=0);
+}
+
+[w, new_sigmaLF] = scaleWeights (X, w, meanLF, sigmaLF);
+if (sigmaLF != new_sigmaLF) {
+    print ("The standard deviation requirement on the linear form is TOO TIGHT!");
+    print ("We relaxed sigmaLF from " + sigmaLF + " to " + new_sigmaLF + "."); 
+}
+ot = X %*% w;
+
+if (b != 0) {
+    X = X [, 1:numFeatures];
+}
+
+emp_meanLF = sum (ot) / numSamples;
+emp_sigmaLF = sqrt (sum (ot * ot) / numSamples - emp_meanLF * emp_meanLF);
+print ("Empirical meanLF = " + emp_meanLF + ";   Empirical sigmaLF = " + emp_sigmaLF);
+
+prob = 1 / (1 + exp (- ot));
+
+if(addNoise == 1){
+	r = Rand(rows=numSamples, cols=1, min=0, max=1, pdf="uniform", seed=0)
+}else{
+	print("this data generator generates the same dataset for both noise=0 and noise=1")
+	r = Rand(rows=numSamples, cols=1, min=0, max=1, pdf="uniform", seed=0)
+	#r = Rand(rows=numSamples, cols=1, min=0.5, max=0.5, pdf="uniform")
+}
+
+print ("nrow(prob) = " + nrow(prob) + ", ncol(prob) = " + ncol(prob) + ";  nrow(r) = " + nrow(r) + ", ncol(r) = " + ncol(r));
+
+Y = 1 - 2*ppred(prob, r, "<")
+
+write (w, $5, format="text");
+write (X, $6, format="binary");
+write (Y, $7, format="binary");
+
+
+# Shifts and scales the weights to ensure the desired statistics for Linear Form = w^T X
+# Used in data and/or weight generation in the testing of GLM, Logistic Regression etc.
+# new_sigmaLF == sigmaLF if successful, new_sigmaLF > sigmaLF if had to relax this constraint
+scaleWeights = 
+    function (Matrix[double] X_data, Matrix[double] w_unscaled, double meanLF, double sigmaLF)
+    return (Matrix[double] w_scaled, double new_sigmaLF)
+{
+    numFeatures = nrow (w_unscaled);
+    W_ext = Rand (rows = numFeatures, cols = 2, min = 1, max = 1);
+    W_ext [, 1] = w_unscaled;
+    S1 = colSums (X_data %*% W_ext);
+    TF = Rand (rows = 2, cols = 2, min = 1, max = 1);
+    TF [1, 1] = S1 [1, 1] * meanLF * nrow (X_data) / castAsScalar (S1 %*% t(S1));
+    TF [1, 2] = S1 [1, 2];
+    TF [2, 1] = S1 [1, 2] * meanLF * nrow (X_data) / castAsScalar (S1 %*% t(S1));
+    TF [2, 2] = - S1 [1, 1];
+    TF = W_ext %*% TF;
+    Q = t(TF) %*% t(X_data) %*% X_data %*% TF;
+    Q [1, 1] = Q [1, 1] - nrow (X_data) * meanLF * meanLF;
+    new_sigmaLF = sigmaLF;
+    discr = castAsScalar (Q [1, 1] * Q [2, 2] - Q [1, 2] * Q [2, 1] - nrow (X_data) * Q [2, 2] * sigmaLF * sigmaLF);
+    if (discr > 0.0) {
+        new_sigmaLF = sqrt (castAsScalar ((Q [1, 1] * Q [2, 2] - Q [1, 2] * Q [2, 1]) / (nrow (X_data) * Q [2, 2])));
+        discr = -0.0;
+    }
+    t = Rand (rows = 2, cols = 1, min = 1, max = 1);
+    t [2, 1] = (- Q [1, 2] + sqrt (- discr)) / Q [2, 2];
+    w_scaled = TF %*% t;
+}
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/welchTTest/welchTTest.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/welchTTest/welchTTest.R b/src/test/scripts/applications/welchTTest/welchTTest.R
index 66da912..ff74d71 100644
--- a/src/test/scripts/applications/welchTTest/welchTTest.R
+++ b/src/test/scripts/applications/welchTTest/welchTTest.R
@@ -1,49 +1,49 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-args <- commandArgs(TRUE)
-library(Matrix)
-
-posSamples = readMM(paste(args[1], "posSamples.mtx", sep=""))
-negSamples = readMM(paste(args[1], "negSamples.mtx", sep=""))
-
-#computing sample sizes
-posSampleSize = nrow(posSamples)
-negSampleSize = nrow(negSamples)
-
-#computing means
-posSampleMeans = colMeans(posSamples)
-negSampleMeans = colMeans(negSamples)
-
-#computing (unbiased) variances
-posSampleVariances = (colSums(posSamples^2) - posSampleSize * posSampleMeans^2) / (posSampleSize-1)
-negSampleVariances = (colSums(negSamples^2) - negSampleSize * negSampleMeans^2) / (negSampleSize-1)
-
-#computing t-statistics and degrees of freedom
-t_statistics = (posSampleMeans - negSampleMeans) / sqrt(posSampleVariances/posSampleSize + negSampleVariances/negSampleSize)
-degrees_of_freedom = round(((posSampleVariances/posSampleSize + negSampleVariances/negSampleSize) ^ 2) / (posSampleVariances^2/(posSampleSize^2 * (posSampleSize-1)) + negSampleVariances^2/(negSampleSize^2 * (negSampleSize-1))))
-
-#R will write a vector as a 1-column matrix, forcing it to write a 1-row matrix
-t_statistics_mat = matrix(t_statistics, 1, length(t_statistics))
-degrees_of_freedom_mat = matrix(degrees_of_freedom, 1, length(degrees_of_freedom))
-
-writeMM(as(t_statistics_mat, "CsparseMatrix"), paste(args[2], "t_statistics", sep=""))
-writeMM(as(degrees_of_freedom_mat, "CsparseMatrix"), paste(args[2], "degrees_of_freedom", sep=""))
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+args <- commandArgs(TRUE)
+library(Matrix)
+
+posSamples = readMM(paste(args[1], "posSamples.mtx", sep=""))
+negSamples = readMM(paste(args[1], "negSamples.mtx", sep=""))
+
+#computing sample sizes
+posSampleSize = nrow(posSamples)
+negSampleSize = nrow(negSamples)
+
+#computing means
+posSampleMeans = colMeans(posSamples)
+negSampleMeans = colMeans(negSamples)
+
+#computing (unbiased) variances
+posSampleVariances = (colSums(posSamples^2) - posSampleSize * posSampleMeans^2) / (posSampleSize-1)
+negSampleVariances = (colSums(negSamples^2) - negSampleSize * negSampleMeans^2) / (negSampleSize-1)
+
+#computing t-statistics and degrees of freedom
+t_statistics = (posSampleMeans - negSampleMeans) / sqrt(posSampleVariances/posSampleSize + negSampleVariances/negSampleSize)
+degrees_of_freedom = round(((posSampleVariances/posSampleSize + negSampleVariances/negSampleSize) ^ 2) / (posSampleVariances^2/(posSampleSize^2 * (posSampleSize-1)) + negSampleVariances^2/(negSampleSize^2 * (negSampleSize-1))))
+
+#R will write a vector as a 1-column matrix, forcing it to write a 1-row matrix
+t_statistics_mat = matrix(t_statistics, 1, length(t_statistics))
+degrees_of_freedom_mat = matrix(degrees_of_freedom, 1, length(degrees_of_freedom))
+
+writeMM(as(t_statistics_mat, "CsparseMatrix"), paste(args[2], "t_statistics", sep=""))
+writeMM(as(degrees_of_freedom_mat, "CsparseMatrix"), paste(args[2], "degrees_of_freedom", sep=""))

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/welchTTest/welchTTest.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/welchTTest/welchTTest.dml b/src/test/scripts/applications/welchTTest/welchTTest.dml
index 4e42f03..7bc0144 100644
--- a/src/test/scripts/applications/welchTTest/welchTTest.dml
+++ b/src/test/scripts/applications/welchTTest/welchTTest.dml
@@ -1,43 +1,43 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-
-posSamples = read($1, format="text")
-negSamples = read($2, format="text")
-
-#computing sample sizes
-posSampleSize = nrow(posSamples)
-negSampleSize = nrow(negSamples)
-
-#computing means
-posSampleMeans = colMeans(posSamples)
-negSampleMeans = colMeans(negSamples)
-
-#computing (unbiased) variances
-posSampleVariances = (colSums(posSamples^2) - posSampleSize * posSampleMeans^2) / (posSampleSize-1)
-negSampleVariances = (colSums(negSamples^2) - negSampleSize * negSampleMeans^2) / (negSampleSize-1)
-
-#computing t-statistics and degrees of freedom
-t_statistics = (posSampleMeans - negSampleMeans) / sqrt(posSampleVariances/posSampleSize + negSampleVariances/negSampleSize)
-degrees_of_freedom = round(((posSampleVariances/posSampleSize + negSampleVariances/negSampleSize) ^ 2) / (posSampleVariances^2/(posSampleSize^2 * (posSampleSize-1)) + negSampleVariances^2/(negSampleSize^2 * (negSampleSize-1))))
-
-write(t_statistics, $3, format="text")
-write(degrees_of_freedom, $4, format="text")
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+posSamples = read($1, format="text")
+negSamples = read($2, format="text")
+
+#computing sample sizes
+posSampleSize = nrow(posSamples)
+negSampleSize = nrow(negSamples)
+
+#computing means
+posSampleMeans = colMeans(posSamples)
+negSampleMeans = colMeans(negSamples)
+
+#computing (unbiased) variances
+posSampleVariances = (colSums(posSamples^2) - posSampleSize * posSampleMeans^2) / (posSampleSize-1)
+negSampleVariances = (colSums(negSamples^2) - negSampleSize * negSampleMeans^2) / (negSampleSize-1)
+
+#computing t-statistics and degrees of freedom
+t_statistics = (posSampleMeans - negSampleMeans) / sqrt(posSampleVariances/posSampleSize + negSampleVariances/negSampleSize)
+degrees_of_freedom = round(((posSampleVariances/posSampleSize + negSampleVariances/negSampleSize) ^ 2) / (posSampleVariances^2/(posSampleSize^2 * (posSampleSize-1)) + negSampleVariances^2/(negSampleSize^2 * (negSampleSize-1))))
+
+write(t_statistics, $3, format="text")
+write(degrees_of_freedom, $4, format="text")

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/welchTTest/welchTTest.pydml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/welchTTest/welchTTest.pydml b/src/test/scripts/applications/welchTTest/welchTTest.pydml
index abfff3a..5dbd049 100644
--- a/src/test/scripts/applications/welchTTest/welchTTest.pydml
+++ b/src/test/scripts/applications/welchTTest/welchTTest.pydml
@@ -1,43 +1,43 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-
-posSamples = load($1, format="text")
-negSamples = load($2, format="text")
-
-#computing sample sizes
-posSampleSize = nrow(posSamples)
-negSampleSize = nrow(negSamples)
-
-#computing means
-posSampleMeans = colMeans(posSamples)
-negSampleMeans = colMeans(negSamples)
-
-#computing (unbiased) variances
-posSampleVariances = (colSums(posSamples ** 2) - posSampleSize * posSampleMeans ** 2) / (posSampleSize-1)
-negSampleVariances = (colSums(negSamples ** 2) - negSampleSize * negSampleMeans ** 2) / (negSampleSize-1)
-
-#computing t-statistics and degrees of freedom
-t_statistics = (posSampleMeans - negSampleMeans) / sqrt(posSampleVariances/posSampleSize + negSampleVariances/negSampleSize)
-degrees_of_freedom = round(((posSampleVariances/posSampleSize + negSampleVariances/negSampleSize) ** 2) / (posSampleVariances ** 2/((posSampleSize ** 2) * (posSampleSize-1)) + (negSampleVariances ** 2)/((negSampleSize ** 2) * (negSampleSize-1))))
-
-save(t_statistics, $3, format="text")
-save(degrees_of_freedom, $4, format="text")
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+posSamples = load($1, format="text")
+negSamples = load($2, format="text")
+
+#computing sample sizes
+posSampleSize = nrow(posSamples)
+negSampleSize = nrow(negSamples)
+
+#computing means
+posSampleMeans = colMeans(posSamples)
+negSampleMeans = colMeans(negSamples)
+
+#computing (unbiased) variances
+posSampleVariances = (colSums(posSamples ** 2) - posSampleSize * posSampleMeans ** 2) / (posSampleSize-1)
+negSampleVariances = (colSums(negSamples ** 2) - negSampleSize * negSampleMeans ** 2) / (negSampleSize-1)
+
+#computing t-statistics and degrees of freedom
+t_statistics = (posSampleMeans - negSampleMeans) / sqrt(posSampleVariances/posSampleSize + negSampleVariances/negSampleSize)
+degrees_of_freedom = round(((posSampleVariances/posSampleSize + negSampleVariances/negSampleSize) ** 2) / (posSampleVariances ** 2/((posSampleSize ** 2) * (posSampleSize-1)) + (negSampleVariances ** 2)/((negSampleSize ** 2) * (negSampleSize-1))))
+
+save(t_statistics, $3, format="text")
+save(degrees_of_freedom, $4, format="text")

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/aggregate/AllMax.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/aggregate/AllMax.R b/src/test/scripts/functions/aggregate/AllMax.R
index 3ed23d6..73cde8d 100644
--- a/src/test/scripts/functions/aggregate/AllMax.R
+++ b/src/test/scripts/functions/aggregate/AllMax.R
@@ -1,29 +1,29 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-args <- commandArgs(TRUE)
-
-library("Matrix")
-
-A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
-B <- as.matrix(max(A));
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+args <- commandArgs(TRUE)
+
+library("Matrix")
+
+A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
+B <- as.matrix(max(A));
+
 writeMM(as(B, "CsparseMatrix"), paste(args[2], "B", sep="")); 
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/aggregate/AllMean.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/aggregate/AllMean.R b/src/test/scripts/functions/aggregate/AllMean.R
index 4e315a4..07ee1a9 100644
--- a/src/test/scripts/functions/aggregate/AllMean.R
+++ b/src/test/scripts/functions/aggregate/AllMean.R
@@ -1,29 +1,29 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-args <- commandArgs(TRUE)
-
-library("Matrix")
-
-A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
-B <- as.matrix(mean(A));
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+args <- commandArgs(TRUE)
+
+library("Matrix")
+
+A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
+B <- as.matrix(mean(A));
+
 writeMM(as(B, "CsparseMatrix"), paste(args[2], "B", sep="")); 
\ No newline at end of file



[39/55] [partial] incubator-systemml git commit: [SYSTEMML-482] [SYSTEMML-480] Adding a Git attributes file to enfore Unix-styled line endings, and normalizing all of the line endings.

Posted by du...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/scripts/datagen/genRandData4LogReg_LTstats.dml
----------------------------------------------------------------------
diff --git a/scripts/datagen/genRandData4LogReg_LTstats.dml b/scripts/datagen/genRandData4LogReg_LTstats.dml
index 6742f0c..2ec5aef 100644
--- a/scripts/datagen/genRandData4LogReg_LTstats.dml
+++ b/scripts/datagen/genRandData4LogReg_LTstats.dml
@@ -1,233 +1,233 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-#
-# generates random data to test bi- and multinomial logistic regression
-
-# $N  = number of training samples
-# $Nt = number of test samples (or 0 if none)
-# $nf = number of features (independent variables)
-# $nc = number of categories; = 1 if "binomial" with +1/-1 labels
-# $Xmin  = minimum feature value
-# $Xmax  = maximum feature value
-# $spars = controls sparsity in the generated data
-# $avgLTmin = average linear term (X %*% beta + intercept), minimum value
-# $avgLTmax = average linear term (X %*% beta + intercept), maximum value
-# $stdLT = requested standard deviation for the linear terms
-# $iceptmin = intercept, minimum value (0.0 disables intercept)
-# $iceptmax = intercept, maximum value (0.0 disables intercept)
-# $B  = location to store generated regression parameters
-# $X  = location to store generated training data
-# $Y  = location to store generated training category labels
-# $Xt = location to store generated test data
-# $Yt = location to store generated test category labels
-#
-# Example:
-# hadoop jar SystemML.jar -f genRandData4LogReg_LTstats.dml -nvargs
-#     N=1000000 Nt=1000 nf=20 nc=3 Xmin=0.0 Xmax=1.0 spars=1.0 avgLTmin=3.0 avgLTmax=5.0 stdLT=1.25
-#     iceptmin=1.0 iceptmax=1.0 B=./B123 X=./X123 Y=./Y123 Xt=./Xt123 Yt=./Yt123
-
-numTrainingSamples = $N;
-numTestSamples = $Nt;
-numFeatures = $nf;
-numCategories = $nc;
-minIntercept = $iceptmin;
-maxIntercept = $iceptmax;
-minXentry = $Xmin;
-maxXentry = $Xmax;
-minAvgLT = $avgLTmin;
-maxAvgLT = $avgLTmax;
-sparsityLevel = $spars;
-stdevLT = $stdLT;
-fileB  = ifdef ($B,  "B");
-fileX  = ifdef ($X,  "X");
-fileY  = ifdef ($Y,  "Y");
-fileXt = ifdef ($Xt, "Xt");
-fileYt = ifdef ($Yt, "Yt");
-
-
-numSamples = numTrainingSamples + numTestSamples;
-
-isBinomialPMOne = FALSE;
-if (numCategories == 1) {
-    numCategories = 2;
-    isBinomialPMOne = TRUE;
-}
-do_we_output_intercept = 1;
-if (minIntercept == 0.0 & maxIntercept == 0.0) {
-    do_we_output_intercept = 0;
-}
-
-X = Rand (rows = numSamples, cols = numFeatures, min = minXentry, max = maxXentry, pdf = "uniform", sparsity = sparsityLevel);
-
-meanLT  = Rand (rows = 1, cols = numCategories - 1, min = minAvgLT, max = maxAvgLT, pdf = "uniform");
-sigmaLT = matrix (stdevLT, rows = 1, cols = numCategories - 1);
-b_intercept = Rand (rows = 1, cols = numCategories - 1, min = minIntercept, max = maxIntercept, pdf = "uniform");
-
-meanLT_minus_intercept = meanLT - b_intercept;
-[B, new_sigmaLT] = generateWeights (X, meanLT_minus_intercept, sigmaLT);
-
-ones = matrix (1.0, rows = numSamples, cols = 1);
-LT = X %*% B + ones %*% b_intercept;
-actual_meanLT  = colSums (LT) / numSamples;
-actual_sigmaLT = sqrt (colSums ((LT - ones %*% actual_meanLT)^2) / numSamples);
-
-for (i in 1:(numCategories - 1)) {
-    if (castAsScalar (new_sigmaLT [1, i]) == castAsScalar (sigmaLT [1, i])) {
-        print ("Category " + i + ":  Intercept = " + castAsScalar (b_intercept [1, i])); 
-    } else {
-        print ("Category " + i + ":  Intercept = " + castAsScalar (b_intercept [1, i]) + ",  st.dev.(LT) relaxed from " + castAsScalar (sigmaLT [1, i])); 
-    }
-    print ("    Wanted LT mean = " + castAsScalar (meanLT [1, i])        + ",  st.dev. = " + castAsScalar (new_sigmaLT [1, i]));
-    print ("    Actual LT mean = " + castAsScalar (actual_meanLT [1, i]) + ",  st.dev. = " + castAsScalar (actual_sigmaLT [1, i]));
-}
-
-
-ones = matrix (1.0, rows = 1, cols = numCategories - 1);
-Prob = exp (LT);
-Prob = Prob / ((1.0 + rowSums (Prob)) %*% ones);
-Prob = t(cumsum (t(Prob)));
-
-r = Rand (rows = numSamples, cols = 1, min = 0, max = 1, pdf = "uniform", seed = 0);
-R = r %*% ones;
-Y = 1 + rowSums (ppred (Prob, R, "<"));
-if (isBinomialPMOne) {
-    Y = 3 - 2 * Y;
-}
-
-
-/* USE FOR LINEAR REGRESSION
-
-r = Rand (rows = numSamples, cols = 1, pdf = "normal");
-Y = LT [, 1] + r;
-
-*/
-
-
-if (do_we_output_intercept == 1) {
-    new_B = matrix (0.0, rows = nrow(B) + 1, cols = ncol(B));
-    new_B [1:nrow(B), 1:ncol(B)] = B;
-    new_B [nrow(B)+1, 1:ncol(B)] = b_intercept;
-    write (new_B, fileB, format="mm");
-} else {
-    write (B, fileB, format="mm");
-}
-
-if (numTestSamples > 0) {
-    X_train = X [1:numTrainingSamples,];
-    Y_train = Y [1:numTrainingSamples,];
-    X_test  = X [(numTrainingSamples+1):numSamples,];
-    Y_test  = Y [(numTrainingSamples+1):numSamples,];
-    write (X_train, fileX,  format="mm");
-    write (Y_train, fileY,  format="mm");
-    write (X_test,  fileXt, format="mm");
-    write (Y_test,  fileYt, format="mm");
-} else {
-    write (X, fileX, format="mm");
-    write (Y, fileY, format="mm");
-}
-
-
-
-
-
-
-# Generates weight vectors to ensure the desired statistics for Linear Terms = X %*% W
-# To be used for data generation in the testing of GLM, Logistic Regression, etc.
-# INPUT:  meanLT and sigmaLT are row vectors, meanLT[1, i] and sigmaLT[1, i] are
-#         the desired mean and standard deviation for X %*% W[, i]
-# OUTPUT: "W" is the matrix of generated (column) weight vectors W[, i]
-#         new_sigmaLT[1, i] == sigmaLT[1, i] if the std.dev is successfully enforced,
-#         new_sigmaLT[1, i]  > sigmaLT[1, i] if we had to relax this constraint.
-generateWeights = 
-    function (Matrix[double] X, Matrix[double] meanLT, Matrix[double] sigmaLT)
-    return   (Matrix[double] W, Matrix[double] new_sigmaLT)
-{
-    num_w = ncol (meanLT);  # Number of output weight vectors
-    dim_w = ncol (X);       # Number of features / dimensions in a weight vector
-    w_X = t(colSums(X));    # "Prohibited" weight shift direction that changes meanLT
-                            # (all orthogonal shift directions do not affect meanLT)
-
-    # Compute "w_1" with meanLT = 1 and with the smallest possible sigmaLT
-
-    w_1 = straightenX (X);
-    r_1 = (X %*% w_1) - 1.0;
-    norm_r_1_sq = sum (r_1 ^ 2);
-    
-    # For each W[, i] generate uniformly random directions to shift away from "w_1"
-    
-    DW_raw = Rand (rows = dim_w, cols = num_w, pdf = "normal");
-    DW = DW_raw - (w_X %*% t(w_X) %*% DW_raw) / sum (w_X ^ 2); # Orthogonal to w_X
-    XDW = X %*% DW;
-    
-    # Determine how far to shift in the chosen directions to satisfy the constraints
-    # Use the positive root of the quadratic equation; relax sigmaLT where needed
-    
-    a_qe = colSums (XDW ^ 2);
-    b_qe = 2.0 * meanLT * (t(r_1) %*% XDW);
-    c_qe = meanLT^2 * norm_r_1_sq - sigmaLT^2 * nrow(X);
-
-    is_sigmaLT_OK = ppred (c_qe, 0.0, "<=");
-    new_sigmaLT = is_sigmaLT_OK * sigmaLT + (1 - is_sigmaLT_OK) * abs (meanLT) * sqrt (norm_r_1_sq / nrow(X));
-    c_qe = is_sigmaLT_OK * c_qe;
-    x_qe = (- b_qe + sqrt (b_qe * b_qe - 4.0 * a_qe * c_qe)) / (2.0 * a_qe);
-    
-    # Scale and shift "w_1" in the "DW" directions to produce the result:
-    
-    ones = matrix (1.0, rows = dim_w, cols = 1);
-    W = w_1 %*% meanLT + DW * (ones %*% x_qe);
-}
-
-# Computes vector w such that  ||X %*% w - 1|| -> MIN  given  avg(X %*% w) = 1
-# We find z_LS such that ||X %*% z_LS - 1|| -> MIN unconditionally, then scale
-# it to compute  w = c * z_LS  such that  sum(X %*% w) = nrow(X).
-straightenX =
-    function (Matrix[double] X)
-    return   (Matrix[double] w)
-{
-    w_X = t(colSums(X));
-    lambda_LS = 0.000001 * sum(X ^ 2) / ncol(X);
-    eps = 0.000000001 * nrow(X);
-
-    # BEGIN LEAST SQUARES
-    
-    r_LS = - w_X;
-    z_LS = matrix (0.0, rows = ncol(X), cols = 1);
-    p_LS = - r_LS;
-    norm_r2_LS = sum (r_LS ^ 2);
-    i_LS = 0;
-    while (i_LS < 50 & i_LS < ncol(X) & norm_r2_LS >= eps)
-    {
-        temp_LS = X %*% p_LS;
-        q_LS = (t(X) %*% temp_LS) + lambda_LS * p_LS;
-        alpha_LS = norm_r2_LS / sum (p_LS * q_LS);
-        z_LS = z_LS + alpha_LS * p_LS;
-        old_norm_r2_LS = norm_r2_LS;
-        r_LS = r_LS + alpha_LS * q_LS;
-        norm_r2_LS = sum (r_LS ^ 2);
-        p_LS = -r_LS + (norm_r2_LS / old_norm_r2_LS) * p_LS;
-        i_LS = i_LS + 1;
-    }
-    
-    # END LEAST SQUARES
-    
-    w = (nrow(X) / sum (w_X * z_LS)) * z_LS;
-}
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+#
+# generates random data to test bi- and multinomial logistic regression
+
+# $N  = number of training samples
+# $Nt = number of test samples (or 0 if none)
+# $nf = number of features (independent variables)
+# $nc = number of categories; = 1 if "binomial" with +1/-1 labels
+# $Xmin  = minimum feature value
+# $Xmax  = maximum feature value
+# $spars = controls sparsity in the generated data
+# $avgLTmin = average linear term (X %*% beta + intercept), minimum value
+# $avgLTmax = average linear term (X %*% beta + intercept), maximum value
+# $stdLT = requested standard deviation for the linear terms
+# $iceptmin = intercept, minimum value (0.0 disables intercept)
+# $iceptmax = intercept, maximum value (0.0 disables intercept)
+# $B  = location to store generated regression parameters
+# $X  = location to store generated training data
+# $Y  = location to store generated training category labels
+# $Xt = location to store generated test data
+# $Yt = location to store generated test category labels
+#
+# Example:
+# hadoop jar SystemML.jar -f genRandData4LogReg_LTstats.dml -nvargs
+#     N=1000000 Nt=1000 nf=20 nc=3 Xmin=0.0 Xmax=1.0 spars=1.0 avgLTmin=3.0 avgLTmax=5.0 stdLT=1.25
+#     iceptmin=1.0 iceptmax=1.0 B=./B123 X=./X123 Y=./Y123 Xt=./Xt123 Yt=./Yt123
+
+numTrainingSamples = $N;
+numTestSamples = $Nt;
+numFeatures = $nf;
+numCategories = $nc;
+minIntercept = $iceptmin;
+maxIntercept = $iceptmax;
+minXentry = $Xmin;
+maxXentry = $Xmax;
+minAvgLT = $avgLTmin;
+maxAvgLT = $avgLTmax;
+sparsityLevel = $spars;
+stdevLT = $stdLT;
+fileB  = ifdef ($B,  "B");   # output locations fall back to these literal names when the argument is not supplied
+fileX  = ifdef ($X,  "X");
+fileY  = ifdef ($Y,  "Y");
+fileXt = ifdef ($Xt, "Xt");
+fileYt = ifdef ($Yt, "Yt");
+
+# Training and test rows are generated together and split just before they are written.
+numSamples = numTrainingSamples + numTestSamples;
+# nc == 1 is handled as a 2-category problem whose labels are remapped to +1 / -1 after sampling.
+isBinomialPMOne = FALSE;
+if (numCategories == 1) {
+    numCategories = 2;
+    isBinomialPMOne = TRUE;
+}
+do_we_output_intercept = 1;  # reset to 0 below when both intercept bounds are 0.0
+if (minIntercept == 0.0 & maxIntercept == 0.0) {
+    do_we_output_intercept = 0;
+}
+
+X = Rand (rows = numSamples, cols = numFeatures, min = minXentry, max = maxXentry, pdf = "uniform", sparsity = sparsityLevel);
+# One column per category except the last one, whose linear term is implicitly 0 (see the "1.0 +" in the Prob denominator).
+meanLT  = Rand (rows = 1, cols = numCategories - 1, min = minAvgLT, max = maxAvgLT, pdf = "uniform");
+sigmaLT = matrix (stdevLT, rows = 1, cols = numCategories - 1);
+b_intercept = Rand (rows = 1, cols = numCategories - 1, min = minIntercept, max = maxIntercept, pdf = "uniform");
+# Weights are generated for (meanLT - intercept), so that X %*% B plus the intercept targets meanLT.
+meanLT_minus_intercept = meanLT - b_intercept;
+[B, new_sigmaLT] = generateWeights (X, meanLT_minus_intercept, sigmaLT);
+# Measure the column mean and st.dev. (divisor numSamples) that the linear terms actually have.
+ones = matrix (1.0, rows = numSamples, cols = 1);
+LT = X %*% B + ones %*% b_intercept;
+actual_meanLT  = colSums (LT) / numSamples;
+actual_sigmaLT = sqrt (colSums ((LT - ones %*% actual_meanLT)^2) / numSamples);
+# Report requested vs. achieved statistics; mention the requested st.dev. only where generateWeights changed it.
+for (i in 1:(numCategories - 1)) {
+    if (castAsScalar (new_sigmaLT [1, i]) == castAsScalar (sigmaLT [1, i])) {
+        print ("Category " + i + ":  Intercept = " + castAsScalar (b_intercept [1, i])); 
+    } else {
+        print ("Category " + i + ":  Intercept = " + castAsScalar (b_intercept [1, i]) + ",  st.dev.(LT) relaxed from " + castAsScalar (sigmaLT [1, i])); 
+    }
+    print ("    Wanted LT mean = " + castAsScalar (meanLT [1, i])        + ",  st.dev. = " + castAsScalar (new_sigmaLT [1, i]));
+    print ("    Actual LT mean = " + castAsScalar (actual_meanLT [1, i]) + ",  st.dev. = " + castAsScalar (actual_sigmaLT [1, i]));
+}
+
+# Turn linear terms into per-row cumulative category probabilities (cumsum runs down columns, hence the transposes).
+ones = matrix (1.0, rows = 1, cols = numCategories - 1);
+Prob = exp (LT);
+Prob = Prob / ((1.0 + rowSums (Prob)) %*% ones);
+Prob = t(cumsum (t(Prob)));
+# Sample labels: Y = 1 + number of cumulative probabilities that lie below the uniform draw r.
+r = Rand (rows = numSamples, cols = 1, min = 0, max = 1, pdf = "uniform", seed = 0);
+R = r %*% ones;
+Y = 1 + rowSums (ppred (Prob, R, "<"));
+if (isBinomialPMOne) {
+    Y = 3 - 2 * Y;  # label 1 -> +1, label 2 -> -1
+}
+
+
+/* USE FOR LINEAR REGRESSION
+
+r = Rand (rows = numSamples, cols = 1, pdf = "normal");
+Y = LT [, 1] + r;
+
+*/
+
+# When the intercept is enabled it is written as one extra, final row of B.
+if (do_we_output_intercept == 1) {
+    new_B = matrix (0.0, rows = nrow(B) + 1, cols = ncol(B));
+    new_B [1:nrow(B), 1:ncol(B)] = B;
+    new_B [nrow(B)+1, 1:ncol(B)] = b_intercept;
+    write (new_B, fileB, format="mm");
+} else {
+    write (B, fileB, format="mm");
+}
+# With a test set requested, the first numTrainingSamples rows are training data and the rest are test data.
+if (numTestSamples > 0) {
+    X_train = X [1:numTrainingSamples,];
+    Y_train = Y [1:numTrainingSamples,];
+    X_test  = X [(numTrainingSamples+1):numSamples,];
+    Y_test  = Y [(numTrainingSamples+1):numSamples,];
+    write (X_train, fileX,  format="mm");
+    write (Y_train, fileY,  format="mm");
+    write (X_test,  fileXt, format="mm");
+    write (Y_test,  fileYt, format="mm");
+} else {
+    write (X, fileX, format="mm");
+    write (Y, fileY, format="mm");
+}
+
+
+
+
+
+
+# Generates weight vectors to ensure the desired statistics for Linear Terms = X %*% W
+# To be used for data generation in the testing of GLM, Logistic Regression, etc.
+# INPUT:  meanLT and sigmaLT are row vectors, meanLT[1, i] and sigmaLT[1, i] are
+#         the desired mean and standard deviation for X %*% W[, i]
+# OUTPUT: "W" is the matrix of generated (column) weight vectors W[, i]
+#         new_sigmaLT[1, i] == sigmaLT[1, i] if the std.dev is successfully enforced,
+#         new_sigmaLT[1, i]  > sigmaLT[1, i] if we had to relax this constraint.
+generateWeights = 
+    function (Matrix[double] X, Matrix[double] meanLT, Matrix[double] sigmaLT)
+    return   (Matrix[double] W, Matrix[double] new_sigmaLT)
+{
+    num_w = ncol (meanLT);  # Number of output weight vectors
+    dim_w = ncol (X);       # Number of features / dimensions in a weight vector
+    w_X = t(colSums(X));    # "Prohibited" weight shift direction that changes meanLT
+                            # (all orthogonal shift directions do not affect meanLT)
+
+    # Compute "w_1" with meanLT = 1 and with the smallest possible sigmaLT
+
+    w_1 = straightenX (X);
+    r_1 = (X %*% w_1) - 1.0;        # deviation of X %*% w_1 from the all-ones target
+    norm_r_1_sq = sum (r_1 ^ 2);
+    
+    # For each W[, i] generate uniformly random directions to shift away from "w_1"
+    
+    DW_raw = Rand (rows = dim_w, cols = num_w, pdf = "normal");
+    DW = DW_raw - (w_X %*% t(w_X) %*% DW_raw) / sum (w_X ^ 2); # Orthogonal to w_X
+    XDW = X %*% DW;                 # effect of each shift direction on the linear terms
+    
+    # Determine how far to shift in the chosen directions to satisfy the constraints
+    # Use the positive root of the quadratic equation; relax sigmaLT where needed
+    
+    a_qe = colSums (XDW ^ 2);
+    b_qe = 2.0 * meanLT * (t(r_1) %*% XDW);
+    c_qe = meanLT^2 * norm_r_1_sq - sigmaLT^2 * nrow(X);
+
+    is_sigmaLT_OK = ppred (c_qe, 0.0, "<=");  # 1 where c_qe <= 0, i.e. the discriminant below cannot be negative
+    new_sigmaLT = is_sigmaLT_OK * sigmaLT + (1 - is_sigmaLT_OK) * abs (meanLT) * sqrt (norm_r_1_sq / nrow(X));  # elsewhere use the st.dev. of meanLT * (X %*% w_1)
+    c_qe = is_sigmaLT_OK * c_qe;              # constant term becomes 0 for the relaxed columns
+    x_qe = (- b_qe + sqrt (b_qe * b_qe - 4.0 * a_qe * c_qe)) / (2.0 * a_qe);
+    
+    # Scale and shift "w_1" in the "DW" directions to produce the result:
+    
+    ones = matrix (1.0, rows = dim_w, cols = 1);
+    W = w_1 %*% meanLT + DW * (ones %*% x_qe);
+}
+
+# Computes vector w such that  ||X %*% w - 1|| -> MIN  given  avg(X %*% w) = 1
+# We find z_LS such that ||X %*% z_LS - 1|| -> MIN unconditionally, then scale
+# it to compute  w = c * z_LS  such that  sum(X %*% w) = nrow(X).
+straightenX =
+    function (Matrix[double] X)
+    return   (Matrix[double] w)
+{
+    w_X = t(colSums(X));
+    lambda_LS = 0.000001 * sum(X ^ 2) / ncol(X);   # small ridge term, proportional to the average squared column norm of X
+    eps = 0.000000001 * nrow(X);                   # stopping threshold for the squared residual norm
+
+    # BEGIN LEAST SQUARES: conjugate-gradient iteration on (t(X) %*% X + lambda_LS * I) %*% z_LS = w_X, starting at z_LS = 0
+    
+    r_LS = - w_X;
+    z_LS = matrix (0.0, rows = ncol(X), cols = 1);
+    p_LS = - r_LS;
+    norm_r2_LS = sum (r_LS ^ 2);
+    i_LS = 0;
+    while (i_LS < 50 & i_LS < ncol(X) & norm_r2_LS >= eps)   # at most min (50, ncol(X)) iterations
+    {
+        temp_LS = X %*% p_LS;
+        q_LS = (t(X) %*% temp_LS) + lambda_LS * p_LS;
+        alpha_LS = norm_r2_LS / sum (p_LS * q_LS);
+        z_LS = z_LS + alpha_LS * p_LS;
+        old_norm_r2_LS = norm_r2_LS;
+        r_LS = r_LS + alpha_LS * q_LS;
+        norm_r2_LS = sum (r_LS ^ 2);
+        p_LS = -r_LS + (norm_r2_LS / old_norm_r2_LS) * p_LS;
+        i_LS = i_LS + 1;
+    }
+    
+    # END LEAST SQUARES
+    # sum (w_X * z_LS) equals sum (X %*% z_LS), so the scaling below makes sum (X %*% w) = nrow(X)
+    w = (nrow(X) / sum (w_X * z_LS)) * z_LS;
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/scripts/datagen/genRandData4MultiClassSVM.dml
----------------------------------------------------------------------
diff --git a/scripts/datagen/genRandData4MultiClassSVM.dml b/scripts/datagen/genRandData4MultiClassSVM.dml
index 65ee1d4..5d9fbcb 100644
--- a/scripts/datagen/genRandData4MultiClassSVM.dml
+++ b/scripts/datagen/genRandData4MultiClassSVM.dml
@@ -1,68 +1,68 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# generates random data to test linear logistic regression
-
-# $1 is number of samples
-# $2 is number of features (independent variables)
-# $3 is maximum feature value (absolute value)
-# $4 is maximum weight (absolute value)
-# $5 is location to store generated weights
-# $6 is location to store generated data
-# $7 is location to store generated labels
-# $8 addNoise. if 0 then no noise is added, to add noise set this to 1
-# $9 is b, 0 disables intercept
-# $10 controls sparsity in the generated data
-
-numSamples = $1
-numFeatures = $2
-maxFeatureValue = $3
-maxWeight = $4
-addNoise = $8
-b = $9
-
-X = Rand(rows=numSamples, cols=numFeatures, min=-1, max=1, pdf="uniform", seed=0, sparsity=$10)
-X = X * maxFeatureValue 
-
-w = Rand(rows=numFeatures, cols=1, min=-1, max=1, pdf="uniform", seed=0)
-w = w * maxWeight
-
-ot = X%*%w
-if(b!=0) {
-	b_mat = Rand(rows=1, cols=1, min=b, max=b, pdf="uniform")
-	w =  t(append(t(w), b_mat))
-	ot = ot + b
-}
-
-prob = 1/(1+exp(-ot))
-if(addNoise == 1){
-	r = Rand(rows=numSamples, cols=1, min=0, max=1, pdf="uniform", seed=0)
-}else{
-	print("this data generator generates the same dataset for both noise=0 and noise=1")
-	r = Rand(rows=numSamples, cols=1, min=0, max=1, pdf="uniform", seed=0)
-	#r = Rand(rows=numSamples, cols=1, min=0.5, max=0.5, pdf="uniform")
-}
-Y = 1 - 2*ppred(prob, r, "<")
-Y = (Y+3)/2
-
-write(w, $5, format="binary")
-write(X, $6, format="binary")
-write(Y, $7, format="binary")
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# generates random data to test linear logistic regression
+
+# $1 is number of samples
+# $2 is number of features (independent variables)
+# $3 is maximum feature value (absolute value)
+# $4 is maximum weight (absolute value)
+# $5 is location to store generated weights
+# $6 is location to store generated data
+# $7 is location to store generated labels
+# $8 addNoise. if 0 then no noise is added, to add noise set this to 1
+# $9 is b, 0 disables intercept
+# $10 controls sparsity in the generated data
+
+numSamples = $1
+numFeatures = $2
+maxFeatureValue = $3
+maxWeight = $4
+addNoise = $8
+b = $9
+# Features: uniform in [-1, 1], then scaled by maxFeatureValue.
+X = Rand(rows=numSamples, cols=numFeatures, min=-1, max=1, pdf="uniform", seed=0, sparsity=$10)
+X = X * maxFeatureValue 
+# Weights: uniform in [-1, 1], then scaled by maxWeight.
+w = Rand(rows=numFeatures, cols=1, min=-1, max=1, pdf="uniform", seed=0)
+w = w * maxWeight
+# Linear term; when b != 0 the intercept is appended to w as a final row and added to every entry of ot.
+ot = X%*%w
+if(b!=0) {
+	b_mat = Rand(rows=1, cols=1, min=b, max=b, pdf="uniform")
+	w =  t(append(t(w), b_mat))
+	ot = ot + b
+}
+# Logistic probability per sample; both branches below draw the same seeded uniform vector r.
+prob = 1/(1+exp(-ot))
+if(addNoise == 1){
+	r = Rand(rows=numSamples, cols=1, min=0, max=1, pdf="uniform", seed=0)
+}else{
+	print("this data generator generates the same dataset for both noise=0 and noise=1")
+	r = Rand(rows=numSamples, cols=1, min=0, max=1, pdf="uniform", seed=0)
+	#r = Rand(rows=numSamples, cols=1, min=0.5, max=0.5, pdf="uniform")
+}
+Y = 1 - 2*ppred(prob, r, "<")  # -1 where prob < r, +1 otherwise
+Y = (Y+3)/2                    # relabel: -1 -> 1, +1 -> 2
+
+write(w, $5, format="binary")
+write(X, $6, format="binary")
+write(Y, $7, format="binary")

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/scripts/datagen/genRandData4NMF.dml
----------------------------------------------------------------------
diff --git a/scripts/datagen/genRandData4NMF.dml b/scripts/datagen/genRandData4NMF.dml
index 5988d48..cf18430 100644
--- a/scripts/datagen/genRandData4NMF.dml
+++ b/scripts/datagen/genRandData4NMF.dml
@@ -1,129 +1,129 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# generates random data for non-negative
-# matrix factorization
-#
-# follows lda's generative model
-# see Blei, Ng & Jordan, JMLR'03 paper
-# titled Latent Dirichlet Allocation
-#
-# $1 is number of samples
-# $2 is number of features
-# $3 is number of latent factors
-# $4 is number of features per sample
-# 	 (may overlap). use this to vary
-#	 sparsity.	
-# $5 is file to store sample mixtures
-# $6 is file to store factors
-# $7 is file to store generated data
-
-numDocuments = $1
-numFeatures = $2
-numTopics = $3
-numWordsPerDoc = $4
-
-docTopicMixtures = Rand(rows=numDocuments, cols=numTopics, min=0.0, max=1.0, pdf="uniform", seed=0, sparsity=0.75)
-denomsTM = rowSums(docTopicMixtures)
-zerosInDenomsTM = ppred(denomsTM, 0, "==")
-denomsTM = 0.1*zerosInDenomsTM + (1-zerosInDenomsTM)*denomsTM
-parfor(i in 1:numTopics){
-	docTopicMixtures[,i] = docTopicMixtures[,i]/denomsTM
-}
-write(docTopicMixtures, $5, format="binary")
-for(j in 2:numTopics){
-	docTopicMixtures[,j] = docTopicMixtures[,j-1] + docTopicMixtures[,j]
-}
-
-topicDistributions = Rand(rows=numTopics, cols=numFeatures, min=0.0, max=1.0, pdf="uniform", seed=0, sparsity=0.75)
-parfor(i in 1:numTopics){
-	topicDist = topicDistributions[i,]
-	
-	denom2 = sum(topicDist)
-	if(denom2 == 0){
-		denom2 = denom2 + 0.1
-	}
-	
-	topicDistributions[i,] = topicDist / denom2
-}
-write(topicDistributions, $6, format="binary")
-for(j in 2:numFeatures){
-	topicDistributions[,j] = topicDistributions[,j-1] + topicDistributions[,j]
-}
-
-data = Rand(rows=numDocuments, cols=numFeatures, min=0, max=0, pdf="uniform")
-
-parfor(i in 1:numDocuments){
-	docTopic = docTopicMixtures[i,]
-	
-    ldata = Rand(rows=1, cols=numFeatures, min=0, max=0, pdf="uniform");
-  
-	r_z = Rand(rows=numWordsPerDoc, cols=1, min=0, max=1, pdf="uniform", seed=0)
-	r_w = Rand(rows=numWordsPerDoc, cols=1, min=0, max=1, pdf="uniform", seed=0)
-	
-	for(j in 1:numWordsPerDoc){
-		rz = castAsScalar(r_z[j,1])
-		continue = 1
-		
-		z = -1
-		#this is a workaround
-		#z=1	
-		
-		for(k1 in 1:numTopics){
-			prob = castAsScalar(docTopic[1,k1])
-			if(continue==1 & rz <= prob){
-				z=k1
-				continue=0
-			}
-		}
-		
-		if(z==-1){
-			print("z is unassigned: " + z)
-			z = numTopics
-		}
-		
-		rw = castAsScalar(r_w[j,1])
-		continue = 1
-		
-		w = -1
-		#this is a workaround
-		#w = 1
-		
-		for(k2 in 1:numFeatures){
-			prob = castAsScalar(topicDistributions[z,k2])
-			if(continue == 1 & rw <= prob){
-				w = k2
-				continue = 0
-			}
-		}
-		
-		if(w==-1){
-			print("w is unassigned: " + w)
-			w = numFeatures
-		}
-		
-		ldata[1,w] = ldata[1,w] + 1
-	}
-  
-    data[i,] = ldata;
-}
-
-write(data, $7, format="binary")
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# generates random data for non-negative
+# matrix factorization
+#
+# follows lda's generative model
+# see Blei, Ng & Jordan, JMLR'03 paper
+# titled Latent Dirichlet Allocation
+#
+# $1 is number of samples
+# $2 is number of features
+# $3 is number of latent factors
+# $4 is number of features per sample
+# 	 (may overlap). use this to vary
+#	 sparsity.	
+# $5 is file to store sample mixtures
+# $6 is file to store factors
+# $7 is file to store generated data
+
+numDocuments = $1
+numFeatures = $2
+numTopics = $3
+numWordsPerDoc = $4
+# Per-document topic mixtures: random non-negative rows divided by their row sum (all-zero rows are divided by 0.1 and stay zero).
+docTopicMixtures = Rand(rows=numDocuments, cols=numTopics, min=0.0, max=1.0, pdf="uniform", seed=0, sparsity=0.75)
+denomsTM = rowSums(docTopicMixtures)
+zerosInDenomsTM = ppred(denomsTM, 0, "==")
+denomsTM = 0.1*zerosInDenomsTM + (1-zerosInDenomsTM)*denomsTM
+parfor(i in 1:numTopics){
+	docTopicMixtures[,i] = docTopicMixtures[,i]/denomsTM
+}
+write(docTopicMixtures, $5, format="binary")
+for(j in 2:numTopics){  # after writing, replace each row by its running sum for the sampling below
+	docTopicMixtures[,j] = docTopicMixtures[,j-1] + docTopicMixtures[,j]
+}
+# Per-topic feature distributions: normalised, written out, then accumulated in the same way.
+topicDistributions = Rand(rows=numTopics, cols=numFeatures, min=0.0, max=1.0, pdf="uniform", seed=0, sparsity=0.75)
+parfor(i in 1:numTopics){
+	topicDist = topicDistributions[i,]
+	
+	denom2 = sum(topicDist)
+	if(denom2 == 0){
+		denom2 = denom2 + 0.1
+	}
+	
+	topicDistributions[i,] = topicDist / denom2
+}
+write(topicDistributions, $6, format="binary")
+for(j in 2:numFeatures){
+	topicDistributions[,j] = topicDistributions[,j-1] + topicDistributions[,j]
+}
+
+data = Rand(rows=numDocuments, cols=numFeatures, min=0, max=0, pdf="uniform")
+# For every document draw numWordsPerDoc (topic, feature) pairs and count how often each feature is drawn.
+parfor(i in 1:numDocuments){
+	docTopic = docTopicMixtures[i,]
+	
+    ldata = Rand(rows=1, cols=numFeatures, min=0, max=0, pdf="uniform");  # per-document counts, copied into data[i,] at the end of the iteration
+  
+	r_z = Rand(rows=numWordsPerDoc, cols=1, min=0, max=1, pdf="uniform", seed=0)
+	r_w = Rand(rows=numWordsPerDoc, cols=1, min=0, max=1, pdf="uniform", seed=0)
+	# NOTE(review): r_z and r_w use the same fixed seed in every iteration, so they look identical for all documents - confirm this is intended
+	for(j in 1:numWordsPerDoc){
+		rz = castAsScalar(r_z[j,1])
+		continue = 1
+		
+		z = -1
+		#this is a workaround
+		#z=1	
+		# pick the first topic whose cumulative probability is >= rz
+		for(k1 in 1:numTopics){
+			prob = castAsScalar(docTopic[1,k1])
+			if(continue==1 & rz <= prob){
+				z=k1
+				continue=0
+			}
+		}
+		# no cumulative value reached rz: report it and fall back to the last topic
+		if(z==-1){
+			print("z is unassigned: " + z)
+			z = numTopics
+		}
+		
+		rw = castAsScalar(r_w[j,1])
+		continue = 1
+		
+		w = -1
+		#this is a workaround
+		#w = 1
+		# same search over the cumulative feature distribution of topic z
+		for(k2 in 1:numFeatures){
+			prob = castAsScalar(topicDistributions[z,k2])
+			if(continue == 1 & rw <= prob){
+				w = k2
+				continue = 0
+			}
+		}
+		# no cumulative value reached rw: report it and fall back to the last feature
+		if(w==-1){
+			print("w is unassigned: " + w)
+			w = numFeatures
+		}
+		
+		ldata[1,w] = ldata[1,w] + 1
+	}
+  
+    data[i,] = ldata;
+}
+
+write(data, $7, format="binary")

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/scripts/datagen/genRandData4NMFBlockwise.dml
----------------------------------------------------------------------
diff --git a/scripts/datagen/genRandData4NMFBlockwise.dml b/scripts/datagen/genRandData4NMFBlockwise.dml
index 63133da..e3fd67f 100644
--- a/scripts/datagen/genRandData4NMFBlockwise.dml
+++ b/scripts/datagen/genRandData4NMFBlockwise.dml
@@ -1,138 +1,138 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# generates random data for non-negative
-# matrix factorization
-#
-# follows lda's generative model
-# see Blei, Ng & Jordan, JMLR'03 paper
-# titled Latent Dirichlet Allocation
-#
-# $1 is number of samples
-# $2 is number of features
-# $3 is number of latent factors
-# $4 is number of features per sample
-# 	 (may overlap). use this to vary
-#	 sparsity.	
-# $5 is file to store sample mixtures
-# $6 is file to store factors
-# $7 is file to store generated data
-#
-# $8 is the blocksize, i.e., number of rows per block
-#    (should be set such that $8x$2 fits in mem budget)
-
-numDocuments = $1
-numFeatures = $2
-numTopics = $3
-numWordsPerDoc = $4
-blocksize = $8
-
-docTopicMixtures = Rand(rows=numDocuments, cols=numTopics, min=0.0, max=1.0, pdf="uniform", seed=0, sparsity=0.75)
-denomsTM = rowSums(docTopicMixtures)
-zerosInDenomsTM = ppred(denomsTM, 0, "==")
-denomsTM = 0.1*zerosInDenomsTM + (1-zerosInDenomsTM)*denomsTM
-parfor(i in 1:numTopics){
-	docTopicMixtures[,i] = docTopicMixtures[,i]/denomsTM
-}
-write(docTopicMixtures, $5, format="binary")
-for(j in 2:numTopics){
-	docTopicMixtures[,j] = docTopicMixtures[,j-1] + docTopicMixtures[,j]
-}
-
-topicDistributions = Rand(rows=numTopics, cols=numFeatures, min=0.0, max=1.0, pdf="uniform", seed=0, sparsity=0.75)
-parfor(i in 1:numTopics){
-	topicDist = topicDistributions[i,]
-	
-	denom2 = sum(topicDist)
-	if(denom2 == 0){
-		denom2 = denom2 + 0.1
-	}
-	
-	topicDistributions[i,] = topicDist / denom2
-}
-write(topicDistributions, $6, format="binary")
-for(j in 2:numFeatures){
-	topicDistributions[,j] = topicDistributions[,j-1] + topicDistributions[,j]
-}
-
-data0 = Rand(rows=numDocuments, cols=numFeatures, min=0, max=0, pdf="uniform")
-
-#outer-loop for blockwise computation
-for( k in seq(1,numDocuments,blocksize) )  
-{
-  len = min(blocksize,numDocuments-k); #block length
-  data = data0[k:(k+len),];            #obtain block
-  
-  parfor(i in 1:len){
-  	docTopic = docTopicMixtures[i,]
-  	
-  	r_z = Rand(rows=numWordsPerDoc, cols=1, min=0, max=1, pdf="uniform", seed=0)
-  	r_w = Rand(rows=numWordsPerDoc, cols=1, min=0, max=1, pdf="uniform", seed=0)
-  	
-  	for(j in 1:numWordsPerDoc){
-  		rz = castAsScalar(r_z[j,1])
-  		continue = 1
-  		
-  		z = -1
-  		#this is a workaround
-  		#z=1	
-  		
-  		for(k1 in 1:numTopics){
-  			prob = castAsScalar(docTopic[1,k1])
-  			if(continue==1 & rz <= prob){
-  				z=k1
-  				continue=0
-  			}
-  		}
-  		
-  		if(z==-1){
-  			print("z is unassigned: " + z)
-  			z = numTopics
-  		}
-  		
-  		rw = castAsScalar(r_w[j,1])
-  		continue = 1
-  		
-  		w = -1
-  		#this is a workaround
-  		#w = 1
-  		
-  		for(k2 in 1:numFeatures){
-  			prob = castAsScalar(topicDistributions[z,k2])
-  			if(continue == 1 & rw <= prob){
-  				w = k2
-  				continue = 0
-  			}
-  		}
-  		
-  		if(w==-1){
-  			print("w is unassigned: " + w)
-  			w = numFeatures
-  		}
-  		
-  		data[i,w] = data[i,w] + 1
-  	}
-  }
-  
-  data0[k:(k+len),] = data; # write block back
-}
-
-write(data0, $7, format="binary")
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# generates random data for non-negative
+# matrix factorization
+#
+# follows lda's generative model
+# see Blei, Ng & Jordan, JMLR'03 paper
+# titled Latent Dirichlet Allocation
+#
+# $1 is number of samples
+# $2 is number of features
+# $3 is number of latent factors
+# $4 is number of features per sample
+# 	 (may overlap). use this to vary
+#	 sparsity.	
+# $5 is file to store sample mixtures
+# $6 is file to store factors
+# $7 is file to store generated data
+#
+# $8 is the blocksize, i.e., number of rows per block
+#    (should be set such that $8x$2 fits in mem budget)
+
+numDocuments = $1
+numFeatures = $2
+numTopics = $3
+numWordsPerDoc = $4
+blocksize = $8
+# Per-document topic mixtures: random non-negative rows divided by their row sum (all-zero rows are divided by 0.1 and stay zero).
+docTopicMixtures = Rand(rows=numDocuments, cols=numTopics, min=0.0, max=1.0, pdf="uniform", seed=0, sparsity=0.75)
+denomsTM = rowSums(docTopicMixtures)
+zerosInDenomsTM = ppred(denomsTM, 0, "==")
+denomsTM = 0.1*zerosInDenomsTM + (1-zerosInDenomsTM)*denomsTM
+parfor(i in 1:numTopics){
+	docTopicMixtures[,i] = docTopicMixtures[,i]/denomsTM
+}
+write(docTopicMixtures, $5, format="binary")
+for(j in 2:numTopics){  # after writing, replace each row by its running sum for the sampling below
+	docTopicMixtures[,j] = docTopicMixtures[,j-1] + docTopicMixtures[,j]
+}
+# Per-topic feature distributions: normalised, written out, then accumulated in the same way.
+topicDistributions = Rand(rows=numTopics, cols=numFeatures, min=0.0, max=1.0, pdf="uniform", seed=0, sparsity=0.75)
+parfor(i in 1:numTopics){
+	topicDist = topicDistributions[i,]
+	
+	denom2 = sum(topicDist)
+	if(denom2 == 0){
+		denom2 = denom2 + 0.1
+	}
+	
+	topicDistributions[i,] = topicDist / denom2
+}
+write(topicDistributions, $6, format="binary")
+for(j in 2:numFeatures){
+	topicDistributions[,j] = topicDistributions[,j-1] + topicDistributions[,j]
+}
+
+data0 = Rand(rows=numDocuments, cols=numFeatures, min=0, max=0, pdf="uniform")  # all-zero count matrix, filled block by block afterwards
+
+#outer-loop for blockwise computation: the block starting at k covers documents k..(k+len-1)
+for( k in seq(1,numDocuments,blocksize) )  
+{
+  len = min(blocksize,numDocuments-k+1); #block length (>= 1; only the last block may be shorter)
+  data = data0[k:(k+len-1),];          #obtain block (exactly len rows)
+  
+  parfor(i in 1:len){
+  	docTopic = docTopicMixtures[k+i-1,]  #i is block-local; the document's global row is k+i-1
+  	
+  	r_z = Rand(rows=numWordsPerDoc, cols=1, min=0, max=1, pdf="uniform", seed=0)
+  	r_w = Rand(rows=numWordsPerDoc, cols=1, min=0, max=1, pdf="uniform", seed=0)
+  	
+  	for(j in 1:numWordsPerDoc){
+  		rz = castAsScalar(r_z[j,1])
+  		continue = 1
+  		
+  		z = -1
+  		#this is a workaround
+  		#z=1	
+  		# pick the first topic whose cumulative probability is >= rz
+  		for(k1 in 1:numTopics){
+  			prob = castAsScalar(docTopic[1,k1])
+  			if(continue==1 & rz <= prob){
+  				z=k1
+  				continue=0
+  			}
+  		}
+  		# no cumulative value reached rz: report it and fall back to the last topic
+  		if(z==-1){
+  			print("z is unassigned: " + z)
+  			z = numTopics
+  		}
+  		
+  		rw = castAsScalar(r_w[j,1])
+  		continue = 1
+  		
+  		w = -1
+  		#this is a workaround
+  		#w = 1
+  		# same search over the cumulative feature distribution of topic z
+  		for(k2 in 1:numFeatures){
+  			prob = castAsScalar(topicDistributions[z,k2])
+  			if(continue == 1 & rw <= prob){
+  				w = k2
+  				continue = 0
+  			}
+  		}
+  		# no cumulative value reached rw: report it and fall back to the last feature
+  		if(w==-1){
+  			print("w is unassigned: " + w)
+  			w = numFeatures
+  		}
+  		
+  		data[i,w] = data[i,w] + 1
+  	}
+  }
+  
+  data0[k:(k+len-1),] = data; # write block back (same len rows that were read)
+}
+
+write(data0, $7, format="binary")

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/scripts/datagen/genRandData4SurvAnalysis.dml
----------------------------------------------------------------------
diff --git a/scripts/datagen/genRandData4SurvAnalysis.dml b/scripts/datagen/genRandData4SurvAnalysis.dml
index 7ac235b..da94a22 100644
--- a/scripts/datagen/genRandData4SurvAnalysis.dml
+++ b/scripts/datagen/genRandData4SurvAnalysis.dml
@@ -1,133 +1,133 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-#  
-# THIS SCRIPT GENERATED RANDOM DATA FOR KAPLAN-MEIER AND COX PROPORTIONAL HAZARD MODELS
-# ASSUMPTION: BASELINE HAZARD HAS WEIBULL DISTIRUTION WITH PARAMETERS LAMBDA AND V 
-#
-# INPUT   PARAMETERS:
-# ---------------------------------------------------------------------------------------------
-# NAME    TYPE     DEFAULT      MEANING
-# ---------------------------------------------------------------------------------------------
-# type    Sting    ---          The type of model for which the data is being generated: "kaplan-meier" or "cox"
-# n       Int                   Number of records 
-# lambda  Double   2.0          Scale parameter of the Weibull distribution used for generating timestamps 
-# v       Double   1.5          Shape parameter of the Weibull distribution used for generating timestamps 
-# p       Double   0.8          1 - probability of a record being censored
-# g       Int      2            If type=kaplan-meier the number of categorical features used for grouping 
-# s       Int      1            If type=kaplan-meier the number of categorical features used for stratifying
-# f       Int      10           If type=kaplan-meier maximum number of levels (i.e., distinct values) of g+s categorical features
-# m       Int      100          If type=cox the number of features in the model
-# sp      Double   1.0          If type=cox the sparsity of the feature matrix 
-# O       String   ---          Location to write the output matrix containing random data for the kaplan-meier or the cox model 
-# B       String   ---          If type=cox location to write the output matrix containing the coefficients for the cox model 
-# TE 	  String   ---			Location to store column indices of X corresponding to timestamp (first row) and event information (second row)
-# F       String   ---			Location to store column indices of X which are to be used for fitting the Cox model
-# fmt     String   "text"       The output format of results of the kaplan-meier analysis, such as "text" or "csv"
-# ---------------------------------------------------------------------------------------------
-# OUTPUTS: 
-# 1- If type=kaplan-meier an n x (2+g+s) matrix O with      
-#    - column 1 contains timestamps generated randomly from a Weibull distribution with parameters lambda and v
-#	 - column 2 contains the information whether an event occurred (1) or data is censored (0)
-#	 - columns 3:2+g contain categorical features used for grouping	
-#    - columns 3+g:2+g+s contain categorical features used for stratifying
-#   if type=cox an n x (2+m) matrix O with 
-#	 - column 1 contains timestamps generated randomly from a Weibull distribution with parameters lambda and v
-#	 - column 2 contains the information whether an event occurred (1) or data is censored (0)
-#	 - columns 3:2+m contain scale features 
-# 2- If type=cox a coefficient matrix B
-# 3- A colum matrix TE containing the column indices of X corresponding to timestamp (first row) and event information (second row) 
-# 4- A column matrix F containing the column indices of X which are to be used for KM analysis or fitting the Cox model
-
-type = $type; # either "kaplan-meier" or "cox" 
-num_records = $n; 
-lambda = ifdef ($l, 2.0); 
-p_event = ifdef ($p, 0.8); # 1 - prob. of a record being censored
-# parameters related to the kaplan-meier model
-n_groups = ifdef ($g, 2);
-n_strata = ifdef ($s, 1);
-max_level = ifdef ($f, 10);
-# parameters related to the cox model
-num_features = ifdef ($m, 1000);  
-sparsity = ifdef ($sp, 1.0); 
-fileO = $O;
-fileB = $B; 
-fileTE = $TE;
-fileF = $F;
-fmtO = ifdef ($fmt, "text"); # $fmt="text" 
-p_censor = 1 - p_event; # prob. that record is censored
-
-if (type == "kaplan-meier") {
-	
-	v = ifdef ($v, 1.5);
-	# generate categorical features used for grouping and stratifying
-	X = ceil (rand (rows = num_records, cols = n_groups + n_strata, min = 0.000000001, max = max_level - 0.000000001, pdf = "uniform"));
-	
-	# generate timestamps
-	U = rand (rows = num_records, cols = 1, min = 0.000000001, max = 1); 
-	T = (-log (U) / lambda) ^ (1/v);
-
-} else if (type == "cox") {
-
-	v = ifdef ($v, 50);
-	# generate feature matrix
-	X = rand (rows = num_records, cols = num_features, min = 1, max = 5, pdf = "uniform", sparsity = sparsity);
-
-	# generate coefficients
-	B = rand (rows = num_features, cols = 1, min = -1.0, max = 1.0, pdf = "uniform", sparsity = 1.0); # * beta_range;	
-
-	# generate timestamps
-	U = rand (rows = num_records, cols = 1, min = 0.000000001, max = 1); 
-	T = (-log (U) / (lambda * exp (X %*% B)) ) ^ (1/v);
-
-} else {
-	stop ("Wrong model type!");
-}
-
-Y = matrix (0, rows = num_records, cols = 2);
-event = floor (rand (rows = num_records, cols = 1, min = (1 - p_censor), max = (1 + p_event)));
-n_time = sum (event);
-Y[,2] = event;
-  	
-# binning of event times
-min_T = min (T);
-max_T = max (T);
-# T = T - min_T;
-len = max_T - min_T;
-num_bins = len / n_time;
-T = ceil (T / num_bins);
-
-# print ("min(T) " + min(T) + " max(T) " + max(T));
-Y[,1] = T;
-
-O = append (Y, X);
-write (O, fileO, format = fmtO);
-
-if (type == "cox") {
-	write (B, fileB, format = fmtO);
-	
-}
-
-TE = matrix ("1 2", rows = 2, cols = 1);
-F = seq (1, num_features);
-write (TE, fileTE, format = fmtO);
-write (F, fileF, format = fmtO);
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+#  
+# THIS SCRIPT GENERATES RANDOM DATA FOR KAPLAN-MEIER AND COX PROPORTIONAL HAZARD MODELS
+# ASSUMPTION: BASELINE HAZARD HAS A WEIBULL DISTRIBUTION WITH PARAMETERS LAMBDA AND V
+#
+# INPUT   PARAMETERS:
+# ---------------------------------------------------------------------------------------------
+# NAME    TYPE     DEFAULT      MEANING
+# ---------------------------------------------------------------------------------------------
+# type    String   ---          The type of model for which the data is being generated: "kaplan-meier" or "cox"
+# n       Int      ---          Number of records 
+# l       Double   2.0          Scale parameter (lambda) of the Weibull distribution used for generating timestamps 
+# v       Double   1.5          Shape parameter of the Weibull distribution used for generating timestamps (default is 50 if type=cox)
+# p       Double   0.8          1 - probability of a record being censored
+# g       Int      2            If type=kaplan-meier the number of categorical features used for grouping 
+# s       Int      1            If type=kaplan-meier the number of categorical features used for stratifying
+# f       Int      10           If type=kaplan-meier maximum number of levels (i.e., distinct values) of g+s categorical features
+# m       Int      1000         If type=cox the number of features in the model
+# sp      Double   1.0          If type=cox the sparsity of the feature matrix 
+# O       String   ---          Location to write the output matrix containing random data for the kaplan-meier or the cox model 
+# B       String   ---          If type=cox location to write the output matrix containing the coefficients for the cox model 
+# TE 	  String   ---			Location to store column indices of X corresponding to timestamp (first row) and event information (second row)
+# F       String   ---			Location to store column indices of X which are to be used for fitting the Cox model
+# fmt     String   "text"       The output format of results of the kaplan-meier analysis, such as "text" or "csv"
+# ---------------------------------------------------------------------------------------------
+# OUTPUTS: 
+# 1- If type=kaplan-meier an n x (2+g+s) matrix O with      
+#    - column 1 contains timestamps generated randomly from a Weibull distribution with parameters lambda and v
+#	 - column 2 contains the information whether an event occurred (1) or data is censored (0)
+#	 - columns 3:2+g contain categorical features used for grouping	
+#    - columns 3+g:2+g+s contain categorical features used for stratifying
+#   if type=cox an n x (2+m) matrix O with 
+#	 - column 1 contains timestamps generated randomly from a Weibull distribution with parameters lambda and v
+#	 - column 2 contains the information whether an event occurred (1) or data is censored (0)
+#	 - columns 3:2+m contain scale features 
+# 2- If type=cox a coefficient matrix B
+# 3- A column matrix TE containing the column indices of X corresponding to timestamp (first row) and event information (second row) 
+# 4- A column matrix F containing the column indices of X which are to be used for KM analysis or fitting the Cox model
+
+type = $type; # either "kaplan-meier" or "cox" 
+num_records = $n; 
+lambda = ifdef ($l, 2.0); 
+p_event = ifdef ($p, 0.8); # 1 - prob. of a record being censored
+# parameters related to the kaplan-meier model
+n_groups = ifdef ($g, 2);
+n_strata = ifdef ($s, 1);
+max_level = ifdef ($f, 10);
+# parameters related to the cox model
+num_features = ifdef ($m, 1000);  
+sparsity = ifdef ($sp, 1.0); 
+fileO = $O;
+fileB = $B; 
+fileTE = $TE;
+fileF = $F;
+fmtO = ifdef ($fmt, "text"); # $fmt="text" 
+p_censor = 1 - p_event; # prob. that record is censored
+
+if (type == "kaplan-meier") {
+	
+	v = ifdef ($v, 1.5);
+	# generate categorical features used for grouping and stratifying
+	X = ceil (rand (rows = num_records, cols = n_groups + n_strata, min = 0.000000001, max = max_level - 0.000000001, pdf = "uniform"));
+	
+	# generate timestamps
+	U = rand (rows = num_records, cols = 1, min = 0.000000001, max = 1); 
+	T = (-log (U) / lambda) ^ (1/v);
+
+} else if (type == "cox") {
+
+	v = ifdef ($v, 50);
+	# generate feature matrix
+	X = rand (rows = num_records, cols = num_features, min = 1, max = 5, pdf = "uniform", sparsity = sparsity);
+
+	# generate coefficients
+	B = rand (rows = num_features, cols = 1, min = -1.0, max = 1.0, pdf = "uniform", sparsity = 1.0); # * beta_range;	
+
+	# generate timestamps
+	U = rand (rows = num_records, cols = 1, min = 0.000000001, max = 1); 
+	T = (-log (U) / (lambda * exp (X %*% B)) ) ^ (1/v);
+
+} else {
+	stop ("Wrong model type!");
+}
+
+Y = matrix (0, rows = num_records, cols = 2);
+event = floor (rand (rows = num_records, cols = 1, min = (1 - p_censor), max = (1 + p_event)));
+n_time = sum (event);
+Y[,2] = event;
+  	
+# binning of event times
+min_T = min (T);
+max_T = max (T);
+# T = T - min_T;
+len = max_T - min_T;
+num_bins = len / n_time;
+T = ceil (T / num_bins);
+
+# print ("min(T) " + min(T) + " max(T) " + max(T));
+Y[,1] = T;
+
+O = append (Y, X);
+write (O, fileO, format = fmtO);
+
+if (type == "cox") {
+	write (B, fileB, format = fmtO);
+	
+}
+
+TE = matrix ("1 2", rows = 2, cols = 1);
+F = seq (1, num_features);
+write (TE, fileTE, format = fmtO);
+write (F, fileF, format = fmtO);
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/scripts/datagen/genRandData4Transform.dml
----------------------------------------------------------------------
diff --git a/scripts/datagen/genRandData4Transform.dml b/scripts/datagen/genRandData4Transform.dml
index b207629..bc799d6 100644
--- a/scripts/datagen/genRandData4Transform.dml
+++ b/scripts/datagen/genRandData4Transform.dml
@@ -1,96 +1,96 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-#
-# Generates random data to test transform with
-#
-# rows, cols: dimensions of the data matrix to be generated
-# prob_categorical: percentage of the generated cols to be categorical
-# min_domain, max_domain: provide a range for domain sizes of the generated categorical cols
-# prob_missing: percentage of the generated (scale) cols to have missing values
-# prob_missing_cell: probability of a cell to have a missing value
-# out_X, out_missing, out_categorical: output file names
-#
-
-#params for size of data
-num_rows = ifdef($rows, 1000)
-num_cols = ifdef($cols, 25)
-
-#params for kind of cols
-prob_categorical = ifdef($prob_cat, 0.1)
-min_domain_size = ifdef($min_domain, 1)
-max_domain_size = ifdef($max_domain, 10)
-
-#params for missing value cols
-prob_missing_col = ifdef($prob_missing, 0.1)
-prob_missing_val = ifdef($prob_missing_cell, 0.2)
-
-num_scalar_cols = as.double(num_cols)
-num_categorical_cols = 0.0
-scalar_ind = matrix(1, rows=num_scalar_cols, cols=1)
-if(prob_categorical > 0){
-  categorical_ind = Rand(rows=num_cols, cols=1, min=0, max=1, pdf="uniform")
-  categorical_ind = ppred(categorical_ind, prob_categorical, "<")
-  categorical_col_ids = removeEmpty(target=seq(1, num_cols, 1)*categorical_ind, margin="rows")
-  num_categorical_cols = sum(categorical_ind)
-  write(categorical_col_ids, $out_categorical, format="csv")
-  
-  domain_sizes = Rand(rows=num_categorical_cols, cols=1, min=0, max=1, pdf="uniform")
-  domain_sizes = round(min_domain_size + (max_domain_size - min_domain_size)*domain_sizes)
-  
-  categorical_X = Rand(rows=num_rows, cols=num_categorical_cols, min=0, max=1, pdf="uniform")
-  categorical_X = t(round(1 + t(categorical_X)*(domain_sizes - 1)))
-
-  scalar_ind = 1-categorical_ind
-}
-
-scalar_col_ids = removeEmpty(target=seq(1, num_cols, 1)*scalar_ind, margin="rows")
-num_scalar_cols = sum(scalar_ind)
-scalar_X = Rand(rows=num_rows, cols=num_scalar_cols, min=0, max=1, pdf="uniform")
-  
-if(num_categorical_cols > 0 & num_scalar_cols > 0){
-  X = append(scalar_X, categorical_X)
-  permut_mat = table(seq(1, num_scalar_cols, 1), scalar_col_ids, num_scalar_cols, num_cols)
-  fill_in = matrix(0, rows=num_cols-num_scalar_cols, cols=num_cols)
-  permut_mat = t(append(t(permut_mat), t(fill_in)))
-  X = X %*% permut_mat
-}else{
-  if(num_categorical_cols > 0) X = categorical_X
-  else{
-    if(num_scalar_cols > 0) X = scalar_X
-    else print("somehow, we've managed to compute that precisely 0 cols should be categorical and 0 cols should be scale")
-  }
-}
-
-if(prob_missing_col > 0){
-  missing_col_ind = Rand(rows=num_cols, cols=1, min=0, max=1, pdf="uniform")
-  missing_col_ind = ppred(missing_col_ind, prob_missing_col, "<")
-  #currently only support missing value imputation for scale cols
-  missing_col_ind = missing_col_ind * scalar_ind
-  missing_col_ids = removeEmpty(target=seq(1, num_cols, 1)*missing_col_ind, margin="rows")
-  missing_values = Rand(rows=num_rows, cols=nrow(missing_col_ids), min=0, max=1, pdf="uniform")
-  missing_values = ppred(missing_values, prob_missing_val, "<")
-  X = append(X, missing_values)
-  
-  write(missing_col_ids, $out_missing, format="csv")
-}
-
-write(X, $out_X, format="csv")
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+#
+# Generates random data to test transform with
+#
+# rows, cols: dimensions of the data matrix to be generated
+# prob_cat: probability that a generated col is categorical (i.e. the expected fraction of categorical cols)
+# min_domain, max_domain: provide a range for domain sizes of the generated categorical cols
+# prob_missing: probability that a generated (scale) col has missing values
+# prob_missing_cell: probability of a cell to have a missing value
+# out_X, out_missing, out_categorical: output file names
+#
+
+#params for size of data
+num_rows = ifdef($rows, 1000)
+num_cols = ifdef($cols, 25)
+
+#params for kind of cols
+prob_categorical = ifdef($prob_cat, 0.1)
+min_domain_size = ifdef($min_domain, 1)
+max_domain_size = ifdef($max_domain, 10)
+
+#params for missing value cols
+prob_missing_col = ifdef($prob_missing, 0.1)
+prob_missing_val = ifdef($prob_missing_cell, 0.2)
+
+num_scalar_cols = as.double(num_cols)
+num_categorical_cols = 0.0
+scalar_ind = matrix(1, rows=num_scalar_cols, cols=1)
+if(prob_categorical > 0){
+  categorical_ind = Rand(rows=num_cols, cols=1, min=0, max=1, pdf="uniform")
+  categorical_ind = ppred(categorical_ind, prob_categorical, "<")
+  categorical_col_ids = removeEmpty(target=seq(1, num_cols, 1)*categorical_ind, margin="rows")
+  num_categorical_cols = sum(categorical_ind)
+  write(categorical_col_ids, $out_categorical, format="csv")
+  
+  domain_sizes = Rand(rows=num_categorical_cols, cols=1, min=0, max=1, pdf="uniform")
+  domain_sizes = round(min_domain_size + (max_domain_size - min_domain_size)*domain_sizes)
+  
+  categorical_X = Rand(rows=num_rows, cols=num_categorical_cols, min=0, max=1, pdf="uniform")
+  categorical_X = t(round(1 + t(categorical_X)*(domain_sizes - 1)))
+
+  scalar_ind = 1-categorical_ind
+}
+
+scalar_col_ids = removeEmpty(target=seq(1, num_cols, 1)*scalar_ind, margin="rows")
+num_scalar_cols = sum(scalar_ind)
+scalar_X = Rand(rows=num_rows, cols=num_scalar_cols, min=0, max=1, pdf="uniform")
+  
+if(num_categorical_cols > 0 & num_scalar_cols > 0){
+  X = append(scalar_X, categorical_X)
+  permut_mat = table(seq(1, num_scalar_cols, 1), scalar_col_ids, num_scalar_cols, num_cols)
+  fill_in = matrix(0, rows=num_cols-num_scalar_cols, cols=num_cols)
+  permut_mat = t(append(t(permut_mat), t(fill_in)))
+  X = X %*% permut_mat
+}else{
+  if(num_categorical_cols > 0) X = categorical_X
+  else{
+    if(num_scalar_cols > 0) X = scalar_X
+    else print("somehow, we've managed to compute that precisely 0 cols should be categorical and 0 cols should be scale")
+  }
+}
+
+if(prob_missing_col > 0){
+  missing_col_ind = Rand(rows=num_cols, cols=1, min=0, max=1, pdf="uniform")
+  missing_col_ind = ppred(missing_col_ind, prob_missing_col, "<")
+  #currently only support missing value imputation for scale cols
+  missing_col_ind = missing_col_ind * scalar_ind
+  missing_col_ids = removeEmpty(target=seq(1, num_cols, 1)*missing_col_ind, margin="rows")
+  missing_values = Rand(rows=num_rows, cols=nrow(missing_col_ids), min=0, max=1, pdf="uniform")
+  missing_values = ppred(missing_values, prob_missing_val, "<")
+  X = append(X, missing_values)
+  
+  write(missing_col_ids, $out_missing, format="csv")
+}
+
+write(X, $out_X, format="csv")

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/scripts/datagen/genRandData4Univariate.dml
----------------------------------------------------------------------
diff --git a/scripts/datagen/genRandData4Univariate.dml b/scripts/datagen/genRandData4Univariate.dml
index d3c842c..bcbd528 100644
--- a/scripts/datagen/genRandData4Univariate.dml
+++ b/scripts/datagen/genRandData4Univariate.dml
@@ -1,61 +1,61 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# generates random numbers from a distribution
-# with specified mean, standard deviation, 
-# skewness, kurtosis
-# mean and standard deviation are taken in as
-# arguments by this script
-# a,b,c,d are coefficients computed by some
-# equation solver determined from the specified
-# skewness and kurtosis using power method
-# polynomials
-#
-# for more details see:
-# Statistical Simulation: Power Method Polynomials
-# and Other Transformations
-# Author: Todd C. Headrick
-# Chapman & Hall/CRC, Boca Raton, FL, 2010.
-# ISBN 978-1-4200-6490-2
-
-# $1 is the number of random points to be sampled
-# $2 is specified mean
-# $3 is specified standard deviation
-# $4-$7 are a,b,c,d obtained by solving a system
-# of equations using specified kurtosis and skewness
-# $8 is the file to write out the generated data to
-
-numSamples = $1
-mu = $2
-sigma = $3
-a = $4
-b = $5
-c = $6
-d = $7
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# generates random numbers from a distribution
+# with specified mean, standard deviation, 
+# skewness, kurtosis
+# mean and standard deviation are taken in as
+# arguments by this script
+# a,b,c,d are coefficients computed by some
+# equation solver determined from the specified
+# skewness and kurtosis using power method
+# polynomials
+#
+# for more details see:
+# Statistical Simulation: Power Method Polynomials
+# and Other Transformations
+# Author: Todd C. Headrick
+# Chapman & Hall/CRC, Boca Raton, FL, 2010.
+# ISBN 978-1-4200-6490-2
+
+# $1 is the number of random points to be sampled
+# $2 is specified mean
+# $3 is specified standard deviation
+# $4-$7 are a,b,c,d obtained by solving a system
+# of equations using specified kurtosis and skewness
+# $8 is the file to write out the generated data to
+
+numSamples = $1
+mu = $2
+sigma = $3
+a = $4
+b = $5
+c = $6
+d = $7
+
 
 print("a=" + a + " b=" + b + " c=" + c + " d=" + d)
 
-X = Rand(rows=numSamples, cols=1, pdf="normal", seed=0)
-Y = a + b*X + c*X^2 + d*X^3
-
-Z = Y*sigma + mu
-write(Z, $8, format="binary")
+X = Rand(rows=numSamples, cols=1, pdf="normal", seed=0)
+Y = a + b*X + c*X^2 + d*X^3
+
+Z = Y*sigma + mu
+write(Z, $8, format="binary")

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/scripts/staging/PPCA.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/PPCA.dml b/scripts/staging/PPCA.dml
index 667c709..abfcdb1 100644
--- a/scripts/staging/PPCA.dml
+++ b/scripts/staging/PPCA.dml
@@ -1,160 +1,160 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
- 
-# This script performs Probabilistic Principal Component Analysis (PCA) on the given input data. 
-# It is based on paper: sPCA: Scalable Principal Component Analysis for Big Data on Distributed 
-# Platforms. Tarek Elgamal et.al.
-
-# INPUT PARAMETERS:
-# ---------------------------------------------------------------------------------------------
-# NAME   	TYPE   DEFAULT  MEANING
-# ---------------------------------------------------------------------------------------------
-# X  	 	String ---      location to read the matrix X input matrix
-# k      	Int    ---      indicates dimension of the new vector space constructed from eigen vectors
-# tolobj 	Int    0.00001  objective function tolerance value to stop ppca algorithm
-# tolrecerr	Int    0.02     reconstruction error tolerance value to stop the algorithm	
-# iter   	Int    10       maximum number of iterations
-# fmt    	String 'text'   output format of results PPCA such as "text" or "csv"
-# hadoop jar SystemML.jar -f PPCA.dml -nvargs X=/INPUT_DIR/X  C=/OUTPUT_DIR/C V=/OUTPUT_DIR/V k=2 tol=0.2 iter=100
-# ---------------------------------------------------------------------------------------------
-# OUTPUT PARAMETERS: 
-# ---------------------------------------------------------------------------------------------
-# NAME   TYPE   DEFAULT  MEANING
-# ---------------------------------------------------------------------------------------------
-# C     	Matrix  ---     principal components
-# V      	Matrix  ---     eigenvalues / eigenvalues of principal components
-#
-
-X = read($X);
-
-fileC = $C;
-fileV = $V;
-
-k = ifdef($k, ncol(X));
-iter = ifdef($iter, 10);
-tolobj = ifdef($tolobj, 0.00001);
-tolrecerr = ifdef($tolrecerr, 0.02);
-fmt0 = ifdef($fmt, "text"); 
-
-n = nrow(X);
-m = ncol(X);
-
-#initializing principal components matrix
-C =  rand(rows=m, cols=k, pdf="normal");
-ss = rand(rows=1, cols=1, pdf="normal");
-ss = as.scalar(ss);
-ssPrev = ss;
-
-# best selected principle components - with the lowest reconstruction error 
-PC = C;
-
-# initilizing reconstruction error
-RE = tolrecerr+1;
-REBest = RE;
-
-Z = matrix(0,rows=1,cols=1);
-
-#Objective function value
-ObjRelChng = tolobj+1;
-
-# mean centered input matrix - dim -> [n,m]
-Xm = X - colMeans(X);
-
-#I -> k x k
-ITMP = matrix(1,rows=k,cols=1);
-I = diag(ITMP);
-
-i = 0;
-while (i < iter & ObjRelChng > tolobj & RE > tolrecerr){
-	#Estimation step - Covariance matrix 
-	#M -> k x k
-	M = t(C) %*% C + I*ss; 
-	
-	#Auxilary matrix with n latent variables 
-	# Z -> n x k		
-	Z = Xm %*% (C %*% inv(M)); 
-
-	#ZtZ -> k x k
-	ZtZ = t(Z) %*% Z + inv(M)*ss;
-	
-	#XtZ -> m x k
-	XtZ = t(Xm) %*% Z;
-	
-	#Maximization step
-	#C ->  m x k
-	ZtZ_sum = sum(ZtZ); #+n*inv(M)); 
-	C = XtZ/ZtZ_sum;
-
-	#ss2 -> 1 x 1
-	ss2 = trace(ZtZ * (t(C) %*% C));
-
-	#ss3 -> 1 x 1 
-	ss3 = sum((Z %*% t(C)) %*% t(Xm));
-	
-	#Frobenius norm of reconstruction error -> Euclidean norm 
-	#Fn -> 1 x 1	
-	Fn = sum(Xm*Xm);
-
-	#ss -> 1 x 1
-	ss = (Fn + ss2 - 2*ss3)/(n*m);
-
-   #calculating objective function relative change
-   ObjRelChng = abs(1 - ss/ssPrev);
-   #print("Objective Relative Change: " + ObjRelChng + ", Objective: " + ss);
-
-	#Reconstruction error
-	R = ((Z %*% t(C)) -  Xm);	
-
-	#calculate the error
-	#TODO rethink calculation of reconstruction error .... 
-	#1-Norm of reconstruction error - a big dense matrix 
-	#RE -> n x m
-	RE = abs(sum(R)/sum(Xm));	
-	if (RE < REBest){
-		PC = C;
-		REBest = RE;
-	}	
-	#print("ss: " + ss +" = Fn( "+ Fn +" ) + ss2( " + ss2  +" ) - 2*ss3( " + ss3 + " ), Reconstruction Error: " + RE);
-
-	ssPrev = ss;	
-	i = i+1;
-}
-print("Objective Relative Change: " + ObjRelChng);
-print ("Number of iterations: " + i + ", Reconstruction Err: " + REBest);
-
-# reconstructs data
-# RD -> n x k
-RD = X %*% PC;
-
-# calculate eigenvalues - principle component variance
-RDMean = colMeans(RD);
-V = t(colMeans(RD*RD) - (RDMean*RDMean));
-
-# sorting eigenvalues and eigenvectors in decreasing order
-V_decr_idx = order(target=V,by=1,decreasing=TRUE,index.return=TRUE);
-VF_decr = table(seq(1,nrow(V)),V_decr_idx);
-V = VF_decr %*% V;
-PC = PC %*% VF_decr;
-
-# writing principal components 
-write(PC, fileC, format=fmt0);
-# writing eigen values/pc variance
-write(V, fileV, format=fmt0);
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+ 
+# This script performs Probabilistic Principal Component Analysis (PPCA) on the given input data. 
+# It is based on the paper: sPCA: Scalable Principal Component Analysis for Big Data on Distributed 
+# Platforms. Tarek Elgamal et al.
+
+# INPUT PARAMETERS:
+# ---------------------------------------------------------------------------------------------
+# NAME   	TYPE   DEFAULT  MEANING
+# ---------------------------------------------------------------------------------------------
+# X  	 	String ---      location to read the input matrix X
+# k      	Int    ---      indicates dimension of the new vector space constructed from eigen vectors
+# tolobj 	Double 0.00001  objective function tolerance value to stop the PPCA algorithm
+# tolrecerr	Double 0.02     reconstruction error tolerance value to stop the algorithm	
+# iter   	Int    10       maximum number of iterations
+# fmt    	String 'text'   output format of results PPCA such as "text" or "csv"
+# hadoop jar SystemML.jar -f PPCA.dml -nvargs X=/INPUT_DIR/X  C=/OUTPUT_DIR/C V=/OUTPUT_DIR/V k=2 tolobj=0.00001 tolrecerr=0.02 iter=100
+# ---------------------------------------------------------------------------------------------
+# OUTPUT PARAMETERS: 
+# ---------------------------------------------------------------------------------------------
+# NAME   TYPE   DEFAULT  MEANING
+# ---------------------------------------------------------------------------------------------
+# C     	Matrix  ---     principal components
+# V      	Matrix  ---     eigenvalues / variances of principal components
+#
+
+X = read($X);
+
+fileC = $C;
+fileV = $V;
+
+k = ifdef($k, ncol(X));
+iter = ifdef($iter, 10);
+tolobj = ifdef($tolobj, 0.00001);
+tolrecerr = ifdef($tolrecerr, 0.02);
+fmt0 = ifdef($fmt, "text"); 
+
+n = nrow(X);
+m = ncol(X);
+
+#initializing principal components matrix
+C =  rand(rows=m, cols=k, pdf="normal");
+ss = rand(rows=1, cols=1, pdf="normal");
+ss = as.scalar(ss);
+ssPrev = ss;
+
+# best selected principal components - with the lowest reconstruction error 
+PC = C;
+
+# initializing reconstruction error
+RE = tolrecerr+1;
+REBest = RE;
+
+Z = matrix(0,rows=1,cols=1);
+
+#Objective function value
+ObjRelChng = tolobj+1;
+
+# mean centered input matrix - dim -> [n,m]
+Xm = X - colMeans(X);
+
+#I -> k x k
+ITMP = matrix(1,rows=k,cols=1);
+I = diag(ITMP);
+
+i = 0;
+while (i < iter & ObjRelChng > tolobj & RE > tolrecerr){
+	#Estimation step - Covariance matrix 
+	#M -> k x k
+	M = t(C) %*% C + I*ss; 
+	
+	#Auxiliary matrix with n latent variables 
+	# Z -> n x k		
+	Z = Xm %*% (C %*% inv(M)); 
+
+	#ZtZ -> k x k
+	ZtZ = t(Z) %*% Z + inv(M)*ss;
+	
+	#XtZ -> m x k
+	XtZ = t(Xm) %*% Z;
+	
+	#Maximization step
+	#C ->  m x k
+	ZtZ_sum = sum(ZtZ); #+n*inv(M)); 
+	C = XtZ/ZtZ_sum;
+
+	#ss2 -> 1 x 1
+	ss2 = trace(ZtZ * (t(C) %*% C));
+
+	#ss3 -> 1 x 1 
+	ss3 = sum((Z %*% t(C)) %*% t(Xm));
+	
+	#Frobenius norm of reconstruction error -> Euclidean norm 
+	#Fn -> 1 x 1	
+	Fn = sum(Xm*Xm);
+
+	#ss -> 1 x 1
+	ss = (Fn + ss2 - 2*ss3)/(n*m);
+
+   #calculating objective function relative change
+   ObjRelChng = abs(1 - ss/ssPrev);
+   #print("Objective Relative Change: " + ObjRelChng + ", Objective: " + ss);
+
+	#Reconstruction error
+	R = ((Z %*% t(C)) -  Xm);	
+
+	#calculate the error
+	#TODO rethink calculation of reconstruction error .... 
+	#1-Norm of reconstruction error - a big dense matrix 
+	#R -> n x m, RE -> 1 x 1
+	RE = abs(sum(R)/sum(Xm));	
+	if (RE < REBest){
+		PC = C;
+		REBest = RE;
+	}	
+	#print("ss: " + ss +" = Fn( "+ Fn +" ) + ss2( " + ss2  +" ) - 2*ss3( " + ss3 + " ), Reconstruction Error: " + RE);
+
+	ssPrev = ss;	
+	i = i+1;
+}
+print("Objective Relative Change: " + ObjRelChng);
+print ("Number of iterations: " + i + ", Reconstruction Err: " + REBest);
+
+# reconstructs data
+# RD -> n x k
+RD = X %*% PC;
+
+# calculate eigenvalues - principal component variance
+RDMean = colMeans(RD);
+V = t(colMeans(RD*RD) - (RDMean*RDMean));
+
+# sorting eigenvalues and eigenvectors in decreasing order
+V_decr_idx = order(target=V,by=1,decreasing=TRUE,index.return=TRUE);
+VF_decr = table(seq(1,nrow(V)),V_decr_idx);
+V = VF_decr %*% V;
+PC = PC %*% VF_decr;
+
+# writing principal components 
+write(PC, fileC, format=fmt0);
+# writing eigen values/pc variance
+write(V, fileV, format=fmt0);

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/scripts/staging/regression/lasso/lasso.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/regression/lasso/lasso.dml b/scripts/staging/regression/lasso/lasso.dml
index fb520df..1da88d3 100644
--- a/scripts/staging/regression/lasso/lasso.dml
+++ b/scripts/staging/regression/lasso/lasso.dml
@@ -1,113 +1,113 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-#uses the sparsa algorithm to perform lasso regression
-
-X = read($X)
-y = read($Y)
-n = nrow(X)
-m = ncol(X)
-
-#params
-tol = 10^(-15)
-M = 5
-tau = 1
-maxiter = 1000
-
-#constants
-eta = 2
-sigma = 0.01
-alpha_min = 10^(-30)
-alpha_max = 10^(30)
-
-#init
-alpha = 1
-w = Rand(rows=m, cols=1, min=0, max=1, pdf="uniform")
-history = -1*10^30 * matrix(1, rows=M, cols=1)
-
-r = X %*% w - y
-g = t(X) %*% r
-obj = 0.5 * sum(r*r) + tau*sum(abs(w))
-
-print("Initial OBJ=" + obj)
-
-history[M,1] = obj
-
-inactive_set = matrix(1, rows=m, cols=1)
-iter = 0
-continue = TRUE
-while(iter < maxiter & continue) {
-	dw = matrix(0, rows=m, cols=1)
-	dg = matrix(0, rows=m, cols=1)
-	relChangeObj = -1.0
-	
-	inner_iter = 0
-	inner_continue = TRUE
-	inner_maxiter = 100
-	while(inner_iter < inner_maxiter & inner_continue) {
-		u = w - g/alpha
-		lambda = tau/alpha
-		
-		wnew = sign(u) * (abs(u) - lambda) * ppred(abs(u) - lambda, 0, ">")
-		dw = wnew - w
-		dw2 = sum(dw*dw)
-		
-		r = X %*% wnew - y
-		gnew = t(X) %*% r
-		objnew = 0.5 * sum(r*r) + tau*sum(abs(wnew))		
-		obj_threshold = max(history) - 0.5*sigma*alpha*dw2
-		
-		if(objnew <= obj_threshold) {
-			w = wnew
-			dg = gnew - g
-			g = gnew
-			inner_continue = FALSE
-			
-			history[1:(M-1),] = history[2:M,]
-			history[M,1] = objnew
-			relChangeObj = abs(objnew - obj)/obj
-			obj = objnew
-		}
-		else 
-			alpha = eta*alpha
-	
-		inner_iter = inner_iter + 1
-	}
-	
-	if(inner_continue) 
-		print("Inner loop did not converge")
-	
-	alphanew = sum(dw*dg)/sum(dw*dw)
-	alpha = max(alpha_min, min(alpha_max, alphanew))
-	
-	old_inactive_set = inactive_set
-	inactive_set = ppred(w, 0, "!=")
-	diff = sum(abs(old_inactive_set - inactive_set))
-
-	if(diff == 0 & relChangeObj < tol) 
-		continue = FALSE
-
-	num_inactive = sum(ppred(w, 0, "!="))
-	print("ITER=" + iter + " OBJ=" + obj + " relative change=" + relChangeObj + " num_inactive=" + num_inactive)
-	iter = iter + 1
-}
-
-write(w, $model)
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+#uses the SpaRSA algorithm to perform lasso regression
+
+X = read($X)
+y = read($Y)
+n = nrow(X)
+m = ncol(X)
+
+#params
+tol = 10^(-15)
+M = 5
+tau = 1
+maxiter = 1000
+
+#constants
+eta = 2
+sigma = 0.01
+alpha_min = 10^(-30)
+alpha_max = 10^(30)
+
+#init
+alpha = 1
+w = Rand(rows=m, cols=1, min=0, max=1, pdf="uniform")
+history = -1*10^30 * matrix(1, rows=M, cols=1)
+
+r = X %*% w - y
+g = t(X) %*% r
+obj = 0.5 * sum(r*r) + tau*sum(abs(w))
+
+print("Initial OBJ=" + obj)
+
+history[M,1] = obj
+
+inactive_set = matrix(1, rows=m, cols=1)
+iter = 0
+continue = TRUE
+while(iter < maxiter & continue) {
+	dw = matrix(0, rows=m, cols=1)
+	dg = matrix(0, rows=m, cols=1)
+	relChangeObj = -1.0
+	
+	inner_iter = 0
+	inner_continue = TRUE
+	inner_maxiter = 100
+	while(inner_iter < inner_maxiter & inner_continue) {
+		u = w - g/alpha
+		lambda = tau/alpha
+		
+		wnew = sign(u) * (abs(u) - lambda) * ppred(abs(u) - lambda, 0, ">")
+		dw = wnew - w
+		dw2 = sum(dw*dw)
+		
+		r = X %*% wnew - y
+		gnew = t(X) %*% r
+		objnew = 0.5 * sum(r*r) + tau*sum(abs(wnew))		
+		obj_threshold = max(history) - 0.5*sigma*alpha*dw2
+		
+		if(objnew <= obj_threshold) {
+			w = wnew
+			dg = gnew - g
+			g = gnew
+			inner_continue = FALSE
+			
+			history[1:(M-1),] = history[2:M,]
+			history[M,1] = objnew
+			relChangeObj = abs(objnew - obj)/obj
+			obj = objnew
+		}
+		else 
+			alpha = eta*alpha
+	
+		inner_iter = inner_iter + 1
+	}
+	
+	if(inner_continue) 
+		print("Inner loop did not converge")
+	
+	alphanew = sum(dw*dg)/sum(dw*dw)
+	alpha = max(alpha_min, min(alpha_max, alphanew))
+	
+	old_inactive_set = inactive_set
+	inactive_set = ppred(w, 0, "!=")
+	diff = sum(abs(old_inactive_set - inactive_set))
+
+	if(diff == 0 & relChangeObj < tol) 
+		continue = FALSE
+
+	num_inactive = sum(ppred(w, 0, "!="))
+	print("ITER=" + iter + " OBJ=" + obj + " relative change=" + relChangeObj + " num_inactive=" + num_inactive)
+	iter = iter + 1
+}
+
+write(w, $model)


[09/55] [partial] incubator-systemml git commit: [SYSTEMML-482] [SYSTEMML-480] Adding a Git attributes file to enfore Unix-styled line endings, and normalizing all of the line endings.

Posted by du...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/jmlc/reuse-glm-predict.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/jmlc/reuse-glm-predict.dml b/src/test/scripts/functions/jmlc/reuse-glm-predict.dml
index 540971a..9a29d67 100644
--- a/src/test/scripts/functions/jmlc/reuse-glm-predict.dml
+++ b/src/test/scripts/functions/jmlc/reuse-glm-predict.dml
@@ -1,383 +1,383 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# Default values for input parameters:
-fileX = $X;
-fileB = $B;
-fileM = ifdef ($M, " ");
-fileY = ifdef ($Y, " ");
-fileO = ifdef ($O, " ");
-fmtM  = ifdef ($fmt, "text");
-
-dist_type  = ifdef ($dfam, 1);    # $dfam = 1;
-var_power  = ifdef ($vpow, 0.0);  # $vpow = 0.0;
-link_type  = ifdef ($link, 0);    # $link = 0;
-link_power = ifdef ($lpow, 1.0);  # $lpow = 1.0;
-dispersion = ifdef ($disp, 1.0);  # $disp = 1.0;
-
-var_power  = as.double (var_power);
-link_power = as.double (link_power); 
-dispersion = as.double (dispersion);
-
-if (dist_type == 3) {
-    link_type = 2;
-} else { if (link_type == 0) { # Canonical Link
-    if (dist_type == 1) {
-        link_type = 1;
-        link_power = 1.0 - var_power;
-    } else { if (dist_type == 2) {
-            link_type = 2;
-}}} }
-
-X = read(fileX);
-#X = table(X[,1], X[,2], X[,3])
-
-num_records  = nrow (X);
-num_features = ncol (X);
-
-W = read (fileB);
-if (dist_type == 3) {
-    beta =  W [1 : ncol (X),  ];
-    intercept = W [nrow(W),  ];
-} else {
-    beta =  W [1 : ncol (X), 1];
-    intercept = W [nrow(W), 1];
-}
-if (nrow (W) == ncol (X)) {
-    intercept = 0.0 * intercept;
-    is_intercept = FALSE;
-} else {
-    num_features = num_features + 1;
-    is_intercept = TRUE;
-}
-
-ones_rec = matrix (1, rows = num_records, cols = 1);
-linear_terms = X %*% beta + ones_rec %*% intercept;
-[means, vars] =
-    glm_means_and_vars (linear_terms, dist_type, var_power, link_type, link_power);
-    
-if (fileM != " ") {
-    write (means, fileM, format=fmtM);
-}
-
-predicted_y = rowIndexMax(means)
-write(predicted_y, $P, format=fmtM)
-
-if (fileY != " ")
-{
-    Y = read (fileY);
-    ones_ctg = matrix (1, rows = ncol(Y), cols = 1);
-    
-    # Statistics To Compute:
-    
-    Z_logl               = 0.0 / 0.0;
-    Z_logl_pValue        = 0.0 / 0.0;
-    X2_pearson           = 0.0 / 0.0;
-    df_pearson           = -1;
-    G2_deviance          = 0.0 / 0.0;
-    df_deviance          = -1;
-    X2_pearson_pValue    = 0.0 / 0.0;
-    G2_deviance_pValue   = 0.0 / 0.0;
-    Z_logl_scaled        = 0.0 / 0.0;
-    Z_logl_scaled_pValue = 0.0 / 0.0;
-    X2_scaled            = 0.0 / 0.0;
-    X2_scaled_pValue     = 0.0 / 0.0;
-    G2_scaled            = 0.0 / 0.0;
-    G2_scaled_pValue     = 0.0 / 0.0;
-    
-    if (dist_type == 1 & link_type == 1) {
-    #
-    # POWER DISTRIBUTIONS (GAUSSIAN, POISSON, GAMMA, ETC.)
-    #
-        if (link_power == 0.0) {
-            is_zero_Y = ppred (Y, 0.0, "==");
-            lt_saturated = log (Y + is_zero_Y) - is_zero_Y / (1.0 - is_zero_Y);
-        } else {
-            lt_saturated = Y ^ link_power;
-        }
-        Y_counts = ones_rec;
-
-        X2_pearson = sum ((Y - means) ^ 2 / vars);
-        df_pearson = num_records - num_features;
-
-        log_l_part = 
-            glm_partial_loglikelihood_for_power_dist_and_link (linear_terms, Y, var_power, link_power);
-        log_l_part_saturated = 
-            glm_partial_loglikelihood_for_power_dist_and_link (lt_saturated, Y, var_power, link_power);
-            
-        G2_deviance = 2 * sum (log_l_part_saturated) - 2 * sum (log_l_part);
-        df_deviance = num_records - num_features;
-        
-    } else { if (dist_type >= 2) {
-    #
-    # BINOMIAL AND MULTINOMIAL DISTRIBUTIONS
-    #
-        if (ncol (Y) == 1) {
-            num_categories = ncol (beta) + 1;
-            if (min (Y) <= 0) { 
-                # Category labels "0", "-1" etc. are converted into the baseline label
-                Y = Y + (- Y + num_categories) * ppred (Y, 0, "<=");
-            }
-            Y_size = min (num_categories, max(Y));
-            Y_unsized = table (seq (1, num_records, 1), Y);
-            Y = matrix (0, rows = num_records, cols = num_categories);
-            Y [, 1 : Y_size] = Y_unsized [, 1 : Y_size];
-            Y_counts = ones_rec;
-        } else {
-            Y_counts = rowSums (Y);
-        }
-        
-        P = means;
-        zero_Y = ppred (Y, 0.0, "==");
-        zero_P = ppred (P, 0.0, "==");
-        ones_ctg = matrix (1, rows = ncol(Y), cols = 1);
-        
-        logl_vec = rowSums (Y *  log (P + zero_Y)   );
-        ent1_vec = rowSums (P *  log (P + zero_P)   );
-        ent2_vec = rowSums (P * (log (P + zero_P))^2);
-        E_logl   = sum (Y_counts * ent1_vec);
-        V_logl   = sum (Y_counts * (ent2_vec - ent1_vec ^ 2));
-        Z_logl   = (sum (logl_vec) - E_logl) / sqrt (V_logl);
-        
-        means = means * (Y_counts %*% t(ones_ctg));
-        vars  = vars  * (Y_counts %*% t(ones_ctg));
-        
-        frac_below_5 = sum (ppred (means, 5, "<")) / (nrow (means) * ncol (means));
-        frac_below_1 = sum (ppred (means, 1, "<")) / (nrow (means) * ncol (means));
-        
-        if (frac_below_5 > 0.2 | frac_below_1 > 0.0) {
-            print ("WARNING: residual statistics are inaccurate here due to low cell means.");
-        }
-        
-        X2_pearson = sum ((Y - means) ^ 2 / means);
-        df_pearson = (num_records - num_features) * (ncol(Y) - 1);
-        
-        G2_deviance = 2 * sum (Y * log ((Y + zero_Y) / (means + zero_Y)));
-        df_deviance = (num_records - num_features) * (ncol(Y) - 1);
-    }}
-    
-    if (Z_logl == Z_logl) {
-        Z_logl_absneg = - abs (Z_logl);
-        Z_logl_pValue = 2.0 * pnorm(target = Z_logl_absneg);
-    }
-    if (X2_pearson == X2_pearson & df_pearson > 0) {
-        X2_pearson_pValue = pchisq(target = X2_pearson, df = df_pearson, lower.tail=FALSE);
-    }
-    if (G2_deviance == G2_deviance & df_deviance > 0) {
-        G2_deviance_pValue = pchisq(target = G2_deviance, df = df_deviance, lower.tail=FALSE);
-    }
-    
-    Z_logl_scaled = Z_logl / sqrt (dispersion);
-    X2_scaled = X2_pearson / dispersion;
-    G2_scaled = G2_deviance / dispersion;
-
-    if (Z_logl_scaled == Z_logl_scaled) {
-        Z_logl_scaled_absneg = - abs (Z_logl_scaled);
-        Z_logl_scaled_pValue = 2.0 * pnorm(target = Z_logl_scaled_absneg);
-    }
-    if (X2_scaled == X2_scaled & df_pearson > 0) {
-        X2_scaled_pValue = pchisq(target = X2_scaled, df = df_pearson, lower.tail=FALSE);
-    }
-    if (G2_scaled == G2_scaled & df_deviance > 0) {
-        G2_scaled_pValue = pchisq(target = G2_scaled, df = df_deviance, lower.tail=FALSE);
-    }
-    
-    avg_tot_Y = colSums (    Y    ) / sum (Y_counts);
-    avg_res_Y = colSums (Y - means) / sum (Y_counts);
-    
-    ss_avg_tot_Y = colSums ((    Y     - Y_counts %*% avg_tot_Y) ^ 2);
-    ss_res_Y     = colSums ((Y - means) ^ 2);
-    ss_avg_res_Y = colSums ((Y - means - Y_counts %*% avg_res_Y) ^ 2);
-    
-    df_ss_res_Y  = sum (Y_counts) - num_features;
-    if (is_intercept) {
-        df_ss_avg_res_Y = df_ss_res_Y;
-    } else {
-        df_ss_avg_res_Y = df_ss_res_Y - 1;
-    }
-    
-    var_tot_Y = ss_avg_tot_Y / (sum (Y_counts) - 1);
-    if (df_ss_avg_res_Y > 0) {
-        var_res_Y = ss_avg_res_Y / df_ss_avg_res_Y;
-    } else {
-        var_res_Y = matrix (0.0, rows = 1, cols = ncol (Y)) / 0.0;
-    }
-    plain_R2_nobias  = 1 - ss_avg_res_Y / ss_avg_tot_Y;
-    adjust_R2_nobias = 1 - var_res_Y / var_tot_Y;
-    plain_R2  = 1 - ss_res_Y / ss_avg_tot_Y;
-    if (df_ss_res_Y > 0) {
-        adjust_R2 = 1 - (ss_res_Y / df_ss_res_Y) / var_tot_Y;
-    } else {
-        adjust_R2 = matrix (0.0, rows = 1, cols = ncol (Y)) / 0.0;
-    }
-    
-    predicted_avg_var_res_Y = dispersion * colSums (vars) / sum (Y_counts);
-    
-    # PREPARING THE OUTPUT CSV STATISTICS FILE
-    
-    str = "LOGLHOOD_Z,,FALSE," + Z_logl;
-    str = append (str, "LOGLHOOD_Z_PVAL,,FALSE," + Z_logl_pValue);
-    str = append (str, "PEARSON_X2,,FALSE," + X2_pearson);
-    str = append (str, "PEARSON_X2_BY_DF,,FALSE," + (X2_pearson / df_pearson));
-    str = append (str, "PEARSON_X2_PVAL,,FALSE," + X2_pearson_pValue);
-    str = append (str, "DEVIANCE_G2,,FALSE," + G2_deviance);
-    str = append (str, "DEVIANCE_G2_BY_DF,,FALSE," + (G2_deviance / df_deviance));
-    str = append (str, "DEVIANCE_G2_PVAL,,FALSE," + G2_deviance_pValue);
-    str = append (str, "LOGLHOOD_Z,,TRUE," + Z_logl_scaled);
-    str = append (str, "LOGLHOOD_Z_PVAL,,TRUE," + Z_logl_scaled_pValue);
-    str = append (str, "PEARSON_X2,,TRUE," + X2_scaled);
-    str = append (str, "PEARSON_X2_BY_DF,,TRUE," + (X2_scaled / df_pearson));
-    str = append (str, "PEARSON_X2_PVAL,,TRUE," + X2_scaled_pValue);
-    str = append (str, "DEVIANCE_G2,,TRUE," + G2_scaled);
-    str = append (str, "DEVIANCE_G2_BY_DF,,TRUE," + (G2_scaled / df_deviance));
-    str = append (str, "DEVIANCE_G2_PVAL,,TRUE," + G2_scaled_pValue);
-
-    for (i in 1:ncol(Y)) {
-        str = append (str, "AVG_TOT_Y," + i + ",," + castAsScalar (avg_tot_Y [1, i]));
-        str = append (str, "STDEV_TOT_Y," + i + ",," + castAsScalar (sqrt (var_tot_Y [1, i])));
-        str = append (str, "AVG_RES_Y," + i + ",," + castAsScalar (avg_res_Y [1, i]));
-        str = append (str, "STDEV_RES_Y," + i + ",," + castAsScalar (sqrt (var_res_Y [1, i])));
-        str = append (str, "PRED_STDEV_RES," + i + ",TRUE," + castAsScalar (sqrt (predicted_avg_var_res_Y [1, i])));
-        str = append (str, "PLAIN_R2," + i + ",," + castAsScalar (plain_R2 [1, i]));
-        str = append (str, "ADJUSTED_R2," + i + ",," + castAsScalar (adjust_R2 [1, i]));
-        str = append (str, "PLAIN_R2_NOBIAS," + i + ",," + castAsScalar (plain_R2_nobias [1, i]));
-        str = append (str, "ADJUSTED_R2_NOBIAS," + i + ",," + castAsScalar (adjust_R2_nobias [1, i]));
-    }
-    
-    if (fileO != " ") {
-        write (str, fileO);
-    } else {
-        print (str);
-    }
-}
-
-glm_means_and_vars = 
-    function (Matrix[double] linear_terms, int dist_type, double var_power, int link_type, double link_power)
-    return (Matrix[double] means, Matrix[double] vars)
-    # NOTE: "vars" represents the variance without dispersion, i.e. the V(mu) function.
-{
-    num_points = nrow (linear_terms);
-    if (dist_type == 1 & link_type == 1) {
-    # POWER DISTRIBUTION
-        if          (link_power ==  0.0) {
-            y_mean = exp (linear_terms);
-        } else { if (link_power ==  1.0) {
-            y_mean = linear_terms;
-        } else { if (link_power == -1.0) {
-            y_mean = 1.0 / linear_terms;
-        } else {
-            y_mean = linear_terms ^ (1.0 / link_power);
-        }}}
-        if (var_power == 0.0) {
-            var_function = matrix (1.0, rows = num_points, cols = 1);
-        } else { if (var_power == 1.0) {
-            var_function = y_mean;
-        } else {
-            var_function = y_mean ^ var_power;
-        }}
-        means = y_mean;
-        vars = var_function;
-    } else { if (dist_type == 2 & link_type >= 1 & link_type <= 5) {
-    # BINOMIAL/BERNOULLI DISTRIBUTION
-        y_prob = matrix (0.0, rows = num_points, cols = 2);
-        if          (link_type == 1 & link_power == 0.0)  { # Binomial.log
-            y_prob [, 1]  = exp (linear_terms);
-            y_prob [, 2]  = 1.0 - y_prob [, 1];
-        } else { if (link_type == 1 & link_power != 0.0)  { # Binomial.power_nonlog
-            y_prob [, 1]  = linear_terms ^ (1.0 / link_power);
-            y_prob [, 2]  = 1.0 - y_prob [, 1];
-        } else { if (link_type == 2)                      { # Binomial.logit
-            elt = exp (linear_terms);
-            y_prob [, 1]  = elt / (1.0 + elt);
-            y_prob [, 2]  = 1.0 / (1.0 + elt);
-        } else { if (link_type == 3)                      { # Binomial.probit
-            sign_lt = 2 * ppred (linear_terms, 0.0, ">=") - 1;
-            t_gp = 1.0 / (1.0 + abs (linear_terms) * 0.231641888);  # 0.231641888 = 0.3275911 / sqrt (2.0)
-            erf_corr =
-                t_gp * ( 0.254829592 
-              + t_gp * (-0.284496736 # "Handbook of Mathematical Functions", ed. by M. Abramowitz and I.A. Stegun,
-              + t_gp * ( 1.421413741 # U.S. Nat-l Bureau of Standards, 10th print (Dec 1972), Sec. 7.1.26, p. 299
-              + t_gp * (-1.453152027 
-              + t_gp *   1.061405429)))) * sign_lt * exp (- (linear_terms ^ 2) / 2.0);
-            y_prob [, 1] = (1 + sign_lt) - erf_corr;
-            y_prob [, 2] = (1 - sign_lt) + erf_corr;
-            y_prob = y_prob / 2;
-        } else { if (link_type == 4)                      { # Binomial.cloglog
-            elt = exp (linear_terms);
-            is_too_small = ppred (10000000 + elt, 10000000, "==");
-            y_prob [, 2] = exp (- elt);
-            y_prob [, 1] = (1 - is_too_small) * (1.0 - y_prob [, 2]) + is_too_small * elt * (1.0 - elt / 2);
-        } else { if (link_type == 5)                      { # Binomial.cauchit
-            atan_linear_terms = atan (linear_terms);
-            y_prob [, 1] = 0.5 + atan_linear_terms / 3.1415926535897932384626433832795;
-            y_prob [, 2] = 0.5 - atan_linear_terms / 3.1415926535897932384626433832795;
-        }}}}}}
-        means = y_prob;
-        ones_ctg = matrix (1, rows = 2, cols = 1);
-        vars  = means * (means %*% (1 - diag (ones_ctg)));
-    } else { if (dist_type == 3) {
-    # MULTINOMIAL LOGIT DISTRIBUTION
-        elt = exp (linear_terms);
-        ones_pts = matrix (1, rows = num_points, cols = 1);
-        elt = append (elt, ones_pts);
-        ones_ctg = matrix (1, rows = ncol (elt), cols = 1);
-        means = elt / (rowSums (elt) %*% t(ones_ctg));
-        vars  = means * (means %*% (1 - diag (ones_ctg)));
-    } else {
-        means = matrix (0.0, rows = num_points, cols = 1);
-        vars  = matrix (0.0, rows = num_points, cols = 1);
-}   }}}
-
-glm_partial_loglikelihood_for_power_dist_and_link =   # Assumes: dist_type == 1 & link_type == 1
-    function (Matrix[double] linear_terms, Matrix[double] Y, double var_power, double link_power)
-    return (Matrix[double] log_l_part)
-{
-    num_records = nrow (Y);
-    if (var_power == 1.0) { # Poisson
-        if (link_power == 0.0)  { # Poisson.log
-            is_natural_parameter_log_zero = ppred (linear_terms, -1.0/0.0, "==");
-            natural_parameters = replace (target = linear_terms, pattern = -1.0/0.0, replacement = 0);
-            b_cumulant = exp (linear_terms);
-        } else {                  # Poisson.power_nonlog
-            is_natural_parameter_log_zero = ppred (linear_terms, 0.0, "==");
-            natural_parameters = log (linear_terms + is_natural_parameter_log_zero) / link_power;
-            b_cumulant = (linear_terms + is_natural_parameter_log_zero) ^ (1.0 / link_power) - is_natural_parameter_log_zero;
-        }
-        is_minus_infinity = ppred (Y, 0, ">") * is_natural_parameter_log_zero;
-        log_l_part = Y * natural_parameters - b_cumulant - is_minus_infinity / (1 - is_minus_infinity);
-    } else {
-        if (var_power == 2.0 & link_power == 0.0)  { # Gamma.log
-            natural_parameters = - exp (- linear_terms);
-            b_cumulant = linear_terms;
-        } else { if (var_power == 2.0)  { # Gamma.power_nonlog
-            natural_parameters = - linear_terms ^ (- 1.0 / link_power);
-            b_cumulant = log (linear_terms) / link_power;
-        } else { if (link_power == 0.0) { # PowerDist.log
-            natural_parameters = exp (linear_terms * (1.0 - var_power)) / (1.0 - var_power);
-            b_cumulant = exp (linear_terms * (2.0 - var_power)) / (2.0 - var_power);
-        } else {                          # PowerDist.power_nonlog
-            power_np = (1.0 - var_power) / link_power;
-            natural_parameters = (linear_terms ^ power_np) / (1.0 - var_power);
-            power_cu = (2.0 - var_power) / link_power;
-            b_cumulant = (linear_terms ^ power_cu) / (2.0 - var_power);
-        }}}
-        log_l_part = Y * natural_parameters - b_cumulant;
-}   }
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# Default values for input parameters:
+fileX = $X;
+fileB = $B;
+fileM = ifdef ($M, " ");
+fileY = ifdef ($Y, " ");
+fileO = ifdef ($O, " ");
+fmtM  = ifdef ($fmt, "text");
+
+dist_type  = ifdef ($dfam, 1);    # $dfam = 1;
+var_power  = ifdef ($vpow, 0.0);  # $vpow = 0.0;
+link_type  = ifdef ($link, 0);    # $link = 0;
+link_power = ifdef ($lpow, 1.0);  # $lpow = 1.0;
+dispersion = ifdef ($disp, 1.0);  # $disp = 1.0;
+
+var_power  = as.double (var_power);
+link_power = as.double (link_power); 
+dispersion = as.double (dispersion);
+
+if (dist_type == 3) {
+    link_type = 2;
+} else { if (link_type == 0) { # Canonical Link
+    if (dist_type == 1) {
+        link_type = 1;
+        link_power = 1.0 - var_power;
+    } else { if (dist_type == 2) {
+            link_type = 2;
+}}} }
+
+X = read(fileX);
+#X = table(X[,1], X[,2], X[,3])
+
+num_records  = nrow (X);
+num_features = ncol (X);
+
+W = read (fileB);
+if (dist_type == 3) {
+    beta =  W [1 : ncol (X),  ];
+    intercept = W [nrow(W),  ];
+} else {
+    beta =  W [1 : ncol (X), 1];
+    intercept = W [nrow(W), 1];
+}
+if (nrow (W) == ncol (X)) {
+    intercept = 0.0 * intercept;
+    is_intercept = FALSE;
+} else {
+    num_features = num_features + 1;
+    is_intercept = TRUE;
+}
+
+ones_rec = matrix (1, rows = num_records, cols = 1);
+linear_terms = X %*% beta + ones_rec %*% intercept;
+[means, vars] =
+    glm_means_and_vars (linear_terms, dist_type, var_power, link_type, link_power);
+    
+if (fileM != " ") {
+    write (means, fileM, format=fmtM);
+}
+
+predicted_y = rowIndexMax(means)
+write(predicted_y, $P, format=fmtM)
+
+if (fileY != " ")
+{
+    Y = read (fileY);
+    ones_ctg = matrix (1, rows = ncol(Y), cols = 1);
+    
+    # Statistics To Compute:
+    
+    Z_logl               = 0.0 / 0.0;
+    Z_logl_pValue        = 0.0 / 0.0;
+    X2_pearson           = 0.0 / 0.0;
+    df_pearson           = -1;
+    G2_deviance          = 0.0 / 0.0;
+    df_deviance          = -1;
+    X2_pearson_pValue    = 0.0 / 0.0;
+    G2_deviance_pValue   = 0.0 / 0.0;
+    Z_logl_scaled        = 0.0 / 0.0;
+    Z_logl_scaled_pValue = 0.0 / 0.0;
+    X2_scaled            = 0.0 / 0.0;
+    X2_scaled_pValue     = 0.0 / 0.0;
+    G2_scaled            = 0.0 / 0.0;
+    G2_scaled_pValue     = 0.0 / 0.0;
+    
+    if (dist_type == 1 & link_type == 1) {
+    #
+    # POWER DISTRIBUTIONS (GAUSSIAN, POISSON, GAMMA, ETC.)
+    #
+        if (link_power == 0.0) {
+            is_zero_Y = ppred (Y, 0.0, "==");
+            lt_saturated = log (Y + is_zero_Y) - is_zero_Y / (1.0 - is_zero_Y);
+        } else {
+            lt_saturated = Y ^ link_power;
+        }
+        Y_counts = ones_rec;
+
+        X2_pearson = sum ((Y - means) ^ 2 / vars);
+        df_pearson = num_records - num_features;
+
+        log_l_part = 
+            glm_partial_loglikelihood_for_power_dist_and_link (linear_terms, Y, var_power, link_power);
+        log_l_part_saturated = 
+            glm_partial_loglikelihood_for_power_dist_and_link (lt_saturated, Y, var_power, link_power);
+            
+        G2_deviance = 2 * sum (log_l_part_saturated) - 2 * sum (log_l_part);
+        df_deviance = num_records - num_features;
+        
+    } else { if (dist_type >= 2) {
+    #
+    # BINOMIAL AND MULTINOMIAL DISTRIBUTIONS
+    #
+        if (ncol (Y) == 1) {
+            num_categories = ncol (beta) + 1;
+            if (min (Y) <= 0) { 
+                # Category labels "0", "-1" etc. are converted into the baseline label
+                Y = Y + (- Y + num_categories) * ppred (Y, 0, "<=");
+            }
+            Y_size = min (num_categories, max(Y));
+            Y_unsized = table (seq (1, num_records, 1), Y);
+            Y = matrix (0, rows = num_records, cols = num_categories);
+            Y [, 1 : Y_size] = Y_unsized [, 1 : Y_size];
+            Y_counts = ones_rec;
+        } else {
+            Y_counts = rowSums (Y);
+        }
+        
+        P = means;
+        zero_Y = ppred (Y, 0.0, "==");
+        zero_P = ppred (P, 0.0, "==");
+        ones_ctg = matrix (1, rows = ncol(Y), cols = 1);
+        
+        logl_vec = rowSums (Y *  log (P + zero_Y)   );
+        ent1_vec = rowSums (P *  log (P + zero_P)   );
+        ent2_vec = rowSums (P * (log (P + zero_P))^2);
+        E_logl   = sum (Y_counts * ent1_vec);
+        V_logl   = sum (Y_counts * (ent2_vec - ent1_vec ^ 2));
+        Z_logl   = (sum (logl_vec) - E_logl) / sqrt (V_logl);
+        
+        means = means * (Y_counts %*% t(ones_ctg));
+        vars  = vars  * (Y_counts %*% t(ones_ctg));
+        
+        frac_below_5 = sum (ppred (means, 5, "<")) / (nrow (means) * ncol (means));
+        frac_below_1 = sum (ppred (means, 1, "<")) / (nrow (means) * ncol (means));
+        
+        if (frac_below_5 > 0.2 | frac_below_1 > 0.0) {
+            print ("WARNING: residual statistics are inaccurate here due to low cell means.");
+        }
+        
+        X2_pearson = sum ((Y - means) ^ 2 / means);
+        df_pearson = (num_records - num_features) * (ncol(Y) - 1);
+        
+        G2_deviance = 2 * sum (Y * log ((Y + zero_Y) / (means + zero_Y)));
+        df_deviance = (num_records - num_features) * (ncol(Y) - 1);
+    }}
+    
+    if (Z_logl == Z_logl) {
+        Z_logl_absneg = - abs (Z_logl);
+        Z_logl_pValue = 2.0 * pnorm(target = Z_logl_absneg);
+    }
+    if (X2_pearson == X2_pearson & df_pearson > 0) {
+        X2_pearson_pValue = pchisq(target = X2_pearson, df = df_pearson, lower.tail=FALSE);
+    }
+    if (G2_deviance == G2_deviance & df_deviance > 0) {
+        G2_deviance_pValue = pchisq(target = G2_deviance, df = df_deviance, lower.tail=FALSE);
+    }
+    
+    Z_logl_scaled = Z_logl / sqrt (dispersion);
+    X2_scaled = X2_pearson / dispersion;
+    G2_scaled = G2_deviance / dispersion;
+
+    if (Z_logl_scaled == Z_logl_scaled) {
+        Z_logl_scaled_absneg = - abs (Z_logl_scaled);
+        Z_logl_scaled_pValue = 2.0 * pnorm(target = Z_logl_scaled_absneg);
+    }
+    if (X2_scaled == X2_scaled & df_pearson > 0) {
+        X2_scaled_pValue = pchisq(target = X2_scaled, df = df_pearson, lower.tail=FALSE);
+    }
+    if (G2_scaled == G2_scaled & df_deviance > 0) {
+        G2_scaled_pValue = pchisq(target = G2_scaled, df = df_deviance, lower.tail=FALSE);
+    }
+    
+    avg_tot_Y = colSums (    Y    ) / sum (Y_counts);
+    avg_res_Y = colSums (Y - means) / sum (Y_counts);
+    
+    ss_avg_tot_Y = colSums ((    Y     - Y_counts %*% avg_tot_Y) ^ 2);
+    ss_res_Y     = colSums ((Y - means) ^ 2);
+    ss_avg_res_Y = colSums ((Y - means - Y_counts %*% avg_res_Y) ^ 2);
+    
+    df_ss_res_Y  = sum (Y_counts) - num_features;
+    if (is_intercept) {
+        df_ss_avg_res_Y = df_ss_res_Y;
+    } else {
+        df_ss_avg_res_Y = df_ss_res_Y - 1;
+    }
+    
+    var_tot_Y = ss_avg_tot_Y / (sum (Y_counts) - 1);
+    if (df_ss_avg_res_Y > 0) {
+        var_res_Y = ss_avg_res_Y / df_ss_avg_res_Y;
+    } else {
+        var_res_Y = matrix (0.0, rows = 1, cols = ncol (Y)) / 0.0;
+    }
+    plain_R2_nobias  = 1 - ss_avg_res_Y / ss_avg_tot_Y;
+    adjust_R2_nobias = 1 - var_res_Y / var_tot_Y;
+    plain_R2  = 1 - ss_res_Y / ss_avg_tot_Y;
+    if (df_ss_res_Y > 0) {
+        adjust_R2 = 1 - (ss_res_Y / df_ss_res_Y) / var_tot_Y;
+    } else {
+        adjust_R2 = matrix (0.0, rows = 1, cols = ncol (Y)) / 0.0;
+    }
+    
+    predicted_avg_var_res_Y = dispersion * colSums (vars) / sum (Y_counts);
+    
+    # PREPARING THE OUTPUT CSV STATISTICS FILE
+    
+    str = "LOGLHOOD_Z,,FALSE," + Z_logl;
+    str = append (str, "LOGLHOOD_Z_PVAL,,FALSE," + Z_logl_pValue);
+    str = append (str, "PEARSON_X2,,FALSE," + X2_pearson);
+    str = append (str, "PEARSON_X2_BY_DF,,FALSE," + (X2_pearson / df_pearson));
+    str = append (str, "PEARSON_X2_PVAL,,FALSE," + X2_pearson_pValue);
+    str = append (str, "DEVIANCE_G2,,FALSE," + G2_deviance);
+    str = append (str, "DEVIANCE_G2_BY_DF,,FALSE," + (G2_deviance / df_deviance));
+    str = append (str, "DEVIANCE_G2_PVAL,,FALSE," + G2_deviance_pValue);
+    str = append (str, "LOGLHOOD_Z,,TRUE," + Z_logl_scaled);
+    str = append (str, "LOGLHOOD_Z_PVAL,,TRUE," + Z_logl_scaled_pValue);
+    str = append (str, "PEARSON_X2,,TRUE," + X2_scaled);
+    str = append (str, "PEARSON_X2_BY_DF,,TRUE," + (X2_scaled / df_pearson));
+    str = append (str, "PEARSON_X2_PVAL,,TRUE," + X2_scaled_pValue);
+    str = append (str, "DEVIANCE_G2,,TRUE," + G2_scaled);
+    str = append (str, "DEVIANCE_G2_BY_DF,,TRUE," + (G2_scaled / df_deviance));
+    str = append (str, "DEVIANCE_G2_PVAL,,TRUE," + G2_scaled_pValue);
+
+    for (i in 1:ncol(Y)) {
+        str = append (str, "AVG_TOT_Y," + i + ",," + castAsScalar (avg_tot_Y [1, i]));
+        str = append (str, "STDEV_TOT_Y," + i + ",," + castAsScalar (sqrt (var_tot_Y [1, i])));
+        str = append (str, "AVG_RES_Y," + i + ",," + castAsScalar (avg_res_Y [1, i]));
+        str = append (str, "STDEV_RES_Y," + i + ",," + castAsScalar (sqrt (var_res_Y [1, i])));
+        str = append (str, "PRED_STDEV_RES," + i + ",TRUE," + castAsScalar (sqrt (predicted_avg_var_res_Y [1, i])));
+        str = append (str, "PLAIN_R2," + i + ",," + castAsScalar (plain_R2 [1, i]));
+        str = append (str, "ADJUSTED_R2," + i + ",," + castAsScalar (adjust_R2 [1, i]));
+        str = append (str, "PLAIN_R2_NOBIAS," + i + ",," + castAsScalar (plain_R2_nobias [1, i]));
+        str = append (str, "ADJUSTED_R2_NOBIAS," + i + ",," + castAsScalar (adjust_R2_nobias [1, i]));
+    }
+    
+    if (fileO != " ") {
+        write (str, fileO);
+    } else {
+        print (str);
+    }
+}
+
+glm_means_and_vars = 
+    function (Matrix[double] linear_terms, int dist_type, double var_power, int link_type, double link_power)
+    return (Matrix[double] means, Matrix[double] vars)
+    # NOTE: "vars" represents the variance without dispersion, i.e. the V(mu) function.
+{
+    num_points = nrow (linear_terms);
+    if (dist_type == 1 & link_type == 1) {
+    # POWER DISTRIBUTION
+        if          (link_power ==  0.0) {
+            y_mean = exp (linear_terms);
+        } else { if (link_power ==  1.0) {
+            y_mean = linear_terms;
+        } else { if (link_power == -1.0) {
+            y_mean = 1.0 / linear_terms;
+        } else {
+            y_mean = linear_terms ^ (1.0 / link_power);
+        }}}
+        if (var_power == 0.0) {
+            var_function = matrix (1.0, rows = num_points, cols = 1);
+        } else { if (var_power == 1.0) {
+            var_function = y_mean;
+        } else {
+            var_function = y_mean ^ var_power;
+        }}
+        means = y_mean;
+        vars = var_function;
+    } else { if (dist_type == 2 & link_type >= 1 & link_type <= 5) {
+    # BINOMIAL/BERNOULLI DISTRIBUTION
+        y_prob = matrix (0.0, rows = num_points, cols = 2);
+        if          (link_type == 1 & link_power == 0.0)  { # Binomial.log
+            y_prob [, 1]  = exp (linear_terms);
+            y_prob [, 2]  = 1.0 - y_prob [, 1];
+        } else { if (link_type == 1 & link_power != 0.0)  { # Binomial.power_nonlog
+            y_prob [, 1]  = linear_terms ^ (1.0 / link_power);
+            y_prob [, 2]  = 1.0 - y_prob [, 1];
+        } else { if (link_type == 2)                      { # Binomial.logit
+            elt = exp (linear_terms);
+            y_prob [, 1]  = elt / (1.0 + elt);
+            y_prob [, 2]  = 1.0 / (1.0 + elt);
+        } else { if (link_type == 3)                      { # Binomial.probit
+            sign_lt = 2 * ppred (linear_terms, 0.0, ">=") - 1;
+            t_gp = 1.0 / (1.0 + abs (linear_terms) * 0.231641888);  # 0.231641888 = 0.3275911 / sqrt (2.0)
+            erf_corr =
+                t_gp * ( 0.254829592 
+              + t_gp * (-0.284496736 # "Handbook of Mathematical Functions", ed. by M. Abramowitz and I.A. Stegun,
+              + t_gp * ( 1.421413741 # U.S. Nat-l Bureau of Standards, 10th print (Dec 1972), Sec. 7.1.26, p. 299
+              + t_gp * (-1.453152027 
+              + t_gp *   1.061405429)))) * sign_lt * exp (- (linear_terms ^ 2) / 2.0);
+            y_prob [, 1] = (1 + sign_lt) - erf_corr;
+            y_prob [, 2] = (1 - sign_lt) + erf_corr;
+            y_prob = y_prob / 2;
+        } else { if (link_type == 4)                      { # Binomial.cloglog
+            elt = exp (linear_terms);
+            is_too_small = ppred (10000000 + elt, 10000000, "==");
+            y_prob [, 2] = exp (- elt);
+            y_prob [, 1] = (1 - is_too_small) * (1.0 - y_prob [, 2]) + is_too_small * elt * (1.0 - elt / 2);
+        } else { if (link_type == 5)                      { # Binomial.cauchit
+            atan_linear_terms = atan (linear_terms);
+            y_prob [, 1] = 0.5 + atan_linear_terms / 3.1415926535897932384626433832795;
+            y_prob [, 2] = 0.5 - atan_linear_terms / 3.1415926535897932384626433832795;
+        }}}}}}
+        means = y_prob;
+        ones_ctg = matrix (1, rows = 2, cols = 1);
+        vars  = means * (means %*% (1 - diag (ones_ctg)));
+    } else { if (dist_type == 3) {
+    # MULTINOMIAL LOGIT DISTRIBUTION
+        elt = exp (linear_terms);
+        ones_pts = matrix (1, rows = num_points, cols = 1);
+        elt = append (elt, ones_pts);
+        ones_ctg = matrix (1, rows = ncol (elt), cols = 1);
+        means = elt / (rowSums (elt) %*% t(ones_ctg));
+        vars  = means * (means %*% (1 - diag (ones_ctg)));
+    } else {
+        means = matrix (0.0, rows = num_points, cols = 1);
+        vars  = matrix (0.0, rows = num_points, cols = 1);
+}   }}}
+
+glm_partial_loglikelihood_for_power_dist_and_link =   # Assumes: dist_type == 1 & link_type == 1
+    function (Matrix[double] linear_terms, Matrix[double] Y, double var_power, double link_power)
+    return (Matrix[double] log_l_part)
+{
+    num_records = nrow (Y);
+    if (var_power == 1.0) { # Poisson
+        if (link_power == 0.0)  { # Poisson.log
+            is_natural_parameter_log_zero = ppred (linear_terms, -1.0/0.0, "==");
+            natural_parameters = replace (target = linear_terms, pattern = -1.0/0.0, replacement = 0);
+            b_cumulant = exp (linear_terms);
+        } else {                  # Poisson.power_nonlog
+            is_natural_parameter_log_zero = ppred (linear_terms, 0.0, "==");
+            natural_parameters = log (linear_terms + is_natural_parameter_log_zero) / link_power;
+            b_cumulant = (linear_terms + is_natural_parameter_log_zero) ^ (1.0 / link_power) - is_natural_parameter_log_zero;
+        }
+        is_minus_infinity = ppred (Y, 0, ">") * is_natural_parameter_log_zero;
+        log_l_part = Y * natural_parameters - b_cumulant - is_minus_infinity / (1 - is_minus_infinity);
+    } else {
+        if (var_power == 2.0 & link_power == 0.0)  { # Gamma.log
+            natural_parameters = - exp (- linear_terms);
+            b_cumulant = linear_terms;
+        } else { if (var_power == 2.0)  { # Gamma.power_nonlog
+            natural_parameters = - linear_terms ^ (- 1.0 / link_power);
+            b_cumulant = log (linear_terms) / link_power;
+        } else { if (link_power == 0.0) { # PowerDist.log
+            natural_parameters = exp (linear_terms * (1.0 - var_power)) / (1.0 - var_power);
+            b_cumulant = exp (linear_terms * (2.0 - var_power)) / (2.0 - var_power);
+        } else {                          # PowerDist.power_nonlog
+            power_np = (1.0 - var_power) / link_power;
+            natural_parameters = (linear_terms ^ power_np) / (1.0 - var_power);
+            power_cu = (2.0 - var_power) / link_power;
+            b_cumulant = (linear_terms ^ power_cu) / (2.0 - var_power);
+        }}}
+        log_l_part = Y * natural_parameters - b_cumulant;
+}   }

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/jmlc/reuse-msvm-predict.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/jmlc/reuse-msvm-predict.dml b/src/test/scripts/functions/jmlc/reuse-msvm-predict.dml
index 8bd41e1..026b88b 100644
--- a/src/test/scripts/functions/jmlc/reuse-msvm-predict.dml
+++ b/src/test/scripts/functions/jmlc/reuse-msvm-predict.dml
@@ -1,74 +1,74 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-cmdLine_Y = ifdef($Y, " ")
-cmdLine_confusion = ifdef($confusion, " ")
-cmdLine_accuracy = ifdef($accuracy, " ")
-cmdLine_scores = ifdef($scores, " ")
-cmdLine_fmt = ifdef($fmt, "text")
-
-X = read ($X);
-N = max(X[,1]);
-
-W = read($W);
-dimensions = as.scalar(W[nrow(W),1])
-intercept = as.scalar(W[nrow(W)-1,1])
-W = W[1:(nrow(W)-2),]
-
-#X = table(X[,1], X[,2], X[,3], N, dimensions)
-#print(nrow(X) + " " + ncol(X) + " " + dimensions)
-
-num_classes = ncol(W)
-m=ncol(X);
-
-b = matrix(0, rows=1, cols=num_classes)
-if (intercept == 1)
-	b = W[m+1,]
-
-ones = matrix(1, rows=N, cols=1)
-scores = X %*% W[1:m,] + ones %*% b;
-predicted_y = rowIndexMax(scores);
-	
-if(sum(predicted_y) != 0)
-	write(predicted_y, cmdLine_scores, format=cmdLine_fmt);
-
-if(cmdLine_Y != " "){
-	y = read(cmdLine_Y);
-	
-	if(min(y) < 1)
-		stop("Stopping due to invalid argument: Label vector (Y) must be recoded")
-	
-	correct_percentage = sum(ppred(predicted_y - y, 0, "==")) / N * 100;
-	
-	acc_str = "Accuracy (%): " + correct_percentage
-	print(acc_str)
-	if(cmdLine_accuracy != " ")
-		write(acc_str, cmdLine_accuracy)
-
-	num_classes_ground_truth = max(y)
-	if(num_classes < num_classes_ground_truth)
-		num_classes = num_classes_ground_truth
-
-	if(cmdLine_confusion != " "){
-		confusion_mat = table(predicted_y, y, num_classes, num_classes)
-		write(confusion_mat, cmdLine_confusion, format="csv")
-	}
-}
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+cmdLine_Y = ifdef($Y, " ")
+cmdLine_confusion = ifdef($confusion, " ")
+cmdLine_accuracy = ifdef($accuracy, " ")
+cmdLine_scores = ifdef($scores, " ")
+cmdLine_fmt = ifdef($fmt, "text")
+
+X = read ($X);
+N = max(X[,1]);
+
+W = read($W);
+dimensions = as.scalar(W[nrow(W),1])
+intercept = as.scalar(W[nrow(W)-1,1])
+W = W[1:(nrow(W)-2),]
+
+#X = table(X[,1], X[,2], X[,3], N, dimensions)
+#print(nrow(X) + " " + ncol(X) + " " + dimensions)
+
+num_classes = ncol(W)
+m=ncol(X);
+
+b = matrix(0, rows=1, cols=num_classes)
+if (intercept == 1)
+	b = W[m+1,]
+
+ones = matrix(1, rows=N, cols=1)
+scores = X %*% W[1:m,] + ones %*% b;
+predicted_y = rowIndexMax(scores);
+	
+if(sum(predicted_y) != 0)
+	write(predicted_y, cmdLine_scores, format=cmdLine_fmt);
+
+if(cmdLine_Y != " "){
+	y = read(cmdLine_Y);
+	
+	if(min(y) < 1)
+		stop("Stopping due to invalid argument: Label vector (Y) must be recoded")
+	
+	correct_percentage = sum(ppred(predicted_y - y, 0, "==")) / N * 100;
+	
+	acc_str = "Accuracy (%): " + correct_percentage
+	print(acc_str)
+	if(cmdLine_accuracy != " ")
+		write(acc_str, cmdLine_accuracy)
+
+	num_classes_ground_truth = max(y)
+	if(num_classes < num_classes_ground_truth)
+		num_classes = num_classes_ground_truth
+
+	if(cmdLine_confusion != " "){
+		confusion_mat = table(predicted_y, y, num_classes, num_classes)
+		write(confusion_mat, cmdLine_confusion, format="csv")
+	}
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/misc/ForScalarAssignmentTest.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/misc/ForScalarAssignmentTest.dml b/src/test/scripts/functions/misc/ForScalarAssignmentTest.dml
index d5d7ecb..818f3ba 100644
--- a/src/test/scripts/functions/misc/ForScalarAssignmentTest.dml
+++ b/src/test/scripts/functions/misc/ForScalarAssignmentTest.dml
@@ -19,10 +19,10 @@
 #
 #-------------------------------------------------------------
 
-
-val = $1;
-for( i in 1:10 ){
-   val = $1;
-}
-
+
+val = $1;
+for( i in 1:10 ){
+   val = $1;
+}
+
 print(val);
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/misc/IPALiteralReplacement_ForIf.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/misc/IPALiteralReplacement_ForIf.R b/src/test/scripts/functions/misc/IPALiteralReplacement_ForIf.R
index 20b2c47..ce3b35b 100644
--- a/src/test/scripts/functions/misc/IPALiteralReplacement_ForIf.R
+++ b/src/test/scripts/functions/misc/IPALiteralReplacement_ForIf.R
@@ -19,29 +19,29 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-library("Matrix")
-
-foo <- function(A)
-{
-   for( i in 1:1 ) {
-     continue = TRUE;
-     if( sum(A)<0 ) {
-        continue = FALSE;
-     }
-     iter = 0;
-     if( continue ) {
-        iter = iter+1;
-     }
-     B = A+iter;
-   }
-   
-   return(B);
-}
-
-A = matrix(1, 10, 10)
-R = foo(A)
-
-writeMM(as(R, "CsparseMatrix"), paste(args[1], "R", sep="")); 
+
+args <- commandArgs(TRUE)
+options(digits=22)
+library("Matrix")
+
+foo <- function(A)
+{
+   for( i in 1:1 ) {
+     continue = TRUE;
+     if( sum(A)<0 ) {
+        continue = FALSE;
+     }
+     iter = 0;
+     if( continue ) {
+        iter = iter+1;
+     }
+     B = A+iter;
+   }
+   
+   return(B);
+}
+
+A = matrix(1, 10, 10)
+R = foo(A)
+
+writeMM(as(R, "CsparseMatrix"), paste(args[1], "R", sep="")); 

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/misc/IPALiteralReplacement_ForIf.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/misc/IPALiteralReplacement_ForIf.dml b/src/test/scripts/functions/misc/IPALiteralReplacement_ForIf.dml
index 0b82fab..6c221bb 100644
--- a/src/test/scripts/functions/misc/IPALiteralReplacement_ForIf.dml
+++ b/src/test/scripts/functions/misc/IPALiteralReplacement_ForIf.dml
@@ -19,24 +19,24 @@
 #
 #-------------------------------------------------------------
 
-
-foo = function( Matrix[Double] A ) return( Matrix[Double] B )
-{
-   for( i in 1:1 ) {
-     continue = TRUE;
-     if( sum(A)<0 ) {
-        continue = FALSE;
-     }
-     iter = 0;
-     if( continue ) {
-        iter = iter+1;
-     }
-     B = A+iter;
-   }
-}
-
-
-A = matrix(1, rows=10, cols=10);
-R = foo(A);
-
+
+foo = function( Matrix[Double] A ) return( Matrix[Double] B )
+{
+   for( i in 1:1 ) {
+     continue = TRUE;
+     if( sum(A)<0 ) {
+        continue = FALSE;
+     }
+     iter = 0;
+     if( continue ) {
+        iter = iter+1;
+     }
+     B = A+iter;
+   }
+}
+
+
+A = matrix(1, rows=10, cols=10);
+R = foo(A);
+
 write(R, $1);
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/misc/IPALiteralReplacement_While.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/misc/IPALiteralReplacement_While.R b/src/test/scripts/functions/misc/IPALiteralReplacement_While.R
index 0f8fda4..afcdeb6 100644
--- a/src/test/scripts/functions/misc/IPALiteralReplacement_While.R
+++ b/src/test/scripts/functions/misc/IPALiteralReplacement_While.R
@@ -19,28 +19,28 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-library("Matrix")
-
-foo <- function(A)
-{
-   continue = TRUE;
-   iter = 0;
-   while( continue )
-   {
-      iter = iter+1;
-      if( iter<10 ){
-         continue = TRUE;
-      } else {
-         continue = FALSE;
-      }
-   }
-   B = A+iter;
-}
-
-A = matrix(1, 10, 10)
-R = foo(A)
-
-writeMM(as(R, "CsparseMatrix"), paste(args[1], "R", sep="")); 
+
+args <- commandArgs(TRUE)
+options(digits=22)
+library("Matrix")
+
+foo <- function(A)
+{
+   continue = TRUE;
+   iter = 0;
+   while( continue )
+   {
+      iter = iter+1;
+      if( iter<10 ){
+         continue = TRUE;
+      } else {
+         continue = FALSE;
+      }
+   }
+   B = A+iter;
+}
+
+A = matrix(1, 10, 10)
+R = foo(A)
+
+writeMM(as(R, "CsparseMatrix"), paste(args[1], "R", sep="")); 

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/misc/IPALiteralReplacement_While.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/misc/IPALiteralReplacement_While.dml b/src/test/scripts/functions/misc/IPALiteralReplacement_While.dml
index 4977607..08ec41d 100644
--- a/src/test/scripts/functions/misc/IPALiteralReplacement_While.dml
+++ b/src/test/scripts/functions/misc/IPALiteralReplacement_While.dml
@@ -19,24 +19,24 @@
 #
 #-------------------------------------------------------------
 
-
-foo = function( Matrix[Double] A ) return( Matrix[Double] B )
-{
-   continue = TRUE;
-   iter = 0;
-   while( continue )
-   {
-      iter = iter+1;
-      if( iter<10 )
-          continue = TRUE;
-      else
-          continue = FALSE;
-   }
-   B = A+iter;
-}
-
-
-A = matrix(1, rows=10, cols=10);
-R = foo(A);
-
+
+foo = function( Matrix[Double] A ) return( Matrix[Double] B )
+{
+   continue = TRUE;
+   iter = 0;
+   while( continue )
+   {
+      iter = iter+1;
+      if( iter<10 )
+          continue = TRUE;
+      else
+          continue = FALSE;
+   }
+   B = A+iter;
+}
+
+
+A = matrix(1, rows=10, cols=10);
+R = foo(A);
+
 write(R, $1);
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/misc/IPAScalarRecursion.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/misc/IPAScalarRecursion.dml b/src/test/scripts/functions/misc/IPAScalarRecursion.dml
index b9ce1f3..115f6dd 100644
--- a/src/test/scripts/functions/misc/IPAScalarRecursion.dml
+++ b/src/test/scripts/functions/misc/IPAScalarRecursion.dml
@@ -19,20 +19,20 @@
 #
 #-------------------------------------------------------------
 
-
-
-powering = function (int index)
-  return (int value)
-{
-   if(index == 1) {
-      value = 2;
-   }
-   else {
-      ret = powering(index-1);
-      value = 2 * ret;
-   }
-}
-
-n = $1;
-v = powering(n);
-print("Result is " + v);
+
+
+powering = function (int index)
+  return (int value)
+{
+   if(index == 1) {
+      value = 2;
+   }
+   else {
+      ret = powering(index-1);
+      value = 2 * ret;
+   }
+}
+
+n = $1;
+v = powering(n);
+print("Result is " + v);

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/misc/IPAUnknownRecursion.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/misc/IPAUnknownRecursion.R b/src/test/scripts/functions/misc/IPAUnknownRecursion.R
index 1b61f79..edc3314 100644
--- a/src/test/scripts/functions/misc/IPAUnknownRecursion.R
+++ b/src/test/scripts/functions/misc/IPAUnknownRecursion.R
@@ -19,33 +19,33 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-factorial <- function(arr, pos){
-	if(pos == 1){
-     arr[1, pos] = 1
-	} else {
-		arr = factorial(arr, pos-1)
-		arr[1, pos] = pos * arr[1, pos-1]
-	}
-
-  return(arr);	
-}
-
-n = as.integer(args[1])
-arr = matrix(0, 1, n)
-arr = factorial(arr, n)
-
-R = matrix(0, 1, n);
-for(i in 1:n)
-{
-   R[1,i] = arr[1, i];
-}
-
-writeMM(as(R, "CsparseMatrix"), paste(args[2], "R", sep="")); 
-
-
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+factorial <- function(arr, pos){
+	if(pos == 1){
+     arr[1, pos] = 1
+	} else {
+		arr = factorial(arr, pos-1)
+		arr[1, pos] = pos * arr[1, pos-1]
+	}
+
+  return(arr);	
+}
+
+n = as.integer(args[1])
+arr = matrix(0, 1, n)
+arr = factorial(arr, n)
+
+R = matrix(0, 1, n);
+for(i in 1:n)
+{
+   R[1,i] = arr[1, i];
+}
+
+writeMM(as(R, "CsparseMatrix"), paste(args[2], "R", sep="")); 
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/misc/IPAUnknownRecursion.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/misc/IPAUnknownRecursion.dml b/src/test/scripts/functions/misc/IPAUnknownRecursion.dml
index 1a852ab..22570b8 100644
--- a/src/test/scripts/functions/misc/IPAUnknownRecursion.dml
+++ b/src/test/scripts/functions/misc/IPAUnknownRecursion.dml
@@ -19,27 +19,27 @@
 #
 #-------------------------------------------------------------
 
-
-factorial = function(Matrix[Double] arr, Integer pos) return (Matrix[Double] arr){
-	if(pos == 1) arr[1, pos] = 1
-	else{
-		arr = factorial(arr, pos-1)
-		arr[1, pos] = pos * arr[1, pos-1]
-	}
-	
-	for(i in 1:ncol(arr))
-		print("inside factorial (" + pos + ") " + i + ": " + castAsScalar(arr[1, i]))
-}
-
-n = $1
-arr = matrix(0, rows=1, cols=n)
-arr = factorial(arr, n)
-
-R = matrix(0, rows=1, cols=n);
-for(i in 1:n) #copy important to test dynamic rewrites
-{
-   print("main factorial " + i + ": " + castAsScalar(arr[1, i]))
-   R[1,i] = as.scalar(arr[1, i]);
-}
-
+
+factorial = function(Matrix[Double] arr, Integer pos) return (Matrix[Double] arr){
+	if(pos == 1) arr[1, pos] = 1
+	else{
+		arr = factorial(arr, pos-1)
+		arr[1, pos] = pos * arr[1, pos-1]
+	}
+	
+	for(i in 1:ncol(arr))
+		print("inside factorial (" + pos + ") " + i + ": " + castAsScalar(arr[1, i]))
+}
+
+n = $1
+arr = matrix(0, rows=1, cols=n)
+arr = factorial(arr, n)
+
+R = matrix(0, rows=1, cols=n);
+for(i in 1:n) #copy important to test dynamic rewrites
+{
+   print("main factorial " + i + ": " + castAsScalar(arr[1, i]))
+   R[1,i] = as.scalar(arr[1, i]);
+}
+
 write(R, $2);
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/misc/IfScalarAssignmentTest.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/misc/IfScalarAssignmentTest.dml b/src/test/scripts/functions/misc/IfScalarAssignmentTest.dml
index 2fc4365..460bf21 100644
--- a/src/test/scripts/functions/misc/IfScalarAssignmentTest.dml
+++ b/src/test/scripts/functions/misc/IfScalarAssignmentTest.dml
@@ -19,13 +19,13 @@
 #
 #-------------------------------------------------------------
 
-
-val = $1;
-if( 1==1){
-   val = $1;
-}
-else{
-   val = $1;
-}
-
+
+val = $1;
+if( 1==1){
+   val = $1;
+}
+else{
+   val = $1;
+}
+
 print(val);
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/misc/InvalidFunctionSignatureTest1.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/misc/InvalidFunctionSignatureTest1.dml b/src/test/scripts/functions/misc/InvalidFunctionSignatureTest1.dml
index dcdb2da..fdce520 100644
--- a/src/test/scripts/functions/misc/InvalidFunctionSignatureTest1.dml
+++ b/src/test/scripts/functions/misc/InvalidFunctionSignatureTest1.dml
@@ -1,30 +1,30 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-printFirst = function(matrix[double] mat) {
-	print(as.integer(as.scalar(mat[1,1])));
-	for (i in 1:2){
-    print("i="+i);
-  }
-}
-
-A = matrix("1 2 3 4", rows=2, cols=2);
-z = printFirst(A);
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+printFirst = function(matrix[double] mat) {
+	print(as.integer(as.scalar(mat[1,1])));
+	for (i in 1:2){
+    print("i="+i);
+  }
+}
+
+A = matrix("1 2 3 4", rows=2, cols=2);
+z = printFirst(A);

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/misc/InvalidFunctionSignatureTest2.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/misc/InvalidFunctionSignatureTest2.dml b/src/test/scripts/functions/misc/InvalidFunctionSignatureTest2.dml
index e003e80..2a95a50 100644
--- a/src/test/scripts/functions/misc/InvalidFunctionSignatureTest2.dml
+++ b/src/test/scripts/functions/misc/InvalidFunctionSignatureTest2.dml
@@ -1,30 +1,30 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-printFirst = function(matrix[integer] mat) {
-	print(as.integer(as.scalar(mat[1,1])));
-	for (i in 1:2){
-    print("i="+i);
-  }
-}
-
-A = matrix("1 2 3 4", rows=2, cols=2);
-z = printFirst(A);
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+printFirst = function(matrix[integer] mat) {
+	print(as.integer(as.scalar(mat[1,1])));
+	for (i in 1:2){
+    print("i="+i);
+  }
+}
+
+A = matrix("1 2 3 4", rows=2, cols=2);
+z = printFirst(A);

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/misc/LengthStringTest.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/misc/LengthStringTest.dml b/src/test/scripts/functions/misc/LengthStringTest.dml
index 25b0ff1..06b3b05 100644
--- a/src/test/scripts/functions/misc/LengthStringTest.dml
+++ b/src/test/scripts/functions/misc/LengthStringTest.dml
@@ -19,8 +19,8 @@
 #
 #-------------------------------------------------------------
 
-
-X = rand(rows=$1,cols=$2,sparsity=0.01);
-Y = removeEmpty(target=X,margin="rows");
-
+
+X = rand(rows=$1,cols=$2,sparsity=0.01);
+Y = removeEmpty(target=X,margin="rows");
+
 print(length(Y));
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/misc/LengthUnknownCSVTest.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/misc/LengthUnknownCSVTest.dml b/src/test/scripts/functions/misc/LengthUnknownCSVTest.dml
index 64cc46c..84a2a31 100644
--- a/src/test/scripts/functions/misc/LengthUnknownCSVTest.dml
+++ b/src/test/scripts/functions/misc/LengthUnknownCSVTest.dml
@@ -19,6 +19,6 @@
 #
 #-------------------------------------------------------------
 
-
-A = read($1, format="csv");
+
+A = read($1, format="csv");
 print(sum(A[sqrt(length(A))-1,]));
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/misc/LongOverflowForLoop.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/misc/LongOverflowForLoop.dml b/src/test/scripts/functions/misc/LongOverflowForLoop.dml
index 20c8d01..e3440fc 100644
--- a/src/test/scripts/functions/misc/LongOverflowForLoop.dml
+++ b/src/test/scripts/functions/misc/LongOverflowForLoop.dml
@@ -19,14 +19,14 @@
 #
 #-------------------------------------------------------------
 
-
-val1 = $1;
-val2 = $2;
-val3 = -1;
-
-for( i in val1:val2 )
-{
-    val3 = i;
-}
-
-print("Result="+val3);
+
+val1 = $1;
+val2 = $2;
+val3 = -1;
+
+for( i in val1:val2 )
+{
+    val3 = i;
+}
+
+print("Result="+val3);

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/misc/LongOverflowMult.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/misc/LongOverflowMult.dml b/src/test/scripts/functions/misc/LongOverflowMult.dml
index f660a34..502e22f 100644
--- a/src/test/scripts/functions/misc/LongOverflowMult.dml
+++ b/src/test/scripts/functions/misc/LongOverflowMult.dml
@@ -19,10 +19,10 @@
 #
 #-------------------------------------------------------------
 
-
-val1 = $1;
-val2 = $2;
-
-val3 = val1*val2;
-
-print("Result="+val3);
+
+val1 = $1;
+val2 = $2;
+
+val3 = val1*val2;
+
+print("Result="+val3);

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/misc/LongOverflowPlus.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/misc/LongOverflowPlus.dml b/src/test/scripts/functions/misc/LongOverflowPlus.dml
index d082ed0..8f69f09 100644
--- a/src/test/scripts/functions/misc/LongOverflowPlus.dml
+++ b/src/test/scripts/functions/misc/LongOverflowPlus.dml
@@ -19,10 +19,10 @@
 #
 #-------------------------------------------------------------
 
-
-val1 = $1;
-val2 = $2;
-
-val3 = val1+val2;
-
-print("Result="+val3);
+
+val1 = $1;
+val2 = $2;
+
+val3 = val1+val2;
+
+print("Result="+val3);

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/misc/NcolStringTest.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/misc/NcolStringTest.dml b/src/test/scripts/functions/misc/NcolStringTest.dml
index f9fb1cb..2bc624e 100644
--- a/src/test/scripts/functions/misc/NcolStringTest.dml
+++ b/src/test/scripts/functions/misc/NcolStringTest.dml
@@ -19,8 +19,8 @@
 #
 #-------------------------------------------------------------
 
-
-X = rand(rows=$1,cols=$2,sparsity=0.01);
-Y = removeEmpty(target=X,margin="rows");
-
+
+X = rand(rows=$1,cols=$2,sparsity=0.01);
+Y = removeEmpty(target=X,margin="rows");
+
 print(nrow(Y));
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/misc/NcolUnknownCSVTest.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/misc/NcolUnknownCSVTest.dml b/src/test/scripts/functions/misc/NcolUnknownCSVTest.dml
index 5434fdb..fdaf16c 100644
--- a/src/test/scripts/functions/misc/NcolUnknownCSVTest.dml
+++ b/src/test/scripts/functions/misc/NcolUnknownCSVTest.dml
@@ -19,6 +19,6 @@
 #
 #-------------------------------------------------------------
 
-
-A = read($1, format="csv");
+
+A = read($1, format="csv");
 print(sum(A[,ncol(A)-3]));
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/misc/NrowStringTest.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/misc/NrowStringTest.dml b/src/test/scripts/functions/misc/NrowStringTest.dml
index f9fb1cb..2bc624e 100644
--- a/src/test/scripts/functions/misc/NrowStringTest.dml
+++ b/src/test/scripts/functions/misc/NrowStringTest.dml
@@ -19,8 +19,8 @@
 #
 #-------------------------------------------------------------
 
-
-X = rand(rows=$1,cols=$2,sparsity=0.01);
-Y = removeEmpty(target=X,margin="rows");
-
+
+X = rand(rows=$1,cols=$2,sparsity=0.01);
+Y = removeEmpty(target=X,margin="rows");
+
 print(nrow(Y));
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/misc/NrowUnknownCSVTest.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/misc/NrowUnknownCSVTest.dml b/src/test/scripts/functions/misc/NrowUnknownCSVTest.dml
index 9ad3cac..0afd68b 100644
--- a/src/test/scripts/functions/misc/NrowUnknownCSVTest.dml
+++ b/src/test/scripts/functions/misc/NrowUnknownCSVTest.dml
@@ -19,6 +19,6 @@
 #
 #-------------------------------------------------------------
 
-
-A = read($1, format="csv");
+
+A = read($1, format="csv");
 print(sum(A[nrow(A)-3,]));
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/misc/OuterExpandTest.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/misc/OuterExpandTest.R b/src/test/scripts/functions/misc/OuterExpandTest.R
index 81a1f24..b581a4a 100644
--- a/src/test/scripts/functions/misc/OuterExpandTest.R
+++ b/src/test/scripts/functions/misc/OuterExpandTest.R
@@ -1,38 +1,38 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-
-args <- commandArgs(TRUE)
-options(digits=22)
-library("Matrix")
-
-A = as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
-m = as.integer(args[2]);
-left = as.logical(args[3]);
-
-if( left ){
-   C = outer(as.vector(A), seq(1,m), "==");
-} else {
-   C = outer(seq(1,m), as.vector(A), "==");
-}
-
-
-writeMM(as(as.matrix(C), "CsparseMatrix"), paste(args[4], "C", sep="")); 
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+args <- commandArgs(TRUE)
+options(digits=22)
+library("Matrix")
+
+A = as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
+m = as.integer(args[2]);
+left = as.logical(args[3]);
+
+if( left ){
+   C = outer(as.vector(A), seq(1,m), "==");
+} else {
+   C = outer(seq(1,m), as.vector(A), "==");
+}
+
+
+writeMM(as(as.matrix(C), "CsparseMatrix"), paste(args[4], "C", sep="")); 

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/misc/OuterExpandTest.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/misc/OuterExpandTest.dml b/src/test/scripts/functions/misc/OuterExpandTest.dml
index b0b931e..b312677 100644
--- a/src/test/scripts/functions/misc/OuterExpandTest.dml
+++ b/src/test/scripts/functions/misc/OuterExpandTest.dml
@@ -1,34 +1,34 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-
-A = read($1);
-m = $2;
-left = $3;
-
-if( left ){
-   C = outer(A, t(seq(1,m)), "==");
-}
-else{
-   C = outer(seq(1,m), t(A), "==");
-}
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+A = read($1);
+m = $2;
+left = $3;
+
+if( left ){
+   C = outer(A, t(seq(1,m)), "==");
+}
+else{
+   C = outer(seq(1,m), t(A), "==");
+}
+
 write(C, $4);
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/misc/PackageFunCall1.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/misc/PackageFunCall1.dml b/src/test/scripts/functions/misc/PackageFunCall1.dml
index 6644006..3ea17cc 100644
--- a/src/test/scripts/functions/misc/PackageFunCall1.dml
+++ b/src/test/scripts/functions/misc/PackageFunCall1.dml
@@ -1,25 +1,25 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-
-source("PackageFunLib.dml") as Other
-dummy = Other::hello()
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+source("PackageFunLib.dml") as Other
+dummy = Other::hello()
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/misc/PackageFunCall2.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/misc/PackageFunCall2.dml b/src/test/scripts/functions/misc/PackageFunCall2.dml
index b9b35b2..9453349 100644
--- a/src/test/scripts/functions/misc/PackageFunCall2.dml
+++ b/src/test/scripts/functions/misc/PackageFunCall2.dml
@@ -1,26 +1,26 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-
-setwd(".")
-source("PackageFunLib.dml") as Other
-dummy = Other::hello()
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+setwd(".")
+source("PackageFunLib.dml") as Other
+dummy = Other::hello()
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/misc/PackageFunLib.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/misc/PackageFunLib.dml b/src/test/scripts/functions/misc/PackageFunLib.dml
index 2ed4325..e97b679 100644
--- a/src/test/scripts/functions/misc/PackageFunLib.dml
+++ b/src/test/scripts/functions/misc/PackageFunLib.dml
@@ -1,26 +1,26 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-
-hello = function() {
-  print("Hi!")
-}
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+hello = function() {
+  print("Hi!")
+}
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/misc/ParForScalarAssignmentTest.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/misc/ParForScalarAssignmentTest.dml b/src/test/scripts/functions/misc/ParForScalarAssignmentTest.dml
index cc42f08..20ff692 100644
--- a/src/test/scripts/functions/misc/ParForScalarAssignmentTest.dml
+++ b/src/test/scripts/functions/misc/ParForScalarAssignmentTest.dml
@@ -19,10 +19,10 @@
 #
 #-------------------------------------------------------------
 
-
-val = $1;
-parfor( i in 1:10 ){
-   val = $1;
-}
-
+
+val = $1;
+parfor( i in 1:10 ){
+   val = $1;
+}
+
 print(val);
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/misc/PrintExpressionTest1.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/misc/PrintExpressionTest1.dml b/src/test/scripts/functions/misc/PrintExpressionTest1.dml
index 0c7a475..b10bcfd 100644
--- a/src/test/scripts/functions/misc/PrintExpressionTest1.dml
+++ b/src/test/scripts/functions/misc/PrintExpressionTest1.dml
@@ -1,23 +1,23 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-x = TRUE;
-print(!x);
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+x = TRUE;
+print(!x);

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/misc/PrintExpressionTest2.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/misc/PrintExpressionTest2.dml b/src/test/scripts/functions/misc/PrintExpressionTest2.dml
index 251ce3a..36b9a41 100644
--- a/src/test/scripts/functions/misc/PrintExpressionTest2.dml
+++ b/src/test/scripts/functions/misc/PrintExpressionTest2.dml
@@ -1,23 +1,23 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-x = 7;
-print(-x);
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+x = 7;
+print(-x);

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/misc/PrintMatrixTest.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/misc/PrintMatrixTest.dml b/src/test/scripts/functions/misc/PrintMatrixTest.dml
index e380c0a..6f889e5 100644
--- a/src/test/scripts/functions/misc/PrintMatrixTest.dml
+++ b/src/test/scripts/functions/misc/PrintMatrixTest.dml
@@ -1,37 +1,37 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-printm = function(string name, matrix[double] M) return (matrix[double] M) {
-  print("printing matrix = " + name)
-  for (i in 1:nrow(M)) {
-    for (j in 1:ncol(M)) {
-       e = as.scalar(M[i,j])
-       print(" " + i + " " + j + " " + e)
-    }
-  }
-  print("done printing matrix = " + name)
-  M = M
-}
-
-MM1 = matrix(7, rows=2, cols=2)
-str_name = "MM1"
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+printm = function(string name, matrix[double] M) return (matrix[double] M) {
+  print("printing matrix = " + name)
+  for (i in 1:nrow(M)) {
+    for (j in 1:ncol(M)) {
+       e = as.scalar(M[i,j])
+       print(" " + i + " " + j + " " + e)
+    }
+  }
+  print("done printing matrix = " + name)
+  M = M
+}
+
+MM1 = matrix(7, rows=2, cols=2)
+str_name = "MM1"
+
 ignore = printm(str_name, MM1)
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/misc/ReadAfterWriteMatrix1.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/misc/ReadAfterWriteMatrix1.dml b/src/test/scripts/functions/misc/ReadAfterWriteMatrix1.dml
index 6c83c2e..7e9bc6e 100644
--- a/src/test/scripts/functions/misc/ReadAfterWriteMatrix1.dml
+++ b/src/test/scripts/functions/misc/ReadAfterWriteMatrix1.dml
@@ -19,10 +19,10 @@
 #
 #-------------------------------------------------------------
 
-
-A = rand(rows=10, cols=10);
-write(A, $1);
-
-B = read($2);
-print(sum(B));
-
+
+A = rand(rows=10, cols=10);
+write(A, $1);
+
+B = read($2);
+print(sum(B));
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/misc/ReadAfterWriteMatrix2.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/misc/ReadAfterWriteMatrix2.dml b/src/test/scripts/functions/misc/ReadAfterWriteMatrix2.dml
index 4a43aff..bfbb81d 100644
--- a/src/test/scripts/functions/misc/ReadAfterWriteMatrix2.dml
+++ b/src/test/scripts/functions/misc/ReadAfterWriteMatrix2.dml
@@ -19,12 +19,12 @@
 #
 #-------------------------------------------------------------
 
-
-A = rand(rows=10, cols=10);
-write(A, $1);
-
-if(1==1){}
-
-B = read($2);
-print(sum(B));
-
+
+A = rand(rows=10, cols=10);
+write(A, $1);
+
+if(1==1){}
+
+B = read($2);
+print(sum(B));
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/misc/ReadAfterWriteScalar1.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/misc/ReadAfterWriteScalar1.dml b/src/test/scripts/functions/misc/ReadAfterWriteScalar1.dml
index 689bf05..d089f4d 100644
--- a/src/test/scripts/functions/misc/ReadAfterWriteScalar1.dml
+++ b/src/test/scripts/functions/misc/ReadAfterWriteScalar1.dml
@@ -19,10 +19,10 @@
 #
 #-------------------------------------------------------------
 
-
-A = 7;
-write(A, $1);
-
-B = read($2);
-print(B);
-
+
+A = 7;
+write(A, $1);
+
+B = read($2);
+print(B);
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/misc/ReadAfterWriteScalar2.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/misc/ReadAfterWriteScalar2.dml b/src/test/scripts/functions/misc/ReadAfterWriteScalar2.dml
index 980cd67..43ff306 100644
--- a/src/test/scripts/functions/misc/ReadAfterWriteScalar2.dml
+++ b/src/test/scripts/functions/misc/ReadAfterWriteScalar2.dml
@@ -19,12 +19,12 @@
 #
 #-------------------------------------------------------------
 
-
-A = 7;
-write(A, $1);
-
-if(1==1){}
-
-B = read($2);
-print(B);
-
+
+A = 7;
+write(A, $1);
+
+if(1==1){}
+
+B = read($2);
+print(B);
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/functions/misc/RewriteColSumsMVMult.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/misc/RewriteColSumsMVMult.R b/src/test/scripts/functions/misc/RewriteColSumsMVMult.R
index e83c3d0..15893ad 100644
--- a/src/test/scripts/functions/misc/RewriteColSumsMVMult.R
+++ b/src/test/scripts/functions/misc/RewriteColSumsMVMult.R
@@ -19,12 +19,12 @@
 #
 #-------------------------------------------------------------
 
-
-args <- commandArgs(TRUE)
-options(digits=22)
-library("Matrix")
-
-X = as.matrix(readMM(paste(args[1], "X.mtx", sep="")))
-R = colSums( X * seq(1,nrow(X))%*%matrix(1,1,ncol(X)) );
-
-writeMM(as(R, "CsparseMatrix"), paste(args[2], "R", sep="")); 
+
+args <- commandArgs(TRUE)
+options(digits=22)
+library("Matrix")
+
+X = as.matrix(readMM(paste(args[1], "X.mtx", sep="")))
+R = colSums( X * seq(1,nrow(X))%*%matrix(1,1,ncol(X)) );
+
+writeMM(as(R, "CsparseMatrix"), paste(args[2], "R", sep="")); 



[29/55] [partial] incubator-systemml git commit: [SYSTEMML-482] [SYSTEMML-480] Adding a Git attributes file to enfore Unix-styled line endings, and normalizing all of the line endings.

Posted by du...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/descriptivestats/IQMTest.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/descriptivestats/IQMTest.dml b/src/test/scripts/applications/descriptivestats/IQMTest.dml
index f0b9477..d6234ab 100644
--- a/src/test/scripts/applications/descriptivestats/IQMTest.dml
+++ b/src/test/scripts/applications/descriptivestats/IQMTest.dml
@@ -1,35 +1,35 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-$$readhelper$$
-
-V = read("$$indir$$vector", rows=$$rows$$, cols=1, format="text")
-W = read("$$indir$$weight", rows=$$rows$$, cols=1, format="text")
-
-# inter quartile mean
-iqm = interQuartileMean(V)
-iqmHelper1 = iqm * Helper;
-write(iqmHelper1, "$$outdir$$iqm", format="text");
-
-# weighted inter quartile mean
-wiqm = interQuartileMean(V, W)
-iqmHelper2 = wiqm * Helper;
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+$$readhelper$$
+
+V = read("$$indir$$vector", rows=$$rows$$, cols=1, format="text")
+W = read("$$indir$$weight", rows=$$rows$$, cols=1, format="text")
+
+# inter quartile mean
+iqm = interQuartileMean(V)
+iqmHelper1 = iqm * Helper;
+write(iqmHelper1, "$$outdir$$iqm", format="text");
+
+# weighted inter quartile mean
+wiqm = interQuartileMean(V, W)
+iqmHelper2 = wiqm * Helper;
 write(iqmHelper2, "$$outdir$$weighted_iqm", format="text");
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/descriptivestats/OddsRatio.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/descriptivestats/OddsRatio.R b/src/test/scripts/applications/descriptivestats/OddsRatio.R
index 6649e94..4994935 100644
--- a/src/test/scripts/applications/descriptivestats/OddsRatio.R
+++ b/src/test/scripts/applications/descriptivestats/OddsRatio.R
@@ -1,75 +1,75 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# JUnit test class: dml.test.integration.descriptivestats.CategoricalCategoricalTest.java
-# command line invocation assuming $CC_HOME is set to the home of the R script
-# Rscript $CC_HOME/OddsRato.R $CC_HOME/in/ $CC_HOME/expected/
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-A = readMM(paste(args[1], "A.mtx", sep=""));
-B = readMM(paste(args[1], "B.mtx", sep=""));
-
-F = table(A[,1],B[,1]);
-
-a11 = F[1,1];
-a12 = F[1,2];
-a21 = F[2,1];
-a22 = F[2,2];
-
-#print(paste(a11, " ", a12, " ", a21, " ", a22));
-
-oddsRatio = (a11*a22)/(a12*a21);
-sigma = sqrt(1/a11 + 1/a12 + 1/a21 + 1/a22);
-left_conf = exp( log(oddsRatio) - 2*sigma )
-right_conf = exp( log(oddsRatio) + 2*sigma )
-sigma_away = abs( log(oddsRatio)/sigma )
-
-# chisq.test returns a list containing statistic, p-value, etc.
-cst = chisq.test(F);
-
-# get the chi-squared coefficient from the list
-chi_squared = as.numeric(cst[1]);
-degFreedom =  as.numeric(cst[2]);
-pValue = as.numeric(cst[3]);
-
-q = min(dim(F));
-W = sum(F);
-cramers_v = sqrt(chi_squared/(W*(q-1)));
-
-
-#print(paste(oddsRatio, " ", sigma, " [", left_conf, ",", right_conf, "] ", sigma_away, " "));
-#print(paste(chi_squared, " ", degFreedom, " [", pValue, ",", cramers_v, "] "));
-
-write(oddsRatio, paste(args[2], "oddsRatio", sep=""));
-write(sigma, paste(args[2], "sigma", sep=""));
-write(left_conf, paste(args[2], "leftConf", sep=""));
-write(right_conf, paste(args[2], "rightConf", sep=""));
-write(sigma_away, paste(args[2], "sigmasAway", sep=""));
-
-#write(chi_squared, paste(args[2], "chiSquared", sep=""));
-#write(degFreedom, paste(args[2], "degFreedom", sep=""));
-#write(pValue, paste(args[2], "pValue", sep=""));
-#write(cramers_v, paste(args[2], "cramersV", sep=""));
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# JUnit test class: dml.test.integration.descriptivestats.CategoricalCategoricalTest.java
+# command line invocation assuming $CC_HOME is set to the home of the R script
+# Rscript $CC_HOME/OddsRato.R $CC_HOME/in/ $CC_HOME/expected/
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+A = readMM(paste(args[1], "A.mtx", sep=""));
+B = readMM(paste(args[1], "B.mtx", sep=""));
+
+F = table(A[,1],B[,1]);
+
+a11 = F[1,1];
+a12 = F[1,2];
+a21 = F[2,1];
+a22 = F[2,2];
+
+#print(paste(a11, " ", a12, " ", a21, " ", a22));
+
+oddsRatio = (a11*a22)/(a12*a21);
+sigma = sqrt(1/a11 + 1/a12 + 1/a21 + 1/a22);
+left_conf = exp( log(oddsRatio) - 2*sigma )
+right_conf = exp( log(oddsRatio) + 2*sigma )
+sigma_away = abs( log(oddsRatio)/sigma )
+
+# chisq.test returns a list containing statistic, p-value, etc.
+cst = chisq.test(F);
+
+# get the chi-squared coefficient from the list
+chi_squared = as.numeric(cst[1]);
+degFreedom =  as.numeric(cst[2]);
+pValue = as.numeric(cst[3]);
+
+q = min(dim(F));
+W = sum(F);
+cramers_v = sqrt(chi_squared/(W*(q-1)));
+
+
+#print(paste(oddsRatio, " ", sigma, " [", left_conf, ",", right_conf, "] ", sigma_away, " "));
+#print(paste(chi_squared, " ", degFreedom, " [", pValue, ",", cramers_v, "] "));
+
+write(oddsRatio, paste(args[2], "oddsRatio", sep=""));
+write(sigma, paste(args[2], "sigma", sep=""));
+write(left_conf, paste(args[2], "leftConf", sep=""));
+write(right_conf, paste(args[2], "rightConf", sep=""));
+write(sigma_away, paste(args[2], "sigmasAway", sep=""));
+
+#write(chi_squared, paste(args[2], "chiSquared", sep=""));
+#write(degFreedom, paste(args[2], "degFreedom", sep=""));
+#write(pValue, paste(args[2], "pValue", sep=""));
+#write(cramers_v, paste(args[2], "cramersV", sep=""));
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/descriptivestats/OddsRatio.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/descriptivestats/OddsRatio.dml b/src/test/scripts/applications/descriptivestats/OddsRatio.dml
index 8a62389..ae52e03 100644
--- a/src/test/scripts/applications/descriptivestats/OddsRatio.dml
+++ b/src/test/scripts/applications/descriptivestats/OddsRatio.dml
@@ -1,114 +1,114 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-
-# DML script to compute oddsRatio and related confidence intervals
-# Input: two column vectors with categorical values w/ number of categories = 2
-
-#A1 = Rand(rows=1400, cols=1, min=1, max=2); 
-#A2 = Rand(rows=1400, cols=1, min=1, max=2); 
-#A1 = round(A1);
-#A2 = round(A2);
-
-A1 = read($1, rows=$2, cols=1);
-A2 = read($3, rows=$2, cols=1);
-
-F = table(A1, A2);
-
-# Perform computations only on 2x2 contingency tables
-if (nrow(F) != 2 | ncol(F) != 2) {
-	print(max(A1) + ", " + max(A2));
-	print("Only 2x2 tables are supported. Contingency table constructed from given data is [" + nrow(F) + ", " + ncol(F) + "]");
-}
-else {
-	[oddsRatio, left_conf, right_conf, sd, chisquared, pvalue, crv, sigma_away, degf] = pair_corr(F);
-	#print("Odds Ratio " + oddsRatio);
-	#print("Standard Devication " + sd);
-	#print("Confidence Interval [" + left_conf + "," + right_conf + "]");
-	#print("Howmany sigma's away [" + sigma_away);
-	#print("Chi-squared Test: statistic = " + chisquared + ", pValue = " + pvalue + ", Cramer's V = " + crv + ", Degrees of Freedom = " + degf);
-	
-	write(oddsRatio, $4);
-	write(sd, $5);
-	write(left_conf, $6);
-	write(right_conf, $7);
-	write(sigma_away, $8);
-	#write(chisquared, $9);
-	#write(degf, $10);
-	#write(pvalue, $11);
-	#write(crv, $12);
-}
-
-# -----------------------------------------------------------------------------------------------
-
-# Given a 2x2 contingency table, it computes oddsRatio and the corresponding confidence interval
-pair_corr = function(Matrix[Double] A) return (Double oddsRatio, Double left_conf, Double right_conf, Double sd, Double chisquared, Double pvalue, Double crv, Double sigma_away, Double df) {
-	a11 = castAsScalar(A[1,1]);
-	a12 = castAsScalar(A[1,2]);
-	a21 = castAsScalar(A[2,1]);
-	a22 = castAsScalar(A[2,2]);
-
-	sd = sqrt(1/a11 + 1/a12 + 1/a21 + 1/a22);
-	oddsRatio = (a11*a22)/(a12*a21);
-
-	[chisquared, df, pvalue, crv] = bivar_cc(A);
-
-	left_conf = exp( log(oddsRatio) - 2*sd )
-	right_conf = exp( log(oddsRatio) + 2*sd )
-	sigma_away = abs( log(oddsRatio)/sd )
-}
-
-# -----------------------------------------------------------------------------------------------
-
-# Given a contingency table, perform the chi-squared test.
-bivar_cc = function(Matrix[Double] F) return (Double chisq, Double df, Double pval, Double cramersv) {
-
-    # Contingency Table
-    # F = ctable(A,B);
-
-    # Chi-Squared
-    W = sum(F);
-    r = rowSums(F);
-    c = colSums(F);
-    E = (r %*% c)/W;
-    T = (F-E)^2/E;
-    chi_squared = sum(T);
-
-    # compute p-value
-    degFreedom = (nrow(F)-1)*(ncol(F)-1);
-    pValue = pchisq(target=chi_squared, df=degFreedom, lower.tail=FALSE);
-
-    # Cramer's V
-    R = nrow(F);
-    C = ncol(F);
-    q = min(R,C);
-    cramers_v = sqrt(chi_squared/(W*(q-1)));
-
-    # Assign return values
-    chisq = chi_squared;
-    df = degFreedom;
-    pval = pValue;
-    cramersv = cramers_v;
-}
-
-# -----------------------------------------------------------------------------------------------
-
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+# DML script to compute oddsRatio and related confidence intervals
+# Input: two column vectors with categorical values w/ number of categories = 2
+
+#A1 = Rand(rows=1400, cols=1, min=1, max=2); 
+#A2 = Rand(rows=1400, cols=1, min=1, max=2); 
+#A1 = round(A1);
+#A2 = round(A2);
+
+A1 = read($1, rows=$2, cols=1);
+A2 = read($3, rows=$2, cols=1);
+
+F = table(A1, A2);
+
+# Perform computations only on 2x2 contingency tables
+if (nrow(F) != 2 | ncol(F) != 2) {
+	print(max(A1) + ", " + max(A2));
+	print("Only 2x2 tables are supported. Contingency table constructed from given data is [" + nrow(F) + ", " + ncol(F) + "]");
+}
+else {
+	[oddsRatio, left_conf, right_conf, sd, chisquared, pvalue, crv, sigma_away, degf] = pair_corr(F);
+	#print("Odds Ratio " + oddsRatio);
+	#print("Standard Devication " + sd);
+	#print("Confidence Interval [" + left_conf + "," + right_conf + "]");
+	#print("Howmany sigma's away [" + sigma_away);
+	#print("Chi-squared Test: statistic = " + chisquared + ", pValue = " + pvalue + ", Cramer's V = " + crv + ", Degrees of Freedom = " + degf);
+	
+	write(oddsRatio, $4);
+	write(sd, $5);
+	write(left_conf, $6);
+	write(right_conf, $7);
+	write(sigma_away, $8);
+	#write(chisquared, $9);
+	#write(degf, $10);
+	#write(pvalue, $11);
+	#write(crv, $12);
+}
+
+# -----------------------------------------------------------------------------------------------
+
+# Given a 2x2 contingency table, it computes oddsRatio and the corresponding confidence interval
+pair_corr = function(Matrix[Double] A) return (Double oddsRatio, Double left_conf, Double right_conf, Double sd, Double chisquared, Double pvalue, Double crv, Double sigma_away, Double df) {
+	a11 = castAsScalar(A[1,1]);
+	a12 = castAsScalar(A[1,2]);
+	a21 = castAsScalar(A[2,1]);
+	a22 = castAsScalar(A[2,2]);
+
+	sd = sqrt(1/a11 + 1/a12 + 1/a21 + 1/a22);
+	oddsRatio = (a11*a22)/(a12*a21);
+
+	[chisquared, df, pvalue, crv] = bivar_cc(A);
+
+	left_conf = exp( log(oddsRatio) - 2*sd )
+	right_conf = exp( log(oddsRatio) + 2*sd )
+	sigma_away = abs( log(oddsRatio)/sd )
+}
+
+# -----------------------------------------------------------------------------------------------
+
+# Given a contingency table, perform the chi-squared test.
+bivar_cc = function(Matrix[Double] F) return (Double chisq, Double df, Double pval, Double cramersv) {
+
+    # Contingency Table
+    # F = ctable(A,B);
+
+    # Chi-Squared
+    W = sum(F);
+    r = rowSums(F);
+    c = colSums(F);
+    E = (r %*% c)/W;
+    T = (F-E)^2/E;
+    chi_squared = sum(T);
+
+    # compute p-value
+    degFreedom = (nrow(F)-1)*(ncol(F)-1);
+    pValue = pchisq(target=chi_squared, df=degFreedom, lower.tail=FALSE);
+
+    # Cramer's V
+    R = nrow(F);
+    C = ncol(F);
+    q = min(R,C);
+    cramers_v = sqrt(chi_squared/(W*(q-1)));
+
+    # Assign return values
+    chisq = chi_squared;
+    df = degFreedom;
+    pval = pValue;
+    cramersv = cramers_v;
+}
+
+# -----------------------------------------------------------------------------------------------
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/descriptivestats/OrdinalOrdinal.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/descriptivestats/OrdinalOrdinal.R b/src/test/scripts/applications/descriptivestats/OrdinalOrdinal.R
index f3fe12d..f7d6de1 100644
--- a/src/test/scripts/applications/descriptivestats/OrdinalOrdinal.R
+++ b/src/test/scripts/applications/descriptivestats/OrdinalOrdinal.R
@@ -1,38 +1,38 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# JUnit test class: dml.test.integration.descriptivestats.BivariateOrdinalOrdinalTest.java
-# command line invocation assuming $OO_HOME is set to the home of the R script
-# Rscript $OO_HOME/OrdinalOrdinal.R $OO_HOME/in/ $OO_HOME/expected/
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-A = readMM(paste(args[1], "A.mtx", sep=""))
-B = readMM(paste(args[1], "B.mtx", sep=""))
-
-spearman = cor(A[,1],B[,1], method="spearman");
-
-#paste("R value", spearman);
-
-write(spearman, paste(args[2], "Spearman", sep=""));
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# JUnit test class: dml.test.integration.descriptivestats.BivariateOrdinalOrdinalTest.java
+# command line invocation assuming $OO_HOME is set to the home of the R script
+# Rscript $OO_HOME/OrdinalOrdinal.R $OO_HOME/in/ $OO_HOME/expected/
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+A = readMM(paste(args[1], "A.mtx", sep=""))
+B = readMM(paste(args[1], "B.mtx", sep=""))
+
+spearman = cor(A[,1],B[,1], method="spearman");
+
+#paste("R value", spearman);
+
+write(spearman, paste(args[2], "Spearman", sep=""));
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/descriptivestats/OrdinalOrdinal.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/descriptivestats/OrdinalOrdinal.dml b/src/test/scripts/applications/descriptivestats/OrdinalOrdinal.dml
index a30818a..cb153d0 100644
--- a/src/test/scripts/applications/descriptivestats/OrdinalOrdinal.dml
+++ b/src/test/scripts/applications/descriptivestats/OrdinalOrdinal.dml
@@ -1,74 +1,74 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# Note this script is externalized to customers, please do not change w/o consulting component owner.
-# How to invoke this dml script OrdinalOrdinal.dml?
-# Assume OO_HOME is set to the home of the dml script
-# Assume input and output directories are on hdfs as INPUT_DIR and OUTPUT_DIR
-# Assume rows = 10000 for A and B
-# hadoop jar SystemML.jar -f $OO_HOME/OrdinalOrdinal.dml -args "$INPUT_DIR/A" 10000 "$INPUT_DIR/B" "$OUPUT_DIR/Spearman"
-
-#-------------------------------------------------------------------------
-# Function to compute ranks
-# takes a column vector as input, and produces a vector of same size in which each cell denotes to the computed score for that category
-computeRanks = function(matrix[double] X) return (matrix[double] Ranks) {
-    Ranks = cumsum(X) - X/2 + 1/2;
-}
-#-------------------------------------------------------------------------
-
-A = read($1, rows=$2, cols=1, format="text");
-B = read($3, rows=$2, cols=1, format="text");
-
-# compute contingency table
-F = table(A,B);
-
-catA = nrow(F);  # number of categories in A
-catB = ncol(F);  # number of categories in B
-
-# compute category-wise counts for both the attributes
-R = rowSums(F);
-S = colSums(F);
-
-# compute scores, both are column vectors
-[C] = computeRanks(R);
-meanX = mean(C,R); 
-
-columnS = t(S);
-[D] = computeRanks(columnS);
-
-# scores (C,D) are individual values, and counts (R,S) act as weights
-meanY = mean(D,columnS);
-
-W = sum(F); # total weight, or total #cases
-varX = moment(C,R,2)*(W/(W-1.0));
-varY = moment(D,columnS,2)*(W/(W-1.0));
-covXY = sum( t(F/(W-1) * (C-meanX)) * (D-meanY) );
-
-sp = covXY/(sqrt(varX)*sqrt(varY));
-
-#print("X: mean " + meanX + ", var " + varX);
-#print("Y: mean " + meanY + ", var " + varY);
-#print("covXY: " + sp);
-
-#sp = spearman(A,B);
-
-write(sp, $4);
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# Note this script is externalized to customers, please do not change w/o consulting component owner.
+# How to invoke this dml script OrdinalOrdinal.dml?
+# Assume OO_HOME is set to the home of the dml script
+# Assume input and output directories are on hdfs as INPUT_DIR and OUTPUT_DIR
+# Assume rows = 10000 for A and B
+# hadoop jar SystemML.jar -f $OO_HOME/OrdinalOrdinal.dml -args "$INPUT_DIR/A" 10000 "$INPUT_DIR/B" "$OUPUT_DIR/Spearman"
+
+#-------------------------------------------------------------------------
+# Function to compute ranks
+# takes a column vector as input, and produces a vector of same size in which each cell denotes to the computed score for that category
+computeRanks = function(matrix[double] X) return (matrix[double] Ranks) {
+    Ranks = cumsum(X) - X/2 + 1/2;
+}
+#-------------------------------------------------------------------------
+
+A = read($1, rows=$2, cols=1, format="text");
+B = read($3, rows=$2, cols=1, format="text");
+
+# compute contingency table
+F = table(A,B);
+
+catA = nrow(F);  # number of categories in A
+catB = ncol(F);  # number of categories in B
+
+# compute category-wise counts for both the attributes
+R = rowSums(F);
+S = colSums(F);
+
+# compute scores, both are column vectors
+[C] = computeRanks(R);
+meanX = mean(C,R); 
+
+columnS = t(S);
+[D] = computeRanks(columnS);
+
+# scores (C,D) are individual values, and counts (R,S) act as weights
+meanY = mean(D,columnS);
+
+W = sum(F); # total weight, or total #cases
+varX = moment(C,R,2)*(W/(W-1.0));
+varY = moment(D,columnS,2)*(W/(W-1.0));
+covXY = sum( t(F/(W-1) * (C-meanX)) * (D-meanY) );
+
+sp = covXY/(sqrt(varX)*sqrt(varY));
+
+#print("X: mean " + meanX + ", var " + varX);
+#print("Y: mean " + meanY + ", var " + varY);
+#print("covXY: " + sp);
+
+#sp = spearman(A,B);
+
+write(sp, $4);
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/descriptivestats/OrdinalOrdinalWithWeightsTest.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/descriptivestats/OrdinalOrdinalWithWeightsTest.R b/src/test/scripts/applications/descriptivestats/OrdinalOrdinalWithWeightsTest.R
index addc779..0c244da 100644
--- a/src/test/scripts/applications/descriptivestats/OrdinalOrdinalWithWeightsTest.R
+++ b/src/test/scripts/applications/descriptivestats/OrdinalOrdinalWithWeightsTest.R
@@ -1,46 +1,46 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# JUnit test class: dml.test.integration.descriptivestats.BivariateOrdinalOrdinalWithWeightsTest.java
-# command line invocation assuming $OO_HOME is set to the home of the R script
-# Rscript $OO_HOME/OrdinalOrdinal.R $OO_HOME/in/ $OO_HOME/expected/
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-Atemp  = readMM(paste(args[1], "A.mtx", sep=""))
-Btemp  = readMM(paste(args[1], "B.mtx", sep=""))
-WMtemp = readMM(paste(args[1], "WM.mtx", sep=""))
-
-#Atemp  = readMM(file="$$indir$$A.mtx"); #readMM(paste(args[1], "A.mtx", sep=""))
-#Btemp  = readMM(file="$$indir$$B.mtx"); #readMM(paste(args[1], "B.mtx", sep=""))
-#WMtemp = readMM(file="$$indir$$WM.mtx"); #readMM(paste(args[1], "WM.mtx", sep=""))
-
-A = rep(Atemp[,1],WMtemp[,1])
-B = rep(Btemp[,1],WMtemp[,1])
-
-spearman = cor(A, B, method="spearman");
-
-#paste("Weighted R value", spearman);
-
-write(spearman, paste(args[2], "Spearman", sep=""));
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# JUnit test class: dml.test.integration.descriptivestats.BivariateOrdinalOrdinalWithWeightsTest.java
+# command line invocation assuming $OO_HOME is set to the home of the R script
+# Rscript $OO_HOME/OrdinalOrdinal.R $OO_HOME/in/ $OO_HOME/expected/
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+Atemp  = readMM(paste(args[1], "A.mtx", sep=""))
+Btemp  = readMM(paste(args[1], "B.mtx", sep=""))
+WMtemp = readMM(paste(args[1], "WM.mtx", sep=""))
+
+#Atemp  = readMM(file="$$indir$$A.mtx"); #readMM(paste(args[1], "A.mtx", sep=""))
+#Btemp  = readMM(file="$$indir$$B.mtx"); #readMM(paste(args[1], "B.mtx", sep=""))
+#WMtemp = readMM(file="$$indir$$WM.mtx"); #readMM(paste(args[1], "WM.mtx", sep=""))
+
+A = rep(Atemp[,1],WMtemp[,1])
+B = rep(Btemp[,1],WMtemp[,1])
+
+spearman = cor(A, B, method="spearman");
+
+#paste("Weighted R value", spearman);
+
+write(spearman, paste(args[2], "Spearman", sep=""));
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/descriptivestats/OrdinalOrdinalWithWeightsTest.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/descriptivestats/OrdinalOrdinalWithWeightsTest.dml b/src/test/scripts/applications/descriptivestats/OrdinalOrdinalWithWeightsTest.dml
index 9af57fa..e6b5143 100644
--- a/src/test/scripts/applications/descriptivestats/OrdinalOrdinalWithWeightsTest.dml
+++ b/src/test/scripts/applications/descriptivestats/OrdinalOrdinalWithWeightsTest.dml
@@ -1,78 +1,78 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-
-#A <- Ordinal
-#B <- Ordinal
-#WM <- Weights
-
-#A = read("$$indir$$A", rows=$$rows$$, cols=1, format="text");
-#B = read("$$indir$$B", rows=$$rows$$, cols=1, format="text");
-#WM = read("$$indir$$WM", rows=$$rows$$, cols=1, format="text");
-
-A = read($1, rows=$2, cols=1, format="text");
-B = read($3, rows=$2, cols=1, format="text");
-WM = read($4, rows=$2, cols=1, format="text");
-
-# compute contingency table
-F = table(A,B,WM);
-
-catA = nrow(F);  # number of categories in A
-catB = ncol(F);  # number of categories in B
-
-# compute category-wise counts for both the attributes
-R = rowSums(F);
-S = colSums(F);
-
-# compute scores, both are column vectors
-[C] = computeRanks(R);
-meanX = mean(C,R); 
-
-columnS = t(S);
-[D] = computeRanks(columnS);
-
-# scores (C,D) are individual values, and counts (R,S) act as weights
-meanY = mean(D,columnS);
-
-W = sum(F); # total weight, or total #cases
-varX = moment(C,R,2)*(W/(W-1.0));
-varY = moment(D,columnS,2)*(W/(W-1.0));
-covXY = sum( t(F/(W-1) * (C-meanX)) * (D-meanY) );
-
-sp = covXY/(sqrt(varX)*sqrt(varY));
-
-#print("X: mean " + meanX + ", var " + varX);
-#print("Y: mean " + meanY + ", var " + varY);
-#print("covXY: " + sp);
-
-#sp = spearman(A,B,WM);
-
-write(sp, $5);
-
-
-#-------------------------------------------------------------------------
-# Function to compute ranks
-# takes a column vector as input, and produces a vector of same size in which each cell denotes to the computed score for that category
-computeRanks = function(Matrix[Double] X) return (Matrix[Double] Ranks) {
-    Ranks = cumsum(X) - X/2 + 1/2;
-}
-#-------------------------------------------------------------------------
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+#A <- Ordinal
+#B <- Ordinal
+#WM <- Weights
+
+#A = read("$$indir$$A", rows=$$rows$$, cols=1, format="text");
+#B = read("$$indir$$B", rows=$$rows$$, cols=1, format="text");
+#WM = read("$$indir$$WM", rows=$$rows$$, cols=1, format="text");
+
+A = read($1, rows=$2, cols=1, format="text");
+B = read($3, rows=$2, cols=1, format="text");
+WM = read($4, rows=$2, cols=1, format="text");
+
+# compute contingency table
+F = table(A,B,WM);
+
+catA = nrow(F);  # number of categories in A
+catB = ncol(F);  # number of categories in B
+
+# compute category-wise counts for both the attributes
+R = rowSums(F);
+S = colSums(F);
+
+# compute scores, both are column vectors
+[C] = computeRanks(R);
+meanX = mean(C,R); 
+
+columnS = t(S);
+[D] = computeRanks(columnS);
+
+# scores (C,D) are individual values, and counts (R,S) act as weights
+meanY = mean(D,columnS);
+
+W = sum(F); # total weight, or total #cases
+varX = moment(C,R,2)*(W/(W-1.0));
+varY = moment(D,columnS,2)*(W/(W-1.0));
+covXY = sum( t(F/(W-1) * (C-meanX)) * (D-meanY) );
+
+sp = covXY/(sqrt(varX)*sqrt(varY));
+
+#print("X: mean " + meanX + ", var " + varX);
+#print("Y: mean " + meanY + ", var " + varY);
+#print("covXY: " + sp);
+
+#sp = spearman(A,B,WM);
+
+write(sp, $5);
+
+
+#-------------------------------------------------------------------------
+# Function to compute ranks
+# takes a column vector as input, and produces a vector of same size in which each cell denotes the computed score for that category
+computeRanks = function(Matrix[Double] X) return (Matrix[Double] Ranks) {
+    Ranks = cumsum(X) - X/2 + 1/2;
+}
+#-------------------------------------------------------------------------
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/descriptivestats/QuantileTest.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/descriptivestats/QuantileTest.dml b/src/test/scripts/applications/descriptivestats/QuantileTest.dml
index a8f2f6d..233fcdf 100644
--- a/src/test/scripts/applications/descriptivestats/QuantileTest.dml
+++ b/src/test/scripts/applications/descriptivestats/QuantileTest.dml
@@ -1,34 +1,34 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-$$readhelper$$
-
-V = read("$$indir$$vector", rows=$$rows1$$, cols=1, format="text")
-W = read("$$indir$$weight", rows=$$rows1$$, cols=1, format="text")
-P = read("$$indir$$prob", rows=$$rows2$$, cols=1, format="text")
-
-# quantile
-Q = quantile(V, P)
-write(Q, "$$outdir$$quantile", format="text");
-
-# weighted quantile
-WQ = quantile(V, W, P)
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+$$readhelper$$
+
+V = read("$$indir$$vector", rows=$$rows1$$, cols=1, format="text")
+W = read("$$indir$$weight", rows=$$rows1$$, cols=1, format="text")
+P = read("$$indir$$prob", rows=$$rows2$$, cols=1, format="text")
+
+# quantile
+Q = quantile(V, P)
+write(Q, "$$outdir$$quantile", format="text");
+
+# weighted quantile
+WQ = quantile(V, W, P)
 write(WQ, "$$outdir$$weighted_quantile", format="text");
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/descriptivestats/Scale.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/descriptivestats/Scale.R b/src/test/scripts/applications/descriptivestats/Scale.R
index 0027c87..45c9efb 100644
--- a/src/test/scripts/applications/descriptivestats/Scale.R
+++ b/src/test/scripts/applications/descriptivestats/Scale.R
@@ -1,141 +1,141 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# JUnit test class: dml.test.integration.descriptivestats.UnivariateStatsTest.java
-# command line invocation assuming $S_HOME is set to the home of the R script
-# Rscript $S_HOME/Scale.R $S_HOME/in/ $S_HOME/expected/
-args <- commandArgs(TRUE)
-options(digits=22)
-
-options(repos="http://cran.stat.ucla.edu/") 
-is.installed <- function(mypkg) is.element(mypkg, installed.packages()[,1])
-
-is_plotrix = is.installed("plotrix");
-if ( !is_plotrix ) {
-install.packages("plotrix");
-} 
-library("plotrix");
-
-is_psych = is.installed("psych");
-if ( !is_psych ) {
-install.packages("psych");
-} 
-library("psych")
-
-is_moments = is.installed("moments");
-if( !is_moments){
-install.packages("moments");
-}
-library("moments")
-
-#library("batch")
-library("Matrix")
-
-V = readMM(paste(args[1], "vector.mtx", sep=""))
-P = readMM(paste(args[1], "prob.mtx", sep=""))
-
-n = nrow(V)
-
-# mean
-mu = mean(V)
-
-# variances
-var = var(V[,1])
-
-# standard deviations
-std_dev = sd(V[,1], na.rm = FALSE)
-
-# standard errors of mean
-SE = std.error(V[,1], na.rm)
-
-# coefficients of variation
-cv = std_dev/mu
-
-# harmonic means (note: may generate out of memory for large sparse matrices becauses of NaNs)
-# har_mu = harmonic.mean(V[,1]) -- DML does not support this yet
-
-# geometric means is not currently supported.
-# geom_mu = geometric.mean(V[,1]) -- DML does not support this yet
-
-# min and max
-mn=min(V)
-mx=max(V)
-
-# range
-rng = mx - mn
-
-# Skewness
-g1 = moment(V[,1], order=3, central=TRUE)/(std_dev^3)
-
-# standard error of skewness (not sure how it is defined without the weight)
-se_g1=sqrt( 6*n*(n-1.0) / ((n-2.0)*(n+1.0)*(n+3.0)) )
-
-# Kurtosis (using binomial formula)
-g2 = moment(V[,1], order=4, central=TRUE)/(var^2)-3
-
-# Standard error of Kurtosis (not sure how it is defined without the weight)
-se_g2= sqrt( (4*(n^2-1)*se_g1^2)/((n+5)*(n-3)) )
-
-# median
-md = median(V[,1]) #quantile(V[,1], 0.5, type = 1)
-
-# quantile
-Q = t(quantile(V[,1], P[,1], type = 1))
-
-# inter-quartile mean
-S=c(sort(V[,1]))
-
-q25d=n*0.25
-q75d=n*0.75
-q25i=ceiling(q25d)
-q75i=ceiling(q75d)
-
-iqm = sum(S[(q25i+1):q75i])
-iqm = iqm + (q25i-q25d)*S[q25i] - (q75i-q75d)*S[q75i]
-iqm = iqm/(n*0.5)
-
-#print(paste("IQM ", iqm));
-
-# outliers use ppred to describe it
-out_minus = t(as.numeric(V< mu-5*std_dev)*V) 
-out_plus = t(as.numeric(V> mu+5*std_dev)*V)
-
-write(mu, paste(args[2], "mean", sep=""));
-write(std_dev, paste(args[2], "std", sep=""));
-write(SE, paste(args[2], "se", sep=""));
-write(var, paste(args[2], "var", sep=""));
-write(cv, paste(args[2], "cv", sep=""));
-# write(har_mu),paste(args[2], "har", sep=""));
-# write(geom_mu, paste(args[2], "geom", sep=""));
-write(mn, paste(args[2], "min", sep=""));
-write(mx, paste(args[2], "max", sep=""));
-write(rng, paste(args[2], "rng", sep=""));
-write(g1, paste(args[2], "g1", sep=""));
-write(se_g1, paste(args[2], "se_g1", sep=""));
-write(g2, paste(args[2], "g2", sep=""));
-write(se_g2, paste(args[2], "se_g2", sep=""));
-write(md, paste(args[2], "median", sep=""));
-write(iqm, paste(args[2], "iqm", sep=""));
-writeMM(as(t(out_minus),"CsparseMatrix"), paste(args[2], "out_minus", sep=""), format="text");
-writeMM(as(t(out_plus),"CsparseMatrix"), paste(args[2], "out_plus", sep=""), format="text");
-writeMM(as(t(Q),"CsparseMatrix"), paste(args[2], "quantile", sep=""), format="text");
-
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# JUnit test class: dml.test.integration.descriptivestats.UnivariateStatsTest.java
+# command line invocation assuming $S_HOME is set to the home of the R script
+# Rscript $S_HOME/Scale.R $S_HOME/in/ $S_HOME/expected/
+args <- commandArgs(TRUE)
+options(digits=22)
+
+options(repos="http://cran.stat.ucla.edu/") 
+is.installed <- function(mypkg) is.element(mypkg, installed.packages()[,1])
+
+is_plotrix = is.installed("plotrix");
+if ( !is_plotrix ) {
+install.packages("plotrix");
+} 
+library("plotrix");
+
+is_psych = is.installed("psych");
+if ( !is_psych ) {
+install.packages("psych");
+} 
+library("psych")
+
+is_moments = is.installed("moments");
+if( !is_moments){
+install.packages("moments");
+}
+library("moments")
+
+#library("batch")
+library("Matrix")
+
+V = readMM(paste(args[1], "vector.mtx", sep=""))
+P = readMM(paste(args[1], "prob.mtx", sep=""))
+
+n = nrow(V)
+
+# mean
+mu = mean(V)
+
+# variances
+var = var(V[,1])
+
+# standard deviations
+std_dev = sd(V[,1], na.rm = FALSE)
+
+# standard errors of mean
+SE = std.error(V[,1], na.rm)
+
+# coefficients of variation
+cv = std_dev/mu
+
+# harmonic means (note: may generate out of memory for large sparse matrices because of NaNs)
+# har_mu = harmonic.mean(V[,1]) -- DML does not support this yet
+
+# geometric means is not currently supported.
+# geom_mu = geometric.mean(V[,1]) -- DML does not support this yet
+
+# min and max
+mn=min(V)
+mx=max(V)
+
+# range
+rng = mx - mn
+
+# Skewness
+g1 = moment(V[,1], order=3, central=TRUE)/(std_dev^3)
+
+# standard error of skewness (not sure how it is defined without the weight)
+se_g1=sqrt( 6*n*(n-1.0) / ((n-2.0)*(n+1.0)*(n+3.0)) )
+
+# Kurtosis (using binomial formula)
+g2 = moment(V[,1], order=4, central=TRUE)/(var^2)-3
+
+# Standard error of Kurtosis (not sure how it is defined without the weight)
+se_g2= sqrt( (4*(n^2-1)*se_g1^2)/((n+5)*(n-3)) )
+
+# median
+md = median(V[,1]) #quantile(V[,1], 0.5, type = 1)
+
+# quantile
+Q = t(quantile(V[,1], P[,1], type = 1))
+
+# inter-quartile mean
+S=c(sort(V[,1]))
+
+q25d=n*0.25
+q75d=n*0.75
+q25i=ceiling(q25d)
+q75i=ceiling(q75d)
+
+iqm = sum(S[(q25i+1):q75i])
+iqm = iqm + (q25i-q25d)*S[q25i] - (q75i-q75d)*S[q75i]
+iqm = iqm/(n*0.5)
+
+#print(paste("IQM ", iqm));
+
+# outliers use ppred to describe it
+out_minus = t(as.numeric(V< mu-5*std_dev)*V) 
+out_plus = t(as.numeric(V> mu+5*std_dev)*V)
+
+write(mu, paste(args[2], "mean", sep=""));
+write(std_dev, paste(args[2], "std", sep=""));
+write(SE, paste(args[2], "se", sep=""));
+write(var, paste(args[2], "var", sep=""));
+write(cv, paste(args[2], "cv", sep=""));
+# write(har_mu),paste(args[2], "har", sep=""));
+# write(geom_mu, paste(args[2], "geom", sep=""));
+write(mn, paste(args[2], "min", sep=""));
+write(mx, paste(args[2], "max", sep=""));
+write(rng, paste(args[2], "rng", sep=""));
+write(g1, paste(args[2], "g1", sep=""));
+write(se_g1, paste(args[2], "se_g1", sep=""));
+write(g2, paste(args[2], "g2", sep=""));
+write(se_g2, paste(args[2], "se_g2", sep=""));
+write(md, paste(args[2], "median", sep=""));
+write(iqm, paste(args[2], "iqm", sep=""));
+writeMM(as(t(out_minus),"CsparseMatrix"), paste(args[2], "out_minus", sep=""), format="text");
+writeMM(as(t(out_plus),"CsparseMatrix"), paste(args[2], "out_plus", sep=""), format="text");
+writeMM(as(t(Q),"CsparseMatrix"), paste(args[2], "quantile", sep=""), format="text");
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/descriptivestats/Scale.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/descriptivestats/Scale.dml b/src/test/scripts/applications/descriptivestats/Scale.dml
index b59c235..e2362cb 100644
--- a/src/test/scripts/applications/descriptivestats/Scale.dml
+++ b/src/test/scripts/applications/descriptivestats/Scale.dml
@@ -1,114 +1,114 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# Note this script is externalized to customers, please do not change w/o consulting component owner.
-# How to invoke this dml script Scale.dml?
-# Assume S_HOME is set to the home of the dml script
-# Assume input and output directories are on hdfs as INPUT_DIR and OUTPUT_DIR
-# Assume rows = 10000 for V and rows = 5 for P
-# hadoop jar SystemML.jar -f $S_HOME/Scale.dml -args "$INPUT_DIR/vector" 10000 "$INPUT_DIR/prob 5
-# "$OUTPUT_DIR/mean" "$OUTPUT_DIR/std" "$OUTPUT_DIR/se" "$OUTPUT_DIR/var" "$OUTPUT_DIR/cv"
-# "$OUTPUT_DIR/min" "$OUTPUT_DIR/max" "$OUTPUT_DIR/rng" 
-# "$OUTPUT_DIR/g1" "$OUTPUT_DIR/se_g1" "$OUTPUT_DIR/g2" "$OUTPUT_DIR/se_g2" 
-# "$OUTPUT_DIR/median" "$OUTPUT_DIR/iqm"
-# "OUTPUT_DIR/out_minus" "$OUTPUT_DIR/out_plus" "$OUTPUT_DIR/quantile" 
-
-V = read($1, rows=$2, cols=1, format="text")
-P = read($3, rows=$4, cols=1, format="text")
-
-n = nrow(V)
-
-# sum
-s1 = sum(V)
-
-# 2nd central moment
-m2 = moment(V, 2)
-
-# 3rd central moment
-m3 = moment(V, 3)
-
-# 4th central moment
-m4 = moment(V, 4)
-
-# mean
-mu = mean(V)
-
-# variances
-var = n/(n-1.0)*m2
-
-# standard deviations
-std_dev = sqrt(var)
-
-# standard errors of mean
-SE = std_dev/sqrt(n)
-
-# coefficients of variation
-cv = std_dev/mu
-
-# min and max
-mn=min(V)
-mx=max(V)
-
-# range
-rng = mx - mn
-
-# Skewness
-g1 = m3/(std_dev^3)
-
-# standard error of skewness (not sure how it is defined without the weight)
-se_g1=sqrt( 6*n*(n-1.0) / ((n-2.0)*(n+1.0)*(n+3.0)) )
-
-# Kurtosis (using binomial formula)
-g2 = m4/(std_dev^4) - 3
-
-# Standard error of Kurtosis (not sure how it is defined without the weight)
-se_g2= sqrt( (4*(n^2-1)*se_g1^2)/((n+5.0)*(n-3.0)) )
-
-# median
-md = median(V)
-
-# quantile
-Q = quantile(V, P)
-
-# inter-quartile mean
-iqm = interQuartileMean(V)
-
-# outliers use ppred to describe it
-out_minus = ppred(V, mu-5*std_dev, "<")*V 
-out_plus = ppred(V, mu+5*std_dev, ">")*V
-
-write(mu, $5);
-write(std_dev, $6);
-write(SE, $7);
-write(var, $8);
-write(cv, $9);
-write(mn, $10);
-write(mx, $11);
-write(rng, $12);
-write(g1, $13);
-write(se_g1, $14);
-write(g2, $15);
-write(se_g2, $16);
-write(md, $17);
-write(iqm, $18);
-write(out_minus, $19, format="text");
-write(out_plus, $20, format="text");
-write(Q, $21, format="text");
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# Note this script is externalized to customers, please do not change w/o consulting component owner.
+# How to invoke this dml script Scale.dml?
+# Assume S_HOME is set to the home of the dml script
+# Assume input and output directories are on hdfs as INPUT_DIR and OUTPUT_DIR
+# Assume rows = 10000 for V and rows = 5 for P
+# hadoop jar SystemML.jar -f $S_HOME/Scale.dml -args "$INPUT_DIR/vector" 10000 "$INPUT_DIR/prob" 5
+# "$OUTPUT_DIR/mean" "$OUTPUT_DIR/std" "$OUTPUT_DIR/se" "$OUTPUT_DIR/var" "$OUTPUT_DIR/cv"
+# "$OUTPUT_DIR/min" "$OUTPUT_DIR/max" "$OUTPUT_DIR/rng" 
+# "$OUTPUT_DIR/g1" "$OUTPUT_DIR/se_g1" "$OUTPUT_DIR/g2" "$OUTPUT_DIR/se_g2" 
+# "$OUTPUT_DIR/median" "$OUTPUT_DIR/iqm"
+# "$OUTPUT_DIR/out_minus" "$OUTPUT_DIR/out_plus" "$OUTPUT_DIR/quantile" 
+
+V = read($1, rows=$2, cols=1, format="text")
+P = read($3, rows=$4, cols=1, format="text")
+
+n = nrow(V)
+
+# sum
+s1 = sum(V)
+
+# 2nd central moment
+m2 = moment(V, 2)
+
+# 3rd central moment
+m3 = moment(V, 3)
+
+# 4th central moment
+m4 = moment(V, 4)
+
+# mean
+mu = mean(V)
+
+# variances
+var = n/(n-1.0)*m2
+
+# standard deviations
+std_dev = sqrt(var)
+
+# standard errors of mean
+SE = std_dev/sqrt(n)
+
+# coefficients of variation
+cv = std_dev/mu
+
+# min and max
+mn=min(V)
+mx=max(V)
+
+# range
+rng = mx - mn
+
+# Skewness
+g1 = m3/(std_dev^3)
+
+# standard error of skewness (not sure how it is defined without the weight)
+se_g1=sqrt( 6*n*(n-1.0) / ((n-2.0)*(n+1.0)*(n+3.0)) )
+
+# Kurtosis (using binomial formula)
+g2 = m4/(std_dev^4) - 3
+
+# Standard error of Kurtosis (not sure how it is defined without the weight)
+se_g2= sqrt( (4*(n^2-1)*se_g1^2)/((n+5.0)*(n-3.0)) )
+
+# median
+md = median(V)
+
+# quantile
+Q = quantile(V, P)
+
+# inter-quartile mean
+iqm = interQuartileMean(V)
+
+# outliers use ppred to describe it
+out_minus = ppred(V, mu-5*std_dev, "<")*V 
+out_plus = ppred(V, mu+5*std_dev, ">")*V
+
+write(mu, $5);
+write(std_dev, $6);
+write(SE, $7);
+write(var, $8);
+write(cv, $9);
+write(mn, $10);
+write(mx, $11);
+write(rng, $12);
+write(g1, $13);
+write(se_g1, $14);
+write(g2, $15);
+write(se_g2, $16);
+write(md, $17);
+write(iqm, $18);
+write(out_minus, $19, format="text");
+write(out_plus, $20, format="text");
+write(Q, $21, format="text");

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/descriptivestats/ScaleCategorical.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/descriptivestats/ScaleCategorical.R b/src/test/scripts/applications/descriptivestats/ScaleCategorical.R
index 73bf9f3..e1d0880 100644
--- a/src/test/scripts/applications/descriptivestats/ScaleCategorical.R
+++ b/src/test/scripts/applications/descriptivestats/ScaleCategorical.R
@@ -1,69 +1,69 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# JUnit test class: dml.test.integration.descriptivestats.BivariateScaleCategoricalTest.java
-# command line invocation assuming $SC_HOME is set to the home of the R script
-# Rscript $SC_HOME/ScaleCategorical.R $SC_HOME/in/ $SC_HOME/expected/
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-A = readMM(paste(args[1], "A.mtx", sep=""));
-Y = readMM(paste(args[1], "Y.mtx", sep=""));
-
-Av = A[,1];
-Yv = Y[,1];
-
-W = nrow(A);
-my = mean(Yv); #sum(Yv)/W;
-varY = var(Yv);
-
-CFreqs = as.matrix(table(Av)); 
-CMeans = as.matrix(aggregate(Yv, by=list(Av), "mean")$x);
-CVars = as.matrix(aggregate(Yv, by=list(Av), "var")$x);
-
-# number of categories
-R = nrow(CFreqs);
-
-Eta = sqrt(1 - ( sum((CFreqs-1)*CVars) / ((W-1)*varY) ));
-
-anova_num = sum( (CFreqs*(CMeans-my)^2) )/(R-1);
-anova_den = sum( (CFreqs-1)*CVars )/(W-R);
-ANOVAF = anova_num/anova_den;
-
-print(anova_num, digits=15);
-print(anova_den, digits=15);
-
-write(Eta, paste(args[2], "Eta", sep=""));
-
-write(ANOVAF, paste(args[2], "AnovaF", sep=""));
-
-write(varY, paste(args[2], "VarY", sep=""));
-
-write(my, paste(args[2], "MeanY", sep=""));
-
-writeMM(as(CVars,"CsparseMatrix"), paste(args[2], "CVars", sep=""), format="text");
-writeMM(as(CFreqs,"CsparseMatrix"), paste(args[2], "CFreqs", sep=""), format="text");
-writeMM(as(CMeans,"CsparseMatrix"), paste(args[2], "CMeans", sep=""), format="text");
-
-
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# JUnit test class: dml.test.integration.descriptivestats.BivariateScaleCategoricalTest.java
+# command line invocation assuming $SC_HOME is set to the home of the R script
+# Rscript $SC_HOME/ScaleCategorical.R $SC_HOME/in/ $SC_HOME/expected/
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+A = readMM(paste(args[1], "A.mtx", sep=""));
+Y = readMM(paste(args[1], "Y.mtx", sep=""));
+
+Av = A[,1];
+Yv = Y[,1];
+
+W = nrow(A);
+my = mean(Yv); #sum(Yv)/W;
+varY = var(Yv);
+
+CFreqs = as.matrix(table(Av)); 
+CMeans = as.matrix(aggregate(Yv, by=list(Av), "mean")$x);
+CVars = as.matrix(aggregate(Yv, by=list(Av), "var")$x);
+
+# number of categories
+R = nrow(CFreqs);
+
+Eta = sqrt(1 - ( sum((CFreqs-1)*CVars) / ((W-1)*varY) ));
+
+anova_num = sum( (CFreqs*(CMeans-my)^2) )/(R-1);
+anova_den = sum( (CFreqs-1)*CVars )/(W-R);
+ANOVAF = anova_num/anova_den;
+
+print(anova_num, digits=15);
+print(anova_den, digits=15);
+
+write(Eta, paste(args[2], "Eta", sep=""));
+
+write(ANOVAF, paste(args[2], "AnovaF", sep=""));
+
+write(varY, paste(args[2], "VarY", sep=""));
+
+write(my, paste(args[2], "MeanY", sep=""));
+
+writeMM(as(CVars,"CsparseMatrix"), paste(args[2], "CVars", sep=""), format="text");
+writeMM(as(CFreqs,"CsparseMatrix"), paste(args[2], "CFreqs", sep=""), format="text");
+writeMM(as(CMeans,"CsparseMatrix"), paste(args[2], "CMeans", sep=""), format="text");
+
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/descriptivestats/ScaleCategorical.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/descriptivestats/ScaleCategorical.dml b/src/test/scripts/applications/descriptivestats/ScaleCategorical.dml
index d0d5135..f9d7efb 100644
--- a/src/test/scripts/applications/descriptivestats/ScaleCategorical.dml
+++ b/src/test/scripts/applications/descriptivestats/ScaleCategorical.dml
@@ -1,62 +1,62 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# Note this script is externalized to customers, please do not change w/o consulting component owner.
-# How to invoke this dml script ScaleCategorical.dml?
-# Assume SC_HOME is set to the home of the dml script
-# Assume input and output directories are on hdfs as INPUT_DIR and OUTPUT_DIR
-# Assume rows = 10000 for A and Y, A is categorical variable and Y is scale variable
-# hadoop jar SystemML.jar -f $SC_HOME/ScaleCategorical.dml -args "$INPUT_DIR/A" 10000 "$INPUT_DIR/Y" 
-#         "$OUPUT_DIR/VarY" "$OUTPUT_DIR/MeanY" "$OUTPUT_DIR/CFreqs" "$OUTPUT_DIR/CMeans" "$OUTPUT_DIR/CVars" 
-#         "$OUTPUT_DIR/Eta", "$OUTPUT_DIR/AnovaF"
-
-A = read($1, rows=$2, cols=1, format="text");
-Y = read($3, rows=$2, cols=1, format="text");
-
-# mean and variance in target variable
-W = nrow(A);
-my = mean(Y);
-varY = moment(Y,2) * W/(W-1.0);
-
-# category-wise (frequencies, means, variances)
-CFreqs = aggregate(target=Y, groups=A, fn="count"); 
-CMeans = aggregate(target=Y, groups=A, fn="mean");
-CVars =  aggregate(target=Y, groups=A, fn="variance");
-
-# number of categories
-R = nrow(CFreqs);
-
-Eta = sqrt(1 - ( sum((CFreqs-1)*CVars) / ((W-1)*varY) ));
-
-anova_num = sum( (CFreqs*(CMeans-my)^2) )/(R-1);
-anova_den = sum( (CFreqs-1)*CVars )/(W-R);
-ANOVAF = anova_num/anova_den;
-
-# output required statistics
-write(varY, $4);
-write(my, $5);
-
-write(CFreqs, $6, format="text");
-write(CMeans, $7, format="text");
-write(CVars, $8, format="text");
-
-write(Eta, $9);
-write(ANOVAF, $10);
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# Note this script is externalized to customers, please do not change w/o consulting component owner.
+# How to invoke this dml script ScaleCategorical.dml?
+# Assume SC_HOME is set to the home of the dml script
+# Assume input and output directories are on hdfs as INPUT_DIR and OUTPUT_DIR
+# Assume rows = 10000 for A and Y, A is categorical variable and Y is scale variable
+# hadoop jar SystemML.jar -f $SC_HOME/ScaleCategorical.dml -args "$INPUT_DIR/A" 10000 "$INPUT_DIR/Y" 
+#         "$OUTPUT_DIR/VarY" "$OUTPUT_DIR/MeanY" "$OUTPUT_DIR/CFreqs" "$OUTPUT_DIR/CMeans" "$OUTPUT_DIR/CVars" 
+#         "$OUTPUT_DIR/Eta" "$OUTPUT_DIR/AnovaF"
+
+A = read($1, rows=$2, cols=1, format="text");
+Y = read($3, rows=$2, cols=1, format="text");
+
+# mean and variance in target variable
+W = nrow(A);
+my = mean(Y);
+varY = moment(Y,2) * W/(W-1.0);
+
+# category-wise (frequencies, means, variances)
+CFreqs = aggregate(target=Y, groups=A, fn="count"); 
+CMeans = aggregate(target=Y, groups=A, fn="mean");
+CVars =  aggregate(target=Y, groups=A, fn="variance");
+
+# number of categories
+R = nrow(CFreqs);
+
+Eta = sqrt(1 - ( sum((CFreqs-1)*CVars) / ((W-1)*varY) ));
+
+anova_num = sum( (CFreqs*(CMeans-my)^2) )/(R-1);
+anova_den = sum( (CFreqs-1)*CVars )/(W-R);
+ANOVAF = anova_num/anova_den;
+
+# output required statistics
+write(varY, $4);
+write(my, $5);
+
+write(CFreqs, $6, format="text");
+write(CMeans, $7, format="text");
+write(CVars, $8, format="text");
+
+write(Eta, $9);
+write(ANOVAF, $10);

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/descriptivestats/ScaleCategoricalWithWeightsTest.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/descriptivestats/ScaleCategoricalWithWeightsTest.R b/src/test/scripts/applications/descriptivestats/ScaleCategoricalWithWeightsTest.R
index 6e67716..bdf1cc2 100644
--- a/src/test/scripts/applications/descriptivestats/ScaleCategoricalWithWeightsTest.R
+++ b/src/test/scripts/applications/descriptivestats/ScaleCategoricalWithWeightsTest.R
@@ -1,78 +1,78 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# JUnit test class: dml.test.integration.descriptivestats.BivariateScaleCategoricalTest.java
-# command line invocation assuming $SC_HOME is set to the home of the R script
-# Rscript $SC_HOME/ScaleCategorical.R $SC_HOME/in/ $SC_HOME/expected/
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-# Usage: R --vanilla -args Xfile X < ScaleCategoricalTest.R
-
-#parseCommandArgs()
-######################
-Atemp = readMM(paste(args[1], "A.mtx", sep=""));
-Ytemp = readMM(paste(args[1], "Y.mtx", sep=""));
-WM = readMM(paste(args[1], "WM.mtx", sep=""));
-
-Yv=rep(Ytemp[,1],WM[,1])
-Av=rep(Atemp[,1],WM[,1])
-
-W = sum(WM);
-my = sum(Yv)/W;
-varY = var(Yv);
-
-CFreqs = as.matrix(table(Av)); 
-CMeans = as.matrix(aggregate(Yv, by=list(Av), "mean")$x);
-CVars = as.matrix(aggregate(Yv, by=list(Av), "var")$x);
-
-# number of categories
-R = nrow(CFreqs);
-
-Eta = sqrt(1 - ( sum((CFreqs-1)*CVars) / ((W-1)*varY) ));
-
-anova_num = sum( (CFreqs*(CMeans-my)^2) )/(R-1);
-anova_den = sum( (CFreqs-1)*CVars )/(W-R);
-ANOVAF = anova_num/anova_den;
-
-print(W, digits=15);
-print(R, digits=15);
-print(anova_num, digits=15);
-print(anova_den, digits=15);
-
-#######################
-
-write(Eta, paste(args[2], "Eta", sep=""));
-
-write(ANOVAF, paste(args[2], "AnovaF", sep=""));
-
-write(varY, paste(args[2], "VarY", sep=""));
-
-write(my, paste(args[2], "MeanY", sep=""));
-
-writeMM(as(CVars,"CsparseMatrix"), paste(args[2], "CVars", sep=""), format="text");
-writeMM(as(CFreqs,"CsparseMatrix"), paste(args[2], "CFreqs", sep=""), format="text");
-writeMM(as(CMeans,"CsparseMatrix"), paste(args[2], "CMeans", sep=""), format="text");
-
-
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# JUnit test class: dml.test.integration.descriptivestats.BivariateScaleCategoricalTest.java
+# command line invocation assuming $SC_HOME is set to the home of the R script
+# Rscript $SC_HOME/ScaleCategoricalWithWeightsTest.R $SC_HOME/in/ $SC_HOME/expected/
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+# Usage: R --vanilla -args Xfile X < ScaleCategoricalTest.R
+
+#parseCommandArgs()
+######################
+Atemp = readMM(paste(args[1], "A.mtx", sep=""));
+Ytemp = readMM(paste(args[1], "Y.mtx", sep=""));
+WM = readMM(paste(args[1], "WM.mtx", sep=""));
+
+Yv=rep(Ytemp[,1],WM[,1])
+Av=rep(Atemp[,1],WM[,1])
+
+W = sum(WM);
+my = sum(Yv)/W;
+varY = var(Yv);
+
+CFreqs = as.matrix(table(Av)); 
+CMeans = as.matrix(aggregate(Yv, by=list(Av), "mean")$x);
+CVars = as.matrix(aggregate(Yv, by=list(Av), "var")$x);
+
+# number of categories
+R = nrow(CFreqs);
+
+Eta = sqrt(1 - ( sum((CFreqs-1)*CVars) / ((W-1)*varY) ));
+
+anova_num = sum( (CFreqs*(CMeans-my)^2) )/(R-1);
+anova_den = sum( (CFreqs-1)*CVars )/(W-R);
+ANOVAF = anova_num/anova_den;
+
+print(W, digits=15);
+print(R, digits=15);
+print(anova_num, digits=15);
+print(anova_den, digits=15);
+
+#######################
+
+write(Eta, paste(args[2], "Eta", sep=""));
+
+write(ANOVAF, paste(args[2], "AnovaF", sep=""));
+
+write(varY, paste(args[2], "VarY", sep=""));
+
+write(my, paste(args[2], "MeanY", sep=""));
+
+writeMM(as(CVars,"CsparseMatrix"), paste(args[2], "CVars", sep=""), format="text");
+writeMM(as(CFreqs,"CsparseMatrix"), paste(args[2], "CFreqs", sep=""), format="text");
+writeMM(as(CMeans,"CsparseMatrix"), paste(args[2], "CMeans", sep=""), format="text");
+
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/descriptivestats/ScaleCategoricalWithWeightsTest.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/descriptivestats/ScaleCategoricalWithWeightsTest.dml b/src/test/scripts/applications/descriptivestats/ScaleCategoricalWithWeightsTest.dml
index b8186fb..7615d54 100644
--- a/src/test/scripts/applications/descriptivestats/ScaleCategoricalWithWeightsTest.dml
+++ b/src/test/scripts/applications/descriptivestats/ScaleCategoricalWithWeightsTest.dml
@@ -1,65 +1,65 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# Note this script is externalized to customers, please do not change w/o consulting component owner.
-# How to invoke this dml script ScaleCategorical.dml?
-# Assume SC_HOME is set to the home of the dml script
-# Assume input and output directories are on hdfs as INPUT_DIR and OUTPUT_DIR
-# Assume rows = 10000 for A and Y, A is categorical variable and Y is scale variable
-# hadoop jar SystemML.jar -f $SC_HOME/ScaleCategorical.dml -args "$INPUT_DIR/A" 10000 "$INPUT_DIR/Y" "$INPUT_DIR/WM" 
-#         "$OUPUT_DIR/VarY" "$OUTPUT_DIR/MeanY" "$OUTPUT_DIR/CFreqs" "$OUTPUT_DIR/CMeans" "$OUTPUT_DIR/CVars" 
-#         "$OUTPUT_DIR/Eta", "$OUTPUT_DIR/AnovaF"
-
-#A <- nominal variable
-#Y <- scale variable
-#WM <- weights
-
-A = read($1, rows=$2, cols=1, format="text");
-Y = read($3, rows=$2, cols=1, format="text");
-WM = read($4, rows=$2, cols=1, format="text");
-
-W = sum(WM);
-my = sum(Y*WM)/W;
-varY = moment(Y,WM,2) * W/(W-1.0);
-
-CFreqs = aggregate(target=WM, groups=A, fn="sum");
-CMeans = aggregate(target=Y, groups=A, weights=WM, fn="mean");
-CVars =  aggregate(target=Y, groups=A, weights=WM, fn="variance");
-
-# number of categories
-R = nrow(CFreqs);
-
-Eta = sqrt(1 - ( sum((CFreqs-1)*CVars) / ((W-1)*varY) ));
-
-anova_num = sum( (CFreqs*(CMeans-my)^2) )/(R-1);
-anova_den = sum( (CFreqs-1)*CVars )/(W-R);
-ANOVAF = anova_num/anova_den;
-
-# output required statistics
-write(varY, $5);
-write(my, $6);
-
-write(CFreqs, $7, format="text");
-write(CMeans, $8, format="text");
-write(CVars, $9, format="text");
-
-write(Eta, $10);
-write(ANOVAF, $11);
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# Note this script is externalized to customers, please do not change w/o consulting component owner.
+# How to invoke this dml script ScaleCategoricalWithWeightsTest.dml?
+# Assume SC_HOME is set to the home of the dml script
+# Assume input and output directories are on hdfs as INPUT_DIR and OUTPUT_DIR
+# Assume rows = 10000 for A, Y and WM; A is the categorical variable, Y is the scale variable and WM holds the weights
+# hadoop jar SystemML.jar -f $SC_HOME/ScaleCategoricalWithWeightsTest.dml -args "$INPUT_DIR/A" 10000 "$INPUT_DIR/Y" "$INPUT_DIR/WM" 
+#         "$OUTPUT_DIR/VarY" "$OUTPUT_DIR/MeanY" "$OUTPUT_DIR/CFreqs" "$OUTPUT_DIR/CMeans" "$OUTPUT_DIR/CVars" 
+#         "$OUTPUT_DIR/Eta" "$OUTPUT_DIR/AnovaF"
+
+#A <- nominal variable
+#Y <- scale variable
+#WM <- weights
+
+A = read($1, rows=$2, cols=1, format="text");
+Y = read($3, rows=$2, cols=1, format="text");
+WM = read($4, rows=$2, cols=1, format="text");
+
+W = sum(WM);
+my = sum(Y*WM)/W;
+varY = moment(Y,WM,2) * W/(W-1.0);
+
+CFreqs = aggregate(target=WM, groups=A, fn="sum");
+CMeans = aggregate(target=Y, groups=A, weights=WM, fn="mean");
+CVars =  aggregate(target=Y, groups=A, weights=WM, fn="variance");
+
+# number of categories
+R = nrow(CFreqs);
+
+Eta = sqrt(1 - ( sum((CFreqs-1)*CVars) / ((W-1)*varY) ));
+
+anova_num = sum( (CFreqs*(CMeans-my)^2) )/(R-1);
+anova_den = sum( (CFreqs-1)*CVars )/(W-R);
+ANOVAF = anova_num/anova_den;
+
+# output required statistics
+write(varY, $5);
+write(my, $6);
+
+write(CFreqs, $7, format="text");
+write(CMeans, $8, format="text");
+write(CVars, $9, format="text");
+
+write(Eta, $10);
+write(ANOVAF, $11);

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/descriptivestats/ScaleScale.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/descriptivestats/ScaleScale.R b/src/test/scripts/applications/descriptivestats/ScaleScale.R
index 202a057..690f3a3 100644
--- a/src/test/scripts/applications/descriptivestats/ScaleScale.R
+++ b/src/test/scripts/applications/descriptivestats/ScaleScale.R
@@ -1,38 +1,38 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# JUnit test class: dml.test.integration.descriptivestats.CategoricalCategoricalTest.java
-# command line invocation assuming $SS_HOME is set to the home of the R script
-# Rscript $SS_HOME/ScaleScale.R $SS_HOME/in/ $SS_HOME/expected/
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-X = readMM(paste(args[1], "X.mtx", sep=""))
-Y = readMM(paste(args[1], "Y.mtx", sep=""))
-
-# cor.test returns a list containing t-statistic, df, p-value, and R
-cort = cor.test(X[,1], Y[,1]);
-
-R = as.numeric(cort[4]);
-
-write(R, paste(args[2], "PearsonR", sep=""));
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# JUnit test class: dml.test.integration.descriptivestats.CategoricalCategoricalTest.java
+# command line invocation assuming $SS_HOME is set to the home of the R script
+# Rscript $SS_HOME/ScaleScale.R $SS_HOME/in/ $SS_HOME/expected/
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+X = readMM(paste(args[1], "X.mtx", sep=""))
+Y = readMM(paste(args[1], "Y.mtx", sep=""))
+
+# cor.test returns a list whose first four elements are the t-statistic, df, p-value, and the correlation estimate (Pearson's R)
+cort = cor.test(X[,1], Y[,1]);
+
+R = as.numeric(cort[4]);
+
+write(R, paste(args[2], "PearsonR", sep=""));

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/descriptivestats/ScaleScale.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/descriptivestats/ScaleScale.dml b/src/test/scripts/applications/descriptivestats/ScaleScale.dml
index e3d2183..0fd2179 100644
--- a/src/test/scripts/applications/descriptivestats/ScaleScale.dml
+++ b/src/test/scripts/applications/descriptivestats/ScaleScale.dml
@@ -1,48 +1,48 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# Note this script is externalized to customers, please do not change w/o consulting component owner.
-# How to invoke this dml script ScaleScale.dml?
-# Assume $SS_HOME is set to the home of the dml script
-# Assume input and output directories are on hdfs as INPUT_DIR and OUTPUT_DIR
-# Assume X and Y are scale variables and both have 100000 rows
-# hadoop jar SystemML.jar -f $SS_HOME/ScaleScale.dml -args "$INPUT_DIR/X" 100000 "$INPUT_DIR/Y" "$OUPUT_DIR/PearsonR"
-
-X = read($1, rows=$2, cols=1, format="text");
-Y = read($3, rows=$2, cols=1, format="text");
-
-W = nrow(X);
-
-# Unweighted co-variance
-covXY = cov(X,Y);
-
-# compute standard deviations for both X and Y by computing 2^nd central moment
-m2X = moment(X,2);
-m2Y = moment(Y,2);
-sigmaX = sqrt(m2X * (W/(W-1.0)) );
-sigmaY = sqrt(m2Y * (W/(W-1.0)) );
-
-# Pearson's R
-R = covXY / (sigmaX*sigmaY);
-
-write(R, $4);
-
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# Note this script is externalized to customers, please do not change w/o consulting component owner.
+# How to invoke this dml script ScaleScale.dml?
+# Assume $SS_HOME is set to the home of the dml script
+# Assume input and output directories are on hdfs as INPUT_DIR and OUTPUT_DIR
+# Assume X and Y are scale variables and both have 100000 rows
+# hadoop jar SystemML.jar -f $SS_HOME/ScaleScale.dml -args "$INPUT_DIR/X" 100000 "$INPUT_DIR/Y" "$OUTPUT_DIR/PearsonR"
+
+X = read($1, rows=$2, cols=1, format="text");
+Y = read($3, rows=$2, cols=1, format="text");
+
+W = nrow(X);
+
+# Unweighted co-variance
+covXY = cov(X,Y);
+
+# compute standard deviations for both X and Y from their 2nd central moments
+m2X = moment(X,2);
+m2Y = moment(Y,2);
+sigmaX = sqrt(m2X * (W/(W-1.0)) );
+sigmaY = sqrt(m2Y * (W/(W-1.0)) );
+
+# Pearson's R
+R = covXY / (sigmaX*sigmaY);
+
+write(R, $4);
+
+