You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by du...@apache.org on 2016/01/22 17:33:52 UTC

[16/51] [partial] incubator-systemml git commit: [SYSTEMML-482] [SYSTEMML-480] Adding a Git attributes file to enforce Unix-styled line endings, and normalizing all of the line endings.

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/src/test/scripts/applications/parfor/parfor_naive-bayes.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/parfor/parfor_naive-bayes.R b/src/test/scripts/applications/parfor/parfor_naive-bayes.R
index f455c2c..cb0d00f 100644
--- a/src/test/scripts/applications/parfor/parfor_naive-bayes.R
+++ b/src/test/scripts/applications/parfor/parfor_naive-bayes.R
@@ -1,61 +1,61 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-D = as.matrix(readMM(paste(args[1], "D.mtx", sep="")))
-C = as.matrix(readMM(paste(args[1], "C.mtx", sep="")))
-
-# reading input args
-numClasses = as.integer(args[2]);
-laplace_correction = 1
-
-numRows = nrow(D)
-numFeatures = ncol(D)
-
-# Compute conditionals
-
-# Compute the feature counts for each class
-classFeatureCounts = matrix(0, numClasses, numFeatures)
-for (i in 1:numFeatures) {
-  Col = D[,i]
-  classFeatureCounts[,i] = aggregate(as.vector(Col), by=list(as.vector(C)), FUN=sum)[,2];
-}
-
-# Compute the total feature count for each class 
-# and add the number of features to this sum
-# for subsequent regularization (Laplace's rule)
-classSums = rowSums(classFeatureCounts) + numFeatures*laplace_correction
-
-# Compute class conditional probabilities
-repClassSums = classSums %*% matrix(1,1,numFeatures);
-class_conditionals = (classFeatureCounts + laplace_correction) / repClassSums;
-
-# Compute class priors
-class_counts = aggregate(as.vector(C), by=list(as.vector(C)), FUN=length)[,2]
-class_prior = class_counts / numRows;
-
-# write out the model
-writeMM(as(class_prior, "CsparseMatrix"), paste(args[3], "class_prior", sep=""));
-writeMM(as(class_conditionals, "CsparseMatrix"), paste(args[3], "class_conditionals", sep=""));
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+D = as.matrix(readMM(paste(args[1], "D.mtx", sep="")))
+C = as.matrix(readMM(paste(args[1], "C.mtx", sep="")))
+
+# reading input args
+numClasses = as.integer(args[2]);
+laplace_correction = 1
+
+numRows = nrow(D)
+numFeatures = ncol(D)
+
+# Compute conditionals
+
+# Compute the feature counts for each class
+classFeatureCounts = matrix(0, numClasses, numFeatures)
+for (i in 1:numFeatures) {
+  Col = D[,i]
+  classFeatureCounts[,i] = aggregate(as.vector(Col), by=list(as.vector(C)), FUN=sum)[,2];
+}
+
+# Compute the total feature count for each class 
+# and add the number of features to this sum
+# for subsequent regularization (Laplace's rule)
+classSums = rowSums(classFeatureCounts) + numFeatures*laplace_correction
+
+# Compute class conditional probabilities
+repClassSums = classSums %*% matrix(1,1,numFeatures);
+class_conditionals = (classFeatureCounts + laplace_correction) / repClassSums;
+
+# Compute class priors
+class_counts = aggregate(as.vector(C), by=list(as.vector(C)), FUN=length)[,2]
+class_prior = class_counts / numRows;
+
+# write out the model
+writeMM(as(class_prior, "CsparseMatrix"), paste(args[3], "class_prior", sep=""));
+writeMM(as(class_conditionals, "CsparseMatrix"), paste(args[3], "class_conditionals", sep=""));

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/src/test/scripts/applications/parfor/parfor_univariate.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/parfor/parfor_univariate.R b/src/test/scripts/applications/parfor/parfor_univariate.R
index cb5dfb1..14f9f95 100644
--- a/src/test/scripts/applications/parfor/parfor_univariate.R
+++ b/src/test/scripts/applications/parfor/parfor_univariate.R
@@ -1,155 +1,155 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-library("moments")
-
-A1 <- readMM(paste(args[1], "D.mtx", sep=""))
-K1 <- readMM(paste(args[1], "K.mtx", sep=""))
-A <- as.matrix(A1);
-K <- as.matrix(K1);
-maxC = args[2];  
-
-
-# number of features/attributes
-n = ncol(A);
-
-# number of data records
-m = nrow(A);
-
-# number of statistics
-numBaseStats = 17; # (14 scale stats, 3 categorical stats)
-
-max_kind = max(K);
-  
-# matrices to store computed statistics
-baseStats = array(0,dim=c(numBaseStats,n)); 
-
-if (maxC > 0) {
-  countsArray = array(0,dim=c(maxC,n)); 
-}
-  
-for(i in 1:n) {
-
-	# project out the i^th column
-	F = as.matrix(A[,i]);
-
-	kind = K[1,i];
-
-	if ( kind == 1 ) {
-		print("scale");
-		# compute SCALE statistics on the projected column
-		minimum = min(F);
-		maximum = max(F);
-		rng = maximum - minimum;
-
-		mu = mean(F);
-		m2 = moment(F, order=2, central=TRUE);
-		m3 = moment(F, order=3, central=TRUE);
-		m4 = moment(F, order=4, central=TRUE);
-
-		var = m/(m-1.0)*m2;
-    
-		std_dev = sqrt(var);
-		se = std_dev/sqrt(m);
-		cv = std_dev/mu;
-
-		g1 = m3/(std_dev^3);
-		g2 = m4/(std_dev^4) - 3;
-		#se_g1=sqrt( 6*m*(m-1.0) / ((m-2.0)*(m+1.0)*(m+3.0)) ); 
-		se_g1=sqrt( (6/(m-2.0)) * (m/(m+1.0)) * ((m-1.0)/(m+3.0)) ); 
-
-		#se_g2= sqrt( (4*(m^2-1)*se_g1^2)/((m+5.0)*(m-3.0)) );  
-		se_g2=sqrt( (4/(m+5.0)) * ((m^2-1)/(m-3.0)) * se_g1^2 ); 
-
-		md = median(F); #quantile(F, 0.5, type = 1);
-
-		S = sort(F)
-		q25d=m*0.25
-		q75d=m*0.75
-		q25i=ceiling(q25d)
-		q75i=ceiling(q75d)
-
-		iqm = sum(S[(q25i+1):q75i])
-		iqm = iqm + (q25i-q25d)*S[q25i] - (q75i-q75d)*S[q75i]
-		iqm = iqm/(m*0.5)
-
-		#iqm = mean( subset(F, F>quantile(F,1/4,type = 1) & F<=quantile(F,3/4,type = 1) ) )
-    
-		# place the computed statistics in output matrices
-		baseStats[1,i] = minimum;
-		baseStats[2,i] = maximum;
-		baseStats[3,i] = rng;
-
-		baseStats[4,i] = mu;
-		baseStats[5,i] = var;
-		baseStats[6,i] = std_dev;
-		baseStats[7,i] = se;
-		baseStats[8,i] = cv;
-
-		baseStats[9,i] = g1;
-		baseStats[10,i] = g2;
-		baseStats[11,i] = se_g1;
-		baseStats[12,i] = se_g2;
-
-		baseStats[13,i] = md;
-		baseStats[14,i] = iqm;
-	}
-	else {
-		if (kind == 2 | kind == 3) {
-			print("categorical");
-			
-			# check if the categorical column has valid values
-			minF = min(F);
-			if (minF <=0) {
-				print("ERROR: Categorical attributes can only take values starting from 1.");
-			}
-			else {
-				# compute CATEGORICAL statistics on the projected column
-				cat_counts = table(F);  # counts for each category
-				num_cat = nrow(cat_counts); # number of categories
-
-        mx = max(t(as.vector(cat_counts)))
-        mode = which(cat_counts == mx)    
-        
-      	numModes = length(cat_counts[ cat_counts==mx ]);
-
-				# place the computed statistics in output matrices
-				baseStats[15,i] = num_cat;
-				baseStats[16,i] = mode;
-				baseStats[17,i] = numModes;
-
-        if (max_kind > 1) {
-				  countsArray[1:length(cat_counts),i] = cat_counts;
-				}
-			}
-		}
-	}
-}
-
-writeMM(as(baseStats, "CsparseMatrix"), paste(args[3], "base.stats", sep=""));
-if (max_kind > 1) {
-  writeMM(as(countsArray, "CsparseMatrix"), paste(args[3], "categorical.counts", sep=""));
-}
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+library("moments")
+
+A1 <- readMM(paste(args[1], "D.mtx", sep=""))
+K1 <- readMM(paste(args[1], "K.mtx", sep=""))
+A <- as.matrix(A1);
+K <- as.matrix(K1);
+maxC = args[2];  
+
+
+# number of features/attributes
+n = ncol(A);
+
+# number of data records
+m = nrow(A);
+
+# number of statistics
+numBaseStats = 17; # (14 scale stats, 3 categorical stats)
+
+max_kind = max(K);
+  
+# matrices to store computed statistics
+baseStats = array(0,dim=c(numBaseStats,n)); 
+
+if (maxC > 0) {
+  countsArray = array(0,dim=c(maxC,n)); 
+}
+  
+for(i in 1:n) {
+
+	# project out the i^th column
+	F = as.matrix(A[,i]);
+
+	kind = K[1,i];
+
+	if ( kind == 1 ) {
+		print("scale");
+		# compute SCALE statistics on the projected column
+		minimum = min(F);
+		maximum = max(F);
+		rng = maximum - minimum;
+
+		mu = mean(F);
+		m2 = moment(F, order=2, central=TRUE);
+		m3 = moment(F, order=3, central=TRUE);
+		m4 = moment(F, order=4, central=TRUE);
+
+		var = m/(m-1.0)*m2;
+    
+		std_dev = sqrt(var);
+		se = std_dev/sqrt(m);
+		cv = std_dev/mu;
+
+		g1 = m3/(std_dev^3);
+		g2 = m4/(std_dev^4) - 3;
+		#se_g1=sqrt( 6*m*(m-1.0) / ((m-2.0)*(m+1.0)*(m+3.0)) ); 
+		se_g1=sqrt( (6/(m-2.0)) * (m/(m+1.0)) * ((m-1.0)/(m+3.0)) ); 
+
+		#se_g2= sqrt( (4*(m^2-1)*se_g1^2)/((m+5.0)*(m-3.0)) );  
+		se_g2=sqrt( (4/(m+5.0)) * ((m^2-1)/(m-3.0)) * se_g1^2 ); 
+
+		md = median(F); #quantile(F, 0.5, type = 1);
+
+		S = sort(F)
+		q25d=m*0.25
+		q75d=m*0.75
+		q25i=ceiling(q25d)
+		q75i=ceiling(q75d)
+
+		iqm = sum(S[(q25i+1):q75i])
+		iqm = iqm + (q25i-q25d)*S[q25i] - (q75i-q75d)*S[q75i]
+		iqm = iqm/(m*0.5)
+
+		#iqm = mean( subset(F, F>quantile(F,1/4,type = 1) & F<=quantile(F,3/4,type = 1) ) )
+    
+		# place the computed statistics in output matrices
+		baseStats[1,i] = minimum;
+		baseStats[2,i] = maximum;
+		baseStats[3,i] = rng;
+
+		baseStats[4,i] = mu;
+		baseStats[5,i] = var;
+		baseStats[6,i] = std_dev;
+		baseStats[7,i] = se;
+		baseStats[8,i] = cv;
+
+		baseStats[9,i] = g1;
+		baseStats[10,i] = g2;
+		baseStats[11,i] = se_g1;
+		baseStats[12,i] = se_g2;
+
+		baseStats[13,i] = md;
+		baseStats[14,i] = iqm;
+	}
+	else {
+		if (kind == 2 | kind == 3) {
+			print("categorical");
+			
+			# check if the categorical column has valid values
+			minF = min(F);
+			if (minF <=0) {
+				print("ERROR: Categorical attributes can only take values starting from 1.");
+			}
+			else {
+				# compute CATEGORICAL statistics on the projected column
+				cat_counts = table(F);  # counts for each category
+				num_cat = nrow(cat_counts); # number of categories
+
+        mx = max(t(as.vector(cat_counts)))
+        mode = which(cat_counts == mx)    
+        
+      	numModes = length(cat_counts[ cat_counts==mx ]);
+
+				# place the computed statistics in output matrices
+				baseStats[15,i] = num_cat;
+				baseStats[16,i] = mode;
+				baseStats[17,i] = numModes;
+
+        if (max_kind > 1) {
+				  countsArray[1:length(cat_counts),i] = cat_counts;
+				}
+			}
+		}
+	}
+}
+
+writeMM(as(baseStats, "CsparseMatrix"), paste(args[3], "base.stats", sep=""));
+if (max_kind > 1) {
+  writeMM(as(countsArray, "CsparseMatrix"), paste(args[3], "categorical.counts", sep=""));
+}
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/src/test/scripts/applications/parfor/parfor_univariate0.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/parfor/parfor_univariate0.dml b/src/test/scripts/applications/parfor/parfor_univariate0.dml
index 061d4a0..2a6a9c5 100644
--- a/src/test/scripts/applications/parfor/parfor_univariate0.dml
+++ b/src/test/scripts/applications/parfor/parfor_univariate0.dml
@@ -1,166 +1,166 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-#
-# DML Script to compute univariate statistics for all attributes 
-# in a given data set
-#
-# Three inputs:
-#     $1) A - input data
-#     $2) K - row matrix that denotes the "kind" for each 
-#              attribute
-#             kind=1 for scale, kind=2 for nominal,
-#             kind=3 for ordinal
-#     $3) maxC - maximum number of categories in any categorical 
-#         attribute
-#
-# One output:
-#     $4) output directory in which following three statistics 
-#         files are created
-#         + base.stats - matrix with all 17 statistics (14 scale, 
-#         3 categorical) computed for all attributes
-#         + categorical.counts - matrix in which each column 
-#         gives the category-wise counts for all categories in 
-#         that attribute
-#
-#
-
-A = read($1); # data file
-K = read($2); # attribute kind file
-maxC = $3;  # max number of categories in any categorical attribute
-
-
-if (maxC < 0) {
-	print("ERROR: maximum number maxC of categories must be a positve value.");
-}
-else {
-	
-	
-	# number of features/attributes
-	n = ncol(A);
-
-	# number of data records
-	m = nrow(A);
-
-	# number of statistics
-	numBaseStats = 17; # (14 scale stats, 3 categorical stats)
-
-    max_kind = max(K);
-    
-	# matrices to store computed statistics
-	baseStats = matrix(0, rows=numBaseStats, cols=n);
-	
-	if (maxC > 0) {
-	  countsArray = matrix(0, rows=maxC, cols=n);
-    }
-    	
-	for(i in 1:n) {
-
-		# project out the i^th column
-		F = A[,i];
-
-		kind = castAsScalar(K[1,i]);
-
-		if ( kind == 1 ) {
-			print("[" + i + "] Scale");
-			# compute SCALE statistics on the projected column
-			minimum = min(F);
-			maximum = max(F);
-			rng = maximum - minimum;
-
-			mu = mean(F);
-			m2 = moment(F, 2);
-			m3 = moment(F, 3);
-			m4 = moment(F, 4);
-
-			var = m/(m-1.0)*m2;
-			std_dev = sqrt(var);
-			se = std_dev/sqrt(m);
-			cv = std_dev/mu;
-
-			g1 = m3/(std_dev^3);
-			g2 = m4/(std_dev^4) - 3;
-			#se_g1=sqrt( 6*m*(m-1.0) / ((m-2.0)*(m+1.0)*(m+3.0)) ); 
-			se_g1=sqrt( (6/(m-2.0)) * (m/(m+1.0)) * ((m-1.0)/(m+3.0)) ); 
-
-			#se_g2= sqrt( (4*(m^2-1)*se_g1^2)/((m+5.0)*(m-3.0)) );  
-			se_g2=sqrt( (4/(m+5.0)) * ((m^2-1)/(m-3.0)) * se_g1^2 ); 
-
-			md = median(F); #quantile(F, 0.5);
-			iqm = interQuartileMean(F);
-
-			# place the computed statistics in output matrices
-			baseStats[1,i] = minimum;
-			baseStats[2,i] = maximum;
-			baseStats[3,i] = rng;
-
-			baseStats[4,i] = mu;
-			baseStats[5,i] = var;
-			baseStats[6,i] = std_dev;
-			baseStats[7,i] = se;
-			baseStats[8,i] = cv;
-
-			baseStats[9,i] = g1;
-			baseStats[10,i] = g2;
-			baseStats[11,i] = se_g1;
-			baseStats[12,i] = se_g2;
-
-			baseStats[13,i] = md;
-			baseStats[14,i] = iqm;
-		}
-		else {
-			if (kind == 2 | kind == 3) {
-				print("[" + i + "] Categorical");
-				
-				# check if the categorical column has valid values
-				minF = min(F);
-				if (minF <=0) {
-					print("ERROR: Categorical attributes can only take values starting from 1. Encountered a value " + minF + " in attribute " + i);
-				}
-				else {
-					# compute CATEGORICAL statistics on the projected column
-					cat_counts = table(F,1);  # counts for each category
-					num_cat = nrow(cat_counts); # number of categories
-
-					mode = rowIndexMax(t(cat_counts));
-					mx = max(cat_counts)
-					modeArr =  ppred(cat_counts, mx, "==")
-					numModes = sum(modeArr);
-
-					# place the computed statistics in output matrices
-					baseStats[15,i] = num_cat;
-					baseStats[16,i] = mode;
-					baseStats[17,i] = numModes;
-
-          if (max_kind > 1) {
-					  countsArray[,i] = cat_counts;
-					}
-				}
-			}
-		}
-	}
-
-	write(baseStats, $4+"/base.stats");
-	if (max_kind > 1) {
-		write(countsArray, $4+"/categorical.counts");
-	}
-
-}
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+#
+# DML Script to compute univariate statistics for all attributes 
+# in a given data set
+#
+# Three inputs:
+#     $1) A - input data
+#     $2) K - row matrix that denotes the "kind" for each 
+#              attribute
+#             kind=1 for scale, kind=2 for nominal,
+#             kind=3 for ordinal
+#     $3) maxC - maximum number of categories in any categorical 
+#         attribute
+#
+# One output:
+#     $4) output directory in which following three statistics 
+#         files are created
+#         + base.stats - matrix with all 17 statistics (14 scale, 
+#         3 categorical) computed for all attributes
+#         + categorical.counts - matrix in which each column 
+#         gives the category-wise counts for all categories in 
+#         that attribute
+#
+#
+
+A = read($1); # data file
+K = read($2); # attribute kind file
+maxC = $3;  # max number of categories in any categorical attribute
+
+
+if (maxC < 0) {
+	print("ERROR: maximum number maxC of categories must be a positve value.");
+}
+else {
+	
+	
+	# number of features/attributes
+	n = ncol(A);
+
+	# number of data records
+	m = nrow(A);
+
+	# number of statistics
+	numBaseStats = 17; # (14 scale stats, 3 categorical stats)
+
+    max_kind = max(K);
+    
+	# matrices to store computed statistics
+	baseStats = matrix(0, rows=numBaseStats, cols=n);
+	
+	if (maxC > 0) {
+	  countsArray = matrix(0, rows=maxC, cols=n);
+    }
+    	
+	for(i in 1:n) {
+
+		# project out the i^th column
+		F = A[,i];
+
+		kind = castAsScalar(K[1,i]);
+
+		if ( kind == 1 ) {
+			print("[" + i + "] Scale");
+			# compute SCALE statistics on the projected column
+			minimum = min(F);
+			maximum = max(F);
+			rng = maximum - minimum;
+
+			mu = mean(F);
+			m2 = moment(F, 2);
+			m3 = moment(F, 3);
+			m4 = moment(F, 4);
+
+			var = m/(m-1.0)*m2;
+			std_dev = sqrt(var);
+			se = std_dev/sqrt(m);
+			cv = std_dev/mu;
+
+			g1 = m3/(std_dev^3);
+			g2 = m4/(std_dev^4) - 3;
+			#se_g1=sqrt( 6*m*(m-1.0) / ((m-2.0)*(m+1.0)*(m+3.0)) ); 
+			se_g1=sqrt( (6/(m-2.0)) * (m/(m+1.0)) * ((m-1.0)/(m+3.0)) ); 
+
+			#se_g2= sqrt( (4*(m^2-1)*se_g1^2)/((m+5.0)*(m-3.0)) );  
+			se_g2=sqrt( (4/(m+5.0)) * ((m^2-1)/(m-3.0)) * se_g1^2 ); 
+
+			md = median(F); #quantile(F, 0.5);
+			iqm = interQuartileMean(F);
+
+			# place the computed statistics in output matrices
+			baseStats[1,i] = minimum;
+			baseStats[2,i] = maximum;
+			baseStats[3,i] = rng;
+
+			baseStats[4,i] = mu;
+			baseStats[5,i] = var;
+			baseStats[6,i] = std_dev;
+			baseStats[7,i] = se;
+			baseStats[8,i] = cv;
+
+			baseStats[9,i] = g1;
+			baseStats[10,i] = g2;
+			baseStats[11,i] = se_g1;
+			baseStats[12,i] = se_g2;
+
+			baseStats[13,i] = md;
+			baseStats[14,i] = iqm;
+		}
+		else {
+			if (kind == 2 | kind == 3) {
+				print("[" + i + "] Categorical");
+				
+				# check if the categorical column has valid values
+				minF = min(F);
+				if (minF <=0) {
+					print("ERROR: Categorical attributes can only take values starting from 1. Encountered a value " + minF + " in attribute " + i);
+				}
+				else {
+					# compute CATEGORICAL statistics on the projected column
+					cat_counts = table(F,1);  # counts for each category
+					num_cat = nrow(cat_counts); # number of categories
+
+					mode = rowIndexMax(t(cat_counts));
+					mx = max(cat_counts)
+					modeArr =  ppred(cat_counts, mx, "==")
+					numModes = sum(modeArr);
+
+					# place the computed statistics in output matrices
+					baseStats[15,i] = num_cat;
+					baseStats[16,i] = mode;
+					baseStats[17,i] = numModes;
+
+          if (max_kind > 1) {
+					  countsArray[,i] = cat_counts;
+					}
+				}
+			}
+		}
+	}
+
+	write(baseStats, $4+"/base.stats");
+	if (max_kind > 1) {
+		write(countsArray, $4+"/categorical.counts");
+	}
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/src/test/scripts/applications/parfor/parfor_univariate1.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/parfor/parfor_univariate1.dml b/src/test/scripts/applications/parfor/parfor_univariate1.dml
index e22fd86..1f120ef 100644
--- a/src/test/scripts/applications/parfor/parfor_univariate1.dml
+++ b/src/test/scripts/applications/parfor/parfor_univariate1.dml
@@ -1,166 +1,166 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-#
-# DML Script to compute univariate statistics for all attributes 
-# in a given data set
-#
-# Three inputs:
-#     $1) A - input data
-#     $2) K - row matrix that denotes the "kind" for each 
-#              attribute
-#             kind=1 for scale, kind=2 for nominal,
-#             kind=3 for ordinal
-#     $3) maxC - maximum number of categories in any categorical 
-#         attribute
-#
-# One output:
-#     $4) output directory in which following three statistics 
-#         files are created
-#         + base.stats - matrix with all 17 statistics (14 scale, 
-#         3 categorical) computed for all attributes
-#         + categorical.counts - matrix in which each column 
-#         gives the category-wise counts for all categories in 
-#         that attribute
-#
-#
-
-A = read($1); # data file
-K = read($2); # attribute kind file
-maxC = $3;  # max number of categories in any categorical attribute
-
-
-if (maxC < 0) {
-	print("ERROR: maximum number maxC of categories must be a positve value.");
-}
-else {
-	
-	
-	# number of features/attributes
-	n = ncol(A);
-
-	# number of data records
-	m = nrow(A);
-
-	# number of statistics
-	numBaseStats = 17; # (14 scale stats, 3 categorical stats)
-
-    max_kind = max(K);
-    
-	# matrices to store computed statistics
-	baseStats = matrix(0, rows=numBaseStats, cols=n);
-	
-	if (maxC > 0) {
-	  countsArray = matrix(0, rows=maxC, cols=n);
-    }
-	
-	parfor(i in 1:n, par=4, mode=LOCAL, check=0, opt=NONE) {
-
-		# project out the i^th column
-		F = A[,i];
-
-		kind = castAsScalar(K[1,i]);
-
-		if ( kind == 1 ) {
-			print("[" + i + "] Scale");
-			# compute SCALE statistics on the projected column
-			minimum = min(F);
-			maximum = max(F);
-			rng = maximum - minimum;
-
-			mu = mean(F);
-			m2 = moment(F, 2);
-			m3 = moment(F, 3);
-			m4 = moment(F, 4);
-
-			var = m/(m-1.0)*m2;
-			std_dev = sqrt(var);
-			se = std_dev/sqrt(m);
-			cv = std_dev/mu;
-
-			g1 = m3/(std_dev^3);
-			g2 = m4/(std_dev^4) - 3;
-			#se_g1=sqrt( 6*m*(m-1.0) / ((m-2.0)*(m+1.0)*(m+3.0)) ); 
-			se_g1=sqrt( (6/(m-2.0)) * (m/(m+1.0)) * ((m-1.0)/(m+3.0)) ); 
-
-			#se_g2= sqrt( (4*(m^2-1)*se_g1^2)/((m+5.0)*(m-3.0)) );  
-			se_g2=sqrt( (4/(m+5.0)) * ((m^2-1)/(m-3.0)) * se_g1^2 ); 
-
-			md = median(F); #quantile(F, 0.5);
-			iqm = interQuartileMean(F);
-
-			# place the computed statistics in output matrices
-			baseStats[1,i] = minimum;
-			baseStats[2,i] = maximum;
-			baseStats[3,i] = rng;
-
-			baseStats[4,i] = mu;
-			baseStats[5,i] = var;
-			baseStats[6,i] = std_dev;
-			baseStats[7,i] = se;
-			baseStats[8,i] = cv;
-
-			baseStats[9,i] = g1;
-			baseStats[10,i] = g2;
-			baseStats[11,i] = se_g1;
-			baseStats[12,i] = se_g2;
-
-			baseStats[13,i] = md;
-			baseStats[14,i] = iqm;
-		}
-		else {
-			if (kind == 2 | kind == 3) {
-				print("[" + i + "] Categorical");
-				
-				# check if the categorical column has valid values
-				minF = min(F);
-				if (minF <=0) {
-					print("ERROR: Categorical attributes can only take values starting from 1. Encountered a value " + minF + " in attribute " + i);
-				}
-				else {
-					# compute CATEGORICAL statistics on the projected column
-					cat_counts = table(F,1);  # counts for each category
-					num_cat = nrow(cat_counts); # number of categories
-
-					mode = rowIndexMax(t(cat_counts));
-					mx = max(cat_counts)
-					modeArr =  ppred(cat_counts, mx, "==")
-					numModes = sum(modeArr);
-
-					# place the computed statistics in output matrices
-					baseStats[15,i] = num_cat;
-					baseStats[16,i] = mode;
-					baseStats[17,i] = numModes;
-
-          if (max_kind > 1) {
-					  countsArray[,i] = cat_counts;
-					}
-				}
-			}
-		}
-	}
-
-	write(baseStats, $4+"/base.stats");
-	if (max_kind > 1) {
-		write(countsArray, $4+"/categorical.counts");
-	}
-
-}
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+#
+# DML Script to compute univariate statistics for all attributes 
+# in a given data set
+#
+# Three inputs:
+#     $1) A - input data
+#     $2) K - row matrix that denotes the "kind" for each 
+#              attribute
+#             kind=1 for scale, kind=2 for nominal,
+#             kind=3 for ordinal
+#     $3) maxC - maximum number of categories in any categorical 
+#         attribute
+#
+# One output:
+#     $4) output directory in which following three statistics 
+#         files are created
+#         + base.stats - matrix with all 17 statistics (14 scale, 
+#         3 categorical) computed for all attributes
+#         + categorical.counts - matrix in which each column 
+#         gives the category-wise counts for all categories in 
+#         that attribute
+#
+#
+
+A = read($1); # data file
+K = read($2); # attribute kind file
+maxC = $3;  # max number of categories in any categorical attribute
+
+
+if (maxC < 0) {
+	print("ERROR: maximum number maxC of categories must be a positve value.");
+}
+else {
+	
+	
+	# number of features/attributes
+	n = ncol(A);
+
+	# number of data records
+	m = nrow(A);
+
+	# number of statistics
+	numBaseStats = 17; # (14 scale stats, 3 categorical stats)
+
+    max_kind = max(K);
+    
+	# matrices to store computed statistics
+	baseStats = matrix(0, rows=numBaseStats, cols=n);
+	
+	if (maxC > 0) {
+	  countsArray = matrix(0, rows=maxC, cols=n);
+    }
+	
+	parfor(i in 1:n, par=4, mode=LOCAL, check=0, opt=NONE) {
+
+		# project out the i^th column
+		F = A[,i];
+
+		kind = castAsScalar(K[1,i]);
+
+		if ( kind == 1 ) {
+			print("[" + i + "] Scale");
+			# compute SCALE statistics on the projected column
+			minimum = min(F);
+			maximum = max(F);
+			rng = maximum - minimum;
+
+			mu = mean(F);
+			m2 = moment(F, 2);
+			m3 = moment(F, 3);
+			m4 = moment(F, 4);
+
+			var = m/(m-1.0)*m2;
+			std_dev = sqrt(var);
+			se = std_dev/sqrt(m);
+			cv = std_dev/mu;
+
+			g1 = m3/(std_dev^3);
+			g2 = m4/(std_dev^4) - 3;
+			#se_g1=sqrt( 6*m*(m-1.0) / ((m-2.0)*(m+1.0)*(m+3.0)) ); 
+			se_g1=sqrt( (6/(m-2.0)) * (m/(m+1.0)) * ((m-1.0)/(m+3.0)) ); 
+
+			#se_g2= sqrt( (4*(m^2-1)*se_g1^2)/((m+5.0)*(m-3.0)) );  
+			se_g2=sqrt( (4/(m+5.0)) * ((m^2-1)/(m-3.0)) * se_g1^2 ); 
+
+			md = median(F); #quantile(F, 0.5);
+			iqm = interQuartileMean(F);
+
+			# place the computed statistics in output matrices
+			baseStats[1,i] = minimum;
+			baseStats[2,i] = maximum;
+			baseStats[3,i] = rng;
+
+			baseStats[4,i] = mu;
+			baseStats[5,i] = var;
+			baseStats[6,i] = std_dev;
+			baseStats[7,i] = se;
+			baseStats[8,i] = cv;
+
+			baseStats[9,i] = g1;
+			baseStats[10,i] = g2;
+			baseStats[11,i] = se_g1;
+			baseStats[12,i] = se_g2;
+
+			baseStats[13,i] = md;
+			baseStats[14,i] = iqm;
+		}
+		else {
+			if (kind == 2 | kind == 3) {
+				print("[" + i + "] Categorical");
+				
+				# check if the categorical column has valid values
+				minF = min(F);
+				if (minF <=0) {
+					print("ERROR: Categorical attributes can only take values starting from 1. Encountered a value " + minF + " in attribute " + i);
+				}
+				else {
+					# compute CATEGORICAL statistics on the projected column
+					cat_counts = table(F,1);  # counts for each category
+					num_cat = nrow(cat_counts); # number of categories
+
+					mode = rowIndexMax(t(cat_counts));
+					mx = max(cat_counts)
+					modeArr =  ppred(cat_counts, mx, "==")
+					numModes = sum(modeArr);
+
+					# place the computed statistics in output matrices
+					baseStats[15,i] = num_cat;
+					baseStats[16,i] = mode;
+					baseStats[17,i] = numModes;
+
+          if (max_kind > 1) {
+					  countsArray[,i] = cat_counts;
+					}
+				}
+			}
+		}
+	}
+
+	write(baseStats, $4+"/base.stats");
+	if (max_kind > 1) {
+		write(countsArray, $4+"/categorical.counts");
+	}
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/src/test/scripts/applications/parfor/parfor_univariate4.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/parfor/parfor_univariate4.dml b/src/test/scripts/applications/parfor/parfor_univariate4.dml
index 1ebfcbd..8953c64 100644
--- a/src/test/scripts/applications/parfor/parfor_univariate4.dml
+++ b/src/test/scripts/applications/parfor/parfor_univariate4.dml
@@ -1,166 +1,166 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-#
-# DML Script to compute univariate statistics for all attributes 
-# in a given data set
-#
-# Three inputs:
-#     $1) A - input data
-#     $2) K - row matrix that denotes the "kind" for each 
-#              attribute
-#             kind=1 for scale, kind=2 for nominal,
-#             kind=3 for ordinal
-#     $3) maxC - maximum number of categories in any categorical 
-#         attribute
-#
-# One output:
-#     $4) output directory in which following three statistics 
-#         files are created
-#         + base.stats - matrix with all 17 statistics (14 scale, 
-#         3 categorical) computed for all attributes
-#         + categorical.counts - matrix in which each column 
-#         gives the category-wise counts for all categories in 
-#         that attribute
-#
-#
-
-A = read($1); # data file
-K = read($2); # attribute kind file
-maxC = $3;  # max number of categories in any categorical attribute
-
-
-if (maxC < 0) {
-	print("ERROR: maximum number maxC of categories must be a positve value.");
-}
-else {
-	
-	
-	# number of features/attributes
-	n = ncol(A);
-
-	# number of data records
-	m = nrow(A);
-
-	# number of statistics
-	numBaseStats = 17; # (14 scale stats, 3 categorical stats)
-
-    max_kind = max(K);
-    
-	# matrices to store computed statistics
-	baseStats = matrix(0, rows=numBaseStats, cols=n);
-	
-	if (maxC > 0) {
-	  countsArray = matrix(0, rows=maxC, cols=n);
-    }
-    
-	parfor(i in 1:n, check=0) {
-
-		# project out the i^th column
-		F = A[,i];
-
-		kind = castAsScalar(K[1,i]);
-
-		if ( kind == 1 ) {
-			print("[" + i + "] Scale");
-			# compute SCALE statistics on the projected column
-			minimum = min(F);
-			maximum = max(F);
-			rng = maximum - minimum;
-
-			mu = mean(F);
-			m2 = moment(F, 2);
-			m3 = moment(F, 3);
-			m4 = moment(F, 4);
-
-			var = m/(m-1.0)*m2;
-			std_dev = sqrt(var);
-			se = std_dev/sqrt(m);
-			cv = std_dev/mu;
-
-			g1 = m3/(std_dev^3);
-			g2 = m4/(std_dev^4) - 3;
-			#se_g1=sqrt( 6*m*(m-1.0) / ((m-2.0)*(m+1.0)*(m+3.0)) ); 
-			se_g1=sqrt( (6/(m-2.0)) * (m/(m+1.0)) * ((m-1.0)/(m+3.0)) ); 
-
-			#se_g2= sqrt( (4*(m^2-1)*se_g1^2)/((m+5.0)*(m-3.0)) );  
-			se_g2=sqrt( (4/(m+5.0)) * ((m^2-1)/(m-3.0)) * se_g1^2 ); 
-
-			md = median(F) #quantile(F, 0.5);
-			iqm = interQuartileMean(F);
-
-			# place the computed statistics in output matrices
-			baseStats[1,i] = minimum;
-			baseStats[2,i] = maximum;
-			baseStats[3,i] = rng;
-
-			baseStats[4,i] = mu;
-			baseStats[5,i] = var;
-			baseStats[6,i] = std_dev;
-			baseStats[7,i] = se;
-			baseStats[8,i] = cv;
-
-			baseStats[9,i] = g1;
-			baseStats[10,i] = g2;
-			baseStats[11,i] = se_g1;
-			baseStats[12,i] = se_g2;
-
-			baseStats[13,i] = md;
-			baseStats[14,i] = iqm;
-		}
-		else {
-			if (kind == 2 | kind == 3) {
-				print("[" + i + "] Categorical");
-				
-				# check if the categorical column has valid values
-				minF = min(F);
-				if (minF <=0) {
-					print("ERROR: Categorical attributes can only take values starting from 1. Encountered a value " + minF + " in attribute " + i);
-				}
-				else {
-					# compute CATEGORICAL statistics on the projected column
-					cat_counts = table(F,1);  # counts for each category
-					num_cat = nrow(cat_counts); # number of categories
-
-					mode = rowIndexMax(t(cat_counts));
-					mx = max(cat_counts)
-					modeArr =  ppred(cat_counts, mx, "==")
-					numModes = sum(modeArr);
-
-					# place the computed statistics in output matrices
-					baseStats[15,i] = num_cat;
-					baseStats[16,i] = mode;
-					baseStats[17,i] = numModes;
-
-          if (max_kind > 1) {
-					  countsArray[,i] = cat_counts;
-					}
-				}
-			}
-		}
-	}
-
-	write(baseStats, $4+"/base.stats");
-	if (max_kind > 1) {
-		write(countsArray, $4+"/categorical.counts");
-	}
-
-}
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+#
+# DML Script to compute univariate statistics for all attributes 
+# in a given data set
+#
+# Three inputs:
+#     $1) A - input data
+#     $2) K - row matrix that denotes the "kind" for each 
+#              attribute
+#             kind=1 for scale, kind=2 for nominal,
+#             kind=3 for ordinal
+#     $3) maxC - maximum number of categories in any categorical 
+#         attribute
+#
+# One output:
+#     $4) output directory in which following three statistics 
+#         files are created
+#         + base.stats - matrix with all 17 statistics (14 scale, 
+#         3 categorical) computed for all attributes
+#         + categorical.counts - matrix in which each column 
+#         gives the category-wise counts for all categories in 
+#         that attribute
+#
+#
+
+A = read($1); # data file
+K = read($2); # attribute kind file
+maxC = $3;  # max number of categories in any categorical attribute
+
+
+if (maxC < 0) {
+	print("ERROR: maximum number maxC of categories must be a positve value.");
+}
+else {
+	
+	
+	# number of features/attributes
+	n = ncol(A);
+
+	# number of data records
+	m = nrow(A);
+
+	# number of statistics
+	numBaseStats = 17; # (14 scale stats, 3 categorical stats)
+
+    max_kind = max(K);
+    
+	# matrices to store computed statistics
+	baseStats = matrix(0, rows=numBaseStats, cols=n);
+	
+	if (maxC > 0) {
+	  countsArray = matrix(0, rows=maxC, cols=n);
+    }
+    
+	parfor(i in 1:n, check=0) {
+
+		# project out the i^th column
+		F = A[,i];
+
+		kind = castAsScalar(K[1,i]);
+
+		if ( kind == 1 ) {
+			print("[" + i + "] Scale");
+			# compute SCALE statistics on the projected column
+			minimum = min(F);
+			maximum = max(F);
+			rng = maximum - minimum;
+
+			mu = mean(F);
+			m2 = moment(F, 2);
+			m3 = moment(F, 3);
+			m4 = moment(F, 4);
+
+			var = m/(m-1.0)*m2;
+			std_dev = sqrt(var);
+			se = std_dev/sqrt(m);
+			cv = std_dev/mu;
+
+			g1 = m3/(std_dev^3);
+			g2 = m4/(std_dev^4) - 3;
+			#se_g1=sqrt( 6*m*(m-1.0) / ((m-2.0)*(m+1.0)*(m+3.0)) ); 
+			se_g1=sqrt( (6/(m-2.0)) * (m/(m+1.0)) * ((m-1.0)/(m+3.0)) ); 
+
+			#se_g2= sqrt( (4*(m^2-1)*se_g1^2)/((m+5.0)*(m-3.0)) );  
+			se_g2=sqrt( (4/(m+5.0)) * ((m^2-1)/(m-3.0)) * se_g1^2 ); 
+
+			md = median(F) #quantile(F, 0.5);
+			iqm = interQuartileMean(F);
+
+			# place the computed statistics in output matrices
+			baseStats[1,i] = minimum;
+			baseStats[2,i] = maximum;
+			baseStats[3,i] = rng;
+
+			baseStats[4,i] = mu;
+			baseStats[5,i] = var;
+			baseStats[6,i] = std_dev;
+			baseStats[7,i] = se;
+			baseStats[8,i] = cv;
+
+			baseStats[9,i] = g1;
+			baseStats[10,i] = g2;
+			baseStats[11,i] = se_g1;
+			baseStats[12,i] = se_g2;
+
+			baseStats[13,i] = md;
+			baseStats[14,i] = iqm;
+		}
+		else {
+			if (kind == 2 | kind == 3) {
+				print("[" + i + "] Categorical");
+				
+				# check if the categorical column has valid values
+				minF = min(F);
+				if (minF <=0) {
+					print("ERROR: Categorical attributes can only take values starting from 1. Encountered a value " + minF + " in attribute " + i);
+				}
+				else {
+					# compute CATEGORICAL statistics on the projected column
+					cat_counts = table(F,1);  # counts for each category
+					num_cat = nrow(cat_counts); # number of categories
+
+					mode = rowIndexMax(t(cat_counts));
+					mx = max(cat_counts)
+					modeArr =  ppred(cat_counts, mx, "==")
+					numModes = sum(modeArr);
+
+					# place the computed statistics in output matrices
+					baseStats[15,i] = num_cat;
+					baseStats[16,i] = mode;
+					baseStats[17,i] = numModes;
+
+          if (max_kind > 1) {
+					  countsArray[,i] = cat_counts;
+					}
+				}
+			}
+		}
+	}
+
+	write(baseStats, $4+"/base.stats");
+	if (max_kind > 1) {
+		write(countsArray, $4+"/categorical.counts");
+	}
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/src/test/scripts/applications/validation/LinearLogisticRegression.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/validation/LinearLogisticRegression.dml b/src/test/scripts/applications/validation/LinearLogisticRegression.dml
index b5d3955..473b2dc 100644
--- a/src/test/scripts/applications/validation/LinearLogisticRegression.dml
+++ b/src/test/scripts/applications/validation/LinearLogisticRegression.dml
@@ -1,246 +1,246 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# Solves Linear Logistic Regression using Trust Region methods. 
-# Can be adapted for L2-SVMs and more general unconstrained optimization problems also
-# setup optimization parameters (See: Trust Region Newton Method for Logistic Regression, Lin, Weng and Keerthi, JMLR 9 (2008) 627-650)
-# The parameter C is the weight that the algorithm puts on the loss function, instead of the regularizer.
-# if intercept = 1, then w has one extra value than the dimensions of X. Predictions are computed as X*w[1:n-1,1] + w[n,1]
-# Arguments: 1.X 2.y 3.intercept 4.max_iteration 5.C 6.w
-
-# 100K dataset
-# hadoop jar SystemML.jar -f LinearLogisticRegression.dml -args itau/logreg/X_100k_500 itau/logreg/y_100k 0 50 0.001 itau/logreg/w_100k_1
-
-# 1M dataset
-# hadoop jar SystemML.jar -f LinearLogisticRegression.dml -args itau/logreg/X_100m_5k itau/logreg/y_100m_1 0 50 0.001 itau/demo/logreg/w_100m_1
-
-
-# internal parameters
-tol = 0.001
-eta0 = 0.0001
-eta1 = 0.25
-eta2 = 0.75
-sigma1 = 0.25
-sigma2 = 0.5
-sigma3 = 4.0
-psi = 0.1 
-
-# read training data files
-X = read($1)
-intercept = $3
-
-D = ncol(X)
-#initialize w
-w = Rand(rows=D, cols=1, min=0.0, max=0.0);
-zeros_D = Rand(rows = D, cols = 1, min = 0.0, max = 0.0);
-
-if (intercept == 1) {
- num_samples = nrow(X);
- ones  = Rand(rows=num_samples, cols=1, min=1, max=1, pdf="uniform");
- X = append(X, ones);
- zero_matrix = Rand(rows=1, cols=1, min=0.0, max=0.0);
- w = t(append(t(w), zero_matrix));
- zeros_D = t(append(t(zeros_D), zero_matrix));
-}
-
-N = nrow(X)
-
-# read (training and test) labels
-y = read($2)
-
-maxiter = $4
-maxinneriter = 1000
-
-C = $5
-
-e = Rand(rows=1, cols=1, min=1.0, max=1.0); 
-o = X %*% w
-logistic = 1.0/(1.0 + exp( -y * o))
-
-obj = 0.5 * t(w) %*% w + C*sum(-log(logistic))
-grad = w + C*t(X) %*% ((logistic - 1)*y)
-logisticD = logistic*(1-logistic)
-delta = sqrt(sum(grad*grad))
-
-# number of iterations
-iter = 0
-
-# starting point for CG
-
-# VS: change
-zeros_N = Rand(rows = N, cols = 1, min = 0.0, max = 0.0);
-
-# boolean for convergence check
-
-converge = (delta < tol) | (iter > maxiter)
-norm_r2 = sum(grad*grad)
-
-# VS: change
-norm_grad = sqrt(norm_r2)
-norm_grad_initial = norm_grad
-
-alpha = t(w) %*% w
-alpha2 = alpha
-
-while(!converge) {
- 
- norm_grad = sqrt(sum(grad*grad))
- 
- print("-- Outer Iteration = " + iter)
- objScalar = castAsScalar(obj)
- print("     Iterations = " + iter + ", Objective = " + objScalar + ", Gradient Norm = " + norm_grad)
- 
- # SOLVE TRUST REGION SUB-PROBLEM
- s = zeros_D
- os = zeros_N
- r = -grad
- d = r
- inneriter = 0
- innerconverge = ( sqrt(sum(r*r)) <= psi * norm_grad) 
- while (!innerconverge) {
-     inneriter = inneriter + 1
-  norm_r2 = sum(r*r)
-  od = X %*% d
-  Hd = d + C*(t(X) %*% (logisticD*od))
-  alpha_deno = t(d) %*% Hd 
-  alpha = norm_r2 / alpha_deno
- 
-  s = s + castAsScalar(alpha) * d
-  os = os + castAsScalar(alpha) * od
-
-  sts = t(s) %*% s
-  delta2 = delta*delta 
-  stsScalar = castAsScalar(sts)
-  
-  shouldBreak = FALSE;  # to mimic "break" in the following 'if' condition
-  if (stsScalar > delta2) {
-      print("      --- cg reaches trust region boundary")
-   s = s - castAsScalar(alpha) * d
-   os = os - castAsScalar(alpha) * od
-   std = t(s) %*% d
-   dtd = t(d) %*% d
-   sts = t(s) %*% s
-   rad = sqrt(std*std + dtd*(delta2 - sts))
-   stdScalar = castAsScalar(std)
-   if(stdScalar >= 0) {
-    tau = (delta2 - sts)/(std + rad)
-   } 
-   else {
-    tau = (rad - std)/dtd
-   }
-      
-   s = s + castAsScalar(tau) * d
-   os = os + castAsScalar(tau) * od
-   r = r - castAsScalar(tau) * Hd
-   
-   #break
-   shouldBreak = TRUE;
-   innerconverge = TRUE;
-  
-  } 
-  
-  if (!shouldBreak) {
-   r = r - castAsScalar(alpha) * Hd
-   old_norm_r2 = norm_r2 
-   norm_r2 = sum(r*r)
-   beta = norm_r2/old_norm_r2
-   d = r + beta*d
-   innerconverge = (sqrt(norm_r2) <= psi * norm_grad) | (inneriter > maxinneriter)
-  }
- }  
- 
- print("      --- Inner CG Iteration =  " + inneriter)
- # END TRUST REGION SUB-PROBLEM
- # compute rho, update w, obtain delta
- gs = t(s) %*% grad
- qk = -0.5*(gs - (t(s) %*% r))
- 
- wnew = w + s 
- onew = o + os
- logisticnew = 1.0/(1.0 + exp(-y * onew ))
- objnew = 0.5 * t(wnew) %*% wnew + C * sum(-log(logisticnew))
- 
- actred = (obj - objnew)
- actredScalar = castAsScalar(actred)
- rho = actred / qk
- qkScalar = castAsScalar(qk)
- rhoScalar = castAsScalar(rho);
- snorm = sqrt(sum( s * s ))
-
- print("     Actual    = " + actredScalar)
- print("     Predicted = " + qkScalar)
- 
- if (iter==0) {
-    delta = min(delta, snorm)
- }
- alpha2 = objnew - obj - gs
- alpha2Scalar = castAsScalar(alpha2)
- if (alpha2Scalar <= 0) {
-    alpha = sigma3*e
- } 
- else {
-    ascalar = max(sigma1, -0.5*castAsScalar(gs)/alpha2Scalar)  
-    alpha = ascalar*e
- }
-
- if (rhoScalar > eta0) {
-  
-  w = wnew
-  o = onew
-  grad = w + C*t(X) %*% ((logisticnew - 1) * y )
-  norm_grad = sqrt(sum(grad*grad))
-  logisticD = logisticnew * (1 - logisticnew)
-  obj = objnew 
- } 
-
- alphaScalar = castAsScalar(alpha)
- if (rhoScalar < eta0){
-  delta = min(max( alphaScalar , sigma1) * snorm, sigma2 * delta )
- }
- else {
-  if (rhoScalar < eta1){
-   delta = max(sigma1 * delta, min( alphaScalar  * snorm, sigma2 * delta))
-  }
-  else { 
-   if (rhoScalar < eta2) {
-    delta = max(sigma1 * delta, min( alphaScalar * snorm, sigma3 * delta))
-   }
-   else {
-    delta = max(delta, min( alphaScalar * snorm, sigma3 * delta))
-   }
-  }
- } 
- 
- o2 = y * o
- correct = sum(ppred(o2, 0, ">"))
- accuracy = correct*100.0/N 
- iter = iter + 1
- #converge = (norm_grad < (tol * norm_grad_initial)) | (iter > maxiter)
- converge = (norm_grad < tol) | (iter > maxiter)
-
- print("     Delta =  " + delta)
- print("     Training Accuracy =  " +  accuracy)
- print("     Correct =  " + correct)
- print("     OuterIter =  " + iter)
- print("     Converge =  " + converge)
-} 
-
-write(w, $6, format="text");
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# Solves Linear Logistic Regression using Trust Region methods. 
+# Can be adapted for L2-SVMs and more general unconstrained optimization problems also
+# setup optimization parameters (See: Trust Region Newton Method for Logistic Regression, Lin, Weng and Keerthi, JMLR 9 (2008) 627-650)
+# The parameter C is the weight that the algorithm puts on the loss function, instead of the regularizer.
+# if intercept = 1, then w has one extra value than the dimensions of X. Predictions are computed as X*w[1:n-1,1] + w[n,1]
+# Arguments: 1.X 2.y 3.intercept 4.max_iteration 5.C 6.w
+
+# 100K dataset
+# hadoop jar SystemML.jar -f LinearLogisticRegression.dml -args itau/logreg/X_100k_500 itau/logreg/y_100k 0 50 0.001 itau/logreg/w_100k_1
+
+# 1M dataset
+# hadoop jar SystemML.jar -f LinearLogisticRegression.dml -args itau/logreg/X_100m_5k itau/logreg/y_100m_1 0 50 0.001 itau/demo/logreg/w_100m_1
+
+
+# internal parameters
+tol = 0.001
+eta0 = 0.0001
+eta1 = 0.25
+eta2 = 0.75
+sigma1 = 0.25
+sigma2 = 0.5
+sigma3 = 4.0
+psi = 0.1 
+
+# read training data files
+X = read($1)
+intercept = $3
+
+D = ncol(X)
+#initialize w
+w = Rand(rows=D, cols=1, min=0.0, max=0.0);
+zeros_D = Rand(rows = D, cols = 1, min = 0.0, max = 0.0);
+
+if (intercept == 1) {
+ num_samples = nrow(X);
+ ones  = Rand(rows=num_samples, cols=1, min=1, max=1, pdf="uniform");
+ X = append(X, ones);
+ zero_matrix = Rand(rows=1, cols=1, min=0.0, max=0.0);
+ w = t(append(t(w), zero_matrix));
+ zeros_D = t(append(t(zeros_D), zero_matrix));
+}
+
+N = nrow(X)
+
+# read (training and test) labels
+y = read($2)
+
+maxiter = $4
+maxinneriter = 1000
+
+C = $5
+
+e = Rand(rows=1, cols=1, min=1.0, max=1.0); 
+o = X %*% w
+logistic = 1.0/(1.0 + exp( -y * o))
+
+obj = 0.5 * t(w) %*% w + C*sum(-log(logistic))
+grad = w + C*t(X) %*% ((logistic - 1)*y)
+logisticD = logistic*(1-logistic)
+delta = sqrt(sum(grad*grad))
+
+# number of iterations
+iter = 0
+
+# starting point for CG
+
+# VS: change
+zeros_N = Rand(rows = N, cols = 1, min = 0.0, max = 0.0);
+
+# boolean for convergence check
+
+converge = (delta < tol) | (iter > maxiter)
+norm_r2 = sum(grad*grad)
+
+# VS: change
+norm_grad = sqrt(norm_r2)
+norm_grad_initial = norm_grad
+
+alpha = t(w) %*% w
+alpha2 = alpha
+
+while(!converge) {
+ 
+ norm_grad = sqrt(sum(grad*grad))
+ 
+ print("-- Outer Iteration = " + iter)
+ objScalar = castAsScalar(obj)
+ print("     Iterations = " + iter + ", Objective = " + objScalar + ", Gradient Norm = " + norm_grad)
+ 
+ # SOLVE TRUST REGION SUB-PROBLEM
+ s = zeros_D
+ os = zeros_N
+ r = -grad
+ d = r
+ inneriter = 0
+ innerconverge = ( sqrt(sum(r*r)) <= psi * norm_grad) 
+ while (!innerconverge) {
+     inneriter = inneriter + 1
+  norm_r2 = sum(r*r)
+  od = X %*% d
+  Hd = d + C*(t(X) %*% (logisticD*od))
+  alpha_deno = t(d) %*% Hd 
+  alpha = norm_r2 / alpha_deno
+ 
+  s = s + castAsScalar(alpha) * d
+  os = os + castAsScalar(alpha) * od
+
+  sts = t(s) %*% s
+  delta2 = delta*delta 
+  stsScalar = castAsScalar(sts)
+  
+  shouldBreak = FALSE;  # to mimic "break" in the following 'if' condition
+  if (stsScalar > delta2) {
+      print("      --- cg reaches trust region boundary")
+   s = s - castAsScalar(alpha) * d
+   os = os - castAsScalar(alpha) * od
+   std = t(s) %*% d
+   dtd = t(d) %*% d
+   sts = t(s) %*% s
+   rad = sqrt(std*std + dtd*(delta2 - sts))
+   stdScalar = castAsScalar(std)
+   if(stdScalar >= 0) {
+    tau = (delta2 - sts)/(std + rad)
+   } 
+   else {
+    tau = (rad - std)/dtd
+   }
+      
+   s = s + castAsScalar(tau) * d
+   os = os + castAsScalar(tau) * od
+   r = r - castAsScalar(tau) * Hd
+   
+   #break
+   shouldBreak = TRUE;
+   innerconverge = TRUE;
+  
+  } 
+  
+  if (!shouldBreak) {
+   r = r - castAsScalar(alpha) * Hd
+   old_norm_r2 = norm_r2 
+   norm_r2 = sum(r*r)
+   beta = norm_r2/old_norm_r2
+   d = r + beta*d
+   innerconverge = (sqrt(norm_r2) <= psi * norm_grad) | (inneriter > maxinneriter)
+  }
+ }  
+ 
+ print("      --- Inner CG Iteration =  " + inneriter)
+ # END TRUST REGION SUB-PROBLEM
+ # compute rho, update w, obtain delta
+ gs = t(s) %*% grad
+ qk = -0.5*(gs - (t(s) %*% r))
+ 
+ wnew = w + s 
+ onew = o + os
+ logisticnew = 1.0/(1.0 + exp(-y * onew ))
+ objnew = 0.5 * t(wnew) %*% wnew + C * sum(-log(logisticnew))
+ 
+ actred = (obj - objnew)
+ actredScalar = castAsScalar(actred)
+ rho = actred / qk
+ qkScalar = castAsScalar(qk)
+ rhoScalar = castAsScalar(rho);
+ snorm = sqrt(sum( s * s ))
+
+ print("     Actual    = " + actredScalar)
+ print("     Predicted = " + qkScalar)
+ 
+ if (iter==0) {
+    delta = min(delta, snorm)
+ }
+ alpha2 = objnew - obj - gs
+ alpha2Scalar = castAsScalar(alpha2)
+ if (alpha2Scalar <= 0) {
+    alpha = sigma3*e
+ } 
+ else {
+    ascalar = max(sigma1, -0.5*castAsScalar(gs)/alpha2Scalar)  
+    alpha = ascalar*e
+ }
+
+ if (rhoScalar > eta0) {
+  
+  w = wnew
+  o = onew
+  grad = w + C*t(X) %*% ((logisticnew - 1) * y )
+  norm_grad = sqrt(sum(grad*grad))
+  logisticD = logisticnew * (1 - logisticnew)
+  obj = objnew 
+ } 
+
+ alphaScalar = castAsScalar(alpha)
+ if (rhoScalar < eta0){
+  delta = min(max( alphaScalar , sigma1) * snorm, sigma2 * delta )
+ }
+ else {
+  if (rhoScalar < eta1){
+   delta = max(sigma1 * delta, min( alphaScalar  * snorm, sigma2 * delta))
+  }
+  else { 
+   if (rhoScalar < eta2) {
+    delta = max(sigma1 * delta, min( alphaScalar * snorm, sigma3 * delta))
+   }
+   else {
+    delta = max(delta, min( alphaScalar * snorm, sigma3 * delta))
+   }
+  }
+ } 
+ 
+ o2 = y * o
+ correct = sum(ppred(o2, 0, ">"))
+ accuracy = correct*100.0/N 
+ iter = iter + 1
+ #converge = (norm_grad < (tol * norm_grad_initial)) | (iter > maxiter)
+ converge = (norm_grad < tol) | (iter > maxiter)
+
+ print("     Delta =  " + delta)
+ print("     Training Accuracy =  " +  accuracy)
+ print("     Correct =  " + correct)
+ print("     OuterIter =  " + iter)
+ print("     Converge =  " + converge)
+} 
+
+write(w, $6, format="text");

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/src/test/scripts/applications/validation/genRandData4LogisticRegression.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/validation/genRandData4LogisticRegression.dml b/src/test/scripts/applications/validation/genRandData4LogisticRegression.dml
index d06de67..b42a315 100644
--- a/src/test/scripts/applications/validation/genRandData4LogisticRegression.dml
+++ b/src/test/scripts/applications/validation/genRandData4LogisticRegression.dml
@@ -1,122 +1,122 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# generates random data to test linear logistic regression
-
-# 100K dataset
-# hadoop jar SystemML.jar -f genRandData4LogisticRegression.dml -args 100000 500 0.0 5.0 itau/logreg/w_100k itau/logreg/X_100k_500 itau/logreg/y_100k 0 0 0.01
-
-# 1M dataset
-# hadoop jar SystemML.jar -f genRandData4LogisticRegression.dml -args 1000000 1000 0.0 5.0 itau/logreg/w_1m itau/logreg/X_1m_1k /logreg/y_1m 0 0 0.0001
-
-# $1 is number of samples
-# $2 is number of features (independent variables)
-# $3 is the mean of the linear form (w^T X)
-# $4 is the st.dev. of the linear form (w^T X)
-# $5 is location to store generated weights
-# $6 is location to store generated data
-# $7 is location to store generated labels
-# $8 addNoise. if 0 then no noise is added, to add noise set this to 1
-# $9 is 0 if no intercept and 1 if there is intercept
-# $10 controls sparsity in the generated data
-
-numSamples = $1
-numFeatures = $2
-meanLF = $3
-sigmaLF = $4
-addNoise = $8
-b = $9
-
-X = Rand (rows=numSamples, cols=numFeatures, min=-1, max=2, pdf="uniform", seed=0, sparsity=$10);
-w = Rand (rows=numFeatures, cols=1, min=-1, max=1, pdf="uniform", seed=0)
-
-if (b != 0) {
-	b_mat = Rand (rows=numSamples, cols=1, min=1, max=1);
-    X = append (X, b_mat);
-    numFeatures_plus_one = numFeatures + 1;
-    w = Rand (rows=numFeatures_plus_one, cols=1, min=-1, max=1, pdf="uniform", seed=0);
-}
-
-[w, new_sigmaLF] = scaleWeights (X, w, meanLF, sigmaLF);
-if (sigmaLF != new_sigmaLF) {
-    print ("The standard deviation requirement on the linear form is TOO TIGHT!");
-    print ("We relaxed sigmaLF from " + sigmaLF + " to " + new_sigmaLF + "."); 
-}
-ot = X %*% w;
-
-if (b != 0) {
-    X = X [, 1:numFeatures];
-}
-
-emp_meanLF = sum (ot) / numSamples;
-emp_sigmaLF = sqrt (sum (ot * ot) / numSamples - emp_meanLF * emp_meanLF);
-print ("Empirical meanLF = " + emp_meanLF + ";   Empirical sigmaLF = " + emp_sigmaLF);
-
-prob = 1 / (1 + exp (- ot));
-
-if(addNoise == 1){
-	r = Rand(rows=numSamples, cols=1, min=0, max=1, pdf="uniform", seed=0)
-}else{
-	print("this data generator generates the same dataset for both noise=0 and noise=1")
-	r = Rand(rows=numSamples, cols=1, min=0, max=1, pdf="uniform", seed=0)
-	#r = Rand(rows=numSamples, cols=1, min=0.5, max=0.5, pdf="uniform")
-}
-
-print ("nrow(prob) = " + nrow(prob) + ", ncol(prob) = " + ncol(prob) + ";  nrow(r) = " + nrow(r) + ", ncol(r) = " + ncol(r));
-
-Y = 1 - 2*ppred(prob, r, "<")
-
-write (w, $5, format="text");
-write (X, $6, format="binary");
-write (Y, $7, format="binary");
-
-
-# Shifts and scales the weights to ensure the desired statistics for Linear Form = w^T X
-# Used in data and/or weight generation in the testing of GLM, Logistic Regression etc.
-# new_sigmaLF == sigmaLF if successful, new_sigmaLF > sigmaLF if had to relax this constraint
-scaleWeights = 
-    function (Matrix[double] X_data, Matrix[double] w_unscaled, double meanLF, double sigmaLF)
-    return (Matrix[double] w_scaled, double new_sigmaLF)
-{
-    numFeatures = nrow (w_unscaled);
-    W_ext = Rand (rows = numFeatures, cols = 2, min = 1, max = 1);
-    W_ext [, 1] = w_unscaled;
-    S1 = colSums (X_data %*% W_ext);
-    TF = Rand (rows = 2, cols = 2, min = 1, max = 1);
-    TF [1, 1] = S1 [1, 1] * meanLF * nrow (X_data) / castAsScalar (S1 %*% t(S1));
-    TF [1, 2] = S1 [1, 2];
-    TF [2, 1] = S1 [1, 2] * meanLF * nrow (X_data) / castAsScalar (S1 %*% t(S1));
-    TF [2, 2] = - S1 [1, 1];
-    TF = W_ext %*% TF;
-    Q = t(TF) %*% t(X_data) %*% X_data %*% TF;
-    Q [1, 1] = Q [1, 1] - nrow (X_data) * meanLF * meanLF;
-    new_sigmaLF = sigmaLF;
-    discr = castAsScalar (Q [1, 1] * Q [2, 2] - Q [1, 2] * Q [2, 1] - nrow (X_data) * Q [2, 2] * sigmaLF * sigmaLF);
-    if (discr > 0.0) {
-        new_sigmaLF = sqrt (castAsScalar ((Q [1, 1] * Q [2, 2] - Q [1, 2] * Q [2, 1]) / (nrow (X_data) * Q [2, 2])));
-        discr = -0.0;
-    }
-    t = Rand (rows = 2, cols = 1, min = 1, max = 1);
-    t [2, 1] = (- Q [1, 2] + sqrt (- discr)) / Q [2, 2];
-    w_scaled = TF %*% t;
-}
-
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# generates random data to test linear logistic regression
+
+# 100K dataset
+# hadoop jar SystemML.jar -f genRandData4LogisticRegression.dml -args 100000 500 0.0 5.0 itau/logreg/w_100k itau/logreg/X_100k_500 itau/logreg/y_100k 0 0 0.01
+
+# 1M dataset
+# hadoop jar SystemML.jar -f genRandData4LogisticRegression.dml -args 1000000 1000 0.0 5.0 itau/logreg/w_1m itau/logreg/X_1m_1k /logreg/y_1m 0 0 0.0001
+
+# $1 is number of samples
+# $2 is number of features (independent variables)
+# $3 is the mean of the linear form (w^T X)
+# $4 is the st.dev. of the linear form (w^T X)
+# $5 is location to store generated weights
+# $6 is location to store generated data
+# $7 is location to store generated labels
+# $8 addNoise. if 0 then no noise is added, to add noise set this to 1
+# $9 is 0 if no intercept and 1 if there is intercept
+# $10 controls sparsity in the generated data
+
+numSamples = $1
+numFeatures = $2
+meanLF = $3
+sigmaLF = $4
+addNoise = $8
+b = $9
+
+X = Rand (rows=numSamples, cols=numFeatures, min=-1, max=2, pdf="uniform", seed=0, sparsity=$10);
+w = Rand (rows=numFeatures, cols=1, min=-1, max=1, pdf="uniform", seed=0)
+
+if (b != 0) {
+	b_mat = Rand (rows=numSamples, cols=1, min=1, max=1);
+    X = append (X, b_mat);
+    numFeatures_plus_one = numFeatures + 1;
+    w = Rand (rows=numFeatures_plus_one, cols=1, min=-1, max=1, pdf="uniform", seed=0);
+}
+
+[w, new_sigmaLF] = scaleWeights (X, w, meanLF, sigmaLF);
+if (sigmaLF != new_sigmaLF) {
+    print ("The standard deviation requirement on the linear form is TOO TIGHT!");
+    print ("We relaxed sigmaLF from " + sigmaLF + " to " + new_sigmaLF + "."); 
+}
+ot = X %*% w;
+
+if (b != 0) {
+    X = X [, 1:numFeatures];
+}
+
+emp_meanLF = sum (ot) / numSamples;
+emp_sigmaLF = sqrt (sum (ot * ot) / numSamples - emp_meanLF * emp_meanLF);
+print ("Empirical meanLF = " + emp_meanLF + ";   Empirical sigmaLF = " + emp_sigmaLF);
+
+prob = 1 / (1 + exp (- ot));
+
+if(addNoise == 1){
+	r = Rand(rows=numSamples, cols=1, min=0, max=1, pdf="uniform", seed=0)
+}else{
+	print("this data generator generates the same dataset for both noise=0 and noise=1")
+	r = Rand(rows=numSamples, cols=1, min=0, max=1, pdf="uniform", seed=0)
+	#r = Rand(rows=numSamples, cols=1, min=0.5, max=0.5, pdf="uniform")
+}
+
+print ("nrow(prob) = " + nrow(prob) + ", ncol(prob) = " + ncol(prob) + ";  nrow(r) = " + nrow(r) + ", ncol(r) = " + ncol(r));
+
+Y = 1 - 2*ppred(prob, r, "<")
+
+write (w, $5, format="text");
+write (X, $6, format="binary");
+write (Y, $7, format="binary");
+
+
+# Shifts and scales the weights to ensure the desired statistics for Linear Form = w^T X
+# Used in data and/or weight generation in the testing of GLM, Logistic Regression etc.
+# new_sigmaLF == sigmaLF if successful, new_sigmaLF > sigmaLF if had to relax this constraint
+scaleWeights = 
+    function (Matrix[double] X_data, Matrix[double] w_unscaled, double meanLF, double sigmaLF)
+    return (Matrix[double] w_scaled, double new_sigmaLF)
+{
+    numFeatures = nrow (w_unscaled);
+    W_ext = Rand (rows = numFeatures, cols = 2, min = 1, max = 1);
+    W_ext [, 1] = w_unscaled;
+    S1 = colSums (X_data %*% W_ext);
+    TF = Rand (rows = 2, cols = 2, min = 1, max = 1);
+    TF [1, 1] = S1 [1, 1] * meanLF * nrow (X_data) / castAsScalar (S1 %*% t(S1));
+    TF [1, 2] = S1 [1, 2];
+    TF [2, 1] = S1 [1, 2] * meanLF * nrow (X_data) / castAsScalar (S1 %*% t(S1));
+    TF [2, 2] = - S1 [1, 1];
+    TF = W_ext %*% TF;
+    Q = t(TF) %*% t(X_data) %*% X_data %*% TF;
+    Q [1, 1] = Q [1, 1] - nrow (X_data) * meanLF * meanLF;
+    new_sigmaLF = sigmaLF;
+    discr = castAsScalar (Q [1, 1] * Q [2, 2] - Q [1, 2] * Q [2, 1] - nrow (X_data) * Q [2, 2] * sigmaLF * sigmaLF);
+    if (discr > 0.0) {
+        new_sigmaLF = sqrt (castAsScalar ((Q [1, 1] * Q [2, 2] - Q [1, 2] * Q [2, 1]) / (nrow (X_data) * Q [2, 2])));
+        discr = -0.0;
+    }
+    t = Rand (rows = 2, cols = 1, min = 1, max = 1);
+    t [2, 1] = (- Q [1, 2] + sqrt (- discr)) / Q [2, 2];
+    w_scaled = TF %*% t;
+}
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/src/test/scripts/applications/welchTTest/welchTTest.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/welchTTest/welchTTest.R b/src/test/scripts/applications/welchTTest/welchTTest.R
index 66da912..ff74d71 100644
--- a/src/test/scripts/applications/welchTTest/welchTTest.R
+++ b/src/test/scripts/applications/welchTTest/welchTTest.R
@@ -1,49 +1,49 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-args <- commandArgs(TRUE)
-library(Matrix)
-
-posSamples = readMM(paste(args[1], "posSamples.mtx", sep=""))
-negSamples = readMM(paste(args[1], "negSamples.mtx", sep=""))
-
-#computing sample sizes
-posSampleSize = nrow(posSamples)
-negSampleSize = nrow(negSamples)
-
-#computing means
-posSampleMeans = colMeans(posSamples)
-negSampleMeans = colMeans(negSamples)
-
-#computing (unbiased) variances
-posSampleVariances = (colSums(posSamples^2) - posSampleSize * posSampleMeans^2) / (posSampleSize-1)
-negSampleVariances = (colSums(negSamples^2) - negSampleSize * negSampleMeans^2) / (negSampleSize-1)
-
-#computing t-statistics and degrees of freedom
-t_statistics = (posSampleMeans - negSampleMeans) / sqrt(posSampleVariances/posSampleSize + negSampleVariances/negSampleSize)
-degrees_of_freedom = round(((posSampleVariances/posSampleSize + negSampleVariances/negSampleSize) ^ 2) / (posSampleVariances^2/(posSampleSize^2 * (posSampleSize-1)) + negSampleVariances^2/(negSampleSize^2 * (negSampleSize-1))))
-
-#R will write a vector as a 1-column matrix, forcing it to write a 1-row matrix
-t_statistics_mat = matrix(t_statistics, 1, length(t_statistics))
-degrees_of_freedom_mat = matrix(degrees_of_freedom, 1, length(degrees_of_freedom))
-
-writeMM(as(t_statistics_mat, "CsparseMatrix"), paste(args[2], "t_statistics", sep=""))
-writeMM(as(degrees_of_freedom_mat, "CsparseMatrix"), paste(args[2], "degrees_of_freedom", sep=""))
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+args <- commandArgs(TRUE)
+library(Matrix)
+
+posSamples = readMM(paste(args[1], "posSamples.mtx", sep=""))
+negSamples = readMM(paste(args[1], "negSamples.mtx", sep=""))
+
+#computing sample sizes
+posSampleSize = nrow(posSamples)
+negSampleSize = nrow(negSamples)
+
+#computing means
+posSampleMeans = colMeans(posSamples)
+negSampleMeans = colMeans(negSamples)
+
+#computing (unbiased) variances
+posSampleVariances = (colSums(posSamples^2) - posSampleSize * posSampleMeans^2) / (posSampleSize-1)
+negSampleVariances = (colSums(negSamples^2) - negSampleSize * negSampleMeans^2) / (negSampleSize-1)
+
+#computing t-statistics and degrees of freedom
+t_statistics = (posSampleMeans - negSampleMeans) / sqrt(posSampleVariances/posSampleSize + negSampleVariances/negSampleSize)
+degrees_of_freedom = round(((posSampleVariances/posSampleSize + negSampleVariances/negSampleSize) ^ 2) / (posSampleVariances^2/(posSampleSize^2 * (posSampleSize-1)) + negSampleVariances^2/(negSampleSize^2 * (negSampleSize-1))))
+
+#R will write a vector as a 1-column matrix, forcing it to write a 1-row matrix
+t_statistics_mat = matrix(t_statistics, 1, length(t_statistics))
+degrees_of_freedom_mat = matrix(degrees_of_freedom, 1, length(degrees_of_freedom))
+
+writeMM(as(t_statistics_mat, "CsparseMatrix"), paste(args[2], "t_statistics", sep=""))
+writeMM(as(degrees_of_freedom_mat, "CsparseMatrix"), paste(args[2], "degrees_of_freedom", sep=""))

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/src/test/scripts/applications/welchTTest/welchTTest.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/welchTTest/welchTTest.dml b/src/test/scripts/applications/welchTTest/welchTTest.dml
index 4e42f03..7bc0144 100644
--- a/src/test/scripts/applications/welchTTest/welchTTest.dml
+++ b/src/test/scripts/applications/welchTTest/welchTTest.dml
@@ -1,43 +1,43 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-
-posSamples = read($1, format="text")
-negSamples = read($2, format="text")
-
-#computing sample sizes
-posSampleSize = nrow(posSamples)
-negSampleSize = nrow(negSamples)
-
-#computing means
-posSampleMeans = colMeans(posSamples)
-negSampleMeans = colMeans(negSamples)
-
-#computing (unbiased) variances
-posSampleVariances = (colSums(posSamples^2) - posSampleSize * posSampleMeans^2) / (posSampleSize-1)
-negSampleVariances = (colSums(negSamples^2) - negSampleSize * negSampleMeans^2) / (negSampleSize-1)
-
-#computing t-statistics and degrees of freedom
-t_statistics = (posSampleMeans - negSampleMeans) / sqrt(posSampleVariances/posSampleSize + negSampleVariances/negSampleSize)
-degrees_of_freedom = round(((posSampleVariances/posSampleSize + negSampleVariances/negSampleSize) ^ 2) / (posSampleVariances^2/(posSampleSize^2 * (posSampleSize-1)) + negSampleVariances^2/(negSampleSize^2 * (negSampleSize-1))))
-
-write(t_statistics, $3, format="text")
-write(degrees_of_freedom, $4, format="text")
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+posSamples = read($1, format="text")
+negSamples = read($2, format="text")
+
+#computing sample sizes
+posSampleSize = nrow(posSamples)
+negSampleSize = nrow(negSamples)
+
+#computing means
+posSampleMeans = colMeans(posSamples)
+negSampleMeans = colMeans(negSamples)
+
+#computing (unbiased) variances
+posSampleVariances = (colSums(posSamples^2) - posSampleSize * posSampleMeans^2) / (posSampleSize-1)
+negSampleVariances = (colSums(negSamples^2) - negSampleSize * negSampleMeans^2) / (negSampleSize-1)
+
+#computing t-statistics and degrees of freedom
+t_statistics = (posSampleMeans - negSampleMeans) / sqrt(posSampleVariances/posSampleSize + negSampleVariances/negSampleSize)
+degrees_of_freedom = round(((posSampleVariances/posSampleSize + negSampleVariances/negSampleSize) ^ 2) / (posSampleVariances^2/(posSampleSize^2 * (posSampleSize-1)) + negSampleVariances^2/(negSampleSize^2 * (negSampleSize-1))))
+
+write(t_statistics, $3, format="text")
+write(degrees_of_freedom, $4, format="text")

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/src/test/scripts/applications/welchTTest/welchTTest.pydml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/welchTTest/welchTTest.pydml b/src/test/scripts/applications/welchTTest/welchTTest.pydml
index abfff3a..5dbd049 100644
--- a/src/test/scripts/applications/welchTTest/welchTTest.pydml
+++ b/src/test/scripts/applications/welchTTest/welchTTest.pydml
@@ -1,43 +1,43 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-
-posSamples = load($1, format="text")
-negSamples = load($2, format="text")
-
-#computing sample sizes
-posSampleSize = nrow(posSamples)
-negSampleSize = nrow(negSamples)
-
-#computing means
-posSampleMeans = colMeans(posSamples)
-negSampleMeans = colMeans(negSamples)
-
-#computing (unbiased) variances
-posSampleVariances = (colSums(posSamples ** 2) - posSampleSize * posSampleMeans ** 2) / (posSampleSize-1)
-negSampleVariances = (colSums(negSamples ** 2) - negSampleSize * negSampleMeans ** 2) / (negSampleSize-1)
-
-#computing t-statistics and degrees of freedom
-t_statistics = (posSampleMeans - negSampleMeans) / sqrt(posSampleVariances/posSampleSize + negSampleVariances/negSampleSize)
-degrees_of_freedom = round(((posSampleVariances/posSampleSize + negSampleVariances/negSampleSize) ** 2) / (posSampleVariances ** 2/((posSampleSize ** 2) * (posSampleSize-1)) + (negSampleVariances ** 2)/((negSampleSize ** 2) * (negSampleSize-1))))
-
-save(t_statistics, $3, format="text")
-save(degrees_of_freedom, $4, format="text")
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+posSamples = load($1, format="text")
+negSamples = load($2, format="text")
+
+#computing sample sizes
+posSampleSize = nrow(posSamples)
+negSampleSize = nrow(negSamples)
+
+#computing means
+posSampleMeans = colMeans(posSamples)
+negSampleMeans = colMeans(negSamples)
+
+#computing (unbiased) variances
+posSampleVariances = (colSums(posSamples ** 2) - posSampleSize * posSampleMeans ** 2) / (posSampleSize-1)
+negSampleVariances = (colSums(negSamples ** 2) - negSampleSize * negSampleMeans ** 2) / (negSampleSize-1)
+
+#computing t-statistics and degrees of freedom
+t_statistics = (posSampleMeans - negSampleMeans) / sqrt(posSampleVariances/posSampleSize + negSampleVariances/negSampleSize)
+degrees_of_freedom = round(((posSampleVariances/posSampleSize + negSampleVariances/negSampleSize) ** 2) / (posSampleVariances ** 2/((posSampleSize ** 2) * (posSampleSize-1)) + (negSampleVariances ** 2)/((negSampleSize ** 2) * (negSampleSize-1))))
+
+save(t_statistics, $3, format="text")
+save(degrees_of_freedom, $4, format="text")

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/src/test/scripts/functions/aggregate/AllMax.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/aggregate/AllMax.R b/src/test/scripts/functions/aggregate/AllMax.R
index 3ed23d6..73cde8d 100644
--- a/src/test/scripts/functions/aggregate/AllMax.R
+++ b/src/test/scripts/functions/aggregate/AllMax.R
@@ -1,29 +1,29 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-args <- commandArgs(TRUE)
-
-library("Matrix")
-
-A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
-B <- as.matrix(max(A));
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+args <- commandArgs(TRUE)
+
+library("Matrix")
+
+A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
+B <- as.matrix(max(A));
+
 writeMM(as(B, "CsparseMatrix"), paste(args[2], "B", sep="")); 
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/src/test/scripts/functions/aggregate/AllMean.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/aggregate/AllMean.R b/src/test/scripts/functions/aggregate/AllMean.R
index 4e315a4..07ee1a9 100644
--- a/src/test/scripts/functions/aggregate/AllMean.R
+++ b/src/test/scripts/functions/aggregate/AllMean.R
@@ -1,29 +1,29 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-args <- commandArgs(TRUE)
-
-library("Matrix")
-
-A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
-B <- as.matrix(mean(A));
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+args <- commandArgs(TRUE)
+
+library("Matrix")
+
+A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
+B <- as.matrix(mean(A));
+
 writeMM(as(B, "CsparseMatrix"), paste(args[2], "B", sep="")); 
\ No newline at end of file