You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by du...@apache.org on 2016/01/22 17:33:56 UTC

[20/51] [partial] incubator-systemml git commit: [SYSTEMML-482] [SYSTEMML-480] Adding a Git attributes file to enforce Unix-styled line endings, and normalizing all of the line endings.

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/src/test/scripts/applications/m-svm/m-svm.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/m-svm/m-svm.dml b/src/test/scripts/applications/m-svm/m-svm.dml
index 1307707..bbf5acc 100644
--- a/src/test/scripts/applications/m-svm/m-svm.dml
+++ b/src/test/scripts/applications/m-svm/m-svm.dml
@@ -1,145 +1,145 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# Implements multiclass SVM with squared slack variables, 
-# learns one-against-the-rest binary-class classifiers
-# 
-# Example Usage:
-# Assume SVM_HOME is set to the home of the dml script
-# Assume input and output directories are on hdfs as INPUT_DIR and OUTPUT_DIR
-# Assume number of classes is 10, epsilon = 0.001, lambda=1.0, max_iterations = 100
-# 
-# hadoop jar SystemML.jar -f $SVM_HOME/m-svm.dml -nvargs X=$INPUT_DIR/X Y=$INPUT_DIR/y icpt=intercept classes=10 tol=.001 reg=1.0 maxiter=100 model=$OUTPUT_DIR/w Log=$OUTPUT_DIR/Log fmt="text"
-#
-
-cmdLine_fmt=ifdef($fmt, "text")
-cmdLine_icpt = ifdef($icpt, 0)
-cmdLine_tol=ifdef($tol, 0.001)
-cmdLine_reg=ifdef($reg, 1.0)
-cmdLine_maxiter=ifdef($maxiter, 100)
-
-print("icpt=" + cmdLine_icpt + " tol=" + cmdLine_tol + " reg=" + cmdLine_reg + " maxiter=" + cmdLine_maxiter)
-
-X = read($X)
-
-check_X = sum(X)
-if(check_X == 0){
-	print("X has no non-zeros")
-}else{
-	Y = read($Y)
-	intercept = cmdLine_icpt
-	num_classes = $classes
-	epsilon = cmdLine_tol
-	lambda = cmdLine_reg
-	max_iterations = cmdLine_maxiter
- 
-	num_samples = nrow(X)
-	num_features = ncol(X)
-
-	if (intercept == 1) {
- 		ones  = matrix(1, rows=num_samples, cols=1);
- 		X = append(X, ones);
-	}
-
-	num_rows_in_w = num_features
-	if(intercept == 1){
-		num_rows_in_w = num_rows_in_w + 1
-	}
-	w = matrix(0, rows=num_rows_in_w, cols=num_classes)
-
-	debug_mat = matrix(-1, rows=max_iterations, cols=num_classes)
-	parfor(iter_class in 1:num_classes){		  
-		Y_local = 2 * ppred(Y, iter_class, "==") - 1
-		w_class = matrix(0, rows=num_features, cols=1)
-		if (intercept == 1) {
-			zero_matrix = matrix(0, rows=1, cols=1);
- 			w_class = t(append(t(w_class), zero_matrix));
- 		}
- 
-		g_old = t(X) %*% Y_local
- 		s = g_old
-
-		Xw = matrix(0, rows=nrow(X), cols=1)
-		iter = 0
- 		continue = 1
- 		while(continue == 1)  {
-  			# minimizing primal obj along direction s
-  			step_sz = 0
-  			Xd = X %*% s
-  			wd = lambda * sum(w_class * s)
-  			dd = lambda * sum(s * s)
-  			continue1 = 1
-  			while(continue1 == 1){
-   				tmp_Xw = Xw + step_sz*Xd
-   				out = 1 - Y_local * (tmp_Xw)
-   				sv = ppred(out, 0, ">")
-   				out = out * sv
-   				g = wd + step_sz*dd - sum(out * Y_local * Xd)
-   				h = dd + sum(Xd * sv * Xd)
-   				step_sz = step_sz - g/h
-   				if (g*g/h < 0.0000000001){
-    				continue1 = 0
-   				}
-  			}
- 
-  			#update weights
-  			w_class = w_class + step_sz*s
- 			Xw = Xw + step_sz*Xd
- 
-  			out = 1 - Y_local * Xw
-  			sv = ppred(out, 0, ">")
-  			out = sv * out
-  			obj = 0.5 * sum(out * out) + lambda/2 * sum(w_class * w_class)
-  			g_new = t(X) %*% (out * Y_local) - lambda * w_class
-
-  			tmp = sum(s * g_old)
-  
-  			train_acc = sum(ppred(Y_local*(X%*%w_class), 0, ">="))/num_samples*100
-  			print("For class " + iter_class + " iteration " + iter + " training accuracy: " + train_acc)
-  			debug_mat[iter+1,iter_class] = obj	   
-   
-  			if((step_sz*tmp < epsilon*obj) | (iter >= max_iterations-1)){
-   				continue = 0
-  			}
- 
-  			#non-linear CG step
-  			be = sum(g_new * g_new)/sum(g_old * g_old)
-  			s = be * s + g_new
-  			g_old = g_new
-
-  			iter = iter + 1
- 		}
-
-		w[,iter_class] = w_class
-	}
-
-	write(w, $model, format=cmdLine_fmt)
-
-	debug_str = "# Class, Iter, Obj"
-	for(iter_class in 1:ncol(debug_mat)){
-		for(iter in 1:nrow(debug_mat)){
-			obj = castAsScalar(debug_mat[iter, iter_class])
-			if(obj != -1) 
-				debug_str = append(debug_str, iter_class + "," + iter + "," + obj)
-		}
-	}
-	write(debug_str, $Log)
-}
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# Implements multiclass SVM with squared slack variables, 
+# learns one-against-the-rest binary-class classifiers
+# 
+# Example Usage:
+# Assume SVM_HOME is set to the home of the dml script
+# Assume input and output directories are on hdfs as INPUT_DIR and OUTPUT_DIR
+# Assume number of classes is 10, epsilon = 0.001, lambda=1.0, max_iterations = 100
+# 
+# hadoop jar SystemML.jar -f $SVM_HOME/m-svm.dml -nvargs X=$INPUT_DIR/X Y=$INPUT_DIR/y icpt=intercept classes=10 tol=.001 reg=1.0 maxiter=100 model=$OUTPUT_DIR/w Log=$OUTPUT_DIR/Log fmt="text"
+#
+
+cmdLine_fmt=ifdef($fmt, "text")
+cmdLine_icpt = ifdef($icpt, 0)
+cmdLine_tol=ifdef($tol, 0.001)
+cmdLine_reg=ifdef($reg, 1.0)
+cmdLine_maxiter=ifdef($maxiter, 100)
+
+print("icpt=" + cmdLine_icpt + " tol=" + cmdLine_tol + " reg=" + cmdLine_reg + " maxiter=" + cmdLine_maxiter)
+
+X = read($X)
+
+check_X = sum(X)
+if(check_X == 0){
+	print("X has no non-zeros")
+}else{
+	Y = read($Y)
+	intercept = cmdLine_icpt
+	num_classes = $classes
+	epsilon = cmdLine_tol
+	lambda = cmdLine_reg
+	max_iterations = cmdLine_maxiter
+ 
+	num_samples = nrow(X)
+	num_features = ncol(X)
+
+	if (intercept == 1) {
+ 		ones  = matrix(1, rows=num_samples, cols=1);
+ 		X = append(X, ones);
+	}
+
+	num_rows_in_w = num_features
+	if(intercept == 1){
+		num_rows_in_w = num_rows_in_w + 1
+	}
+	w = matrix(0, rows=num_rows_in_w, cols=num_classes)
+
+	debug_mat = matrix(-1, rows=max_iterations, cols=num_classes)
+	parfor(iter_class in 1:num_classes){		  
+		Y_local = 2 * ppred(Y, iter_class, "==") - 1
+		w_class = matrix(0, rows=num_features, cols=1)
+		if (intercept == 1) {
+			zero_matrix = matrix(0, rows=1, cols=1);
+ 			w_class = t(append(t(w_class), zero_matrix));
+ 		}
+ 
+		g_old = t(X) %*% Y_local
+ 		s = g_old
+
+		Xw = matrix(0, rows=nrow(X), cols=1)
+		iter = 0
+ 		continue = 1
+ 		while(continue == 1)  {
+  			# minimizing primal obj along direction s
+  			step_sz = 0
+  			Xd = X %*% s
+  			wd = lambda * sum(w_class * s)
+  			dd = lambda * sum(s * s)
+  			continue1 = 1
+  			while(continue1 == 1){
+   				tmp_Xw = Xw + step_sz*Xd
+   				out = 1 - Y_local * (tmp_Xw)
+   				sv = ppred(out, 0, ">")
+   				out = out * sv
+   				g = wd + step_sz*dd - sum(out * Y_local * Xd)
+   				h = dd + sum(Xd * sv * Xd)
+   				step_sz = step_sz - g/h
+   				if (g*g/h < 0.0000000001){
+    				continue1 = 0
+   				}
+  			}
+ 
+  			#update weights
+  			w_class = w_class + step_sz*s
+ 			Xw = Xw + step_sz*Xd
+ 
+  			out = 1 - Y_local * Xw
+  			sv = ppred(out, 0, ">")
+  			out = sv * out
+  			obj = 0.5 * sum(out * out) + lambda/2 * sum(w_class * w_class)
+  			g_new = t(X) %*% (out * Y_local) - lambda * w_class
+
+  			tmp = sum(s * g_old)
+  
+  			train_acc = sum(ppred(Y_local*(X%*%w_class), 0, ">="))/num_samples*100
+  			print("For class " + iter_class + " iteration " + iter + " training accuracy: " + train_acc)
+  			debug_mat[iter+1,iter_class] = obj	   
+   
+  			if((step_sz*tmp < epsilon*obj) | (iter >= max_iterations-1)){
+   				continue = 0
+  			}
+ 
+  			#non-linear CG step
+  			be = sum(g_new * g_new)/sum(g_old * g_old)
+  			s = be * s + g_new
+  			g_old = g_new
+
+  			iter = iter + 1
+ 		}
+
+		w[,iter_class] = w_class
+	}
+
+	write(w, $model, format=cmdLine_fmt)
+
+	debug_str = "# Class, Iter, Obj"
+	for(iter_class in 1:ncol(debug_mat)){
+		for(iter in 1:nrow(debug_mat)){
+			obj = castAsScalar(debug_mat[iter, iter_class])
+			if(obj != -1) 
+				debug_str = append(debug_str, iter_class + "," + iter + "," + obj)
+		}
+	}
+	write(debug_str, $Log)
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/src/test/scripts/applications/m-svm/m-svm.pydml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/m-svm/m-svm.pydml b/src/test/scripts/applications/m-svm/m-svm.pydml
index 83f17cf..348f599 100644
--- a/src/test/scripts/applications/m-svm/m-svm.pydml
+++ b/src/test/scripts/applications/m-svm/m-svm.pydml
@@ -1,136 +1,136 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# Implements multiclass SVM with squared slack variables, 
-# learns one-against-the-rest binary-class classifiers
-# 
-# Example Usage:
-# Assume SVM_HOME is set to the home of the pydml script
-# Assume input and output directories are on hdfs as INPUT_DIR and OUTPUT_DIR
-# Assume number of classes is 10, epsilon = 0.001, lambda=1.0, max_iterations = 100
-# 
-# hadoop jar SystemML.jar -f $SVM_HOME/m-svm.pydml -python -nvargs X=$INPUT_DIR/X Y=$INPUT_DIR/y icpt=intercept classes=10 tol=.001 reg=1.0 maxiter=100 model=$OUTPUT_DIR/w Log=$OUTPUT_DIR/Log fmt="text"
-#
-
-cmdLine_fmt=ifdef($fmt, "text")
-cmdLine_icpt = ifdef($icpt, 0)
-cmdLine_tol=ifdef($tol, 0.001)
-cmdLine_reg=ifdef($reg, 1.0)
-cmdLine_maxiter=ifdef($maxiter, 100)
-
-print("icpt=" + cmdLine_icpt + " tol=" + cmdLine_tol + " reg=" + cmdLine_reg + " maxiter=" + cmdLine_maxiter)
-
-X = load($X)
-
-check_X = sum(X)
-if(check_X == 0):
-    print("X has no non-zeros")
-else:
-    Y = load($Y)
-    intercept = cmdLine_icpt
-    num_classes = $classes
-    epsilon = cmdLine_tol
-    lambda = cmdLine_reg
-    max_iterations = cmdLine_maxiter
-    
-    num_samples = nrow(X)
-    num_features = ncol(X)
-    
-    if (intercept == 1):
-        ones  = full(1, rows=num_samples, cols=1)
-        X = append(X, ones)
-    
-    num_rows_in_w = num_features
-    if(intercept == 1):
-        num_rows_in_w = num_rows_in_w + 1
-    w = full(0, rows=num_rows_in_w, cols=num_classes)
-    
-    debug_mat = full(-1, rows=max_iterations, cols=num_classes)
-    parfor(iter_class in 1:num_classes):
-        Y_local = 2 * ppred(Y, iter_class, "==") - 1
-        w_class = full(0, rows=num_features, cols=1)
-        if (intercept == 1):
-            zero_matrix = full(0, rows=1, cols=1)
-            w_class = transpose(append(transpose(w_class), zero_matrix))
-        g_old = dot(transpose(X), Y_local)
-        s = g_old
-        
-        Xw = full(0, rows=nrow(X), cols=1)
-        iter = 0
-        continue = 1
-        while(continue == 1):
-            # minimizing primal obj along direction s
-            step_sz = 0
-            Xd = dot(X, s)
-            wd = lambda * sum(w_class * s)
-            dd = lambda * sum(s * s)
-            continue1 = 1
-            while(continue1 == 1):
-                tmp_Xw = Xw + step_sz*Xd
-                out = 1 - Y_local * (tmp_Xw)
-                sv = ppred(out, 0, ">")
-                out = out * sv
-                g = wd + step_sz*dd - sum(out * Y_local * Xd)
-                h = dd + sum(Xd * sv * Xd)
-                step_sz = step_sz - g/h
-                if (g*g/h < 0.0000000001):
-                    continue1 = 0
-            
-            #update weights
-            w_class = w_class + step_sz*s
-            Xw = Xw + step_sz*Xd
-            
-            out = 1 - Y_local * Xw
-            sv = ppred(out, 0, ">")
-            out = sv * out
-            obj = 0.5 * sum(out * out) + lambda/2 * sum(w_class * w_class)
-            g_new = dot(transpose(X), (out * Y_local)) - lambda * w_class
-            
-            tmp = sum(s * g_old)
-            
-            train_acc = sum(ppred(Y_local*(dot(X, w_class)), 0, ">="))/num_samples*100
-            print("For class " + iter_class + " iteration " + iter + " training accuracy: " + train_acc)
-            debug_mat[iter+1,iter_class] = obj
-            
-            if((step_sz*tmp < epsilon*obj) | (iter >= max_iterations-1)):
-                continue = 0
-            
-            #non-linear CG step
-            be = sum(g_new * g_new)/sum(g_old * g_old)
-            s = be * s + g_new
-            g_old = g_new
-            
-            iter = iter + 1
-        # end while(continue == 1)
-        
-        w[,iter_class] = w_class
-    # end parfor(iter_class in 1:num_classes)
-    
-    save(w, $model, format=cmdLine_fmt)
-    
-    debug_str = "# Class, Iter, Obj"
-    for(iter_class in 1:ncol(debug_mat)):
-        for(iter in 1:nrow(debug_mat)):
-            obj = castAsScalar(debug_mat[iter, iter_class])
-            if(obj != -1):
-                debug_str = append(debug_str, iter_class + "," + iter + "," + obj)
-    save(debug_str, $Log)
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# Implements multiclass SVM with squared slack variables, 
+# learns one-against-the-rest binary-class classifiers
+# 
+# Example Usage:
+# Assume SVM_HOME is set to the home of the pydml script
+# Assume input and output directories are on hdfs as INPUT_DIR and OUTPUT_DIR
+# Assume number of classes is 10, epsilon = 0.001, lambda=1.0, max_iterations = 100
+# 
+# hadoop jar SystemML.jar -f $SVM_HOME/m-svm.pydml -python -nvargs X=$INPUT_DIR/X Y=$INPUT_DIR/y icpt=intercept classes=10 tol=.001 reg=1.0 maxiter=100 model=$OUTPUT_DIR/w Log=$OUTPUT_DIR/Log fmt="text"
+#
+
+cmdLine_fmt=ifdef($fmt, "text")
+cmdLine_icpt = ifdef($icpt, 0)
+cmdLine_tol=ifdef($tol, 0.001)
+cmdLine_reg=ifdef($reg, 1.0)
+cmdLine_maxiter=ifdef($maxiter, 100)
+
+print("icpt=" + cmdLine_icpt + " tol=" + cmdLine_tol + " reg=" + cmdLine_reg + " maxiter=" + cmdLine_maxiter)
+
+X = load($X)
+
+check_X = sum(X)
+if(check_X == 0):
+    print("X has no non-zeros")
+else:
+    Y = load($Y)
+    intercept = cmdLine_icpt
+    num_classes = $classes
+    epsilon = cmdLine_tol
+    lambda = cmdLine_reg
+    max_iterations = cmdLine_maxiter
+    
+    num_samples = nrow(X)
+    num_features = ncol(X)
+    
+    if (intercept == 1):
+        ones  = full(1, rows=num_samples, cols=1)
+        X = append(X, ones)
+    
+    num_rows_in_w = num_features
+    if(intercept == 1):
+        num_rows_in_w = num_rows_in_w + 1
+    w = full(0, rows=num_rows_in_w, cols=num_classes)
+    
+    debug_mat = full(-1, rows=max_iterations, cols=num_classes)
+    parfor(iter_class in 1:num_classes):
+        Y_local = 2 * ppred(Y, iter_class, "==") - 1
+        w_class = full(0, rows=num_features, cols=1)
+        if (intercept == 1):
+            zero_matrix = full(0, rows=1, cols=1)
+            w_class = transpose(append(transpose(w_class), zero_matrix))
+        g_old = dot(transpose(X), Y_local)
+        s = g_old
+        
+        Xw = full(0, rows=nrow(X), cols=1)
+        iter = 0
+        continue = 1
+        while(continue == 1):
+            # minimizing primal obj along direction s
+            step_sz = 0
+            Xd = dot(X, s)
+            wd = lambda * sum(w_class * s)
+            dd = lambda * sum(s * s)
+            continue1 = 1
+            while(continue1 == 1):
+                tmp_Xw = Xw + step_sz*Xd
+                out = 1 - Y_local * (tmp_Xw)
+                sv = ppred(out, 0, ">")
+                out = out * sv
+                g = wd + step_sz*dd - sum(out * Y_local * Xd)
+                h = dd + sum(Xd * sv * Xd)
+                step_sz = step_sz - g/h
+                if (g*g/h < 0.0000000001):
+                    continue1 = 0
+            
+            #update weights
+            w_class = w_class + step_sz*s
+            Xw = Xw + step_sz*Xd
+            
+            out = 1 - Y_local * Xw
+            sv = ppred(out, 0, ">")
+            out = sv * out
+            obj = 0.5 * sum(out * out) + lambda/2 * sum(w_class * w_class)
+            g_new = dot(transpose(X), (out * Y_local)) - lambda * w_class
+            
+            tmp = sum(s * g_old)
+            
+            train_acc = sum(ppred(Y_local*(dot(X, w_class)), 0, ">="))/num_samples*100
+            print("For class " + iter_class + " iteration " + iter + " training accuracy: " + train_acc)
+            debug_mat[iter+1,iter_class] = obj
+            
+            if((step_sz*tmp < epsilon*obj) | (iter >= max_iterations-1)):
+                continue = 0
+            
+            #non-linear CG step
+            be = sum(g_new * g_new)/sum(g_old * g_old)
+            s = be * s + g_new
+            g_old = g_new
+            
+            iter = iter + 1
+        # end while(continue == 1)
+        
+        w[,iter_class] = w_class
+    # end parfor(iter_class in 1:num_classes)
+    
+    save(w, $model, format=cmdLine_fmt)
+    
+    debug_str = "# Class, Iter, Obj"
+    for(iter_class in 1:ncol(debug_mat)):
+        for(iter in 1:nrow(debug_mat)):
+            obj = castAsScalar(debug_mat[iter, iter_class])
+            if(obj != -1):
+                debug_str = append(debug_str, iter_class + "," + iter + "," + obj)
+    save(debug_str, $Log)
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/src/test/scripts/applications/mdabivar/MDABivariateStats.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/mdabivar/MDABivariateStats.R b/src/test/scripts/applications/mdabivar/MDABivariateStats.R
index 7715c5e..1844bbb 100644
--- a/src/test/scripts/applications/mdabivar/MDABivariateStats.R
+++ b/src/test/scripts/applications/mdabivar/MDABivariateStats.R
@@ -1,294 +1,294 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-bivar_ss = function(X, Y) {
-
-    # Unweighted co-variance
-    covXY = cov(X,Y)
-
-    # compute standard deviations for both X and Y by computing 2^nd central moment
-    m2X = var(X)
-    m2Y = var(Y)
-    sigmaX = sqrt(m2X)
-    sigmaY = sqrt(m2Y)
-
-    # Pearson's R
-    R = covXY / (sigmaX*sigmaY)
-
-    return(list("R" = R, "covXY" = covXY, "sigmaX" = sigmaX, "sigmaY" = sigmaY))
-}
-
-# -----------------------------------------------------------------------------------------------------------
-
-bivar_cc = function(A, B) {
-
-    # Contingency Table
-    F = table(A,B)
-    
-    # Chi-Squared
-    cst = chisq.test(F)
-
-    r = rowSums(F)
-    c = colSums(F)
-    
-    chi_squared = as.numeric(cst[1])
-
-    # compute p-value
-    pValue = as.numeric(cst[3])
-
-    # Assign return values
-    pval = pValue
-    contingencyTable = F
-    rowMarginals = r
-    colMarginals = c
-
-    return(list("pval" = pval, "contingencyTable" = contingencyTable, "rowMarginals" = rowMarginals, "colMarginals" = colMarginals))
-}
-
-# -----------------------------------------------------------------------------------------------------------
-
-# Y points to SCALE variable
-# A points to CATEGORICAL variable
-bivar_sc = function(Y, A) {
-    # mean and variance in target variable
-    W = length(A)
-    my = mean(Y)
-    varY = var(Y)
-
-    # category-wise (frequencies, means, variances)
-    CFreqs = as.matrix(table(A)) 
-
-    CMeans = as.matrix(aggregate(Y, by=list(A), "mean")$x)
-
-    CVars = as.matrix(aggregate(Y, by=list(A), "var")$x)
-    CVars[is.na(CVars)] <- 0
-
-    # number of categories
-    R = nrow(CFreqs)
-    df1 = R-1
-    df2 = W-R
-
-    anova_num = sum( (CFreqs*(CMeans-my)^2) )/(R-1)
-    anova_den = sum( (CFreqs-1)*CVars )/(W-R)
-    AnovaF = anova_num/anova_den
-    pVal = 1-pf(AnovaF, df1, df2)
-
-    return(list("pVal" = pVal, "CFreqs" = CFreqs, "CMeans" = CMeans, "CVars" = CVars))
-}
-
-# Main starts here -----------------------------------------------------------------------------------------------------------
-
-args <- commandArgs(TRUE)
-
-library(Matrix)
-
-# input data set
-D = readMM(paste(args[1], "X.mtx", sep=""));
-
-# label attr id (must be a valid index > 0)  
-label_index = as.integer(args[2])
-
-# feature attributes, column vector of indices
-feature_indices = readMM(paste(args[1], "feature_indices.mtx", sep="")) 
-
-# can be either 1 (scale) or 0 (categorical)
-label_measurement_level = as.integer(args[3]) 
-
-# measurement levels for features, 0/1 column vector
-feature_measurement_levels = readMM(paste(args[1], "feature_measurement_levels.mtx", sep="")) 
-
-sz = ncol(D)
-
-# store for pvalues and pearson's r
-stats = matrix(0, sz, 1)
-# store for type of test performed: 1 is chi-sq, 2 is ftest, 3 is pearson's
-tests = matrix(0, sz, 1)
-# store for covariances used to compute pearson's r
-covariances = matrix(0, sz, 1)
-# store for standard deviations used to compute pearson's r
-standard_deviations = matrix(0, sz, 1)
-
-labels = D[,label_index]
-
-labelCorrection = 0
-if(label_measurement_level == 1){
-	numLabels = length(labels)
-        cmLabels = var(labels)
-    	stdLabels = sqrt(cmLabels)
-	standard_deviations[label_index,1] = stdLabels
-}else{
-	labelCorrection = 1 - min(labels)
-	labels = labels + labelCorrection
-}
-
-mx = apply(D, 2, max)
-mn = apply(D, 2, min)	
-num_distinct_values = mx-mn+1
-max_num_distinct_values = 0
-for(i1 in 1:nrow(feature_indices)){
-	feature_index1 = feature_indices[i1,1]
-	num = num_distinct_values[feature_index1]
-	if(feature_measurement_levels[i1,1] == 0 & num >= max_num_distinct_values){
-		max_num_distinct_values = num
-	}
-}
-distinct_label_values = matrix(0, 1, 1)	
-contingencyTableSz = 1
-maxNumberOfGroups = 1
-if(max_num_distinct_values != 0){
-	maxNumberOfGroups = max_num_distinct_values
-}
-if(label_measurement_level==0){
-	distinct_label_values = as.data.frame(table(labels))$Freq
-	if(max_num_distinct_values != 0){
-		contingencyTableSz = max_num_distinct_values*length(distinct_label_values)
-	}
-	maxNumberOfGroups = max(maxNumberOfGroups, length(distinct_label_values))
-}
-# store for contingency table cell values
-contingencyTablesCounts = matrix(0, sz, contingencyTableSz)
-# store for contingency table label(row) assignments
-contingencyTablesLabelValues = matrix(0, sz, contingencyTableSz)
-# store for contingency table feature(col) assignments
-contingencyTablesFeatureValues = matrix(0, sz, contingencyTableSz)
-# store for distinct values
-featureValues = matrix(0, sz, maxNumberOfGroups)
-# store for counts of distinct values
-featureCounts = matrix(0, sz, maxNumberOfGroups)
-# store for group means
-featureMeans = matrix(0, sz, maxNumberOfGroups)
-# store for group standard deviations
-featureSTDs = matrix(0, sz, maxNumberOfGroups)
-
-if(label_measurement_level == 0){
-	featureCounts[label_index,1:length(distinct_label_values)] = distinct_label_values
-	for(i2 in 1:length(distinct_label_values)){
-		featureValues[label_index,i2] = i2-labelCorrection
-	}
-}
-
-for(i3 in 1:nrow(feature_indices)){
-	feature_index2 = feature_indices[i3,1]
-	feature_measurement_level = feature_measurement_levels[i3,1]
-	
-	feature = D[,feature_index2]
-	
-	if(feature_measurement_level == 0){
-		featureCorrection = 1 - min(feature)
-		feature = feature + featureCorrection
-			
-		if(label_measurement_level == feature_measurement_level){
-		  # categorical-categorical
-		  tests[feature_index2,1] = 1
-
-		  ret = bivar_cc(labels, feature)
-                  pVal = ret$pval
-                  contingencyTable = ret$contingencyTable
-                  rowMarginals = ret$rowMarginals
-                  colMarginals = ret$colMarginals
-
-		  stats[feature_index2,1] = pVal
-			
-		  sz3 = nrow(contingencyTable)*ncol(contingencyTable)
-			
-		  contingencyTableCounts = matrix(0, 1, sz3)
-		  contingencyTableLabelValues = matrix(0, 1, sz3)
-		  contingencyTableFeatureValues = matrix(0, 1, sz3)
-			
-            	  for(i4 in 1:nrow(contingencyTable)){
-		  	 for(j in 1:ncol(contingencyTable)){
-					#get rid of this, see *1 below
-					contingencyTableCounts[1, ncol(contingencyTable)*(i4-1)+j] = contingencyTable[i4,j]
-					
-					contingencyTableLabelValues[1, ncol(contingencyTable)*(i4-1)+j] = i4-labelCorrection
-					contingencyTableFeatureValues[1, ncol(contingencyTable)*(i4-1)+j] = j-featureCorrection 
-				}
-			}
-			contingencyTablesCounts[feature_index2,1:sz3] = contingencyTableCounts
-            
-			contingencyTablesLabelValues[feature_index2,1:sz3] = contingencyTableLabelValues
-			contingencyTablesFeatureValues[feature_index2,1:sz3] = contingencyTableFeatureValues
-			
-			featureCounts[feature_index2,1:length(colMarginals)] = colMarginals
-			for(i5 in 1:length(colMarginals)){
-				featureValues[feature_index2,i5] = i5-featureCorrection
-			}
-		}else{
-			# label is scale, feature is categorical
-			tests[feature_index2,1] = 2
-			
-			ret = bivar_sc(labels, feature)
-                  pVal = ret$pVal
-                  frequencies = ret$CFreqs
-                  means = ret$CMeans
-                  variances = ret$CVars
-
-			stats[feature_index2,1] = pVal
-			featureCounts[feature_index2,1:nrow(frequencies)] = t(frequencies)
-			for(i6 in 1:nrow(frequencies)){
-				featureValues[feature_index2,i6] = i6 - featureCorrection
-			}
-			featureMeans[feature_index2,1:nrow(means)] = t(means)
-			featureSTDs[feature_index2,1:nrow(variances)] = t(sqrt(variances))
-		}
-	}else{
-		if(label_measurement_level == feature_measurement_level){
-		  # scale-scale
-		  tests[feature_index2,1] = 3
-
-		  ret = bivar_ss(labels, feature)
-                  r = ret$R
-                  covariance = ret$covXY
-                  stdX = ret$sigmaX
-                  stdY = ret$sigmaY
- 
-		  stats[feature_index2,1] = r
-		  covariances[feature_index2,1] = covariance
-		  standard_deviations[feature_index2,1] = stdY
-		}else{
-		  # label is categorical, feature is scale
-		  tests[feature_index2,1] = 2
-			
-		  ret = bivar_sc(feature, labels)
-		  pVal = ret$pVal
-		  frequencies = ret$CFreqs
-                  means = ret$CMeans
-		  variances = ret$CVars
-			
-		  stats[feature_index2,1] = pVal
-		  featureMeans[feature_index2,1:nrow(means)] = t(means)
-		  featureSTDs[feature_index2,1:nrow(variances)] = t(sqrt(variances))
-		}
-	}
-}
-
-writeMM(as(stats, "CsparseMatrix"), paste(args[4], "stats", sep=""))
-writeMM(as(tests, "CsparseMatrix"), paste(args[4], "tests", sep=""))
-writeMM(as(covariances, "CsparseMatrix"), paste(args[4], "covariances", sep=""))
-writeMM(as(standard_deviations, "CsparseMatrix"), paste(args[4], "standard_deviations", sep=""))
-writeMM(as(contingencyTablesCounts, "CsparseMatrix"), paste(args[4], "contingencyTablesCounts", sep=""))
-writeMM(as(contingencyTablesLabelValues, "CsparseMatrix"), paste(args[4], "contingencyTablesLabelValues", sep=""))
-writeMM(as(contingencyTablesFeatureValues, "CsparseMatrix"), paste(args[4], "contingencyTablesFeatureValues", sep=""))
-writeMM(as(featureValues, "CsparseMatrix"), paste(args[4], "featureValues", sep=""))
-writeMM(as(featureCounts, "CsparseMatrix"), paste(args[4], "featureCounts", sep=""))
-writeMM(as(featureMeans, "CsparseMatrix"), paste(args[4], "featureMeans", sep=""))
-writeMM(as(featureSTDs, "CsparseMatrix"), paste(args[4], "featureSTDs", sep=""))
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+bivar_ss = function(X, Y) {
+
+    # Unweighted co-variance
+    covXY = cov(X,Y)
+
+    # compute standard deviations for both X and Y by computing 2^nd central moment
+    m2X = var(X)
+    m2Y = var(Y)
+    sigmaX = sqrt(m2X)
+    sigmaY = sqrt(m2Y)
+
+    # Pearson's R
+    R = covXY / (sigmaX*sigmaY)
+
+    return(list("R" = R, "covXY" = covXY, "sigmaX" = sigmaX, "sigmaY" = sigmaY))
+}
+
+# -----------------------------------------------------------------------------------------------------------
+
+bivar_cc = function(A, B) {
+
+    # Contingency Table
+    F = table(A,B)
+    
+    # Chi-Squared
+    cst = chisq.test(F)
+
+    r = rowSums(F)
+    c = colSums(F)
+    
+    chi_squared = as.numeric(cst[1])
+
+    # compute p-value
+    pValue = as.numeric(cst[3])
+
+    # Assign return values
+    pval = pValue
+    contingencyTable = F
+    rowMarginals = r
+    colMarginals = c
+
+    return(list("pval" = pval, "contingencyTable" = contingencyTable, "rowMarginals" = rowMarginals, "colMarginals" = colMarginals))
+}
+
+# -----------------------------------------------------------------------------------------------------------
+
+# Y points to SCALE variable
+# A points to CATEGORICAL variable
+bivar_sc = function(Y, A) {
+    # mean and variance in target variable
+    W = length(A)
+    my = mean(Y)
+    varY = var(Y)
+
+    # category-wise (frequencies, means, variances)
+    CFreqs = as.matrix(table(A)) 
+
+    CMeans = as.matrix(aggregate(Y, by=list(A), "mean")$x)
+
+    CVars = as.matrix(aggregate(Y, by=list(A), "var")$x)
+    CVars[is.na(CVars)] <- 0
+
+    # number of categories
+    R = nrow(CFreqs)
+    df1 = R-1
+    df2 = W-R
+
+    anova_num = sum( (CFreqs*(CMeans-my)^2) )/(R-1)
+    anova_den = sum( (CFreqs-1)*CVars )/(W-R)
+    AnovaF = anova_num/anova_den
+    pVal = 1-pf(AnovaF, df1, df2)
+
+    return(list("pVal" = pVal, "CFreqs" = CFreqs, "CMeans" = CMeans, "CVars" = CVars))
+}
+
+# Main starts here -----------------------------------------------------------------------------------------------------------
+
+args <- commandArgs(TRUE)
+
+library(Matrix)
+
+# input data set
+D = readMM(paste(args[1], "X.mtx", sep=""));
+
+# label attr id (must be a valid index > 0)  
+label_index = as.integer(args[2])
+
+# feature attributes, column vector of indices
+feature_indices = readMM(paste(args[1], "feature_indices.mtx", sep="")) 
+
+# can be either 1 (scale) or 0 (categorical)
+label_measurement_level = as.integer(args[3]) 
+
+# measurement levels for features, 0/1 column vector
+feature_measurement_levels = readMM(paste(args[1], "feature_measurement_levels.mtx", sep="")) 
+
+sz = ncol(D)
+
+# store for pvalues and pearson's r
+stats = matrix(0, sz, 1)
+# store for type of test performed: 1 is chi-sq, 2 is ftest, 3 is pearson's
+tests = matrix(0, sz, 1)
+# store for covariances used to compute pearson's r
+covariances = matrix(0, sz, 1)
+# store for standard deviations used to compute pearson's r
+standard_deviations = matrix(0, sz, 1)
+
+labels = D[,label_index]
+
+labelCorrection = 0
+if(label_measurement_level == 1){
+	numLabels = length(labels)
+        cmLabels = var(labels)
+    	stdLabels = sqrt(cmLabels)
+	standard_deviations[label_index,1] = stdLabels
+}else{
+	labelCorrection = 1 - min(labels)
+	labels = labels + labelCorrection
+}
+
+mx = apply(D, 2, max)
+mn = apply(D, 2, min)	
+num_distinct_values = mx-mn+1
+max_num_distinct_values = 0
+for(i1 in 1:nrow(feature_indices)){
+	feature_index1 = feature_indices[i1,1]
+	num = num_distinct_values[feature_index1]
+	if(feature_measurement_levels[i1,1] == 0 & num >= max_num_distinct_values){
+		max_num_distinct_values = num
+	}
+}
+distinct_label_values = matrix(0, 1, 1)	
+contingencyTableSz = 1
+maxNumberOfGroups = 1
+if(max_num_distinct_values != 0){
+	maxNumberOfGroups = max_num_distinct_values
+}
+if(label_measurement_level==0){
+	distinct_label_values = as.data.frame(table(labels))$Freq
+	if(max_num_distinct_values != 0){
+		contingencyTableSz = max_num_distinct_values*length(distinct_label_values)
+	}
+	maxNumberOfGroups = max(maxNumberOfGroups, length(distinct_label_values))
+}
+# store for contingency table cell values
+contingencyTablesCounts = matrix(0, sz, contingencyTableSz)
+# store for contingency table label(row) assignments
+contingencyTablesLabelValues = matrix(0, sz, contingencyTableSz)
+# store for contingency table feature(col) assignments
+contingencyTablesFeatureValues = matrix(0, sz, contingencyTableSz)
+# store for distinct values
+featureValues = matrix(0, sz, maxNumberOfGroups)
+# store for counts of distinct values
+featureCounts = matrix(0, sz, maxNumberOfGroups)
+# store for group means
+featureMeans = matrix(0, sz, maxNumberOfGroups)
+# store for group standard deviations
+featureSTDs = matrix(0, sz, maxNumberOfGroups)
+
+if(label_measurement_level == 0){
+	featureCounts[label_index,1:length(distinct_label_values)] = distinct_label_values
+	for(i2 in 1:length(distinct_label_values)){
+		featureValues[label_index,i2] = i2-labelCorrection
+	}
+}
+
+for(i3 in 1:nrow(feature_indices)){
+	feature_index2 = feature_indices[i3,1]
+	feature_measurement_level = feature_measurement_levels[i3,1]
+	
+	feature = D[,feature_index2]
+	
+	if(feature_measurement_level == 0){
+		featureCorrection = 1 - min(feature)
+		feature = feature + featureCorrection
+			
+		if(label_measurement_level == feature_measurement_level){
+		  # categorical-categorical
+		  tests[feature_index2,1] = 1
+
+		  ret = bivar_cc(labels, feature)
+                  pVal = ret$pval
+                  contingencyTable = ret$contingencyTable
+                  rowMarginals = ret$rowMarginals
+                  colMarginals = ret$colMarginals
+
+		  stats[feature_index2,1] = pVal
+			
+		  sz3 = nrow(contingencyTable)*ncol(contingencyTable)
+			
+		  contingencyTableCounts = matrix(0, 1, sz3)
+		  contingencyTableLabelValues = matrix(0, 1, sz3)
+		  contingencyTableFeatureValues = matrix(0, 1, sz3)
+			
+            	  for(i4 in 1:nrow(contingencyTable)){
+		  	 for(j in 1:ncol(contingencyTable)){
+					#get rid of this, see *1 below
+					contingencyTableCounts[1, ncol(contingencyTable)*(i4-1)+j] = contingencyTable[i4,j]
+					
+					contingencyTableLabelValues[1, ncol(contingencyTable)*(i4-1)+j] = i4-labelCorrection
+					contingencyTableFeatureValues[1, ncol(contingencyTable)*(i4-1)+j] = j-featureCorrection 
+				}
+			}
+			contingencyTablesCounts[feature_index2,1:sz3] = contingencyTableCounts
+            
+			contingencyTablesLabelValues[feature_index2,1:sz3] = contingencyTableLabelValues
+			contingencyTablesFeatureValues[feature_index2,1:sz3] = contingencyTableFeatureValues
+			
+			featureCounts[feature_index2,1:length(colMarginals)] = colMarginals
+			for(i5 in 1:length(colMarginals)){
+				featureValues[feature_index2,i5] = i5-featureCorrection
+			}
+		}else{
+			# label is scale, feature is categorical
+			tests[feature_index2,1] = 2
+			
+			ret = bivar_sc(labels, feature)
+                  pVal = ret$pVal
+                  frequencies = ret$CFreqs
+                  means = ret$CMeans
+                  variances = ret$CVars
+
+			stats[feature_index2,1] = pVal
+			featureCounts[feature_index2,1:nrow(frequencies)] = t(frequencies)
+			for(i6 in 1:nrow(frequencies)){
+				featureValues[feature_index2,i6] = i6 - featureCorrection
+			}
+			featureMeans[feature_index2,1:nrow(means)] = t(means)
+			featureSTDs[feature_index2,1:nrow(variances)] = t(sqrt(variances))
+		}
+	}else{
+		if(label_measurement_level == feature_measurement_level){
+		  # scale-scale
+		  tests[feature_index2,1] = 3
+
+		  ret = bivar_ss(labels, feature)
+                  r = ret$R
+                  covariance = ret$covXY
+                  stdX = ret$sigmaX
+                  stdY = ret$sigmaY
+ 
+		  stats[feature_index2,1] = r
+		  covariances[feature_index2,1] = covariance
+		  standard_deviations[feature_index2,1] = stdY
+		}else{
+		  # label is categorical, feature is scale
+		  tests[feature_index2,1] = 2
+			
+		  ret = bivar_sc(feature, labels)
+		  pVal = ret$pVal
+		  frequencies = ret$CFreqs
+                  means = ret$CMeans
+		  variances = ret$CVars
+			
+		  stats[feature_index2,1] = pVal
+		  featureMeans[feature_index2,1:nrow(means)] = t(means)
+		  featureSTDs[feature_index2,1:nrow(variances)] = t(sqrt(variances))
+		}
+	}
+}
+
+writeMM(as(stats, "CsparseMatrix"), paste(args[4], "stats", sep=""))
+writeMM(as(tests, "CsparseMatrix"), paste(args[4], "tests", sep=""))
+writeMM(as(covariances, "CsparseMatrix"), paste(args[4], "covariances", sep=""))
+writeMM(as(standard_deviations, "CsparseMatrix"), paste(args[4], "standard_deviations", sep=""))
+writeMM(as(contingencyTablesCounts, "CsparseMatrix"), paste(args[4], "contingencyTablesCounts", sep=""))
+writeMM(as(contingencyTablesLabelValues, "CsparseMatrix"), paste(args[4], "contingencyTablesLabelValues", sep=""))
+writeMM(as(contingencyTablesFeatureValues, "CsparseMatrix"), paste(args[4], "contingencyTablesFeatureValues", sep=""))
+writeMM(as(featureValues, "CsparseMatrix"), paste(args[4], "featureValues", sep=""))
+writeMM(as(featureCounts, "CsparseMatrix"), paste(args[4], "featureCounts", sep=""))
+writeMM(as(featureMeans, "CsparseMatrix"), paste(args[4], "featureMeans", sep=""))
+writeMM(as(featureSTDs, "CsparseMatrix"), paste(args[4], "featureSTDs", sep=""))
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/src/test/scripts/applications/mdabivar/MDABivariateStats.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/mdabivar/MDABivariateStats.dml b/src/test/scripts/applications/mdabivar/MDABivariateStats.dml
index 1e04154..56163ad 100644
--- a/src/test/scripts/applications/mdabivar/MDABivariateStats.dml
+++ b/src/test/scripts/applications/mdabivar/MDABivariateStats.dml
@@ -1,268 +1,268 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# Main starts here -----------------------------------------------------------------------------------------------------------
-
-# input data set
-D = read($1)
-
-# label attr id (must be a valid index > 0)  
-label_index = $2
-
-# feature attributes, column vector of indices
-feature_indices = read($3) 
-
-# can be either 1 (scale) or 0 (categorical)
-label_measurement_level = $4 
-
-# measurement levels for features, 0/1 column vector
-feature_measurement_levels = read($5) 
-
-sz = ncol(D)
-
-# store for pvalues and pearson's r
-stats = matrix(0, rows=sz, cols=1)
-# store for type of test performed: 1 is chi-sq, 2 is ftest, 3 is pearson's
-tests = matrix(0, rows=sz, cols=1)
-# store for covariances used to compute pearson's r
-covariances = matrix(0, rows=sz, cols=1)
-# store for standard deviations used to compute pearson's r
-standard_deviations = matrix(0, rows=sz, cols=1)
-
-labels = D[,label_index]
-
-labelCorrection = 0
-if(label_measurement_level == 1){
-	numLabels = nrow(labels)
-    cmLabels = moment(labels,2)
-    stdLabels = sqrt(cmLabels * (numLabels/(numLabels-1.0)) )
-	standard_deviations[label_index,1] = stdLabels
-}else{
-	labelCorrection = 1 - min(labels)
-	labels = labels + labelCorrection
-}
-
-mx = colMaxs(D)
-mn = colMins(D)	
-num_distinct_values = mx-mn+1
-max_num_distinct_values = 0
-for(i1 in 1:nrow(feature_indices)){
-	feature_index1 = castAsScalar(feature_indices[i1,1])
-	num = castAsScalar(num_distinct_values[1,feature_index1])
-	if(castAsScalar(feature_measurement_levels[i1,1]) == 0 & num >= max_num_distinct_values){
-		max_num_distinct_values = num
-	}
-}
-distinct_label_values = matrix(0, rows=1, cols=1)	
-contingencyTableSz = 1
-maxNumberOfGroups = 1
-if(max_num_distinct_values != 0){
-	maxNumberOfGroups = max_num_distinct_values
-}
-if(label_measurement_level==0){
-	distinct_label_values = aggregate(target=labels, groups=labels, fn="count")
-	if(max_num_distinct_values != 0){
-		contingencyTableSz = max_num_distinct_values*nrow(distinct_label_values)
-	}
-	maxNumberOfGroups = max(maxNumberOfGroups, nrow(distinct_label_values))
-}
-# store for contingency table cell values
-contingencyTablesCounts = matrix(0, rows=sz, cols=contingencyTableSz)
-# store for contingency table label(row) assignments
-contingencyTablesLabelValues = matrix(0, rows=sz, cols=contingencyTableSz)
-# store for contingency table feature(col) assignments
-contingencyTablesFeatureValues = matrix(0, rows=sz, cols=contingencyTableSz)
-# store for distinct values
-featureValues = matrix(0, rows=sz, cols=maxNumberOfGroups)
-# store for counts of distinct values
-featureCounts = matrix(0, rows=sz, cols=maxNumberOfGroups)
-# store for group means
-featureMeans = matrix(0, rows=sz, cols=maxNumberOfGroups)
-# store for group standard deviations
-featureSTDs = matrix(0, rows=sz, cols=maxNumberOfGroups)
-
-if(label_measurement_level == 0){
-	featureCounts[label_index,1:nrow(distinct_label_values)] = t(distinct_label_values)
-	parfor(i2 in 1:nrow(distinct_label_values)){
-		featureValues[label_index,i2] = i2-labelCorrection
-	}
-}
-
-parfor(i3 in 1:nrow(feature_indices), check=0){
-	feature_index2 = castAsScalar(feature_indices[i3,1])
-	feature_measurement_level = castAsScalar(feature_measurement_levels[i3,1])
-	
-	feature = D[,feature_index2]
-	
-	if(feature_measurement_level == 0){
-		featureCorrection = 1 - min(feature)
-		feature = feature + featureCorrection
-			
-		if(label_measurement_level == feature_measurement_level){
-			# categorical-categorical
-			tests[feature_index2,1] = 1
-			[pVal, contingencyTable, rowMarginals, colMarginals] = bivar_cc(labels, feature)
-			stats[feature_index2,1] = pVal
-			
-			sz3=1
-			if(1==1){
-				sz3 = nrow(contingencyTable)*ncol(contingencyTable)
-			}
-			contingencyTableLabelValues = matrix(0, rows=1, cols=sz3)
-			contingencyTableFeatureValues = matrix(0, rows=1, cols=sz3)
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# Main starts here -----------------------------------------------------------------------------------------------------------
+
+# input data set
+D = read($1)
+
+# label attr id (must be a valid index > 0)  
+label_index = $2
+
+# feature attributes, column vector of indices
+feature_indices = read($3) 
+
+# can be either 1 (scale) or 0 (categorical)
+label_measurement_level = $4 
+
+# measurement levels for features, 0/1 column vector
+feature_measurement_levels = read($5) 
+
+sz = ncol(D)
+
+# store for pvalues and pearson's r
+stats = matrix(0, rows=sz, cols=1)
+# store for type of test performed: 1 is chi-sq, 2 is ftest, 3 is pearson's
+tests = matrix(0, rows=sz, cols=1)
+# store for covariances used to compute pearson's r
+covariances = matrix(0, rows=sz, cols=1)
+# store for standard deviations used to compute pearson's r
+standard_deviations = matrix(0, rows=sz, cols=1)
+
+labels = D[,label_index]
+
+labelCorrection = 0
+if(label_measurement_level == 1){
+	numLabels = nrow(labels)
+    cmLabels = moment(labels,2)
+    stdLabels = sqrt(cmLabels * (numLabels/(numLabels-1.0)) )
+	standard_deviations[label_index,1] = stdLabels
+}else{
+	labelCorrection = 1 - min(labels)
+	labels = labels + labelCorrection
+}
+
+mx = colMaxs(D)
+mn = colMins(D)	
+num_distinct_values = mx-mn+1
+max_num_distinct_values = 0
+for(i1 in 1:nrow(feature_indices)){
+	feature_index1 = castAsScalar(feature_indices[i1,1])
+	num = castAsScalar(num_distinct_values[1,feature_index1])
+	if(castAsScalar(feature_measurement_levels[i1,1]) == 0 & num >= max_num_distinct_values){
+		max_num_distinct_values = num
+	}
+}
+distinct_label_values = matrix(0, rows=1, cols=1)	
+contingencyTableSz = 1
+maxNumberOfGroups = 1
+if(max_num_distinct_values != 0){
+	maxNumberOfGroups = max_num_distinct_values
+}
+if(label_measurement_level==0){
+	distinct_label_values = aggregate(target=labels, groups=labels, fn="count")
+	if(max_num_distinct_values != 0){
+		contingencyTableSz = max_num_distinct_values*nrow(distinct_label_values)
+	}
+	maxNumberOfGroups = max(maxNumberOfGroups, nrow(distinct_label_values))
+}
+# store for contingency table cell values
+contingencyTablesCounts = matrix(0, rows=sz, cols=contingencyTableSz)
+# store for contingency table label(row) assignments
+contingencyTablesLabelValues = matrix(0, rows=sz, cols=contingencyTableSz)
+# store for contingency table feature(col) assignments
+contingencyTablesFeatureValues = matrix(0, rows=sz, cols=contingencyTableSz)
+# store for distinct values
+featureValues = matrix(0, rows=sz, cols=maxNumberOfGroups)
+# store for counts of distinct values
+featureCounts = matrix(0, rows=sz, cols=maxNumberOfGroups)
+# store for group means
+featureMeans = matrix(0, rows=sz, cols=maxNumberOfGroups)
+# store for group standard deviations
+featureSTDs = matrix(0, rows=sz, cols=maxNumberOfGroups)
+
+if(label_measurement_level == 0){
+	featureCounts[label_index,1:nrow(distinct_label_values)] = t(distinct_label_values)
+	parfor(i2 in 1:nrow(distinct_label_values)){
+		featureValues[label_index,i2] = i2-labelCorrection
+	}
+}
+
+parfor(i3 in 1:nrow(feature_indices), check=0){
+	feature_index2 = castAsScalar(feature_indices[i3,1])
+	feature_measurement_level = castAsScalar(feature_measurement_levels[i3,1])
+	
+	feature = D[,feature_index2]
+	
+	if(feature_measurement_level == 0){
+		featureCorrection = 1 - min(feature)
+		feature = feature + featureCorrection
 			
-            parfor(i4 in 1:nrow(contingencyTable), check=0){
-				parfor(j in 1:ncol(contingencyTable), check=0){
-					contingencyTableLabelValues[1, ncol(contingencyTable)*(i4-1)+j] = i4-labelCorrection
-					contingencyTableFeatureValues[1, ncol(contingencyTable)*(i4-1)+j] = j-featureCorrection 
-				}
-			}
+		if(label_measurement_level == feature_measurement_level){
+			# categorical-categorical
+			tests[feature_index2,1] = 1
+			[pVal, contingencyTable, rowMarginals, colMarginals] = bivar_cc(labels, feature)
+			stats[feature_index2,1] = pVal
+			
+			sz3=1
+			if(1==1){
+				sz3 = nrow(contingencyTable)*ncol(contingencyTable)
+			}
+			contingencyTableLabelValues = matrix(0, rows=1, cols=sz3)
+			contingencyTableFeatureValues = matrix(0, rows=1, cols=sz3)
+			
+            parfor(i4 in 1:nrow(contingencyTable), check=0){
+				parfor(j in 1:ncol(contingencyTable), check=0){
+					contingencyTableLabelValues[1, ncol(contingencyTable)*(i4-1)+j] = i4-labelCorrection
+					contingencyTableFeatureValues[1, ncol(contingencyTable)*(i4-1)+j] = j-featureCorrection 
+				}
+			}
 			contingencyTableCounts = matrix(contingencyTable, rows=1, cols=sz3, byrow=TRUE)
-            contingencyTablesCounts[feature_index2,1:sz3] = contingencyTableCounts
-            
-			contingencyTablesLabelValues[feature_index2,1:sz3] = contingencyTableLabelValues
-			contingencyTablesFeatureValues[feature_index2,1:sz3] = contingencyTableFeatureValues
-			
-			featureCounts[feature_index2,1:ncol(colMarginals)] = colMarginals
-			parfor(i5 in 1:ncol(colMarginals), check=0){
-				featureValues[feature_index2,i5] = i5-featureCorrection
-			}
-		}else{
-			# label is scale, feature is categorical
-			tests[feature_index2,1] = 2
-			[pVal, frequencies, means, variances] = bivar_sc(labels, feature)
-			stats[feature_index2,1] = pVal
-			featureCounts[feature_index2,1:nrow(frequencies)] = t(frequencies)
-			parfor(i6 in 1:nrow(frequencies), check=0){
-				featureValues[feature_index2,i6] = i6 - featureCorrection
-			}
-			featureMeans[feature_index2,1:nrow(means)] = t(means)
-			featureSTDs[feature_index2,1:nrow(variances)] = t(sqrt(variances))
-		}
-	}else{
-		if(label_measurement_level == feature_measurement_level){
-			# scale-scale
-			tests[feature_index2,1] = 3
-			[r, covariance, stdX, stdY] = bivar_ss(labels, feature)
-			stats[feature_index2,1] = r
-			covariances[feature_index2,1] = covariance
-			standard_deviations[feature_index2,1] = stdY
-		}else{
-			# label is categorical, feature is scale
-			tests[feature_index2,1] = 2
-			[pVal, frequencies, means, variances] = bivar_sc(feature, labels)
-			stats[feature_index2,1] = pVal
-			featureMeans[feature_index2,1:nrow(means)] = t(means)
-			featureSTDs[feature_index2,1:nrow(variances)] = t(sqrt(variances))
-		}
-	}
-}
-
-write(stats, $6, format="text")
-write(tests, $7, format="text")
-write(covariances, $8, format="text")
-write(standard_deviations, $9, format="text")
-write(contingencyTablesCounts, $10, format="text")
-write(contingencyTablesLabelValues, $11, format="text")
-write(contingencyTablesFeatureValues, $12, format="text")
-write(featureValues, $13, format="text")
-write(featureCounts, $14, format="text")
-write(featureMeans, $15, format="text")
-write(featureSTDs, $16, format="text")
-
-# -----------------------------------------------------------------------------------------------------------
-
-bivar_ss = function(Matrix[Double] X, Matrix[Double] Y) return (Double R, Double covXY, Double sigmaX, Double sigmaY) {
-
-    # Unweighted co-variance
-    covXY = cov(X,Y)
-
-    # compute standard deviations for both X and Y by computing 2^nd central moment
-    W = nrow(X)
-    m2X = moment(X,2)
-    m2Y = moment(Y,2)
-    sigmaX = sqrt(m2X * (W/(W-1.0)) )
-    sigmaY = sqrt(m2Y * (W/(W-1.0)) )
-
-
-    # Pearson's R
-    R = covXY / (sigmaX*sigmaY)
-}
-
-# -----------------------------------------------------------------------------------------------------------
-
-bivar_cc = function(Matrix[Double] A, Matrix[Double] B) return (Double pval, Matrix[Double] contingencyTable, Matrix[Double] rowMarginals, Matrix[Double] colMarginals) {
-
-    # Contingency Table
-    FF = table(A,B)
-
+            contingencyTablesCounts[feature_index2,1:sz3] = contingencyTableCounts
+            
+			contingencyTablesLabelValues[feature_index2,1:sz3] = contingencyTableLabelValues
+			contingencyTablesFeatureValues[feature_index2,1:sz3] = contingencyTableFeatureValues
+			
+			featureCounts[feature_index2,1:ncol(colMarginals)] = colMarginals
+			parfor(i5 in 1:ncol(colMarginals), check=0){
+				featureValues[feature_index2,i5] = i5-featureCorrection
+			}
+		}else{
+			# label is scale, feature is categorical
+			tests[feature_index2,1] = 2
+			[pVal, frequencies, means, variances] = bivar_sc(labels, feature)
+			stats[feature_index2,1] = pVal
+			featureCounts[feature_index2,1:nrow(frequencies)] = t(frequencies)
+			parfor(i6 in 1:nrow(frequencies), check=0){
+				featureValues[feature_index2,i6] = i6 - featureCorrection
+			}
+			featureMeans[feature_index2,1:nrow(means)] = t(means)
+			featureSTDs[feature_index2,1:nrow(variances)] = t(sqrt(variances))
+		}
+	}else{
+		if(label_measurement_level == feature_measurement_level){
+			# scale-scale
+			tests[feature_index2,1] = 3
+			[r, covariance, stdX, stdY] = bivar_ss(labels, feature)
+			stats[feature_index2,1] = r
+			covariances[feature_index2,1] = covariance
+			standard_deviations[feature_index2,1] = stdY
+		}else{
+			# label is categorical, feature is scale
+			tests[feature_index2,1] = 2
+			[pVal, frequencies, means, variances] = bivar_sc(feature, labels)
+			stats[feature_index2,1] = pVal
+			featureMeans[feature_index2,1:nrow(means)] = t(means)
+			featureSTDs[feature_index2,1:nrow(variances)] = t(sqrt(variances))
+		}
+	}
+}
+
+write(stats, $6, format="text")
+write(tests, $7, format="text")
+write(covariances, $8, format="text")
+write(standard_deviations, $9, format="text")
+write(contingencyTablesCounts, $10, format="text")
+write(contingencyTablesLabelValues, $11, format="text")
+write(contingencyTablesFeatureValues, $12, format="text")
+write(featureValues, $13, format="text")
+write(featureCounts, $14, format="text")
+write(featureMeans, $15, format="text")
+write(featureSTDs, $16, format="text")
+
+# -----------------------------------------------------------------------------------------------------------
+
+bivar_ss = function(Matrix[Double] X, Matrix[Double] Y) return (Double R, Double covXY, Double sigmaX, Double sigmaY) {
+
+    # Unweighted co-variance
+    covXY = cov(X,Y)
+
+    # compute standard deviations for both X and Y by computing 2^nd central moment
+    W = nrow(X)
+    m2X = moment(X,2)
+    m2Y = moment(Y,2)
+    sigmaX = sqrt(m2X * (W/(W-1.0)) )
+    sigmaY = sqrt(m2Y * (W/(W-1.0)) )
+
+
+    # Pearson's R
+    R = covXY / (sigmaX*sigmaY)
+}
+
+# -----------------------------------------------------------------------------------------------------------
+
+bivar_cc = function(Matrix[Double] A, Matrix[Double] B) return (Double pval, Matrix[Double] contingencyTable, Matrix[Double] rowMarginals, Matrix[Double] colMarginals) {
+
+    # Contingency Table
+    FF = table(A,B)
+
     tmp = removeEmpty(target=FF, margin="rows"); 
     F = removeEmpty(target=tmp, margin="cols");
 
-    # Chi-Squared
-    W = sum(F)
-    r = rowSums(F)
-    c = colSums(F)
-    E = (r %*% c)/W
-    E = ppred(E, 0, "==")*0.0001 + E
-    T = (F-E)^2/E
-    chi_squared = sum(T)
-
-    # compute p-value
-    degFreedom = (nrow(F)-1)*(ncol(F)-1)
-    pValue = pchisq(target=chi_squared, df=degFreedom, lower.tail=FALSE)
-
-
-    # Assign return values
-    pval = pValue
-    contingencyTable = F
-    rowMarginals = r
-    colMarginals = c
-}
-
-# -----------------------------------------------------------------------------------------------------------
-
-# Y points to SCALE variable
-# A points to CATEGORICAL variable
-bivar_sc = function(Matrix[Double] Y, Matrix[Double] A) return (Double pVal, Matrix[Double] CFreqs, Matrix[Double] CMeans, Matrix[Double] CVars ) {
-	# mean and variance in target variable
-    W = nrow(A)
-    my = mean(Y)
-    varY = moment(Y,2) * W/(W-1.0)
-
-    # category-wise (frequencies, means, variances)
-    CFreqs1 = aggregate(target=Y, groups=A, fn="count")
+    # Chi-Squared
+    W = sum(F)
+    r = rowSums(F)
+    c = colSums(F)
+    E = (r %*% c)/W
+    E = ppred(E, 0, "==")*0.0001 + E
+    T = (F-E)^2/E
+    chi_squared = sum(T)
+
+    # compute p-value
+    degFreedom = (nrow(F)-1)*(ncol(F)-1)
+    pValue = pchisq(target=chi_squared, df=degFreedom, lower.tail=FALSE)
+
+
+    # Assign return values
+    pval = pValue
+    contingencyTable = F
+    rowMarginals = r
+    colMarginals = c
+}
+
+# -----------------------------------------------------------------------------------------------------------
+
+# Y points to SCALE variable
+# A points to CATEGORICAL variable
+bivar_sc = function(Matrix[Double] Y, Matrix[Double] A) return (Double pVal, Matrix[Double] CFreqs, Matrix[Double] CMeans, Matrix[Double] CVars ) {
+	# mean and variance in target variable
+    W = nrow(A)
+    my = mean(Y)
+    varY = moment(Y,2) * W/(W-1.0)
+
+    # category-wise (frequencies, means, variances)
+    CFreqs1 = aggregate(target=Y, groups=A, fn="count")
     present_domain_vals_mat = removeEmpty(target=diag(1-ppred(CFreqs1, 0, "==")), margin="rows")
     CFreqs = present_domain_vals_mat %*% CFreqs1
 
-    CMeans = present_domain_vals_mat %*% aggregate(target=Y, groups=A, fn="mean")
-    CVars = present_domain_vals_mat %*% aggregate(target=Y, groups=A, fn="variance")
-    
-    # number of categories
-    R = nrow(CFreqs)
-    df1 = R-1
-    df2 = W-R
-
-	anova_num = sum( (CFreqs*(CMeans-my)^2) )/(R-1)
-    anova_den = sum( (CFreqs-1)*CVars )/(W-R)
-    AnovaF = anova_num/anova_den
-    pVal = pf(target=AnovaF, df1=df1, df2=df2, lower.tail=FALSE)
-}
+    CMeans = present_domain_vals_mat %*% aggregate(target=Y, groups=A, fn="mean")
+    CVars = present_domain_vals_mat %*% aggregate(target=Y, groups=A, fn="variance")
+    
+    # number of categories
+    R = nrow(CFreqs)
+    df1 = R-1
+    df2 = W-R
+
+	anova_num = sum( (CFreqs*(CMeans-my)^2) )/(R-1)
+    anova_den = sum( (CFreqs-1)*CVars )/(W-R)
+    AnovaF = anova_num/anova_den
+    pVal = pf(target=AnovaF, df1=df1, df2=df2, lower.tail=FALSE)
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/src/test/scripts/applications/mdabivar/MDABivariateStats.pydml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/mdabivar/MDABivariateStats.pydml b/src/test/scripts/applications/mdabivar/MDABivariateStats.pydml
index c899065..7fbc101 100644
--- a/src/test/scripts/applications/mdabivar/MDABivariateStats.pydml
+++ b/src/test/scripts/applications/mdabivar/MDABivariateStats.pydml
@@ -1,246 +1,246 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# Main starts here -----------------------------------------------------------------------------------------------------------
-
-# input data set
-D = load($1)
-
-# label attr id (must be a valid index > 0)  
-label_index = $2
-
-# feature attributes, column vector of indices
-feature_indices = load($3) 
-
-# can be either 1 (scale) or 0 (categorical)
-label_measurement_level = $4 
-
-# measurement levels for features, 0/1 column vector
-feature_measurement_levels = read($5) 
-
-sz = ncol(D)
-
-# store for pvalues and pearson's r
-stats = full(0, rows=sz, cols=1)
-# store for type of test performed: 1 is chi-sq, 2 is ftest, 3 is pearson's
-tests = full(0, rows=sz, cols=1)
-# store for covariances used to compute pearson's r
-covariances = full(0, rows=sz, cols=1)
-# store for standard deviations used to compute pearson's r
-standard_deviations = full(0, rows=sz, cols=1)
-
-labels = D[,label_index]
-
-labelCorrection = 0
-if(label_measurement_level == 1):
-    numLabels = nrow(labels)
-    cmLabels = moment(labels,2)
-    stdLabels = sqrt(cmLabels * (numLabels/(numLabels-1.0)) )
-    standard_deviations[label_index,1] = stdLabels
-else:
-    labelCorrection = 1 - min(labels)
-    labels = labels + labelCorrection
-
-mx = colMaxs(D)
-mn = colMins(D)
-num_distinct_values = mx-mn+1
-max_num_distinct_values = 0
-for(i1 in 1:nrow(feature_indices)):
-    feature_index1 = castAsScalar(feature_indices[i1,1])
-    num = castAsScalar(num_distinct_values[1,feature_index1])
-    if(castAsScalar(feature_measurement_levels[i1,1]) == 0 & num >= max_num_distinct_values):
-        max_num_distinct_values = num
-distinct_label_values = full(0, rows=1, cols=1)
-contingencyTableSz = 1
-maxNumberOfGroups = 1
-if(max_num_distinct_values != 0):
-    maxNumberOfGroups = max_num_distinct_values
-if(label_measurement_level==0):
-    distinct_label_values = aggregate(target=labels, groups=labels, fn="count")
-    if(max_num_distinct_values != 0):
-        contingencyTableSz = max_num_distinct_values*nrow(distinct_label_values)
-    maxNumberOfGroups = max(maxNumberOfGroups, nrow(distinct_label_values))
-# store for contingency table cell values
-contingencyTablesCounts = full(0, rows=sz, cols=contingencyTableSz)
-# store for contingency table label(row) assignments
-contingencyTablesLabelValues = full(0, rows=sz, cols=contingencyTableSz)
-# store for contingency table feature(col) assignments
-contingencyTablesFeatureValues = full(0, rows=sz, cols=contingencyTableSz)
-# store for distinct values
-featureValues = full(0, rows=sz, cols=maxNumberOfGroups)
-# store for counts of distinct values
-featureCounts = full(0, rows=sz, cols=maxNumberOfGroups)
-# store for group means
-featureMeans = full(0, rows=sz, cols=maxNumberOfGroups)
-# store for group standard deviations
-featureSTDs = full(0, rows=sz, cols=maxNumberOfGroups)
-
-if(label_measurement_level == 0):
-    featureCounts[label_index,1:nrow(distinct_label_values)] = transpose(distinct_label_values)
-    parfor(i2 in 1:nrow(distinct_label_values)):
-        featureValues[label_index,i2] = i2-labelCorrection
-
-parfor(i3 in 1:nrow(feature_indices), check=0):
-    feature_index2 = castAsScalar(feature_indices[i3,1])
-    feature_measurement_level = castAsScalar(feature_measurement_levels[i3,1])
-    
-    feature = D[,feature_index2]
-    
-    if(feature_measurement_level == 0):
-        featureCorrection = 1 - min(feature)
-        feature = feature + featureCorrection
-        
-        if(label_measurement_level == feature_measurement_level):
-            # categorical-categorical
-            tests[feature_index2,1] = 1
-            [pVal, contingencyTable, rowMarginals, colMarginals] = bivar_cc(labels, feature)
-            stats[feature_index2,1] = pVal
-            
-            sz3=1
-            if(1==1):
-                sz3 = nrow(contingencyTable)*ncol(contingencyTable)
-            contingencyTableLabelValues = full(0, rows=1, cols=sz3)
-            contingencyTableFeatureValues = full(0, rows=1, cols=sz3)
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# Main starts here -----------------------------------------------------------------------------------------------------------
+
+# input data set
+D = load($1)
+
+# label attr id (must be a valid index > 0)  
+label_index = $2
+
+# feature attributes, column vector of indices
+feature_indices = load($3) 
+
+# can be either 1 (scale) or 0 (categorical)
+label_measurement_level = $4 
+
+# measurement levels for features, 0/1 column vector
+feature_measurement_levels = read($5) 
+
+sz = ncol(D)
+
+# store for pvalues and pearson's r
+stats = full(0, rows=sz, cols=1)
+# store for type of test performed: 1 is chi-sq, 2 is ftest, 3 is pearson's
+tests = full(0, rows=sz, cols=1)
+# store for covariances used to compute pearson's r
+covariances = full(0, rows=sz, cols=1)
+# store for standard deviations used to compute pearson's r
+standard_deviations = full(0, rows=sz, cols=1)
+
+labels = D[,label_index]
+
+labelCorrection = 0
+if(label_measurement_level == 1):
+    numLabels = nrow(labels)
+    cmLabels = moment(labels,2)
+    stdLabels = sqrt(cmLabels * (numLabels/(numLabels-1.0)) )
+    standard_deviations[label_index,1] = stdLabels
+else:
+    labelCorrection = 1 - min(labels)
+    labels = labels + labelCorrection
+
+mx = colMaxs(D)
+mn = colMins(D)
+num_distinct_values = mx-mn+1
+max_num_distinct_values = 0
+for(i1 in 1:nrow(feature_indices)):
+    feature_index1 = castAsScalar(feature_indices[i1,1])
+    num = castAsScalar(num_distinct_values[1,feature_index1])
+    if(castAsScalar(feature_measurement_levels[i1,1]) == 0 & num >= max_num_distinct_values):
+        max_num_distinct_values = num
+distinct_label_values = full(0, rows=1, cols=1)
+contingencyTableSz = 1
+maxNumberOfGroups = 1
+if(max_num_distinct_values != 0):
+    maxNumberOfGroups = max_num_distinct_values
+if(label_measurement_level==0):
+    distinct_label_values = aggregate(target=labels, groups=labels, fn="count")
+    if(max_num_distinct_values != 0):
+        contingencyTableSz = max_num_distinct_values*nrow(distinct_label_values)
+    maxNumberOfGroups = max(maxNumberOfGroups, nrow(distinct_label_values))
+# store for contingency table cell values
+contingencyTablesCounts = full(0, rows=sz, cols=contingencyTableSz)
+# store for contingency table label(row) assignments
+contingencyTablesLabelValues = full(0, rows=sz, cols=contingencyTableSz)
+# store for contingency table feature(col) assignments
+contingencyTablesFeatureValues = full(0, rows=sz, cols=contingencyTableSz)
+# store for distinct values
+featureValues = full(0, rows=sz, cols=maxNumberOfGroups)
+# store for counts of distinct values
+featureCounts = full(0, rows=sz, cols=maxNumberOfGroups)
+# store for group means
+featureMeans = full(0, rows=sz, cols=maxNumberOfGroups)
+# store for group standard deviations
+featureSTDs = full(0, rows=sz, cols=maxNumberOfGroups)
+
+if(label_measurement_level == 0):
+    featureCounts[label_index,1:nrow(distinct_label_values)] = transpose(distinct_label_values)
+    parfor(i2 in 1:nrow(distinct_label_values)):
+        featureValues[label_index,i2] = i2-labelCorrection
+
+parfor(i3 in 1:nrow(feature_indices), check=0):
+    feature_index2 = castAsScalar(feature_indices[i3,1])
+    feature_measurement_level = castAsScalar(feature_measurement_levels[i3,1])
+    
+    feature = D[,feature_index2]
+    
+    if(feature_measurement_level == 0):
+        featureCorrection = 1 - min(feature)
+        feature = feature + featureCorrection
+        
+        if(label_measurement_level == feature_measurement_level):
+            # categorical-categorical
+            tests[feature_index2,1] = 1
+            [pVal, contingencyTable, rowMarginals, colMarginals] = bivar_cc(labels, feature)
+            stats[feature_index2,1] = pVal
+            
+            sz3=1
+            if(1==1):
+                sz3 = nrow(contingencyTable)*ncol(contingencyTable)
+            contingencyTableLabelValues = full(0, rows=1, cols=sz3)
+            contingencyTableFeatureValues = full(0, rows=1, cols=sz3)
             
-            parfor(i4 in 1:nrow(contingencyTable), check=0):
-                parfor(j in 1:ncol(contingencyTable), check=0):
-                    contingencyTableLabelValues[1, ncol(contingencyTable)*(i4-1)+j] = i4-labelCorrection
-                    contingencyTableFeatureValues[1, ncol(contingencyTable)*(i4-1)+j] = j-featureCorrection 
+            parfor(i4 in 1:nrow(contingencyTable), check=0):
+                parfor(j in 1:ncol(contingencyTable), check=0):
+                    contingencyTableLabelValues[1, ncol(contingencyTable)*(i4-1)+j] = i4-labelCorrection
+                    contingencyTableFeatureValues[1, ncol(contingencyTable)*(i4-1)+j] = j-featureCorrection 
             contingencyTableCounts = contingencyTable.reshape(rows=1, cols=sz3)
-            contingencyTablesCounts[feature_index2,1:sz3] = contingencyTableCounts
-            
-            contingencyTablesLabelValues[feature_index2,1:sz3] = contingencyTableLabelValues
-            contingencyTablesFeatureValues[feature_index2,1:sz3] = contingencyTableFeatureValues
-            
-            featureCounts[feature_index2,1:ncol(colMarginals)] = colMarginals
-            parfor(i5 in 1:ncol(colMarginals), check=0):
-                featureValues[feature_index2,i5] = i5-featureCorrection
-        else:
-            # label is scale, feature is categorical
-            tests[feature_index2,1] = 2
-            [pVal, frequencies, means, variances] = bivar_sc(labels, feature)
-            stats[feature_index2,1] = pVal
-            featureCounts[feature_index2,1:nrow(frequencies)] = transpose(frequencies)
-            parfor(i6 in 1:nrow(frequencies), check=0):
-                featureValues[feature_index2,i6] = i6 - featureCorrection
-            featureMeans[feature_index2,1:nrow(means)] = transpose(means)
-            featureSTDs[feature_index2,1:nrow(variances)] = transpose(sqrt(variances))
-    else:
-        if(label_measurement_level == feature_measurement_level):
-            # scale-scale
-            tests[feature_index2,1] = 3
-            [r, covariance, stdX, stdY] = bivar_ss(labels, feature)
-            stats[feature_index2,1] = r
-            covariances[feature_index2,1] = covariance
-            standard_deviations[feature_index2,1] = stdY
-        else:
-            # label is categorical, feature is scale
-            tests[feature_index2,1] = 2
-            [pVal, frequencies, means, variances] = bivar_sc(feature, labels)
-            stats[feature_index2,1] = pVal
-            featureMeans[feature_index2,1:nrow(means)] = transpose(means)
-            featureSTDs[feature_index2,1:nrow(variances)] = transpose(sqrt(variances))
-    # end if(feature_measurement_level == 0)
-# end parfor(i3 in 1:nrow(feature_indices), check=0)
-
-save(stats, $6, format="text")
-save(tests, $7, format="text")
-save(covariances, $8, format="text")
-save(standard_deviations, $9, format="text")
-save(contingencyTablesCounts, $10, format="text")
-save(contingencyTablesLabelValues, $11, format="text")
-save(contingencyTablesFeatureValues, $12, format="text")
-save(featureValues, $13, format="text")
-save(featureCounts, $14, format="text")
-save(featureMeans, $15, format="text")
-save(featureSTDs, $16, format="text")
-
-# -----------------------------------------------------------------------------------------------------------
-
-def bivar_ss(X:matrix[float], Y:matrix[float]) -> (R:float, covXY:float, sigmaX:float, sigmaY:float):
-    # Unweighted co-variance
-    covXY = cov(X,Y)
-    
-    # compute standard deviations for both X and Y by computing 2^nd central moment
-    W = nrow(X)
-    m2X = moment(X,2)
-    m2Y = moment(Y,2)
-    sigmaX = sqrt(m2X * (W/(W-1.0)) )
-    sigmaY = sqrt(m2Y * (W/(W-1.0)) )
-    
-    # Pearson's R
-    R = covXY / (sigmaX*sigmaY)
-
-# -----------------------------------------------------------------------------------------------------------
-
-def bivar_cc(A:matrix[float], B:matrix[float]) -> (pval:float, contingencyTable:matrix[float], rowMarginals:matrix[float], colMarginals:matrix[float]):
-    # Contingency Table
-    FF = table(A,B)
-    
-    tmp = removeEmpty(target=FF, axis=0)
-    F = removeEmpty(target=tmp, axis=1)
-    
-    # Chi-Squared
-    W = sum(F)
-    r = rowSums(F)
-    c = colSums(F)
-    E = (dot(r, c))/W
-    E = ppred(E, 0, "==")*0.0001 + E
-    T = (F-E)**2/E
-    chi_squared = sum(T)
-    # compute p-value
-    degFreedom = (nrow(F)-1)*(ncol(F)-1)
-    pValue = pchisq(target=chi_squared, df=degFreedom, lower.tail=False)
-    
-    # Assign return values
-    pval = pValue
-    contingencyTable = F
-    rowMarginals = r
-    colMarginals = c
-
-# -----------------------------------------------------------------------------------------------------------
-
-# Y points to SCALE variable
-# A points to CATEGORICAL variable
-def bivar_sc(Y:matrix[float], A:matrix[float]) -> (pVal:float, CFreqs:matrix[float], CMeans:matrix[float], CVars:matrix[float]):
-    # mean and variance in target variable
-    W = nrow(A)
-    my = mean(Y)
-    varY = moment(Y,2) * W/(W-1.0)
-    
-    # category-wise (frequencies, means, variances)
-    CFreqs1 = aggregate(target=Y, groups=A, fn="count")
-    present_domain_vals_mat = removeEmpty(target=diag(1-ppred(CFreqs1, 0, "==")), axis=0)
-    CFreqs = dot(present_domain_vals_mat, CFreqs1)
-    
-    CMeans = dot(present_domain_vals_mat, aggregate(target=Y, groups=A, fn="mean"))
-    CVars = dot(present_domain_vals_mat, aggregate(target=Y, groups=A, fn="variance"))
-    
-    # number of categories
-    R = nrow(CFreqs)
-    df1 = R-1
-    df2 = W-R
-    
-    anova_num = sum( (CFreqs*(CMeans-my)**2) )/(R-1)
-    anova_den = sum( (CFreqs-1)*CVars )/(W-R)
-    AnovaF = anova_num/anova_den
-    pVal = pf(target=AnovaF, df1=df1, df2=df2, lower.tail=False)
-
+            contingencyTablesCounts[feature_index2,1:sz3] = contingencyTableCounts
+            
+            contingencyTablesLabelValues[feature_index2,1:sz3] = contingencyTableLabelValues
+            contingencyTablesFeatureValues[feature_index2,1:sz3] = contingencyTableFeatureValues
+            
+            featureCounts[feature_index2,1:ncol(colMarginals)] = colMarginals
+            parfor(i5 in 1:ncol(colMarginals), check=0):
+                featureValues[feature_index2,i5] = i5-featureCorrection
+        else:
+            # label is scale, feature is categorical
+            tests[feature_index2,1] = 2
+            [pVal, frequencies, means, variances] = bivar_sc(labels, feature)
+            stats[feature_index2,1] = pVal
+            featureCounts[feature_index2,1:nrow(frequencies)] = transpose(frequencies)
+            parfor(i6 in 1:nrow(frequencies), check=0):
+                featureValues[feature_index2,i6] = i6 - featureCorrection
+            featureMeans[feature_index2,1:nrow(means)] = transpose(means)
+            featureSTDs[feature_index2,1:nrow(variances)] = transpose(sqrt(variances))
+    else:
+        if(label_measurement_level == feature_measurement_level):
+            # scale-scale
+            tests[feature_index2,1] = 3
+            [r, covariance, stdX, stdY] = bivar_ss(labels, feature)
+            stats[feature_index2,1] = r
+            covariances[feature_index2,1] = covariance
+            standard_deviations[feature_index2,1] = stdY
+        else:
+            # label is categorical, feature is scale
+            tests[feature_index2,1] = 2
+            [pVal, frequencies, means, variances] = bivar_sc(feature, labels)
+            stats[feature_index2,1] = pVal
+            featureMeans[feature_index2,1:nrow(means)] = transpose(means)
+            featureSTDs[feature_index2,1:nrow(variances)] = transpose(sqrt(variances))
+    # end if(feature_measurement_level == 0)
+# end parfor(i3 in 1:nrow(feature_indices), check=0)
+
+save(stats, $6, format="text")
+save(tests, $7, format="text")
+save(covariances, $8, format="text")
+save(standard_deviations, $9, format="text")
+save(contingencyTablesCounts, $10, format="text")
+save(contingencyTablesLabelValues, $11, format="text")
+save(contingencyTablesFeatureValues, $12, format="text")
+save(featureValues, $13, format="text")
+save(featureCounts, $14, format="text")
+save(featureMeans, $15, format="text")
+save(featureSTDs, $16, format="text")
+
+# -----------------------------------------------------------------------------------------------------------
+
+def bivar_ss(X:matrix[float], Y:matrix[float]) -> (R:float, covXY:float, sigmaX:float, sigmaY:float):
+    # Unweighted co-variance
+    covXY = cov(X,Y)
+    
+    # compute standard deviations for both X and Y by computing 2^nd central moment
+    W = nrow(X)
+    m2X = moment(X,2)
+    m2Y = moment(Y,2)
+    sigmaX = sqrt(m2X * (W/(W-1.0)) )
+    sigmaY = sqrt(m2Y * (W/(W-1.0)) )
+    
+    # Pearson's R
+    R = covXY / (sigmaX*sigmaY)
+
+# -----------------------------------------------------------------------------------------------------------
+
+def bivar_cc(A:matrix[float], B:matrix[float]) -> (pval:float, contingencyTable:matrix[float], rowMarginals:matrix[float], colMarginals:matrix[float]):
+    # Contingency Table
+    FF = table(A,B)
+    
+    tmp = removeEmpty(target=FF, axis=0)
+    F = removeEmpty(target=tmp, axis=1)
+    
+    # Chi-Squared
+    W = sum(F)
+    r = rowSums(F)
+    c = colSums(F)
+    E = (dot(r, c))/W
+    E = ppred(E, 0, "==")*0.0001 + E
+    T = (F-E)**2/E
+    chi_squared = sum(T)
+    # compute p-value
+    degFreedom = (nrow(F)-1)*(ncol(F)-1)
+    pValue = pchisq(target=chi_squared, df=degFreedom, lower.tail=False)
+    
+    # Assign return values
+    pval = pValue
+    contingencyTable = F
+    rowMarginals = r
+    colMarginals = c
+
+# -----------------------------------------------------------------------------------------------------------
+
+# Y points to SCALE variable
+# A points to CATEGORICAL variable
+def bivar_sc(Y:matrix[float], A:matrix[float]) -> (pVal:float, CFreqs:matrix[float], CMeans:matrix[float], CVars:matrix[float]):
+    # mean and variance in target variable
+    W = nrow(A)
+    my = mean(Y)
+    varY = moment(Y,2) * W/(W-1.0)
+    
+    # category-wise (frequencies, means, variances)
+    CFreqs1 = aggregate(target=Y, groups=A, fn="count")
+    present_domain_vals_mat = removeEmpty(target=diag(1-ppred(CFreqs1, 0, "==")), axis=0)
+    CFreqs = dot(present_domain_vals_mat, CFreqs1)
+    
+    CMeans = dot(present_domain_vals_mat, aggregate(target=Y, groups=A, fn="mean"))
+    CVars = dot(present_domain_vals_mat, aggregate(target=Y, groups=A, fn="variance"))
+    
+    # number of categories
+    R = nrow(CFreqs)
+    df1 = R-1
+    df2 = W-R
+    
+    anova_num = sum( (CFreqs*(CMeans-my)**2) )/(R-1)
+    anova_den = sum( (CFreqs-1)*CVars )/(W-R)
+    AnovaF = anova_num/anova_den
+    pVal = pf(target=AnovaF, df1=df1, df2=df2, lower.tail=False)
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/src/test/scripts/applications/naive-bayes-parfor/naive-bayes.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/naive-bayes-parfor/naive-bayes.R b/src/test/scripts/applications/naive-bayes-parfor/naive-bayes.R
index dc65b8a..a3ca47a 100644
--- a/src/test/scripts/applications/naive-bayes-parfor/naive-bayes.R
+++ b/src/test/scripts/applications/naive-bayes-parfor/naive-bayes.R
@@ -1,71 +1,71 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-args <- commandArgs(TRUE)
-
-library("Matrix")
-
-D = as.matrix(readMM(paste(args[1], "X.mtx", sep="")))
-C = as.matrix(readMM(paste(args[1], "Y.mtx", sep="")))
-
-# reading input args
-numClasses = as.integer(args[2]);
-laplace_correction = as.double(args[3]);
-
-numRows = nrow(D)
-numFeatures = ncol(D)
-
-# Compute conditionals
-
-# Compute the feature counts for each class
-classFeatureCounts = matrix(0, numClasses, numFeatures)
-for (i in 1:numFeatures) {
-  Col = D[,i]
-  classFeatureCounts[,i] = aggregate(as.vector(Col), by=list(as.vector(C)), FUN=sum)[,2];
-}
-
-# Compute the total feature count for each class 
-# and add the number of features to this sum
-# for subsequent regularization (Laplace's rule)
-classSums = rowSums(classFeatureCounts) + numFeatures*laplace_correction
-
-# Compute class conditional probabilities
-ones = matrix(1, 1, numFeatures)
-repClassSums = classSums %*% ones;
-class_conditionals = (classFeatureCounts + laplace_correction) / repClassSums;
-
-# Compute class priors
-class_counts = aggregate(as.vector(C), by=list(as.vector(C)), FUN=length)[,2]
-class_prior = class_counts / numRows;
-
-# Compute accuracy on training set
-ones = matrix(1, numRows, 1)
-D_w_ones = cbind(D, ones)
-model = cbind(class_conditionals, class_prior)
-log_probs = D_w_ones %*% t(log(model))
-pred = max.col(log_probs,ties.method="last");
-acc = sum(pred == C) / numRows * 100
-
-print(paste("Training Accuracy (%): ", acc, sep=""))
-
-# write out the model
-writeMM(as(class_prior, "CsparseMatrix"), paste(args[4], "prior", sep=""));
-writeMM(as(class_conditionals, "CsparseMatrix"), paste(args[4], "conditionals", sep=""));
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+args <- commandArgs(TRUE)
+
+library("Matrix")
+
+D = as.matrix(readMM(paste(args[1], "X.mtx", sep="")))
+C = as.matrix(readMM(paste(args[1], "Y.mtx", sep="")))
+
+# reading input args
+numClasses = as.integer(args[2]);
+laplace_correction = as.double(args[3]);
+
+numRows = nrow(D)
+numFeatures = ncol(D)
+
+# Compute conditionals
+
+# Compute the feature counts for each class
+classFeatureCounts = matrix(0, numClasses, numFeatures)
+for (i in 1:numFeatures) {
+  Col = D[,i]
+  classFeatureCounts[,i] = aggregate(as.vector(Col), by=list(as.vector(C)), FUN=sum)[,2];
+}
+
+# Compute the total feature count for each class 
+# and add the number of features to this sum
+# for subsequent regularization (Laplace's rule)
+classSums = rowSums(classFeatureCounts) + numFeatures*laplace_correction
+
+# Compute class conditional probabilities
+ones = matrix(1, 1, numFeatures)
+repClassSums = classSums %*% ones;
+class_conditionals = (classFeatureCounts + laplace_correction) / repClassSums;
+
+# Compute class priors
+class_counts = aggregate(as.vector(C), by=list(as.vector(C)), FUN=length)[,2]
+class_prior = class_counts / numRows;
+
+# Compute accuracy on training set
+ones = matrix(1, numRows, 1)
+D_w_ones = cbind(D, ones)
+model = cbind(class_conditionals, class_prior)
+log_probs = D_w_ones %*% t(log(model))
+pred = max.col(log_probs,ties.method="last");
+acc = sum(pred == C) / numRows * 100
+
+print(paste("Training Accuracy (%): ", acc, sep=""))
+
+# write out the model
+writeMM(as(class_prior, "CsparseMatrix"), paste(args[4], "prior", sep=""));
+writeMM(as(class_conditionals, "CsparseMatrix"), paste(args[4], "conditionals", sep=""));