You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by du...@apache.org on 2016/01/22 17:34:18 UTC
[42/51] [partial] incubator-systemml git commit: [SYSTEMML-482]
[SYSTEMML-480] Adding a Git attributes file to enforce Unix-styled line
endings, and normalizing all of the line endings.
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/scripts/algorithms/l2-svm.dml
----------------------------------------------------------------------
diff --git a/scripts/algorithms/l2-svm.dml b/scripts/algorithms/l2-svm.dml
index 04b83b0..140ac5a 100644
--- a/scripts/algorithms/l2-svm.dml
+++ b/scripts/algorithms/l2-svm.dml
@@ -1,159 +1,159 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# Implements binary-class SVM with squared slack variables
-#
-# Example Usage:
-# Assume L2SVM_HOME is set to the home of the dml script
-# Assume input and output directories are on hdfs as INPUT_DIR and OUTPUT_DIR
-# Assume epsilon = 0.001, lambda = 1, maxiterations = 100
-#
-# hadoop jar SystemML.jar -f $L2SVM_HOME/l2-svm.dml -nvargs X=$INPUT_DIR/X Y=$INPUT_DIR/Y icpt=0 tol=0.001 reg=1 maxiter=100 model=$OUPUT_DIR/w Log=$OUTPUT_DIR/Log fmt="text"
-#
-# Note about inputs:
-# Assumes that labels (entries in Y)
-# are set to either -1 or +1
-# or the result of recoding
-#
-
-cmdLine_fmt = ifdef($fmt, "text")
-cmdLine_icpt = ifdef($icpt, 0)
-cmdLine_tol = ifdef($tol, 0.001)
-cmdLine_reg = ifdef($reg, 1.0)
-cmdLine_maxiter = ifdef($maxiter, 100)
-
-X = read($X)
-Y = read($Y)
-
-if(nrow(X) < 2)
- stop("Stopping due to invalid inputs: Not possible to learn a binary class classifier without at least 2 rows")
-
-check_min = min(Y)
-check_max = max(Y)
-num_min = sum(ppred(Y, check_min, "=="))
-num_max = sum(ppred(Y, check_max, "=="))
-
-if(check_min == check_max)
- stop("Stopping due to invalid inputs: Y seems to contain exactly one label")
-
-if(num_min + num_max != nrow(Y))
- stop("Stopping due to invalid inputs: Y seems to contain more than 2 labels")
-
-if(check_min != -1 | check_max != +1)
- Y = 2/(check_max - check_min)*Y - (check_min + check_max)/(check_max - check_min)
-
-positive_label = check_max
-negative_label = check_min
-
-continue = 1
-
-intercept = cmdLine_icpt
-if(intercept != 0 & intercept != 1)
- stop("Stopping due to invalid argument: Currently supported intercept options are 0 and 1")
-
-epsilon = cmdLine_tol
-if(epsilon < 0)
- stop("Stopping due to invalid argument: Tolerance (tol) must be non-negative")
-
-lambda = cmdLine_reg
-if(lambda < 0)
- stop("Stopping due to invalid argument: Regularization constant (reg) must be non-negative")
-
-maxiterations = cmdLine_maxiter
-if(maxiterations < 1)
- stop("Stopping due to invalid argument: Maximum iterations should be a positive integer")
-
-num_samples = nrow(X)
-dimensions = ncol(X)
-
-if (intercept == 1) {
- ones = matrix(1, rows=num_samples, cols=1)
- X = append(X, ones);
-}
-
-num_rows_in_w = dimensions
-if(intercept == 1){
- num_rows_in_w = num_rows_in_w + 1
-}
-w = matrix(0, rows=num_rows_in_w, cols=1)
-
-g_old = t(X) %*% Y
-s = g_old
-
-Xw = matrix(0, rows=nrow(X), cols=1)
-debug_str = "# Iter, Obj"
-iter = 0
-while(continue == 1 & iter < maxiterations) {
- # minimizing primal obj along direction s
- step_sz = 0
- Xd = X %*% s
- wd = lambda * sum(w * s)
- dd = lambda * sum(s * s)
- continue1 = 1
- while(continue1 == 1){
- tmp_Xw = Xw + step_sz*Xd
- out = 1 - Y * (tmp_Xw)
- sv = ppred(out, 0, ">")
- out = out * sv
- g = wd + step_sz*dd - sum(out * Y * Xd)
- h = dd + sum(Xd * sv * Xd)
- step_sz = step_sz - g/h
- if (g*g/h < 0.0000000001){
- continue1 = 0
- }
- }
-
- #update weights
- w = w + step_sz*s
- Xw = Xw + step_sz*Xd
-
- out = 1 - Y * Xw
- sv = ppred(out, 0, ">")
- out = sv * out
- obj = 0.5 * sum(out * out) + lambda/2 * sum(w * w)
- g_new = t(X) %*% (out * Y) - lambda * w
-
- print("ITER " + iter + ": OBJ=" + obj)
- debug_str = append(debug_str, iter + "," + obj)
-
- tmp = sum(s * g_old)
- if(step_sz*tmp < epsilon*obj){
- continue = 0
- }
-
- #non-linear CG step
- be = sum(g_new * g_new)/sum(g_old * g_old)
- s = be * s + g_new
- g_old = g_new
-
- iter = iter + 1
-}
-
-extra_model_params = matrix(0, rows=4, cols=1)
-extra_model_params[1,1] = positive_label
-extra_model_params[2,1] = negative_label
-extra_model_params[3,1] = intercept
-extra_model_params[4,1] = dimensions
-
-w = t(append(t(w), t(extra_model_params)))
-write(w, $model, format=cmdLine_fmt)
-
-write(debug_str, $Log)
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# Implements binary-class SVM with squared slack variables
+#
+# Example Usage:
+# Assume L2SVM_HOME is set to the home of the dml script
+# Assume input and output directories are on hdfs as INPUT_DIR and OUTPUT_DIR
+# Assume epsilon = 0.001, lambda = 1, maxiterations = 100
+#
+# hadoop jar SystemML.jar -f $L2SVM_HOME/l2-svm.dml -nvargs X=$INPUT_DIR/X Y=$INPUT_DIR/Y icpt=0 tol=0.001 reg=1 maxiter=100 model=$OUPUT_DIR/w Log=$OUTPUT_DIR/Log fmt="text"
+#
+# Note about inputs:
+# Assumes that labels (entries in Y)
+# are set to either -1 or +1
+# or the result of recoding
+#
+
+cmdLine_fmt = ifdef($fmt, "text")
+cmdLine_icpt = ifdef($icpt, 0)
+cmdLine_tol = ifdef($tol, 0.001)
+cmdLine_reg = ifdef($reg, 1.0)
+cmdLine_maxiter = ifdef($maxiter, 100)
+
+X = read($X)
+Y = read($Y)
+
+if(nrow(X) < 2)
+ stop("Stopping due to invalid inputs: Not possible to learn a binary class classifier without at least 2 rows")
+
+check_min = min(Y)
+check_max = max(Y)
+num_min = sum(ppred(Y, check_min, "=="))
+num_max = sum(ppred(Y, check_max, "=="))
+
+if(check_min == check_max)
+ stop("Stopping due to invalid inputs: Y seems to contain exactly one label")
+
+if(num_min + num_max != nrow(Y))
+ stop("Stopping due to invalid inputs: Y seems to contain more than 2 labels")
+
+if(check_min != -1 | check_max != +1)
+ Y = 2/(check_max - check_min)*Y - (check_min + check_max)/(check_max - check_min)
+
+positive_label = check_max
+negative_label = check_min
+
+continue = 1
+
+intercept = cmdLine_icpt
+if(intercept != 0 & intercept != 1)
+ stop("Stopping due to invalid argument: Currently supported intercept options are 0 and 1")
+
+epsilon = cmdLine_tol
+if(epsilon < 0)
+ stop("Stopping due to invalid argument: Tolerance (tol) must be non-negative")
+
+lambda = cmdLine_reg
+if(lambda < 0)
+ stop("Stopping due to invalid argument: Regularization constant (reg) must be non-negative")
+
+maxiterations = cmdLine_maxiter
+if(maxiterations < 1)
+ stop("Stopping due to invalid argument: Maximum iterations should be a positive integer")
+
+num_samples = nrow(X)
+dimensions = ncol(X)
+
+if (intercept == 1) {
+ ones = matrix(1, rows=num_samples, cols=1)
+ X = append(X, ones);
+}
+
+num_rows_in_w = dimensions
+if(intercept == 1){
+ num_rows_in_w = num_rows_in_w + 1
+}
+w = matrix(0, rows=num_rows_in_w, cols=1)
+
+g_old = t(X) %*% Y
+s = g_old
+
+Xw = matrix(0, rows=nrow(X), cols=1)
+debug_str = "# Iter, Obj"
+iter = 0
+while(continue == 1 & iter < maxiterations) {
+ # minimizing primal obj along direction s
+ step_sz = 0
+ Xd = X %*% s
+ wd = lambda * sum(w * s)
+ dd = lambda * sum(s * s)
+ continue1 = 1
+ while(continue1 == 1){
+ tmp_Xw = Xw + step_sz*Xd
+ out = 1 - Y * (tmp_Xw)
+ sv = ppred(out, 0, ">")
+ out = out * sv
+ g = wd + step_sz*dd - sum(out * Y * Xd)
+ h = dd + sum(Xd * sv * Xd)
+ step_sz = step_sz - g/h
+ if (g*g/h < 0.0000000001){
+ continue1 = 0
+ }
+ }
+
+ #update weights
+ w = w + step_sz*s
+ Xw = Xw + step_sz*Xd
+
+ out = 1 - Y * Xw
+ sv = ppred(out, 0, ">")
+ out = sv * out
+ obj = 0.5 * sum(out * out) + lambda/2 * sum(w * w)
+ g_new = t(X) %*% (out * Y) - lambda * w
+
+ print("ITER " + iter + ": OBJ=" + obj)
+ debug_str = append(debug_str, iter + "," + obj)
+
+ tmp = sum(s * g_old)
+ if(step_sz*tmp < epsilon*obj){
+ continue = 0
+ }
+
+ #non-linear CG step
+ be = sum(g_new * g_new)/sum(g_old * g_old)
+ s = be * s + g_new
+ g_old = g_new
+
+ iter = iter + 1
+}
+
+extra_model_params = matrix(0, rows=4, cols=1)
+extra_model_params[1,1] = positive_label
+extra_model_params[2,1] = negative_label
+extra_model_params[3,1] = intercept
+extra_model_params[4,1] = dimensions
+
+w = t(append(t(w), t(extra_model_params)))
+write(w, $model, format=cmdLine_fmt)
+
+write(debug_str, $Log)
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/scripts/algorithms/m-svm-predict.dml
----------------------------------------------------------------------
diff --git a/scripts/algorithms/m-svm-predict.dml b/scripts/algorithms/m-svm-predict.dml
index 4d0c736..ba06cf6 100644
--- a/scripts/algorithms/m-svm-predict.dml
+++ b/scripts/algorithms/m-svm-predict.dml
@@ -1,84 +1,84 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# This script can be used to compute label predictions
-# Meant for use with an SVM model (learnt using m-svm.dml) on a held out test set
-#
-# Given ground truth labels, the script will compute an
-# accuracy (%) for the predictions
-#
-# Example Usage:
-# hadoop jar SystemML.jar -f m-svm-predict.dml -nvargs X=data Y=labels model=model scores=scores accuracy=accuracy confusion=confusion fmt="text"
-#
-
-cmdLine_Y = ifdef($Y, " ")
-cmdLine_confusion = ifdef($confusion, " ")
-cmdLine_accuracy = ifdef($accuracy, " ")
-cmdLine_scores = ifdef($scores, " ")
-cmdLine_fmt = ifdef($fmt, "text")
-
-X = read($X);
-W = read($model);
-
-dimensions = as.scalar(W[nrow(W),1])
-if(dimensions != ncol(X))
- stop("Stopping due to invalid input: Model dimensions do not seem to match input data dimensions")
-
-intercept = as.scalar(W[nrow(W)-1,1])
-W = W[1:(nrow(W)-2),]
-
-N = nrow(X);
-num_classes = ncol(W)
-m=ncol(X);
-
-b = matrix(0, rows=1, cols=num_classes)
-if (intercept == 1)
- b = W[m+1,]
-
-ones = matrix(1, rows=N, cols=1)
-scores = X %*% W[1:m,] + ones %*% b;
-
-if(cmdLine_scores != " ")
- write(scores, cmdLine_scores, format=cmdLine_fmt);
-
-if(cmdLine_Y != " "){
- y = read(cmdLine_Y);
-
- if(min(y) < 1)
- stop("Stopping due to invalid argument: Label vector (Y) must be recoded")
-
- pred = rowIndexMax(scores);
- correct_percentage = sum(ppred(pred - y, 0, "==")) / N * 100;
-
- acc_str = "Accuracy (%): " + correct_percentage
- print(acc_str)
- if(cmdLine_accuracy != " ")
- write(acc_str, cmdLine_accuracy)
-
- num_classes_ground_truth = max(y)
- if(num_classes < num_classes_ground_truth)
- num_classes = num_classes_ground_truth
-
- if(cmdLine_confusion != " "){
- confusion_mat = table(pred, y, num_classes, num_classes)
- write(confusion_mat, cmdLine_confusion, format="csv")
- }
-}
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# This script can be used to compute label predictions
+# Meant for use with an SVM model (learnt using m-svm.dml) on a held out test set
+#
+# Given ground truth labels, the script will compute an
+# accuracy (%) for the predictions
+#
+# Example Usage:
+# hadoop jar SystemML.jar -f m-svm-predict.dml -nvargs X=data Y=labels model=model scores=scores accuracy=accuracy confusion=confusion fmt="text"
+#
+
+cmdLine_Y = ifdef($Y, " ")
+cmdLine_confusion = ifdef($confusion, " ")
+cmdLine_accuracy = ifdef($accuracy, " ")
+cmdLine_scores = ifdef($scores, " ")
+cmdLine_fmt = ifdef($fmt, "text")
+
+X = read($X);
+W = read($model);
+
+dimensions = as.scalar(W[nrow(W),1])
+if(dimensions != ncol(X))
+ stop("Stopping due to invalid input: Model dimensions do not seem to match input data dimensions")
+
+intercept = as.scalar(W[nrow(W)-1,1])
+W = W[1:(nrow(W)-2),]
+
+N = nrow(X);
+num_classes = ncol(W)
+m=ncol(X);
+
+b = matrix(0, rows=1, cols=num_classes)
+if (intercept == 1)
+ b = W[m+1,]
+
+ones = matrix(1, rows=N, cols=1)
+scores = X %*% W[1:m,] + ones %*% b;
+
+if(cmdLine_scores != " ")
+ write(scores, cmdLine_scores, format=cmdLine_fmt);
+
+if(cmdLine_Y != " "){
+ y = read(cmdLine_Y);
+
+ if(min(y) < 1)
+ stop("Stopping due to invalid argument: Label vector (Y) must be recoded")
+
+ pred = rowIndexMax(scores);
+ correct_percentage = sum(ppred(pred - y, 0, "==")) / N * 100;
+
+ acc_str = "Accuracy (%): " + correct_percentage
+ print(acc_str)
+ if(cmdLine_accuracy != " ")
+ write(acc_str, cmdLine_accuracy)
+
+ num_classes_ground_truth = max(y)
+ if(num_classes < num_classes_ground_truth)
+ num_classes = num_classes_ground_truth
+
+ if(cmdLine_confusion != " "){
+ confusion_mat = table(pred, y, num_classes, num_classes)
+ write(confusion_mat, cmdLine_confusion, format="csv")
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/scripts/algorithms/m-svm.dml
----------------------------------------------------------------------
diff --git a/scripts/algorithms/m-svm.dml b/scripts/algorithms/m-svm.dml
index c570872..560d46f 100644
--- a/scripts/algorithms/m-svm.dml
+++ b/scripts/algorithms/m-svm.dml
@@ -1,174 +1,174 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# Implements multiclass SVM with squared slack variables,
-# learns one-against-the-rest binary-class classifiers
-#
-# Example Usage:
-# Assume SVM_HOME is set to the home of the dml script
-# Assume input and output directories are on hdfs as INPUT_DIR and OUTPUT_DIR
-# Assume epsilon = 0.001, lambda=1.0, max_iterations = 100
-#
-# hadoop jar SystemML.jar -f $SVM_HOME/m-svm.dml -nvargs X=$INPUT_DIR/X Y=$INPUT_DIR/y icpt=intercept tol=.001 reg=1.0 maxiter=100 model=$OUTPUT_DIR/w Log=$OUTPUT_DIR/Log fmt="text"
-#
-
-cmdLine_fmt = ifdef($fmt, "text")
-cmdLine_icpt = ifdef($icpt, 0)
-cmdLine_tol = ifdef($tol, 0.001)
-cmdLine_reg = ifdef($reg, 1.0)
-cmdLine_maxiter = ifdef($maxiter, 100)
-
-print("icpt=" + cmdLine_icpt + " tol=" + cmdLine_tol + " reg=" + cmdLine_reg + " maxiter=" + cmdLine_maxiter)
-
-X = read($X)
-
-if(nrow(X) < 2)
- stop("Stopping due to invalid inputs: Not possible to learn a classifier without at least 2 rows")
-
-dimensions = ncol(X)
-
-Y = read($Y)
-
-if(nrow(X) != nrow(Y))
- stop("Stopping due to invalid argument: Numbers of rows in X and Y must match")
-
-intercept = cmdLine_icpt
-if(intercept != 0 & intercept != 1)
- stop("Stopping due to invalid argument: Currently supported intercept options are 0 and 1")
-
-min_y = min(Y)
-if(min_y < 1)
- stop("Stopping due to invalid argument: Label vector (Y) must be recoded")
-num_classes = max(Y)
-if(num_classes == 1)
- stop("Stopping due to invalid argument: Maximum label value is 1, need more than one class to learn a multi-class classifier")
-mod1 = Y %% 1
-mod1_should_be_nrow = sum(abs(ppred(mod1, 0, "==")))
-if(mod1_should_be_nrow != nrow(Y))
- stop("Stopping due to invalid argument: Please ensure that Y contains (positive) integral labels")
-
-epsilon = cmdLine_tol
-if(epsilon < 0)
- stop("Stopping due to invalid argument: Tolerance (tol) must be non-negative")
-
-lambda = cmdLine_reg
-if(lambda < 0)
- stop("Stopping due to invalid argument: Regularization constant (reg) must be non-negative")
-
-max_iterations = cmdLine_maxiter
-if(max_iterations < 1)
- stop("Stopping due to invalid argument: Maximum iterations should be a positive integer")
-
-num_samples = nrow(X)
-num_features = ncol(X)
-
-if (intercept == 1) {
- ones = matrix(1, rows=num_samples, cols=1);
- X = append(X, ones);
-}
-
-num_rows_in_w = num_features
-if(intercept == 1){
- num_rows_in_w = num_rows_in_w + 1
-}
-w = matrix(0, rows=num_rows_in_w, cols=num_classes)
-
-debug_mat = matrix(-1, rows=max_iterations, cols=num_classes)
-parfor(iter_class in 1:num_classes){
- Y_local = 2 * ppred(Y, iter_class, "==") - 1
- w_class = matrix(0, rows=num_features, cols=1)
- if (intercept == 1) {
- zero_matrix = matrix(0, rows=1, cols=1);
- w_class = t(append(t(w_class), zero_matrix));
- }
-
- g_old = t(X) %*% Y_local
- s = g_old
-
- Xw = matrix(0, rows=nrow(X), cols=1)
- iter = 0
- continue = 1
- while(continue == 1) {
- # minimizing primal obj along direction s
- step_sz = 0
- Xd = X %*% s
- wd = lambda * sum(w_class * s)
- dd = lambda * sum(s * s)
- continue1 = 1
- while(continue1 == 1){
- tmp_Xw = Xw + step_sz*Xd
- out = 1 - Y_local * (tmp_Xw)
- sv = ppred(out, 0, ">")
- out = out * sv
- g = wd + step_sz*dd - sum(out * Y_local * Xd)
- h = dd + sum(Xd * sv * Xd)
- step_sz = step_sz - g/h
- if (g*g/h < 0.0000000001){
- continue1 = 0
- }
- }
-
- #update weights
- w_class = w_class + step_sz*s
- Xw = Xw + step_sz*Xd
-
- out = 1 - Y_local * Xw
- sv = ppred(out, 0, ">")
- out = sv * out
- obj = 0.5 * sum(out * out) + lambda/2 * sum(w_class * w_class)
- g_new = t(X) %*% (out * Y_local) - lambda * w_class
-
- tmp = sum(s * g_old)
-
- train_acc = sum(ppred(Y_local*(X%*%w_class), 0, ">="))/num_samples*100
- print("For class " + iter_class + " iteration " + iter + " training accuracy: " + train_acc)
- debug_mat[iter+1,iter_class] = obj
-
- if((step_sz*tmp < epsilon*obj) | (iter >= max_iterations-1)){
- continue = 0
- }
-
- #non-linear CG step
- be = sum(g_new * g_new)/sum(g_old * g_old)
- s = be * s + g_new
- g_old = g_new
-
- iter = iter + 1
- }
-
- w[,iter_class] = w_class
-}
-
-extra_model_params = matrix(0, rows=2, cols=ncol(w))
-extra_model_params[1, 1] = intercept
-extra_model_params[2, 1] = dimensions
-w = t(append(t(w), t(extra_model_params)))
-write(w, $model, format=cmdLine_fmt)
-
-debug_str = "# Class, Iter, Obj"
-for(iter_class in 1:ncol(debug_mat)){
- for(iter in 1:nrow(debug_mat)){
- obj = castAsScalar(debug_mat[iter, iter_class])
- if(obj != -1)
- debug_str = append(debug_str, iter_class + "," + iter + "," + obj)
- }
-}
-write(debug_str, $Log)
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# Implements multiclass SVM with squared slack variables,
+# learns one-against-the-rest binary-class classifiers
+#
+# Example Usage:
+# Assume SVM_HOME is set to the home of the dml script
+# Assume input and output directories are on hdfs as INPUT_DIR and OUTPUT_DIR
+# Assume epsilon = 0.001, lambda=1.0, max_iterations = 100
+#
+# hadoop jar SystemML.jar -f $SVM_HOME/m-svm.dml -nvargs X=$INPUT_DIR/X Y=$INPUT_DIR/y icpt=intercept tol=.001 reg=1.0 maxiter=100 model=$OUTPUT_DIR/w Log=$OUTPUT_DIR/Log fmt="text"
+#
+
+cmdLine_fmt = ifdef($fmt, "text")
+cmdLine_icpt = ifdef($icpt, 0)
+cmdLine_tol = ifdef($tol, 0.001)
+cmdLine_reg = ifdef($reg, 1.0)
+cmdLine_maxiter = ifdef($maxiter, 100)
+
+print("icpt=" + cmdLine_icpt + " tol=" + cmdLine_tol + " reg=" + cmdLine_reg + " maxiter=" + cmdLine_maxiter)
+
+X = read($X)
+
+if(nrow(X) < 2)
+ stop("Stopping due to invalid inputs: Not possible to learn a classifier without at least 2 rows")
+
+dimensions = ncol(X)
+
+Y = read($Y)
+
+if(nrow(X) != nrow(Y))
+ stop("Stopping due to invalid argument: Numbers of rows in X and Y must match")
+
+intercept = cmdLine_icpt
+if(intercept != 0 & intercept != 1)
+ stop("Stopping due to invalid argument: Currently supported intercept options are 0 and 1")
+
+min_y = min(Y)
+if(min_y < 1)
+ stop("Stopping due to invalid argument: Label vector (Y) must be recoded")
+num_classes = max(Y)
+if(num_classes == 1)
+ stop("Stopping due to invalid argument: Maximum label value is 1, need more than one class to learn a multi-class classifier")
+mod1 = Y %% 1
+mod1_should_be_nrow = sum(abs(ppred(mod1, 0, "==")))
+if(mod1_should_be_nrow != nrow(Y))
+ stop("Stopping due to invalid argument: Please ensure that Y contains (positive) integral labels")
+
+epsilon = cmdLine_tol
+if(epsilon < 0)
+ stop("Stopping due to invalid argument: Tolerance (tol) must be non-negative")
+
+lambda = cmdLine_reg
+if(lambda < 0)
+ stop("Stopping due to invalid argument: Regularization constant (reg) must be non-negative")
+
+max_iterations = cmdLine_maxiter
+if(max_iterations < 1)
+ stop("Stopping due to invalid argument: Maximum iterations should be a positive integer")
+
+num_samples = nrow(X)
+num_features = ncol(X)
+
+if (intercept == 1) {
+ ones = matrix(1, rows=num_samples, cols=1);
+ X = append(X, ones);
+}
+
+num_rows_in_w = num_features
+if(intercept == 1){
+ num_rows_in_w = num_rows_in_w + 1
+}
+w = matrix(0, rows=num_rows_in_w, cols=num_classes)
+
+debug_mat = matrix(-1, rows=max_iterations, cols=num_classes)
+parfor(iter_class in 1:num_classes){
+ Y_local = 2 * ppred(Y, iter_class, "==") - 1
+ w_class = matrix(0, rows=num_features, cols=1)
+ if (intercept == 1) {
+ zero_matrix = matrix(0, rows=1, cols=1);
+ w_class = t(append(t(w_class), zero_matrix));
+ }
+
+ g_old = t(X) %*% Y_local
+ s = g_old
+
+ Xw = matrix(0, rows=nrow(X), cols=1)
+ iter = 0
+ continue = 1
+ while(continue == 1) {
+ # minimizing primal obj along direction s
+ step_sz = 0
+ Xd = X %*% s
+ wd = lambda * sum(w_class * s)
+ dd = lambda * sum(s * s)
+ continue1 = 1
+ while(continue1 == 1){
+ tmp_Xw = Xw + step_sz*Xd
+ out = 1 - Y_local * (tmp_Xw)
+ sv = ppred(out, 0, ">")
+ out = out * sv
+ g = wd + step_sz*dd - sum(out * Y_local * Xd)
+ h = dd + sum(Xd * sv * Xd)
+ step_sz = step_sz - g/h
+ if (g*g/h < 0.0000000001){
+ continue1 = 0
+ }
+ }
+
+ #update weights
+ w_class = w_class + step_sz*s
+ Xw = Xw + step_sz*Xd
+
+ out = 1 - Y_local * Xw
+ sv = ppred(out, 0, ">")
+ out = sv * out
+ obj = 0.5 * sum(out * out) + lambda/2 * sum(w_class * w_class)
+ g_new = t(X) %*% (out * Y_local) - lambda * w_class
+
+ tmp = sum(s * g_old)
+
+ train_acc = sum(ppred(Y_local*(X%*%w_class), 0, ">="))/num_samples*100
+ print("For class " + iter_class + " iteration " + iter + " training accuracy: " + train_acc)
+ debug_mat[iter+1,iter_class] = obj
+
+ if((step_sz*tmp < epsilon*obj) | (iter >= max_iterations-1)){
+ continue = 0
+ }
+
+ #non-linear CG step
+ be = sum(g_new * g_new)/sum(g_old * g_old)
+ s = be * s + g_new
+ g_old = g_new
+
+ iter = iter + 1
+ }
+
+ w[,iter_class] = w_class
+}
+
+extra_model_params = matrix(0, rows=2, cols=ncol(w))
+extra_model_params[1, 1] = intercept
+extra_model_params[2, 1] = dimensions
+w = t(append(t(w), t(extra_model_params)))
+write(w, $model, format=cmdLine_fmt)
+
+debug_str = "# Class, Iter, Obj"
+for(iter_class in 1:ncol(debug_mat)){
+ for(iter in 1:nrow(debug_mat)){
+ obj = castAsScalar(debug_mat[iter, iter_class])
+ if(obj != -1)
+ debug_str = append(debug_str, iter_class + "," + iter + "," + obj)
+ }
+}
+write(debug_str, $Log)
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/scripts/algorithms/random-forest-predict.dml
----------------------------------------------------------------------
diff --git a/scripts/algorithms/random-forest-predict.dml b/scripts/algorithms/random-forest-predict.dml
index 2d99670..7bc6cd6 100644
--- a/scripts/algorithms/random-forest-predict.dml
+++ b/scripts/algorithms/random-forest-predict.dml
@@ -1,193 +1,193 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-#
-# THIS SCRIPT COMPUTES LABEL PREDICTIONS MEANT FOR USE WITH A RANDOM FOREST MODEL ON A HELD OUT TEST SET
-# OR FOR COMPUTING THE OUT-OF-BAG ERROR ON THE TRAINING SET.
-#
-# INPUT PARAMETERS:
-# ---------------------------------------------------------------------------------------------
-# NAME TYPE DEFAULT MEANING
-# ---------------------------------------------------------------------------------------------
-# X String --- Location to read test feature matrix or training feature matrix for computing Out-Of-Bag error;
-# note that X needs to be both recoded and dummy coded
-# Y String " " Location to read true label matrix Y if requested; note that Y needs to be both recoded and dummy coded
-# R String " " Location to read the matrix R which for each feature in X contains the following information
-# - R[,1]: column ids
-# - R[,2]: start indices
-# - R[,3]: end indices
-# If R is not provided by default all variables are assumed to be scale
-# M String --- Location to read matrix M containing the learned tree i the following format
-# - M[1,j]: id of node j (in a complete binary tree)
-# - M[2,j]: tree id
-# - M[3,j]: Offset (no. of columns) to left child of j if j is an internal node, otherwise 0
-# - M[4,j]: Feature index of the feature that node j looks at if j is an internal node, otherwise 0
-# - M[5,j]: Type of the feature that node j looks at if j is an internal node: 1 for scale and 2 for categorical features,
-# otherwise the label that leaf node j is supposed to predict
-# - M[6,j]: If j is an internal node: 1 if the feature chosen for j is scale, otherwise the size of the subset of values
-# stored in rows 7,8,... if j is categorical
-# If j is a leaf node: number of misclassified samples reaching at node j
-# - M[7:,j]: If j is an internal node: Threshold the example's feature value is compared to is stored at M[7,j]
-# if the feature chosen for j is scale, otherwise if the feature chosen for j is categorical rows 7,8,...
-# depict the value subset chosen for j
-# If j is a leaf node 1 if j is impure and the number of samples at j > threshold, otherwise 0
-# C String " " Location to read the counts matrix containing the number of times samples are chosen in each tree of the random forest
-# P String --- Location to store the label predictions for X
-# A String " " Location to store the test accuracy (%) for the prediction if requested
-# OOB String " " If C is provided location to store the Out-Of-Bag (OOB) error of the learned model
-# CM String " " Location to store the confusion matrix if requested
-# fmt String "text" The output format of the output, such as "text" or "csv"
-# ---------------------------------------------------------------------------------------------
-# OUTPUT:
-# 1- Matrix Y containing the predicted labels for X
-# 2- Test accuracy if requested
-# 3- Confusion matrix C if requested
-# -------------------------------------------------------------------------------------------
-# HOW TO INVOKE THIS SCRIPT - EXAMPLE:
-# hadoop jar SystemML.jar -f random-forest-predict.dml -nvargs X=INPUT_DIR/X Y=INPUT_DIR/Y R=INPUT_DIR/R M=INPUT_DIR/model P=OUTPUT_DIR/predictions
-# A=OUTPUT_DIR/accurcay CM=OUTPUT_DIR/confusion fmt=csv
-
-fileX = $X;
-fileM = $M;
-fileP = $P;
-fileY = ifdef ($Y, " ");
-fileR = ifdef ($R, " ");
-fileC = ifdef ($C, " ");
-fileOOB = ifdef ($OOB, " ");
-fileCM = ifdef ($CM, " ");
-fileA = ifdef ($A, " ");
-fmtO = ifdef ($fmt, "text");
-X = read (fileX);
-M = read (fileM);
-
-num_records = nrow (X);
-Y_predicted = matrix (0, rows = num_records, cols = 1);
-num_trees = max (M[2,]);
-num_labels = max (M[5,]);
-num_nodes_per_tree = aggregate (target = t (M[2,]), groups = t (M[2,]), fn = "count");
-num_nodes_per_tree_cum = cumsum (num_nodes_per_tree);
-
-R_cat = matrix (0, rows = 1, cols = 1);
-R_scale = matrix (0, rows = 1, cols = 1);
-
-if (fileR != " ") {
- R = read (fileR);
- dummy_coded = ppred (R[,2], R[,3], "!=");
- R_scale = removeEmpty (target = R[,2] * (1 - dummy_coded), margin = "rows");
- R_cat = removeEmpty (target = R[,2:3] * dummy_coded, margin = "rows");
-} else { # only scale features available
- R_scale = seq (1, ncol (X));
-}
-
-if (fileC != " ") {
- C = read (fileC);
- label_counts_oob = matrix (0, rows = num_records, cols = num_labels);
-}
-
-label_counts = matrix (0, rows = num_records, cols = num_labels);
-parfor (i in 1:num_records, check = 0) {
- cur_sample = X[i,];
- cur_node_pos = 1;
- # cur_node = 1;
- cur_tree = 1;
- start_ind = 1;
- labels_found = FALSE;
- while (!labels_found) {
-
- cur_feature = as.scalar (M[4,cur_node_pos]);
- type_label = as.scalar (M[5,cur_node_pos]);
- if (cur_feature == 0) { # leaf found
- label_counts[i,type_label] = label_counts[i,type_label] + 1;
- if (fileC != " ") {
- if (as.scalar (C[i,cur_tree]) == 0) label_counts_oob[i,type_label] = label_counts_oob[i,type_label] + 1;
- }
- if (cur_tree < num_trees) {
- cur_node_pos = as.scalar (num_nodes_per_tree_cum[cur_tree,]) + 1;
- } else if (cur_tree == num_trees) {
- labels_found = TRUE;
- }
- cur_tree = cur_tree + 1;
- } else {
- # determine type: 1 for scale, 2 for categorical
- if (type_label == 1) { # scale feature
- cur_start_ind = as.scalar (R_scale[cur_feature,]);
- cur_value = as.scalar (cur_sample[,cur_start_ind]);
- cur_split = as.scalar (M[7,cur_node_pos]);
- if (cur_value < cur_split) { # go to left branch
- cur_node_pos = cur_node_pos + as.scalar (M[3,cur_node_pos]);
- # cur_node = as.scalar (cur_M[1,cur_node_pos]);
- } else { # go to right branch
- cur_node_pos = cur_node_pos + as.scalar (M[3,cur_node_pos]) + 1;
- # cur_node = as.scalar (cur_M[1,cur_node_pos]);
- }
- } else if (type_label == 2) { # categorical feature
- cur_start_ind = as.scalar (R_cat[cur_feature,1]);
- cur_end_ind = as.scalar (R_cat[cur_feature,2]);
- cur_value = as.scalar (rowIndexMax(cur_sample[,cur_start_ind:cur_end_ind]));
- cur_offset = as.scalar (M[6,cur_node_pos]);
- value_found = sum (ppred (M[7:(7 + cur_offset - 1),cur_node_pos], cur_value, "=="));
- if (value_found >= 1) { # go to left branch
- cur_node_pos = cur_node_pos + as.scalar (M[3,cur_node_pos]);
- # cur_node = as.scalar (cur_M[1,cur_node_pos]);
- } else { # go to right branch
- cur_node_pos = cur_node_pos + as.scalar (M[3,cur_node_pos]) + 1;
- # cur_node = as.scalar (cur_M[1,cur_node_pos]);
- }
-
- }
-}}}
-
-Y_predicted = rowIndexMax (label_counts);
-write (Y_predicted, fileP, format = fmtO);
-
-if (fileY != " ") {
- Y_dummy = read (fileY);
- num_classes = ncol (Y_dummy);
- Y = rowSums (Y_dummy * t (seq (1, num_classes)));
- result = ppred (Y, Y_predicted, "==");
- result = sum (result);
- accuracy = result / num_records * 100;
- acc_str = "Accuracy (%): " + accuracy;
- if (fileA != " ") {
- write (acc_str, fileA, format = fmtO);
- } else {
- print (acc_str);
- }
- if (fileC != " ") {
- oob_ind = ppred (rowSums (label_counts_oob), 0, ">")
- label_counts_oob = removeEmpty (target = label_counts_oob, margin = "rows");
- num_oob = nrow (label_counts_oob);
- Y_predicted_oob = rowIndexMax (label_counts_oob);
- Y_oob = removeEmpty (target = Y * oob_ind, margin = "rows");
- result = ppred (Y_oob, Y_predicted_oob, "==");
- oob_error = (1 - (sum (result) / num_oob)) * 100;
- oob_str = "Out-Of-Bag error (%): " + oob_error;
- if (fileOOB != " ") {
- write (oob_str, fileOOB, format = fmtO);
- } else {
- print (oob_str);
- }
- }
- if (fileCM != " ") {
- confusion_mat = table(Y_predicted, Y, num_classes, num_classes)
- write(confusion_mat, fileCM, format = fmtO)
- }
-}
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+#
+# THIS SCRIPT COMPUTES LABEL PREDICTIONS MEANT FOR USE WITH A RANDOM FOREST MODEL ON A HELD OUT TEST SET
+# OR FOR COMPUTING THE OUT-OF-BAG ERROR ON THE TRAINING SET.
+#
+# INPUT PARAMETERS:
+# ---------------------------------------------------------------------------------------------
+# NAME TYPE DEFAULT MEANING
+# ---------------------------------------------------------------------------------------------
+# X String --- Location to read test feature matrix or training feature matrix for computing Out-Of-Bag error;
+# note that X needs to be both recoded and dummy coded
+# Y String " " Location to read true label matrix Y if requested; note that Y needs to be both recoded and dummy coded
+# R String " " Location to read the matrix R which for each feature in X contains the following information
+# - R[,1]: column ids
+# - R[,2]: start indices
+# - R[,3]: end indices
+# If R is not provided by default all variables are assumed to be scale
+# M String --- Location to read matrix M containing the learned tree i the following format
+# - M[1,j]: id of node j (in a complete binary tree)
+# - M[2,j]: tree id
+# - M[3,j]: Offset (no. of columns) to left child of j if j is an internal node, otherwise 0
+# - M[4,j]: Feature index of the feature that node j looks at if j is an internal node, otherwise 0
+# - M[5,j]: Type of the feature that node j looks at if j is an internal node: 1 for scale and 2 for categorical features,
+# otherwise the label that leaf node j is supposed to predict
+# - M[6,j]: If j is an internal node: 1 if the feature chosen for j is scale, otherwise the size of the subset of values
+# stored in rows 7,8,... if j is categorical
+# If j is a leaf node: number of misclassified samples reaching at node j
+# - M[7:,j]: If j is an internal node: Threshold the example's feature value is compared to is stored at M[7,j]
+# if the feature chosen for j is scale, otherwise if the feature chosen for j is categorical rows 7,8,...
+# depict the value subset chosen for j
+# If j is a leaf node 1 if j is impure and the number of samples at j > threshold, otherwise 0
+# C String " " Location to read the counts matrix containing the number of times samples are chosen in each tree of the random forest
+# P String --- Location to store the label predictions for X
+# A String " " Location to store the test accuracy (%) for the prediction if requested
+# OOB String " " If C is provided location to store the Out-Of-Bag (OOB) error of the learned model
+# CM String " " Location to store the confusion matrix if requested
+# fmt String "text" The output format of the output, such as "text" or "csv"
+# ---------------------------------------------------------------------------------------------
+# OUTPUT:
+# 1- Matrix Y containing the predicted labels for X
+# 2- Test accuracy if requested
+# 3- Confusion matrix C if requested
+# -------------------------------------------------------------------------------------------
+# HOW TO INVOKE THIS SCRIPT - EXAMPLE:
+# hadoop jar SystemML.jar -f random-forest-predict.dml -nvargs X=INPUT_DIR/X Y=INPUT_DIR/Y R=INPUT_DIR/R M=INPUT_DIR/model P=OUTPUT_DIR/predictions
+# A=OUTPUT_DIR/accurcay CM=OUTPUT_DIR/confusion fmt=csv
+
+fileX = $X;
+fileM = $M;
+fileP = $P;
+fileY = ifdef ($Y, " ");
+fileR = ifdef ($R, " ");
+fileC = ifdef ($C, " ");
+fileOOB = ifdef ($OOB, " ");
+fileCM = ifdef ($CM, " ");
+fileA = ifdef ($A, " ");
+fmtO = ifdef ($fmt, "text");
+X = read (fileX);
+M = read (fileM);
+
+num_records = nrow (X);
+Y_predicted = matrix (0, rows = num_records, cols = 1);
+num_trees = max (M[2,]);
+num_labels = max (M[5,]);
+num_nodes_per_tree = aggregate (target = t (M[2,]), groups = t (M[2,]), fn = "count");
+num_nodes_per_tree_cum = cumsum (num_nodes_per_tree);
+
+R_cat = matrix (0, rows = 1, cols = 1);
+R_scale = matrix (0, rows = 1, cols = 1);
+
+if (fileR != " ") {
+ R = read (fileR);
+ dummy_coded = ppred (R[,2], R[,3], "!=");
+ R_scale = removeEmpty (target = R[,2] * (1 - dummy_coded), margin = "rows");
+ R_cat = removeEmpty (target = R[,2:3] * dummy_coded, margin = "rows");
+} else { # only scale features available
+ R_scale = seq (1, ncol (X));
+}
+
+if (fileC != " ") {
+ C = read (fileC);
+ label_counts_oob = matrix (0, rows = num_records, cols = num_labels);
+}
+
+label_counts = matrix (0, rows = num_records, cols = num_labels);
+parfor (i in 1:num_records, check = 0) {
+ cur_sample = X[i,];
+ cur_node_pos = 1;
+ # cur_node = 1;
+ cur_tree = 1;
+ start_ind = 1;
+ labels_found = FALSE;
+ while (!labels_found) {
+
+ cur_feature = as.scalar (M[4,cur_node_pos]);
+ type_label = as.scalar (M[5,cur_node_pos]);
+ if (cur_feature == 0) { # leaf found
+ label_counts[i,type_label] = label_counts[i,type_label] + 1;
+ if (fileC != " ") {
+ if (as.scalar (C[i,cur_tree]) == 0) label_counts_oob[i,type_label] = label_counts_oob[i,type_label] + 1;
+ }
+ if (cur_tree < num_trees) {
+ cur_node_pos = as.scalar (num_nodes_per_tree_cum[cur_tree,]) + 1;
+ } else if (cur_tree == num_trees) {
+ labels_found = TRUE;
+ }
+ cur_tree = cur_tree + 1;
+ } else {
+ # determine type: 1 for scale, 2 for categorical
+ if (type_label == 1) { # scale feature
+ cur_start_ind = as.scalar (R_scale[cur_feature,]);
+ cur_value = as.scalar (cur_sample[,cur_start_ind]);
+ cur_split = as.scalar (M[7,cur_node_pos]);
+ if (cur_value < cur_split) { # go to left branch
+ cur_node_pos = cur_node_pos + as.scalar (M[3,cur_node_pos]);
+ # cur_node = as.scalar (cur_M[1,cur_node_pos]);
+ } else { # go to right branch
+ cur_node_pos = cur_node_pos + as.scalar (M[3,cur_node_pos]) + 1;
+ # cur_node = as.scalar (cur_M[1,cur_node_pos]);
+ }
+ } else if (type_label == 2) { # categorical feature
+ cur_start_ind = as.scalar (R_cat[cur_feature,1]);
+ cur_end_ind = as.scalar (R_cat[cur_feature,2]);
+ cur_value = as.scalar (rowIndexMax(cur_sample[,cur_start_ind:cur_end_ind]));
+ cur_offset = as.scalar (M[6,cur_node_pos]);
+ value_found = sum (ppred (M[7:(7 + cur_offset - 1),cur_node_pos], cur_value, "=="));
+ if (value_found >= 1) { # go to left branch
+ cur_node_pos = cur_node_pos + as.scalar (M[3,cur_node_pos]);
+ # cur_node = as.scalar (cur_M[1,cur_node_pos]);
+ } else { # go to right branch
+ cur_node_pos = cur_node_pos + as.scalar (M[3,cur_node_pos]) + 1;
+ # cur_node = as.scalar (cur_M[1,cur_node_pos]);
+ }
+
+ }
+}}}
+
+Y_predicted = rowIndexMax (label_counts);
+write (Y_predicted, fileP, format = fmtO);
+
+if (fileY != " ") {
+ Y_dummy = read (fileY);
+ num_classes = ncol (Y_dummy);
+ Y = rowSums (Y_dummy * t (seq (1, num_classes)));
+ result = ppred (Y, Y_predicted, "==");
+ result = sum (result);
+ accuracy = result / num_records * 100;
+ acc_str = "Accuracy (%): " + accuracy;
+ if (fileA != " ") {
+ write (acc_str, fileA, format = fmtO);
+ } else {
+ print (acc_str);
+ }
+ if (fileC != " ") {
+ oob_ind = ppred (rowSums (label_counts_oob), 0, ">")
+ label_counts_oob = removeEmpty (target = label_counts_oob, margin = "rows");
+ num_oob = nrow (label_counts_oob);
+ Y_predicted_oob = rowIndexMax (label_counts_oob);
+ Y_oob = removeEmpty (target = Y * oob_ind, margin = "rows");
+ result = ppred (Y_oob, Y_predicted_oob, "==");
+ oob_error = (1 - (sum (result) / num_oob)) * 100;
+ oob_str = "Out-Of-Bag error (%): " + oob_error;
+ if (fileOOB != " ") {
+ write (oob_str, fileOOB, format = fmtO);
+ } else {
+ print (oob_str);
+ }
+ }
+ if (fileCM != " ") {
+ confusion_mat = table(Y_predicted, Y, num_classes, num_classes)
+ write(confusion_mat, fileCM, format = fmtO)
+ }
+}