You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by du...@apache.org on 2016/01/22 17:34:05 UTC

[29/51] [partial] incubator-systemml git commit: [SYSTEMML-482] [SYSTEMML-480] Adding a Git attributes file to enfore Unix-styled line endings, and normalizing all of the line endings.

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/src/test/scripts/applications/descriptivestats/IQMTest.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/descriptivestats/IQMTest.dml b/src/test/scripts/applications/descriptivestats/IQMTest.dml
index f0b9477..d6234ab 100644
--- a/src/test/scripts/applications/descriptivestats/IQMTest.dml
+++ b/src/test/scripts/applications/descriptivestats/IQMTest.dml
@@ -1,35 +1,35 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-$$readhelper$$
-
-V = read("$$indir$$vector", rows=$$rows$$, cols=1, format="text")
-W = read("$$indir$$weight", rows=$$rows$$, cols=1, format="text")
-
-# inter quartile mean
-iqm = interQuartileMean(V)
-iqmHelper1 = iqm * Helper;
-write(iqmHelper1, "$$outdir$$iqm", format="text");
-
-# weighted inter quartile mean
-wiqm = interQuartileMean(V, W)
-iqmHelper2 = wiqm * Helper;
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+$$readhelper$$
+
+V = read("$$indir$$vector", rows=$$rows$$, cols=1, format="text")
+W = read("$$indir$$weight", rows=$$rows$$, cols=1, format="text")
+
+# inter quartile mean
+iqm = interQuartileMean(V)
+iqmHelper1 = iqm * Helper;
+write(iqmHelper1, "$$outdir$$iqm", format="text");
+
+# weighted inter quartile mean
+wiqm = interQuartileMean(V, W)
+iqmHelper2 = wiqm * Helper;
 write(iqmHelper2, "$$outdir$$weighted_iqm", format="text");
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/src/test/scripts/applications/descriptivestats/OddsRatio.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/descriptivestats/OddsRatio.R b/src/test/scripts/applications/descriptivestats/OddsRatio.R
index 6649e94..4994935 100644
--- a/src/test/scripts/applications/descriptivestats/OddsRatio.R
+++ b/src/test/scripts/applications/descriptivestats/OddsRatio.R
@@ -1,75 +1,75 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# JUnit test class: dml.test.integration.descriptivestats.CategoricalCategoricalTest.java
-# command line invocation assuming $CC_HOME is set to the home of the R script
-# Rscript $CC_HOME/OddsRato.R $CC_HOME/in/ $CC_HOME/expected/
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-A = readMM(paste(args[1], "A.mtx", sep=""));
-B = readMM(paste(args[1], "B.mtx", sep=""));
-
-F = table(A[,1],B[,1]);
-
-a11 = F[1,1];
-a12 = F[1,2];
-a21 = F[2,1];
-a22 = F[2,2];
-
-#print(paste(a11, " ", a12, " ", a21, " ", a22));
-
-oddsRatio = (a11*a22)/(a12*a21);
-sigma = sqrt(1/a11 + 1/a12 + 1/a21 + 1/a22);
-left_conf = exp( log(oddsRatio) - 2*sigma )
-right_conf = exp( log(oddsRatio) + 2*sigma )
-sigma_away = abs( log(oddsRatio)/sigma )
-
-# chisq.test returns a list containing statistic, p-value, etc.
-cst = chisq.test(F);
-
-# get the chi-squared coefficient from the list
-chi_squared = as.numeric(cst[1]);
-degFreedom =  as.numeric(cst[2]);
-pValue = as.numeric(cst[3]);
-
-q = min(dim(F));
-W = sum(F);
-cramers_v = sqrt(chi_squared/(W*(q-1)));
-
-
-#print(paste(oddsRatio, " ", sigma, " [", left_conf, ",", right_conf, "] ", sigma_away, " "));
-#print(paste(chi_squared, " ", degFreedom, " [", pValue, ",", cramers_v, "] "));
-
-write(oddsRatio, paste(args[2], "oddsRatio", sep=""));
-write(sigma, paste(args[2], "sigma", sep=""));
-write(left_conf, paste(args[2], "leftConf", sep=""));
-write(right_conf, paste(args[2], "rightConf", sep=""));
-write(sigma_away, paste(args[2], "sigmasAway", sep=""));
-
-#write(chi_squared, paste(args[2], "chiSquared", sep=""));
-#write(degFreedom, paste(args[2], "degFreedom", sep=""));
-#write(pValue, paste(args[2], "pValue", sep=""));
-#write(cramers_v, paste(args[2], "cramersV", sep=""));
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# JUnit test class: dml.test.integration.descriptivestats.CategoricalCategoricalTest.java
+# command line invocation assuming $CC_HOME is set to the home of the R script
+# Rscript $CC_HOME/OddsRato.R $CC_HOME/in/ $CC_HOME/expected/
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+A = readMM(paste(args[1], "A.mtx", sep=""));
+B = readMM(paste(args[1], "B.mtx", sep=""));
+
+F = table(A[,1],B[,1]);
+
+a11 = F[1,1];
+a12 = F[1,2];
+a21 = F[2,1];
+a22 = F[2,2];
+
+#print(paste(a11, " ", a12, " ", a21, " ", a22));
+
+oddsRatio = (a11*a22)/(a12*a21);
+sigma = sqrt(1/a11 + 1/a12 + 1/a21 + 1/a22);
+left_conf = exp( log(oddsRatio) - 2*sigma )
+right_conf = exp( log(oddsRatio) + 2*sigma )
+sigma_away = abs( log(oddsRatio)/sigma )
+
+# chisq.test returns a list containing statistic, p-value, etc.
+cst = chisq.test(F);
+
+# get the chi-squared coefficient from the list
+chi_squared = as.numeric(cst[1]);
+degFreedom =  as.numeric(cst[2]);
+pValue = as.numeric(cst[3]);
+
+q = min(dim(F));
+W = sum(F);
+cramers_v = sqrt(chi_squared/(W*(q-1)));
+
+
+#print(paste(oddsRatio, " ", sigma, " [", left_conf, ",", right_conf, "] ", sigma_away, " "));
+#print(paste(chi_squared, " ", degFreedom, " [", pValue, ",", cramers_v, "] "));
+
+write(oddsRatio, paste(args[2], "oddsRatio", sep=""));
+write(sigma, paste(args[2], "sigma", sep=""));
+write(left_conf, paste(args[2], "leftConf", sep=""));
+write(right_conf, paste(args[2], "rightConf", sep=""));
+write(sigma_away, paste(args[2], "sigmasAway", sep=""));
+
+#write(chi_squared, paste(args[2], "chiSquared", sep=""));
+#write(degFreedom, paste(args[2], "degFreedom", sep=""));
+#write(pValue, paste(args[2], "pValue", sep=""));
+#write(cramers_v, paste(args[2], "cramersV", sep=""));
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/src/test/scripts/applications/descriptivestats/OddsRatio.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/descriptivestats/OddsRatio.dml b/src/test/scripts/applications/descriptivestats/OddsRatio.dml
index 8a62389..ae52e03 100644
--- a/src/test/scripts/applications/descriptivestats/OddsRatio.dml
+++ b/src/test/scripts/applications/descriptivestats/OddsRatio.dml
@@ -1,114 +1,114 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-
-# DML script to compute oddsRatio and related confidence intervals
-# Input: two column vectors with categorical values w/ number of categories = 2
-
-#A1 = Rand(rows=1400, cols=1, min=1, max=2); 
-#A2 = Rand(rows=1400, cols=1, min=1, max=2); 
-#A1 = round(A1);
-#A2 = round(A2);
-
-A1 = read($1, rows=$2, cols=1);
-A2 = read($3, rows=$2, cols=1);
-
-F = table(A1, A2);
-
-# Perform computations only on 2x2 contingency tables
-if (nrow(F) != 2 | ncol(F) != 2) {
-	print(max(A1) + ", " + max(A2));
-	print("Only 2x2 tables are supported. Contingency table constructed from given data is [" + nrow(F) + ", " + ncol(F) + "]");
-}
-else {
-	[oddsRatio, left_conf, right_conf, sd, chisquared, pvalue, crv, sigma_away, degf] = pair_corr(F);
-	#print("Odds Ratio " + oddsRatio);
-	#print("Standard Devication " + sd);
-	#print("Confidence Interval [" + left_conf + "," + right_conf + "]");
-	#print("Howmany sigma's away [" + sigma_away);
-	#print("Chi-squared Test: statistic = " + chisquared + ", pValue = " + pvalue + ", Cramer's V = " + crv + ", Degrees of Freedom = " + degf);
-	
-	write(oddsRatio, $4);
-	write(sd, $5);
-	write(left_conf, $6);
-	write(right_conf, $7);
-	write(sigma_away, $8);
-	#write(chisquared, $9);
-	#write(degf, $10);
-	#write(pvalue, $11);
-	#write(crv, $12);
-}
-
-# -----------------------------------------------------------------------------------------------
-
-# Given a 2x2 contingency table, it computes oddsRatio and the corresponding confidence interval
-pair_corr = function(Matrix[Double] A) return (Double oddsRatio, Double left_conf, Double right_conf, Double sd, Double chisquared, Double pvalue, Double crv, Double sigma_away, Double df) {
-	a11 = castAsScalar(A[1,1]);
-	a12 = castAsScalar(A[1,2]);
-	a21 = castAsScalar(A[2,1]);
-	a22 = castAsScalar(A[2,2]);
-
-	sd = sqrt(1/a11 + 1/a12 + 1/a21 + 1/a22);
-	oddsRatio = (a11*a22)/(a12*a21);
-
-	[chisquared, df, pvalue, crv] = bivar_cc(A);
-
-	left_conf = exp( log(oddsRatio) - 2*sd )
-	right_conf = exp( log(oddsRatio) + 2*sd )
-	sigma_away = abs( log(oddsRatio)/sd )
-}
-
-# -----------------------------------------------------------------------------------------------
-
-# Given a contingency table, perform the chi-squared test.
-bivar_cc = function(Matrix[Double] F) return (Double chisq, Double df, Double pval, Double cramersv) {
-
-    # Contingency Table
-    # F = ctable(A,B);
-
-    # Chi-Squared
-    W = sum(F);
-    r = rowSums(F);
-    c = colSums(F);
-    E = (r %*% c)/W;
-    T = (F-E)^2/E;
-    chi_squared = sum(T);
-
-    # compute p-value
-    degFreedom = (nrow(F)-1)*(ncol(F)-1);
-    pValue = pchisq(target=chi_squared, df=degFreedom, lower.tail=FALSE);
-
-    # Cramer's V
-    R = nrow(F);
-    C = ncol(F);
-    q = min(R,C);
-    cramers_v = sqrt(chi_squared/(W*(q-1)));
-
-    # Assign return values
-    chisq = chi_squared;
-    df = degFreedom;
-    pval = pValue;
-    cramersv = cramers_v;
-}
-
-# -----------------------------------------------------------------------------------------------
-
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+# DML script to compute oddsRatio and related confidence intervals
+# Input: two column vectors with categorical values w/ number of categories = 2
+
+#A1 = Rand(rows=1400, cols=1, min=1, max=2); 
+#A2 = Rand(rows=1400, cols=1, min=1, max=2); 
+#A1 = round(A1);
+#A2 = round(A2);
+
+A1 = read($1, rows=$2, cols=1);
+A2 = read($3, rows=$2, cols=1);
+
+F = table(A1, A2);
+
+# Perform computations only on 2x2 contingency tables
+if (nrow(F) != 2 | ncol(F) != 2) {
+	print(max(A1) + ", " + max(A2));
+	print("Only 2x2 tables are supported. Contingency table constructed from given data is [" + nrow(F) + ", " + ncol(F) + "]");
+}
+else {
+	[oddsRatio, left_conf, right_conf, sd, chisquared, pvalue, crv, sigma_away, degf] = pair_corr(F);
+	#print("Odds Ratio " + oddsRatio);
+	#print("Standard Devication " + sd);
+	#print("Confidence Interval [" + left_conf + "," + right_conf + "]");
+	#print("Howmany sigma's away [" + sigma_away);
+	#print("Chi-squared Test: statistic = " + chisquared + ", pValue = " + pvalue + ", Cramer's V = " + crv + ", Degrees of Freedom = " + degf);
+	
+	write(oddsRatio, $4);
+	write(sd, $5);
+	write(left_conf, $6);
+	write(right_conf, $7);
+	write(sigma_away, $8);
+	#write(chisquared, $9);
+	#write(degf, $10);
+	#write(pvalue, $11);
+	#write(crv, $12);
+}
+
+# -----------------------------------------------------------------------------------------------
+
+# Given a 2x2 contingency table, it computes oddsRatio and the corresponding confidence interval
+pair_corr = function(Matrix[Double] A) return (Double oddsRatio, Double left_conf, Double right_conf, Double sd, Double chisquared, Double pvalue, Double crv, Double sigma_away, Double df) {
+	a11 = castAsScalar(A[1,1]);
+	a12 = castAsScalar(A[1,2]);
+	a21 = castAsScalar(A[2,1]);
+	a22 = castAsScalar(A[2,2]);
+
+	sd = sqrt(1/a11 + 1/a12 + 1/a21 + 1/a22);
+	oddsRatio = (a11*a22)/(a12*a21);
+
+	[chisquared, df, pvalue, crv] = bivar_cc(A);
+
+	left_conf = exp( log(oddsRatio) - 2*sd )
+	right_conf = exp( log(oddsRatio) + 2*sd )
+	sigma_away = abs( log(oddsRatio)/sd )
+}
+
+# -----------------------------------------------------------------------------------------------
+
+# Given a contingency table, perform the chi-squared test.
+bivar_cc = function(Matrix[Double] F) return (Double chisq, Double df, Double pval, Double cramersv) {
+
+    # Contingency Table
+    # F = ctable(A,B);
+
+    # Chi-Squared
+    W = sum(F);
+    r = rowSums(F);
+    c = colSums(F);
+    E = (r %*% c)/W;
+    T = (F-E)^2/E;
+    chi_squared = sum(T);
+
+    # compute p-value
+    degFreedom = (nrow(F)-1)*(ncol(F)-1);
+    pValue = pchisq(target=chi_squared, df=degFreedom, lower.tail=FALSE);
+
+    # Cramer's V
+    R = nrow(F);
+    C = ncol(F);
+    q = min(R,C);
+    cramers_v = sqrt(chi_squared/(W*(q-1)));
+
+    # Assign return values
+    chisq = chi_squared;
+    df = degFreedom;
+    pval = pValue;
+    cramersv = cramers_v;
+}
+
+# -----------------------------------------------------------------------------------------------
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/src/test/scripts/applications/descriptivestats/OrdinalOrdinal.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/descriptivestats/OrdinalOrdinal.R b/src/test/scripts/applications/descriptivestats/OrdinalOrdinal.R
index f3fe12d..f7d6de1 100644
--- a/src/test/scripts/applications/descriptivestats/OrdinalOrdinal.R
+++ b/src/test/scripts/applications/descriptivestats/OrdinalOrdinal.R
@@ -1,38 +1,38 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# JUnit test class: dml.test.integration.descriptivestats.BivariateOrdinalOrdinalTest.java
-# command line invocation assuming $OO_HOME is set to the home of the R script
-# Rscript $OO_HOME/OrdinalOrdinal.R $OO_HOME/in/ $OO_HOME/expected/
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-A = readMM(paste(args[1], "A.mtx", sep=""))
-B = readMM(paste(args[1], "B.mtx", sep=""))
-
-spearman = cor(A[,1],B[,1], method="spearman");
-
-#paste("R value", spearman);
-
-write(spearman, paste(args[2], "Spearman", sep=""));
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# JUnit test class: dml.test.integration.descriptivestats.BivariateOrdinalOrdinalTest.java
+# command line invocation assuming $OO_HOME is set to the home of the R script
+# Rscript $OO_HOME/OrdinalOrdinal.R $OO_HOME/in/ $OO_HOME/expected/
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+A = readMM(paste(args[1], "A.mtx", sep=""))
+B = readMM(paste(args[1], "B.mtx", sep=""))
+
+spearman = cor(A[,1],B[,1], method="spearman");
+
+#paste("R value", spearman);
+
+write(spearman, paste(args[2], "Spearman", sep=""));
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/src/test/scripts/applications/descriptivestats/OrdinalOrdinal.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/descriptivestats/OrdinalOrdinal.dml b/src/test/scripts/applications/descriptivestats/OrdinalOrdinal.dml
index a30818a..cb153d0 100644
--- a/src/test/scripts/applications/descriptivestats/OrdinalOrdinal.dml
+++ b/src/test/scripts/applications/descriptivestats/OrdinalOrdinal.dml
@@ -1,74 +1,74 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# Note this script is externalized to customers, please do not change w/o consulting component owner.
-# How to invoke this dml script OrdinalOrdinal.dml?
-# Assume OO_HOME is set to the home of the dml script
-# Assume input and output directories are on hdfs as INPUT_DIR and OUTPUT_DIR
-# Assume rows = 10000 for A and B
-# hadoop jar SystemML.jar -f $OO_HOME/OrdinalOrdinal.dml -args "$INPUT_DIR/A" 10000 "$INPUT_DIR/B" "$OUPUT_DIR/Spearman"
-
-#-------------------------------------------------------------------------
-# Function to compute ranks
-# takes a column vector as input, and produces a vector of same size in which each cell denotes to the computed score for that category
-computeRanks = function(matrix[double] X) return (matrix[double] Ranks) {
-    Ranks = cumsum(X) - X/2 + 1/2;
-}
-#-------------------------------------------------------------------------
-
-A = read($1, rows=$2, cols=1, format="text");
-B = read($3, rows=$2, cols=1, format="text");
-
-# compute contingency table
-F = table(A,B);
-
-catA = nrow(F);  # number of categories in A
-catB = ncol(F);  # number of categories in B
-
-# compute category-wise counts for both the attributes
-R = rowSums(F);
-S = colSums(F);
-
-# compute scores, both are column vectors
-[C] = computeRanks(R);
-meanX = mean(C,R); 
-
-columnS = t(S);
-[D] = computeRanks(columnS);
-
-# scores (C,D) are individual values, and counts (R,S) act as weights
-meanY = mean(D,columnS);
-
-W = sum(F); # total weight, or total #cases
-varX = moment(C,R,2)*(W/(W-1.0));
-varY = moment(D,columnS,2)*(W/(W-1.0));
-covXY = sum( t(F/(W-1) * (C-meanX)) * (D-meanY) );
-
-sp = covXY/(sqrt(varX)*sqrt(varY));
-
-#print("X: mean " + meanX + ", var " + varX);
-#print("Y: mean " + meanY + ", var " + varY);
-#print("covXY: " + sp);
-
-#sp = spearman(A,B);
-
-write(sp, $4);
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# Note this script is externalized to customers, please do not change w/o consulting component owner.
+# How to invoke this dml script OrdinalOrdinal.dml?
+# Assume OO_HOME is set to the home of the dml script
+# Assume input and output directories are on hdfs as INPUT_DIR and OUTPUT_DIR
+# Assume rows = 10000 for A and B
+# hadoop jar SystemML.jar -f $OO_HOME/OrdinalOrdinal.dml -args "$INPUT_DIR/A" 10000 "$INPUT_DIR/B" "$OUPUT_DIR/Spearman"
+
+#-------------------------------------------------------------------------
+# Function to compute ranks
+# takes a column vector as input, and produces a vector of same size in which each cell denotes to the computed score for that category
+computeRanks = function(matrix[double] X) return (matrix[double] Ranks) {
+    Ranks = cumsum(X) - X/2 + 1/2;
+}
+#-------------------------------------------------------------------------
+
+A = read($1, rows=$2, cols=1, format="text");
+B = read($3, rows=$2, cols=1, format="text");
+
+# compute contingency table
+F = table(A,B);
+
+catA = nrow(F);  # number of categories in A
+catB = ncol(F);  # number of categories in B
+
+# compute category-wise counts for both the attributes
+R = rowSums(F);
+S = colSums(F);
+
+# compute scores, both are column vectors
+[C] = computeRanks(R);
+meanX = mean(C,R); 
+
+columnS = t(S);
+[D] = computeRanks(columnS);
+
+# scores (C,D) are individual values, and counts (R,S) act as weights
+meanY = mean(D,columnS);
+
+W = sum(F); # total weight, or total #cases
+varX = moment(C,R,2)*(W/(W-1.0));
+varY = moment(D,columnS,2)*(W/(W-1.0));
+covXY = sum( t(F/(W-1) * (C-meanX)) * (D-meanY) );
+
+sp = covXY/(sqrt(varX)*sqrt(varY));
+
+#print("X: mean " + meanX + ", var " + varX);
+#print("Y: mean " + meanY + ", var " + varY);
+#print("covXY: " + sp);
+
+#sp = spearman(A,B);
+
+write(sp, $4);
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/src/test/scripts/applications/descriptivestats/OrdinalOrdinalWithWeightsTest.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/descriptivestats/OrdinalOrdinalWithWeightsTest.R b/src/test/scripts/applications/descriptivestats/OrdinalOrdinalWithWeightsTest.R
index addc779..0c244da 100644
--- a/src/test/scripts/applications/descriptivestats/OrdinalOrdinalWithWeightsTest.R
+++ b/src/test/scripts/applications/descriptivestats/OrdinalOrdinalWithWeightsTest.R
@@ -1,46 +1,46 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# JUnit test class: dml.test.integration.descriptivestats.BivariateOrdinalOrdinalWithWeightsTest.java
-# command line invocation assuming $OO_HOME is set to the home of the R script
-# Rscript $OO_HOME/OrdinalOrdinal.R $OO_HOME/in/ $OO_HOME/expected/
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-Atemp  = readMM(paste(args[1], "A.mtx", sep=""))
-Btemp  = readMM(paste(args[1], "B.mtx", sep=""))
-WMtemp = readMM(paste(args[1], "WM.mtx", sep=""))
-
-#Atemp  = readMM(file="$$indir$$A.mtx"); #readMM(paste(args[1], "A.mtx", sep=""))
-#Btemp  = readMM(file="$$indir$$B.mtx"); #readMM(paste(args[1], "B.mtx", sep=""))
-#WMtemp = readMM(file="$$indir$$WM.mtx"); #readMM(paste(args[1], "WM.mtx", sep=""))
-
-A = rep(Atemp[,1],WMtemp[,1])
-B = rep(Btemp[,1],WMtemp[,1])
-
-spearman = cor(A, B, method="spearman");
-
-#paste("Weighted R value", spearman);
-
-write(spearman, paste(args[2], "Spearman", sep=""));
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# JUnit test class: dml.test.integration.descriptivestats.BivariateOrdinalOrdinalWithWeightsTest.java
+# command line invocation assuming $OO_HOME is set to the home of the R script
+# Rscript $OO_HOME/OrdinalOrdinal.R $OO_HOME/in/ $OO_HOME/expected/
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+Atemp  = readMM(paste(args[1], "A.mtx", sep=""))
+Btemp  = readMM(paste(args[1], "B.mtx", sep=""))
+WMtemp = readMM(paste(args[1], "WM.mtx", sep=""))
+
+#Atemp  = readMM(file="$$indir$$A.mtx"); #readMM(paste(args[1], "A.mtx", sep=""))
+#Btemp  = readMM(file="$$indir$$B.mtx"); #readMM(paste(args[1], "B.mtx", sep=""))
+#WMtemp = readMM(file="$$indir$$WM.mtx"); #readMM(paste(args[1], "WM.mtx", sep=""))
+
+A = rep(Atemp[,1],WMtemp[,1])
+B = rep(Btemp[,1],WMtemp[,1])
+
+spearman = cor(A, B, method="spearman");
+
+#paste("Weighted R value", spearman);
+
+write(spearman, paste(args[2], "Spearman", sep=""));
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/src/test/scripts/applications/descriptivestats/OrdinalOrdinalWithWeightsTest.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/descriptivestats/OrdinalOrdinalWithWeightsTest.dml b/src/test/scripts/applications/descriptivestats/OrdinalOrdinalWithWeightsTest.dml
index 9af57fa..e6b5143 100644
--- a/src/test/scripts/applications/descriptivestats/OrdinalOrdinalWithWeightsTest.dml
+++ b/src/test/scripts/applications/descriptivestats/OrdinalOrdinalWithWeightsTest.dml
@@ -1,78 +1,78 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-
-#A <- Ordinal
-#B <- Ordinal
-#WM <- Weights
-
-#A = read("$$indir$$A", rows=$$rows$$, cols=1, format="text");
-#B = read("$$indir$$B", rows=$$rows$$, cols=1, format="text");
-#WM = read("$$indir$$WM", rows=$$rows$$, cols=1, format="text");
-
-A = read($1, rows=$2, cols=1, format="text");
-B = read($3, rows=$2, cols=1, format="text");
-WM = read($4, rows=$2, cols=1, format="text");
-
-# compute contingency table
-F = table(A,B,WM);
-
-catA = nrow(F);  # number of categories in A
-catB = ncol(F);  # number of categories in B
-
-# compute category-wise counts for both the attributes
-R = rowSums(F);
-S = colSums(F);
-
-# compute scores, both are column vectors
-[C] = computeRanks(R);
-meanX = mean(C,R); 
-
-columnS = t(S);
-[D] = computeRanks(columnS);
-
-# scores (C,D) are individual values, and counts (R,S) act as weights
-meanY = mean(D,columnS);
-
-W = sum(F); # total weight, or total #cases
-varX = moment(C,R,2)*(W/(W-1.0));
-varY = moment(D,columnS,2)*(W/(W-1.0));
-covXY = sum( t(F/(W-1) * (C-meanX)) * (D-meanY) );
-
-sp = covXY/(sqrt(varX)*sqrt(varY));
-
-#print("X: mean " + meanX + ", var " + varX);
-#print("Y: mean " + meanY + ", var " + varY);
-#print("covXY: " + sp);
-
-#sp = spearman(A,B,WM);
-
-write(sp, $5);
-
-
-#-------------------------------------------------------------------------
-# Function to compute ranks
-# takes a column vector as input, and produces a vector of same size in which each cell denotes to the computed score for that category
-computeRanks = function(Matrix[Double] X) return (Matrix[Double] Ranks) {
-    Ranks = cumsum(X) - X/2 + 1/2;
-}
-#-------------------------------------------------------------------------
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+#A <- Ordinal
+#B <- Ordinal
+#WM <- Weights
+
+#A = read("$$indir$$A", rows=$$rows$$, cols=1, format="text");
+#B = read("$$indir$$B", rows=$$rows$$, cols=1, format="text");
+#WM = read("$$indir$$WM", rows=$$rows$$, cols=1, format="text");
+
+A = read($1, rows=$2, cols=1, format="text");
+B = read($3, rows=$2, cols=1, format="text");
+WM = read($4, rows=$2, cols=1, format="text");
+
+# compute contingency table
+F = table(A,B,WM);
+
+catA = nrow(F);  # number of categories in A
+catB = ncol(F);  # number of categories in B
+
+# compute category-wise counts for both the attributes
+R = rowSums(F);
+S = colSums(F);
+
+# compute scores, both are column vectors
+[C] = computeRanks(R);
+meanX = mean(C,R); 
+
+columnS = t(S);
+[D] = computeRanks(columnS);
+
+# scores (C,D) are individual values, and counts (R,S) act as weights
+meanY = mean(D,columnS);
+
+W = sum(F); # total weight, or total #cases
+varX = moment(C,R,2)*(W/(W-1.0));
+varY = moment(D,columnS,2)*(W/(W-1.0));
+covXY = sum( t(F/(W-1) * (C-meanX)) * (D-meanY) );
+
+sp = covXY/(sqrt(varX)*sqrt(varY));
+
+#print("X: mean " + meanX + ", var " + varX);
+#print("Y: mean " + meanY + ", var " + varY);
+#print("covXY: " + sp);
+
+#sp = spearman(A,B,WM);
+
+write(sp, $5);
+
+
+#-------------------------------------------------------------------------
+# Function to compute ranks
+# takes a column vector as input, and produces a vector of same size in which each cell denotes to the computed score for that category
+computeRanks = function(Matrix[Double] X) return (Matrix[Double] Ranks) {
+    Ranks = cumsum(X) - X/2 + 1/2;
+}
+#-------------------------------------------------------------------------
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/src/test/scripts/applications/descriptivestats/QuantileTest.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/descriptivestats/QuantileTest.dml b/src/test/scripts/applications/descriptivestats/QuantileTest.dml
index a8f2f6d..233fcdf 100644
--- a/src/test/scripts/applications/descriptivestats/QuantileTest.dml
+++ b/src/test/scripts/applications/descriptivestats/QuantileTest.dml
@@ -1,34 +1,34 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-$$readhelper$$
-
-V = read("$$indir$$vector", rows=$$rows1$$, cols=1, format="text")
-W = read("$$indir$$weight", rows=$$rows1$$, cols=1, format="text")
-P = read("$$indir$$prob", rows=$$rows2$$, cols=1, format="text")
-
-# quantile
-Q = quantile(V, P)
-write(Q, "$$outdir$$quantile", format="text");
-
-# weighted quantile
-WQ = quantile(V, W, P)
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+$$readhelper$$
+
+V = read("$$indir$$vector", rows=$$rows1$$, cols=1, format="text")
+W = read("$$indir$$weight", rows=$$rows1$$, cols=1, format="text")
+P = read("$$indir$$prob", rows=$$rows2$$, cols=1, format="text")
+
+# quantile
+Q = quantile(V, P)
+write(Q, "$$outdir$$quantile", format="text");
+
+# weighted quantile
+WQ = quantile(V, W, P)
 write(WQ, "$$outdir$$weighted_quantile", format="text");
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/src/test/scripts/applications/descriptivestats/Scale.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/descriptivestats/Scale.R b/src/test/scripts/applications/descriptivestats/Scale.R
index 0027c87..45c9efb 100644
--- a/src/test/scripts/applications/descriptivestats/Scale.R
+++ b/src/test/scripts/applications/descriptivestats/Scale.R
@@ -1,141 +1,141 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# JUnit test class: dml.test.integration.descriptivestats.UnivariateStatsTest.java
-# command line invocation assuming $S_HOME is set to the home of the R script
-# Rscript $S_HOME/Scale.R $S_HOME/in/ $S_HOME/expected/
-args <- commandArgs(TRUE)
-options(digits=22)
-
-options(repos="http://cran.stat.ucla.edu/") 
-is.installed <- function(mypkg) is.element(mypkg, installed.packages()[,1])
-
-is_plotrix = is.installed("plotrix");
-if ( !is_plotrix ) {
-install.packages("plotrix");
-} 
-library("plotrix");
-
-is_psych = is.installed("psych");
-if ( !is_psych ) {
-install.packages("psych");
-} 
-library("psych")
-
-is_moments = is.installed("moments");
-if( !is_moments){
-install.packages("moments");
-}
-library("moments")
-
-#library("batch")
-library("Matrix")
-
-V = readMM(paste(args[1], "vector.mtx", sep=""))
-P = readMM(paste(args[1], "prob.mtx", sep=""))
-
-n = nrow(V)
-
-# mean
-mu = mean(V)
-
-# variances
-var = var(V[,1])
-
-# standard deviations
-std_dev = sd(V[,1], na.rm = FALSE)
-
-# standard errors of mean
-SE = std.error(V[,1], na.rm)
-
-# coefficients of variation
-cv = std_dev/mu
-
-# harmonic means (note: may generate out of memory for large sparse matrices becauses of NaNs)
-# har_mu = harmonic.mean(V[,1]) -- DML does not support this yet
-
-# geometric means is not currently supported.
-# geom_mu = geometric.mean(V[,1]) -- DML does not support this yet
-
-# min and max
-mn=min(V)
-mx=max(V)
-
-# range
-rng = mx - mn
-
-# Skewness
-g1 = moment(V[,1], order=3, central=TRUE)/(std_dev^3)
-
-# standard error of skewness (not sure how it is defined without the weight)
-se_g1=sqrt( 6*n*(n-1.0) / ((n-2.0)*(n+1.0)*(n+3.0)) )
-
-# Kurtosis (using binomial formula)
-g2 = moment(V[,1], order=4, central=TRUE)/(var^2)-3
-
-# Standard error of Kurtosis (not sure how it is defined without the weight)
-se_g2= sqrt( (4*(n^2-1)*se_g1^2)/((n+5)*(n-3)) )
-
-# median
-md = median(V[,1]) #quantile(V[,1], 0.5, type = 1)
-
-# quantile
-Q = t(quantile(V[,1], P[,1], type = 1))
-
-# inter-quartile mean
-S=c(sort(V[,1]))
-
-q25d=n*0.25
-q75d=n*0.75
-q25i=ceiling(q25d)
-q75i=ceiling(q75d)
-
-iqm = sum(S[(q25i+1):q75i])
-iqm = iqm + (q25i-q25d)*S[q25i] - (q75i-q75d)*S[q75i]
-iqm = iqm/(n*0.5)
-
-#print(paste("IQM ", iqm));
-
-# outliers use ppred to describe it
-out_minus = t(as.numeric(V< mu-5*std_dev)*V) 
-out_plus = t(as.numeric(V> mu+5*std_dev)*V)
-
-write(mu, paste(args[2], "mean", sep=""));
-write(std_dev, paste(args[2], "std", sep=""));
-write(SE, paste(args[2], "se", sep=""));
-write(var, paste(args[2], "var", sep=""));
-write(cv, paste(args[2], "cv", sep=""));
-# write(har_mu),paste(args[2], "har", sep=""));
-# write(geom_mu, paste(args[2], "geom", sep=""));
-write(mn, paste(args[2], "min", sep=""));
-write(mx, paste(args[2], "max", sep=""));
-write(rng, paste(args[2], "rng", sep=""));
-write(g1, paste(args[2], "g1", sep=""));
-write(se_g1, paste(args[2], "se_g1", sep=""));
-write(g2, paste(args[2], "g2", sep=""));
-write(se_g2, paste(args[2], "se_g2", sep=""));
-write(md, paste(args[2], "median", sep=""));
-write(iqm, paste(args[2], "iqm", sep=""));
-writeMM(as(t(out_minus),"CsparseMatrix"), paste(args[2], "out_minus", sep=""), format="text");
-writeMM(as(t(out_plus),"CsparseMatrix"), paste(args[2], "out_plus", sep=""), format="text");
-writeMM(as(t(Q),"CsparseMatrix"), paste(args[2], "quantile", sep=""), format="text");
-
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# JUnit test class: dml.test.integration.descriptivestats.UnivariateStatsTest.java
+# command line invocation assuming $S_HOME is set to the home of the R script
+# Rscript $S_HOME/Scale.R $S_HOME/in/ $S_HOME/expected/
+args <- commandArgs(TRUE)
+options(digits=22)
+
+options(repos="http://cran.stat.ucla.edu/") 
+is.installed <- function(mypkg) is.element(mypkg, installed.packages()[,1])
+
+is_plotrix = is.installed("plotrix");
+if ( !is_plotrix ) {
+install.packages("plotrix");
+} 
+library("plotrix");
+
+is_psych = is.installed("psych");
+if ( !is_psych ) {
+install.packages("psych");
+} 
+library("psych")
+
+is_moments = is.installed("moments");
+if( !is_moments){
+install.packages("moments");
+}
+library("moments")
+
+#library("batch")
+library("Matrix")
+
+V = readMM(paste(args[1], "vector.mtx", sep=""))
+P = readMM(paste(args[1], "prob.mtx", sep=""))
+
+n = nrow(V)
+
+# mean
+mu = mean(V)
+
+# variances
+var = var(V[,1])
+
+# standard deviations
+std_dev = sd(V[,1], na.rm = FALSE)
+
+# standard errors of mean
+SE = std.error(V[,1], na.rm)
+
+# coefficients of variation
+cv = std_dev/mu
+
+# harmonic means (note: may generate out of memory for large sparse matrices becauses of NaNs)
+# har_mu = harmonic.mean(V[,1]) -- DML does not support this yet
+
+# geometric means is not currently supported.
+# geom_mu = geometric.mean(V[,1]) -- DML does not support this yet
+
+# min and max
+mn=min(V)
+mx=max(V)
+
+# range
+rng = mx - mn
+
+# Skewness
+g1 = moment(V[,1], order=3, central=TRUE)/(std_dev^3)
+
+# standard error of skewness (not sure how it is defined without the weight)
+se_g1=sqrt( 6*n*(n-1.0) / ((n-2.0)*(n+1.0)*(n+3.0)) )
+
+# Kurtosis (using binomial formula)
+g2 = moment(V[,1], order=4, central=TRUE)/(var^2)-3
+
+# Standard error of Kurtosis (not sure how it is defined without the weight)
+se_g2= sqrt( (4*(n^2-1)*se_g1^2)/((n+5)*(n-3)) )
+
+# median
+md = median(V[,1]) #quantile(V[,1], 0.5, type = 1)
+
+# quantile
+Q = t(quantile(V[,1], P[,1], type = 1))
+
+# inter-quartile mean
+S=c(sort(V[,1]))
+
+q25d=n*0.25
+q75d=n*0.75
+q25i=ceiling(q25d)
+q75i=ceiling(q75d)
+
+iqm = sum(S[(q25i+1):q75i])
+iqm = iqm + (q25i-q25d)*S[q25i] - (q75i-q75d)*S[q75i]
+iqm = iqm/(n*0.5)
+
+#print(paste("IQM ", iqm));
+
+# outliers use ppred to describe it
+out_minus = t(as.numeric(V< mu-5*std_dev)*V) 
+out_plus = t(as.numeric(V> mu+5*std_dev)*V)
+
+write(mu, paste(args[2], "mean", sep=""));
+write(std_dev, paste(args[2], "std", sep=""));
+write(SE, paste(args[2], "se", sep=""));
+write(var, paste(args[2], "var", sep=""));
+write(cv, paste(args[2], "cv", sep=""));
+# write(har_mu),paste(args[2], "har", sep=""));
+# write(geom_mu, paste(args[2], "geom", sep=""));
+write(mn, paste(args[2], "min", sep=""));
+write(mx, paste(args[2], "max", sep=""));
+write(rng, paste(args[2], "rng", sep=""));
+write(g1, paste(args[2], "g1", sep=""));
+write(se_g1, paste(args[2], "se_g1", sep=""));
+write(g2, paste(args[2], "g2", sep=""));
+write(se_g2, paste(args[2], "se_g2", sep=""));
+write(md, paste(args[2], "median", sep=""));
+write(iqm, paste(args[2], "iqm", sep=""));
+writeMM(as(t(out_minus),"CsparseMatrix"), paste(args[2], "out_minus", sep=""), format="text");
+writeMM(as(t(out_plus),"CsparseMatrix"), paste(args[2], "out_plus", sep=""), format="text");
+writeMM(as(t(Q),"CsparseMatrix"), paste(args[2], "quantile", sep=""), format="text");
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/src/test/scripts/applications/descriptivestats/Scale.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/descriptivestats/Scale.dml b/src/test/scripts/applications/descriptivestats/Scale.dml
index b59c235..e2362cb 100644
--- a/src/test/scripts/applications/descriptivestats/Scale.dml
+++ b/src/test/scripts/applications/descriptivestats/Scale.dml
@@ -1,114 +1,114 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# Note this script is externalized to customers, please do not change w/o consulting component owner.
-# How to invoke this dml script Scale.dml?
-# Assume S_HOME is set to the home of the dml script
-# Assume input and output directories are on hdfs as INPUT_DIR and OUTPUT_DIR
-# Assume rows = 10000 for V and rows = 5 for P
-# hadoop jar SystemML.jar -f $S_HOME/Scale.dml -args "$INPUT_DIR/vector" 10000 "$INPUT_DIR/prob 5
-# "$OUTPUT_DIR/mean" "$OUTPUT_DIR/std" "$OUTPUT_DIR/se" "$OUTPUT_DIR/var" "$OUTPUT_DIR/cv"
-# "$OUTPUT_DIR/min" "$OUTPUT_DIR/max" "$OUTPUT_DIR/rng" 
-# "$OUTPUT_DIR/g1" "$OUTPUT_DIR/se_g1" "$OUTPUT_DIR/g2" "$OUTPUT_DIR/se_g2" 
-# "$OUTPUT_DIR/median" "$OUTPUT_DIR/iqm"
-# "OUTPUT_DIR/out_minus" "$OUTPUT_DIR/out_plus" "$OUTPUT_DIR/quantile" 
-
-V = read($1, rows=$2, cols=1, format="text")
-P = read($3, rows=$4, cols=1, format="text")
-
-n = nrow(V)
-
-# sum
-s1 = sum(V)
-
-# 2nd central moment
-m2 = moment(V, 2)
-
-# 3rd central moment
-m3 = moment(V, 3)
-
-# 4th central moment
-m4 = moment(V, 4)
-
-# mean
-mu = mean(V)
-
-# variances
-var = n/(n-1.0)*m2
-
-# standard deviations
-std_dev = sqrt(var)
-
-# standard errors of mean
-SE = std_dev/sqrt(n)
-
-# coefficients of variation
-cv = std_dev/mu
-
-# min and max
-mn=min(V)
-mx=max(V)
-
-# range
-rng = mx - mn
-
-# Skewness
-g1 = m3/(std_dev^3)
-
-# standard error of skewness (not sure how it is defined without the weight)
-se_g1=sqrt( 6*n*(n-1.0) / ((n-2.0)*(n+1.0)*(n+3.0)) )
-
-# Kurtosis (using binomial formula)
-g2 = m4/(std_dev^4) - 3
-
-# Standard error of Kurtosis (not sure how it is defined without the weight)
-se_g2= sqrt( (4*(n^2-1)*se_g1^2)/((n+5.0)*(n-3.0)) )
-
-# median
-md = median(V)
-
-# quantile
-Q = quantile(V, P)
-
-# inter-quartile mean
-iqm = interQuartileMean(V)
-
-# outliers use ppred to describe it
-out_minus = ppred(V, mu-5*std_dev, "<")*V 
-out_plus = ppred(V, mu+5*std_dev, ">")*V
-
-write(mu, $5);
-write(std_dev, $6);
-write(SE, $7);
-write(var, $8);
-write(cv, $9);
-write(mn, $10);
-write(mx, $11);
-write(rng, $12);
-write(g1, $13);
-write(se_g1, $14);
-write(g2, $15);
-write(se_g2, $16);
-write(md, $17);
-write(iqm, $18);
-write(out_minus, $19, format="text");
-write(out_plus, $20, format="text");
-write(Q, $21, format="text");
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# Note this script is externalized to customers, please do not change w/o consulting component owner.
+# How to invoke this dml script Scale.dml?
+# Assume S_HOME is set to the home of the dml script
+# Assume input and output directories are on hdfs as INPUT_DIR and OUTPUT_DIR
+# Assume rows = 10000 for V and rows = 5 for P
+# hadoop jar SystemML.jar -f $S_HOME/Scale.dml -args "$INPUT_DIR/vector" 10000 "$INPUT_DIR/prob 5
+# "$OUTPUT_DIR/mean" "$OUTPUT_DIR/std" "$OUTPUT_DIR/se" "$OUTPUT_DIR/var" "$OUTPUT_DIR/cv"
+# "$OUTPUT_DIR/min" "$OUTPUT_DIR/max" "$OUTPUT_DIR/rng" 
+# "$OUTPUT_DIR/g1" "$OUTPUT_DIR/se_g1" "$OUTPUT_DIR/g2" "$OUTPUT_DIR/se_g2" 
+# "$OUTPUT_DIR/median" "$OUTPUT_DIR/iqm"
+# "OUTPUT_DIR/out_minus" "$OUTPUT_DIR/out_plus" "$OUTPUT_DIR/quantile" 
+
+V = read($1, rows=$2, cols=1, format="text")
+P = read($3, rows=$4, cols=1, format="text")
+
+n = nrow(V)
+
+# sum
+s1 = sum(V)
+
+# 2nd central moment
+m2 = moment(V, 2)
+
+# 3rd central moment
+m3 = moment(V, 3)
+
+# 4th central moment
+m4 = moment(V, 4)
+
+# mean
+mu = mean(V)
+
+# variances
+var = n/(n-1.0)*m2
+
+# standard deviations
+std_dev = sqrt(var)
+
+# standard errors of mean
+SE = std_dev/sqrt(n)
+
+# coefficients of variation
+cv = std_dev/mu
+
+# min and max
+mn=min(V)
+mx=max(V)
+
+# range
+rng = mx - mn
+
+# Skewness
+g1 = m3/(std_dev^3)
+
+# standard error of skewness (not sure how it is defined without the weight)
+se_g1=sqrt( 6*n*(n-1.0) / ((n-2.0)*(n+1.0)*(n+3.0)) )
+
+# Kurtosis (using binomial formula)
+g2 = m4/(std_dev^4) - 3
+
+# Standard error of Kurtosis (not sure how it is defined without the weight)
+se_g2= sqrt( (4*(n^2-1)*se_g1^2)/((n+5.0)*(n-3.0)) )
+
+# median
+md = median(V)
+
+# quantile
+Q = quantile(V, P)
+
+# inter-quartile mean
+iqm = interQuartileMean(V)
+
+# outliers use ppred to describe it
+out_minus = ppred(V, mu-5*std_dev, "<")*V 
+out_plus = ppred(V, mu+5*std_dev, ">")*V
+
+write(mu, $5);
+write(std_dev, $6);
+write(SE, $7);
+write(var, $8);
+write(cv, $9);
+write(mn, $10);
+write(mx, $11);
+write(rng, $12);
+write(g1, $13);
+write(se_g1, $14);
+write(g2, $15);
+write(se_g2, $16);
+write(md, $17);
+write(iqm, $18);
+write(out_minus, $19, format="text");
+write(out_plus, $20, format="text");
+write(Q, $21, format="text");

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/src/test/scripts/applications/descriptivestats/ScaleCategorical.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/descriptivestats/ScaleCategorical.R b/src/test/scripts/applications/descriptivestats/ScaleCategorical.R
index 73bf9f3..e1d0880 100644
--- a/src/test/scripts/applications/descriptivestats/ScaleCategorical.R
+++ b/src/test/scripts/applications/descriptivestats/ScaleCategorical.R
@@ -1,69 +1,69 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# JUnit test class: dml.test.integration.descriptivestats.BivariateScaleCategoricalTest.java
-# command line invocation assuming $SC_HOME is set to the home of the R script
-# Rscript $SC_HOME/ScaleCategorical.R $SC_HOME/in/ $SC_HOME/expected/
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-A = readMM(paste(args[1], "A.mtx", sep=""));
-Y = readMM(paste(args[1], "Y.mtx", sep=""));
-
-Av = A[,1];
-Yv = Y[,1];
-
-W = nrow(A);
-my = mean(Yv); #sum(Yv)/W;
-varY = var(Yv);
-
-CFreqs = as.matrix(table(Av)); 
-CMeans = as.matrix(aggregate(Yv, by=list(Av), "mean")$x);
-CVars = as.matrix(aggregate(Yv, by=list(Av), "var")$x);
-
-# number of categories
-R = nrow(CFreqs);
-
-Eta = sqrt(1 - ( sum((CFreqs-1)*CVars) / ((W-1)*varY) ));
-
-anova_num = sum( (CFreqs*(CMeans-my)^2) )/(R-1);
-anova_den = sum( (CFreqs-1)*CVars )/(W-R);
-ANOVAF = anova_num/anova_den;
-
-print(anova_num, digits=15);
-print(anova_den, digits=15);
-
-write(Eta, paste(args[2], "Eta", sep=""));
-
-write(ANOVAF, paste(args[2], "AnovaF", sep=""));
-
-write(varY, paste(args[2], "VarY", sep=""));
-
-write(my, paste(args[2], "MeanY", sep=""));
-
-writeMM(as(CVars,"CsparseMatrix"), paste(args[2], "CVars", sep=""), format="text");
-writeMM(as(CFreqs,"CsparseMatrix"), paste(args[2], "CFreqs", sep=""), format="text");
-writeMM(as(CMeans,"CsparseMatrix"), paste(args[2], "CMeans", sep=""), format="text");
-
-
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# JUnit test class: dml.test.integration.descriptivestats.BivariateScaleCategoricalTest.java
+# command line invocation assuming $SC_HOME is set to the home of the R script
+# Rscript $SC_HOME/ScaleCategorical.R $SC_HOME/in/ $SC_HOME/expected/
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+A = readMM(paste(args[1], "A.mtx", sep=""));
+Y = readMM(paste(args[1], "Y.mtx", sep=""));
+
+Av = A[,1];
+Yv = Y[,1];
+
+W = nrow(A);
+my = mean(Yv); #sum(Yv)/W;
+varY = var(Yv);
+
+CFreqs = as.matrix(table(Av)); 
+CMeans = as.matrix(aggregate(Yv, by=list(Av), "mean")$x);
+CVars = as.matrix(aggregate(Yv, by=list(Av), "var")$x);
+
+# number of categories
+R = nrow(CFreqs);
+
+Eta = sqrt(1 - ( sum((CFreqs-1)*CVars) / ((W-1)*varY) ));
+
+anova_num = sum( (CFreqs*(CMeans-my)^2) )/(R-1);
+anova_den = sum( (CFreqs-1)*CVars )/(W-R);
+ANOVAF = anova_num/anova_den;
+
+print(anova_num, digits=15);
+print(anova_den, digits=15);
+
+write(Eta, paste(args[2], "Eta", sep=""));
+
+write(ANOVAF, paste(args[2], "AnovaF", sep=""));
+
+write(varY, paste(args[2], "VarY", sep=""));
+
+write(my, paste(args[2], "MeanY", sep=""));
+
+writeMM(as(CVars,"CsparseMatrix"), paste(args[2], "CVars", sep=""), format="text");
+writeMM(as(CFreqs,"CsparseMatrix"), paste(args[2], "CFreqs", sep=""), format="text");
+writeMM(as(CMeans,"CsparseMatrix"), paste(args[2], "CMeans", sep=""), format="text");
+
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/src/test/scripts/applications/descriptivestats/ScaleCategorical.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/descriptivestats/ScaleCategorical.dml b/src/test/scripts/applications/descriptivestats/ScaleCategorical.dml
index d0d5135..f9d7efb 100644
--- a/src/test/scripts/applications/descriptivestats/ScaleCategorical.dml
+++ b/src/test/scripts/applications/descriptivestats/ScaleCategorical.dml
@@ -1,62 +1,62 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# Note this script is externalized to customers, please do not change w/o consulting component owner.
-# How to invoke this dml script ScaleCategorical.dml?
-# Assume SC_HOME is set to the home of the dml script
-# Assume input and output directories are on hdfs as INPUT_DIR and OUTPUT_DIR
-# Assume rows = 10000 for A and Y, A is categorical variable and Y is scale variable
-# hadoop jar SystemML.jar -f $SC_HOME/ScaleCategorical.dml -args "$INPUT_DIR/A" 10000 "$INPUT_DIR/Y" 
-#         "$OUPUT_DIR/VarY" "$OUTPUT_DIR/MeanY" "$OUTPUT_DIR/CFreqs" "$OUTPUT_DIR/CMeans" "$OUTPUT_DIR/CVars" 
-#         "$OUTPUT_DIR/Eta", "$OUTPUT_DIR/AnovaF"
-
-A = read($1, rows=$2, cols=1, format="text");
-Y = read($3, rows=$2, cols=1, format="text");
-
-# mean and variance in target variable
-W = nrow(A);
-my = mean(Y);
-varY = moment(Y,2) * W/(W-1.0);
-
-# category-wise (frequencies, means, variances)
-CFreqs = aggregate(target=Y, groups=A, fn="count"); 
-CMeans = aggregate(target=Y, groups=A, fn="mean");
-CVars =  aggregate(target=Y, groups=A, fn="variance");
-
-# number of categories
-R = nrow(CFreqs);
-
-Eta = sqrt(1 - ( sum((CFreqs-1)*CVars) / ((W-1)*varY) ));
-
-anova_num = sum( (CFreqs*(CMeans-my)^2) )/(R-1);
-anova_den = sum( (CFreqs-1)*CVars )/(W-R);
-ANOVAF = anova_num/anova_den;
-
-# output required statistics
-write(varY, $4);
-write(my, $5);
-
-write(CFreqs, $6, format="text");
-write(CMeans, $7, format="text");
-write(CVars, $8, format="text");
-
-write(Eta, $9);
-write(ANOVAF, $10);
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# Note this script is externalized to customers, please do not change w/o consulting component owner.
+# How to invoke this dml script ScaleCategorical.dml?
+# Assume SC_HOME is set to the home of the dml script
+# Assume input and output directories are on hdfs as INPUT_DIR and OUTPUT_DIR
+# Assume rows = 10000 for A and Y, A is categorical variable and Y is scale variable
+# hadoop jar SystemML.jar -f $SC_HOME/ScaleCategorical.dml -args "$INPUT_DIR/A" 10000 "$INPUT_DIR/Y" 
+#         "$OUPUT_DIR/VarY" "$OUTPUT_DIR/MeanY" "$OUTPUT_DIR/CFreqs" "$OUTPUT_DIR/CMeans" "$OUTPUT_DIR/CVars" 
+#         "$OUTPUT_DIR/Eta", "$OUTPUT_DIR/AnovaF"
+
+A = read($1, rows=$2, cols=1, format="text");
+Y = read($3, rows=$2, cols=1, format="text");
+
+# mean and variance in target variable
+W = nrow(A);
+my = mean(Y);
+varY = moment(Y,2) * W/(W-1.0);
+
+# category-wise (frequencies, means, variances)
+CFreqs = aggregate(target=Y, groups=A, fn="count"); 
+CMeans = aggregate(target=Y, groups=A, fn="mean");
+CVars =  aggregate(target=Y, groups=A, fn="variance");
+
+# number of categories
+R = nrow(CFreqs);
+
+Eta = sqrt(1 - ( sum((CFreqs-1)*CVars) / ((W-1)*varY) ));
+
+anova_num = sum( (CFreqs*(CMeans-my)^2) )/(R-1);
+anova_den = sum( (CFreqs-1)*CVars )/(W-R);
+ANOVAF = anova_num/anova_den;
+
+# output required statistics
+write(varY, $4);
+write(my, $5);
+
+write(CFreqs, $6, format="text");
+write(CMeans, $7, format="text");
+write(CVars, $8, format="text");
+
+write(Eta, $9);
+write(ANOVAF, $10);

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/src/test/scripts/applications/descriptivestats/ScaleCategoricalWithWeightsTest.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/descriptivestats/ScaleCategoricalWithWeightsTest.R b/src/test/scripts/applications/descriptivestats/ScaleCategoricalWithWeightsTest.R
index 6e67716..bdf1cc2 100644
--- a/src/test/scripts/applications/descriptivestats/ScaleCategoricalWithWeightsTest.R
+++ b/src/test/scripts/applications/descriptivestats/ScaleCategoricalWithWeightsTest.R
@@ -1,78 +1,78 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# JUnit test class: dml.test.integration.descriptivestats.BivariateScaleCategoricalTest.java
-# command line invocation assuming $SC_HOME is set to the home of the R script
-# Rscript $SC_HOME/ScaleCategorical.R $SC_HOME/in/ $SC_HOME/expected/
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-# Usage: R --vanilla -args Xfile X < ScaleCategoricalTest.R
-
-#parseCommandArgs()
-######################
-Atemp = readMM(paste(args[1], "A.mtx", sep=""));
-Ytemp = readMM(paste(args[1], "Y.mtx", sep=""));
-WM = readMM(paste(args[1], "WM.mtx", sep=""));
-
-Yv=rep(Ytemp[,1],WM[,1])
-Av=rep(Atemp[,1],WM[,1])
-
-W = sum(WM);
-my = sum(Yv)/W;
-varY = var(Yv);
-
-CFreqs = as.matrix(table(Av)); 
-CMeans = as.matrix(aggregate(Yv, by=list(Av), "mean")$x);
-CVars = as.matrix(aggregate(Yv, by=list(Av), "var")$x);
-
-# number of categories
-R = nrow(CFreqs);
-
-Eta = sqrt(1 - ( sum((CFreqs-1)*CVars) / ((W-1)*varY) ));
-
-anova_num = sum( (CFreqs*(CMeans-my)^2) )/(R-1);
-anova_den = sum( (CFreqs-1)*CVars )/(W-R);
-ANOVAF = anova_num/anova_den;
-
-print(W, digits=15);
-print(R, digits=15);
-print(anova_num, digits=15);
-print(anova_den, digits=15);
-
-#######################
-
-write(Eta, paste(args[2], "Eta", sep=""));
-
-write(ANOVAF, paste(args[2], "AnovaF", sep=""));
-
-write(varY, paste(args[2], "VarY", sep=""));
-
-write(my, paste(args[2], "MeanY", sep=""));
-
-writeMM(as(CVars,"CsparseMatrix"), paste(args[2], "CVars", sep=""), format="text");
-writeMM(as(CFreqs,"CsparseMatrix"), paste(args[2], "CFreqs", sep=""), format="text");
-writeMM(as(CMeans,"CsparseMatrix"), paste(args[2], "CMeans", sep=""), format="text");
-
-
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# JUnit test class: dml.test.integration.descriptivestats.BivariateScaleCategoricalTest.java
+# command line invocation assuming $SC_HOME is set to the home of the R script
+# Rscript $SC_HOME/ScaleCategorical.R $SC_HOME/in/ $SC_HOME/expected/
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+# Usage: R --vanilla -args Xfile X < ScaleCategoricalTest.R
+
+#parseCommandArgs()
+######################
+Atemp = readMM(paste(args[1], "A.mtx", sep=""));
+Ytemp = readMM(paste(args[1], "Y.mtx", sep=""));
+WM = readMM(paste(args[1], "WM.mtx", sep=""));
+
+Yv=rep(Ytemp[,1],WM[,1])
+Av=rep(Atemp[,1],WM[,1])
+
+W = sum(WM);
+my = sum(Yv)/W;
+varY = var(Yv);
+
+CFreqs = as.matrix(table(Av)); 
+CMeans = as.matrix(aggregate(Yv, by=list(Av), "mean")$x);
+CVars = as.matrix(aggregate(Yv, by=list(Av), "var")$x);
+
+# number of categories
+R = nrow(CFreqs);
+
+Eta = sqrt(1 - ( sum((CFreqs-1)*CVars) / ((W-1)*varY) ));
+
+anova_num = sum( (CFreqs*(CMeans-my)^2) )/(R-1);
+anova_den = sum( (CFreqs-1)*CVars )/(W-R);
+ANOVAF = anova_num/anova_den;
+
+print(W, digits=15);
+print(R, digits=15);
+print(anova_num, digits=15);
+print(anova_den, digits=15);
+
+#######################
+
+write(Eta, paste(args[2], "Eta", sep=""));
+
+write(ANOVAF, paste(args[2], "AnovaF", sep=""));
+
+write(varY, paste(args[2], "VarY", sep=""));
+
+write(my, paste(args[2], "MeanY", sep=""));
+
+writeMM(as(CVars,"CsparseMatrix"), paste(args[2], "CVars", sep=""), format="text");
+writeMM(as(CFreqs,"CsparseMatrix"), paste(args[2], "CFreqs", sep=""), format="text");
+writeMM(as(CMeans,"CsparseMatrix"), paste(args[2], "CMeans", sep=""), format="text");
+
+
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/src/test/scripts/applications/descriptivestats/ScaleCategoricalWithWeightsTest.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/descriptivestats/ScaleCategoricalWithWeightsTest.dml b/src/test/scripts/applications/descriptivestats/ScaleCategoricalWithWeightsTest.dml
index b8186fb..7615d54 100644
--- a/src/test/scripts/applications/descriptivestats/ScaleCategoricalWithWeightsTest.dml
+++ b/src/test/scripts/applications/descriptivestats/ScaleCategoricalWithWeightsTest.dml
@@ -1,65 +1,65 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# Note this script is externalized to customers, please do not change w/o consulting component owner.
-# How to invoke this dml script ScaleCategorical.dml?
-# Assume SC_HOME is set to the home of the dml script
-# Assume input and output directories are on hdfs as INPUT_DIR and OUTPUT_DIR
-# Assume rows = 10000 for A and Y, A is categorical variable and Y is scale variable
-# hadoop jar SystemML.jar -f $SC_HOME/ScaleCategorical.dml -args "$INPUT_DIR/A" 10000 "$INPUT_DIR/Y" "$INPUT_DIR/WM" 
-#         "$OUPUT_DIR/VarY" "$OUTPUT_DIR/MeanY" "$OUTPUT_DIR/CFreqs" "$OUTPUT_DIR/CMeans" "$OUTPUT_DIR/CVars" 
-#         "$OUTPUT_DIR/Eta", "$OUTPUT_DIR/AnovaF"
-
-#A <- nominal variable
-#Y <- scale variable
-#WM <- weights
-
-A = read($1, rows=$2, cols=1, format="text");
-Y = read($3, rows=$2, cols=1, format="text");
-WM = read($4, rows=$2, cols=1, format="text");
-
-W = sum(WM);
-my = sum(Y*WM)/W;
-varY = moment(Y,WM,2) * W/(W-1.0);
-
-CFreqs = aggregate(target=WM, groups=A, fn="sum");
-CMeans = aggregate(target=Y, groups=A, weights=WM, fn="mean");
-CVars =  aggregate(target=Y, groups=A, weights=WM, fn="variance");
-
-# number of categories
-R = nrow(CFreqs);
-
-Eta = sqrt(1 - ( sum((CFreqs-1)*CVars) / ((W-1)*varY) ));
-
-anova_num = sum( (CFreqs*(CMeans-my)^2) )/(R-1);
-anova_den = sum( (CFreqs-1)*CVars )/(W-R);
-ANOVAF = anova_num/anova_den;
-
-# output required statistics
-write(varY, $5);
-write(my, $6);
-
-write(CFreqs, $7, format="text");
-write(CMeans, $8, format="text");
-write(CVars, $9, format="text");
-
-write(Eta, $10);
-write(ANOVAF, $11);
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# Note this script is externalized to customers, please do not change w/o consulting component owner.
+# How to invoke this dml script ScaleCategorical.dml?
+# Assume SC_HOME is set to the home of the dml script
+# Assume input and output directories are on hdfs as INPUT_DIR and OUTPUT_DIR
+# Assume rows = 10000 for A and Y, A is categorical variable and Y is scale variable
+# hadoop jar SystemML.jar -f $SC_HOME/ScaleCategorical.dml -args "$INPUT_DIR/A" 10000 "$INPUT_DIR/Y" "$INPUT_DIR/WM" 
+#         "$OUPUT_DIR/VarY" "$OUTPUT_DIR/MeanY" "$OUTPUT_DIR/CFreqs" "$OUTPUT_DIR/CMeans" "$OUTPUT_DIR/CVars" 
+#         "$OUTPUT_DIR/Eta", "$OUTPUT_DIR/AnovaF"
+
+#A <- nominal variable
+#Y <- scale variable
+#WM <- weights
+
+A = read($1, rows=$2, cols=1, format="text");
+Y = read($3, rows=$2, cols=1, format="text");
+WM = read($4, rows=$2, cols=1, format="text");
+
+W = sum(WM);
+my = sum(Y*WM)/W;
+varY = moment(Y,WM,2) * W/(W-1.0);
+
+CFreqs = aggregate(target=WM, groups=A, fn="sum");
+CMeans = aggregate(target=Y, groups=A, weights=WM, fn="mean");
+CVars =  aggregate(target=Y, groups=A, weights=WM, fn="variance");
+
+# number of categories
+R = nrow(CFreqs);
+
+Eta = sqrt(1 - ( sum((CFreqs-1)*CVars) / ((W-1)*varY) ));
+
+anova_num = sum( (CFreqs*(CMeans-my)^2) )/(R-1);
+anova_den = sum( (CFreqs-1)*CVars )/(W-R);
+ANOVAF = anova_num/anova_den;
+
+# output required statistics
+write(varY, $5);
+write(my, $6);
+
+write(CFreqs, $7, format="text");
+write(CMeans, $8, format="text");
+write(CVars, $9, format="text");
+
+write(Eta, $10);
+write(ANOVAF, $11);

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/src/test/scripts/applications/descriptivestats/ScaleScale.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/descriptivestats/ScaleScale.R b/src/test/scripts/applications/descriptivestats/ScaleScale.R
index 202a057..690f3a3 100644
--- a/src/test/scripts/applications/descriptivestats/ScaleScale.R
+++ b/src/test/scripts/applications/descriptivestats/ScaleScale.R
@@ -1,38 +1,38 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# JUnit test class: dml.test.integration.descriptivestats.CategoricalCategoricalTest.java
-# command line invocation assuming $SS_HOME is set to the home of the R script
-# Rscript $SS_HOME/ScaleScale.R $SS_HOME/in/ $SS_HOME/expected/
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-X = readMM(paste(args[1], "X.mtx", sep=""))
-Y = readMM(paste(args[1], "Y.mtx", sep=""))
-
-# cor.test returns a list containing t-statistic, df, p-value, and R
-cort = cor.test(X[,1], Y[,1]);
-
-R = as.numeric(cort[4]);
-
-write(R, paste(args[2], "PearsonR", sep=""));
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# JUnit test class: dml.test.integration.descriptivestats.CategoricalCategoricalTest.java
+# command line invocation assuming $SS_HOME is set to the home of the R script
+# Rscript $SS_HOME/ScaleScale.R $SS_HOME/in/ $SS_HOME/expected/
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+X = readMM(paste(args[1], "X.mtx", sep=""))
+Y = readMM(paste(args[1], "Y.mtx", sep=""))
+
+# cor.test returns a list containing t-statistic, df, p-value, and R
+cort = cor.test(X[,1], Y[,1]);
+
+R = as.numeric(cort[4]);
+
+write(R, paste(args[2], "PearsonR", sep=""));

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/src/test/scripts/applications/descriptivestats/ScaleScale.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/descriptivestats/ScaleScale.dml b/src/test/scripts/applications/descriptivestats/ScaleScale.dml
index e3d2183..0fd2179 100644
--- a/src/test/scripts/applications/descriptivestats/ScaleScale.dml
+++ b/src/test/scripts/applications/descriptivestats/ScaleScale.dml
@@ -1,48 +1,48 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# Note this script is externalized to customers, please do not change w/o consulting component owner.
-# How to invoke this dml script ScaleScale.dml?
-# Assume $SS_HOME is set to the home of the dml script
-# Assume input and output directories are on hdfs as INPUT_DIR and OUTPUT_DIR
-# Assume X and Y are scale variables and both have 100000 rows
-# hadoop jar SystemML.jar -f $SS_HOME/ScaleScale.dml -args "$INPUT_DIR/X" 100000 "$INPUT_DIR/Y" "$OUPUT_DIR/PearsonR"
-
-X = read($1, rows=$2, cols=1, format="text");
-Y = read($3, rows=$2, cols=1, format="text");
-
-W = nrow(X);
-
-# Unweighted co-variance
-covXY = cov(X,Y);
-
-# compute standard deviations for both X and Y by computing 2^nd central moment
-m2X = moment(X,2);
-m2Y = moment(Y,2);
-sigmaX = sqrt(m2X * (W/(W-1.0)) );
-sigmaY = sqrt(m2Y * (W/(W-1.0)) );
-
-# Pearson's R
-R = covXY / (sigmaX*sigmaY);
-
-write(R, $4);
-
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# Note this script is externalized to customers, please do not change w/o consulting component owner.
+# How to invoke this dml script ScaleScale.dml?
+# Assume $SS_HOME is set to the home of the dml script
+# Assume input and output directories are on hdfs as INPUT_DIR and OUTPUT_DIR
+# Assume X and Y are scale variables and both have 100000 rows
+# hadoop jar SystemML.jar -f $SS_HOME/ScaleScale.dml -args "$INPUT_DIR/X" 100000 "$INPUT_DIR/Y" "$OUPUT_DIR/PearsonR"
+
+X = read($1, rows=$2, cols=1, format="text");
+Y = read($3, rows=$2, cols=1, format="text");
+
+W = nrow(X);
+
+# Unweighted co-variance
+covXY = cov(X,Y);
+
+# compute standard deviations for both X and Y by computing 2^nd central moment
+m2X = moment(X,2);
+m2Y = moment(Y,2);
+sigmaX = sqrt(m2X * (W/(W-1.0)) );
+sigmaY = sqrt(m2Y * (W/(W-1.0)) );
+
+# Pearson's R
+R = covXY / (sigmaX*sigmaY);
+
+write(R, $4);
+
+