You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by du...@apache.org on 2016/01/26 02:12:42 UTC
[18/55] [partial] incubator-systemml git commit: [SYSTEMML-482]
[SYSTEMML-480] Adding a Git attributes file to enfore Unix-styled line
endings, and normalizing all of the line endings.
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/parfor/parfor_bivariate2.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/parfor/parfor_bivariate2.dml b/src/test/scripts/applications/parfor/parfor_bivariate2.dml
index f6dd4d5..d0734a9 100644
--- a/src/test/scripts/applications/parfor/parfor_bivariate2.dml
+++ b/src/test/scripts/applications/parfor/parfor_bivariate2.dml
@@ -1,256 +1,256 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-
-/*
- *
- * For a given pair of attribute sets, compute bivariate statistics between all attribute pairs
- * Given, S_1 = {A_11, A_12, ... A_1m} and S_2 = {A_21, A_22, ... A_2n}
- * compute bivariate stats for m*n pairs (A_1i, A_2j), (1<= i <=m) and (1<= j <=n)
- *
- * Seven inputs:
- * $1) D - input data
- * $2) S1 - First attribute set {A_11, A_12, ... A_1m}
- * $3) S2 - Second attribute set {A_21, A_22, ... A_2n}
- * $4) K1 - kind for attributes in S1
- * $5) K2 - kind for attributes in S2
- * kind=1 for scale, kind=2 for nominal, kind=3 for ordinal
- * $6) numPairs - total number of pairs (m*n)
- * $7) maxC - maximum number of categories in any categorical attribute
- *
- * One output:
- * $6) output directory in which following four statistics files are created
- * + bivar.stats - matrix with all 8 bivariate statistics computed for different attribute pairs
- * (R, (chi-sq, df, pval, cramersv), spearman, Eta, F)
- * + categorical.counts -
- * + categorical.means -
- * + categorical.variances -
- * -> Values in these three matrices are applicable only for scale-categorical attribute pairs.
- * k^th column in these matrices denote the attribute pair (A_1i,A_2j) where i*j = k.
- */
-
-D = read($1, rows=$7, cols=$8); # input data set
-S1 = read($2, rows=1, cols=$9); # attribute set 1
-S2 = read($3, rows=1, cols=$9); # attribute set 2
-K1 = read($4, rows=1, cols=$9); # kind for attributes in S1
-K2 = read($5, rows=1, cols=$9); # kind for attributes in S2
-numPairs = $10; # number of attribute pairs (|S1|*|S2|)
-maxC = $11; # max number of categories in any categorical attribute
-
-s1size = ncol(S1);
-s2size = ncol(S2);
-
-# R, chisq, cramers, spearman, eta, anovaf
-numstats = 8;
-basestats = matrix(0, rows=numstats, cols=numPairs);
-cat_counts = matrix(0, rows=maxC, cols=numPairs);
-cat_means = matrix(0, rows=maxC, cols=numPairs);
-cat_vars = matrix(0, rows=maxC, cols=numPairs);
-
-
-parfor( i in 1:s1size, par=4, mode=LOCAL, check=0, opt=NONE) {
- a1 = castAsScalar(S1[,i]);
- k1 = castAsScalar(K1[1,i]);
- A1 = D[,a1];
-
- parfor( j in 1:s2size, par=4, mode=REMOTE_MR, check=0, opt=NONE) {
- pairID = (i-1)*s2size+j;
- a2 = castAsScalar(S2[,j]);
- k2 = castAsScalar(K2[1,j]);
- A2 = D[,a2];
-
- if (k1 == k2) {
- if (k1 == 1) {
- # scale-scale
- print("[" + i + "," + j + "] scale-scale");
- r = bivar_ss(A1,A2);
- basestats[1,pairID] = r;
- } else {
- # nominal-nominal or ordinal-ordinal
- print("[" + i + "," + j + "] categorical-categorical");
- [chisq, df, pval, cramersv] = bivar_cc(A1,A2);
- basestats[2,pairID] = chisq;
- basestats[3,pairID] = df;
- basestats[4,pairID] = pval;
- basestats[5,pairID] = cramersv;
-
- if ( k1 == 3 ) {
- # ordinal-ordinal
- print("[" + i + "," + j + "] ordinal-ordinal");
- sp = bivar_oo(A1, A2);
- basestats[6,pairID] = sp;
- }
- }
- }
- else {
- if (k1 == 1 | k2 == 1) {
- # Scale-nominal/ordinal TODO MB correctness errors
- print("[" + i + "," + j + "] scale-categorical");
-
- if ( k1 == 1 ) {
- [eta,f, counts, means, vars] = bivar_sc(A1,A2);
- }
- else {
- [eta,f, counts, means, vars] = bivar_sc(A2,A1);
- }
- basestats[7,pairID] = eta;
- basestats[8,pairID] = f;
- cat_counts[,pairID] = counts;
- cat_means[,pairID] = means;
- cat_vars[,pairID] = vars;
- }
- else {
- # nominal-ordinal or ordinal-nominal
- print("[" + i + "," + j + "] categorical-categorical");
- [chisq, df, pval, cramersv] = bivar_cc(A1,A2);
- basestats[2,pairID] = chisq;
- basestats[3,pairID] = df;
- basestats[4,pairID] = pval;
- basestats[5,pairID] = cramersv;
- }
- }
- }
-}
-
-write(basestats, $6 + "/bivar.stats");
-write(cat_counts, $6 + "/category.counts");
-write(cat_means, $6 + "/category.means");
-write(cat_vars, $6 + "/category.variances");
-
-
-# -----------------------------------------------------------------------------------------------------------
-
-bivar_cc = function(Matrix[Double] A, Matrix[Double] B) return (Double chisq, Double df, Double pval, Double cramersv) {
-
- # Contingency Table
- F = table(A,B);
-
- # Chi-Squared
- W = sum(F);
- r = rowSums(F);
- c = colSums(F);
- E = (r %*% c)/W;
- T = (F-E)^2/E;
- chi_squared = sum(T);
-
- # compute p-value
- degFreedom = (nrow(F)-1)*(ncol(F)-1);
- pValue = pchisq(target=chi_squared, df=degFreedom, lower.tail=FALSE);
-
- # Cramer's V
- R = nrow(F);
- C = ncol(F);
- q = min(R,C);
- cramers_v = sqrt(chi_squared/(W*(q-1)));
-
- # Assign return values
- chisq = chi_squared;
- df = as.double(degFreedom);
- pval = pValue;
- cramersv = cramers_v;
-}
-
-# -----------------------------------------------------------------------------------------------------------
-
-bivar_ss = function(Matrix[Double] X, Matrix[Double] Y) return (Double R) {
-
- # Unweighted co-variance
- covXY = cov(X,Y);
-
- # compute standard deviations for both X and Y by computing 2^nd central moment
- W = nrow(X);
- m2X = moment(X,2);
- m2Y = moment(Y,2);
- sigmaX = sqrt(m2X * (W/(W-1.0)) );
- sigmaY = sqrt(m2Y * (W/(W-1.0)) );
-
- # Pearson's R
- R = covXY / (sigmaX*sigmaY);
-}
-
-# -----------------------------------------------------------------------------------------------------------
-
-# Y points to SCALE variable
-# A points to CATEGORICAL variable
-bivar_sc = function(Matrix[Double] Y, Matrix[Double] A) return (Double Eta, Double AnovaF, Matrix[Double] CFreqs, Matrix[Double] CMeans, Matrix[Double] CVars ) {
-
- # mean and variance in target variable
- W = nrow(A);
- my = mean(Y);
- varY = moment(Y,2) * W/(W-1.0)
-
- # category-wise (frequencies, means, variances)
- CFreqs = aggregate(target=Y, groups=A, fn="count");
- CMeans = aggregate(target=Y, groups=A, fn="mean");
- CVars = aggregate(target=Y, groups=A, fn="variance");
-
- # number of categories
- R = nrow(CFreqs);
-
- Eta = sqrt(1 - ( sum((CFreqs-1)*CVars) / ((W-1)*varY) ));
-
- anova_num = sum( (CFreqs*(CMeans-my)^2) )/(R-1);
- anova_den = sum( (CFreqs-1)*CVars )/(W-R);
- AnovaF = anova_num/anova_den;
-}
-
-# -----------------------------------------------------------------------------------------------------------
-# Function to compute ranks
-# takes a column vector as input, and produces a vector of same size in which each cell denotes to the computed score for that category
-computeRanks = function(Matrix[Double] X) return (Matrix[Double] Ranks) {
- Ranks = cumsum(X) - X/2 + 1/2;
-}
-
-#-------------------------------------------------------------------------
-
-bivar_oo = function(Matrix[Double] A, Matrix[Double] B) return (Double sp) {
-
- # compute contingency table
- F = table(A,B);
-
- catA = nrow(F); # number of categories in A
- catB = ncol(F); # number of categories in B
-
- # compute category-wise counts for both the attributes
- R = rowSums(F);
- S = colSums(F);
-
- # compute scores, both are column vectors
- [C] = computeRanks(R);
- meanX = mean(C,R);
-
- columnS = t(S);
- [D] = computeRanks(columnS);
-
- # scores (C,D) are individual values, and counts (R,S) act as weights
- meanY = mean(D,columnS);
-
- W = sum(F); # total weight, or total #cases
- varX = moment(C,R,2)*(W/(W-1.0));
- varY = moment(D,columnS,2)*(W/(W-1.0));
- covXY = sum( t(F/(W-1) * (C-meanX)) * (D-meanY) );
-
- sp = covXY/(sqrt(varX)*sqrt(varY));
-}
-
-# -----------------------------------------------------------------------------------------------------------
-
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+/*
+ *
+ * For a given pair of attribute sets, compute bivariate statistics between all attribute pairs
+ * Given, S_1 = {A_11, A_12, ... A_1m} and S_2 = {A_21, A_22, ... A_2n}
+ * compute bivariate stats for m*n pairs (A_1i, A_2j), (1<= i <=m) and (1<= j <=n)
+ *
+ * Seven inputs:
+ * $1) D - input data
+ * $2) S1 - First attribute set {A_11, A_12, ... A_1m}
+ * $3) S2 - Second attribute set {A_21, A_22, ... A_2n}
+ * $4) K1 - kind for attributes in S1
+ * $5) K2 - kind for attributes in S2
+ * kind=1 for scale, kind=2 for nominal, kind=3 for ordinal
+ * $6) numPairs - total number of pairs (m*n)
+ * $7) maxC - maximum number of categories in any categorical attribute
+ *
+ * One output:
+ * $6) output directory in which following four statistics files are created
+ * + bivar.stats - matrix with all 8 bivariate statistics computed for different attribute pairs
+ * (R, (chi-sq, df, pval, cramersv), spearman, Eta, F)
+ * + categorical.counts -
+ * + categorical.means -
+ * + categorical.variances -
+ * -> Values in these three matrices are applicable only for scale-categorical attribute pairs.
+ * k^th column in these matrices denote the attribute pair (A_1i,A_2j) where i*j = k.
+ */
+
+D = read($1, rows=$7, cols=$8); # input data set
+S1 = read($2, rows=1, cols=$9); # attribute set 1
+S2 = read($3, rows=1, cols=$9); # attribute set 2
+K1 = read($4, rows=1, cols=$9); # kind for attributes in S1
+K2 = read($5, rows=1, cols=$9); # kind for attributes in S2
+numPairs = $10; # number of attribute pairs (|S1|*|S2|)
+maxC = $11; # max number of categories in any categorical attribute
+
+s1size = ncol(S1);
+s2size = ncol(S2);
+
+# R, chisq, cramers, spearman, eta, anovaf
+numstats = 8;
+basestats = matrix(0, rows=numstats, cols=numPairs);
+cat_counts = matrix(0, rows=maxC, cols=numPairs);
+cat_means = matrix(0, rows=maxC, cols=numPairs);
+cat_vars = matrix(0, rows=maxC, cols=numPairs);
+
+
+parfor( i in 1:s1size, par=4, mode=LOCAL, check=0, opt=NONE) {
+ a1 = castAsScalar(S1[,i]);
+ k1 = castAsScalar(K1[1,i]);
+ A1 = D[,a1];
+
+ parfor( j in 1:s2size, par=4, mode=REMOTE_MR, check=0, opt=NONE) {
+ pairID = (i-1)*s2size+j;
+ a2 = castAsScalar(S2[,j]);
+ k2 = castAsScalar(K2[1,j]);
+ A2 = D[,a2];
+
+ if (k1 == k2) {
+ if (k1 == 1) {
+ # scale-scale
+ print("[" + i + "," + j + "] scale-scale");
+ r = bivar_ss(A1,A2);
+ basestats[1,pairID] = r;
+ } else {
+ # nominal-nominal or ordinal-ordinal
+ print("[" + i + "," + j + "] categorical-categorical");
+ [chisq, df, pval, cramersv] = bivar_cc(A1,A2);
+ basestats[2,pairID] = chisq;
+ basestats[3,pairID] = df;
+ basestats[4,pairID] = pval;
+ basestats[5,pairID] = cramersv;
+
+ if ( k1 == 3 ) {
+ # ordinal-ordinal
+ print("[" + i + "," + j + "] ordinal-ordinal");
+ sp = bivar_oo(A1, A2);
+ basestats[6,pairID] = sp;
+ }
+ }
+ }
+ else {
+ if (k1 == 1 | k2 == 1) {
+ # Scale-nominal/ordinal TODO MB correctness errors
+ print("[" + i + "," + j + "] scale-categorical");
+
+ if ( k1 == 1 ) {
+ [eta,f, counts, means, vars] = bivar_sc(A1,A2);
+ }
+ else {
+ [eta,f, counts, means, vars] = bivar_sc(A2,A1);
+ }
+ basestats[7,pairID] = eta;
+ basestats[8,pairID] = f;
+ cat_counts[,pairID] = counts;
+ cat_means[,pairID] = means;
+ cat_vars[,pairID] = vars;
+ }
+ else {
+ # nominal-ordinal or ordinal-nominal
+ print("[" + i + "," + j + "] categorical-categorical");
+ [chisq, df, pval, cramersv] = bivar_cc(A1,A2);
+ basestats[2,pairID] = chisq;
+ basestats[3,pairID] = df;
+ basestats[4,pairID] = pval;
+ basestats[5,pairID] = cramersv;
+ }
+ }
+ }
+}
+
+write(basestats, $6 + "/bivar.stats");
+write(cat_counts, $6 + "/category.counts");
+write(cat_means, $6 + "/category.means");
+write(cat_vars, $6 + "/category.variances");
+
+
+# -----------------------------------------------------------------------------------------------------------
+
+bivar_cc = function(Matrix[Double] A, Matrix[Double] B) return (Double chisq, Double df, Double pval, Double cramersv) {
+
+ # Contingency Table
+ F = table(A,B);
+
+ # Chi-Squared
+ W = sum(F);
+ r = rowSums(F);
+ c = colSums(F);
+ E = (r %*% c)/W;
+ T = (F-E)^2/E;
+ chi_squared = sum(T);
+
+ # compute p-value
+ degFreedom = (nrow(F)-1)*(ncol(F)-1);
+ pValue = pchisq(target=chi_squared, df=degFreedom, lower.tail=FALSE);
+
+ # Cramer's V
+ R = nrow(F);
+ C = ncol(F);
+ q = min(R,C);
+ cramers_v = sqrt(chi_squared/(W*(q-1)));
+
+ # Assign return values
+ chisq = chi_squared;
+ df = as.double(degFreedom);
+ pval = pValue;
+ cramersv = cramers_v;
+}
+
+# -----------------------------------------------------------------------------------------------------------
+
+bivar_ss = function(Matrix[Double] X, Matrix[Double] Y) return (Double R) {
+
+ # Unweighted co-variance
+ covXY = cov(X,Y);
+
+ # compute standard deviations for both X and Y by computing 2^nd central moment
+ W = nrow(X);
+ m2X = moment(X,2);
+ m2Y = moment(Y,2);
+ sigmaX = sqrt(m2X * (W/(W-1.0)) );
+ sigmaY = sqrt(m2Y * (W/(W-1.0)) );
+
+ # Pearson's R
+ R = covXY / (sigmaX*sigmaY);
+}
+
+# -----------------------------------------------------------------------------------------------------------
+
+# Y points to SCALE variable
+# A points to CATEGORICAL variable
+bivar_sc = function(Matrix[Double] Y, Matrix[Double] A) return (Double Eta, Double AnovaF, Matrix[Double] CFreqs, Matrix[Double] CMeans, Matrix[Double] CVars ) {
+
+ # mean and variance in target variable
+ W = nrow(A);
+ my = mean(Y);
+ varY = moment(Y,2) * W/(W-1.0)
+
+ # category-wise (frequencies, means, variances)
+ CFreqs = aggregate(target=Y, groups=A, fn="count");
+ CMeans = aggregate(target=Y, groups=A, fn="mean");
+ CVars = aggregate(target=Y, groups=A, fn="variance");
+
+ # number of categories
+ R = nrow(CFreqs);
+
+ Eta = sqrt(1 - ( sum((CFreqs-1)*CVars) / ((W-1)*varY) ));
+
+ anova_num = sum( (CFreqs*(CMeans-my)^2) )/(R-1);
+ anova_den = sum( (CFreqs-1)*CVars )/(W-R);
+ AnovaF = anova_num/anova_den;
+}
+
+# -----------------------------------------------------------------------------------------------------------
+# Function to compute ranks
+# takes a column vector as input, and produces a vector of same size in which each cell denotes to the computed score for that category
+computeRanks = function(Matrix[Double] X) return (Matrix[Double] Ranks) {
+ Ranks = cumsum(X) - X/2 + 1/2;
+}
+
+#-------------------------------------------------------------------------
+
+bivar_oo = function(Matrix[Double] A, Matrix[Double] B) return (Double sp) {
+
+ # compute contingency table
+ F = table(A,B);
+
+ catA = nrow(F); # number of categories in A
+ catB = ncol(F); # number of categories in B
+
+ # compute category-wise counts for both the attributes
+ R = rowSums(F);
+ S = colSums(F);
+
+ # compute scores, both are column vectors
+ [C] = computeRanks(R);
+ meanX = mean(C,R);
+
+ columnS = t(S);
+ [D] = computeRanks(columnS);
+
+ # scores (C,D) are individual values, and counts (R,S) act as weights
+ meanY = mean(D,columnS);
+
+ W = sum(F); # total weight, or total #cases
+ varX = moment(C,R,2)*(W/(W-1.0));
+ varY = moment(D,columnS,2)*(W/(W-1.0));
+ covXY = sum( t(F/(W-1) * (C-meanX)) * (D-meanY) );
+
+ sp = covXY/(sqrt(varX)*sqrt(varY));
+}
+
+# -----------------------------------------------------------------------------------------------------------
+
+
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/parfor/parfor_bivariate3.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/parfor/parfor_bivariate3.dml b/src/test/scripts/applications/parfor/parfor_bivariate3.dml
index 1443c6e..990a6fe 100644
--- a/src/test/scripts/applications/parfor/parfor_bivariate3.dml
+++ b/src/test/scripts/applications/parfor/parfor_bivariate3.dml
@@ -1,256 +1,256 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-
-/*
- *
- * For a given pair of attribute sets, compute bivariate statistics between all attribute pairs
- * Given, S_1 = {A_11, A_12, ... A_1m} and S_2 = {A_21, A_22, ... A_2n}
- * compute bivariate stats for m*n pairs (A_1i, A_2j), (1<= i <=m) and (1<= j <=n)
- *
- * Seven inputs:
- * $1) D - input data
- * $2) S1 - First attribute set {A_11, A_12, ... A_1m}
- * $3) S2 - Second attribute set {A_21, A_22, ... A_2n}
- * $4) K1 - kind for attributes in S1
- * $5) K2 - kind for attributes in S2
- * kind=1 for scale, kind=2 for nominal, kind=3 for ordinal
- * $6) numPairs - total number of pairs (m*n)
- * $7) maxC - maximum number of categories in any categorical attribute
- *
- * One output:
- * $6) output directory in which following four statistics files are created
- * + bivar.stats - matrix with all 8 bivariate statistics computed for different attribute pairs
- * (R, (chi-sq, df, pval, cramersv), spearman, Eta, F)
- * + categorical.counts -
- * + categorical.means -
- * + categorical.variances -
- * -> Values in these three matrices are applicable only for scale-categorical attribute pairs.
- * k^th column in these matrices denote the attribute pair (A_1i,A_2j) where i*j = k.
- */
-
-D = read($1, rows=$7, cols=$8); # input data set
-S1 = read($2, rows=1, cols=$9); # attribute set 1
-S2 = read($3, rows=1, cols=$9); # attribute set 2
-K1 = read($4, rows=1, cols=$9); # kind for attributes in S1
-K2 = read($5, rows=1, cols=$9); # kind for attributes in S2
-numPairs = $10; # number of attribute pairs (|S1|*|S2|)
-maxC = $11; # max number of categories in any categorical attribute
-
-s1size = ncol(S1);
-s2size = ncol(S2);
-
-# R, chisq, cramers, spearman, eta, anovaf
-numstats = 8;
-basestats = matrix(0, rows=numstats, cols=numPairs);
-cat_counts = matrix(0, rows=maxC, cols=numPairs);
-cat_means = matrix(0, rows=maxC, cols=numPairs);
-cat_vars = matrix(0, rows=maxC, cols=numPairs);
-
-
-parfor( i in 1:s1size, par=4, mode=REMOTE_MR, check=0, opt=NONE) {
- a1 = castAsScalar(S1[,i]);
- k1 = castAsScalar(K1[1,i]);
- A1 = D[,a1];
-
- parfor( j in 1:s2size, par=4, mode=LOCAL, check=0, opt=NONE) {
- pairID = (i-1)*s2size+j;
- a2 = castAsScalar(S2[,j]);
- k2 = castAsScalar(K2[1,j]);
- A2 = D[,a2];
-
- if (k1 == k2) {
- if (k1 == 1) {
- # scale-scale
- print("[" + i + "," + j + "] scale-scale");
- r = bivar_ss(A1,A2);
- basestats[1,pairID] = r;
- } else {
- # nominal-nominal or ordinal-ordinal
- print("[" + i + "," + j + "] categorical-categorical");
- [chisq, df, pval, cramersv] = bivar_cc(A1,A2);
- basestats[2,pairID] = chisq;
- basestats[3,pairID] = df;
- basestats[4,pairID] = pval;
- basestats[5,pairID] = cramersv;
-
- if ( k1 == 3 ) {
- # ordinal-ordinal
- print("[" + i + "," + j + "] ordinal-ordinal");
- sp = bivar_oo(A1, A2);
- basestats[6,pairID] = sp;
- }
- }
- }
- else {
- if (k1 == 1 | k2 == 1) {
- # Scale-nominal/ordinal TODO MB correctness errors
- print("[" + i + "," + j + "] scale-categorical");
-
- if ( k1 == 1 ) {
- [eta,f, counts, means, vars] = bivar_sc(A1,A2);
- }
- else {
- [eta,f, counts, means, vars] = bivar_sc(A2,A1);
- }
- basestats[7,pairID] = eta;
- basestats[8,pairID] = f;
- cat_counts[,pairID] = counts;
- cat_means[,pairID] = means;
- cat_vars[,pairID] = vars;
- }
- else {
- # nominal-ordinal or ordinal-nominal
- print("[" + i + "," + j + "] categorical-categorical");
- [chisq, df, pval, cramersv] = bivar_cc(A1,A2);
- basestats[2,pairID] = chisq;
- basestats[3,pairID] = df;
- basestats[4,pairID] = pval;
- basestats[5,pairID] = cramersv;
- }
- }
- }
-}
-
-write(basestats, $6 + "/bivar.stats");
-write(cat_counts, $6 + "/category.counts");
-write(cat_means, $6 + "/category.means");
-write(cat_vars, $6 + "/category.variances");
-
-
-# -----------------------------------------------------------------------------------------------------------
-
-bivar_cc = function(Matrix[Double] A, Matrix[Double] B) return (Double chisq, Double df, Double pval, Double cramersv) {
-
- # Contingency Table
- F = table(A,B);
-
- # Chi-Squared
- W = sum(F);
- r = rowSums(F);
- c = colSums(F);
- E = (r %*% c)/W;
- T = (F-E)^2/E;
- chi_squared = sum(T);
-
- # compute p-value
- degFreedom = (nrow(F)-1)*(ncol(F)-1);
- pValue = pchisq(target=chi_squared, df=degFreedom, lower.tail=FALSE);
-
- # Cramer's V
- R = nrow(F);
- C = ncol(F);
- q = min(R,C);
- cramers_v = sqrt(chi_squared/(W*(q-1)));
-
- # Assign return values
- chisq = chi_squared;
- df = as.double(degFreedom);
- pval = pValue;
- cramersv = cramers_v;
-}
-
-# -----------------------------------------------------------------------------------------------------------
-
-bivar_ss = function(Matrix[Double] X, Matrix[Double] Y) return (Double R) {
-
- # Unweighted co-variance
- covXY = cov(X,Y);
-
- # compute standard deviations for both X and Y by computing 2^nd central moment
- W = nrow(X);
- m2X = moment(X,2);
- m2Y = moment(Y,2);
- sigmaX = sqrt(m2X * (W/(W-1.0)) );
- sigmaY = sqrt(m2Y * (W/(W-1.0)) );
-
- # Pearson's R
- R = covXY / (sigmaX*sigmaY);
-}
-
-# -----------------------------------------------------------------------------------------------------------
-
-# Y points to SCALE variable
-# A points to CATEGORICAL variable
-bivar_sc = function(Matrix[Double] Y, Matrix[Double] A) return (Double Eta, Double AnovaF, Matrix[Double] CFreqs, Matrix[Double] CMeans, Matrix[Double] CVars ) {
-
- # mean and variance in target variable
- W = nrow(A);
- my = mean(Y);
- varY = moment(Y,2) * W/(W-1.0)
-
- # category-wise (frequencies, means, variances)
- CFreqs = aggregate(target=Y, groups=A, fn="count");
- CMeans = aggregate(target=Y, groups=A, fn="mean");
- CVars = aggregate(target=Y, groups=A, fn="variance");
-
- # number of categories
- R = nrow(CFreqs);
-
- Eta = sqrt(1 - ( sum((CFreqs-1)*CVars) / ((W-1)*varY) ));
-
- anova_num = sum( (CFreqs*(CMeans-my)^2) )/(R-1);
- anova_den = sum( (CFreqs-1)*CVars )/(W-R);
- AnovaF = anova_num/anova_den;
-}
-
-# -----------------------------------------------------------------------------------------------------------
-# Function to compute ranks
-# takes a column vector as input, and produces a vector of same size in which each cell denotes to the computed score for that category
-computeRanks = function(Matrix[Double] X) return (Matrix[Double] Ranks) {
- Ranks = cumsum(X) - X/2 + 1/2;
-}
-
-#-------------------------------------------------------------------------
-
-bivar_oo = function(Matrix[Double] A, Matrix[Double] B) return (Double sp) {
-
- # compute contingency table
- F = table(A,B);
-
- catA = nrow(F); # number of categories in A
- catB = ncol(F); # number of categories in B
-
- # compute category-wise counts for both the attributes
- R = rowSums(F);
- S = colSums(F);
-
- # compute scores, both are column vectors
- [C] = computeRanks(R);
- meanX = mean(C,R);
-
- columnS = t(S);
- [D] = computeRanks(columnS);
-
- # scores (C,D) are individual values, and counts (R,S) act as weights
- meanY = mean(D,columnS);
-
- W = sum(F); # total weight, or total #cases
- varX = moment(C,R,2)*(W/(W-1.0));
- varY = moment(D,columnS,2)*(W/(W-1.0));
- covXY = sum( t(F/(W-1) * (C-meanX)) * (D-meanY) );
-
- sp = covXY/(sqrt(varX)*sqrt(varY));
-}
-
-# -----------------------------------------------------------------------------------------------------------
-
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+/*
+ *
+ * For a given pair of attribute sets, compute bivariate statistics between all attribute pairs
+ * Given, S_1 = {A_11, A_12, ... A_1m} and S_2 = {A_21, A_22, ... A_2n}
+ * compute bivariate stats for m*n pairs (A_1i, A_2j), (1<= i <=m) and (1<= j <=n)
+ *
+ * Seven inputs:
+ * $1) D - input data
+ * $2) S1 - First attribute set {A_11, A_12, ... A_1m}
+ * $3) S2 - Second attribute set {A_21, A_22, ... A_2n}
+ * $4) K1 - kind for attributes in S1
+ * $5) K2 - kind for attributes in S2
+ * kind=1 for scale, kind=2 for nominal, kind=3 for ordinal
+ * $6) numPairs - total number of pairs (m*n)
+ * $7) maxC - maximum number of categories in any categorical attribute
+ *
+ * One output:
+ * $6) output directory in which following four statistics files are created
+ * + bivar.stats - matrix with all 8 bivariate statistics computed for different attribute pairs
+ * (R, (chi-sq, df, pval, cramersv), spearman, Eta, F)
+ * + categorical.counts -
+ * + categorical.means -
+ * + categorical.variances -
+ * -> Values in these three matrices are applicable only for scale-categorical attribute pairs.
+ * k^th column in these matrices denote the attribute pair (A_1i,A_2j) where i*j = k.
+ */
+
+D = read($1, rows=$7, cols=$8); # input data set
+S1 = read($2, rows=1, cols=$9); # attribute set 1
+S2 = read($3, rows=1, cols=$9); # attribute set 2
+K1 = read($4, rows=1, cols=$9); # kind for attributes in S1
+K2 = read($5, rows=1, cols=$9); # kind for attributes in S2
+numPairs = $10; # number of attribute pairs (|S1|*|S2|)
+maxC = $11; # max number of categories in any categorical attribute
+
+s1size = ncol(S1);
+s2size = ncol(S2);
+
+# R, chisq, cramers, spearman, eta, anovaf
+numstats = 8;
+basestats = matrix(0, rows=numstats, cols=numPairs);
+cat_counts = matrix(0, rows=maxC, cols=numPairs);
+cat_means = matrix(0, rows=maxC, cols=numPairs);
+cat_vars = matrix(0, rows=maxC, cols=numPairs);
+
+
+parfor( i in 1:s1size, par=4, mode=REMOTE_MR, check=0, opt=NONE) {
+ a1 = castAsScalar(S1[,i]);
+ k1 = castAsScalar(K1[1,i]);
+ A1 = D[,a1];
+
+ parfor( j in 1:s2size, par=4, mode=LOCAL, check=0, opt=NONE) {
+ pairID = (i-1)*s2size+j;
+ a2 = castAsScalar(S2[,j]);
+ k2 = castAsScalar(K2[1,j]);
+ A2 = D[,a2];
+
+ if (k1 == k2) {
+ if (k1 == 1) {
+ # scale-scale
+ print("[" + i + "," + j + "] scale-scale");
+ r = bivar_ss(A1,A2);
+ basestats[1,pairID] = r;
+ } else {
+ # nominal-nominal or ordinal-ordinal
+ print("[" + i + "," + j + "] categorical-categorical");
+ [chisq, df, pval, cramersv] = bivar_cc(A1,A2);
+ basestats[2,pairID] = chisq;
+ basestats[3,pairID] = df;
+ basestats[4,pairID] = pval;
+ basestats[5,pairID] = cramersv;
+
+ if ( k1 == 3 ) {
+ # ordinal-ordinal
+ print("[" + i + "," + j + "] ordinal-ordinal");
+ sp = bivar_oo(A1, A2);
+ basestats[6,pairID] = sp;
+ }
+ }
+ }
+ else {
+ if (k1 == 1 | k2 == 1) {
+ # Scale-nominal/ordinal TODO MB correctness errors
+ print("[" + i + "," + j + "] scale-categorical");
+
+ if ( k1 == 1 ) {
+ [eta,f, counts, means, vars] = bivar_sc(A1,A2);
+ }
+ else {
+ [eta,f, counts, means, vars] = bivar_sc(A2,A1);
+ }
+ basestats[7,pairID] = eta;
+ basestats[8,pairID] = f;
+ cat_counts[,pairID] = counts;
+ cat_means[,pairID] = means;
+ cat_vars[,pairID] = vars;
+ }
+ else {
+ # nominal-ordinal or ordinal-nominal
+ print("[" + i + "," + j + "] categorical-categorical");
+ [chisq, df, pval, cramersv] = bivar_cc(A1,A2);
+ basestats[2,pairID] = chisq;
+ basestats[3,pairID] = df;
+ basestats[4,pairID] = pval;
+ basestats[5,pairID] = cramersv;
+ }
+ }
+ }
+}
+
+write(basestats, $6 + "/bivar.stats");
+write(cat_counts, $6 + "/category.counts");
+write(cat_means, $6 + "/category.means");
+write(cat_vars, $6 + "/category.variances");
+
+
+# -----------------------------------------------------------------------------------------------------------
+
+bivar_cc = function(Matrix[Double] A, Matrix[Double] B) return (Double chisq, Double df, Double pval, Double cramersv) {
+
+ # Contingency Table
+ F = table(A,B);
+
+ # Chi-Squared
+ W = sum(F);
+ r = rowSums(F);
+ c = colSums(F);
+ E = (r %*% c)/W;
+ T = (F-E)^2/E;
+ chi_squared = sum(T);
+
+ # compute p-value
+ degFreedom = (nrow(F)-1)*(ncol(F)-1);
+ pValue = pchisq(target=chi_squared, df=degFreedom, lower.tail=FALSE);
+
+ # Cramer's V
+ R = nrow(F);
+ C = ncol(F);
+ q = min(R,C);
+ cramers_v = sqrt(chi_squared/(W*(q-1)));
+
+ # Assign return values
+ chisq = chi_squared;
+ df = as.double(degFreedom);
+ pval = pValue;
+ cramersv = cramers_v;
+}
+
+# -----------------------------------------------------------------------------------------------------------
+
+bivar_ss = function(Matrix[Double] X, Matrix[Double] Y) return (Double R) {
+
+ # Unweighted co-variance
+ covXY = cov(X,Y);
+
+ # compute standard deviations for both X and Y by computing 2^nd central moment
+ W = nrow(X);
+ m2X = moment(X,2);
+ m2Y = moment(Y,2);
+ sigmaX = sqrt(m2X * (W/(W-1.0)) );
+ sigmaY = sqrt(m2Y * (W/(W-1.0)) );
+
+ # Pearson's R
+ R = covXY / (sigmaX*sigmaY);
+}
+
+# -----------------------------------------------------------------------------------------------------------
+
+# Y points to SCALE variable
+# A points to CATEGORICAL variable
+bivar_sc = function(Matrix[Double] Y, Matrix[Double] A) return (Double Eta, Double AnovaF, Matrix[Double] CFreqs, Matrix[Double] CMeans, Matrix[Double] CVars ) {
+
+ # mean and variance in target variable
+ W = nrow(A);
+ my = mean(Y);
+ varY = moment(Y,2) * W/(W-1.0)
+
+ # category-wise (frequencies, means, variances)
+ CFreqs = aggregate(target=Y, groups=A, fn="count");
+ CMeans = aggregate(target=Y, groups=A, fn="mean");
+ CVars = aggregate(target=Y, groups=A, fn="variance");
+
+ # number of categories
+ R = nrow(CFreqs);
+
+ Eta = sqrt(1 - ( sum((CFreqs-1)*CVars) / ((W-1)*varY) ));
+
+ anova_num = sum( (CFreqs*(CMeans-my)^2) )/(R-1);
+ anova_den = sum( (CFreqs-1)*CVars )/(W-R);
+ AnovaF = anova_num/anova_den;
+}
+
+# -----------------------------------------------------------------------------------------------------------
+# Function to compute ranks
+# takes a column vector as input, and produces a vector of same size in which each cell denotes to the computed score for that category
+computeRanks = function(Matrix[Double] X) return (Matrix[Double] Ranks) {
+ Ranks = cumsum(X) - X/2 + 1/2;
+}
+
+#-------------------------------------------------------------------------
+
+bivar_oo = function(Matrix[Double] A, Matrix[Double] B) return (Double sp) {
+
+ # compute contingency table
+ F = table(A,B);
+
+ catA = nrow(F); # number of categories in A
+ catB = ncol(F); # number of categories in B
+
+ # compute category-wise counts for both the attributes
+ R = rowSums(F);
+ S = colSums(F);
+
+ # compute scores, both are column vectors
+ [C] = computeRanks(R);
+ meanX = mean(C,R);
+
+ columnS = t(S);
+ [D] = computeRanks(columnS);
+
+ # scores (C,D) are individual values, and counts (R,S) act as weights
+ meanY = mean(D,columnS);
+
+ W = sum(F); # total weight, or total #cases
+ varX = moment(C,R,2)*(W/(W-1.0));
+ varY = moment(D,columnS,2)*(W/(W-1.0));
+ covXY = sum( t(F/(W-1) * (C-meanX)) * (D-meanY) );
+
+ sp = covXY/(sqrt(varX)*sqrt(varY));
+}
+
+# -----------------------------------------------------------------------------------------------------------
+
+
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/parfor/parfor_bivariate4.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/parfor/parfor_bivariate4.dml b/src/test/scripts/applications/parfor/parfor_bivariate4.dml
index 9b85bad..a19957f 100644
--- a/src/test/scripts/applications/parfor/parfor_bivariate4.dml
+++ b/src/test/scripts/applications/parfor/parfor_bivariate4.dml
@@ -1,258 +1,258 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-
-/*
- *
- * For a given pair of attribute sets, compute bivariate statistics between all attribute pairs
- * Given, S_1 = {A_11, A_12, ... A_1m} and S_2 = {A_21, A_22, ... A_2n}
- * compute bivariate stats for m*n pairs (A_1i, A_2j), (1<= i <=m) and (1<= j <=n)
- *
- * Seven inputs:
- * $1) D - input data
- * $2) S1 - First attribute set {A_11, A_12, ... A_1m}
- * $3) S2 - Second attribute set {A_21, A_22, ... A_2n}
- * $4) K1 - kind for attributes in S1
- * $5) K2 - kind for attributes in S2
- * kind=1 for scale, kind=2 for nominal, kind=3 for ordinal
- * $6) numPairs - total number of pairs (m*n)
- * $7) maxC - maximum number of categories in any categorical attribute
- *
- * One output:
- * $6) output directory in which following four statistics files are created
- * + bivar.stats - matrix with all 8 bivariate statistics computed for different attribute pairs
- * (R, (chi-sq, df, pval, cramersv), spearman, Eta, F)
- * + categorical.counts -
- * + categorical.means -
- * + categorical.variances -
- * -> Values in these three matrices are applicable only for scale-categorical attribute pairs.
- * k^th column in these matrices denote the attribute pair (A_1i,A_2j) where i*j = k.
- */
-
-D = read($1, rows=$7, cols=$8); # input data set
-S1 = read($2, rows=1, cols=$9); # attribute set 1
-S2 = read($3, rows=1, cols=$9); # attribute set 2
-K1 = read($4, rows=1, cols=$9); # kind for attributes in S1
-K2 = read($5, rows=1, cols=$9); # kind for attributes in S2
-numPairs = $10; # number of attribute pairs (|S1|*|S2|)
-maxC = $11; # max number of categories in any categorical attribute
-
-s1size = ncol(S1);
-s2size = ncol(S2);
-
-#numpairs = s1size * s2size;
-#print(s1size + ", " + s2size + ", " + numpairs);
-
-# R, chisq, cramers, spearman, eta, anovaf
-numstats = 8;
-basestats = matrix(0, rows=numstats, cols=numPairs);
-cat_counts = matrix(0, rows=maxC, cols=numPairs);
-cat_means = matrix(0, rows=maxC, cols=numPairs);
-cat_vars = matrix(0, rows=maxC, cols=numPairs);
-
-
-parfor( i in 1:s1size, check=0) {
- a1 = castAsScalar(S1[,i]);
- k1 = castAsScalar(K1[1,i]);
- A1 = D[,a1];
-
- parfor( j in 1:s2size, check=0) {
- pairID = (i-1)*s2size+j;
- a2 = castAsScalar(S2[,j]);
- k2 = castAsScalar(K2[1,j]);
- A2 = D[,a2];
-
- if (k1 == k2) {
- if (k1 == 1) {
- # scale-scale
- print("[" + i + "," + j + "] scale-scale");
- r = bivar_ss(A1,A2);
- basestats[1,pairID] = r;
- } else {
- # nominal-nominal or ordinal-ordinal
- print("[" + i + "," + j + "] categorical-categorical");
- [chisq, df, pval, cramersv] = bivar_cc(A1,A2);
- basestats[2,pairID] = chisq;
- basestats[3,pairID] = df;
- basestats[4,pairID] = pval;
- basestats[5,pairID] = cramersv;
-
- if ( k1 == 3 ) {
- # ordinal-ordinal
- print("[" + i + "," + j + "] ordinal-ordinal");
- sp = bivar_oo(A1, A2);
- basestats[6,pairID] = sp;
- }
- }
- }
- else {
- if (k1 == 1 | k2 == 1) {
- # Scale-nominal/ordinal
- print("[" + i + "," + j + "] scale-categorical");
-
- if ( k1 == 1 ) {
- [eta,f, counts, means, vars] = bivar_sc(A1,A2);
- }
- else {
- [eta,f, counts, means, vars] = bivar_sc(A2,A1);
- }
- basestats[7,pairID] = eta;
- basestats[8,pairID] = f;
- cat_counts[,pairID] = counts;
- cat_means[,pairID] = means;
- cat_vars[,pairID] = vars;
- }
- else {
- # nominal-ordinal or ordinal-nominal
- print("[" + i + "," + j + "] categorical-categorical");
- [chisq, df, pval, cramersv] = bivar_cc(A1,A2);
- basestats[2,pairID] = chisq;
- basestats[3,pairID] = df;
- basestats[4,pairID] = pval;
- basestats[5,pairID] = cramersv;
- }
- }
- }
-}
-
-write(basestats, $6 + "/bivar.stats");
-write(cat_counts, $6 + "/category.counts");
-write(cat_means, $6 + "/category.means");
-write(cat_vars, $6 + "/category.variances");
-
-
-# -----------------------------------------------------------------------------------------------------------
-
-bivar_cc = function(Matrix[Double] A, Matrix[Double] B) return (Double chisq, Double df, Double pval, Double cramersv) {
-
- # Contingency Table
- F = table(A,B);
-
- # Chi-Squared
- W = sum(F);
- r = rowSums(F);
- c = colSums(F);
- E = (r %*% c)/W;
- T = (F-E)^2/E;
- chi_squared = sum(T);
-
- # compute p-value
- degFreedom = (nrow(F)-1)*(ncol(F)-1);
- pValue = pchisq(target=chi_squared, df=degFreedom, lower.tail=FALSE);
-
- # Cramer's V
- R = nrow(F);
- C = ncol(F);
- q = min(R,C);
- cramers_v = sqrt(chi_squared/(W*(q-1)));
-
- # Assign return values
- chisq = chi_squared;
- df = as.double(degFreedom);
- pval = pValue;
- cramersv = cramers_v;
-}
-
-# -----------------------------------------------------------------------------------------------------------
-
-bivar_ss = function(Matrix[Double] X, Matrix[Double] Y) return (Double R) {
-
- # Unweighted co-variance
- covXY = cov(X,Y);
-
- # compute standard deviations for both X and Y by computing 2^nd central moment
- W = nrow(X);
- m2X = moment(X,2);
- m2Y = moment(Y,2);
- sigmaX = sqrt(m2X * (W/(W-1.0)) );
- sigmaY = sqrt(m2Y * (W/(W-1.0)) );
-
- # Pearson's R
- R = covXY / (sigmaX*sigmaY);
-}
-
-# -----------------------------------------------------------------------------------------------------------
-
-# Y points to SCALE variable
-# A points to CATEGORICAL variable
-bivar_sc = function(Matrix[Double] Y, Matrix[Double] A) return (Double Eta, Double AnovaF, Matrix[Double] CFreqs, Matrix[Double] CMeans, Matrix[Double] CVars ) {
-
- # mean and variance in target variable
- W = nrow(A);
- my = mean(Y);
- varY = moment(Y,2) * W/(W-1.0)
-
- # category-wise (frequencies, means, variances)
- CFreqs = aggregate(target=Y, groups=A, fn="count");
- CMeans = aggregate(target=Y, groups=A, fn="mean");
- CVars = aggregate(target=Y, groups=A, fn="variance");
-
- # number of categories
- R = nrow(CFreqs);
-
- Eta = sqrt(1 - ( sum((CFreqs-1)*CVars) / ((W-1)*varY) ));
-
- anova_num = sum( (CFreqs*(CMeans-my)^2) )/(R-1);
- anova_den = sum( (CFreqs-1)*CVars )/(W-R);
- AnovaF = anova_num/anova_den;
-}
-
-
-# -----------------------------------------------------------------------------------------------------------
-# Function to compute ranks
-# takes a column vector as input, and produces a vector of same size in which each cell denotes to the computed score for that category
-computeRanks = function(Matrix[Double] X) return (Matrix[Double] Ranks) {
- Ranks = cumsum(X) - X/2 + 1/2;
-}
-
-#-------------------------------------------------------------------------
-
-bivar_oo = function(Matrix[Double] A, Matrix[Double] B) return (Double sp) {
-
- # compute contingency table
- F = table(A,B);
-
- catA = nrow(F); # number of categories in A
- catB = ncol(F); # number of categories in B
-
- # compute category-wise counts for both the attributes
- R = rowSums(F);
- S = colSums(F);
-
- # compute scores, both are column vectors
- [C] = computeRanks(R);
- meanX = mean(C,R);
-
- columnS = t(S);
- [D] = computeRanks(columnS);
-
- # scores (C,D) are individual values, and counts (R,S) act as weights
- meanY = mean(D,columnS);
-
- W = sum(F); # total weight, or total #cases
- varX = moment(C,R,2)*(W/(W-1.0));
- varY = moment(D,columnS,2)*(W/(W-1.0));
- covXY = sum( t(F/(W-1) * (C-meanX)) * (D-meanY) );
-
- sp = covXY/(sqrt(varX)*sqrt(varY));
-}
-
-# -----------------------------------------------------------------------------------------------------------
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+/*
+ *
+ * For a given pair of attribute sets, compute bivariate statistics between all attribute pairs
+ * Given, S_1 = {A_11, A_12, ... A_1m} and S_2 = {A_21, A_22, ... A_2n}
+ * compute bivariate stats for m*n pairs (A_1i, A_2j), (1<= i <=m) and (1<= j <=n)
+ *
+ * Seven inputs:
+ * $1) D - input data
+ * $2) S1 - First attribute set {A_11, A_12, ... A_1m}
+ * $3) S2 - Second attribute set {A_21, A_22, ... A_2n}
+ * $4) K1 - kind for attributes in S1
+ * $5) K2 - kind for attributes in S2
+ * kind=1 for scale, kind=2 for nominal, kind=3 for ordinal
+ * $6) numPairs - total number of pairs (m*n)
+ * $7) maxC - maximum number of categories in any categorical attribute
+ *
+ * One output:
+ * $6) output directory in which following four statistics files are created
+ * + bivar.stats - matrix with all 8 bivariate statistics computed for different attribute pairs
+ * (R, (chi-sq, df, pval, cramersv), spearman, Eta, F)
+ * + categorical.counts -
+ * + categorical.means -
+ * + categorical.variances -
+ * -> Values in these three matrices are applicable only for scale-categorical attribute pairs.
+ * k^th column in these matrices denote the attribute pair (A_1i,A_2j) where i*j = k.
+ */
+
+D = read($1, rows=$7, cols=$8); # input data set
+S1 = read($2, rows=1, cols=$9); # attribute set 1
+S2 = read($3, rows=1, cols=$9); # attribute set 2
+K1 = read($4, rows=1, cols=$9); # kind for attributes in S1
+K2 = read($5, rows=1, cols=$9); # kind for attributes in S2
+numPairs = $10; # number of attribute pairs (|S1|*|S2|)
+maxC = $11; # max number of categories in any categorical attribute
+
+s1size = ncol(S1);
+s2size = ncol(S2);
+
+#numpairs = s1size * s2size;
+#print(s1size + ", " + s2size + ", " + numpairs);
+
+# R, chisq, cramers, spearman, eta, anovaf
+numstats = 8;
+basestats = matrix(0, rows=numstats, cols=numPairs);
+cat_counts = matrix(0, rows=maxC, cols=numPairs);
+cat_means = matrix(0, rows=maxC, cols=numPairs);
+cat_vars = matrix(0, rows=maxC, cols=numPairs);
+
+
+parfor( i in 1:s1size, check=0) {
+ a1 = castAsScalar(S1[,i]);
+ k1 = castAsScalar(K1[1,i]);
+ A1 = D[,a1];
+
+ parfor( j in 1:s2size, check=0) {
+ pairID = (i-1)*s2size+j;
+ a2 = castAsScalar(S2[,j]);
+ k2 = castAsScalar(K2[1,j]);
+ A2 = D[,a2];
+
+ if (k1 == k2) {
+ if (k1 == 1) {
+ # scale-scale
+ print("[" + i + "," + j + "] scale-scale");
+ r = bivar_ss(A1,A2);
+ basestats[1,pairID] = r;
+ } else {
+ # nominal-nominal or ordinal-ordinal
+ print("[" + i + "," + j + "] categorical-categorical");
+ [chisq, df, pval, cramersv] = bivar_cc(A1,A2);
+ basestats[2,pairID] = chisq;
+ basestats[3,pairID] = df;
+ basestats[4,pairID] = pval;
+ basestats[5,pairID] = cramersv;
+
+ if ( k1 == 3 ) {
+ # ordinal-ordinal
+ print("[" + i + "," + j + "] ordinal-ordinal");
+ sp = bivar_oo(A1, A2);
+ basestats[6,pairID] = sp;
+ }
+ }
+ }
+ else {
+ if (k1 == 1 | k2 == 1) {
+ # Scale-nominal/ordinal
+ print("[" + i + "," + j + "] scale-categorical");
+
+ if ( k1 == 1 ) {
+ [eta,f, counts, means, vars] = bivar_sc(A1,A2);
+ }
+ else {
+ [eta,f, counts, means, vars] = bivar_sc(A2,A1);
+ }
+ basestats[7,pairID] = eta;
+ basestats[8,pairID] = f;
+ cat_counts[,pairID] = counts;
+ cat_means[,pairID] = means;
+ cat_vars[,pairID] = vars;
+ }
+ else {
+ # nominal-ordinal or ordinal-nominal
+ print("[" + i + "," + j + "] categorical-categorical");
+ [chisq, df, pval, cramersv] = bivar_cc(A1,A2);
+ basestats[2,pairID] = chisq;
+ basestats[3,pairID] = df;
+ basestats[4,pairID] = pval;
+ basestats[5,pairID] = cramersv;
+ }
+ }
+ }
+}
+
+write(basestats, $6 + "/bivar.stats");
+write(cat_counts, $6 + "/category.counts");
+write(cat_means, $6 + "/category.means");
+write(cat_vars, $6 + "/category.variances");
+
+
+# -----------------------------------------------------------------------------------------------------------
+
+bivar_cc = function(Matrix[Double] A, Matrix[Double] B) return (Double chisq, Double df, Double pval, Double cramersv) {
+
+ # Contingency Table
+ F = table(A,B);
+
+ # Chi-Squared
+ W = sum(F);
+ r = rowSums(F);
+ c = colSums(F);
+ E = (r %*% c)/W;
+ T = (F-E)^2/E;
+ chi_squared = sum(T);
+
+ # compute p-value
+ degFreedom = (nrow(F)-1)*(ncol(F)-1);
+ pValue = pchisq(target=chi_squared, df=degFreedom, lower.tail=FALSE);
+
+ # Cramer's V
+ R = nrow(F);
+ C = ncol(F);
+ q = min(R,C);
+ cramers_v = sqrt(chi_squared/(W*(q-1)));
+
+ # Assign return values
+ chisq = chi_squared;
+ df = as.double(degFreedom);
+ pval = pValue;
+ cramersv = cramers_v;
+}
+
+# -----------------------------------------------------------------------------------------------------------
+
+bivar_ss = function(Matrix[Double] X, Matrix[Double] Y) return (Double R) {
+
+ # Unweighted co-variance
+ covXY = cov(X,Y);
+
+ # compute standard deviations for both X and Y by computing 2^nd central moment
+ W = nrow(X);
+ m2X = moment(X,2);
+ m2Y = moment(Y,2);
+ sigmaX = sqrt(m2X * (W/(W-1.0)) );
+ sigmaY = sqrt(m2Y * (W/(W-1.0)) );
+
+ # Pearson's R
+ R = covXY / (sigmaX*sigmaY);
+}
+
+# -----------------------------------------------------------------------------------------------------------
+
+# Y points to SCALE variable
+# A points to CATEGORICAL variable
+bivar_sc = function(Matrix[Double] Y, Matrix[Double] A) return (Double Eta, Double AnovaF, Matrix[Double] CFreqs, Matrix[Double] CMeans, Matrix[Double] CVars ) {
+
+ # mean and variance in target variable
+ W = nrow(A);
+ my = mean(Y);
+ varY = moment(Y,2) * W/(W-1.0)
+
+ # category-wise (frequencies, means, variances)
+ CFreqs = aggregate(target=Y, groups=A, fn="count");
+ CMeans = aggregate(target=Y, groups=A, fn="mean");
+ CVars = aggregate(target=Y, groups=A, fn="variance");
+
+ # number of categories
+ R = nrow(CFreqs);
+
+ Eta = sqrt(1 - ( sum((CFreqs-1)*CVars) / ((W-1)*varY) ));
+
+ anova_num = sum( (CFreqs*(CMeans-my)^2) )/(R-1);
+ anova_den = sum( (CFreqs-1)*CVars )/(W-R);
+ AnovaF = anova_num/anova_den;
+}
+
+
+# -----------------------------------------------------------------------------------------------------------
+# Function to compute ranks
+# takes a column vector as input, and produces a vector of same size in which each cell denotes to the computed score for that category
+computeRanks = function(Matrix[Double] X) return (Matrix[Double] Ranks) {
+ Ranks = cumsum(X) - X/2 + 1/2;
+}
+
+#-------------------------------------------------------------------------
+
+bivar_oo = function(Matrix[Double] A, Matrix[Double] B) return (Double sp) {
+
+ # compute contingency table
+ F = table(A,B);
+
+ catA = nrow(F); # number of categories in A
+ catB = ncol(F); # number of categories in B
+
+ # compute category-wise counts for both the attributes
+ R = rowSums(F);
+ S = colSums(F);
+
+ # compute scores, both are column vectors
+ [C] = computeRanks(R);
+ meanX = mean(C,R);
+
+ columnS = t(S);
+ [D] = computeRanks(columnS);
+
+ # scores (C,D) are individual values, and counts (R,S) act as weights
+ meanY = mean(D,columnS);
+
+ W = sum(F); # total weight, or total #cases
+ varX = moment(C,R,2)*(W/(W-1.0));
+ varY = moment(D,columnS,2)*(W/(W-1.0));
+ covXY = sum( t(F/(W-1) * (C-meanX)) * (D-meanY) );
+
+ sp = covXY/(sqrt(varX)*sqrt(varY));
+}
+
+# -----------------------------------------------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/parfor/parfor_corr.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/parfor/parfor_corr.R b/src/test/scripts/applications/parfor/parfor_corr.R
index 5f8c315..854e593 100644
--- a/src/test/scripts/applications/parfor/parfor_corr.R
+++ b/src/test/scripts/applications/parfor/parfor_corr.R
@@ -1,48 +1,48 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-V1 <- readMM(paste(args[1], "V.mtx", sep=""))
-V <- as.matrix(V1);
-
-m <- nrow(V);
-n <- ncol(V);
-W <- m;
-
-R <- array(0,dim=c(n,n))
-
-for( i in 1:(n-1) )
-{
- X <- V[ ,i];
-
- for( j in (i+1):n )
- {
- Y <- V[ ,j];
- R[i,j] <- cor(X, Y)
- #print(R[i,j]);
- }
-}
-
-writeMM(as(R, "CsparseMatrix"), paste(args[2], "Rout", sep=""));
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+V1 <- readMM(paste(args[1], "V.mtx", sep=""))
+V <- as.matrix(V1);
+
+m <- nrow(V);
+n <- ncol(V);
+W <- m;
+
+R <- array(0,dim=c(n,n))
+
+for( i in 1:(n-1) )
+{
+ X <- V[ ,i];
+
+ for( j in (i+1):n )
+ {
+ Y <- V[ ,j];
+ R[i,j] <- cor(X, Y)
+ #print(R[i,j]);
+ }
+}
+
+writeMM(as(R, "CsparseMatrix"), paste(args[2], "Rout", sep=""));
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/parfor/parfor_corr0.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/parfor/parfor_corr0.dml b/src/test/scripts/applications/parfor/parfor_corr0.dml
index 7a340bd..a711d0a 100644
--- a/src/test/scripts/applications/parfor/parfor_corr0.dml
+++ b/src/test/scripts/applications/parfor/parfor_corr0.dml
@@ -1,51 +1,51 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-V = read($1,rows=$2,cols=$3);
-m = $2;
-n = $3;
-W = m;
-
-R = matrix(0,rows=n,cols=n);
-
-for( i in 1:(n-1) )
-{
- X = V[,i];
- m2X = moment(X,2);
- sigmaX = sqrt(m2X * (W/(W-1.0)) );
-
- for( j in (i+1):n )
- {
- Y = V[,j];
-
- #corr computation
- m2Y = moment(Y,2);
- sigmaY = sqrt(m2Y * (W/(W-1.0)) );
- covXY = cov(X,Y);
- rXY = covXY / (sigmaX*sigmaY);
-
- #print("R[("+i+","+j+")]="+rXY);
- R[i,j] = rXY;
-
- }
-}
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+V = read($1,rows=$2,cols=$3);
+m = $2;
+n = $3;
+W = m;
+
+R = matrix(0,rows=n,cols=n);
+
+for( i in 1:(n-1) )
+{
+ X = V[,i];
+ m2X = moment(X,2);
+ sigmaX = sqrt(m2X * (W/(W-1.0)) );
+
+ for( j in (i+1):n )
+ {
+ Y = V[,j];
+
+ #corr computation
+ m2Y = moment(Y,2);
+ sigmaY = sqrt(m2Y * (W/(W-1.0)) );
+ covXY = cov(X,Y);
+ rXY = covXY / (sigmaX*sigmaY);
+
+ #print("R[("+i+","+j+")]="+rXY);
+ R[i,j] = rXY;
+
+ }
+}
+
write(R, $4);
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/parfor/parfor_corr1.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/parfor/parfor_corr1.dml b/src/test/scripts/applications/parfor/parfor_corr1.dml
index 8bb0368..b2d5a14 100644
--- a/src/test/scripts/applications/parfor/parfor_corr1.dml
+++ b/src/test/scripts/applications/parfor/parfor_corr1.dml
@@ -1,50 +1,50 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-V = read($1,rows=$2,cols=$3);
-m = $2;
-n = $3;
-W = m;
-
-R = matrix(0, rows=n,cols=n);
-
-parfor( i in 1:(n-1), par=4, mode=LOCAL, opt=NONE )
-{
- X = V[,i];
- m2X = moment(X,2);
- sigmaX = sqrt(m2X * (W/(W-1.0)) );
-
- parfor( j in (i+1):n, par=4, mode=LOCAL, opt=NONE )
- {
- Y = V[,j];
-
- #corr computation
- m2Y = moment(Y,2);
- sigmaY = sqrt(m2Y * (W/(W-1.0)) );
- covXY = cov(X,Y);
- rXY = covXY / (sigmaX*sigmaY);
-
- #print("R[("+i+","+j+")]="+rXY);
- R[i,j] = rXY;
- }
-}
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+V = read($1,rows=$2,cols=$3);
+m = $2;
+n = $3;
+W = m;
+
+R = matrix(0, rows=n,cols=n);
+
+parfor( i in 1:(n-1), par=4, mode=LOCAL, opt=NONE )
+{
+ X = V[,i];
+ m2X = moment(X,2);
+ sigmaX = sqrt(m2X * (W/(W-1.0)) );
+
+ parfor( j in (i+1):n, par=4, mode=LOCAL, opt=NONE )
+ {
+ Y = V[,j];
+
+ #corr computation
+ m2Y = moment(Y,2);
+ sigmaY = sqrt(m2Y * (W/(W-1.0)) );
+ covXY = cov(X,Y);
+ rXY = covXY / (sigmaX*sigmaY);
+
+ #print("R[("+i+","+j+")]="+rXY);
+ R[i,j] = rXY;
+ }
+}
+
write(R, $4);
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/parfor/parfor_corr2.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/parfor/parfor_corr2.dml b/src/test/scripts/applications/parfor/parfor_corr2.dml
index c5f5c31..9e10534 100644
--- a/src/test/scripts/applications/parfor/parfor_corr2.dml
+++ b/src/test/scripts/applications/parfor/parfor_corr2.dml
@@ -1,50 +1,50 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-V = read($1,rows=$2,cols=$3);
-m = $2;
-n = $3;
-W = m;
-
-R = matrix(0, rows=n,cols=n);
-
-parfor( i in 1:(n-1), par=4, mode=LOCAL, opt=NONE )
-{
- X = V[,i];
- m2X = moment(X,2);
- sigmaX = sqrt(m2X * (W/(W-1.0)) );
-
- parfor( j in (i+1):n, par=4, mode=REMOTE_MR, opt=NONE )
- {
- Y = V[,j];
-
- #corr computation
- m2Y = moment(Y,2);
- sigmaY = sqrt(m2Y * (W/(W-1.0)) );
- covXY = cov(X,Y);
- rXY = covXY / (sigmaX*sigmaY);
-
- print("R[{"+i+","+j+"}]="+rXY); #test robustness of ProgramConverter
- R[i,j] = rXY;
- }
-}
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+V = read($1,rows=$2,cols=$3);
+m = $2;
+n = $3;
+W = m;
+
+R = matrix(0, rows=n,cols=n);
+
+parfor( i in 1:(n-1), par=4, mode=LOCAL, opt=NONE )
+{
+ X = V[,i];
+ m2X = moment(X,2);
+ sigmaX = sqrt(m2X * (W/(W-1.0)) );
+
+ parfor( j in (i+1):n, par=4, mode=REMOTE_MR, opt=NONE )
+ {
+ Y = V[,j];
+
+ #corr computation
+ m2Y = moment(Y,2);
+ sigmaY = sqrt(m2Y * (W/(W-1.0)) );
+ covXY = cov(X,Y);
+ rXY = covXY / (sigmaX*sigmaY);
+
+ print("R[{"+i+","+j+"}]="+rXY); #test robustness of ProgramConverter
+ R[i,j] = rXY;
+ }
+}
+
write(R, $4);
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/parfor/parfor_corr3.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/parfor/parfor_corr3.dml b/src/test/scripts/applications/parfor/parfor_corr3.dml
index 46295a3..95fc38f 100644
--- a/src/test/scripts/applications/parfor/parfor_corr3.dml
+++ b/src/test/scripts/applications/parfor/parfor_corr3.dml
@@ -1,51 +1,51 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-V = read($1,rows=$2,cols=$3);
-m = $2;
-n = $3;
-W = m;
-
-R = matrix(0, rows=n,cols=n);
-
-parfor( i in 1:(n-1), par=4, mode=REMOTE_MR, opt=NONE )
-{
- X = V[,i];
- m2X = moment(X,2);
- sigmaX = sqrt(m2X * (W/(W-1.0)) );
-
- parfor( j in (i+1):n, par=4, mode=LOCAL, opt=NONE )
- {
- Y = V[,j];
-
- #corr computation
- m2Y = moment(Y,2);
- sigmaY = sqrt(m2Y * (W/(W-1.0)) );
- covXY = cov(X,Y);
- rXY = covXY / (sigmaX*sigmaY);
-
- print("R[{"+i+","+j+"}]="+rXY); #test robustness of ProgramConverter
- R[i,j] = rXY;
-
- }
-}
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+V = read($1,rows=$2,cols=$3);
+m = $2;
+n = $3;
+W = m;
+
+R = matrix(0, rows=n,cols=n);
+
+parfor( i in 1:(n-1), par=4, mode=REMOTE_MR, opt=NONE )
+{
+ X = V[,i];
+ m2X = moment(X,2);
+ sigmaX = sqrt(m2X * (W/(W-1.0)) );
+
+ parfor( j in (i+1):n, par=4, mode=LOCAL, opt=NONE )
+ {
+ Y = V[,j];
+
+ #corr computation
+ m2Y = moment(Y,2);
+ sigmaY = sqrt(m2Y * (W/(W-1.0)) );
+ covXY = cov(X,Y);
+ rXY = covXY / (sigmaX*sigmaY);
+
+ print("R[{"+i+","+j+"}]="+rXY); #test robustness of ProgramConverter
+ R[i,j] = rXY;
+
+ }
+}
+
write(R, $4);
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/parfor/parfor_corr4.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/parfor/parfor_corr4.dml b/src/test/scripts/applications/parfor/parfor_corr4.dml
index 62e643d..16b4767 100644
--- a/src/test/scripts/applications/parfor/parfor_corr4.dml
+++ b/src/test/scripts/applications/parfor/parfor_corr4.dml
@@ -1,51 +1,51 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-V = read($1,rows=$2,cols=$3);
-m = $2;
-n = $3;
-W = m;
-
-R = matrix(0, rows=n,cols=n);
-
-parfor( i in 1:(n-1) )
-{
- X = V[,i];
- m2X = moment(X,2);
- sigmaX = sqrt(m2X * (W/(W-1.0)) );
-
- parfor( j in (i+1):n )
- {
- Y = V[,j];
-
- #corr computation
- m2Y = moment(Y,2);
- sigmaY = sqrt(m2Y * (W/(W-1.0)) );
- covXY = cov(X,Y);
- rXY = covXY / (sigmaX*sigmaY);
-
- print("R[{"+i+","+j+"}]="+rXY);
- R[i,j] = rXY;
-
- }
-}
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+V = read($1,rows=$2,cols=$3);
+m = $2;
+n = $3;
+W = m;
+
+R = matrix(0, rows=n,cols=n);
+
+parfor( i in 1:(n-1) )
+{
+ X = V[,i];
+ m2X = moment(X,2);
+ sigmaX = sqrt(m2X * (W/(W-1.0)) );
+
+ parfor( j in (i+1):n )
+ {
+ Y = V[,j];
+
+ #corr computation
+ m2Y = moment(Y,2);
+ sigmaY = sqrt(m2Y * (W/(W-1.0)) );
+ covXY = cov(X,Y);
+ rXY = covXY / (sigmaX*sigmaY);
+
+ print("R[{"+i+","+j+"}]="+rXY);
+ R[i,j] = rXY;
+
+ }
+}
+
write(R, $4);
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/parfor/parfor_corr5.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/parfor/parfor_corr5.dml b/src/test/scripts/applications/parfor/parfor_corr5.dml
index 0da9539..1663b8f 100644
--- a/src/test/scripts/applications/parfor/parfor_corr5.dml
+++ b/src/test/scripts/applications/parfor/parfor_corr5.dml
@@ -1,51 +1,51 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-V = read($1,rows=$2,cols=$3);
-m = $2;
-n = $3;
-W = m;
-
-R = matrix(0, rows=n,cols=n);
-
-parfor( i in 1:(n-1), profile=1 )
-{
- X = V[,i];
- m2X = moment(X,2);
- sigmaX = sqrt(m2X * (W/(W-1.0)) );
-
- parfor( j in (i+1):n, profile=1 )
- {
- Y = V[,j];
-
- #corr computation
- m2Y = moment(Y,2);
- sigmaY = sqrt(m2Y * (W/(W-1.0)) );
- covXY = cov(X,Y);
- rXY = covXY / (sigmaX*sigmaY);
-
- print("R[{"+i+","+j+"}]="+rXY);
- R[i,j] = rXY;
-
- }
-}
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+V = read($1,rows=$2,cols=$3);
+m = $2;
+n = $3;
+W = m;
+
+R = matrix(0, rows=n,cols=n);
+
+parfor( i in 1:(n-1), profile=1 )
+{
+ X = V[,i];
+ m2X = moment(X,2);
+ sigmaX = sqrt(m2X * (W/(W-1.0)) );
+
+ parfor( j in (i+1):n, profile=1 )
+ {
+ Y = V[,j];
+
+ #corr computation
+ m2Y = moment(Y,2);
+ sigmaY = sqrt(m2Y * (W/(W-1.0)) );
+ covXY = cov(X,Y);
+ rXY = covXY / (sigmaX*sigmaY);
+
+ print("R[{"+i+","+j+"}]="+rXY);
+ R[i,j] = rXY;
+
+ }
+}
+
write(R, $4);
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/parfor/parfor_corr6.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/parfor/parfor_corr6.dml b/src/test/scripts/applications/parfor/parfor_corr6.dml
index 166b3d7..2ee26cc 100644
--- a/src/test/scripts/applications/parfor/parfor_corr6.dml
+++ b/src/test/scripts/applications/parfor/parfor_corr6.dml
@@ -1,51 +1,51 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-V = read($1,rows=$2,cols=$3);
-m = $2;
-n = $3;
-W = m;
-
-R = matrix(0, rows=n,cols=n);
-
-parfor( i in 1:(n-1), log=debug )
-{
- X = V[,i];
- m2X = moment(X,2);
- sigmaX = sqrt(m2X * (W/(W-1.0)) );
-
- parfor( j in (i+1):n )
- {
- Y = V[,j];
-
- #corr computation
- m2Y = moment(Y,2);
- sigmaY = sqrt(m2Y * (W/(W-1.0)) );
- covXY = cov(X,Y);
- rXY = covXY / (sigmaX*sigmaY);
-
- print("R[{"+i+","+j+"}]="+rXY);
- R[i,j] = rXY;
-
- }
-}
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+V = read($1,rows=$2,cols=$3);
+m = $2;
+n = $3;
+W = m;
+
+R = matrix(0, rows=n,cols=n);
+
+parfor( i in 1:(n-1), log=debug )
+{
+ X = V[,i];
+ m2X = moment(X,2);
+ sigmaX = sqrt(m2X * (W/(W-1.0)) );
+
+ parfor( j in (i+1):n )
+ {
+ Y = V[,j];
+
+ #corr computation
+ m2Y = moment(Y,2);
+ sigmaY = sqrt(m2Y * (W/(W-1.0)) );
+ covXY = cov(X,Y);
+ rXY = covXY / (sigmaX*sigmaY);
+
+ print("R[{"+i+","+j+"}]="+rXY);
+ R[i,j] = rXY;
+
+ }
+}
+
write(R, $4);
\ No newline at end of file