You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by du...@apache.org on 2016/01/22 17:33:52 UTC
[16/51] [partial] incubator-systemml git commit: [SYSTEMML-482]
[SYSTEMML-480] Adding a Git attributes file to enforce Unix-styled line
endings, and normalizing all of the line endings.
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/src/test/scripts/applications/parfor/parfor_naive-bayes.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/parfor/parfor_naive-bayes.R b/src/test/scripts/applications/parfor/parfor_naive-bayes.R
index f455c2c..cb0d00f 100644
--- a/src/test/scripts/applications/parfor/parfor_naive-bayes.R
+++ b/src/test/scripts/applications/parfor/parfor_naive-bayes.R
@@ -1,61 +1,61 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-D = as.matrix(readMM(paste(args[1], "D.mtx", sep="")))
-C = as.matrix(readMM(paste(args[1], "C.mtx", sep="")))
-
-# reading input args
-numClasses = as.integer(args[2]);
-laplace_correction = 1
-
-numRows = nrow(D)
-numFeatures = ncol(D)
-
-# Compute conditionals
-
-# Compute the feature counts for each class
-classFeatureCounts = matrix(0, numClasses, numFeatures)
-for (i in 1:numFeatures) {
- Col = D[,i]
- classFeatureCounts[,i] = aggregate(as.vector(Col), by=list(as.vector(C)), FUN=sum)[,2];
-}
-
-# Compute the total feature count for each class
-# and add the number of features to this sum
-# for subsequent regularization (Laplace's rule)
-classSums = rowSums(classFeatureCounts) + numFeatures*laplace_correction
-
-# Compute class conditional probabilities
-repClassSums = classSums %*% matrix(1,1,numFeatures);
-class_conditionals = (classFeatureCounts + laplace_correction) / repClassSums;
-
-# Compute class priors
-class_counts = aggregate(as.vector(C), by=list(as.vector(C)), FUN=length)[,2]
-class_prior = class_counts / numRows;
-
-# write out the model
-writeMM(as(class_prior, "CsparseMatrix"), paste(args[3], "class_prior", sep=""));
-writeMM(as(class_conditionals, "CsparseMatrix"), paste(args[3], "class_conditionals", sep=""));
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+D = as.matrix(readMM(paste(args[1], "D.mtx", sep="")))
+C = as.matrix(readMM(paste(args[1], "C.mtx", sep="")))
+
+# reading input args
+numClasses = as.integer(args[2]);
+laplace_correction = 1
+
+numRows = nrow(D)
+numFeatures = ncol(D)
+
+# Compute conditionals
+
+# Compute the feature counts for each class
+classFeatureCounts = matrix(0, numClasses, numFeatures)
+for (i in 1:numFeatures) {
+ Col = D[,i]
+ classFeatureCounts[,i] = aggregate(as.vector(Col), by=list(as.vector(C)), FUN=sum)[,2];
+}
+
+# Compute the total feature count for each class
+# and add the number of features to this sum
+# for subsequent regularization (Laplace's rule)
+classSums = rowSums(classFeatureCounts) + numFeatures*laplace_correction
+
+# Compute class conditional probabilities
+repClassSums = classSums %*% matrix(1,1,numFeatures);
+class_conditionals = (classFeatureCounts + laplace_correction) / repClassSums;
+
+# Compute class priors
+class_counts = aggregate(as.vector(C), by=list(as.vector(C)), FUN=length)[,2]
+class_prior = class_counts / numRows;
+
+# write out the model
+writeMM(as(class_prior, "CsparseMatrix"), paste(args[3], "class_prior", sep=""));
+writeMM(as(class_conditionals, "CsparseMatrix"), paste(args[3], "class_conditionals", sep=""));
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/src/test/scripts/applications/parfor/parfor_univariate.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/parfor/parfor_univariate.R b/src/test/scripts/applications/parfor/parfor_univariate.R
index cb5dfb1..14f9f95 100644
--- a/src/test/scripts/applications/parfor/parfor_univariate.R
+++ b/src/test/scripts/applications/parfor/parfor_univariate.R
@@ -1,155 +1,155 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-library("moments")
-
-A1 <- readMM(paste(args[1], "D.mtx", sep=""))
-K1 <- readMM(paste(args[1], "K.mtx", sep=""))
-A <- as.matrix(A1);
-K <- as.matrix(K1);
-maxC = args[2];
-
-
-# number of features/attributes
-n = ncol(A);
-
-# number of data records
-m = nrow(A);
-
-# number of statistics
-numBaseStats = 17; # (14 scale stats, 3 categorical stats)
-
-max_kind = max(K);
-
-# matrices to store computed statistics
-baseStats = array(0,dim=c(numBaseStats,n));
-
-if (maxC > 0) {
- countsArray = array(0,dim=c(maxC,n));
-}
-
-for(i in 1:n) {
-
- # project out the i^th column
- F = as.matrix(A[,i]);
-
- kind = K[1,i];
-
- if ( kind == 1 ) {
- print("scale");
- # compute SCALE statistics on the projected column
- minimum = min(F);
- maximum = max(F);
- rng = maximum - minimum;
-
- mu = mean(F);
- m2 = moment(F, order=2, central=TRUE);
- m3 = moment(F, order=3, central=TRUE);
- m4 = moment(F, order=4, central=TRUE);
-
- var = m/(m-1.0)*m2;
-
- std_dev = sqrt(var);
- se = std_dev/sqrt(m);
- cv = std_dev/mu;
-
- g1 = m3/(std_dev^3);
- g2 = m4/(std_dev^4) - 3;
- #se_g1=sqrt( 6*m*(m-1.0) / ((m-2.0)*(m+1.0)*(m+3.0)) );
- se_g1=sqrt( (6/(m-2.0)) * (m/(m+1.0)) * ((m-1.0)/(m+3.0)) );
-
- #se_g2= sqrt( (4*(m^2-1)*se_g1^2)/((m+5.0)*(m-3.0)) );
- se_g2=sqrt( (4/(m+5.0)) * ((m^2-1)/(m-3.0)) * se_g1^2 );
-
- md = median(F); #quantile(F, 0.5, type = 1);
-
- S = sort(F)
- q25d=m*0.25
- q75d=m*0.75
- q25i=ceiling(q25d)
- q75i=ceiling(q75d)
-
- iqm = sum(S[(q25i+1):q75i])
- iqm = iqm + (q25i-q25d)*S[q25i] - (q75i-q75d)*S[q75i]
- iqm = iqm/(m*0.5)
-
- #iqm = mean( subset(F, F>quantile(F,1/4,type = 1) & F<=quantile(F,3/4,type = 1) ) )
-
- # place the computed statistics in output matrices
- baseStats[1,i] = minimum;
- baseStats[2,i] = maximum;
- baseStats[3,i] = rng;
-
- baseStats[4,i] = mu;
- baseStats[5,i] = var;
- baseStats[6,i] = std_dev;
- baseStats[7,i] = se;
- baseStats[8,i] = cv;
-
- baseStats[9,i] = g1;
- baseStats[10,i] = g2;
- baseStats[11,i] = se_g1;
- baseStats[12,i] = se_g2;
-
- baseStats[13,i] = md;
- baseStats[14,i] = iqm;
- }
- else {
- if (kind == 2 | kind == 3) {
- print("categorical");
-
- # check if the categorical column has valid values
- minF = min(F);
- if (minF <=0) {
- print("ERROR: Categorical attributes can only take values starting from 1.");
- }
- else {
- # compute CATEGORICAL statistics on the projected column
- cat_counts = table(F); # counts for each category
- num_cat = nrow(cat_counts); # number of categories
-
- mx = max(t(as.vector(cat_counts)))
- mode = which(cat_counts == mx)
-
- numModes = length(cat_counts[ cat_counts==mx ]);
-
- # place the computed statistics in output matrices
- baseStats[15,i] = num_cat;
- baseStats[16,i] = mode;
- baseStats[17,i] = numModes;
-
- if (max_kind > 1) {
- countsArray[1:length(cat_counts),i] = cat_counts;
- }
- }
- }
- }
-}
-
-writeMM(as(baseStats, "CsparseMatrix"), paste(args[3], "base.stats", sep=""));
-if (max_kind > 1) {
- writeMM(as(countsArray, "CsparseMatrix"), paste(args[3], "categorical.counts", sep=""));
-}
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+library("moments")
+
+A1 <- readMM(paste(args[1], "D.mtx", sep=""))
+K1 <- readMM(paste(args[1], "K.mtx", sep=""))
+A <- as.matrix(A1);
+K <- as.matrix(K1);
+maxC = args[2];
+
+
+# number of features/attributes
+n = ncol(A);
+
+# number of data records
+m = nrow(A);
+
+# number of statistics
+numBaseStats = 17; # (14 scale stats, 3 categorical stats)
+
+max_kind = max(K);
+
+# matrices to store computed statistics
+baseStats = array(0,dim=c(numBaseStats,n));
+
+if (maxC > 0) {
+ countsArray = array(0,dim=c(maxC,n));
+}
+
+for(i in 1:n) {
+
+ # project out the i^th column
+ F = as.matrix(A[,i]);
+
+ kind = K[1,i];
+
+ if ( kind == 1 ) {
+ print("scale");
+ # compute SCALE statistics on the projected column
+ minimum = min(F);
+ maximum = max(F);
+ rng = maximum - minimum;
+
+ mu = mean(F);
+ m2 = moment(F, order=2, central=TRUE);
+ m3 = moment(F, order=3, central=TRUE);
+ m4 = moment(F, order=4, central=TRUE);
+
+ var = m/(m-1.0)*m2;
+
+ std_dev = sqrt(var);
+ se = std_dev/sqrt(m);
+ cv = std_dev/mu;
+
+ g1 = m3/(std_dev^3);
+ g2 = m4/(std_dev^4) - 3;
+ #se_g1=sqrt( 6*m*(m-1.0) / ((m-2.0)*(m+1.0)*(m+3.0)) );
+ se_g1=sqrt( (6/(m-2.0)) * (m/(m+1.0)) * ((m-1.0)/(m+3.0)) );
+
+ #se_g2= sqrt( (4*(m^2-1)*se_g1^2)/((m+5.0)*(m-3.0)) );
+ se_g2=sqrt( (4/(m+5.0)) * ((m^2-1)/(m-3.0)) * se_g1^2 );
+
+ md = median(F); #quantile(F, 0.5, type = 1);
+
+ S = sort(F)
+ q25d=m*0.25
+ q75d=m*0.75
+ q25i=ceiling(q25d)
+ q75i=ceiling(q75d)
+
+ iqm = sum(S[(q25i+1):q75i])
+ iqm = iqm + (q25i-q25d)*S[q25i] - (q75i-q75d)*S[q75i]
+ iqm = iqm/(m*0.5)
+
+ #iqm = mean( subset(F, F>quantile(F,1/4,type = 1) & F<=quantile(F,3/4,type = 1) ) )
+
+ # place the computed statistics in output matrices
+ baseStats[1,i] = minimum;
+ baseStats[2,i] = maximum;
+ baseStats[3,i] = rng;
+
+ baseStats[4,i] = mu;
+ baseStats[5,i] = var;
+ baseStats[6,i] = std_dev;
+ baseStats[7,i] = se;
+ baseStats[8,i] = cv;
+
+ baseStats[9,i] = g1;
+ baseStats[10,i] = g2;
+ baseStats[11,i] = se_g1;
+ baseStats[12,i] = se_g2;
+
+ baseStats[13,i] = md;
+ baseStats[14,i] = iqm;
+ }
+ else {
+ if (kind == 2 | kind == 3) {
+ print("categorical");
+
+ # check if the categorical column has valid values
+ minF = min(F);
+ if (minF <=0) {
+ print("ERROR: Categorical attributes can only take values starting from 1.");
+ }
+ else {
+ # compute CATEGORICAL statistics on the projected column
+ cat_counts = table(F); # counts for each category
+ num_cat = nrow(cat_counts); # number of categories
+
+ mx = max(t(as.vector(cat_counts)))
+ mode = which(cat_counts == mx)
+
+ numModes = length(cat_counts[ cat_counts==mx ]);
+
+ # place the computed statistics in output matrices
+ baseStats[15,i] = num_cat;
+ baseStats[16,i] = mode;
+ baseStats[17,i] = numModes;
+
+ if (max_kind > 1) {
+ countsArray[1:length(cat_counts),i] = cat_counts;
+ }
+ }
+ }
+ }
+}
+
+writeMM(as(baseStats, "CsparseMatrix"), paste(args[3], "base.stats", sep=""));
+if (max_kind > 1) {
+ writeMM(as(countsArray, "CsparseMatrix"), paste(args[3], "categorical.counts", sep=""));
+}
+
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/src/test/scripts/applications/parfor/parfor_univariate0.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/parfor/parfor_univariate0.dml b/src/test/scripts/applications/parfor/parfor_univariate0.dml
index 061d4a0..2a6a9c5 100644
--- a/src/test/scripts/applications/parfor/parfor_univariate0.dml
+++ b/src/test/scripts/applications/parfor/parfor_univariate0.dml
@@ -1,166 +1,166 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-#
-# DML Script to compute univariate statistics for all attributes
-# in a given data set
-#
-# Three inputs:
-# $1) A - input data
-# $2) K - row matrix that denotes the "kind" for each
-# attribute
-# kind=1 for scale, kind=2 for nominal,
-# kind=3 for ordinal
-# $3) maxC - maximum number of categories in any categorical
-# attribute
-#
-# One output:
-# $4) output directory in which following three statistics
-# files are created
-# + base.stats - matrix with all 17 statistics (14 scale,
-# 3 categorical) computed for all attributes
-# + categorical.counts - matrix in which each column
-# gives the category-wise counts for all categories in
-# that attribute
-#
-#
-
-A = read($1); # data file
-K = read($2); # attribute kind file
-maxC = $3; # max number of categories in any categorical attribute
-
-
-if (maxC < 0) {
- print("ERROR: maximum number maxC of categories must be a positve value.");
-}
-else {
-
-
- # number of features/attributes
- n = ncol(A);
-
- # number of data records
- m = nrow(A);
-
- # number of statistics
- numBaseStats = 17; # (14 scale stats, 3 categorical stats)
-
- max_kind = max(K);
-
- # matrices to store computed statistics
- baseStats = matrix(0, rows=numBaseStats, cols=n);
-
- if (maxC > 0) {
- countsArray = matrix(0, rows=maxC, cols=n);
- }
-
- for(i in 1:n) {
-
- # project out the i^th column
- F = A[,i];
-
- kind = castAsScalar(K[1,i]);
-
- if ( kind == 1 ) {
- print("[" + i + "] Scale");
- # compute SCALE statistics on the projected column
- minimum = min(F);
- maximum = max(F);
- rng = maximum - minimum;
-
- mu = mean(F);
- m2 = moment(F, 2);
- m3 = moment(F, 3);
- m4 = moment(F, 4);
-
- var = m/(m-1.0)*m2;
- std_dev = sqrt(var);
- se = std_dev/sqrt(m);
- cv = std_dev/mu;
-
- g1 = m3/(std_dev^3);
- g2 = m4/(std_dev^4) - 3;
- #se_g1=sqrt( 6*m*(m-1.0) / ((m-2.0)*(m+1.0)*(m+3.0)) );
- se_g1=sqrt( (6/(m-2.0)) * (m/(m+1.0)) * ((m-1.0)/(m+3.0)) );
-
- #se_g2= sqrt( (4*(m^2-1)*se_g1^2)/((m+5.0)*(m-3.0)) );
- se_g2=sqrt( (4/(m+5.0)) * ((m^2-1)/(m-3.0)) * se_g1^2 );
-
- md = median(F); #quantile(F, 0.5);
- iqm = interQuartileMean(F);
-
- # place the computed statistics in output matrices
- baseStats[1,i] = minimum;
- baseStats[2,i] = maximum;
- baseStats[3,i] = rng;
-
- baseStats[4,i] = mu;
- baseStats[5,i] = var;
- baseStats[6,i] = std_dev;
- baseStats[7,i] = se;
- baseStats[8,i] = cv;
-
- baseStats[9,i] = g1;
- baseStats[10,i] = g2;
- baseStats[11,i] = se_g1;
- baseStats[12,i] = se_g2;
-
- baseStats[13,i] = md;
- baseStats[14,i] = iqm;
- }
- else {
- if (kind == 2 | kind == 3) {
- print("[" + i + "] Categorical");
-
- # check if the categorical column has valid values
- minF = min(F);
- if (minF <=0) {
- print("ERROR: Categorical attributes can only take values starting from 1. Encountered a value " + minF + " in attribute " + i);
- }
- else {
- # compute CATEGORICAL statistics on the projected column
- cat_counts = table(F,1); # counts for each category
- num_cat = nrow(cat_counts); # number of categories
-
- mode = rowIndexMax(t(cat_counts));
- mx = max(cat_counts)
- modeArr = ppred(cat_counts, mx, "==")
- numModes = sum(modeArr);
-
- # place the computed statistics in output matrices
- baseStats[15,i] = num_cat;
- baseStats[16,i] = mode;
- baseStats[17,i] = numModes;
-
- if (max_kind > 1) {
- countsArray[,i] = cat_counts;
- }
- }
- }
- }
- }
-
- write(baseStats, $4+"/base.stats");
- if (max_kind > 1) {
- write(countsArray, $4+"/categorical.counts");
- }
-
-}
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+#
+# DML Script to compute univariate statistics for all attributes
+# in a given data set
+#
+# Three inputs:
+# $1) A - input data
+# $2) K - row matrix that denotes the "kind" for each
+# attribute
+# kind=1 for scale, kind=2 for nominal,
+# kind=3 for ordinal
+# $3) maxC - maximum number of categories in any categorical
+# attribute
+#
+# One output:
+# $4) output directory in which following three statistics
+# files are created
+# + base.stats - matrix with all 17 statistics (14 scale,
+# 3 categorical) computed for all attributes
+# + categorical.counts - matrix in which each column
+# gives the category-wise counts for all categories in
+# that attribute
+#
+#
+
+A = read($1); # data file
+K = read($2); # attribute kind file
+maxC = $3; # max number of categories in any categorical attribute
+
+
+if (maxC < 0) {
+ print("ERROR: maximum number maxC of categories must be a positve value.");
+}
+else {
+
+
+ # number of features/attributes
+ n = ncol(A);
+
+ # number of data records
+ m = nrow(A);
+
+ # number of statistics
+ numBaseStats = 17; # (14 scale stats, 3 categorical stats)
+
+ max_kind = max(K);
+
+ # matrices to store computed statistics
+ baseStats = matrix(0, rows=numBaseStats, cols=n);
+
+ if (maxC > 0) {
+ countsArray = matrix(0, rows=maxC, cols=n);
+ }
+
+ for(i in 1:n) {
+
+ # project out the i^th column
+ F = A[,i];
+
+ kind = castAsScalar(K[1,i]);
+
+ if ( kind == 1 ) {
+ print("[" + i + "] Scale");
+ # compute SCALE statistics on the projected column
+ minimum = min(F);
+ maximum = max(F);
+ rng = maximum - minimum;
+
+ mu = mean(F);
+ m2 = moment(F, 2);
+ m3 = moment(F, 3);
+ m4 = moment(F, 4);
+
+ var = m/(m-1.0)*m2;
+ std_dev = sqrt(var);
+ se = std_dev/sqrt(m);
+ cv = std_dev/mu;
+
+ g1 = m3/(std_dev^3);
+ g2 = m4/(std_dev^4) - 3;
+ #se_g1=sqrt( 6*m*(m-1.0) / ((m-2.0)*(m+1.0)*(m+3.0)) );
+ se_g1=sqrt( (6/(m-2.0)) * (m/(m+1.0)) * ((m-1.0)/(m+3.0)) );
+
+ #se_g2= sqrt( (4*(m^2-1)*se_g1^2)/((m+5.0)*(m-3.0)) );
+ se_g2=sqrt( (4/(m+5.0)) * ((m^2-1)/(m-3.0)) * se_g1^2 );
+
+ md = median(F); #quantile(F, 0.5);
+ iqm = interQuartileMean(F);
+
+ # place the computed statistics in output matrices
+ baseStats[1,i] = minimum;
+ baseStats[2,i] = maximum;
+ baseStats[3,i] = rng;
+
+ baseStats[4,i] = mu;
+ baseStats[5,i] = var;
+ baseStats[6,i] = std_dev;
+ baseStats[7,i] = se;
+ baseStats[8,i] = cv;
+
+ baseStats[9,i] = g1;
+ baseStats[10,i] = g2;
+ baseStats[11,i] = se_g1;
+ baseStats[12,i] = se_g2;
+
+ baseStats[13,i] = md;
+ baseStats[14,i] = iqm;
+ }
+ else {
+ if (kind == 2 | kind == 3) {
+ print("[" + i + "] Categorical");
+
+ # check if the categorical column has valid values
+ minF = min(F);
+ if (minF <=0) {
+ print("ERROR: Categorical attributes can only take values starting from 1. Encountered a value " + minF + " in attribute " + i);
+ }
+ else {
+ # compute CATEGORICAL statistics on the projected column
+ cat_counts = table(F,1); # counts for each category
+ num_cat = nrow(cat_counts); # number of categories
+
+ mode = rowIndexMax(t(cat_counts));
+ mx = max(cat_counts)
+ modeArr = ppred(cat_counts, mx, "==")
+ numModes = sum(modeArr);
+
+ # place the computed statistics in output matrices
+ baseStats[15,i] = num_cat;
+ baseStats[16,i] = mode;
+ baseStats[17,i] = numModes;
+
+ if (max_kind > 1) {
+ countsArray[,i] = cat_counts;
+ }
+ }
+ }
+ }
+ }
+
+ write(baseStats, $4+"/base.stats");
+ if (max_kind > 1) {
+ write(countsArray, $4+"/categorical.counts");
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/src/test/scripts/applications/parfor/parfor_univariate1.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/parfor/parfor_univariate1.dml b/src/test/scripts/applications/parfor/parfor_univariate1.dml
index e22fd86..1f120ef 100644
--- a/src/test/scripts/applications/parfor/parfor_univariate1.dml
+++ b/src/test/scripts/applications/parfor/parfor_univariate1.dml
@@ -1,166 +1,166 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-#
-# DML Script to compute univariate statistics for all attributes
-# in a given data set
-#
-# Three inputs:
-# $1) A - input data
-# $2) K - row matrix that denotes the "kind" for each
-# attribute
-# kind=1 for scale, kind=2 for nominal,
-# kind=3 for ordinal
-# $3) maxC - maximum number of categories in any categorical
-# attribute
-#
-# One output:
-# $4) output directory in which following three statistics
-# files are created
-# + base.stats - matrix with all 17 statistics (14 scale,
-# 3 categorical) computed for all attributes
-# + categorical.counts - matrix in which each column
-# gives the category-wise counts for all categories in
-# that attribute
-#
-#
-
-A = read($1); # data file
-K = read($2); # attribute kind file
-maxC = $3; # max number of categories in any categorical attribute
-
-
-if (maxC < 0) {
- print("ERROR: maximum number maxC of categories must be a positve value.");
-}
-else {
-
-
- # number of features/attributes
- n = ncol(A);
-
- # number of data records
- m = nrow(A);
-
- # number of statistics
- numBaseStats = 17; # (14 scale stats, 3 categorical stats)
-
- max_kind = max(K);
-
- # matrices to store computed statistics
- baseStats = matrix(0, rows=numBaseStats, cols=n);
-
- if (maxC > 0) {
- countsArray = matrix(0, rows=maxC, cols=n);
- }
-
- parfor(i in 1:n, par=4, mode=LOCAL, check=0, opt=NONE) {
-
- # project out the i^th column
- F = A[,i];
-
- kind = castAsScalar(K[1,i]);
-
- if ( kind == 1 ) {
- print("[" + i + "] Scale");
- # compute SCALE statistics on the projected column
- minimum = min(F);
- maximum = max(F);
- rng = maximum - minimum;
-
- mu = mean(F);
- m2 = moment(F, 2);
- m3 = moment(F, 3);
- m4 = moment(F, 4);
-
- var = m/(m-1.0)*m2;
- std_dev = sqrt(var);
- se = std_dev/sqrt(m);
- cv = std_dev/mu;
-
- g1 = m3/(std_dev^3);
- g2 = m4/(std_dev^4) - 3;
- #se_g1=sqrt( 6*m*(m-1.0) / ((m-2.0)*(m+1.0)*(m+3.0)) );
- se_g1=sqrt( (6/(m-2.0)) * (m/(m+1.0)) * ((m-1.0)/(m+3.0)) );
-
- #se_g2= sqrt( (4*(m^2-1)*se_g1^2)/((m+5.0)*(m-3.0)) );
- se_g2=sqrt( (4/(m+5.0)) * ((m^2-1)/(m-3.0)) * se_g1^2 );
-
- md = median(F); #quantile(F, 0.5);
- iqm = interQuartileMean(F);
-
- # place the computed statistics in output matrices
- baseStats[1,i] = minimum;
- baseStats[2,i] = maximum;
- baseStats[3,i] = rng;
-
- baseStats[4,i] = mu;
- baseStats[5,i] = var;
- baseStats[6,i] = std_dev;
- baseStats[7,i] = se;
- baseStats[8,i] = cv;
-
- baseStats[9,i] = g1;
- baseStats[10,i] = g2;
- baseStats[11,i] = se_g1;
- baseStats[12,i] = se_g2;
-
- baseStats[13,i] = md;
- baseStats[14,i] = iqm;
- }
- else {
- if (kind == 2 | kind == 3) {
- print("[" + i + "] Categorical");
-
- # check if the categorical column has valid values
- minF = min(F);
- if (minF <=0) {
- print("ERROR: Categorical attributes can only take values starting from 1. Encountered a value " + minF + " in attribute " + i);
- }
- else {
- # compute CATEGORICAL statistics on the projected column
- cat_counts = table(F,1); # counts for each category
- num_cat = nrow(cat_counts); # number of categories
-
- mode = rowIndexMax(t(cat_counts));
- mx = max(cat_counts)
- modeArr = ppred(cat_counts, mx, "==")
- numModes = sum(modeArr);
-
- # place the computed statistics in output matrices
- baseStats[15,i] = num_cat;
- baseStats[16,i] = mode;
- baseStats[17,i] = numModes;
-
- if (max_kind > 1) {
- countsArray[,i] = cat_counts;
- }
- }
- }
- }
- }
-
- write(baseStats, $4+"/base.stats");
- if (max_kind > 1) {
- write(countsArray, $4+"/categorical.counts");
- }
-
-}
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+#
+# DML Script to compute univariate statistics for all attributes
+# in a given data set
+#
+# Three inputs:
+# $1) A - input data
+# $2) K - row matrix that denotes the "kind" for each
+# attribute
+# kind=1 for scale, kind=2 for nominal,
+# kind=3 for ordinal
+# $3) maxC - maximum number of categories in any categorical
+# attribute
+#
+# One output:
+# $4) output directory in which following three statistics
+# files are created
+# + base.stats - matrix with all 17 statistics (14 scale,
+# 3 categorical) computed for all attributes
+# + categorical.counts - matrix in which each column
+# gives the category-wise counts for all categories in
+# that attribute
+#
+#
+
+A = read($1); # data file
+K = read($2); # attribute kind file
+maxC = $3; # max number of categories in any categorical attribute
+
+
+if (maxC < 0) {
+ print("ERROR: maximum number maxC of categories must be a positve value.");
+}
+else {
+
+
+ # number of features/attributes
+ n = ncol(A);
+
+ # number of data records
+ m = nrow(A);
+
+ # number of statistics
+ numBaseStats = 17; # (14 scale stats, 3 categorical stats)
+
+ max_kind = max(K);
+
+ # matrices to store computed statistics
+ baseStats = matrix(0, rows=numBaseStats, cols=n);
+
+ if (maxC > 0) {
+ countsArray = matrix(0, rows=maxC, cols=n);
+ }
+
+ parfor(i in 1:n, par=4, mode=LOCAL, check=0, opt=NONE) {
+
+ # project out the i^th column
+ F = A[,i];
+
+ kind = castAsScalar(K[1,i]);
+
+ if ( kind == 1 ) {
+ print("[" + i + "] Scale");
+ # compute SCALE statistics on the projected column
+ minimum = min(F);
+ maximum = max(F);
+ rng = maximum - minimum;
+
+ mu = mean(F);
+ m2 = moment(F, 2);
+ m3 = moment(F, 3);
+ m4 = moment(F, 4);
+
+ var = m/(m-1.0)*m2;
+ std_dev = sqrt(var);
+ se = std_dev/sqrt(m);
+ cv = std_dev/mu;
+
+ g1 = m3/(std_dev^3);
+ g2 = m4/(std_dev^4) - 3;
+ #se_g1=sqrt( 6*m*(m-1.0) / ((m-2.0)*(m+1.0)*(m+3.0)) );
+ se_g1=sqrt( (6/(m-2.0)) * (m/(m+1.0)) * ((m-1.0)/(m+3.0)) );
+
+ #se_g2= sqrt( (4*(m^2-1)*se_g1^2)/((m+5.0)*(m-3.0)) );
+ se_g2=sqrt( (4/(m+5.0)) * ((m^2-1)/(m-3.0)) * se_g1^2 );
+
+ md = median(F); #quantile(F, 0.5);
+ iqm = interQuartileMean(F);
+
+ # place the computed statistics in output matrices
+ baseStats[1,i] = minimum;
+ baseStats[2,i] = maximum;
+ baseStats[3,i] = rng;
+
+ baseStats[4,i] = mu;
+ baseStats[5,i] = var;
+ baseStats[6,i] = std_dev;
+ baseStats[7,i] = se;
+ baseStats[8,i] = cv;
+
+ baseStats[9,i] = g1;
+ baseStats[10,i] = g2;
+ baseStats[11,i] = se_g1;
+ baseStats[12,i] = se_g2;
+
+ baseStats[13,i] = md;
+ baseStats[14,i] = iqm;
+ }
+ else {
+ if (kind == 2 | kind == 3) {
+ print("[" + i + "] Categorical");
+
+ # check if the categorical column has valid values
+ minF = min(F);
+ if (minF <=0) {
+ print("ERROR: Categorical attributes can only take values starting from 1. Encountered a value " + minF + " in attribute " + i);
+ }
+ else {
+ # compute CATEGORICAL statistics on the projected column
+ cat_counts = table(F,1); # counts for each category
+ num_cat = nrow(cat_counts); # number of categories
+
+ mode = rowIndexMax(t(cat_counts));
+ mx = max(cat_counts)
+ modeArr = ppred(cat_counts, mx, "==")
+ numModes = sum(modeArr);
+
+ # place the computed statistics in output matrices
+ baseStats[15,i] = num_cat;
+ baseStats[16,i] = mode;
+ baseStats[17,i] = numModes;
+
+ if (max_kind > 1) {
+ countsArray[,i] = cat_counts;
+ }
+ }
+ }
+ }
+ }
+
+ write(baseStats, $4+"/base.stats");
+ if (max_kind > 1) {
+ write(countsArray, $4+"/categorical.counts");
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/src/test/scripts/applications/parfor/parfor_univariate4.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/parfor/parfor_univariate4.dml b/src/test/scripts/applications/parfor/parfor_univariate4.dml
index 1ebfcbd..8953c64 100644
--- a/src/test/scripts/applications/parfor/parfor_univariate4.dml
+++ b/src/test/scripts/applications/parfor/parfor_univariate4.dml
@@ -1,166 +1,166 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-#
-# DML Script to compute univariate statistics for all attributes
-# in a given data set
-#
-# Three inputs:
-# $1) A - input data
-# $2) K - row matrix that denotes the "kind" for each
-# attribute
-# kind=1 for scale, kind=2 for nominal,
-# kind=3 for ordinal
-# $3) maxC - maximum number of categories in any categorical
-# attribute
-#
-# One output:
-# $4) output directory in which following three statistics
-# files are created
-# + base.stats - matrix with all 17 statistics (14 scale,
-# 3 categorical) computed for all attributes
-# + categorical.counts - matrix in which each column
-# gives the category-wise counts for all categories in
-# that attribute
-#
-#
-
-A = read($1); # data file
-K = read($2); # attribute kind file
-maxC = $3; # max number of categories in any categorical attribute
-
-
-if (maxC < 0) {
- print("ERROR: maximum number maxC of categories must be a positve value.");
-}
-else {
-
-
- # number of features/attributes
- n = ncol(A);
-
- # number of data records
- m = nrow(A);
-
- # number of statistics
- numBaseStats = 17; # (14 scale stats, 3 categorical stats)
-
- max_kind = max(K);
-
- # matrices to store computed statistics
- baseStats = matrix(0, rows=numBaseStats, cols=n);
-
- if (maxC > 0) {
- countsArray = matrix(0, rows=maxC, cols=n);
- }
-
- parfor(i in 1:n, check=0) {
-
- # project out the i^th column
- F = A[,i];
-
- kind = castAsScalar(K[1,i]);
-
- if ( kind == 1 ) {
- print("[" + i + "] Scale");
- # compute SCALE statistics on the projected column
- minimum = min(F);
- maximum = max(F);
- rng = maximum - minimum;
-
- mu = mean(F);
- m2 = moment(F, 2);
- m3 = moment(F, 3);
- m4 = moment(F, 4);
-
- var = m/(m-1.0)*m2;
- std_dev = sqrt(var);
- se = std_dev/sqrt(m);
- cv = std_dev/mu;
-
- g1 = m3/(std_dev^3);
- g2 = m4/(std_dev^4) - 3;
- #se_g1=sqrt( 6*m*(m-1.0) / ((m-2.0)*(m+1.0)*(m+3.0)) );
- se_g1=sqrt( (6/(m-2.0)) * (m/(m+1.0)) * ((m-1.0)/(m+3.0)) );
-
- #se_g2= sqrt( (4*(m^2-1)*se_g1^2)/((m+5.0)*(m-3.0)) );
- se_g2=sqrt( (4/(m+5.0)) * ((m^2-1)/(m-3.0)) * se_g1^2 );
-
- md = median(F) #quantile(F, 0.5);
- iqm = interQuartileMean(F);
-
- # place the computed statistics in output matrices
- baseStats[1,i] = minimum;
- baseStats[2,i] = maximum;
- baseStats[3,i] = rng;
-
- baseStats[4,i] = mu;
- baseStats[5,i] = var;
- baseStats[6,i] = std_dev;
- baseStats[7,i] = se;
- baseStats[8,i] = cv;
-
- baseStats[9,i] = g1;
- baseStats[10,i] = g2;
- baseStats[11,i] = se_g1;
- baseStats[12,i] = se_g2;
-
- baseStats[13,i] = md;
- baseStats[14,i] = iqm;
- }
- else {
- if (kind == 2 | kind == 3) {
- print("[" + i + "] Categorical");
-
- # check if the categorical column has valid values
- minF = min(F);
- if (minF <=0) {
- print("ERROR: Categorical attributes can only take values starting from 1. Encountered a value " + minF + " in attribute " + i);
- }
- else {
- # compute CATEGORICAL statistics on the projected column
- cat_counts = table(F,1); # counts for each category
- num_cat = nrow(cat_counts); # number of categories
-
- mode = rowIndexMax(t(cat_counts));
- mx = max(cat_counts)
- modeArr = ppred(cat_counts, mx, "==")
- numModes = sum(modeArr);
-
- # place the computed statistics in output matrices
- baseStats[15,i] = num_cat;
- baseStats[16,i] = mode;
- baseStats[17,i] = numModes;
-
- if (max_kind > 1) {
- countsArray[,i] = cat_counts;
- }
- }
- }
- }
- }
-
- write(baseStats, $4+"/base.stats");
- if (max_kind > 1) {
- write(countsArray, $4+"/categorical.counts");
- }
-
-}
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+#
+# DML Script to compute univariate statistics for all attributes
+# in a given data set
+#
+# Three inputs:
+# $1) A - input data
+# $2) K - row matrix that denotes the "kind" for each
+# attribute
+# kind=1 for scale, kind=2 for nominal,
+# kind=3 for ordinal
+# $3) maxC - maximum number of categories in any categorical
+# attribute
+#
+# One output:
+# $4) output directory in which following three statistics
+# files are created
+# + base.stats - matrix with all 17 statistics (14 scale,
+# 3 categorical) computed for all attributes
+# + categorical.counts - matrix in which each column
+# gives the category-wise counts for all categories in
+# that attribute
+#
+#
+
+A = read($1); # data file
+K = read($2); # attribute kind file
+maxC = $3; # max number of categories in any categorical attribute
+
+
+if (maxC < 0) {
+ print("ERROR: maximum number maxC of categories must be a positve value.");
+}
+else {
+
+
+ # number of features/attributes
+ n = ncol(A);
+
+ # number of data records
+ m = nrow(A);
+
+ # number of statistics
+ numBaseStats = 17; # (14 scale stats, 3 categorical stats)
+
+ max_kind = max(K);
+
+ # matrices to store computed statistics
+ baseStats = matrix(0, rows=numBaseStats, cols=n);
+
+ if (maxC > 0) {
+ countsArray = matrix(0, rows=maxC, cols=n);
+ }
+
+ parfor(i in 1:n, check=0) {
+
+ # project out the i^th column
+ F = A[,i];
+
+ kind = castAsScalar(K[1,i]);
+
+ if ( kind == 1 ) {
+ print("[" + i + "] Scale");
+ # compute SCALE statistics on the projected column
+ minimum = min(F);
+ maximum = max(F);
+ rng = maximum - minimum;
+
+ mu = mean(F);
+ m2 = moment(F, 2);
+ m3 = moment(F, 3);
+ m4 = moment(F, 4);
+
+ var = m/(m-1.0)*m2;
+ std_dev = sqrt(var);
+ se = std_dev/sqrt(m);
+ cv = std_dev/mu;
+
+ g1 = m3/(std_dev^3);
+ g2 = m4/(std_dev^4) - 3;
+ #se_g1=sqrt( 6*m*(m-1.0) / ((m-2.0)*(m+1.0)*(m+3.0)) );
+ se_g1=sqrt( (6/(m-2.0)) * (m/(m+1.0)) * ((m-1.0)/(m+3.0)) );
+
+ #se_g2= sqrt( (4*(m^2-1)*se_g1^2)/((m+5.0)*(m-3.0)) );
+ se_g2=sqrt( (4/(m+5.0)) * ((m^2-1)/(m-3.0)) * se_g1^2 );
+
+ md = median(F) #quantile(F, 0.5);
+ iqm = interQuartileMean(F);
+
+ # place the computed statistics in output matrices
+ baseStats[1,i] = minimum;
+ baseStats[2,i] = maximum;
+ baseStats[3,i] = rng;
+
+ baseStats[4,i] = mu;
+ baseStats[5,i] = var;
+ baseStats[6,i] = std_dev;
+ baseStats[7,i] = se;
+ baseStats[8,i] = cv;
+
+ baseStats[9,i] = g1;
+ baseStats[10,i] = g2;
+ baseStats[11,i] = se_g1;
+ baseStats[12,i] = se_g2;
+
+ baseStats[13,i] = md;
+ baseStats[14,i] = iqm;
+ }
+ else {
+ if (kind == 2 | kind == 3) {
+ print("[" + i + "] Categorical");
+
+ # check if the categorical column has valid values
+ minF = min(F);
+ if (minF <=0) {
+ print("ERROR: Categorical attributes can only take values starting from 1. Encountered a value " + minF + " in attribute " + i);
+ }
+ else {
+ # compute CATEGORICAL statistics on the projected column
+ cat_counts = table(F,1); # counts for each category
+ num_cat = nrow(cat_counts); # number of categories
+
+ mode = rowIndexMax(t(cat_counts));
+ mx = max(cat_counts)
+ modeArr = ppred(cat_counts, mx, "==")
+ numModes = sum(modeArr);
+
+ # place the computed statistics in output matrices
+ baseStats[15,i] = num_cat;
+ baseStats[16,i] = mode;
+ baseStats[17,i] = numModes;
+
+ if (max_kind > 1) {
+ countsArray[,i] = cat_counts;
+ }
+ }
+ }
+ }
+ }
+
+ write(baseStats, $4+"/base.stats");
+ if (max_kind > 1) {
+ write(countsArray, $4+"/categorical.counts");
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/src/test/scripts/applications/validation/LinearLogisticRegression.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/validation/LinearLogisticRegression.dml b/src/test/scripts/applications/validation/LinearLogisticRegression.dml
index b5d3955..473b2dc 100644
--- a/src/test/scripts/applications/validation/LinearLogisticRegression.dml
+++ b/src/test/scripts/applications/validation/LinearLogisticRegression.dml
@@ -1,246 +1,246 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# Solves Linear Logistic Regression using Trust Region methods.
-# Can be adapted for L2-SVMs and more general unconstrained optimization problems also
-# setup optimization parameters (See: Trust Region Newton Method for Logistic Regression, Lin, Weng and Keerthi, JMLR 9 (2008) 627-650)
-# The parameter C is the weight that the algorithm puts on the loss function, instead of the regularizer.
-# if intercept = 1, then w has one extra value than the dimensions of X. Predictions are computed as X*w[1:n-1,1] + w[n,1]
-# Arguments: 1.X 2.y 3.intercept 4.max_iteration 5.C 6.w
-
-# 100K dataset
-# hadoop jar SystemML.jar -f LinearLogisticRegression.dml -args itau/logreg/X_100k_500 itau/logreg/y_100k 0 50 0.001 itau/logreg/w_100k_1
-
-# 1M dataset
-# hadoop jar SystemML.jar -f LinearLogisticRegression.dml -args itau/logreg/X_100m_5k itau/logreg/y_100m_1 0 50 0.001 itau/demo/logreg/w_100m_1
-
-
-# internal parameters
-tol = 0.001
-eta0 = 0.0001
-eta1 = 0.25
-eta2 = 0.75
-sigma1 = 0.25
-sigma2 = 0.5
-sigma3 = 4.0
-psi = 0.1
-
-# read training data files
-X = read($1)
-intercept = $3
-
-D = ncol(X)
-#initialize w
-w = Rand(rows=D, cols=1, min=0.0, max=0.0);
-zeros_D = Rand(rows = D, cols = 1, min = 0.0, max = 0.0);
-
-if (intercept == 1) {
- num_samples = nrow(X);
- ones = Rand(rows=num_samples, cols=1, min=1, max=1, pdf="uniform");
- X = append(X, ones);
- zero_matrix = Rand(rows=1, cols=1, min=0.0, max=0.0);
- w = t(append(t(w), zero_matrix));
- zeros_D = t(append(t(zeros_D), zero_matrix));
-}
-
-N = nrow(X)
-
-# read (training and test) labels
-y = read($2)
-
-maxiter = $4
-maxinneriter = 1000
-
-C = $5
-
-e = Rand(rows=1, cols=1, min=1.0, max=1.0);
-o = X %*% w
-logistic = 1.0/(1.0 + exp( -y * o))
-
-obj = 0.5 * t(w) %*% w + C*sum(-log(logistic))
-grad = w + C*t(X) %*% ((logistic - 1)*y)
-logisticD = logistic*(1-logistic)
-delta = sqrt(sum(grad*grad))
-
-# number of iterations
-iter = 0
-
-# starting point for CG
-
-# VS: change
-zeros_N = Rand(rows = N, cols = 1, min = 0.0, max = 0.0);
-
-# boolean for convergence check
-
-converge = (delta < tol) | (iter > maxiter)
-norm_r2 = sum(grad*grad)
-
-# VS: change
-norm_grad = sqrt(norm_r2)
-norm_grad_initial = norm_grad
-
-alpha = t(w) %*% w
-alpha2 = alpha
-
-while(!converge) {
-
- norm_grad = sqrt(sum(grad*grad))
-
- print("-- Outer Iteration = " + iter)
- objScalar = castAsScalar(obj)
- print(" Iterations = " + iter + ", Objective = " + objScalar + ", Gradient Norm = " + norm_grad)
-
- # SOLVE TRUST REGION SUB-PROBLEM
- s = zeros_D
- os = zeros_N
- r = -grad
- d = r
- inneriter = 0
- innerconverge = ( sqrt(sum(r*r)) <= psi * norm_grad)
- while (!innerconverge) {
- inneriter = inneriter + 1
- norm_r2 = sum(r*r)
- od = X %*% d
- Hd = d + C*(t(X) %*% (logisticD*od))
- alpha_deno = t(d) %*% Hd
- alpha = norm_r2 / alpha_deno
-
- s = s + castAsScalar(alpha) * d
- os = os + castAsScalar(alpha) * od
-
- sts = t(s) %*% s
- delta2 = delta*delta
- stsScalar = castAsScalar(sts)
-
- shouldBreak = FALSE; # to mimic "break" in the following 'if' condition
- if (stsScalar > delta2) {
- print(" --- cg reaches trust region boundary")
- s = s - castAsScalar(alpha) * d
- os = os - castAsScalar(alpha) * od
- std = t(s) %*% d
- dtd = t(d) %*% d
- sts = t(s) %*% s
- rad = sqrt(std*std + dtd*(delta2 - sts))
- stdScalar = castAsScalar(std)
- if(stdScalar >= 0) {
- tau = (delta2 - sts)/(std + rad)
- }
- else {
- tau = (rad - std)/dtd
- }
-
- s = s + castAsScalar(tau) * d
- os = os + castAsScalar(tau) * od
- r = r - castAsScalar(tau) * Hd
-
- #break
- shouldBreak = TRUE;
- innerconverge = TRUE;
-
- }
-
- if (!shouldBreak) {
- r = r - castAsScalar(alpha) * Hd
- old_norm_r2 = norm_r2
- norm_r2 = sum(r*r)
- beta = norm_r2/old_norm_r2
- d = r + beta*d
- innerconverge = (sqrt(norm_r2) <= psi * norm_grad) | (inneriter > maxinneriter)
- }
- }
-
- print(" --- Inner CG Iteration = " + inneriter)
- # END TRUST REGION SUB-PROBLEM
- # compute rho, update w, obtain delta
- gs = t(s) %*% grad
- qk = -0.5*(gs - (t(s) %*% r))
-
- wnew = w + s
- onew = o + os
- logisticnew = 1.0/(1.0 + exp(-y * onew ))
- objnew = 0.5 * t(wnew) %*% wnew + C * sum(-log(logisticnew))
-
- actred = (obj - objnew)
- actredScalar = castAsScalar(actred)
- rho = actred / qk
- qkScalar = castAsScalar(qk)
- rhoScalar = castAsScalar(rho);
- snorm = sqrt(sum( s * s ))
-
- print(" Actual = " + actredScalar)
- print(" Predicted = " + qkScalar)
-
- if (iter==0) {
- delta = min(delta, snorm)
- }
- alpha2 = objnew - obj - gs
- alpha2Scalar = castAsScalar(alpha2)
- if (alpha2Scalar <= 0) {
- alpha = sigma3*e
- }
- else {
- ascalar = max(sigma1, -0.5*castAsScalar(gs)/alpha2Scalar)
- alpha = ascalar*e
- }
-
- if (rhoScalar > eta0) {
-
- w = wnew
- o = onew
- grad = w + C*t(X) %*% ((logisticnew - 1) * y )
- norm_grad = sqrt(sum(grad*grad))
- logisticD = logisticnew * (1 - logisticnew)
- obj = objnew
- }
-
- alphaScalar = castAsScalar(alpha)
- if (rhoScalar < eta0){
- delta = min(max( alphaScalar , sigma1) * snorm, sigma2 * delta )
- }
- else {
- if (rhoScalar < eta1){
- delta = max(sigma1 * delta, min( alphaScalar * snorm, sigma2 * delta))
- }
- else {
- if (rhoScalar < eta2) {
- delta = max(sigma1 * delta, min( alphaScalar * snorm, sigma3 * delta))
- }
- else {
- delta = max(delta, min( alphaScalar * snorm, sigma3 * delta))
- }
- }
- }
-
- o2 = y * o
- correct = sum(ppred(o2, 0, ">"))
- accuracy = correct*100.0/N
- iter = iter + 1
- #converge = (norm_grad < (tol * norm_grad_initial)) | (iter > maxiter)
- converge = (norm_grad < tol) | (iter > maxiter)
-
- print(" Delta = " + delta)
- print(" Training Accuracy = " + accuracy)
- print(" Correct = " + correct)
- print(" OuterIter = " + iter)
- print(" Converge = " + converge)
-}
-
-write(w, $6, format="text");
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# Solves Linear Logistic Regression using Trust Region methods.
+# Can be adapted for L2-SVMs and more general unconstrained optimization problems also
+# setup optimization parameters (See: Trust Region Newton Method for Logistic Regression, Lin, Weng and Keerthi, JMLR 9 (2008) 627-650)
+# The parameter C is the weight that the algorithm puts on the loss function, instead of the regularizer.
+# if intercept = 1, then w has one extra value than the dimensions of X. Predictions are computed as X*w[1:n-1,1] + w[n,1]
+# Arguments: 1.X 2.y 3.intercept 4.max_iteration 5.C 6.w
+
+# 100K dataset
+# hadoop jar SystemML.jar -f LinearLogisticRegression.dml -args itau/logreg/X_100k_500 itau/logreg/y_100k 0 50 0.001 itau/logreg/w_100k_1
+
+# 1M dataset
+# hadoop jar SystemML.jar -f LinearLogisticRegression.dml -args itau/logreg/X_100m_5k itau/logreg/y_100m_1 0 50 0.001 itau/demo/logreg/w_100m_1
+
+
+# internal parameters
+tol = 0.001
+eta0 = 0.0001
+eta1 = 0.25
+eta2 = 0.75
+sigma1 = 0.25
+sigma2 = 0.5
+sigma3 = 4.0
+psi = 0.1
+
+# read training data files
+X = read($1)
+intercept = $3
+
+D = ncol(X)
+#initialize w
+w = Rand(rows=D, cols=1, min=0.0, max=0.0);
+zeros_D = Rand(rows = D, cols = 1, min = 0.0, max = 0.0);
+
+if (intercept == 1) {
+ num_samples = nrow(X);
+ ones = Rand(rows=num_samples, cols=1, min=1, max=1, pdf="uniform");
+ X = append(X, ones);
+ zero_matrix = Rand(rows=1, cols=1, min=0.0, max=0.0);
+ w = t(append(t(w), zero_matrix));
+ zeros_D = t(append(t(zeros_D), zero_matrix));
+}
+
+N = nrow(X)
+
+# read (training and test) labels
+y = read($2)
+
+maxiter = $4
+maxinneriter = 1000
+
+C = $5
+
+e = Rand(rows=1, cols=1, min=1.0, max=1.0);
+o = X %*% w
+logistic = 1.0/(1.0 + exp( -y * o))
+
+obj = 0.5 * t(w) %*% w + C*sum(-log(logistic))
+grad = w + C*t(X) %*% ((logistic - 1)*y)
+logisticD = logistic*(1-logistic)
+delta = sqrt(sum(grad*grad))
+
+# number of iterations
+iter = 0
+
+# starting point for CG
+
+# VS: change
+zeros_N = Rand(rows = N, cols = 1, min = 0.0, max = 0.0);
+
+# boolean for convergence check
+
+converge = (delta < tol) | (iter > maxiter)
+norm_r2 = sum(grad*grad)
+
+# VS: change
+norm_grad = sqrt(norm_r2)
+norm_grad_initial = norm_grad
+
+alpha = t(w) %*% w
+alpha2 = alpha
+
+while(!converge) {
+
+ norm_grad = sqrt(sum(grad*grad))
+
+ print("-- Outer Iteration = " + iter)
+ objScalar = castAsScalar(obj)
+ print(" Iterations = " + iter + ", Objective = " + objScalar + ", Gradient Norm = " + norm_grad)
+
+ # SOLVE TRUST REGION SUB-PROBLEM
+ s = zeros_D
+ os = zeros_N
+ r = -grad
+ d = r
+ inneriter = 0
+ innerconverge = ( sqrt(sum(r*r)) <= psi * norm_grad)
+ while (!innerconverge) {
+ inneriter = inneriter + 1
+ norm_r2 = sum(r*r)
+ od = X %*% d
+ Hd = d + C*(t(X) %*% (logisticD*od))
+ alpha_deno = t(d) %*% Hd
+ alpha = norm_r2 / alpha_deno
+
+ s = s + castAsScalar(alpha) * d
+ os = os + castAsScalar(alpha) * od
+
+ sts = t(s) %*% s
+ delta2 = delta*delta
+ stsScalar = castAsScalar(sts)
+
+ shouldBreak = FALSE; # to mimic "break" in the following 'if' condition
+ if (stsScalar > delta2) {
+ print(" --- cg reaches trust region boundary")
+ s = s - castAsScalar(alpha) * d
+ os = os - castAsScalar(alpha) * od
+ std = t(s) %*% d
+ dtd = t(d) %*% d
+ sts = t(s) %*% s
+ rad = sqrt(std*std + dtd*(delta2 - sts))
+ stdScalar = castAsScalar(std)
+ if(stdScalar >= 0) {
+ tau = (delta2 - sts)/(std + rad)
+ }
+ else {
+ tau = (rad - std)/dtd
+ }
+
+ s = s + castAsScalar(tau) * d
+ os = os + castAsScalar(tau) * od
+ r = r - castAsScalar(tau) * Hd
+
+ #break
+ shouldBreak = TRUE;
+ innerconverge = TRUE;
+
+ }
+
+ if (!shouldBreak) {
+ r = r - castAsScalar(alpha) * Hd
+ old_norm_r2 = norm_r2
+ norm_r2 = sum(r*r)
+ beta = norm_r2/old_norm_r2
+ d = r + beta*d
+ innerconverge = (sqrt(norm_r2) <= psi * norm_grad) | (inneriter > maxinneriter)
+ }
+ }
+
+ print(" --- Inner CG Iteration = " + inneriter)
+ # END TRUST REGION SUB-PROBLEM
+ # compute rho, update w, obtain delta
+ gs = t(s) %*% grad
+ qk = -0.5*(gs - (t(s) %*% r))
+
+ wnew = w + s
+ onew = o + os
+ logisticnew = 1.0/(1.0 + exp(-y * onew ))
+ objnew = 0.5 * t(wnew) %*% wnew + C * sum(-log(logisticnew))
+
+ actred = (obj - objnew)
+ actredScalar = castAsScalar(actred)
+ rho = actred / qk
+ qkScalar = castAsScalar(qk)
+ rhoScalar = castAsScalar(rho);
+ snorm = sqrt(sum( s * s ))
+
+ print(" Actual = " + actredScalar)
+ print(" Predicted = " + qkScalar)
+
+ if (iter==0) {
+ delta = min(delta, snorm)
+ }
+ alpha2 = objnew - obj - gs
+ alpha2Scalar = castAsScalar(alpha2)
+ if (alpha2Scalar <= 0) {
+ alpha = sigma3*e
+ }
+ else {
+ ascalar = max(sigma1, -0.5*castAsScalar(gs)/alpha2Scalar)
+ alpha = ascalar*e
+ }
+
+ if (rhoScalar > eta0) {
+
+ w = wnew
+ o = onew
+ grad = w + C*t(X) %*% ((logisticnew - 1) * y )
+ norm_grad = sqrt(sum(grad*grad))
+ logisticD = logisticnew * (1 - logisticnew)
+ obj = objnew
+ }
+
+ alphaScalar = castAsScalar(alpha)
+ if (rhoScalar < eta0){
+ delta = min(max( alphaScalar , sigma1) * snorm, sigma2 * delta )
+ }
+ else {
+ if (rhoScalar < eta1){
+ delta = max(sigma1 * delta, min( alphaScalar * snorm, sigma2 * delta))
+ }
+ else {
+ if (rhoScalar < eta2) {
+ delta = max(sigma1 * delta, min( alphaScalar * snorm, sigma3 * delta))
+ }
+ else {
+ delta = max(delta, min( alphaScalar * snorm, sigma3 * delta))
+ }
+ }
+ }
+
+ o2 = y * o
+ correct = sum(ppred(o2, 0, ">"))
+ accuracy = correct*100.0/N
+ iter = iter + 1
+ #converge = (norm_grad < (tol * norm_grad_initial)) | (iter > maxiter)
+ converge = (norm_grad < tol) | (iter > maxiter)
+
+ print(" Delta = " + delta)
+ print(" Training Accuracy = " + accuracy)
+ print(" Correct = " + correct)
+ print(" OuterIter = " + iter)
+ print(" Converge = " + converge)
+}
+
+write(w, $6, format="text");
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/src/test/scripts/applications/validation/genRandData4LogisticRegression.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/validation/genRandData4LogisticRegression.dml b/src/test/scripts/applications/validation/genRandData4LogisticRegression.dml
index d06de67..b42a315 100644
--- a/src/test/scripts/applications/validation/genRandData4LogisticRegression.dml
+++ b/src/test/scripts/applications/validation/genRandData4LogisticRegression.dml
@@ -1,122 +1,122 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# generates random data to test linear logistic regression
-
-# 100K dataset
-# hadoop jar SystemML.jar -f genRandData4LogisticRegression.dml -args 100000 500 0.0 5.0 itau/logreg/w_100k itau/logreg/X_100k_500 itau/logreg/y_100k 0 0 0.01
-
-# 1M dataset
-# hadoop jar SystemML.jar -f genRandData4LogisticRegression.dml -args 1000000 1000 0.0 5.0 itau/logreg/w_1m itau/logreg/X_1m_1k /logreg/y_1m 0 0 0.0001
-
-# $1 is number of samples
-# $2 is number of features (independent variables)
-# $3 is the mean of the linear form (w^T X)
-# $4 is the st.dev. of the linear form (w^T X)
-# $5 is location to store generated weights
-# $6 is location to store generated data
-# $7 is location to store generated labels
-# $8 addNoise. if 0 then no noise is added, to add noise set this to 1
-# $9 is 0 if no intercept and 1 if there is intercept
-# $10 controls sparsity in the generated data
-
-numSamples = $1
-numFeatures = $2
-meanLF = $3
-sigmaLF = $4
-addNoise = $8
-b = $9
-
-X = Rand (rows=numSamples, cols=numFeatures, min=-1, max=2, pdf="uniform", seed=0, sparsity=$10);
-w = Rand (rows=numFeatures, cols=1, min=-1, max=1, pdf="uniform", seed=0)
-
-if (b != 0) {
- b_mat = Rand (rows=numSamples, cols=1, min=1, max=1);
- X = append (X, b_mat);
- numFeatures_plus_one = numFeatures + 1;
- w = Rand (rows=numFeatures_plus_one, cols=1, min=-1, max=1, pdf="uniform", seed=0);
-}
-
-[w, new_sigmaLF] = scaleWeights (X, w, meanLF, sigmaLF);
-if (sigmaLF != new_sigmaLF) {
- print ("The standard deviation requirement on the linear form is TOO TIGHT!");
- print ("We relaxed sigmaLF from " + sigmaLF + " to " + new_sigmaLF + ".");
-}
-ot = X %*% w;
-
-if (b != 0) {
- X = X [, 1:numFeatures];
-}
-
-emp_meanLF = sum (ot) / numSamples;
-emp_sigmaLF = sqrt (sum (ot * ot) / numSamples - emp_meanLF * emp_meanLF);
-print ("Empirical meanLF = " + emp_meanLF + "; Empirical sigmaLF = " + emp_sigmaLF);
-
-prob = 1 / (1 + exp (- ot));
-
-if(addNoise == 1){
- r = Rand(rows=numSamples, cols=1, min=0, max=1, pdf="uniform", seed=0)
-}else{
- print("this data generator generates the same dataset for both noise=0 and noise=1")
- r = Rand(rows=numSamples, cols=1, min=0, max=1, pdf="uniform", seed=0)
- #r = Rand(rows=numSamples, cols=1, min=0.5, max=0.5, pdf="uniform")
-}
-
-print ("nrow(prob) = " + nrow(prob) + ", ncol(prob) = " + ncol(prob) + "; nrow(r) = " + nrow(r) + ", ncol(r) = " + ncol(r));
-
-Y = 1 - 2*ppred(prob, r, "<")
-
-write (w, $5, format="text");
-write (X, $6, format="binary");
-write (Y, $7, format="binary");
-
-
-# Shifts and scales the weights to ensure the desired statistics for Linear Form = w^T X
-# Used in data and/or weight generation in the testing of GLM, Logistic Regression etc.
-# new_sigmaLF == sigmaLF if successful, new_sigmaLF > sigmaLF if had to relax this constraint
-scaleWeights =
- function (Matrix[double] X_data, Matrix[double] w_unscaled, double meanLF, double sigmaLF)
- return (Matrix[double] w_scaled, double new_sigmaLF)
-{
- numFeatures = nrow (w_unscaled);
- W_ext = Rand (rows = numFeatures, cols = 2, min = 1, max = 1);
- W_ext [, 1] = w_unscaled;
- S1 = colSums (X_data %*% W_ext);
- TF = Rand (rows = 2, cols = 2, min = 1, max = 1);
- TF [1, 1] = S1 [1, 1] * meanLF * nrow (X_data) / castAsScalar (S1 %*% t(S1));
- TF [1, 2] = S1 [1, 2];
- TF [2, 1] = S1 [1, 2] * meanLF * nrow (X_data) / castAsScalar (S1 %*% t(S1));
- TF [2, 2] = - S1 [1, 1];
- TF = W_ext %*% TF;
- Q = t(TF) %*% t(X_data) %*% X_data %*% TF;
- Q [1, 1] = Q [1, 1] - nrow (X_data) * meanLF * meanLF;
- new_sigmaLF = sigmaLF;
- discr = castAsScalar (Q [1, 1] * Q [2, 2] - Q [1, 2] * Q [2, 1] - nrow (X_data) * Q [2, 2] * sigmaLF * sigmaLF);
- if (discr > 0.0) {
- new_sigmaLF = sqrt (castAsScalar ((Q [1, 1] * Q [2, 2] - Q [1, 2] * Q [2, 1]) / (nrow (X_data) * Q [2, 2])));
- discr = -0.0;
- }
- t = Rand (rows = 2, cols = 1, min = 1, max = 1);
- t [2, 1] = (- Q [1, 2] + sqrt (- discr)) / Q [2, 2];
- w_scaled = TF %*% t;
-}
-
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# generates random data to test linear logistic regression
+
+# 100K dataset
+# hadoop jar SystemML.jar -f genRandData4LogisticRegression.dml -args 100000 500 0.0 5.0 itau/logreg/w_100k itau/logreg/X_100k_500 itau/logreg/y_100k 0 0 0.01
+
+# 1M dataset
+# hadoop jar SystemML.jar -f genRandData4LogisticRegression.dml -args 1000000 1000 0.0 5.0 itau/logreg/w_1m itau/logreg/X_1m_1k /logreg/y_1m 0 0 0.0001
+
+# $1 is number of samples
+# $2 is number of features (independent variables)
+# $3 is the mean of the linear form (w^T X)
+# $4 is the st.dev. of the linear form (w^T X)
+# $5 is location to store generated weights
+# $6 is location to store generated data
+# $7 is location to store generated labels
+# $8 addNoise. if 0 then no noise is added, to add noise set this to 1
+# $9 is 0 if no intercept and 1 if there is intercept
+# $10 controls sparsity in the generated data
+
+numSamples = $1
+numFeatures = $2
+meanLF = $3
+sigmaLF = $4
+addNoise = $8
+b = $9
+
+X = Rand (rows=numSamples, cols=numFeatures, min=-1, max=2, pdf="uniform", seed=0, sparsity=$10);
+w = Rand (rows=numFeatures, cols=1, min=-1, max=1, pdf="uniform", seed=0)
+
+if (b != 0) {
+ b_mat = Rand (rows=numSamples, cols=1, min=1, max=1);
+ X = append (X, b_mat);
+ numFeatures_plus_one = numFeatures + 1;
+ w = Rand (rows=numFeatures_plus_one, cols=1, min=-1, max=1, pdf="uniform", seed=0);
+}
+
+[w, new_sigmaLF] = scaleWeights (X, w, meanLF, sigmaLF);
+if (sigmaLF != new_sigmaLF) {
+ print ("The standard deviation requirement on the linear form is TOO TIGHT!");
+ print ("We relaxed sigmaLF from " + sigmaLF + " to " + new_sigmaLF + ".");
+}
+ot = X %*% w;
+
+if (b != 0) {
+ X = X [, 1:numFeatures];
+}
+
+emp_meanLF = sum (ot) / numSamples;
+emp_sigmaLF = sqrt (sum (ot * ot) / numSamples - emp_meanLF * emp_meanLF);
+print ("Empirical meanLF = " + emp_meanLF + "; Empirical sigmaLF = " + emp_sigmaLF);
+
+prob = 1 / (1 + exp (- ot));
+
+if(addNoise == 1){
+ r = Rand(rows=numSamples, cols=1, min=0, max=1, pdf="uniform", seed=0)
+}else{
+ print("this data generator generates the same dataset for both noise=0 and noise=1")
+ r = Rand(rows=numSamples, cols=1, min=0, max=1, pdf="uniform", seed=0)
+ #r = Rand(rows=numSamples, cols=1, min=0.5, max=0.5, pdf="uniform")
+}
+
+print ("nrow(prob) = " + nrow(prob) + ", ncol(prob) = " + ncol(prob) + "; nrow(r) = " + nrow(r) + ", ncol(r) = " + ncol(r));
+
+Y = 1 - 2*ppred(prob, r, "<")
+
+write (w, $5, format="text");
+write (X, $6, format="binary");
+write (Y, $7, format="binary");
+
+
+# Shifts and scales the weights to ensure the desired statistics for Linear Form = w^T X
+# Used in data and/or weight generation in the testing of GLM, Logistic Regression etc.
+# new_sigmaLF == sigmaLF if successful, new_sigmaLF > sigmaLF if had to relax this constraint
+scaleWeights =
+ function (Matrix[double] X_data, Matrix[double] w_unscaled, double meanLF, double sigmaLF)
+ return (Matrix[double] w_scaled, double new_sigmaLF)
+{
+ numFeatures = nrow (w_unscaled);
+ W_ext = Rand (rows = numFeatures, cols = 2, min = 1, max = 1);
+ W_ext [, 1] = w_unscaled;
+ S1 = colSums (X_data %*% W_ext);
+ TF = Rand (rows = 2, cols = 2, min = 1, max = 1);
+ TF [1, 1] = S1 [1, 1] * meanLF * nrow (X_data) / castAsScalar (S1 %*% t(S1));
+ TF [1, 2] = S1 [1, 2];
+ TF [2, 1] = S1 [1, 2] * meanLF * nrow (X_data) / castAsScalar (S1 %*% t(S1));
+ TF [2, 2] = - S1 [1, 1];
+ TF = W_ext %*% TF;
+ Q = t(TF) %*% t(X_data) %*% X_data %*% TF;
+ Q [1, 1] = Q [1, 1] - nrow (X_data) * meanLF * meanLF;
+ new_sigmaLF = sigmaLF;
+ discr = castAsScalar (Q [1, 1] * Q [2, 2] - Q [1, 2] * Q [2, 1] - nrow (X_data) * Q [2, 2] * sigmaLF * sigmaLF);
+ if (discr > 0.0) {
+ new_sigmaLF = sqrt (castAsScalar ((Q [1, 1] * Q [2, 2] - Q [1, 2] * Q [2, 1]) / (nrow (X_data) * Q [2, 2])));
+ discr = -0.0;
+ }
+ t = Rand (rows = 2, cols = 1, min = 1, max = 1);
+ t [2, 1] = (- Q [1, 2] + sqrt (- discr)) / Q [2, 2];
+ w_scaled = TF %*% t;
+}
+
+
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/src/test/scripts/applications/welchTTest/welchTTest.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/welchTTest/welchTTest.R b/src/test/scripts/applications/welchTTest/welchTTest.R
index 66da912..ff74d71 100644
--- a/src/test/scripts/applications/welchTTest/welchTTest.R
+++ b/src/test/scripts/applications/welchTTest/welchTTest.R
@@ -1,49 +1,49 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-args <- commandArgs(TRUE)
-library(Matrix)
-
-posSamples = readMM(paste(args[1], "posSamples.mtx", sep=""))
-negSamples = readMM(paste(args[1], "negSamples.mtx", sep=""))
-
-#computing sample sizes
-posSampleSize = nrow(posSamples)
-negSampleSize = nrow(negSamples)
-
-#computing means
-posSampleMeans = colMeans(posSamples)
-negSampleMeans = colMeans(negSamples)
-
-#computing (unbiased) variances
-posSampleVariances = (colSums(posSamples^2) - posSampleSize * posSampleMeans^2) / (posSampleSize-1)
-negSampleVariances = (colSums(negSamples^2) - negSampleSize * negSampleMeans^2) / (negSampleSize-1)
-
-#computing t-statistics and degrees of freedom
-t_statistics = (posSampleMeans - negSampleMeans) / sqrt(posSampleVariances/posSampleSize + negSampleVariances/negSampleSize)
-degrees_of_freedom = round(((posSampleVariances/posSampleSize + negSampleVariances/negSampleSize) ^ 2) / (posSampleVariances^2/(posSampleSize^2 * (posSampleSize-1)) + negSampleVariances^2/(negSampleSize^2 * (negSampleSize-1))))
-
-#R will write a vector as a 1-column matrix, forcing it to write a 1-row matrix
-t_statistics_mat = matrix(t_statistics, 1, length(t_statistics))
-degrees_of_freedom_mat = matrix(degrees_of_freedom, 1, length(degrees_of_freedom))
-
-writeMM(as(t_statistics_mat, "CsparseMatrix"), paste(args[2], "t_statistics", sep=""))
-writeMM(as(degrees_of_freedom_mat, "CsparseMatrix"), paste(args[2], "degrees_of_freedom", sep=""))
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+args <- commandArgs(TRUE)
+library(Matrix)
+
+posSamples = readMM(paste(args[1], "posSamples.mtx", sep=""))
+negSamples = readMM(paste(args[1], "negSamples.mtx", sep=""))
+
+#computing sample sizes
+posSampleSize = nrow(posSamples)
+negSampleSize = nrow(negSamples)
+
+#computing means
+posSampleMeans = colMeans(posSamples)
+negSampleMeans = colMeans(negSamples)
+
+#computing (unbiased) variances
+posSampleVariances = (colSums(posSamples^2) - posSampleSize * posSampleMeans^2) / (posSampleSize-1)
+negSampleVariances = (colSums(negSamples^2) - negSampleSize * negSampleMeans^2) / (negSampleSize-1)
+
+#computing t-statistics and degrees of freedom
+t_statistics = (posSampleMeans - negSampleMeans) / sqrt(posSampleVariances/posSampleSize + negSampleVariances/negSampleSize)
+degrees_of_freedom = round(((posSampleVariances/posSampleSize + negSampleVariances/negSampleSize) ^ 2) / (posSampleVariances^2/(posSampleSize^2 * (posSampleSize-1)) + negSampleVariances^2/(negSampleSize^2 * (negSampleSize-1))))
+
+#R will write a vector as a 1-column matrix, forcing it to write a 1-row matrix
+t_statistics_mat = matrix(t_statistics, 1, length(t_statistics))
+degrees_of_freedom_mat = matrix(degrees_of_freedom, 1, length(degrees_of_freedom))
+
+writeMM(as(t_statistics_mat, "CsparseMatrix"), paste(args[2], "t_statistics", sep=""))
+writeMM(as(degrees_of_freedom_mat, "CsparseMatrix"), paste(args[2], "degrees_of_freedom", sep=""))
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/src/test/scripts/applications/welchTTest/welchTTest.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/welchTTest/welchTTest.dml b/src/test/scripts/applications/welchTTest/welchTTest.dml
index 4e42f03..7bc0144 100644
--- a/src/test/scripts/applications/welchTTest/welchTTest.dml
+++ b/src/test/scripts/applications/welchTTest/welchTTest.dml
@@ -1,43 +1,43 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-
-posSamples = read($1, format="text")
-negSamples = read($2, format="text")
-
-#computing sample sizes
-posSampleSize = nrow(posSamples)
-negSampleSize = nrow(negSamples)
-
-#computing means
-posSampleMeans = colMeans(posSamples)
-negSampleMeans = colMeans(negSamples)
-
-#computing (unbiased) variances
-posSampleVariances = (colSums(posSamples^2) - posSampleSize * posSampleMeans^2) / (posSampleSize-1)
-negSampleVariances = (colSums(negSamples^2) - negSampleSize * negSampleMeans^2) / (negSampleSize-1)
-
-#computing t-statistics and degrees of freedom
-t_statistics = (posSampleMeans - negSampleMeans) / sqrt(posSampleVariances/posSampleSize + negSampleVariances/negSampleSize)
-degrees_of_freedom = round(((posSampleVariances/posSampleSize + negSampleVariances/negSampleSize) ^ 2) / (posSampleVariances^2/(posSampleSize^2 * (posSampleSize-1)) + negSampleVariances^2/(negSampleSize^2 * (negSampleSize-1))))
-
-write(t_statistics, $3, format="text")
-write(degrees_of_freedom, $4, format="text")
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+posSamples = read($1, format="text")
+negSamples = read($2, format="text")
+
+#computing sample sizes
+posSampleSize = nrow(posSamples)
+negSampleSize = nrow(negSamples)
+
+#computing means
+posSampleMeans = colMeans(posSamples)
+negSampleMeans = colMeans(negSamples)
+
+#computing (unbiased) variances
+posSampleVariances = (colSums(posSamples^2) - posSampleSize * posSampleMeans^2) / (posSampleSize-1)
+negSampleVariances = (colSums(negSamples^2) - negSampleSize * negSampleMeans^2) / (negSampleSize-1)
+
+#computing t-statistics and degrees of freedom
+t_statistics = (posSampleMeans - negSampleMeans) / sqrt(posSampleVariances/posSampleSize + negSampleVariances/negSampleSize)
+degrees_of_freedom = round(((posSampleVariances/posSampleSize + negSampleVariances/negSampleSize) ^ 2) / (posSampleVariances^2/(posSampleSize^2 * (posSampleSize-1)) + negSampleVariances^2/(negSampleSize^2 * (negSampleSize-1))))
+
+write(t_statistics, $3, format="text")
+write(degrees_of_freedom, $4, format="text")
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/src/test/scripts/applications/welchTTest/welchTTest.pydml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/welchTTest/welchTTest.pydml b/src/test/scripts/applications/welchTTest/welchTTest.pydml
index abfff3a..5dbd049 100644
--- a/src/test/scripts/applications/welchTTest/welchTTest.pydml
+++ b/src/test/scripts/applications/welchTTest/welchTTest.pydml
@@ -1,43 +1,43 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-
-posSamples = load($1, format="text")
-negSamples = load($2, format="text")
-
-#computing sample sizes
-posSampleSize = nrow(posSamples)
-negSampleSize = nrow(negSamples)
-
-#computing means
-posSampleMeans = colMeans(posSamples)
-negSampleMeans = colMeans(negSamples)
-
-#computing (unbiased) variances
-posSampleVariances = (colSums(posSamples ** 2) - posSampleSize * posSampleMeans ** 2) / (posSampleSize-1)
-negSampleVariances = (colSums(negSamples ** 2) - negSampleSize * negSampleMeans ** 2) / (negSampleSize-1)
-
-#computing t-statistics and degrees of freedom
-t_statistics = (posSampleMeans - negSampleMeans) / sqrt(posSampleVariances/posSampleSize + negSampleVariances/negSampleSize)
-degrees_of_freedom = round(((posSampleVariances/posSampleSize + negSampleVariances/negSampleSize) ** 2) / (posSampleVariances ** 2/((posSampleSize ** 2) * (posSampleSize-1)) + (negSampleVariances ** 2)/((negSampleSize ** 2) * (negSampleSize-1))))
-
-save(t_statistics, $3, format="text")
-save(degrees_of_freedom, $4, format="text")
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+posSamples = load($1, format="text")
+negSamples = load($2, format="text")
+
+#computing sample sizes
+posSampleSize = nrow(posSamples)
+negSampleSize = nrow(negSamples)
+
+#computing means
+posSampleMeans = colMeans(posSamples)
+negSampleMeans = colMeans(negSamples)
+
+#computing (unbiased) variances
+posSampleVariances = (colSums(posSamples ** 2) - posSampleSize * posSampleMeans ** 2) / (posSampleSize-1)
+negSampleVariances = (colSums(negSamples ** 2) - negSampleSize * negSampleMeans ** 2) / (negSampleSize-1)
+
+#computing t-statistics and degrees of freedom
+t_statistics = (posSampleMeans - negSampleMeans) / sqrt(posSampleVariances/posSampleSize + negSampleVariances/negSampleSize)
+degrees_of_freedom = round(((posSampleVariances/posSampleSize + negSampleVariances/negSampleSize) ** 2) / (posSampleVariances ** 2/((posSampleSize ** 2) * (posSampleSize-1)) + (negSampleVariances ** 2)/((negSampleSize ** 2) * (negSampleSize-1))))
+
+save(t_statistics, $3, format="text")
+save(degrees_of_freedom, $4, format="text")
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/src/test/scripts/functions/aggregate/AllMax.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/aggregate/AllMax.R b/src/test/scripts/functions/aggregate/AllMax.R
index 3ed23d6..73cde8d 100644
--- a/src/test/scripts/functions/aggregate/AllMax.R
+++ b/src/test/scripts/functions/aggregate/AllMax.R
@@ -1,29 +1,29 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-args <- commandArgs(TRUE)
-
-library("Matrix")
-
-A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
-B <- as.matrix(max(A));
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+args <- commandArgs(TRUE)
+
+library("Matrix")
+
+A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
+B <- as.matrix(max(A));
+
writeMM(as(B, "CsparseMatrix"), paste(args[2], "B", sep=""));
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/src/test/scripts/functions/aggregate/AllMean.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/aggregate/AllMean.R b/src/test/scripts/functions/aggregate/AllMean.R
index 4e315a4..07ee1a9 100644
--- a/src/test/scripts/functions/aggregate/AllMean.R
+++ b/src/test/scripts/functions/aggregate/AllMean.R
@@ -1,29 +1,29 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-args <- commandArgs(TRUE)
-
-library("Matrix")
-
-A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
-B <- as.matrix(mean(A));
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+args <- commandArgs(TRUE)
+
+library("Matrix")
+
+A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
+B <- as.matrix(mean(A));
+
writeMM(as(B, "CsparseMatrix"), paste(args[2], "B", sep=""));
\ No newline at end of file