You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by mb...@apache.org on 2017/04/10 06:06:17 UTC
[2/2] incubator-systemml git commit: [SYSTEMML-1363] Rework codegen
algorithm tests (use existing algorithms)
[SYSTEMML-1363] Rework codegen algorithm tests (use existing algorithms)
This patch cleans up the codegen algorithm tests (GLM, Kmeans, L2SVM,
MSVM, LinregCG, Mlogreg, and PNMF) by removing duplicated - and already
outdated - algorithm DML scripts and updating the respective R
comparison scripts to match recent modifications. Furthermore, this also
moves PNMF to staging algorithms for global reference and because it is
easier to find than buried in our testsuite.
Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/d9c6acb3
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/d9c6acb3
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/d9c6acb3
Branch: refs/heads/master
Commit: d9c6acb3a41a1e49793467cf02298991bcd1ea20
Parents: f788b42
Author: Matthias Boehm <mb...@gmail.com>
Authored: Sun Apr 9 23:07:42 2017 -0700
Committer: Matthias Boehm <mb...@gmail.com>
Committed: Sun Apr 9 23:07:42 2017 -0700
----------------------------------------------------------------------
scripts/staging/PNMF.dml | 40 +
.../functions/codegen/AlgorithmGLM.java | 17 +-
.../functions/codegen/AlgorithmKMeans.java | 8 +-
.../functions/codegen/AlgorithmL2SVM.java | 11 +-
.../functions/codegen/AlgorithmLinregCG.java | 13 +-
.../functions/codegen/AlgorithmMLogreg.java | 11 +-
.../functions/codegen/AlgorithmMSVM.java | 11 +-
.../functions/codegen/AlgorithmPNMF.java | 11 +-
.../scripts/functions/codegen/Algorithm_GLM.dml | 1053 ------------------
.../functions/codegen/Algorithm_Kmeans.dml | 243 ----
.../scripts/functions/codegen/Algorithm_L2SVM.R | 18 +-
.../functions/codegen/Algorithm_L2SVM.dml | 106 --
.../functions/codegen/Algorithm_LinregCG.R | 138 ++-
.../functions/codegen/Algorithm_LinregCG.dml | 56 -
.../functions/codegen/Algorithm_MLogreg.dml | 274 -----
.../scripts/functions/codegen/Algorithm_MSVM.R | 2 +-
.../functions/codegen/Algorithm_MSVM.dml | 150 ---
.../functions/codegen/Algorithm_PNMF.dml | 40 -
18 files changed, 206 insertions(+), 1996 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/d9c6acb3/scripts/staging/PNMF.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/PNMF.dml b/scripts/staging/PNMF.dml
new file mode 100644
index 0000000..641cc09
--- /dev/null
+++ b/scripts/staging/PNMF.dml
@@ -0,0 +1,40 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+X = read($1);
+W = read($2);
+H = read($3);
+
+k = $4;
+eps = $5;
+max_iter = $6;
+iter = 1;
+
+while( iter < max_iter ) {
+ H = (H*(t(W)%*%(X/(W%*%H+eps)))) / t(colSums(W));
+ W = (W*((X/(W%*%H+eps))%*%t(H))) / t(rowSums(H));
+ obj = sum(W%*%H) - sum(X*log(W%*%H+eps));
+ print("iter=" + iter + " obj=" + obj);
+ iter = iter + 1;
+}
+
+write(W, $7);
+write(H, $8);
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/d9c6acb3/src/test/java/org/apache/sysml/test/integration/functions/codegen/AlgorithmGLM.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/functions/codegen/AlgorithmGLM.java b/src/test/java/org/apache/sysml/test/integration/functions/codegen/AlgorithmGLM.java
index dac2be0..803ec93 100644
--- a/src/test/java/org/apache/sysml/test/integration/functions/codegen/AlgorithmGLM.java
+++ b/src/test/java/org/apache/sysml/test/integration/functions/codegen/AlgorithmGLM.java
@@ -144,25 +144,24 @@ public class AlgorithmGLM extends AutomatedTestBase
loadTestConfiguration(config);
String[] addArgs = new String[4];
+ String param4Name = "lpow=";
switch(type) {
- case POISSON_LOG: //dfam, vpow, link, vpow
+ case POISSON_LOG: //dfam, vpow, link, lpow
addArgs[0] = "1"; addArgs[1] = "1.0"; addArgs[2] = "1"; addArgs[3] = "0.0";
break;
- case GAMMA_LOG: //dfam, vpow, link, vpow
+ case GAMMA_LOG: //dfam, vpow, link, lpow
addArgs[0] = "1"; addArgs[1] = "2.0"; addArgs[2] = "1"; addArgs[3] = "0.0";
break;
case BINOMIAL_PROBIT: //dfam, vpow, link, yneg
addArgs[0] = "2"; addArgs[1] = "0.0"; addArgs[2] = "3"; addArgs[3] = "2";
+ param4Name = "yneg=";
break;
}
- /* This is for running the junit test the new way, i.e., construct the arguments directly */
- String HOME = SCRIPT_DIR + TEST_DIR;
- fullDMLScriptName = HOME + TEST_NAME + ".dml";
- programArgs = new String[]{ "-explain", "-stats",
- "-args", input("X"), input("Y"),
- String.valueOf(intercept), String.valueOf(epsilon), String.valueOf(maxiter),
- addArgs[0], addArgs[1], addArgs[2], addArgs[3], output("w")};
+ fullDMLScriptName = "scripts/algorithms/GLM.dml";
+ programArgs = new String[]{ "-explain", "-stats", "-nvargs", "X="+input("X"), "Y="+input("Y"),
+ "icpt="+String.valueOf(intercept), "tol="+String.valueOf(epsilon), "moi="+String.valueOf(maxiter),
+ "dfam="+addArgs[0], "vpow="+addArgs[1], "link="+addArgs[2], param4Name+addArgs[3], "B="+output("w")};
rCmd = getRCmd(inputDir(), String.valueOf(intercept),String.valueOf(epsilon),
String.valueOf(maxiter), addArgs[0], addArgs[1], addArgs[2], addArgs[3], expectedDir());
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/d9c6acb3/src/test/java/org/apache/sysml/test/integration/functions/codegen/AlgorithmKMeans.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/functions/codegen/AlgorithmKMeans.java b/src/test/java/org/apache/sysml/test/integration/functions/codegen/AlgorithmKMeans.java
index 7ed9b2a..705ee65 100644
--- a/src/test/java/org/apache/sysml/test/integration/functions/codegen/AlgorithmKMeans.java
+++ b/src/test/java/org/apache/sysml/test/integration/functions/codegen/AlgorithmKMeans.java
@@ -156,12 +156,10 @@ public class AlgorithmKMeans extends AutomatedTestBase
TestConfiguration config = getTestConfiguration(TEST_NAME);
loadTestConfiguration(config);
- /* This is for running the junit test the new way, i.e., construct the arguments directly */
- String HOME = SCRIPT_DIR + TEST_DIR;
- fullDMLScriptName = HOME + TEST_NAME + ".dml";
+ fullDMLScriptName = "scripts/algorithms/Kmeans.dml";
programArgs = new String[]{ "-explain", "-stats",
- "-args", input("X"), String.valueOf(centroids),
- String.valueOf(runs), String.valueOf(epsilon), String.valueOf(maxiter), output("C")};
+ "-nvargs", "X="+input("X"), "k="+String.valueOf(centroids), "runs="+String.valueOf(runs),
+ "tol="+String.valueOf(epsilon), "maxi="+String.valueOf(maxiter), "C="+output("C")};
//rCmd = getRCmd(inputDir(), String.valueOf(intercept),String.valueOf(epsilon),
// String.valueOf(maxiter), expectedDir());
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/d9c6acb3/src/test/java/org/apache/sysml/test/integration/functions/codegen/AlgorithmL2SVM.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/functions/codegen/AlgorithmL2SVM.java b/src/test/java/org/apache/sysml/test/integration/functions/codegen/AlgorithmL2SVM.java
index 7481022..6f005ad 100644
--- a/src/test/java/org/apache/sysml/test/integration/functions/codegen/AlgorithmL2SVM.java
+++ b/src/test/java/org/apache/sysml/test/integration/functions/codegen/AlgorithmL2SVM.java
@@ -120,13 +120,10 @@ public class AlgorithmL2SVM extends AutomatedTestBase
TestConfiguration config = getTestConfiguration(TEST_NAME);
loadTestConfiguration(config);
- /* This is for running the junit test the new way, i.e., construct the arguments directly */
- String HOME = SCRIPT_DIR + TEST_DIR;
- fullDMLScriptName = HOME + TEST_NAME + ".dml";
- programArgs = new String[]{ "-explain", "-stats",
- "-args", input("X"), input("Y"),
- String.valueOf(intercept), String.valueOf(epsilon),
- String.valueOf(maxiter), output("w")};
+ fullDMLScriptName = "scripts/algorithms/l2-svm.dml";
+ programArgs = new String[]{ "-explain", "-stats", "-nvargs", "X="+input("X"), "Y="+input("Y"),
+ "icpt="+String.valueOf(intercept), "tol="+String.valueOf(epsilon), "reg=0.001",
+ "maxiter="+String.valueOf(maxiter), "model="+output("w"), "Log=\" \""};
rCmd = getRCmd(inputDir(), String.valueOf(intercept),String.valueOf(epsilon),
String.valueOf(maxiter), expectedDir());
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/d9c6acb3/src/test/java/org/apache/sysml/test/integration/functions/codegen/AlgorithmLinregCG.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/functions/codegen/AlgorithmLinregCG.java b/src/test/java/org/apache/sysml/test/integration/functions/codegen/AlgorithmLinregCG.java
index dacc6ee..729699f 100644
--- a/src/test/java/org/apache/sysml/test/integration/functions/codegen/AlgorithmLinregCG.java
+++ b/src/test/java/org/apache/sysml/test/integration/functions/codegen/AlgorithmLinregCG.java
@@ -111,16 +111,13 @@ public class AlgorithmLinregCG extends AutomatedTestBase
TestConfiguration config = getTestConfiguration(TEST_NAME);
loadTestConfiguration(config);
- /* This is for running the junit test the new way, i.e., construct the arguments directly */
- String HOME = SCRIPT_DIR + TEST_DIR;
- fullDMLScriptName = HOME + TEST_NAME + ".dml";
- programArgs = new String[]{ "-explain", "-stats",
- "-args", input("X"), input("y"),
- String.valueOf(intercept), String.valueOf(epsilon),
- String.valueOf(maxiter), output("w")};
+ fullDMLScriptName = "scripts/algorithms/LinearRegCG.dml";
+ programArgs = new String[]{ "-explain", "-stats", "-nvargs", "X="+input("X"), "Y="+input("y"),
+ "icpt="+String.valueOf(intercept), "tol="+String.valueOf(epsilon),
+ "maxi="+String.valueOf(maxiter), "reg=0.001", "B="+output("w")};
rCmd = getRCmd(inputDir(), String.valueOf(intercept),String.valueOf(epsilon),
- String.valueOf(maxiter), expectedDir());
+ String.valueOf(maxiter), "0.001", expectedDir());
OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION = rewrites;
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/d9c6acb3/src/test/java/org/apache/sysml/test/integration/functions/codegen/AlgorithmMLogreg.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/functions/codegen/AlgorithmMLogreg.java b/src/test/java/org/apache/sysml/test/integration/functions/codegen/AlgorithmMLogreg.java
index 20c0311..2e06940 100644
--- a/src/test/java/org/apache/sysml/test/integration/functions/codegen/AlgorithmMLogreg.java
+++ b/src/test/java/org/apache/sysml/test/integration/functions/codegen/AlgorithmMLogreg.java
@@ -159,13 +159,10 @@ public class AlgorithmMLogreg extends AutomatedTestBase
TestConfiguration config = getTestConfiguration(TEST_NAME);
loadTestConfiguration(config);
- /* This is for running the junit test the new way, i.e., construct the arguments directly */
- String HOME = SCRIPT_DIR + TEST_DIR;
- fullDMLScriptName = HOME + TEST_NAME + ".dml";
- programArgs = new String[]{ "-explain", "-stats",
- "-args", input("X"), input("Y"),
- String.valueOf(intercept), String.valueOf(epsilon),
- String.valueOf(maxiter), output("w")};
+ fullDMLScriptName = "scripts/algorithms/MultiLogReg.dml";
+ programArgs = new String[]{ "-explain", "-stats", "-nvargs", "X="+input("X"), "Y="+input("Y"),
+ "icpt="+String.valueOf(intercept), "tol="+String.valueOf(epsilon),
+ "moi="+String.valueOf(maxiter), "reg=0.001", "B="+output("w")};
rCmd = getRCmd(inputDir(), String.valueOf(intercept),String.valueOf(epsilon),
String.valueOf(maxiter), expectedDir());
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/d9c6acb3/src/test/java/org/apache/sysml/test/integration/functions/codegen/AlgorithmMSVM.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/functions/codegen/AlgorithmMSVM.java b/src/test/java/org/apache/sysml/test/integration/functions/codegen/AlgorithmMSVM.java
index ba8ac45..7f26d70 100644
--- a/src/test/java/org/apache/sysml/test/integration/functions/codegen/AlgorithmMSVM.java
+++ b/src/test/java/org/apache/sysml/test/integration/functions/codegen/AlgorithmMSVM.java
@@ -119,13 +119,10 @@ public class AlgorithmMSVM extends AutomatedTestBase
TestConfiguration config = getTestConfiguration(TEST_NAME);
loadTestConfiguration(config);
- /* This is for running the junit test the new way, i.e., construct the arguments directly */
- String HOME = SCRIPT_DIR + TEST_DIR;
- fullDMLScriptName = HOME + TEST_NAME + ".dml";
- programArgs = new String[]{ "-explain", "-stats",
- "-args", input("X"), input("Y"),
- String.valueOf(intercept), String.valueOf(epsilon),
- String.valueOf(maxiter), output("w")};
+ fullDMLScriptName = "scripts/algorithms/m-svm.dml";
+ programArgs = new String[]{ "-explain", "-stats", "-nvargs", "X="+input("X"), "Y="+input("Y"),
+ "icpt="+String.valueOf(intercept), "tol="+String.valueOf(epsilon), "reg=0.001",
+ "maxiter="+String.valueOf(maxiter), "model="+output("w"), "Log=\" \""};
rCmd = getRCmd(inputDir(), String.valueOf(intercept),String.valueOf(epsilon),
String.valueOf(maxiter), expectedDir());
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/d9c6acb3/src/test/java/org/apache/sysml/test/integration/functions/codegen/AlgorithmPNMF.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/functions/codegen/AlgorithmPNMF.java b/src/test/java/org/apache/sysml/test/integration/functions/codegen/AlgorithmPNMF.java
index 878cba1..9a8d932 100644
--- a/src/test/java/org/apache/sysml/test/integration/functions/codegen/AlgorithmPNMF.java
+++ b/src/test/java/org/apache/sysml/test/integration/functions/codegen/AlgorithmPNMF.java
@@ -99,13 +99,10 @@ public class AlgorithmPNMF extends AutomatedTestBase
TestConfiguration config = getTestConfiguration(TEST_NAME);
loadTestConfiguration(config);
- /* This is for running the junit test the new way, i.e., construct the arguments directly */
- String HOME = SCRIPT_DIR + TEST_DIR;
- fullDMLScriptName = HOME + TEST_NAME + ".dml";
- programArgs = new String[]{ "-explain", "-stats",
- "-args", input("X"), input("W"), input("H"),
- String.valueOf(rank), String.valueOf(epsilon), String.valueOf(maxiter),
- output("W"), output("H")};
+ fullDMLScriptName = "scripts/staging/PNMF.dml";
+ programArgs = new String[]{ "-explain", "-stats", "-args", input("X"),
+ input("W"), input("H"), String.valueOf(rank), String.valueOf(epsilon),
+ String.valueOf(maxiter), output("W"), output("H")};
rCmd = getRCmd(inputDir(), String.valueOf(rank), String.valueOf(epsilon),
String.valueOf(maxiter), expectedDir());
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/d9c6acb3/src/test/scripts/functions/codegen/Algorithm_GLM.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/codegen/Algorithm_GLM.dml b/src/test/scripts/functions/codegen/Algorithm_GLM.dml
deleted file mode 100644
index d8eb966..0000000
--- a/src/test/scripts/functions/codegen/Algorithm_GLM.dml
+++ /dev/null
@@ -1,1053 +0,0 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-X = read ($1);
-Y = read ($2);
-
-fileO = " ";
-fileLog = " ";
-
-intercept_status = $3
-eps = $4
-max_iteration_IRLS = $5;
-max_iteration_CG = $5;
-
-distribution_type = $6;
-variance_as_power_of_the_mean = $7;
-link_type = $8;
-
-if( distribution_type != 1 ) {
- link_as_power_of_the_mean = $9;
- bernoulli_No_label = 0.0;
-} else {
- link_as_power_of_the_mean = 1.0;
- bernoulli_No_label = $9;
-}
-
-dispersion = 0.0;
-regularization = 0.001;
-
-
-variance_as_power_of_the_mean = as.double (variance_as_power_of_the_mean);
-link_as_power_of_the_mean = as.double (link_as_power_of_the_mean);
-bernoulli_No_label = as.double (bernoulli_No_label);
-dispersion = as.double (dispersion);
-eps = as.double (eps);
-
-
-# Default values for output statistics:
-
-termination_code = 0;
-min_beta = 0.0 / 0.0;
-i_min_beta = 0.0 / 0.0;
-max_beta = 0.0 / 0.0;
-i_max_beta = 0.0 / 0.0;
-intercept_value = 0.0 / 0.0;
-dispersion = 0.0 / 0.0;
-estimated_dispersion = 0.0 / 0.0;
-deviance_nodisp = 0.0 / 0.0;
-deviance = 0.0 / 0.0;
-
-print("BEGIN GLM SCRIPT");
-
-num_records = nrow (X);
-num_features = ncol (X);
-zeros_r = matrix (0, rows = num_records, cols = 1);
-ones_r = 1 + zeros_r;
-
-# Introduce the intercept, shift and rescale the columns of X if needed
-
-if (intercept_status == 1 | intercept_status == 2) # add the intercept column
-{
- X = append (X, ones_r);
- num_features = ncol (X);
-}
-
-scale_lambda = matrix (1, rows = num_features, cols = 1);
-if (intercept_status == 1 | intercept_status == 2)
-{
- scale_lambda [num_features, 1] = 0;
-}
-
-if (intercept_status == 2) # scale-&-shift X columns to mean 0, variance 1
-{ # Important assumption: X [, num_features] = ones_r
- avg_X_cols = t(colSums(X)) / num_records;
- var_X_cols = (t(colSums (X ^ 2)) - num_records * (avg_X_cols ^ 2)) / (num_records - 1);
- is_unsafe = ppred (var_X_cols, 0.0, "<=");
- scale_X = 1.0 / sqrt (var_X_cols * (1 - is_unsafe) + is_unsafe);
- scale_X [num_features, 1] = 1;
- shift_X = - avg_X_cols * scale_X;
- shift_X [num_features, 1] = 0;
- rowSums_X_sq = (X ^ 2) %*% (scale_X ^ 2) + X %*% (2 * scale_X * shift_X) + sum (shift_X ^ 2);
-} else {
- scale_X = matrix (1, rows = num_features, cols = 1);
- shift_X = matrix (0, rows = num_features, cols = 1);
- rowSums_X_sq = rowSums (X ^ 2);
-}
-
-# Henceforth we replace "X" with "X %*% (SHIFT/SCALE TRANSFORM)" and rowSums(X ^ 2)
-# with "rowSums_X_sq" in order to preserve the sparsity of X under shift and scale.
-# The transform is then associatively applied to the other side of the expression,
-# and is rewritten via "scale_X" and "shift_X" as follows:
-#
-# ssX_A = (SHIFT/SCALE TRANSFORM) %*% A --- is rewritten as:
-# ssX_A = diag (scale_X) %*% A;
-# ssX_A [num_features, ] = ssX_A [num_features, ] + t(shift_X) %*% A;
-#
-# tssX_A = t(SHIFT/SCALE TRANSFORM) %*% A --- is rewritten as:
-# tssX_A = diag (scale_X) %*% A + shift_X %*% A [num_features, ];
-
-# Initialize other input-dependent parameters
-
-lambda = scale_lambda * regularization;
-if (max_iteration_CG == 0) {
- max_iteration_CG = num_features;
-}
-
-# In Bernoulli case, convert one-column "Y" into two-column
-
-if (distribution_type == 2 & ncol(Y) == 1)
-{
- is_Y_negative = ppred (Y, bernoulli_No_label, "==");
- Y = append (1 - is_Y_negative, is_Y_negative);
- count_Y_negative = sum (is_Y_negative);
- if (count_Y_negative == 0) {
- stop ("GLM Input Error: all Y-values encode Bernoulli YES-label, none encode NO-label");
- }
- if (count_Y_negative == nrow(Y)) {
- stop ("GLM Input Error: all Y-values encode Bernoulli NO-label, none encode YES-label");
- }
-}
-
-# Set up the canonical link, if requested [Then we have: Var(mu) * (d link / d mu) = const]
-
-if (link_type == 0)
-{
- if (distribution_type == 1) {
- link_type = 1;
- link_as_power_of_the_mean = 1.0 - variance_as_power_of_the_mean;
- } else { if (distribution_type == 2) {
- link_type = 2;
-} } }
-
-# For power distributions and/or links, we use two constants,
-# "variance as power of the mean" and "link_as_power_of_the_mean",
-# to specify the variance and the link as arbitrary powers of the
-# mean. However, the variance-powers of 1.0 (Poisson family) and
-# 2.0 (Gamma family) have to be treated as special cases, because
-# these values integrate into logarithms. The link-power of 0.0
-# is also special as it represents the logarithm link.
-
-num_response_columns = ncol (Y);
-
-is_supported = check_if_supported (num_response_columns, distribution_type, variance_as_power_of_the_mean, link_type, link_as_power_of_the_mean);
-if (is_supported == 1)
-{
-
-##### INITIALIZE THE BETAS #####
-
-[beta, saturated_log_l, isNaN] =
- glm_initialize (X, Y, distribution_type, variance_as_power_of_the_mean, link_type, link_as_power_of_the_mean, intercept_status, max_iteration_CG);
-if (isNaN == 0)
-{
-
-##### START OF THE MAIN PART #####
-
-sum_X_sq = sum (rowSums_X_sq);
-trust_delta = 0.5 * sqrt (num_features) / max (sqrt (rowSums_X_sq));
-### max_trust_delta = trust_delta * 10000.0;
-log_l = 0.0;
-deviance_nodisp = 0.0;
-new_deviance_nodisp = 0.0;
-isNaN_log_l = 2;
-newbeta = beta;
-g = matrix (0.0, rows = num_features, cols = 1);
-g_norm = sqrt (sum ((g + lambda * beta) ^ 2));
-accept_new_beta = 1;
-reached_trust_boundary = 0;
-neg_log_l_change_predicted = 0.0;
-i_IRLS = 0;
-
-print ("BEGIN IRLS ITERATIONS...");
-
-ssX_newbeta = diag (scale_X) %*% newbeta;
-ssX_newbeta [num_features, ] = ssX_newbeta [num_features, ] + t(shift_X) %*% newbeta;
-all_linear_terms = X %*% ssX_newbeta;
-
-[new_log_l, isNaN_new_log_l] = glm_log_likelihood_part
- (all_linear_terms, Y, distribution_type, variance_as_power_of_the_mean, link_type, link_as_power_of_the_mean);
-
-if (isNaN_new_log_l == 0) {
- new_deviance_nodisp = 2.0 * (saturated_log_l - new_log_l);
- new_log_l = new_log_l - 0.5 * sum (lambda * newbeta ^ 2);
-}
-
-if (fileLog != " ") {
- log_str = "POINT_STEP_NORM," + i_IRLS + "," + sqrt (sum (beta ^ 2));
- log_str = append (log_str, "OBJECTIVE," + i_IRLS + "," + (- new_log_l));
- log_str = append (log_str, "LINEAR_TERM_MIN," + i_IRLS + "," + min (all_linear_terms));
- log_str = append (log_str, "LINEAR_TERM_MAX," + i_IRLS + "," + max (all_linear_terms));
-} else {
- log_str = " ";
-}
-
-# set w to avoid 'Initialization of w depends on if-else/while execution' warnings
-w = matrix (0.0, rows=1, cols=1);
-while (termination_code == 0)
-{
- accept_new_beta = 1;
-
- if (i_IRLS > 0)
- {
- if (isNaN_log_l == 0) {
- accept_new_beta = 0;
- }
-
-# Decide whether to accept a new iteration point and update the trust region
-# See Alg. 4.1 on p. 69 of "Numerical Optimization" 2nd ed. by Nocedal and Wright
-
- rho = (- new_log_l + log_l) / neg_log_l_change_predicted;
- if (rho < 0.25 | isNaN_new_log_l == 1) {
- trust_delta = 0.25 * trust_delta;
- }
- if (rho > 0.75 & isNaN_new_log_l == 0 & reached_trust_boundary == 1) {
- trust_delta = 2 * trust_delta;
-
-### if (trust_delta > max_trust_delta) {
-### trust_delta = max_trust_delta;
-### }
-
- }
- if (rho > 0.1 & isNaN_new_log_l == 0) {
- accept_new_beta = 1;
- }
- }
-
- if (fileLog != " ") {
- log_str = append (log_str, "IS_POINT_UPDATED," + i_IRLS + "," + accept_new_beta);
- log_str = append (log_str, "TRUST_DELTA," + i_IRLS + "," + trust_delta);
- }
- if (accept_new_beta == 1)
- {
- beta = newbeta; log_l = new_log_l; deviance_nodisp = new_deviance_nodisp; isNaN_log_l = isNaN_new_log_l;
-
- [g_Y, w] = glm_dist (all_linear_terms, Y, distribution_type, variance_as_power_of_the_mean, link_type, link_as_power_of_the_mean);
-
- # We introduced these variables to avoid roundoff errors:
- # g_Y = y_residual / (y_var * link_grad);
- # w = 1.0 / (y_var * link_grad * link_grad);
-
- gXY = - t(X) %*% g_Y;
- g = diag (scale_X) %*% gXY + shift_X %*% gXY [num_features, ];
- g_norm = sqrt (sum ((g + lambda * beta) ^ 2));
-
- if (fileLog != " ") {
- log_str = append (log_str, "GRADIENT_NORM," + i_IRLS + "," + g_norm);
- }
- }
-
- [z, neg_log_l_change_predicted, num_CG_iters, reached_trust_boundary] =
- get_CG_Steihaug_point (X, scale_X, shift_X, w, g, beta, lambda, trust_delta, max_iteration_CG);
-
- newbeta = beta + z;
-
- ssX_newbeta = diag (scale_X) %*% newbeta;
- ssX_newbeta [num_features, ] = ssX_newbeta [num_features, ] + t(shift_X) %*% newbeta;
- all_linear_terms = X %*% ssX_newbeta;
-
- [new_log_l, isNaN_new_log_l] = glm_log_likelihood_part
- (all_linear_terms, Y, distribution_type, variance_as_power_of_the_mean, link_type, link_as_power_of_the_mean);
-
- if (isNaN_new_log_l == 0) {
- new_deviance_nodisp = 2.0 * (saturated_log_l - new_log_l);
- new_log_l = new_log_l - 0.5 * sum (lambda * newbeta ^ 2);
- }
-
- log_l_change = new_log_l - log_l; # R's criterion for termination: |dev - devold|/(|dev| + 0.1) < eps
-
- if (reached_trust_boundary == 0 & isNaN_new_log_l == 0 &
- (2.0 * abs (log_l_change) < eps * (deviance_nodisp + 0.1) | abs (log_l_change) < (abs (log_l) + abs (new_log_l)) * 0.00000000000001) )
- {
- termination_code = 1;
- }
- rho = - log_l_change / neg_log_l_change_predicted;
- z_norm = sqrt (sum (z * z));
-
- [z_norm_m, z_norm_e] = round_to_print (z_norm);
- [trust_delta_m, trust_delta_e] = round_to_print (trust_delta);
- [rho_m, rho_e] = round_to_print (rho);
- [new_log_l_m, new_log_l_e] = round_to_print (new_log_l);
- [log_l_change_m, log_l_change_e] = round_to_print (log_l_change);
- [g_norm_m, g_norm_e] = round_to_print (g_norm);
-
- i_IRLS = i_IRLS + 1;
- print ("Iter #" + i_IRLS + " completed"
- + ", ||z|| = " + z_norm_m + "E" + z_norm_e
- + ", trust_delta = " + trust_delta_m + "E" + trust_delta_e
- + ", reached = " + reached_trust_boundary
- + ", ||g|| = " + g_norm_m + "E" + g_norm_e
- + ", new_log_l = " + new_log_l_m + "E" + new_log_l_e
- + ", log_l_change = " + log_l_change_m + "E" + log_l_change_e
- + ", rho = " + rho_m + "E" + rho_e);
-
- if (fileLog != " ") {
- log_str = append (log_str, "NUM_CG_ITERS," + i_IRLS + "," + num_CG_iters);
- log_str = append (log_str, "IS_TRUST_REACHED," + i_IRLS + "," + reached_trust_boundary);
- log_str = append (log_str, "POINT_STEP_NORM," + i_IRLS + "," + z_norm);
- log_str = append (log_str, "OBJECTIVE," + i_IRLS + "," + (- new_log_l));
- log_str = append (log_str, "OBJ_DROP_REAL," + i_IRLS + "," + log_l_change);
- log_str = append (log_str, "OBJ_DROP_PRED," + i_IRLS + "," + (- neg_log_l_change_predicted));
- log_str = append (log_str, "OBJ_DROP_RATIO," + i_IRLS + "," + rho);
- log_str = append (log_str, "LINEAR_TERM_MIN," + i_IRLS + "," + min (all_linear_terms));
- log_str = append (log_str, "LINEAR_TERM_MAX," + i_IRLS + "," + max (all_linear_terms));
- }
-
- if (i_IRLS == max_iteration_IRLS) {
- termination_code = 2;
- }
-}
-
-beta = newbeta;
-log_l = new_log_l;
-deviance_nodisp = new_deviance_nodisp;
-
-if (termination_code == 1) {
- print ("Converged in " + i_IRLS + " steps.");
-} else {
- print ("Did not converge.");
-}
-
-ssX_beta = diag (scale_X) %*% beta;
-ssX_beta [num_features, ] = ssX_beta [num_features, ] + t(shift_X) %*% beta;
-if (intercept_status == 2) {
- beta_out = append (ssX_beta, beta);
-} else {
- beta_out = ssX_beta;
-}
-
-write (beta_out, $10);
-
-if (intercept_status == 1 | intercept_status == 2) {
- intercept_value = as.scalar (beta_out [num_features, 1]);
- beta_noicept = beta_out [1 : (num_features - 1), 1];
-} else {
- beta_noicept = beta_out [1 : num_features, 1];
-}
-min_beta = min (beta_noicept);
-max_beta = max (beta_noicept);
-tmp_i_min_beta = rowIndexMin (t(beta_noicept))
-i_min_beta = as.scalar (tmp_i_min_beta [1, 1]);
-tmp_i_max_beta = rowIndexMax (t(beta_noicept))
-i_max_beta = as.scalar (tmp_i_max_beta [1, 1]);
-
-##### OVER-DISPERSION PART #####
-
-all_linear_terms = X %*% ssX_beta;
-[g_Y, w] = glm_dist (all_linear_terms, Y, distribution_type, variance_as_power_of_the_mean, link_type, link_as_power_of_the_mean);
-
-pearson_residual_sq = g_Y ^ 2 / w;
-pearson_residual_sq = replace (target = pearson_residual_sq, pattern = 0.0/0.0, replacement = 0);
-# pearson_residual_sq = (y_residual ^ 2) / y_var;
-
-if (num_records > num_features) {
- estimated_dispersion = sum (pearson_residual_sq) / (num_records - num_features);
-}
-if (dispersion <= 0.0) {
- dispersion = estimated_dispersion;
-}
-deviance = deviance_nodisp / dispersion;
-
-##### END OF THE MAIN PART #####
-
-} else { print ("Input matrices are out of range. Terminating the DML."); termination_code = 3; }
-} else { print ("Distribution/Link not supported. Terminating the DML."); termination_code = 4; }
-
-str = "TERMINATION_CODE," + termination_code;
-str = append (str, "BETA_MIN," + min_beta);
-str = append (str, "BETA_MIN_INDEX," + i_min_beta);
-str = append (str, "BETA_MAX," + max_beta);
-str = append (str, "BETA_MAX_INDEX," + i_max_beta);
-str = append (str, "INTERCEPT," + intercept_value);
-str = append (str, "DISPERSION," + dispersion);
-str = append (str, "DISPERSION_EST," + estimated_dispersion);
-str = append (str, "DEVIANCE_UNSCALED," + deviance_nodisp);
-str = append (str, "DEVIANCE_SCALED," + deviance);
-print (str);
-
-
-
-
-check_if_supported =
- function (int ncol_y, int dist_type, double var_power, int link_type, double link_power)
- return (int is_supported)
-{
- is_supported = 0;
- if (ncol_y == 1 & dist_type == 1 & link_type == 1)
- { # POWER DISTRIBUTION
- is_supported = 1;
- if (var_power == 0.0 & link_power == -1.0) {print ("Gaussian.inverse"); } else {
- if (var_power == 0.0 & link_power == 0.0) {print ("Gaussian.log"); } else {
- if (var_power == 0.0 & link_power == 0.5) {print ("Gaussian.sqrt"); } else {
- if (var_power == 0.0 & link_power == 1.0) {print ("Gaussian.id"); } else {
- if (var_power == 0.0 ) {print ("Gaussian.power_nonlog"); } else {
- if (var_power == 1.0 & link_power == -1.0) {print ("Poisson.inverse"); } else {
- if (var_power == 1.0 & link_power == 0.0) {print ("Poisson.log"); } else {
- if (var_power == 1.0 & link_power == 0.5) {print ("Poisson.sqrt"); } else {
- if (var_power == 1.0 & link_power == 1.0) {print ("Poisson.id"); } else {
- if (var_power == 1.0 ) {print ("Poisson.power_nonlog"); } else {
- if (var_power == 2.0 & link_power == -1.0) {print ("Gamma.inverse"); } else {
- if (var_power == 2.0 & link_power == 0.0) {print ("Gamma.log"); } else {
- if (var_power == 2.0 & link_power == 0.5) {print ("Gamma.sqrt"); } else {
- if (var_power == 2.0 & link_power == 1.0) {print ("Gamma.id"); } else {
- if (var_power == 2.0 ) {print ("Gamma.power_nonlog"); } else {
- if (var_power == 3.0 & link_power == -2.0) {print ("InvGaussian.1/mu^2"); } else {
- if (var_power == 3.0 & link_power == -1.0) {print ("InvGaussian.inverse"); } else {
- if (var_power == 3.0 & link_power == 0.0) {print ("InvGaussian.log"); } else {
- if (var_power == 3.0 & link_power == 0.5) {print ("InvGaussian.sqrt"); } else {
- if (var_power == 3.0 & link_power == 1.0) {print ("InvGaussian.id"); } else {
- if (var_power == 3.0 ) {print ("InvGaussian.power_nonlog");}else{
- if ( link_power == 0.0) {print ("PowerDist.log"); } else {
- print ("PowerDist.power_nonlog");
- } }}}}} }}}}} }}}}} }}}}} }}
- if (ncol_y == 1 & dist_type == 2)
- {
- print ("Error: Bernoulli response matrix has not been converted into two-column format.");
- }
- if (ncol_y == 2 & dist_type == 2 & link_type >= 1 & link_type <= 5)
- { # BINOMIAL/BERNOULLI DISTRIBUTION
- is_supported = 1;
- if (link_type == 1 & link_power == -1.0) {print ("Binomial.inverse"); } else {
- if (link_type == 1 & link_power == 0.0) {print ("Binomial.log"); } else {
- if (link_type == 1 & link_power == 0.5) {print ("Binomial.sqrt"); } else {
- if (link_type == 1 & link_power == 1.0) {print ("Binomial.id"); } else {
- if (link_type == 1) {print ("Binomial.power_nonlog"); } else {
- if (link_type == 2) {print ("Binomial.logit"); } else {
- if (link_type == 3) {print ("Binomial.probit"); } else {
- if (link_type == 4) {print ("Binomial.cloglog"); } else {
- if (link_type == 5) {print ("Binomial.cauchit"); }
- } }}}}} }}}
- if (is_supported == 0) {
- print ("Response matrix with " + ncol_y + " columns, distribution family (" + dist_type + ", " + var_power
- + ") and link family (" + link_type + ", " + link_power + ") are NOT supported together.");
- }
-}
-
-glm_initialize = function (Matrix[double] X, Matrix[double] Y, int dist_type, double var_power, int link_type, double link_power, int icept_status, int max_iter_CG)
-return (Matrix[double] beta, double saturated_log_l, int isNaN)
-{
- saturated_log_l = 0.0;
- isNaN = 0;
- y_corr = Y [, 1];
- if (dist_type == 2) {
- n_corr = rowSums (Y);
- is_n_zero = ppred (n_corr, 0.0, "==");
- y_corr = Y [, 1] / (n_corr + is_n_zero) + (0.5 - Y [, 1]) * is_n_zero;
- }
- linear_terms = y_corr;
- if (dist_type == 1 & link_type == 1) { # POWER DISTRIBUTION
- if (link_power == 0.0) {
- if (sum (ppred (y_corr, 0.0, "<")) == 0) {
- is_zero_y_corr = ppred (y_corr, 0.0, "==");
- linear_terms = log (y_corr + is_zero_y_corr) - is_zero_y_corr / (1.0 - is_zero_y_corr);
- } else { isNaN = 1; }
- } else { if (link_power == 1.0) {
- linear_terms = y_corr;
- } else { if (link_power == -1.0) {
- linear_terms = 1.0 / y_corr;
- } else { if (link_power == 0.5) {
- if (sum (ppred (y_corr, 0.0, "<")) == 0) {
- linear_terms = sqrt (y_corr);
- } else { isNaN = 1; }
- } else { if (link_power > 0.0) {
- if (sum (ppred (y_corr, 0.0, "<")) == 0) {
- is_zero_y_corr = ppred (y_corr, 0.0, "==");
- linear_terms = (y_corr + is_zero_y_corr) ^ link_power - is_zero_y_corr;
- } else { isNaN = 1; }
- } else {
- if (sum (ppred (y_corr, 0.0, "<=")) == 0) {
- linear_terms = y_corr ^ link_power;
- } else { isNaN = 1; }
- }}}}}
- }
- if (dist_type == 2 & link_type >= 1 & link_type <= 5)
- { # BINOMIAL/BERNOULLI DISTRIBUTION
- if (link_type == 1 & link_power == 0.0) { # Binomial.log
- if (sum (ppred (y_corr, 0.0, "<")) == 0) {
- is_zero_y_corr = ppred (y_corr, 0.0, "==");
- linear_terms = log (y_corr + is_zero_y_corr) - is_zero_y_corr / (1.0 - is_zero_y_corr);
- } else { isNaN = 1; }
- } else { if (link_type == 1 & link_power > 0.0) { # Binomial.power_nonlog pos
- if (sum (ppred (y_corr, 0.0, "<")) == 0) {
- is_zero_y_corr = ppred (y_corr, 0.0, "==");
- linear_terms = (y_corr + is_zero_y_corr) ^ link_power - is_zero_y_corr;
- } else { isNaN = 1; }
- } else { if (link_type == 1) { # Binomial.power_nonlog neg
- if (sum (ppred (y_corr, 0.0, "<=")) == 0) {
- linear_terms = y_corr ^ link_power;
- } else { isNaN = 1; }
- } else {
- is_zero_y_corr = ppred (y_corr, 0.0, "<=");
- is_one_y_corr = ppred (y_corr, 1.0, ">=");
- y_corr = y_corr * (1.0 - is_zero_y_corr) * (1.0 - is_one_y_corr) + 0.5 * (is_zero_y_corr + is_one_y_corr);
- if (link_type == 2) { # Binomial.logit
- linear_terms = log (y_corr / (1.0 - y_corr))
- + is_one_y_corr / (1.0 - is_one_y_corr) - is_zero_y_corr / (1.0 - is_zero_y_corr);
- } else { if (link_type == 3) { # Binomial.probit
- y_below_half = y_corr + (1.0 - 2.0 * y_corr) * ppred (y_corr, 0.5, ">");
- t = sqrt (- 2.0 * log (y_below_half));
- approx_inv_Gauss_CDF = - t + (2.515517 + t * (0.802853 + t * 0.010328)) / (1.0 + t * (1.432788 + t * (0.189269 + t * 0.001308)));
- linear_terms = approx_inv_Gauss_CDF * (1.0 - 2.0 * ppred (y_corr, 0.5, ">"))
- + is_one_y_corr / (1.0 - is_one_y_corr) - is_zero_y_corr / (1.0 - is_zero_y_corr);
- } else { if (link_type == 4) { # Binomial.cloglog
- linear_terms = log (- log (1.0 - y_corr))
- - log (- log (0.5)) * (is_zero_y_corr + is_one_y_corr)
- + is_one_y_corr / (1.0 - is_one_y_corr) - is_zero_y_corr / (1.0 - is_zero_y_corr);
- } else { if (link_type == 5) { # Binomial.cauchit
- linear_terms = tan ((y_corr - 0.5) * 3.1415926535897932384626433832795)
- + is_one_y_corr / (1.0 - is_one_y_corr) - is_zero_y_corr / (1.0 - is_zero_y_corr);
- }} }}}}}
- }
-
- if (isNaN == 0) {
- [saturated_log_l, isNaN] =
- glm_log_likelihood_part (linear_terms, Y, dist_type, var_power, link_type, link_power);
- }
-
- if ((dist_type == 1 & link_type == 1 & link_power == 0.0) |
- (dist_type == 2 & link_type >= 2))
- {
- desired_eta = 0.0;
- } else { if (link_type == 1 & link_power == 0.0) {
- desired_eta = log (0.5);
- } else { if (link_type == 1) {
- desired_eta = 0.5 ^ link_power;
- } else {
- desired_eta = 0.5;
- }}}
-
- beta = matrix (0.0, rows = ncol(X), cols = 1);
-
- if (desired_eta != 0.0) {
- if (icept_status == 1 | icept_status == 2) {
- beta [nrow(beta), 1] = desired_eta;
- } else {
- # We want: avg (X %*% ssX_transform %*% beta) = desired_eta
- # Note that "ssX_transform" is trivial here, hence ignored
-
- beta = straightenX (X, 0.000001, max_iter_CG);
- beta = beta * desired_eta;
-} } }
-
-
-glm_dist = function (Matrix[double] linear_terms, Matrix[double] Y,
- int dist_type, double var_power, int link_type, double link_power)
- return (Matrix[double] g_Y, Matrix[double] w)
- # ORIGINALLY we returned more meaningful vectors, namely:
- # Matrix[double] y_residual : y - y_mean, i.e. y observed - y predicted
- # Matrix[double] link_gradient : derivative of the link function
- # Matrix[double] var_function : variance without dispersion, i.e. the V(mu) function
- # BUT, this caused roundoff errors, so we had to compute "directly useful" vectors
- # and skip over the "meaningful intermediaries". Now we output these two variables:
- # g_Y = y_residual / (var_function * link_gradient);
- # w = 1.0 / (var_function * link_gradient ^ 2);
-{
- num_records = nrow (linear_terms);
- zeros_r = matrix (0.0, rows = num_records, cols = 1);
- ones_r = 1 + zeros_r;
- g_Y = zeros_r;
- w = zeros_r;
-
- # Some constants
-
- one_over_sqrt_two_pi = 0.39894228040143267793994605993438;
- ones_2 = matrix (1.0, rows = 1, cols = 2);
- p_one_m_one = ones_2;
- p_one_m_one [1, 2] = -1.0;
- m_one_p_one = ones_2;
- m_one_p_one [1, 1] = -1.0;
- zero_one = ones_2;
- zero_one [1, 1] = 0.0;
- one_zero = ones_2;
- one_zero [1, 2] = 0.0;
- flip_pos = matrix (0, rows = 2, cols = 2);
- flip_neg = flip_pos;
- flip_pos [1, 2] = 1;
- flip_pos [2, 1] = 1;
- flip_neg [1, 2] = -1;
- flip_neg [2, 1] = 1;
-
- if (dist_type == 1 & link_type == 1) { # POWER DISTRIBUTION
- y_mean = zeros_r;
- if (link_power == 0.0) {
- y_mean = exp (linear_terms);
- y_mean_pow = y_mean ^ (1 - var_power);
- w = y_mean_pow * y_mean;
- g_Y = y_mean_pow * (Y - y_mean);
- } else { if (link_power == 1.0) {
- y_mean = linear_terms;
- w = y_mean ^ (- var_power);
- g_Y = w * (Y - y_mean);
- } else {
- y_mean = linear_terms ^ (1.0 / link_power);
- c1 = (1 - var_power) / link_power - 1;
- c2 = (2 - var_power) / link_power - 2;
- g_Y = (linear_terms ^ c1) * (Y - y_mean) / link_power;
- w = (linear_terms ^ c2) / (link_power ^ 2);
- } }}
- if (dist_type == 2 & link_type >= 1 & link_type <= 5)
- { # BINOMIAL/BERNOULLI DISTRIBUTION
- if (link_type == 1) { # BINOMIAL.POWER LINKS
- if (link_power == 0.0) { # Binomial.log
- vec1 = 1 / (exp (- linear_terms) - 1);
- g_Y = Y [, 1] - Y [, 2] * vec1;
- w = rowSums (Y) * vec1;
- } else { # Binomial.nonlog
- vec1 = zeros_r;
- if (link_power == 0.5) {
- vec1 = 1 / (1 - linear_terms ^ 2);
- } else { if (sum (ppred (linear_terms, 0.0, "<")) == 0) {
- vec1 = linear_terms ^ (- 2 + 1 / link_power) / (1 - linear_terms ^ (1 / link_power));
- } else {isNaN = 1;}}
- # We want a "zero-protected" version of
- # vec2 = Y [, 1] / linear_terms;
- is_y_0 = ppred (Y [, 1], 0.0, "==");
- vec2 = (Y [, 1] + is_y_0) / (linear_terms * (1 - is_y_0) + is_y_0) - is_y_0;
- g_Y = (vec2 - Y [, 2] * vec1 * linear_terms) / link_power;
- w = rowSums (Y) * vec1 / link_power ^ 2;
- }
- } else {
- is_LT_pos_infinite = ppred (linear_terms, 1.0/0.0, "==");
- is_LT_neg_infinite = ppred (linear_terms, -1.0/0.0, "==");
- is_LT_infinite = is_LT_pos_infinite %*% one_zero + is_LT_neg_infinite %*% zero_one;
- finite_linear_terms = replace (target = linear_terms, pattern = 1.0/0.0, replacement = 0);
- finite_linear_terms = replace (target = finite_linear_terms, pattern = -1.0/0.0, replacement = 0);
- if (link_type == 2) { # Binomial.logit
- Y_prob = exp (finite_linear_terms) %*% one_zero + ones_r %*% zero_one;
- Y_prob = Y_prob / (rowSums (Y_prob) %*% ones_2);
- Y_prob = Y_prob * ((1.0 - rowSums (is_LT_infinite)) %*% ones_2) + is_LT_infinite;
- g_Y = rowSums (Y * (Y_prob %*% flip_neg)); ### = y_residual;
- w = rowSums (Y * (Y_prob %*% flip_pos) * Y_prob); ### = y_variance;
- } else { if (link_type == 3) { # Binomial.probit
- is_lt_pos = ppred (linear_terms, 0.0, ">=");
- t_gp = 1.0 / (1.0 + abs (finite_linear_terms) * 0.231641888); # 0.231641888 = 0.3275911 / sqrt (2.0)
- pt_gp = t_gp * ( 0.254829592
- + t_gp * (-0.284496736 # "Handbook of Mathematical Functions", ed. by M. Abramowitz and I.A. Stegun,
- + t_gp * ( 1.421413741 # U.S. Nat-l Bureau of Standards, 10th print (Dec 1972), Sec. 7.1.26, p. 299
- + t_gp * (-1.453152027
- + t_gp * 1.061405429))));
- the_gauss_exp = exp (- (linear_terms ^ 2) / 2.0);
- vec1 = 0.25 * pt_gp * (2 - the_gauss_exp * pt_gp);
- vec2 = Y [, 1] - rowSums (Y) * is_lt_pos + the_gauss_exp * pt_gp * rowSums (Y) * (is_lt_pos - 0.5);
- w = the_gauss_exp * (one_over_sqrt_two_pi ^ 2) * rowSums (Y) / vec1;
- g_Y = one_over_sqrt_two_pi * vec2 / vec1;
- } else { if (link_type == 4) { # Binomial.cloglog
- the_exp = exp (linear_terms)
- the_exp_exp = exp (- the_exp);
- is_too_small = ppred (10000000 + the_exp, 10000000, "==");
- the_exp_ratio = (1 - is_too_small) * (1 - the_exp_exp) / (the_exp + is_too_small) + is_too_small * (1 - the_exp / 2);
- g_Y = (rowSums (Y) * the_exp_exp - Y [, 2]) / the_exp_ratio;
- w = the_exp_exp * the_exp * rowSums (Y) / the_exp_ratio;
- } else { if (link_type == 5) { # Binomial.cauchit
- Y_prob = 0.5 + (atan (finite_linear_terms) %*% p_one_m_one) / 3.1415926535897932384626433832795;
- Y_prob = Y_prob * ((1.0 - rowSums (is_LT_infinite)) %*% ones_2) + is_LT_infinite;
- y_residual = Y [, 1] * Y_prob [, 2] - Y [, 2] * Y_prob [, 1];
- var_function = rowSums (Y) * Y_prob [, 1] * Y_prob [, 2];
- link_gradient_normalized = (1 + linear_terms ^ 2) * 3.1415926535897932384626433832795;
- g_Y = rowSums (Y) * y_residual / (var_function * link_gradient_normalized);
- w = (rowSums (Y) ^ 2) / (var_function * link_gradient_normalized ^ 2);
- }}}}
- }
- }
-}
-
-
-glm_log_likelihood_part = function (Matrix[double] linear_terms, Matrix[double] Y,
- int dist_type, double var_power, int link_type, double link_power)
- return (double log_l, int isNaN)
-{
- isNaN = 0;
- log_l = 0.0;
- num_records = nrow (Y);
- zeros_r = matrix (0.0, rows = num_records, cols = 1);
-
- if (dist_type == 1 & link_type == 1)
- { # POWER DISTRIBUTION
- b_cumulant = zeros_r;
- natural_parameters = zeros_r;
- is_natural_parameter_log_zero = zeros_r;
- if (var_power == 1.0 & link_power == 0.0) { # Poisson.log
- b_cumulant = exp (linear_terms);
- is_natural_parameter_log_zero = ppred (linear_terms, -1.0/0.0, "==");
- natural_parameters = replace (target = linear_terms, pattern = -1.0/0.0, replacement = 0);
- } else { if (var_power == 1.0 & link_power == 1.0) { # Poisson.id
- if (sum (ppred (linear_terms, 0.0, "<")) == 0) {
- b_cumulant = linear_terms;
- is_natural_parameter_log_zero = ppred (linear_terms, 0.0, "==");
- natural_parameters = log (linear_terms + is_natural_parameter_log_zero);
- } else {isNaN = 1;}
- } else { if (var_power == 1.0 & link_power == 0.5) { # Poisson.sqrt
- if (sum (ppred (linear_terms, 0.0, "<")) == 0) {
- b_cumulant = linear_terms ^ 2;
- is_natural_parameter_log_zero = ppred (linear_terms, 0.0, "==");
- natural_parameters = 2.0 * log (linear_terms + is_natural_parameter_log_zero);
- } else {isNaN = 1;}
- } else { if (var_power == 1.0 & link_power > 0.0) { # Poisson.power_nonlog, pos
- if (sum (ppred (linear_terms, 0.0, "<")) == 0) {
- is_natural_parameter_log_zero = ppred (linear_terms, 0.0, "==");
- b_cumulant = (linear_terms + is_natural_parameter_log_zero) ^ (1.0 / link_power) - is_natural_parameter_log_zero;
- natural_parameters = log (linear_terms + is_natural_parameter_log_zero) / link_power;
- } else {isNaN = 1;}
- } else { if (var_power == 1.0) { # Poisson.power_nonlog, neg
- if (sum (ppred (linear_terms, 0.0, "<=")) == 0) {
- b_cumulant = linear_terms ^ (1.0 / link_power);
- natural_parameters = log (linear_terms) / link_power;
- } else {isNaN = 1;}
- } else { if (var_power == 2.0 & link_power == -1.0) { # Gamma.inverse
- if (sum (ppred (linear_terms, 0.0, "<=")) == 0) {
- b_cumulant = - log (linear_terms);
- natural_parameters = - linear_terms;
- } else {isNaN = 1;}
- } else { if (var_power == 2.0 & link_power == 1.0) { # Gamma.id
- if (sum (ppred (linear_terms, 0.0, "<=")) == 0) {
- b_cumulant = log (linear_terms);
- natural_parameters = - 1.0 / linear_terms;
- } else {isNaN = 1;}
- } else { if (var_power == 2.0 & link_power == 0.0) { # Gamma.log
- b_cumulant = linear_terms;
- natural_parameters = - exp (- linear_terms);
- } else { if (var_power == 2.0) { # Gamma.power_nonlog
- if (sum (ppred (linear_terms, 0.0, "<=")) == 0) {
- b_cumulant = log (linear_terms) / link_power;
- natural_parameters = - linear_terms ^ (- 1.0 / link_power);
- } else {isNaN = 1;}
- } else { if (link_power == 0.0) { # PowerDist.log
- natural_parameters = exp (linear_terms * (1.0 - var_power)) / (1.0 - var_power);
- b_cumulant = exp (linear_terms * (2.0 - var_power)) / (2.0 - var_power);
- } else { # PowerDist.power_nonlog
- if (-2 * link_power == 1.0 - var_power) {
- natural_parameters = 1.0 / (linear_terms ^ 2) / (1.0 - var_power);
- } else { if (-1 * link_power == 1.0 - var_power) {
- natural_parameters = 1.0 / linear_terms / (1.0 - var_power);
- } else { if ( link_power == 1.0 - var_power) {
- natural_parameters = linear_terms / (1.0 - var_power);
- } else { if ( 2 * link_power == 1.0 - var_power) {
- natural_parameters = linear_terms ^ 2 / (1.0 - var_power);
- } else {
- if (sum (ppred (linear_terms, 0.0, "<=")) == 0) {
- power = (1.0 - var_power) / link_power;
- natural_parameters = (linear_terms ^ power) / (1.0 - var_power);
- } else {isNaN = 1;}
- }}}}
- if (-2 * link_power == 2.0 - var_power) {
- b_cumulant = 1.0 / (linear_terms ^ 2) / (2.0 - var_power);
- } else { if (-1 * link_power == 2.0 - var_power) {
- b_cumulant = 1.0 / linear_terms / (2.0 - var_power);
- } else { if ( link_power == 2.0 - var_power) {
- b_cumulant = linear_terms / (2.0 - var_power);
- } else { if ( 2 * link_power == 2.0 - var_power) {
- b_cumulant = linear_terms ^ 2 / (2.0 - var_power);
- } else {
- if (sum (ppred (linear_terms, 0.0, "<=")) == 0) {
- power = (2.0 - var_power) / link_power;
- b_cumulant = (linear_terms ^ power) / (2.0 - var_power);
- } else {isNaN = 1;}
- }}}}
- }}}}} }}}}}
- if (sum (is_natural_parameter_log_zero * abs (Y)) > 0.0) {
- log_l = -1.0 / 0.0;
- isNaN = 1;
- }
- if (isNaN == 0)
- {
- log_l = sum (Y * natural_parameters - b_cumulant);
- if (log_l != log_l | (log_l == log_l + 1.0 & log_l == log_l * 2.0)) {
- log_l = -1.0 / 0.0;
- isNaN = 1;
- } } }
-
- if (dist_type == 2 & link_type >= 1 & link_type <= 5)
- { # BINOMIAL/BERNOULLI DISTRIBUTION
-
- [Y_prob, isNaN] = binomial_probability_two_column (linear_terms, link_type, link_power);
-
- if (isNaN == 0) {
- does_prob_contradict = ppred (Y_prob, 0.0, "<=");
- if (sum (does_prob_contradict * abs (Y)) == 0.0) {
- log_l = sum (Y * log (Y_prob * (1 - does_prob_contradict) + does_prob_contradict));
- if (log_l != log_l | (log_l == log_l + 1.0 & log_l == log_l * 2.0)) {
- isNaN = 1;
- }
- } else {
- log_l = -1.0 / 0.0;
- isNaN = 1;
- } } }
-
- if (isNaN == 1) {
- log_l = - 1.0 / 0.0;
- }
-}
-
-
-
-binomial_probability_two_column =
- function (Matrix[double] linear_terms, int link_type, double link_power)
- return (Matrix[double] Y_prob, int isNaN)
-{
- isNaN = 0;
- num_records = nrow (linear_terms);
-
- # Define some auxiliary matrices
-
- ones_2 = matrix (1.0, rows = 1, cols = 2);
- p_one_m_one = ones_2;
- p_one_m_one [1, 2] = -1.0;
- m_one_p_one = ones_2;
- m_one_p_one [1, 1] = -1.0;
- zero_one = ones_2;
- zero_one [1, 1] = 0.0;
- one_zero = ones_2;
- one_zero [1, 2] = 0.0;
-
- zeros_r = matrix (0.0, rows = num_records, cols = 1);
- ones_r = 1.0 + zeros_r;
-
- # Begin the function body
-
- Y_prob = zeros_r %*% ones_2;
- if (link_type == 1) { # Binomial.power
- if (link_power == 0.0) { # Binomial.log
- Y_prob = exp (linear_terms) %*% p_one_m_one + ones_r %*% zero_one;
- } else { if (link_power == 0.5) { # Binomial.sqrt
- Y_prob = (linear_terms ^ 2) %*% p_one_m_one + ones_r %*% zero_one;
- } else { # Binomial.power_nonlog
- if (sum (ppred (linear_terms, 0.0, "<")) == 0) {
- Y_prob = (linear_terms ^ (1.0 / link_power)) %*% p_one_m_one + ones_r %*% zero_one;
- } else {isNaN = 1;}
- }}
- } else { # Binomial.non_power
- is_LT_pos_infinite = ppred (linear_terms, 1.0/0.0, "==");
- is_LT_neg_infinite = ppred (linear_terms, -1.0/0.0, "==");
- is_LT_infinite = is_LT_pos_infinite %*% one_zero + is_LT_neg_infinite %*% zero_one;
- finite_linear_terms = replace (target = linear_terms, pattern = 1.0/0.0, replacement = 0);
- finite_linear_terms = replace (target = finite_linear_terms, pattern = -1.0/0.0, replacement = 0);
- if (link_type == 2) { # Binomial.logit
- Y_prob = exp (finite_linear_terms) %*% one_zero + ones_r %*% zero_one;
- Y_prob = Y_prob / (rowSums (Y_prob) %*% ones_2);
- } else { if (link_type == 3) { # Binomial.probit
- lt_pos_neg = ppred (finite_linear_terms, 0.0, ">=") %*% p_one_m_one + ones_r %*% zero_one;
- t_gp = 1.0 / (1.0 + abs (finite_linear_terms) * 0.231641888); # 0.231641888 = 0.3275911 / sqrt (2.0)
- pt_gp = t_gp * ( 0.254829592
- + t_gp * (-0.284496736 # "Handbook of Mathematical Functions", ed. by M. Abramowitz and I.A. Stegun,
- + t_gp * ( 1.421413741 # U.S. Nat-l Bureau of Standards, 10th print (Dec 1972), Sec. 7.1.26, p. 299
- + t_gp * (-1.453152027
- + t_gp * 1.061405429))));
- the_gauss_exp = exp (- (finite_linear_terms ^ 2) / 2.0);
- Y_prob = lt_pos_neg + ((the_gauss_exp * pt_gp) %*% ones_2) * (0.5 - lt_pos_neg);
- } else { if (link_type == 4) { # Binomial.cloglog
- the_exp = exp (finite_linear_terms);
- the_exp_exp = exp (- the_exp);
- is_too_small = ppred (10000000 + the_exp, 10000000, "==");
- Y_prob [, 1] = (1 - is_too_small) * (1 - the_exp_exp) + is_too_small * the_exp * (1 - the_exp / 2);
- Y_prob [, 2] = the_exp_exp;
- } else { if (link_type == 5) { # Binomial.cauchit
- Y_prob = 0.5 + (atan (finite_linear_terms) %*% p_one_m_one) / 3.1415926535897932384626433832795;
- } else {
- isNaN = 1;
- }}}}
- Y_prob = Y_prob * ((1.0 - rowSums (is_LT_infinite)) %*% ones_2) + is_LT_infinite;
-} }
-
-
-# THE CG-STEIHAUG PROCEDURE SCRIPT
-
-# Apply Conjugate Gradient - Steihaug algorithm in order to approximately minimize
-# 0.5 z^T (X^T diag(w) X + diag (lambda)) z + (g + lambda * beta)^T z
-# under constraint: ||z|| <= trust_delta.
-# See Alg. 7.2 on p. 171 of "Numerical Optimization" 2nd ed. by Nocedal and Wright
-# IN THE ABOVE, "X" IS UNDERSTOOD TO BE "X %*% (SHIFT/SCALE TRANSFORM)"; this transform
-# is given separately because sparse "X" may become dense after applying the transform.
-#
-get_CG_Steihaug_point =
- function (Matrix[double] X, Matrix[double] scale_X, Matrix[double] shift_X, Matrix[double] w,
- Matrix[double] g, Matrix[double] beta, Matrix[double] lambda, double trust_delta, int max_iter_CG)
- return (Matrix[double] z, double neg_log_l_change, int i_CG, int reached_trust_boundary)
-{
- trust_delta_sq = trust_delta ^ 2;
- size_CG = nrow (g);
- z = matrix (0.0, rows = size_CG, cols = 1);
- neg_log_l_change = 0.0;
- reached_trust_boundary = 0;
- g_reg = g + lambda * beta;
- r_CG = g_reg;
- p_CG = -r_CG;
- rr_CG = sum(r_CG * r_CG);
- eps_CG = rr_CG * min (0.25, sqrt (rr_CG));
- converged_CG = 0;
- if (rr_CG < eps_CG) {
- converged_CG = 1;
- }
-
- max_iteration_CG = max_iter_CG;
- if (max_iteration_CG <= 0) {
- max_iteration_CG = size_CG;
- }
- i_CG = 0;
- while (converged_CG == 0)
- {
- i_CG = i_CG + 1;
- ssX_p_CG = diag (scale_X) %*% p_CG;
- ssX_p_CG [size_CG, ] = ssX_p_CG [size_CG, ] + t(shift_X) %*% p_CG;
- temp_CG = t(X) %*% (w * (X %*% ssX_p_CG));
- q_CG = (lambda * p_CG) + diag (scale_X) %*% temp_CG + shift_X %*% temp_CG [size_CG, ];
- pq_CG = sum (p_CG * q_CG);
- if (pq_CG <= 0) {
- pp_CG = sum (p_CG * p_CG);
- if (pp_CG > 0) {
- [z, neg_log_l_change] =
- get_trust_boundary_point (g_reg, z, p_CG, q_CG, r_CG, pp_CG, pq_CG, trust_delta_sq);
- reached_trust_boundary = 1;
- } else {
- neg_log_l_change = 0.5 * sum (z * (r_CG + g_reg));
- }
- converged_CG = 1;
- }
- if (converged_CG == 0) {
- alpha_CG = rr_CG / pq_CG;
- new_z = z + alpha_CG * p_CG;
- if (sum(new_z * new_z) >= trust_delta_sq) {
- pp_CG = sum (p_CG * p_CG);
- [z, neg_log_l_change] =
- get_trust_boundary_point (g_reg, z, p_CG, q_CG, r_CG, pp_CG, pq_CG, trust_delta_sq);
- reached_trust_boundary = 1;
- converged_CG = 1;
- }
- if (converged_CG == 0) {
- z = new_z;
- old_rr_CG = rr_CG;
- r_CG = r_CG + alpha_CG * q_CG;
- rr_CG = sum(r_CG * r_CG);
- if (i_CG == max_iteration_CG | rr_CG < eps_CG) {
- neg_log_l_change = 0.5 * sum (z * (r_CG + g_reg));
- reached_trust_boundary = 0;
- converged_CG = 1;
- }
- if (converged_CG == 0) {
- p_CG = -r_CG + (rr_CG / old_rr_CG) * p_CG;
-} } } } }
-
-
-# An auxiliary function used twice inside the CG-STEIHAUG loop:
-get_trust_boundary_point =
- function (Matrix[double] g, Matrix[double] z, Matrix[double] p,
- Matrix[double] q, Matrix[double] r, double pp, double pq,
- double trust_delta_sq)
- return (Matrix[double] new_z, double f_change)
-{
- zz = sum (z * z); pz = sum (p * z);
- sq_root_d = sqrt (pz * pz - pp * (zz - trust_delta_sq));
- tau_1 = (- pz + sq_root_d) / pp;
- tau_2 = (- pz - sq_root_d) / pp;
- zq = sum (z * q); gp = sum (g * p);
- f_extra = 0.5 * sum (z * (r + g));
- f_change_1 = f_extra + (0.5 * tau_1 * pq + zq + gp) * tau_1;
- f_change_2 = f_extra + (0.5 * tau_2 * pq + zq + gp) * tau_2;
- if (f_change_1 < f_change_2) {
- new_z = z + (tau_1 * p);
- f_change = f_change_1;
- }
- else {
- new_z = z + (tau_2 * p);
- f_change = f_change_2;
- }
-}
-
-
-# Computes vector w such that ||X %*% w - 1|| -> MIN given avg(X %*% w) = 1
-# We find z_LS such that ||X %*% z_LS - 1|| -> MIN unconditionally, then scale
-# it to compute w = c * z_LS such that sum(X %*% w) = nrow(X).
-straightenX =
- function (Matrix[double] X, double eps, int max_iter_CG)
- return (Matrix[double] w)
-{
- w_X = t(colSums(X));
- lambda_LS = 0.000001 * sum(X ^ 2) / ncol(X);
- eps_LS = eps * nrow(X);
-
- # BEGIN LEAST SQUARES
-
- r_LS = - w_X;
- z_LS = matrix (0.0, rows = ncol(X), cols = 1);
- p_LS = - r_LS;
- norm_r2_LS = sum (r_LS ^ 2);
- i_LS = 0;
- while (i_LS < max_iter_CG & i_LS < ncol(X) & norm_r2_LS >= eps_LS)
- {
- q_LS = t(X) %*% X %*% p_LS;
- q_LS = q_LS + lambda_LS * p_LS;
- alpha_LS = norm_r2_LS / sum (p_LS * q_LS);
- z_LS = z_LS + alpha_LS * p_LS;
- old_norm_r2_LS = norm_r2_LS;
- r_LS = r_LS + alpha_LS * q_LS;
- norm_r2_LS = sum (r_LS ^ 2);
- p_LS = -r_LS + (norm_r2_LS / old_norm_r2_LS) * p_LS;
- i_LS = i_LS + 1;
- }
-
- # END LEAST SQUARES
-
- w = (nrow(X) / sum (w_X * z_LS)) * z_LS;
-}
-
-
-round_to_print = function (double x_to_truncate)
-return (double mantissa, int eee)
-{
- mantissa = 1.0;
- eee = 0;
- positive_infinity = 1.0 / 0.0;
- x = abs (x_to_truncate);
- if (x != x / 2.0) {
- log_ten = log (10.0);
- d_eee = round (log (x) / log_ten - 0.5);
- mantissa = round (x * exp (log_ten * (4.0 - d_eee))) / 10000;
- if (mantissa == 10.0) {
- mantissa = 1.0;
- d_eee = d_eee + 1;
- }
- if (x_to_truncate < 0.0) {
- mantissa = - mantissa;
- }
- eee = 0;
- pow_two = 1;
- res_eee = abs (d_eee);
- while (res_eee != 0.0) {
- new_res_eee = round (res_eee / 2.0 - 0.3);
- if (new_res_eee * 2.0 < res_eee) {
- eee = eee + pow_two;
- }
- res_eee = new_res_eee;
- pow_two = 2 * pow_two;
- }
- if (d_eee < 0.0) {
- eee = - eee;
- }
- } else { mantissa = x_to_truncate; }
-}
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/d9c6acb3/src/test/scripts/functions/codegen/Algorithm_Kmeans.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/codegen/Algorithm_Kmeans.dml b/src/test/scripts/functions/codegen/Algorithm_Kmeans.dml
deleted file mode 100644
index 9bdfab0..0000000
--- a/src/test/scripts/functions/codegen/Algorithm_Kmeans.dml
+++ /dev/null
@@ -1,243 +0,0 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-X = read($1)
-num_centroids = $2
-num_runs = $3
-eps = $4;
-max_iter = $5;
-
-is_write_Y = 0;
-is_verbose = 0;
-avg_sample_size_per_centroid = 50;
-
-print ("BEGIN K-MEANS SCRIPT");
-print ("Reading X...");
-num_records = nrow (X);
-num_features = ncol (X);
-
-sumXsq = sum (X ^ 2);
-# Remark - A useful rewrite: sum (A %*% B) = sum (t(colSums(A)) * rowSums(B))
-
-# STEP 1: INITIALIZE CENTROIDS FOR ALL RUNS FROM DATA SAMPLES:
-
-print ("Taking data samples for initialization...");
-
-[sample_maps, samples_vs_runs_map, sample_block_size] =
- get_sample_maps (num_records, num_runs, num_centroids * avg_sample_size_per_centroid);
-
-is_row_in_samples = rowSums (sample_maps);
-X_samples = sample_maps %*% X;
-X_samples_sq_norms = rowSums (X_samples ^ 2);
-
-print ("Initializing the centroids for all runs...");
-All_Centroids = matrix (0, rows = (num_runs * num_centroids), cols = num_features);
-
-# We select centroids according to the k-Means++ heuristic applied to a sample of X
-# Loop invariant: min_distances ~ sq.distances from X_sample rows to nearest centroids,
-# with the out-of-range X_sample positions in min_distances set to 0.0
-
-min_distances = is_row_in_samples; # Pick the 1-st centroids uniformly at random
-
-for (i in 1 : num_centroids)
-{
- # "Matricize" and prefix-sum to compute the cumulative distribution function:
- min_distances_matrix_form =
- matrix (min_distances, rows = sample_block_size, cols = num_runs, byrow = FALSE);
- cdf_min_distances = cumsum (min_distances_matrix_form);
-
- # Select the i-th centroid in each sample as a random sample row id with
- # probability ~ min_distances:
- random_row = Rand (rows = 1, cols = num_runs, min = 0.0, max = 1.0);
- threshold_matrix = random_row * cdf_min_distances [sample_block_size, ];
- centroid_ids = t(colSums (cdf_min_distances < threshold_matrix)) + 1;
-
- # Place the selected centroids together, one per run, into a matrix:
- centroid_placer = matrix (0, rows = num_runs, cols = (sample_block_size * num_runs));
- centroid_placer_raw =
- table (seq (1, num_runs, 1), sample_block_size * seq (0, num_runs - 1, 1) + centroid_ids);
- centroid_placer [, 1 : ncol (centroid_placer_raw)] = centroid_placer_raw;
- centroids = centroid_placer %*% X_samples;
-
- # Place the selected centroids into their appropriate slots in All_Centroids:
- centroid_placer = matrix (0, rows = nrow (All_Centroids), cols = num_runs);
- centroid_placer_raw =
- table (seq (i, num_centroids * (num_runs - 1) + i, num_centroids), seq (1, num_runs, 1));
- centroid_placer [1 : nrow (centroid_placer_raw), ] = centroid_placer_raw;
- All_Centroids = All_Centroids + centroid_placer %*% centroids;
-
- # Update min_distances to preserve the loop invariant:
- distances = X_samples_sq_norms + samples_vs_runs_map %*% rowSums (centroids ^ 2)
- - 2 * rowSums (X_samples * (samples_vs_runs_map %*% centroids));
- if (i == 1) {
- min_distances = is_row_in_samples * distances;
- } else {
- min_distances = min (min_distances, distances);
-} }
-
-# STEP 2: PERFORM K-MEANS ITERATIONS FOR ALL RUNS:
-
-termination_code = matrix (0, rows = num_runs, cols = 1);
-final_wcss = matrix (0, rows = num_runs, cols = 1);
-num_iterations = matrix (0, rows = num_runs, cols = 1);
-
-print ("Performing k-means iterations for all runs...");
-
-parfor (run_index in 1 : num_runs, check = 0)
-{
- C = All_Centroids [(num_centroids * (run_index - 1) + 1) : (num_centroids * run_index), ];
- C_old = C;
- iter_count = 0;
- term_code = 0;
- wcss = 0;
-
- while (term_code == 0)
- {
- # Compute Euclidean squared distances from records (X rows) to centroids (C rows)
- # without the C-independent term, then take the minimum for each record
- D = -2 * (X %*% t(C)) + t(rowSums (C ^ 2));
- minD = rowMins (D);
- # Compute the current centroid-based within-cluster sum of squares (WCSS)
- wcss_old = wcss;
- wcss = sumXsq + sum (minD);
- if (is_verbose == 1) {
- if (iter_count == 0) {
- print ("Run " + run_index + ", At Start-Up: Centroid WCSS = " + wcss);
- } else {
- print ("Run " + run_index + ", Iteration " + iter_count + ": Centroid WCSS = " + wcss
- + "; Centroid change (avg.sq.dist.) = " + (sum ((C - C_old) ^ 2) / num_centroids));
- } }
- # Check if convergence or maximum iteration has been reached
- if (wcss_old - wcss < eps * wcss & iter_count > 0) {
- term_code = 1; # Convergence is reached
- } else {
- if (iter_count >= max_iter) {
- term_code = 2; # Maximum iteration is reached
- } else {
- iter_count = iter_count + 1;
- # Find the closest centroid for each record
- P = (D <= minD);
- # If some records belong to multiple centroids, share them equally
- P = P / rowSums (P);
- # Compute the column normalization factor for P
- P_denom = colSums (P);
- if (sum (P_denom <= 0.0) > 0) {
- term_code = 3; # There is a "runaway" centroid with 0.0 denominator
- } else {
- C_old = C;
- # Compute new centroids as weighted averages over the records
- C = (t(P) %*% X) / t(P_denom);
- } } } }
- print ("Run " + run_index + ", Iteration " + iter_count + ": Terminated with code = " + term_code + ", Centroid WCSS = " + wcss);
- All_Centroids [(num_centroids * (run_index - 1) + 1) : (num_centroids * run_index), ] = C;
- final_wcss [run_index, 1] = wcss;
- termination_code [run_index, 1] = term_code;
- num_iterations [run_index, 1] = iter_count;
-}
-
-# STEP 3: SELECT THE RUN WITH BEST CENTROID-WCSS AND OUTPUT ITS CENTROIDS:
-
-termination_bitmap = matrix (0, rows = num_runs, cols = 3);
-termination_bitmap_raw = table (seq (1, num_runs, 1), termination_code);
-termination_bitmap [, 1 : ncol(termination_bitmap_raw)] = termination_bitmap_raw;
-termination_stats = colSums (termination_bitmap);
-print ("Number of successful runs = " + as.integer (as.scalar (termination_stats [1, 1])));
-print ("Number of incomplete runs = " + as.integer (as.scalar (termination_stats [1, 2])));
-print ("Number of failed runs (with lost centroids) = " + as.integer (as.scalar (termination_stats [1, 3])));
-
-num_successful_runs = as.scalar (termination_stats [1, 1]);
-if (num_successful_runs > 0) {
- final_wcss_successful = final_wcss * termination_bitmap [, 1];
- worst_wcss = max (final_wcss_successful);
- best_wcss = min (final_wcss_successful + (10 * worst_wcss + 10) * (1 - termination_bitmap [, 1]));
- avg_wcss = sum (final_wcss_successful) / num_successful_runs;
- best_index_vector = (final_wcss_successful == best_wcss);
- aggr_best_index_vector = cumsum (best_index_vector);
- best_index = as.integer (sum (aggr_best_index_vector == 0) + 1);
- print ("Successful runs: Best run is " + best_index + " with Centroid WCSS = " + best_wcss
- + "; Avg WCSS = " + avg_wcss + "; Worst WCSS = " + worst_wcss);
- C = All_Centroids [(num_centroids * (best_index - 1) + 1) : (num_centroids * best_index), ];
- print ("Writing out the best-WCSS centroids...");
- write (C, $6, format="text");
- print ("DONE.");
-} else {
- stop ("No output is produced. Try increasing the number of iterations and/or runs.");
-}
-
-
-
-get_sample_maps = function (int num_records, int num_samples, int approx_sample_size)
- return (Matrix[double] sample_maps, Matrix[double] sample_col_map, int sample_block_size)
-{
- if (approx_sample_size < num_records) {
- # Input value "approx_sample_size" is the average sample size; increase it by ~10 std.dev's
- # to get the sample block size (to allocate space):
- sample_block_size = as.integer (approx_sample_size + round (10 * sqrt (approx_sample_size)));
- num_rows = sample_block_size * num_samples;
-
- # Generate all samples in parallel by converting uniform random values into random
- # integer skip-ahead intervals and prefix-summing them:
- sample_rec_ids = Rand (rows = sample_block_size, cols = num_samples, min = 0.0, max = 1.0);
- sample_rec_ids = round (log (sample_rec_ids) / log (1.0 - approx_sample_size / num_records) + 0.5);
- # Prob [k-1 < log(uniform)/log(1-p) < k] = p*(1-p)^(k-1) = Prob [k-1 zeros before a one]
- sample_rec_ids = cumsum (sample_rec_ids); # (skip to next one) --> (skip to i-th one)
-
- # Replace all sample record ids over "num_records" (i.e. out of range) by "num_records + 1":
- is_sample_rec_id_within_range = (sample_rec_ids <= num_records);
- sample_rec_ids = sample_rec_ids * is_sample_rec_id_within_range
- + (num_records + 1) * (1 - is_sample_rec_id_within_range);
-
- # Rearrange all samples (and their out-of-range indicators) into one column-vector:
- sample_rec_ids =
- matrix (sample_rec_ids, rows = num_rows, cols = 1, byrow = FALSE);
- is_row_in_samples =
- matrix (is_sample_rec_id_within_range, rows = num_rows, cols = 1, byrow = FALSE);
-
- # Use contingency table to create the "sample_maps" matrix that is a vertical concatenation
- # of 0-1-matrices, one per sample, each with 1s at (i, sample_record[i]) and 0s elsewhere:
- sample_maps_raw = table (seq (1, num_rows), sample_rec_ids);
- max_rec_id = ncol (sample_maps_raw);
- if (max_rec_id >= num_records) {
- sample_maps = sample_maps_raw [, 1 : num_records];
- } else {
- sample_maps = matrix (0, rows = num_rows, cols = num_records);
- sample_maps [, 1 : max_rec_id] = sample_maps_raw;
- }
-
- # Create a 0-1-matrix that maps each sample column ID into all row positions of the
- # corresponding sample; map out-of-sample-range positions to row id = num_rows + 1:
- sample_positions = (num_rows + 1) - is_row_in_samples * seq (num_rows, 1, -1);
- # Column ID positions = 1, 1, ..., 1, 2, 2, ..., 2, . . . , n_c, n_c, ..., n_c:
- col_positions = round (0.5 + seq (0, num_rows - 1, 1) / sample_block_size);
- sample_col_map = table (sample_positions, col_positions);
- # Remove the out-of-sample-range positions by cutting off the last row:
- sample_col_map = sample_col_map [1 : (num_rows), ];
-
- } else {
- one_per_record = matrix (1, rows = num_records, cols = 1);
- sample_block_size = num_records;
- sample_maps = matrix (0, rows = (num_records * num_samples), cols = num_records);
- sample_col_map = matrix (0, rows = (num_records * num_samples), cols = num_samples);
- for (i in 1:num_samples) {
- sample_maps [(num_records * (i - 1) + 1) : (num_records * i), ] = diag (one_per_record);
- sample_col_map [(num_records * (i - 1) + 1) : (num_records * i), i] = one_per_record;
-} } }
-
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/d9c6acb3/src/test/scripts/functions/codegen/Algorithm_L2SVM.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/codegen/Algorithm_L2SVM.R b/src/test/scripts/functions/codegen/Algorithm_L2SVM.R
index 36e844e..68fe3e5 100644
--- a/src/test/scripts/functions/codegen/Algorithm_L2SVM.R
+++ b/src/test/scripts/functions/codegen/Algorithm_L2SVM.R
@@ -40,15 +40,14 @@ if(num_min + num_max != nrow(Y)){
Y = 2/(check_max - check_min)*Y - (check_min + check_max)/(check_max - check_min)
}
-N = nrow(X)
-D = ncol(X)
+dimensions = ncol(X)
if (intercept == 1) {
- ones = matrix(1,N,1)
+ ones = matrix(1, rows=num_samples, cols=1)
X = cbind(X, ones);
}
-num_rows_in_w = D
+num_rows_in_w = dimensions
if(intercept == 1){
num_rows_in_w = num_rows_in_w + 1
}
@@ -59,6 +58,9 @@ s = g_old
Xw = matrix(0,nrow(X),1)
iter = 0
+positive_label = check_max
+negative_label = check_min
+
continue = TRUE
while(continue && iter < maxiterations){
t = 0
@@ -95,4 +97,12 @@ while(continue && iter < maxiterations){
iter = iter + 1
}
+extra_model_params = matrix(0, 4, 1)
+extra_model_params[1,1] = positive_label
+extra_model_params[2,1] = negative_label
+extra_model_params[3,1] = intercept
+extra_model_params[4,1] = dimensions
+
+w = t(cbind(t(w), t(extra_model_params)))
+
writeMM(as(w,"CsparseMatrix"), paste(args[5], "w", sep=""));
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/d9c6acb3/src/test/scripts/functions/codegen/Algorithm_L2SVM.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/codegen/Algorithm_L2SVM.dml b/src/test/scripts/functions/codegen/Algorithm_L2SVM.dml
deleted file mode 100644
index 9a6a631..0000000
--- a/src/test/scripts/functions/codegen/Algorithm_L2SVM.dml
+++ /dev/null
@@ -1,106 +0,0 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-X = read($1)
-Y = read($2)
-intercept = $3;
-eps = $4;
-maxiter = $5;
-
-check_min = min(Y)
-check_max = max(Y)
-num_min = sum(ppred(Y, check_min, "=="))
-num_max = sum(ppred(Y, check_max, "=="))
-if(num_min + num_max != nrow(Y)) print("please check Y, it should contain only 2 labels")
-else{
- if(check_min != -1 | check_max != +1)
- Y = 2/(check_max - check_min)*Y - (check_min + check_max)/(check_max - check_min)
-}
-
-epsilon = eps
-lambda = 0.001
-maxiterations = maxiter
-num_samples = nrow(X)
-dimensions = ncol(X)
-
-if (intercept == 1) {
- ones = matrix(1, rows=num_samples, cols=1)
- X = append(X, ones);
-}
-
-num_rows_in_w = dimensions
-if(intercept == 1){
- num_rows_in_w = num_rows_in_w + 1
-}
-w = matrix(0, rows=num_rows_in_w, cols=1)
-
-g_old = t(X) %*% Y
-s = g_old
-
-Xw = matrix(0, rows=nrow(X), cols=1)
-debug_str = "# Iter, Obj"
-iter = 0
-continue = 1
-while(continue == 1 & iter < maxiterations) {
- # minimizing primal obj along direction s
- step_sz = 0
- Xd = X %*% s
- wd = lambda * sum(w * s)
- dd = lambda * sum(s * s)
- continue1 = 1
- while(continue1 == 1){
- tmp_Xw = Xw + step_sz*Xd
- out = 1 - Y * (tmp_Xw)
- sv = ppred(out, 0, ">")
- out = out * sv
- g = wd + step_sz*dd - sum(out * Y * Xd)
- h = dd + sum(Xd * sv * Xd)
- step_sz = step_sz - g/h
- if (g*g/h < 0.0000000001){
- continue1 = 0
- }
- }
-
- #update weights
- w = w + step_sz*s
- Xw = Xw + step_sz*Xd
-
- out = 1 - Y * Xw
- sv = ppred(out, 0, ">")
- out = sv * out
- obj = 0.5 * sum(out * out) + lambda/2 * sum(w * w)
- g_new = t(X) %*% (out * Y) - lambda * w
-
- print("OBJ = " + obj)
- tmp = sum(s * g_old)
- if(step_sz*tmp < epsilon*obj){
- continue = 0
- }
-
- #non-linear CG step
- be = sum(g_new * g_new)/sum(g_old * g_old)
- s = be * s + g_new
- g_old = g_new
-
- iter = iter + 1
-}
-
-write(w, $6, format="text")
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/d9c6acb3/src/test/scripts/functions/codegen/Algorithm_LinregCG.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/codegen/Algorithm_LinregCG.R b/src/test/scripts/functions/codegen/Algorithm_LinregCG.R
index 5dcad95..600d42e 100644
--- a/src/test/scripts/functions/codegen/Algorithm_LinregCG.R
+++ b/src/test/scripts/functions/codegen/Algorithm_LinregCG.R
@@ -27,31 +27,131 @@ library("Matrix")
X = readMM(paste(args[1], "X.mtx", sep=""))
y = readMM(paste(args[1], "y.mtx", sep=""))
-intercept = as.integer(args[2]);
-eps = as.double(args[3]);
-maxiter = as.double(args[4]);
+intercept_status = as.integer(args[2]);
+tolerance = as.double(args[3]);
+max_iteration = as.double(args[4]);
+regularization = as.double(args[5]);
-if( intercept == 1 ){
- ones = matrix(1, nrow(X), 1);
- X = cbind(X, ones);
+n = nrow (X);
+m = ncol (X);
+ones_n = matrix (1, n, 1);
+zero_cell = matrix (0, 1, 1);
+
+m_ext = m;
+if (intercept_status == 1 | intercept_status == 2) # add the intercept column
+{
+ X = cbind (X, ones_n);
+ m_ext = ncol (X);
+}
+
+scale_lambda = matrix (1, m_ext, 1);
+if (intercept_status == 1 | intercept_status == 2)
+{
+ scale_lambda [m_ext, 1] = 0;
}
-r = -(t(X) %*% y);
-p = -r;
-norm_r2 = sum(r * r);
-w = matrix(0, ncol(X), 1);
+if (intercept_status == 2) {
+ avg_X_cols = t(colSums(X)) / n;
+ var_X_cols = (t(colSums (X ^ 2)) - n * (avg_X_cols ^ 2)) / (n - 1);
+ is_unsafe = (var_X_cols <= 0);
+ scale_X = 1.0 / sqrt (var_X_cols * (1 - is_unsafe) + is_unsafe);
+ scale_X [m_ext, 1] = 1;
+ shift_X = - avg_X_cols * scale_X;
+ shift_X [m_ext, 1] = 0;
+} else {
+ scale_X = matrix (1, m_ext, 1);
+ shift_X = matrix (0, m_ext, 1);
+}
+
+lambda = scale_lambda * regularization;
+beta_unscaled = matrix (0, m_ext, 1);
+if (max_iteration == 0) {
+ max_iteration = m_ext;
+}
i = 0;
-while(i < maxiter) {
- q = ((t(X) %*% (X %*% p)) + eps * p);
- alpha = norm_r2 / ((t(p) %*% q)[1:1]);
- w = w + alpha * p;
+r = - t(X) %*% y;
+
+if (intercept_status == 2) {
+ r = scale_X * r + shift_X %*% r [m_ext, ];
+}
+
+p = - r;
+norm_r2 = sum (r ^ 2);
+norm_r2_initial = norm_r2;
+norm_r2_target = norm_r2_initial * tolerance ^ 2;
+
+while (i < max_iteration & norm_r2 > norm_r2_target)
+{
+ if (intercept_status == 2) {
+ ssX_p = scale_X * p;
+ ssX_p [m_ext, ] = ssX_p [m_ext, ] + t(shift_X) %*% p;
+ } else {
+ ssX_p = p;
+ }
+
+ q = t(X) %*% (X %*% ssX_p);
+
+ if (intercept_status == 2) {
+ q = scale_X * q + shift_X %*% q [m_ext, ];
+ }
+
+ q = q + lambda * p;
+ a = norm_r2 / sum (p * q);
+ beta_unscaled = beta_unscaled + a * p;
+ r = r + a * q;
old_norm_r2 = norm_r2;
- r = r + alpha * q;
- norm_r2 = sum(r * r);
- beta = norm_r2 / old_norm_r2;
- p = -r + beta * p;
+ norm_r2 = sum (r ^ 2);
+ p = -r + (norm_r2 / old_norm_r2) * p;
i = i + 1;
}
-writeMM(as(w,"CsparseMatrix"), paste(args[5], "w", sep=""))
+if (intercept_status == 2) {
+ beta = scale_X * beta_unscaled;
+ beta [m_ext, ] = beta [m_ext, ] + t(shift_X) %*% beta_unscaled;
+} else {
+ beta = beta_unscaled;
+}
+
+avg_tot = sum (y) / n;
+ss_tot = sum (y ^ 2);
+ss_avg_tot = ss_tot - n * avg_tot ^ 2;
+var_tot = ss_avg_tot / (n - 1);
+y_residual = y - X %*% beta;
+avg_res = sum (y_residual) / n;
+ss_res = sum (y_residual ^ 2);
+ss_avg_res = ss_res - n * avg_res ^ 2;
+
+plain_R2 = 1 - ss_res / ss_avg_tot;
+if (n > m_ext) {
+ dispersion = ss_res / (n - m_ext);
+ adjusted_R2 = 1 - dispersion / (ss_avg_tot / (n - 1));
+} else {
+ dispersion = 0.0 / 0.0;
+ adjusted_R2 = 0.0 / 0.0;
+}
+
+plain_R2_nobias = 1 - ss_avg_res / ss_avg_tot;
+deg_freedom = n - m - 1;
+if (deg_freedom > 0) {
+ var_res = ss_avg_res / deg_freedom;
+ adjusted_R2_nobias = 1 - var_res / (ss_avg_tot / (n - 1));
+} else {
+ var_res = 0.0 / 0.0;
+ adjusted_R2_nobias = 0.0 / 0.0;
+}
+
+plain_R2_vs_0 = 1 - ss_res / ss_tot;
+if (n > m) {
+ adjusted_R2_vs_0 = 1 - (ss_res / (n - m)) / (ss_tot / n);
+} else {
+ adjusted_R2_vs_0 = 0.0 / 0.0;
+}
+
+if (intercept_status == 2) {
+ beta_out = cbind (beta, beta_unscaled);
+} else {
+ beta_out = beta;
+}
+
+writeMM(as(beta_out,"CsparseMatrix"), paste(args[6], "w", sep=""))
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/d9c6acb3/src/test/scripts/functions/codegen/Algorithm_LinregCG.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/codegen/Algorithm_LinregCG.dml b/src/test/scripts/functions/codegen/Algorithm_LinregCG.dml
deleted file mode 100644
index 92f15d7..0000000
--- a/src/test/scripts/functions/codegen/Algorithm_LinregCG.dml
+++ /dev/null
@@ -1,56 +0,0 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-
-X = read($1);
-y = read($2);
-intercept = $3;
-eps = $4;
-maxiter = $5;
-
-if( intercept == 1 ){
- ones = matrix(1, nrow(X), 1);
- X = append(X, ones);
-}
-
-r = -(t(X) %*% y);
-p = -r;
-norm_r2 = sum(r * r);
-w = matrix(0, rows = ncol(X), cols = 1);
-
-i = 0;
-while(i < maxiter) {
- q = ((t(X) %*% (X %*% p)) + eps * p);
- alpha = norm_r2 / as.scalar(t(p) %*% q);
- w = w + alpha * p;
- old_norm_r2 = norm_r2;
- r = r + alpha * q;
- norm_r2 = sum(r * r);
- beta = norm_r2 / old_norm_r2;
- p = -r + beta * p;
- i = i + 1;
-}
-
-write(w, $6);
-
-
-
-