You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by de...@apache.org on 2017/05/22 22:49:56 UTC
incubator-systemml git commit: [SYSTEMML-1549] Cox.dml - return S & T in usable format
Repository: incubator-systemml
Updated Branches:
refs/heads/master 4ac77744f -> 3d1f77ce2
[SYSTEMML-1549] Cox.dml - return S & T in usable format
Return S and T as a matrix instead of as a string.
Closes #465.
Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/3d1f77ce
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/3d1f77ce
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/3d1f77ce
Branch: refs/heads/master
Commit: 3d1f77ce20ce28a958544f19a84f589f4840c3ed
Parents: 4ac7774
Author: Brendan Dwyer <br...@ibm.com>
Authored: Mon May 22 15:47:12 2017 -0700
Committer: Deron Eriksson <de...@us.ibm.com>
Committed: Mon May 22 15:47:12 2017 -0700
----------------------------------------------------------------------
scripts/algorithms/Cox.dml | 51 ++++++++++++++++++++++++++++++-----------
1 file changed, 38 insertions(+), 13 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/3d1f77ce/scripts/algorithms/Cox.dml
----------------------------------------------------------------------
diff --git a/scripts/algorithms/Cox.dml b/scripts/algorithms/Cox.dml
index a021109..e30dc87 100644
--- a/scripts/algorithms/Cox.dml
+++ b/scripts/algorithms/Cox.dml
@@ -68,18 +68,18 @@
# M[,6]: lower 100*(1-alpha)% confidence interval of betas
# M[,7]: upper 100*(1-alpha)% confidence interval of betas
#
-# Two log files containing a summary of some statistics of the fitted model:
+# Two matrices containing a summary of some statistics of the fitted model:
# 1- File S with the following format
-# - line 1: no. of observations
-# - line 2: no. of events
-# - line 3: log-likelihood
-# - line 4: AIC
-# - line 5: Rsquare (Cox & Snell)
-# - line 6: max possible Rsquare
+# - row 1: no. of observations
+# - row 2: no. of events
+# - row 3: log-likelihood
+# - row 4: AIC
+# - row 5: Rsquare (Cox & Snell)
+# - row 6: max possible Rsquare
# 2- File T with the following format
-# - line 1: Likelihood ratio test statistic, degree of freedom, P-value
-# - line 2: Wald test statistic, degree of freedom, P-value
-# - line 3: Score (log-rank) test statistic, degree of freedom, P-value
+# - row 1: Wald test statistic, degree of freedom, P-value
+# - row 2: Likelihood ratio test statistic, degree of freedom, P-value
+# - row 3: Score (log-rank) test statistic, degree of freedom, P-value
#
# Additionally, the following matrices are stored (needed for prediction)
# 1- A column matrix RT that contains the order-preserving recoded timestamps from X
@@ -210,7 +210,14 @@ if (ncol (X_orig) < 3) {
loglik = -o;
S_str = "no. of records " + N + " loglik " + loglik;
if (fileS != " ") {
- write (S_str, fileS, format = fmtO);
+ S = matrix(0, 6, 1);
+ S[1, 1] = N;
+ S[2, 1] = 0; # number of events
+ S[3, 1] = loglik;
+ S[4, 1] = -1; # AIC
+ S[5, 1] = -1; # Rsquare
+ S[6, 1] = -1; #Rsquare_max
+ write (S, fileS, format = fmtO);
} else {
print (S_str);
}
@@ -388,41 +395,59 @@ CI_l = b - se_b * z_alpha_2;
CI_r = b - se_b + z_alpha_2;
######## SOME STATISTICS AND TESTS
+S = matrix(0, 6, 1);
+T = matrix(0, 3, 3);
+
# no. of records
S_str = "no. of records " + N;
+S[1, 1] = N;
# no.of events
S_str = append (S_str, "no. of events " + sum (E));
+S[2, 1] = sum (E);
# log-likelihood
loglik = -o;
S_str = append (S_str, "loglik " + loglik + " ");
+S[3, 1] = loglik;
# AIC = -2 * loglik + 2 * D
AIC = -2 * loglik + 2 * D;
S_str = append (S_str, "AIC " + AIC + " ");
+S[4, 1] = AIC;
# Wald test
wald_t = as.scalar (t(b) %*% H %*% b);
wald_p = 1 - cdf (target = wald_t, dist = "chisq", df = D);
T_str = "Wald test = " + wald_t + " on " + D + " df, p = " + wald_p + " ";
+T[1, 1] = wald_t;
+T[1, 2] = D;
+T[1, 3] = wald_p;
# Likelihood ratio test
lratio_t = 2 * o_init - 2 * o;
lratio_p = 1 - cdf (target = lratio_t, dist = "chisq", df = D);
T_str = append (T_str, "Likelihood ratio test = " + lratio_t + " on " + D + " df, p = " + lratio_p + " ");
+T[2, 1] = lratio_t;
+T[2, 2] = D;
+T[2, 3] = lratio_p;
H0_inv = inv (H0);
score_t = as.scalar (t (g0) %*% H0_inv %*% g0);
score_p = 1 - cdf (target = score_t, dist = "chisq", df = D);
T_str = append (T_str, "Score (logrank) test = " + score_t + " on " + D + " df, p = " + score_p + " ");
+T[3, 1] = score_t;
+T[3, 2] = D;
+T[3, 3] = score_p;
# Rsquare (Cox & Snell)
Rsquare = 1 - exp (-lratio_t / N);
Rsquare_max = 1 - exp (-2 * o_init / N);
S_str = append (S_str, "Rsquare (Cox & Snell): " + Rsquare + " ");
+S[5, 1] = Rsquare;
S_str = append (S_str, "max possible Rsquare: " + Rsquare_max);
+S[6, 1] = Rsquare_max;
M = matrix (0, rows = D, cols = 7);
M[,1] = b;
@@ -435,12 +460,12 @@ M[,7] = CI_r;
write (M, fileM, format = fmtO);
if (fileS != " ") {
- write (S_str, fileS, format = fmtO);
+ write (S, fileS, format = fmtO);
} else {
print (S_str);
}
if (fileT != " ") {
- write (T_str, fileT, format = fmtO);
+ write (T, fileT, format = fmtO);
} else {
print (T_str);
}