You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by de...@apache.org on 2017/05/22 22:49:56 UTC

incubator-systemml git commit: [SYSTEMML-1549] Cox.dml - return S & T in usable format

Repository: incubator-systemml
Updated Branches:
  refs/heads/master 4ac77744f -> 3d1f77ce2


[SYSTEMML-1549] Cox.dml - return S & T in usable format

Return S and T as a matrix instead of as a string.

Closes #465.


Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/3d1f77ce
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/3d1f77ce
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/3d1f77ce

Branch: refs/heads/master
Commit: 3d1f77ce20ce28a958544f19a84f589f4840c3ed
Parents: 4ac7774
Author: Brendan Dwyer <br...@ibm.com>
Authored: Mon May 22 15:47:12 2017 -0700
Committer: Deron Eriksson <de...@us.ibm.com>
Committed: Mon May 22 15:47:12 2017 -0700

----------------------------------------------------------------------
 scripts/algorithms/Cox.dml | 51 ++++++++++++++++++++++++++++++-----------
 1 file changed, 38 insertions(+), 13 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/3d1f77ce/scripts/algorithms/Cox.dml
----------------------------------------------------------------------
diff --git a/scripts/algorithms/Cox.dml b/scripts/algorithms/Cox.dml
index a021109..e30dc87 100644
--- a/scripts/algorithms/Cox.dml
+++ b/scripts/algorithms/Cox.dml
@@ -68,18 +68,18 @@
 #	M[,6]: lower 100*(1-alpha)% confidence interval of betas
 #	M[,7]: upper 100*(1-alpha)% confidence interval of betas
 #
-# Two log files containing a summary of some statistics of the fitted model:
+# Two matrices containing a summary of some statistics of the fitted model:
 # 1- File S with the following format 
-#	- line 1: no. of observations
-#	- line 2: no. of events
-#   - line 3: log-likelihood 
-#	- line 4: AIC
-#	- line 5: Rsquare (Cox & Snell)
-#	- line 6: max possible Rsquare
+#	- row 1: no. of observations
+#	- row 2: no. of events
+#   - row 3: log-likelihood 
+#	- row 4: AIC
+#	- row 5: Rsquare (Cox & Snell)
+#	- row 6: max possible Rsquare
 # 2- File T with the following format
-#	- line 1: Likelihood ratio test statistic, degree of freedom, P-value
-#	- line 2: Wald test statistic, degree of freedom, P-value
-#	- line 3: Score (log-rank) test statistic, degree of freedom, P-value
+#	- row 1: Wald test statistic, degree of freedom, P-value
+#	- row 2: Likelihood ratio test statistic, degree of freedom, P-value
+#	- row 3: Score (log-rank) test statistic, degree of freedom, P-value
 # 
 # Additionally, the following matrices are stored (needed for prediction)
 # 1- A column matrix RT that contains the order-preserving recoded timestamps from X 
@@ -210,7 +210,14 @@ if (ncol (X_orig) < 3) {
 	loglik = -o;
 	S_str = "no. of records " + N + " loglik " + loglik;
 	if (fileS != " ") {
-		write (S_str, fileS, format = fmtO);
+	  S = matrix(0, 6, 1);
+	  S[1, 1] = N;
+	  S[2, 1] = 0; # number of events
+	  S[3, 1] = loglik;
+	  S[4, 1] = -1; # AIC
+	  S[5, 1] = -1; # Rsquare
+	  S[6, 1] = -1; #Rsquare_max
+		write (S, fileS, format = fmtO);
 	} else {
 		print (S_str);
 	}
@@ -388,41 +395,59 @@ CI_l = b - se_b * z_alpha_2;
 CI_r = b - se_b + z_alpha_2;
 
 ######## SOME STATISTICS AND TESTS
+S = matrix(0, 6, 1);
+T = matrix(0, 3, 3);
+
 # no. of records
 S_str = "no. of records " + N;
+S[1, 1] = N;
 
 # no.of events
 S_str = append (S_str, "no. of events " + sum (E));
+S[2, 1] = sum (E);
 
 # log-likelihood
 loglik = -o;
 S_str = append (S_str, "loglik " + loglik + " ");
+S[3, 1] = loglik;
 
 # AIC = -2 * loglik + 2 * D
 AIC = -2 * loglik + 2 * D;
 S_str = append (S_str, "AIC " + AIC + " ");
+S[4, 1] = AIC;
 
 # Wald test
 wald_t = as.scalar (t(b) %*% H %*% b);
 wald_p = 1 - cdf (target = wald_t, dist = "chisq", df = D);
 T_str = "Wald test = " + wald_t + " on " + D + " df, p = " + wald_p + " ";
+T[1, 1] = wald_t;
+T[1, 2] = D;
+T[1, 3] = wald_p;
 
 # Likelihood ratio test
 lratio_t = 2 * o_init - 2 * o;
 lratio_p = 1 - cdf (target = lratio_t, dist = "chisq", df = D);
 T_str = append (T_str, "Likelihood ratio test = " + lratio_t + " on " + D + " df, p = " + lratio_p + " ");
 
+T[2, 1] = lratio_t;
+T[2, 2] = D;
+T[2, 3] = lratio_p;
 
 H0_inv = inv (H0);
 score_t = as.scalar (t (g0) %*% H0_inv %*% g0);
 score_p = 1 - cdf (target = score_t, dist = "chisq", df = D);
 T_str = append (T_str, "Score (logrank) test = " + score_t + " on " + D + " df, p = " + score_p + " ");
+T[3, 1] = score_t;
+T[3, 2] = D;
+T[3, 3] = score_p;
 
 # Rsquare (Cox & Snell)
 Rsquare = 1 - exp (-lratio_t / N);  
 Rsquare_max = 1 - exp (-2 * o_init / N);
 S_str = append (S_str, "Rsquare (Cox & Snell): " + Rsquare + " ");
+S[5, 1] = Rsquare;
 S_str = append (S_str, "max possible Rsquare: " + Rsquare_max);
+S[6, 1] = Rsquare_max;
 
 M = matrix (0, rows = D, cols = 7);
 M[,1] = b;
@@ -435,12 +460,12 @@ M[,7] = CI_r;
 
 write (M, fileM, format = fmtO);
 if (fileS != " ") {
-	write (S_str, fileS, format = fmtO);
+	write (S, fileS, format = fmtO);
 } else {
 	print (S_str);
 }
 if (fileT != " ") {
-	write (T_str, fileT, format = fmtO);
+	write (T, fileT, format = fmtO);
 } else {
 	print (T_str);
 }