You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by du...@apache.org on 2016/01/22 17:34:26 UTC

[50/51] [partial] incubator-systemml git commit: [SYSTEMML-482] [SYSTEMML-480] Adding a Git attributes file to enforce Unix-styled line endings, and normalizing all of the line endings.

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/scripts/algorithms/Cox-predict.dml
----------------------------------------------------------------------
diff --git a/scripts/algorithms/Cox-predict.dml b/scripts/algorithms/Cox-predict.dml
index 6f2bec0..7d444fd 100644
--- a/scripts/algorithms/Cox-predict.dml
+++ b/scripts/algorithms/Cox-predict.dml
@@ -1,181 +1,181 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-#  
-# THIS SCRIPT APPLIES THE ESTIMATED PARAMETERS OF A COX PROPORTIONAL HAZARD REGRESSION MODEL TO A NEW (TEST) DATASET
-# 
-# INPUT   PARAMETERS:
-# ---------------------------------------------------------------------------------------------
-# NAME    TYPE     DEFAULT      MEANING
-# ---------------------------------------------------------------------------------------------
-# X       String   ---          Location to read the input matrix containing the survival data with the following schema:
-# 								 - X[,1]: timestamps 
-#								 - X[,2]: whether an event occurred (1) or data is censored (0)
-#								 - X[,3:]: feature vectors (excluding the baseline columns) used for model fitting	 
-# RT      String   ---			Location to read column matrix RT containing the (order preserving) recoded timestamps from X 	
-# M       String   ---			Location to read matrix M containing the fitted Cox model with the following schema:
-#								 - M[,1]: betas	
-#								 - M[,2]: exp(betas)
-#								 - M[,3]: standard error of betas
-#								 - M[,4]: Z 
-#								 - M[,5]: p-value
-#								 - M[,6]: lower 100*(1-alpha)% confidence interval of betas
-#								 - M[,7]: upper 100*(1-alpha)% confidence interval of betas
-# Y       String   --- 			Location to read matrix Y used for prediction  
-# COV	  String   ---			Location to read the variance-covariance matrix of the betas	
-# MF      String   ---          Location to read column indices of X excluding the baseline factors if available
-# P       String   ---          Location to store matrix P containing the results of prediction
-# fmt     String   "text"       Matrix output format, usually "text" or "csv" (for matrices only)
-# ---------------------------------------------------------------------------------------------
-# OUTPUT: 
-# 1- A matrix P with the following schema:
-#	P[,1]: linear predictors relative to a baseline which contains the mean values for each feature
-#	   	   i.e., (Y[3:] - colMeans (X[3:])) %*% b
-#	P[,2]: standard error of linear predictors
-#	P[,3]: risk relative to a baseline which contains the mean values for each feature
-#		   i.e., exp ((Y[3:] - colMeans (X[3:])) %*% b)
-#	P[,4]: standard error of risk 
-#	P[,5]: estimates of cumulative hazard
-#	P[,6]: standard error of the estimates of cumulative hazard
-# -------------------------------------------------------------------------------------------
-# HOW TO INVOKE THIS SCRIPT - EXAMPLE:
-# hadoop jar SystemML.jar -f cox-predict.dml -nvargs X=INPUT_DIR/X RT=INPUT_DIR/RT M=INPUT_DIR/M Y=INPUT_DIR/Y 
-#											  COV=INPUT_DIR/COV MF=INPUT_DIR/MF P=OUTPUT_DIR/P fmt=csv
-
-fileX = $X;
-fileRT = $RT;
-fileMF = $MF;
-fileY = $Y; 
-fileM = $M;
-fileCOV = $COV;
-fileP = $P; 
-
-# Default values of some parameters
-fmtO = ifdef ($fmt, "text");       # $fmt="text" 
-
-X_orig = read (fileX);
-RT_X = read (fileRT); 
-Y_orig = read (fileY);
-M = read (fileM);
-b = M[,1];
-COV = read (fileCOV);
-
-col_ind = read (fileMF);
-tab = table (col_ind, seq (1, nrow (col_ind)), ncol (Y_orig), nrow (col_ind));
-Y_orig = Y_orig %*% tab;
-
-
-# Y and X have the same dimensions and schema
-if (ncol (Y_orig) != ncol (X_orig)) { 
-	stop ("Y has a wrong number of columns!");
-}
-
-X = X_orig[,3:ncol (X_orig)];
-T_X = X_orig[,1]; 
-E_X = X_orig[,2];
-D = ncol (X);
-N = nrow (X);
-Y_orig = order (target = Y_orig, by = 1);
-Y = Y_orig[,3:ncol (X_orig)];
-T_Y = Y_orig[,1];
-
-col_means = colMeans (X);
-ones = matrix (1, rows = nrow (Y), cols = 1);
-Y_rel = Y - (ones %*% col_means);
-
-##### compute linear predictors
-LP = Y_rel %*% b; 
-# compute standard error of linear predictors using the Delta method
-se_LP = diag(sqrt (Y_rel %*% COV %*% t(Y_rel)));
-
-##### compute risk
-R = exp (Y_rel %*% b);
-# compute standard error of risk using the Delta method
-se_R = diag(sqrt ((Y_rel * R) %*% COV %*% t(Y_rel * R))) / sqrt (exp (LP));
-
-##### compute estimates of cumulative hazard together with their standard errors: 
-# 1. col contains cumulative hazard estimates
-# 2. col contains standard errors for cumulative hazard estimates
- 
-d_r = aggregate (target = E_X, groups = RT_X, fn = "sum"); 
-e_r = aggregate (target = RT_X, groups = RT_X, fn = "count");
-Idx = cumsum (e_r); 
-all_times = table (seq (1, nrow (Idx), 1), Idx) %*% T_X; # distinct event times 
-
-event_times = removeEmpty (target = ppred (d_r, 0, ">") * all_times, margin = "rows");
-num_distinct_event = nrow (event_times);
-
-num_distinct = nrow (all_times); # no. of distinct timestamps censored or uncensored
-I_rev = table (seq (1, num_distinct, 1), seq (num_distinct, 1, -1));
-e_r_rev_agg = cumsum (I_rev %*% e_r);
-select = t (colSums (table (seq (1, num_distinct), e_r_rev_agg))); 
-
-min_event_time = min (event_times);
-max_event_time = max (event_times);
-T_Y = T_Y + (min_event_time * ppred (T_Y, min_event_time, "<"));
-T_Y = T_Y + (max_event_time * ppred (T_Y, max_event_time, ">"));
-
-Ind = outer (T_Y, t (event_times), ">=");
-Ind = table (seq (1, nrow (T_Y)), rowIndexMax (Ind), nrow (T_Y), num_distinct_event);
-
-exp_Xb = exp (X %*% b);
-exp_Xb_agg = aggregate (target = exp_Xb, groups = RT_X, fn = "sum");
-exp_Xb_cum = I_rev %*% cumsum (I_rev %*% exp_Xb_agg);
-
-H0 = cumsum (removeEmpty (target = d_r / exp_Xb_cum, margin = "rows"));
-P1 = cumsum (removeEmpty (target = d_r / exp_Xb_cum ^ 2, margin = "rows"));
-X_exp_Xb = X * exp (X %*% b);
-
-I_rev_all = table (seq (1, N, 1), seq (N, 1, -1));
-X_exp_Xb_rev_agg = cumsum (I_rev_all %*% X_exp_Xb);
-X_exp_Xb_rev_agg = removeEmpty (target = X_exp_Xb_rev_agg * select, margin = "rows"); 
-X_exp_Xb_cum = I_rev %*% X_exp_Xb_rev_agg; 
-P2 = cumsum (removeEmpty (target = (X_exp_Xb_cum * d_r) / exp_Xb_cum ^ 2, margin = "rows"));
-
-exp_Yb = exp (Y %*% b);	
-exp_Yb_2 = exp_Yb ^ 2;
-Y_exp_Yb = Y * exp (Y %*% b);
-
-# estimates of cumulative hazard
-H = exp_Yb * (Ind %*% H0);
-
-# term1
-term1 = exp_Yb_2 * (Ind %*% P1);
-
-# term2
-P3 = cumsum (removeEmpty (target = (exp_Xb_cum * d_r) / exp_Xb_cum ^ 2, margin = "rows"));
-P4 = (Ind %*% P2) * exp_Yb;
-P5 = Y_exp_Yb * (Ind %*% P3);
-term2 =  P4 - P5; 
-
-# standard error of the estimates of cumulative hazard
-se_H = sqrt (term1 + rowSums((term2 %*% COV) * term2));
-
-# prepare output matrix
-P = matrix (0, rows = nrow (Y), cols = 6);
-P[,1] = LP;
-P[,2] = se_LP;
-P[,3] = R; 
-P[,4] = se_R;
-P[,5] = H;
-P[,6] = se_H;
-write (P, fileP, format=fmtO);
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+#  
+# THIS SCRIPT APPLIES THE ESTIMATED PARAMETERS OF A COX PROPORTIONAL HAZARD REGRESSION MODEL TO A NEW (TEST) DATASET
+# 
+# INPUT   PARAMETERS:
+# ---------------------------------------------------------------------------------------------
+# NAME    TYPE     DEFAULT      MEANING
+# ---------------------------------------------------------------------------------------------
+# X       String   ---          Location to read the input matrix containing the survival data with the following schema:
+# 								 - X[,1]: timestamps 
+#								 - X[,2]: whether an event occurred (1) or data is censored (0)
+#								 - X[,3:]: feature vectors (excluding the baseline columns) used for model fitting	 
+# RT      String   ---			Location to read column matrix RT containing the (order preserving) recoded timestamps from X 	
+# M       String   ---			Location to read matrix M containing the fitted Cox model with the following schema:
+#								 - M[,1]: betas	
+#								 - M[,2]: exp(betas)
+#								 - M[,3]: standard error of betas
+#								 - M[,4]: Z 
+#								 - M[,5]: p-value
+#								 - M[,6]: lower 100*(1-alpha)% confidence interval of betas
+#								 - M[,7]: upper 100*(1-alpha)% confidence interval of betas
+# Y       String   --- 			Location to read matrix Y used for prediction  
+# COV	  String   ---			Location to read the variance-covariance matrix of the betas	
+# MF      String   ---          Location to read column indices of X excluding the baseline factors if available
+# P       String   ---          Location to store matrix P containing the results of prediction
+# fmt     String   "text"       Matrix output format, usually "text" or "csv" (for matrices only)
+# ---------------------------------------------------------------------------------------------
+# OUTPUT: 
+# 1- A matrix P with the following schema:
+#	P[,1]: linear predictors relative to a baseline which contains the mean values for each feature
+#	   	   i.e., (Y[3:] - colMeans (X[3:])) %*% b
+#	P[,2]: standard error of linear predictors
+#	P[,3]: risk relative to a baseline which contains the mean values for each feature
+#		   i.e., exp ((Y[3:] - colMeans (X[3:])) %*% b)
+#	P[,4]: standard error of risk 
+#	P[,5]: estimates of cumulative hazard
+#	P[,6]: standard error of the estimates of cumulative hazard
+# -------------------------------------------------------------------------------------------
+# HOW TO INVOKE THIS SCRIPT - EXAMPLE:
+# hadoop jar SystemML.jar -f cox-predict.dml -nvargs X=INPUT_DIR/X RT=INPUT_DIR/RT M=INPUT_DIR/M Y=INPUT_DIR/Y 
+#											  COV=INPUT_DIR/COV MF=INPUT_DIR/MF P=OUTPUT_DIR/P fmt=csv
+
+fileX = $X;
+fileRT = $RT;
+fileMF = $MF;
+fileY = $Y; 
+fileM = $M;
+fileCOV = $COV;
+fileP = $P; 
+
+# Default values of some parameters
+fmtO = ifdef ($fmt, "text");       # $fmt="text" 
+
+X_orig = read (fileX);
+RT_X = read (fileRT); 
+Y_orig = read (fileY);
+M = read (fileM);
+b = M[,1];
+COV = read (fileCOV);
+
+col_ind = read (fileMF);
+tab = table (col_ind, seq (1, nrow (col_ind)), ncol (Y_orig), nrow (col_ind));
+Y_orig = Y_orig %*% tab;
+
+
+# Y and X have the same dimensions and schema
+if (ncol (Y_orig) != ncol (X_orig)) { 
+	stop ("Y has a wrong number of columns!");
+}
+
+X = X_orig[,3:ncol (X_orig)];
+T_X = X_orig[,1]; 
+E_X = X_orig[,2];
+D = ncol (X);
+N = nrow (X);
+Y_orig = order (target = Y_orig, by = 1);
+Y = Y_orig[,3:ncol (X_orig)];
+T_Y = Y_orig[,1];
+
+col_means = colMeans (X);
+ones = matrix (1, rows = nrow (Y), cols = 1);
+Y_rel = Y - (ones %*% col_means);
+
+##### compute linear predictors
+LP = Y_rel %*% b; 
+# compute standard error of linear predictors using the Delta method
+se_LP = diag(sqrt (Y_rel %*% COV %*% t(Y_rel)));
+
+##### compute risk
+R = exp (Y_rel %*% b);
+# compute standard error of risk using the Delta method
+se_R = diag(sqrt ((Y_rel * R) %*% COV %*% t(Y_rel * R))) / sqrt (exp (LP));
+
+##### compute estimates of cumulative hazard together with their standard errors: 
+# 1. col contains cumulative hazard estimates
+# 2. col contains standard errors for cumulative hazard estimates
+ 
+d_r = aggregate (target = E_X, groups = RT_X, fn = "sum"); 
+e_r = aggregate (target = RT_X, groups = RT_X, fn = "count");
+Idx = cumsum (e_r); 
+all_times = table (seq (1, nrow (Idx), 1), Idx) %*% T_X; # distinct event times 
+
+event_times = removeEmpty (target = ppred (d_r, 0, ">") * all_times, margin = "rows");
+num_distinct_event = nrow (event_times);
+
+num_distinct = nrow (all_times); # no. of distinct timestamps censored or uncensored
+I_rev = table (seq (1, num_distinct, 1), seq (num_distinct, 1, -1));
+e_r_rev_agg = cumsum (I_rev %*% e_r);
+select = t (colSums (table (seq (1, num_distinct), e_r_rev_agg))); 
+
+min_event_time = min (event_times);
+max_event_time = max (event_times);
+T_Y = T_Y + (min_event_time * ppred (T_Y, min_event_time, "<"));
+T_Y = T_Y + (max_event_time * ppred (T_Y, max_event_time, ">"));
+
+Ind = outer (T_Y, t (event_times), ">=");
+Ind = table (seq (1, nrow (T_Y)), rowIndexMax (Ind), nrow (T_Y), num_distinct_event);
+
+exp_Xb = exp (X %*% b);
+exp_Xb_agg = aggregate (target = exp_Xb, groups = RT_X, fn = "sum");
+exp_Xb_cum = I_rev %*% cumsum (I_rev %*% exp_Xb_agg);
+
+H0 = cumsum (removeEmpty (target = d_r / exp_Xb_cum, margin = "rows"));
+P1 = cumsum (removeEmpty (target = d_r / exp_Xb_cum ^ 2, margin = "rows"));
+X_exp_Xb = X * exp (X %*% b);
+
+I_rev_all = table (seq (1, N, 1), seq (N, 1, -1));
+X_exp_Xb_rev_agg = cumsum (I_rev_all %*% X_exp_Xb);
+X_exp_Xb_rev_agg = removeEmpty (target = X_exp_Xb_rev_agg * select, margin = "rows"); 
+X_exp_Xb_cum = I_rev %*% X_exp_Xb_rev_agg; 
+P2 = cumsum (removeEmpty (target = (X_exp_Xb_cum * d_r) / exp_Xb_cum ^ 2, margin = "rows"));
+
+exp_Yb = exp (Y %*% b);	
+exp_Yb_2 = exp_Yb ^ 2;
+Y_exp_Yb = Y * exp (Y %*% b);
+
+# estimates of cumulative hazard
+H = exp_Yb * (Ind %*% H0);
+
+# term1
+term1 = exp_Yb_2 * (Ind %*% P1);
+
+# term2
+P3 = cumsum (removeEmpty (target = (exp_Xb_cum * d_r) / exp_Xb_cum ^ 2, margin = "rows"));
+P4 = (Ind %*% P2) * exp_Yb;
+P5 = Y_exp_Yb * (Ind %*% P3);
+term2 =  P4 - P5; 
+
+# standard error of the estimates of cumulative hazard
+se_H = sqrt (term1 + rowSums((term2 %*% COV) * term2));
+
+# prepare output matrix
+P = matrix (0, rows = nrow (Y), cols = 6);
+P[,1] = LP;
+P[,2] = se_LP;
+P[,3] = R; 
+P[,4] = se_R;
+P[,5] = H;
+P[,6] = se_H;
+write (P, fileP, format=fmtO);
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/scripts/algorithms/Cox.dml
----------------------------------------------------------------------
diff --git a/scripts/algorithms/Cox.dml b/scripts/algorithms/Cox.dml
index 3cab74d..6da22ce 100644
--- a/scripts/algorithms/Cox.dml
+++ b/scripts/algorithms/Cox.dml
@@ -1,502 +1,502 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-#  
-# THIS SCRIPT FITS A COX PROPORTIONAL HAZARD REGRESSION MODEL.
-# The Breslow method is used for handling ties and the regression parameters 
-# are computed using trust region newton method with conjugate gradient 
-# 
-# INPUT   PARAMETERS:
-# ---------------------------------------------------------------------------------------------
-# NAME    TYPE     DEFAULT      MEANING
-# ---------------------------------------------------------------------------------------------
-# X       String   ---          Location to read the input matrix X containing the survival data containing the following information
-# 								 - 1: timestamps 
-#								 - 2: whether an event occurred (1) or data is censored (0)
-#								 - 3: feature vectors 	
-# TE	  String   ---          Column indices of X as a column vector which contain timestamp (first row) and event information (second row)
-# F 	  String   " "			Column indices of X as a column vector which are to be used for fitting the Cox model
-# R   	  String   " "	        If factors (categorical variables) are available in the input matrix X, location to read matrix R containing 
-#								the start and end indices of the factors in X
-#								 - R[,1]: start indices
-#								 - R[,2]: end indices	
-#								Alternatively, user can specify the indices of the baseline level of each factor which needs to be removed from X; 
-#								in this case the start and end indices corresponding to the baseline level need to be the same;
-#								if R is not provided by default all variables are considered to be continuous 
-# M       String   ---          Location to store the results of Cox regression analysis including estimated regression parameters of the fitted 
-#								Cox model (the betas), their standard errors, confidence intervals, and P-values 
-# S       String   " "          Location to store a summary of some statistics of the fitted cox proportional hazard model including			
-#								no. of records, no. of events, log-likelihood, AIC, Rsquare (Cox & Snell), and max possible Rsquare; 
-#								by default is standard output  
-# T       String   " "          Location to store the results of Likelihood ratio test, Wald test, and Score (log-rank) test of the fitted model;
-#								by default is standard output 
-# COV	  String   ---			Location to store the variance-covariance matrix of the betas
-# RT      String   ---			Location to store matrix RT containing the order-preserving recoded timestamps from X 
-# XO      String   ---			Location to store sorted input matrix by the timestamps 
-# MF      String   ---          Location to store column indices of X excluding the baseline factors if available
-# alpha   Double   0.05         Parameter to compute a 100*(1-alpha)% confidence interval for the betas  
-# tol     Double   0.000001     Tolerance ("epsilon")
-# moi     Int      100     		Max. number of outer (Newton) iterations
-# mii     Int      0      		Max. number of inner (conjugate gradient) iterations, 0 = no max   
-# fmt     String   "text"       Matrix output format, usually "text" or "csv" (for matrices only)
-# ---------------------------------------------------------------------------------------------
-# OUTPUT: 
-# 1- A D x 7 matrix M, where D denotes the number of covariates, with the following schema:
-#	M[,1]: betas	
-#	M[,2]: exp(betas)
-#	M[,3]: standard error of betas
-#	M[,4]: Z 
-#	M[,5]: P-value
-#	M[,6]: lower 100*(1-alpha)% confidence interval of betas
-#	M[,7]: upper 100*(1-alpha)% confidence interval of betas
-#
-# Two log files containing a summary of some statistics of the fitted model:
-# 1- File S with the following format 
-#	- line 1: no. of observations
-#	- line 2: no. of events
-#   - line 3: log-likelihood 
-#	- line 4: AIC
-#	- line 5: Rsquare (Cox & Snell)
-#	- line 6: max possible Rsquare
-# 2- File T with the following format
-#	- line 1: Likelihood ratio test statistic, degree of freedom, P-value
-#	- line 2: Wald test statistic, degree of freedom, P-value
-#	- line 3: Score (log-rank) test statistic, degree of freedom, P-value
-# 
-# Additionally, the following matrices are stored (needed for prediction)
-# 1- A column matrix RT that contains the order-preserving recoded timestamps from X 
-# 2- Matrix XO which is matrix X with sorted timestamps  
-# 3- Variance-covariance matrix of the betas COV
-# 4- A column matrix MF that contains the column indices of X with the baseline factors removed (if available)
-# -------------------------------------------------------------------------------------------
-# HOW TO INVOKE THIS SCRIPT - EXAMPLE:
-# hadoop jar SystemML.jar -f Cox.dml -nvargs X=INPUT_DIR/X TE=INPUT_DIR/TE F=INPUT_DIR/F R=INPUT_DIR/R 
-#									  M=OUTPUT_DIR/M S=OUTPUT_DIR/S T=OUTPUT_DIR/T COV=OUTPUT_DIR/COV RT=OUTPUT_DIR/RT 
-#									  XO=OUTPUT_DIR/XO MF=OUTPUT/MF alpha=0.05 tol=0.000001 moi=100 mii=20 fmt=csv
-
-fileX = $X;
-fileTE = $TE;
-fileRT = $RT;
-fileMF = $MF;
-fileM = $M;
-fileXO = $XO;
-fileCOV = $COV;
-
-# Default values of some parameters
-fileF = ifdef ($F, " ");			 # $F=" "
-fileR = ifdef ($R, " ");         	 # $R=" " 
-fileS = ifdef ($S, " ");         	 # $S=" " 
-fileT = ifdef ($T, " ");         	 # $T=" " 
-fmtO = ifdef ($fmt, "text");         # $fmt="text" 
-alpha = ifdef ($alpha, 0.05);        # $alpha=0.05
-tol = ifdef ($tol, 0.000001);   	 # $tol=0.000001;
-maxiter = ifdef ($moi, 100);    	 # $moi=100;
-maxinneriter = ifdef ($mii, 0);      # $mii=0;
-
-X_orig = read (fileX);
-
-TE = read (fileTE);
-if (fileF != " ") {
-	F = read (fileF);
-}
-
-######## CHECK FOR FACTORS AND REMOVE THE BASELINE OF EACH FACTOR FROM THE DATASET
-
-if (fileR != " ") { # factors available
-	R = read (fileR);
-	if (ncol (R) != 2) {
-		stop ("Matrix R has wrong dimensions!");
-	}
-	print ("REMOVING BASLINE LEVEL OF EACH FACTOR...");	
-	# identify baseline columns to be removed from X_orig
-	col_sum = colSums (X_orig);
-	col_seq = t (seq(1, ncol (X_orig)));
-	parfor (i in 1:nrow (R), check = 0) {
-		start_ind = as.scalar (R[i,1]);
-		end_ind = as.scalar (R[i,2]);
-		baseline_ind = as.scalar (rowIndexMax (col_sum[1, start_ind:end_ind])) + start_ind - 1;
-		col_seq[,baseline_ind] = 0;
-	}
-	ones = matrix (1, rows = nrow (F), cols = 1);
-	F_filter = table (ones, F, 1, ncol (X_orig));
-	F_filter = removeEmpty (target = F_filter * col_seq, margin = "cols");
-	TE_F = t(append (t (TE), F_filter));
-} else if (fileF != " ") { # all features scale
-	TE_F = t(append (t (TE), t(F)));
-} else { # no features available 
-	TE_F = TE;
-}
-
-write (TE_F, fileMF, format = fmtO);
-
-X_orig = X_orig %*% table (TE_F, seq (1, nrow (TE_F)), ncol (X_orig), nrow (TE_F));
-
-######## RECODING TIMESTAMPS PRESERVING THE ORDER
-print ("RECODING TIMESTAMPS...");
-
-N = nrow (X_orig);
-X_orig = order (target = X_orig, by = 1);
-Idx = matrix (1, rows = N, cols = 1);
-num_timestamps = 1;
-if (N == 1) {
-	RT = matrix (1, rows = 1, cols = 1);
-} else {
-	Idx[2:N,1] = ppred (X_orig[1:(N - 1),1], X_orig[2:N,1], "!=");
-	num_timestamps = sum (Idx);
-	A = removeEmpty (target = diag (Idx), margin = "cols");
-	if (ncol (A) > 1) {
-		A[,1:(ncol (A) - 1)] = A[,1:(ncol (A) - 1)] - A[,2:ncol (A)];
-		B = cumsum (A);
-		RT = B %*% seq(1, ncol(B));
-	} else { # there is only one group
-		RT = matrix (1, rows = N, cols = 1);
-	}
-}
-E = X_orig[,2]; 
-
-print ("BEGIN COX PROPORTIONAL HAZARD SCRIPT");
-
-######## PARAMETERS OF THE TRUST REGION NEWTON METHOD WITH CONJUGATE GRADIENT
-#  b: the regression betas
-#  o: loss function value
-#  g: loss function gradient
-#  H: loss function Hessian
-# sb: shift of b in one iteration
-# so: shift of o in one iteration
-#  r: CG residual = H %*% sb + g
-#  d: CG direction vector
-# Hd: = H %*% d
-#  c: scalar coefficient in CG
-# delta: trust region size
-# tol: tolerance value
-#  i: outer (Newton) iteration count
-#  j: inner (CG) iteration count
-
-# computing initial coefficients b (all initialized to 0)
-if (ncol (X_orig) > 2) {
-	X = X_orig[,3:ncol(X_orig)];
-	D = ncol (X);
-	zeros_D = matrix (0, rows = D, cols = 1);
-	b = zeros_D; 
-}
-d_r = aggregate (target = E, groups = RT, fn = "sum"); 
-e_r = aggregate (target = RT, groups = RT, fn = "count");
-
-# computing initial loss function value o
-num_distinct = nrow (d_r); # no. of distinct timestamps
-e_r_rev_agg = cumsum (rev(e_r));
-d_r_rev = rev(d_r);
-o = sum (d_r_rev * log (e_r_rev_agg));
-o_init = o;
-if (ncol (X_orig) < 3) {
-	loglik = -o;
-	S_str = "no. of records " + N + " loglik " + loglik;
-	if (fileS != " ") {
-		write (S_str, fileS, format = fmtO);
-	} else {
-		print (S_str);
-	}
-	stop ("No features are selected!");
-}
-
-# computing initial gradient g 
-# part 1 g0_1
-g0_1 = - t (colSums (X * E)); # g_1
-# part 2 g0_2
-X_rev_agg = cumsum (rev(X));
-select = table (seq (1, num_distinct), e_r_rev_agg);
-X_agg = select %*% X_rev_agg;
-g0_2 = t (colSums ((X_agg * d_r_rev)/ e_r_rev_agg));
-#
-g0 = g0_1 + g0_2;
-g = g0; 
-
-# initialization for trust region Newton method
-delta = 0.5 * sqrt (D) / max (sqrt (rowSums (X ^ 2))); 
-initial_g2 = sum (g ^ 2);
-exit_g2 = initial_g2 * tol ^ 2;
-maxiter = 100; 
-maxinneriter = min (D, 100); 
-i = 0;
-sum_g2 = sum (g ^ 2);
-while (sum_g2 > exit_g2 & i < maxiter) {  
-	i = i + 1;
-    sb = zeros_D;  
-    r = g;  
-    r2 = sum (r ^ 2);  
-    exit_r2 = 0.01 * r2;
-    d = - r;  
-    trust_bound_reached = FALSE;  
-    j = 0;
-
-	exp_Xb = exp (X %*% b);
-	exp_Xb_agg = aggregate (target = exp_Xb, groups = RT, fn = "sum");
-	D_r_rev = cumsum (rev(exp_Xb_agg)); # denominator
-	X_exp_Xb = X * exp_Xb;
-	X_exp_Xb_rev_agg = cumsum (rev(X_exp_Xb));
-	X_exp_Xb_rev_agg = select %*% X_exp_Xb_rev_agg;
-
-    while (r2 > exit_r2 & (! trust_bound_reached) & j < maxinneriter) { 
-        j = j + 1;
-		# computing Hessian times d (Hd)
-		# part 1 Hd_1
-		Xd = X %*% d;
-		X_Xd_exp_Xb = X * (Xd * exp_Xb);
-		X_Xd_exp_Xb_rev_agg = cumsum (rev(X_Xd_exp_Xb));
-		X_Xd_exp_Xb_rev_agg = select %*% X_Xd_exp_Xb_rev_agg;
-		
-		Hd_1 = X_Xd_exp_Xb_rev_agg / D_r_rev;
-		# part 2 Hd_2
-		
-		Xd_exp_Xb = Xd * exp_Xb;
-		Xd_exp_Xb_rev_agg = cumsum (rev(Xd_exp_Xb));
-		Xd_exp_Xb_rev_agg = select %*% Xd_exp_Xb_rev_agg;
-		
-		Hd_2_num = X_exp_Xb_rev_agg * Xd_exp_Xb_rev_agg; # numerator
-		Hd_2 = Hd_2_num / (D_r_rev ^ 2);
-		
-		Hd = t (colSums ((Hd_1 - Hd_2) * d_r_rev));
-
-		c = r2 / sum (d * Hd);
-        [c, trust_bound_reached] = ensure_trust_bound (c, sum(d ^ 2), 2 * sum(sb * d), sum(sb ^ 2) - delta ^ 2);
-        sb = sb + c * d;  
-        r = r + c * Hd;  
-        r2_new = sum (r ^ 2);
-        d = - r + (r2_new / r2) * d;  
-        r2 = r2_new;
-    }
-
-    # computing loss change in 1 iteration (so)
-	# part 1 so_1
-	so_1 = - as.scalar (colSums (X * E) %*% (b + sb)); 
-	# part 2 so_2
-	exp_Xbsb = exp (X %*% (b + sb));
-	exp_Xbsb_agg = aggregate (target = exp_Xbsb, groups = RT, fn = "sum");
-	so_2 = sum (d_r_rev * log (cumsum (rev(exp_Xbsb_agg))));
-	#
-	so = so_1 + so_2;
-	so = so - o; 
-	
-	delta = update_trust_bound (delta, sqrt (sum (sb ^ 2)), so, sum (sb * g), 0.5 * sum (sb * (r + g)));
-    if (so < 0) {
-        b = b + sb;  
-        o = o + so;  
-		# compute new gradient g
-		exp_Xb = exp (X %*% b);
-		exp_Xb_agg = aggregate (target = exp_Xb, groups = RT, fn = "sum");
-		X_exp_Xb = X * exp_Xb;
-		X_exp_Xb_rev_agg = cumsum (rev(X_exp_Xb));
-		X_exp_Xb_rev_agg = select %*% X_exp_Xb_rev_agg;
-		
-		D_r_rev = cumsum (rev(exp_Xb_agg)); # denominator
-		g_2 = t (colSums ((X_exp_Xb_rev_agg / D_r_rev) * d_r_rev));
-		g = g0_1 + g_2;
-		sum_g2 = sum (g ^ 2);
-    }
-}
-
-if (sum_g2 > exit_g2 & i >= maxiter) {
-	print ("Trust region Newton method did not converge!");
-}
-
-
-print ("COMPUTING HESSIAN...");
-
-H0 = matrix (0, rows = D, cols = D);
-H = matrix (0, rows = D, cols = D);
-
-X_exp_Xb_rev_2 = rev(X_exp_Xb);
-X_rev_2 = rev(X);
-
-X_exp_Xb_rev_agg = cumsum (rev(X_exp_Xb));
-X_exp_Xb_rev_agg = select %*% X_exp_Xb_rev_agg; 
-
-parfor (i in 1:D, check = 0) {
-	Xi = X[,i];
-	Xi_rev = rev(Xi);
-
-	## ----------Start calculating H0--------------	
-	# part 1 H0_1
-	Xi_X = X_rev_2[,i:D] * Xi_rev;
-	Xi_X_rev_agg = cumsum (Xi_X);
-	Xi_X_rev_agg = select %*% Xi_X_rev_agg;
-	H0_1 = Xi_X_rev_agg / e_r_rev_agg;
-
-	# part 2 H0_2
-	Xi_agg = aggregate (target = Xi, groups = RT, fn = "sum");
-	Xi_agg_rev_agg = cumsum (rev(Xi_agg));	
-	H0_2_num = X_agg[,i:D] * Xi_agg_rev_agg; # numerator
-	H0_2 = H0_2_num / (e_r_rev_agg ^ 2);
-
-	H0[i,i:D] = colSums ((H0_1 - H0_2) * d_r_rev);
-	#-----------End calculating H0--------------------
-		
-	## ----------Start calculating H--------------
-	# part 1 H_1
-	Xi_X_exp_Xb = X_exp_Xb_rev_2[,i:D] * Xi_rev;
-	Xi_X_exp_Xb_rev_agg = cumsum (Xi_X_exp_Xb);
-	Xi_X_exp_Xb_rev_agg = select %*% Xi_X_exp_Xb_rev_agg;
-	H_1 = Xi_X_exp_Xb_rev_agg / D_r_rev;
-
-	# part 2 H_2
-	Xi_exp_Xb = exp_Xb * Xi;
-	Xi_exp_Xb_agg = aggregate (target = Xi_exp_Xb, groups = RT, fn = "sum"); 
-	
-	Xi_exp_Xb_agg_rev_agg = cumsum (rev(Xi_exp_Xb_agg));
-	H_2_num = X_exp_Xb_rev_agg[,i:D] * Xi_exp_Xb_agg_rev_agg; # numerator
-	H_2 = H_2_num / (D_r_rev ^ 2);
-	H[i,i:D] = colSums ((H_1 - H_2) * d_r_rev);
-	#-----------End calculating H--------------------
-}
-H = H + t(H) - diag( diag (H));
-H0 = H0 + t(H0) - diag( diag (H0));
-
-
-# compute standard error for betas
-H_inv = inv (H);
-se_b = sqrt (diag (H_inv));
-
-# compute exp(b), Z, Pr[>|Z|]
-exp_b = exp (b);
-Z = b / se_b;
-P = matrix (0, rows = D, cols = 1);
-parfor (i in 1:D) {
-	P[i,1] = 2 - 2 * (cdf (target = abs (as.scalar (Z[i,1])), dist = "normal"));
-}
-
-# compute confidence intervals for b
-z_alpha_2 = icdf (target = 1 - alpha / 2, dist = "normal");
-CI_l = b - se_b * z_alpha_2;
-CI_r = b - se_b + z_alpha_2;
-
-######## SOME STATISTICS AND TESTS
-# no. of records
-S_str = "no. of records " + N;
-
-# no.of events
-S_str = append (S_str, "no. of events " + sum (E));
-
-# log-likelihood
-loglik = -o;
-S_str = append (S_str, "loglik " + loglik + " ");
-
-# AIC = -2 * loglik + 2 * D
-AIC = -2 * loglik + 2 * D;
-S_str = append (S_str, "AIC " + AIC + " ");
-
-# Wald test
-wald_t = as.scalar (t(b) %*% H %*% b);
-wald_p = 1 - cdf (target = wald_t, dist = "chisq", df = D);
-T_str = "Wald test = " + wald_t + " on " + D + " df, p = " + wald_p + " ";
-
-# Likelihood ratio test
-lratio_t = 2 * o_init - 2 * o;
-lratio_p = 1 - cdf (target = lratio_t, dist = "chisq", df = D);
-T_str = append (T_str, "Likelihood ratio test = " + lratio_t + " on " + D + " df, p = " + lratio_p + " ");
-
-
-H0_inv = inv (H0);
-score_t = as.scalar (t (g0) %*% H0_inv %*% g0);
-score_p = 1 - cdf (target = score_t, dist = "chisq", df = D);
-T_str = append (T_str, "Score (logrank) test = " + score_t + " on " + D + " df, p = " + score_p + " ");
-
-# Rsquare (Cox & Snell)
-Rsquare = 1 - exp (-lratio_t / N);  
-Rsquare_max = 1 - exp (-2 * o_init / N);
-S_str = append (S_str, "Rsquare (Cox & Snell): " + Rsquare + " ");
-S_str = append (S_str, "max possible Rsquare: " + Rsquare_max);
-
-M = matrix (0, rows = D, cols = 7);
-M[,1] = b;
-M[,2] = exp_b;
-M[,3] = se_b;
-M[,4] = Z;
-M[,5] = P;
-M[,6] = CI_l;
-M[,7] = CI_r;
-
-write (M, fileM, format = fmtO);
-if (fileS != " ") {
-	write (S_str, fileS, format = fmtO);
-} else {
-	print (S_str);
-}
-if (fileT != " ") {
-	write (T_str, fileT, format = fmtO);
-} else {
-	print (T_str);
-}
-# needed for prediction
-write (RT, fileRT, format = fmtO);
-write (H_inv, fileCOV, format = fmtO);
-write (X_orig, fileXO, format = fmtO);
-
-
-####### UDFS FOR TRUST REGION NEWTON METHOD
-
-ensure_trust_bound =
-    function (double x, double a, double b, double c)
-    return (double x_new, boolean is_violated)
-{
-    if (a * x^2 + b * x + c > 0)
-    {
-        is_violated = TRUE;
-        rad = sqrt (b ^ 2 - 4 * a * c);
-        if (b >= 0) {
-            x_new = - (2 * c) / (b + rad);
-        } else {
-            x_new = - (b - rad) / (2 * a);
-        }
-    } else {
-        is_violated = FALSE;
-        x_new = x;
-    }
-}
-
-update_trust_bound =
-    function (double delta,
-              double sb_distance,
-              double so_exact,
-              double so_linear_approx,
-              double so_quadratic_approx)
-    return   (double delta)
-{
-    sigma1 = 0.25;
-    sigma2 = 0.5;
-    sigma3 = 4.0;
-
-    if (so_exact <= so_linear_approx) {
-       alpha = sigma3;
-    } else {
-       alpha = max (sigma1, - 0.5 * so_linear_approx / (so_exact - so_linear_approx));
-    }
-
-    rho = so_exact / so_quadratic_approx;
-    if (rho < 0.0001) {
-        delta = min (max (alpha, sigma1) * sb_distance, sigma2 * delta);
-    } else { if (rho < 0.25) {
-        delta = max (sigma1 * delta, min (alpha * sb_distance, sigma2 * delta));
-    } else { if (rho < 0.75) {
-        delta = max (sigma1 * delta, min (alpha * sb_distance, sigma3 * delta));
-    } else {
-        delta = max (delta, min (alpha * sb_distance, sigma3 * delta));
-    }}} 
-}
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+#  
+# THIS SCRIPT FITS A COX PROPORTIONAL HAZARD REGRESSION MODEL.
+# The Breslow method is used for handling ties and the regression parameters 
+# are computed using trust region newton method with conjugate gradient 
+# 
+# INPUT   PARAMETERS:
+# ---------------------------------------------------------------------------------------------
+# NAME    TYPE     DEFAULT      MEANING
+# ---------------------------------------------------------------------------------------------
+# X       String   ---          Location to read the input matrix X containing the survival data containing the following information
+# 								 - 1: timestamps 
+#								 - 2: whether an event occurred (1) or data is censored (0)
+#								 - 3: feature vectors 	
+# TE	  String   ---          Column indices of X as a column vector which contain timestamp (first row) and event information (second row)
+# F 	  String   " "			Column indices of X as a column vector which are to be used for fitting the Cox model
+# R   	  String   " "	        If factors (categorical variables) are available in the input matrix X, location to read matrix R containing 
+#								the start and end indices of the factors in X
+#								 - R[,1]: start indices
+#								 - R[,2]: end indices	
+#								Alternatively, user can specify the indices of the baseline level of each factor which needs to be removed from X; 
+#								in this case the start and end indices corresponding to the baseline level need to be the same;
+#								if R is not provided by default all variables are considered to be continuous 
+# M       String   ---          Location to store the results of Cox regression analysis including estimated regression parameters of the fitted 
+#								Cox model (the betas), their standard errors, confidence intervals, and P-values 
+# S       String   " "          Location to store a summary of some statistics of the fitted cox proportional hazard model including			
+#								no. of records, no. of events, log-likelihood, AIC, Rsquare (Cox & Snell), and max possible Rsquare; 
+#								by default is standard output  
+# T       String   " "          Location to store the results of Likelihood ratio test, Wald test, and Score (log-rank) test of the fitted model;
+#								by default is standard output 
+# COV	  String   ---			Location to store the variance-covariance matrix of the betas
+# RT      String   ---			Location to store matrix RT containing the order-preserving recoded timestamps from X 
+# XO      String   ---			Location to store sorted input matrix by the timestamps 
+# MF      String   ---          Location to store column indices of X excluding the baseline factors if available
+# alpha   Double   0.05         Parameter to compute a 100*(1-alpha)% confidence interval for the betas  
+# tol     Double   0.000001     Tolerance ("epsilon")
+# moi     Int      100     		Max. number of outer (Newton) iterations
+# mii     Int      0      		Max. number of inner (conjugate gradient) iterations, 0 = no max   
+# fmt     String   "text"       Matrix output format, usually "text" or "csv" (for matrices only)
+# ---------------------------------------------------------------------------------------------
+# OUTPUT: 
+# 1- A D x 7 matrix M, where D denotes the number of covariates, with the following schema:
+#	M[,1]: betas	
+#	M[,2]: exp(betas)
+#	M[,3]: standard error of betas
+#	M[,4]: Z 
+#	M[,5]: P-value
+#	M[,6]: lower 100*(1-alpha)% confidence interval of betas
+#	M[,7]: upper 100*(1-alpha)% confidence interval of betas
+#
+# Two log files containing a summary of some statistics of the fitted model:
+# 1- File S with the following format 
+#	- line 1: no. of observations
+#	- line 2: no. of events
+#   - line 3: log-likelihood 
+#	- line 4: AIC
+#	- line 5: Rsquare (Cox & Snell)
+#	- line 6: max possible Rsquare
+# 2- File T with the following format
+#	- line 1: Likelihood ratio test statistic, degree of freedom, P-value
+#	- line 2: Wald test statistic, degree of freedom, P-value
+#	- line 3: Score (log-rank) test statistic, degree of freedom, P-value
+# 
+# Additionally, the following matrices are stored (needed for prediction)
+# 1- A column matrix RT that contains the order-preserving recoded timestamps from X 
+# 2- Matrix XO which is matrix X with sorted timestamps  
+# 3- Variance-covariance matrix of the betas COV
+# 4- A column matrix MF that contains the column indices of X with the baseline factors removed (if available)
+# -------------------------------------------------------------------------------------------
+# HOW TO INVOKE THIS SCRIPT - EXAMPLE:
+# hadoop jar SystemML.jar -f Cox.dml -nvargs X=INPUT_DIR/X TE=INPUT_DIR/TE F=INTPUT_DIR/F R=INTPUT_DIR/R 
+#									  M=OUTPUT_DIR/M S=OUTPUT_DIR/S T=OUTPUT_DIR/T COV=OUTPUT_DIR/COV RT=OUTPUT_DIR/RT 
+#									  XO=OUTPUT_DIR/XO MF=OUTPUT/MF alpha=0.05 tol=0.000001 moi=100 mii=20 fmt=csv
+
+fileX = $X;
+fileTE = $TE;
+fileRT = $RT;
+fileMF = $MF;
+fileM = $M;
+fileXO = $XO;
+fileCOV = $COV;
+
+# Default values of some parameters
+fileF = ifdef ($F, " ");			 # $F=" "
+fileR = ifdef ($R, " ");         	 # $R=" " 
+fileS = ifdef ($S, " ");         	 # $S=" " 
+fileT = ifdef ($T, " ");         	 # $T=" " 
+fmtO = ifdef ($fmt, "text");         # $fmt="text" 
+alpha = ifdef ($alpha, 0.05);        # $alpha=0.05
+tol = ifdef ($tol, 0.000001);   	 # $tol=0.000001;
+maxiter = ifdef ($moi, 100);    	 # $moi=100;
+maxinneriter = ifdef ($mii, 0);      # $mii=0;
+
+X_orig = read (fileX);
+
+TE = read (fileTE);
+if (fileF != " ") {
+	F = read (fileF);
+}
+
+######## CHECK FOR FACTORS AND REMOVE THE BASELINE OF EACH FACTOR FROM THE DATASET
+
+if (fileR != " ") { # factors available
+	R = read (fileR);
+	if (ncol (R) != 2) {
+		stop ("Matrix R has wrong dimensions!");
+	}
+	print ("REMOVING BASLINE LEVEL OF EACH FACTOR...");	
+	# identify baseline columns to be removed from X_orig
+	col_sum = colSums (X_orig);
+	col_seq = t (seq(1, ncol (X_orig)));
+	parfor (i in 1:nrow (R), check = 0) {
+		start_ind = as.scalar (R[i,1]);
+		end_ind = as.scalar (R[i,2]);
+		baseline_ind = as.scalar (rowIndexMax (col_sum[1, start_ind:end_ind])) + start_ind - 1;
+		col_seq[,baseline_ind] = 0;
+	}
+	ones = matrix (1, rows = nrow (F), cols = 1);
+	F_filter = table (ones, F, 1, ncol (X_orig));
+	F_filter = removeEmpty (target = F_filter * col_seq, margin = "cols");
+	TE_F = t(append (t (TE), F_filter));
+} else if (fileF != " ") { # all features scale
+	TE_F = t(append (t (TE), t(F)));
+} else { # no features available 
+	TE_F = TE;
+}
+
+write (TE_F, fileMF, format = fmtO);
+
+X_orig = X_orig %*% table (TE_F, seq (1, nrow (TE_F)), ncol (X_orig), nrow (TE_F));
+
+######## RECODING TIMESTAMPS PRESERVING THE ORDER
+print ("RECODING TIMESTAMPS...");
+
+N = nrow (X_orig);
+X_orig = order (target = X_orig, by = 1);
+Idx = matrix (1, rows = N, cols = 1);
+num_timestamps = 1;
+if (N == 1) {
+	RT = matrix (1, rows = 1, cols = 1);
+} else {
+	Idx[2:N,1] = ppred (X_orig[1:(N - 1),1], X_orig[2:N,1], "!=");
+	num_timestamps = sum (Idx);
+	A = removeEmpty (target = diag (Idx), margin = "cols");
+	if (ncol (A) > 1) {
+		A[,1:(ncol (A) - 1)] = A[,1:(ncol (A) - 1)] - A[,2:ncol (A)];
+		B = cumsum (A);
+		RT = B %*% seq(1, ncol(B));
+	} else { # there is only one group
+		RT = matrix (1, rows = N, cols = 1);
+	}
+}
+E = X_orig[,2]; 
+
+print ("BEGIN COX PROPORTIONAL HAZARD SCRIPT");
+
+######## PARAMETERS OF THE TRUST REGION NEWTON METHOD WITH CONJUGATE GRADIENT
+#  b: the regression betas
+#  o: loss function value
+#  g: loss function gradient
+#  H: loss function Hessian
+# sb: shift of b in one iteration
+# so: shift of o in one iteration
+#  r: CG residual = H %*% sb + g
+#  d: CG direction vector
+# Hd: = H %*% d
+#  c: scalar coefficient in CG
+# delta: trust region size
+# tol: tolerance value
+#  i: outer (Newton) iteration count
+#  j: inner (CG) iteration count
+
+# computing initial coefficients b (all initialized to 0)
+if (ncol (X_orig) > 2) {
+	X = X_orig[,3:ncol(X_orig)];
+	D = ncol (X);
+	zeros_D = matrix (0, rows = D, cols = 1);
+	b = zeros_D; 
+}
+d_r = aggregate (target = E, groups = RT, fn = "sum"); 
+e_r = aggregate (target = RT, groups = RT, fn = "count");
+
+# computing initial loss function value o
+num_distinct = nrow (d_r); # no. of distinct timestamps
+e_r_rev_agg = cumsum (rev(e_r));
+d_r_rev = rev(d_r);
+o = sum (d_r_rev * log (e_r_rev_agg));
+o_init = o;
+if (ncol (X_orig) < 3) {
+	loglik = -o;
+	S_str = "no. of records " + N + " loglik " + loglik;
+	if (fileS != " ") {
+		write (S_str, fileS, format = fmtO);
+	} else {
+		print (S_str);
+	}
+	stop ("No features are selected!");
+}
+
+# computing initial gradient g 
+# part 1 g0_1
+g0_1 = - t (colSums (X * E)); # g_1
+# part 2 g0_2
+X_rev_agg = cumsum (rev(X));
+select = table (seq (1, num_distinct), e_r_rev_agg);
+X_agg = select %*% X_rev_agg;
+g0_2 = t (colSums ((X_agg * d_r_rev)/ e_r_rev_agg));
+#
+g0 = g0_1 + g0_2;
+g = g0; 
+
+# initialization for trust region Newton method
+delta = 0.5 * sqrt (D) / max (sqrt (rowSums (X ^ 2))); 
+initial_g2 = sum (g ^ 2);
+exit_g2 = initial_g2 * tol ^ 2;
+maxiter = 100; 
+maxinneriter = min (D, 100); 
+i = 0;
+sum_g2 = sum (g ^ 2);
+while (sum_g2 > exit_g2 & i < maxiter) {  
+	i = i + 1;
+    sb = zeros_D;  
+    r = g;  
+    r2 = sum (r ^ 2);  
+    exit_r2 = 0.01 * r2;
+    d = - r;  
+    trust_bound_reached = FALSE;  
+    j = 0;
+
+	exp_Xb = exp (X %*% b);
+	exp_Xb_agg = aggregate (target = exp_Xb, groups = RT, fn = "sum");
+	D_r_rev = cumsum (rev(exp_Xb_agg)); # denominator
+	X_exp_Xb = X * exp_Xb;
+	X_exp_Xb_rev_agg = cumsum (rev(X_exp_Xb));
+	X_exp_Xb_rev_agg = select %*% X_exp_Xb_rev_agg;
+
+    while (r2 > exit_r2 & (! trust_bound_reached) & j < maxinneriter) { 
+        j = j + 1;
+		# computing Hessian times d (Hd)
+		# part 1 Hd_1
+		Xd = X %*% d;
+		X_Xd_exp_Xb = X * (Xd * exp_Xb);
+		X_Xd_exp_Xb_rev_agg = cumsum (rev(X_Xd_exp_Xb));
+		X_Xd_exp_Xb_rev_agg = select %*% X_Xd_exp_Xb_rev_agg;
+		
+		Hd_1 = X_Xd_exp_Xb_rev_agg / D_r_rev;
+		# part 2 Hd_2
+		
+		Xd_exp_Xb = Xd * exp_Xb;
+		Xd_exp_Xb_rev_agg = cumsum (rev(Xd_exp_Xb));
+		Xd_exp_Xb_rev_agg = select %*% Xd_exp_Xb_rev_agg;
+		
+		Hd_2_num = X_exp_Xb_rev_agg * Xd_exp_Xb_rev_agg; # numerator
+		Hd_2 = Hd_2_num / (D_r_rev ^ 2);
+		
+		Hd = t (colSums ((Hd_1 - Hd_2) * d_r_rev));
+
+		c = r2 / sum (d * Hd);
+        [c, trust_bound_reached] = ensure_trust_bound (c, sum(d ^ 2), 2 * sum(sb * d), sum(sb ^ 2) - delta ^ 2);
+        sb = sb + c * d;  
+        r = r + c * Hd;  
+        r2_new = sum (r ^ 2);
+        d = - r + (r2_new / r2) * d;  
+        r2 = r2_new;
+    }
+
+    # computing loss change in 1 iteration (so)
+	# part 1 so_1
+	so_1 = - as.scalar (colSums (X * E) %*% (b + sb)); 
+	# part 2 so_2
+	exp_Xbsb = exp (X %*% (b + sb));
+	exp_Xbsb_agg = aggregate (target = exp_Xbsb, groups = RT, fn = "sum");
+	so_2 = sum (d_r_rev * log (cumsum (rev(exp_Xbsb_agg))));
+	#
+	so = so_1 + so_2;
+	so = so - o; 
+	
+	delta = update_trust_bound (delta, sqrt (sum (sb ^ 2)), so, sum (sb * g), 0.5 * sum (sb * (r + g)));
+    if (so < 0) {
+        b = b + sb;  
+        o = o + so;  
+		# compute new gradient g
+		exp_Xb = exp (X %*% b);
+		exp_Xb_agg = aggregate (target = exp_Xb, groups = RT, fn = "sum");
+		X_exp_Xb = X * exp_Xb;
+		X_exp_Xb_rev_agg = cumsum (rev(X_exp_Xb));
+		X_exp_Xb_rev_agg = select %*% X_exp_Xb_rev_agg;
+		
+		D_r_rev = cumsum (rev(exp_Xb_agg)); # denominator
+		g_2 = t (colSums ((X_exp_Xb_rev_agg / D_r_rev) * d_r_rev));
+		g = g0_1 + g_2;
+		sum_g2 = sum (g ^ 2);
+    }
+}
+
+if (sum_g2 > exit_g2 & i >= maxiter) {
+	print ("Trust region Newton method did not converge!");
+}
+
+
+print ("COMPUTING HESSIAN...");
+
+H0 = matrix (0, rows = D, cols = D);
+H = matrix (0, rows = D, cols = D);
+
+X_exp_Xb_rev_2 = rev(X_exp_Xb);
+X_rev_2 = rev(X);
+
+X_exp_Xb_rev_agg = cumsum (rev(X_exp_Xb));
+X_exp_Xb_rev_agg = select %*% X_exp_Xb_rev_agg; 
+
+parfor (i in 1:D, check = 0) {
+	Xi = X[,i];
+	Xi_rev = rev(Xi);
+
+	## ----------Start calculating H0--------------	
+	# part 1 H0_1
+	Xi_X = X_rev_2[,i:D] * Xi_rev;
+	Xi_X_rev_agg = cumsum (Xi_X);
+	Xi_X_rev_agg = select %*% Xi_X_rev_agg;
+	H0_1 = Xi_X_rev_agg / e_r_rev_agg;
+
+	# part 2 H0_2
+	Xi_agg = aggregate (target = Xi, groups = RT, fn = "sum");
+	Xi_agg_rev_agg = cumsum (rev(Xi_agg));	
+	H0_2_num = X_agg[,i:D] * Xi_agg_rev_agg; # numerator
+	H0_2 = H0_2_num / (e_r_rev_agg ^ 2);
+
+	H0[i,i:D] = colSums ((H0_1 - H0_2) * d_r_rev);
+	#-----------End calculating H0--------------------
+		
+	## ----------Start calculating H--------------
+	# part 1 H_1
+	Xi_X_exp_Xb = X_exp_Xb_rev_2[,i:D] * Xi_rev;
+	Xi_X_exp_Xb_rev_agg = cumsum (Xi_X_exp_Xb);
+	Xi_X_exp_Xb_rev_agg = select %*% Xi_X_exp_Xb_rev_agg;
+	H_1 = Xi_X_exp_Xb_rev_agg / D_r_rev;
+
+	# part 2 H_2
+	Xi_exp_Xb = exp_Xb * Xi;
+	Xi_exp_Xb_agg = aggregate (target = Xi_exp_Xb, groups = RT, fn = "sum"); 
+	
+	Xi_exp_Xb_agg_rev_agg = cumsum (rev(Xi_exp_Xb_agg));
+	H_2_num = X_exp_Xb_rev_agg[,i:D] * Xi_exp_Xb_agg_rev_agg; # numerator
+	H_2 = H_2_num / (D_r_rev ^ 2);
+	H[i,i:D] = colSums ((H_1 - H_2) * d_r_rev);
+	#-----------End calculating H--------------------
+}
+H = H + t(H) - diag( diag (H));
+H0 = H0 + t(H0) - diag( diag (H0));
+
+
+# compute standard error for betas
+H_inv = inv (H);
+se_b = sqrt (diag (H_inv));
+
+# compute exp(b), Z, Pr[>|Z|]
+exp_b = exp (b);
+Z = b / se_b;
+P = matrix (0, rows = D, cols = 1);
+parfor (i in 1:D) {
+	P[i,1] = 2 - 2 * (cdf (target = abs (as.scalar (Z[i,1])), dist = "normal"));
+}
+
+# compute confidence intervals for b
+z_alpha_2 = icdf (target = 1 - alpha / 2, dist = "normal");
+CI_l = b - se_b * z_alpha_2;
+CI_r = b - se_b + z_alpha_2;
+
+######## SOME STATISTICS AND TESTS
+# no. of records
+S_str = "no. of records " + N;
+
+# no.of events
+S_str = append (S_str, "no. of events " + sum (E));
+
+# log-likelihood
+loglik = -o;
+S_str = append (S_str, "loglik " + loglik + " ");
+
+# AIC = -2 * loglik + 2 * D
+AIC = -2 * loglik + 2 * D;
+S_str = append (S_str, "AIC " + AIC + " ");
+
+# Wald test
+wald_t = as.scalar (t(b) %*% H %*% b);
+wald_p = 1 - cdf (target = wald_t, dist = "chisq", df = D);
+T_str = "Wald test = " + wald_t + " on " + D + " df, p = " + wald_p + " ";
+
+# Likelihood ratio test
+lratio_t = 2 * o_init - 2 * o;
+lratio_p = 1 - cdf (target = lratio_t, dist = "chisq", df = D);
+T_str = append (T_str, "Likelihood ratio test = " + lratio_t + " on " + D + " df, p = " + lratio_p + " ");
+
+
+H0_inv = inv (H0);
+score_t = as.scalar (t (g0) %*% H0_inv %*% g0);
+score_p = 1 - cdf (target = score_t, dist = "chisq", df = D);
+T_str = append (T_str, "Score (logrank) test = " + score_t + " on " + D + " df, p = " + score_p + " ");
+
+# Rsquare (Cox & Snell)
+Rsquare = 1 - exp (-lratio_t / N);  
+Rsquare_max = 1 - exp (-2 * o_init / N);
+S_str = append (S_str, "Rsquare (Cox & Snell): " + Rsquare + " ");
+S_str = append (S_str, "max possible Rsquare: " + Rsquare_max);
+
+M = matrix (0, rows = D, cols = 7);
+M[,1] = b;
+M[,2] = exp_b;
+M[,3] = se_b;
+M[,4] = Z;
+M[,5] = P;
+M[,6] = CI_l;
+M[,7] = CI_r;
+
+write (M, fileM, format = fmtO);
+if (fileS != " ") {
+	write (S_str, fileS, format = fmtO);
+} else {
+	print (S_str);
+}
+if (fileT != " ") {
+	write (T_str, fileT, format = fmtO);
+} else {
+	print (T_str);
+}
+# needed for prediction
+write (RT, fileRT, format = fmtO);
+write (H_inv, fileCOV, format = fmtO);
+write (X_orig, fileXO, format = fmtO);
+
+
+####### UDFS FOR TRUST REGION NEWTON METHOD
+
+ensure_trust_bound =
+    function (double x, double a, double b, double c)
+    return (double x_new, boolean is_violated)
+{
+    if (a * x^2 + b * x + c > 0)
+    {
+        is_violated = TRUE;
+        rad = sqrt (b ^ 2 - 4 * a * c);
+        if (b >= 0) {
+            x_new = - (2 * c) / (b + rad);
+        } else {
+            x_new = - (b - rad) / (2 * a);
+        }
+    } else {
+        is_violated = FALSE;
+        x_new = x;
+    }
+}
+
+update_trust_bound =
+    function (double delta,
+              double sb_distance,
+              double so_exact,
+              double so_linear_approx,
+              double so_quadratic_approx)
+    return   (double delta)
+{
+    sigma1 = 0.25;
+    sigma2 = 0.5;
+    sigma3 = 4.0;
+
+    if (so_exact <= so_linear_approx) {
+       alpha = sigma3;
+    } else {
+       alpha = max (sigma1, - 0.5 * so_linear_approx / (so_exact - so_linear_approx));
+    }
+
+    rho = so_exact / so_quadratic_approx;
+    if (rho < 0.0001) {
+        delta = min (max (alpha, sigma1) * sb_distance, sigma2 * delta);
+    } else { if (rho < 0.25) {
+        delta = max (sigma1 * delta, min (alpha * sb_distance, sigma2 * delta));
+    } else { if (rho < 0.75) {
+        delta = max (sigma1 * delta, min (alpha * sb_distance, sigma3 * delta));
+    } else {
+        delta = max (delta, min (alpha * sb_distance, sigma3 * delta));
+    }}} 
+}