You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by de...@apache.org on 2016/07/08 22:01:18 UTC
[2/2] incubator-systemml git commit: [SYSTEMML-657] Deprecate ppred
built-in function
[SYSTEMML-657] Deprecate ppred built-in function
Closes #184.
Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/9b9d019b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/9b9d019b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/9b9d019b
Branch: refs/heads/master
Commit: 9b9d019b246827db09754474b8bc81a90c33acf9
Parents: 33c6c4b
Author: Tatsuya.Nishiyama <ni...@gmail.com>
Authored: Fri Jul 8 14:57:25 2016 -0700
Committer: Deron Eriksson <de...@us.ibm.com>
Committed: Fri Jul 8 14:57:25 2016 -0700
----------------------------------------------------------------------
docs/dml-language-reference.md | 4 +-
scripts/algorithms/ALS-CG.dml | 10 +-
scripts/algorithms/ALS-DS.dml | 12 +-
scripts/algorithms/ALS_predict.dml | 2 +-
scripts/algorithms/ALS_topk_predict.dml | 6 +-
scripts/algorithms/Cox-predict.dml | 6 +-
scripts/algorithms/Cox.dml | 2 +-
scripts/algorithms/CsplineCG.dml | 2 +-
scripts/algorithms/CsplineDS.dml | 2 +-
scripts/algorithms/GLM-predict.dml | 40 ++---
scripts/algorithms/GLM.dml | 146 +++++++++----------
scripts/algorithms/KM.dml | 20 +--
scripts/algorithms/Kmeans-predict.dml | 8 +-
scripts/algorithms/Kmeans.dml | 16 +-
scripts/algorithms/LinearRegCG.dml | 2 +-
scripts/algorithms/LinearRegDS.dml | 2 +-
scripts/algorithms/MultiLogReg.dml | 4 +-
scripts/algorithms/StepGLM.dml | 122 ++++++++--------
scripts/algorithms/StepLinearRegDS.dml | 2 +-
scripts/algorithms/Univar-Stats.dml | 4 +-
scripts/algorithms/decision-tree-predict.dml | 6 +-
scripts/algorithms/decision-tree.dml | 46 +++---
scripts/algorithms/l2-svm-predict.dml | 8 +-
scripts/algorithms/l2-svm.dml | 8 +-
scripts/algorithms/m-svm-predict.dml | 2 +-
scripts/algorithms/m-svm.dml | 10 +-
scripts/algorithms/naive-bayes-predict.dml | 2 +-
.../algorithms/obsolete/naive-bayes-parfor.dml | 4 +-
scripts/algorithms/random-forest-predict.dml | 10 +-
scripts/algorithms/random-forest.dml | 50 +++----
scripts/algorithms/stratstats.dml | 22 +--
scripts/datagen/genRandData4ALS.dml | 2 +-
scripts/datagen/genRandData4Kmeans.dml | 4 +-
.../datagen/genRandData4LinearReg_LTstats.dml | 6 +-
scripts/datagen/genRandData4LogReg_LTstats.dml | 6 +-
.../datagen/genRandData4LogisticRegression.dml | 2 +-
scripts/datagen/genRandData4MultiClassSVM.dml | 2 +-
scripts/datagen/genRandData4Multinomial.dml | 2 +-
scripts/datagen/genRandData4NMF.dml | 2 +-
scripts/datagen/genRandData4NMFBlockwise.dml | 2 +-
scripts/datagen/genRandData4StratStats.dml | 2 +-
scripts/datagen/genRandData4Transform.dml | 6 +-
scripts/staging/knn.dml | 12 +-
scripts/staging/regression/lasso/lasso.dml | 6 +-
scripts/utils/project.dml | 2 +-
scripts/utils/sample.dml | 4 +-
.../sysml/parser/BuiltinFunctionExpression.java | 3 +
47 files changed, 323 insertions(+), 320 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9b9d019b/docs/dml-language-reference.md
----------------------------------------------------------------------
diff --git a/docs/dml-language-reference.md b/docs/dml-language-reference.md
index 720715f..ee16085 100644
--- a/docs/dml-language-reference.md
+++ b/docs/dml-language-reference.md
@@ -148,7 +148,7 @@ SystemML follows same associativity and precedence order as R as described in be
| %/% %% | Matrix or Scalar | Matrix or Scalar<sup>1, 2</sup> | Integer division and Modulus operator
| / * | Matrix or Scalar | Matrix or Scalar<sup>1, 2</sup> | Multiplication and Division
| + - | Matrix or Scalar | Matrix or Scalar<sup>1, 2</sup> | Addition (or string concatenation) and Subtraction
-| < > == != <= >= | Matrix or Scalar (any value type) | Scalar<sup>2</sup> (boolean type) | Relational operators
+| < > == != <= >= | Matrix or Scalar (any value type) | Matrix or Scalar<sup>1, 2</sup> (boolean type) | Relational operators
| & \| ! | Scalar | Scalar | Boolean operators (Note: operators && and \|\| are not supported)
| = | - | - | Assignment (Lowest precendence). Note: associativity of assignment "a = b = 3" is not supported
@@ -634,7 +634,7 @@ Function | Description | Parameters | Example
pmin() <br/> pmax() | "parallel min/max".<br/> Return cell-wise minimum/maximum. If the second input is a scalar then it is compared against all cells in the first input. | Input: (<matrix>, <matrix>), or (<matrix>, <scalar>) <br/> Output: matrix | pmin(X,Y) <br/> pmax(X,y)
rowIndexMax() | Row-wise computation -- for each row, find the max value, and return its column index. | Input: (matrix) <br/> Output: (n x 1) matrix | rowIndexMax(X)
rowIndexMin() | Row-wise computation -- for each row, find the minimum value, and return its column index. | Input: (matrix) <br/> Output: (n x 1) matrix | rowIndexMin(X)
-ppred() | "parallel predicate".<br/> The relational operator specified in the third argument is cell-wise applied to input matrices. If the second argument is a scalar, then it is used against all cells in the first argument. | Input: (<matrix>, <matrix>, <string with relational operator>), or <br/> (<matrix>, <scalar>, <string with relational operator>) <br/> Output: matrix | ppred(X,Y,"<") <br/> ppred(X,y,"<")
+ppred() | "parallel predicate".<br/> The relational operator specified in the third argument is cell-wise applied to input matrices. If the second argument is a scalar, then it is used against all cells in the first argument. <br/> **NOTE: ppred() has been replaced by the relational operators, so its use is discouraged.**| Input: (<matrix>, <matrix>, <string with relational operator>), or <br/> (<matrix>, <scalar>, <string with relational operator>) <br/> Output: matrix | ppred(X,Y,"<") <br/> ppred(X,y,"<")
### Casting Built-In Functions
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9b9d019b/scripts/algorithms/ALS-CG.dml
----------------------------------------------------------------------
diff --git a/scripts/algorithms/ALS-CG.dml b/scripts/algorithms/ALS-CG.dml
index cd2ba0b..79b1464 100644
--- a/scripts/algorithms/ALS-CG.dml
+++ b/scripts/algorithms/ALS-CG.dml
@@ -73,7 +73,7 @@ n = ncol (X);
U = rand (rows = m, cols = r, min = -0.5, max = 0.5); # mxr
V = rand (rows = n, cols = r, min = -0.5, max = 0.5); # nxr
-W = ppred (X, 0, "!=");
+W = X != 0;
# check for regularization
if( reg == "L2" ) {
@@ -101,7 +101,7 @@ is_U = TRUE; # TRUE = Optimize U, FALSE = Optimize V
maxinneriter = r ; # min (ncol (U), 15);
if( check ) {
- loss_init = 0.5 * sum (ppred(X,0, "!=") * (U %*% t(V) - X) ^ 2);
+ loss_init = 0.5 * sum ((X != 0) * (U %*% t(V) - X) ^ 2);
loss_init = loss_init + 0.5 * lambda * (sum (U ^ 2 * row_nonzeros) + sum (V ^ 2 * col_nonzeros));
print ("----- Initial train loss: " + loss_init + " -----");
}
@@ -112,10 +112,10 @@ while( as.integer(it/2) < max_iter & ! converged )
{
it = it + 1;
if( is_U ) {
- G = (ppred(X,0,"!=") * (U %*% t(V) - X)) %*% V + lambda * U * row_nonzeros;
+ G = ((X != 0) * (U %*% t(V) - X)) %*% V + lambda * U * row_nonzeros;
}
else {
- G = t(t(U) %*% (ppred(X,0,"!=") * (U %*% t(V) - X))) + lambda * V * col_nonzeros;
+ G = t(t(U) %*% ((X != 0) * (U %*% t(V) - X))) + lambda * V * col_nonzeros;
}
R = -G;
@@ -149,7 +149,7 @@ while( as.integer(it/2) < max_iter & ! converged )
# check for convergence
if( check & (it%%2 == 0) ) {
- loss_cur = 0.5 * sum (ppred(X,0, "!=") * (U %*% t(V) - X) ^ 2);
+ loss_cur = 0.5 * sum ((X != 0) * (U %*% t(V) - X) ^ 2);
loss_cur = loss_cur + 0.5 * lambda * (sum (U ^ 2 * row_nonzeros) + sum (V ^ 2 * col_nonzeros));
loss_dec = (loss_init - loss_cur) / loss_init;
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9b9d019b/scripts/algorithms/ALS-DS.dml
----------------------------------------------------------------------
diff --git a/scripts/algorithms/ALS-DS.dml b/scripts/algorithms/ALS-DS.dml
index 537c8ae..b3c5e18 100644
--- a/scripts/algorithms/ALS-DS.dml
+++ b/scripts/algorithms/ALS-DS.dml
@@ -67,11 +67,11 @@ V = read (fileV);
# check the input matrix V, if some rows or columns contain only zeros remove them from V
-V_nonzero_ind = ppred (V, 0, "!=");
+V_nonzero_ind = V != 0;
row_nonzeros = rowSums (V_nonzero_ind);
col_nonzeros = t (colSums (V_nonzero_ind));
-orig_nonzero_rows_ind = ppred (row_nonzeros, 0, "!=");
-orig_nonzero_cols_ind = ppred (col_nonzeros, 0, "!=");
+orig_nonzero_rows_ind = row_nonzeros != 0;
+orig_nonzero_cols_ind = col_nonzeros != 0;
num_zero_rows = nrow (V) - sum (orig_nonzero_rows_ind);
num_zero_cols = ncol (V) - sum (orig_nonzero_cols_ind);
if (num_zero_rows > 0) {
@@ -84,7 +84,7 @@ if (num_zero_cols > 0) {
}
if (num_zero_rows > 0 | num_zero_cols > 0) {
print ("Recomputing nonzero rows and columns!");
- V_nonzero_ind = ppred (V, 0, "!=");
+ V_nonzero_ind = V != 0;
row_nonzeros = rowSums (V_nonzero_ind);
col_nonzeros = t (colSums (V_nonzero_ind));
}
@@ -121,7 +121,7 @@ while ((it < max_iter) & (!converged)) {
it = it + 1;
# keep R fixed and update L
parfor (i in 1:m) {
- R_nonzero_ind = t(ppred(V[i,],0,"!="));
+ R_nonzero_ind = t(V[i,] != 0);
R_nonzero = removeEmpty (target=R * R_nonzero_ind, margin="rows");
A1 = (t(R_nonzero) %*% R_nonzero) + (as.scalar(row_nonzeros[i,1]) * lambda_I); # coefficient matrix
L[i,] = t(solve (A1, t(V[i,] %*% R)));
@@ -129,7 +129,7 @@ while ((it < max_iter) & (!converged)) {
# keep L fixed and update R
parfor (j in 1:n) {
- L_nonzero_ind = t(ppred(Vt[j,],0,"!="))
+ L_nonzero_ind = t(Vt[j,] != 0)
L_nonzero = removeEmpty (target=L * L_nonzero_ind, margin="rows");
A2 = (t(L_nonzero) %*% L_nonzero) + (as.scalar(col_nonzeros[j,1]) * lambda_I); # coefficient matrix
R[j,] = t(solve (A2, t(Vt[j,] %*% L)));
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9b9d019b/scripts/algorithms/ALS_predict.dml
----------------------------------------------------------------------
diff --git a/scripts/algorithms/ALS_predict.dml b/scripts/algorithms/ALS_predict.dml
index 6cf5b48..0af8301 100644
--- a/scripts/algorithms/ALS_predict.dml
+++ b/scripts/algorithms/ALS_predict.dml
@@ -85,7 +85,7 @@ UI = table (X[,1], X[,2], ones, Vrows, Vcols);
U = rowSums (UI)
# replacing all rows > 1 with 1
-U = ppred (U, 1, ">=");
+U = U >= 1;
# selecting users from factor L
U_prime = L * U;
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9b9d019b/scripts/algorithms/ALS_topk_predict.dml
----------------------------------------------------------------------
diff --git a/scripts/algorithms/ALS_topk_predict.dml b/scripts/algorithms/ALS_topk_predict.dml
index 9b7e476..a834473 100644
--- a/scripts/algorithms/ALS_topk_predict.dml
+++ b/scripts/algorithms/ALS_topk_predict.dml
@@ -60,7 +60,7 @@ V = read (fileV);
Vrows = nrow(V);
Vcols = ncol(V);
-zero_cols_ind = ppred (colSums (ppred (R, 0, "!=")), 0, "==");
+zero_cols_ind = (colSums (R != 0)) == 0;
K = min (Vcols - sum (zero_cols_ind), K);
n = nrow(X);
@@ -93,7 +93,7 @@ V_filter = U_prime %*% R;
V_prime = projection_matrix %*% V;
# filter for already recommended items
-V_prime = ppred(V_prime, 0, '==');
+V_prime = (V_prime == 0);
# removes already recommended items and creating user2item matrix
V_filter = V_prime * V_filter;
@@ -115,7 +115,7 @@ for (i in 1:K){
V_filter = V_filter - range * table (seq (1, nrow (V_filter), 1), rowIndexMax, nrow(V_filter), ncol(V_filter));
}
-V_top_indices = V_top_indices * ppred (V_top_values, 0, ">");
+V_top_indices = V_top_indices * (V_top_values > 0);
# append users as a first column
V_top_indices = append (X[,1], V_top_indices);
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9b9d019b/scripts/algorithms/Cox-predict.dml
----------------------------------------------------------------------
diff --git a/scripts/algorithms/Cox-predict.dml b/scripts/algorithms/Cox-predict.dml
index 7d444fd..d6c63cc 100644
--- a/scripts/algorithms/Cox-predict.dml
+++ b/scripts/algorithms/Cox-predict.dml
@@ -120,7 +120,7 @@ e_r = aggregate (target = RT_X, groups = RT_X, fn = "count");
Idx = cumsum (e_r);
all_times = table (seq (1, nrow (Idx), 1), Idx) %*% T_X; # distinct event times
-event_times = removeEmpty (target = ppred (d_r, 0, ">") * all_times, margin = "rows");
+event_times = removeEmpty (target = (d_r > 0) * all_times, margin = "rows");
num_distinct_event = nrow (event_times);
num_distinct = nrow (all_times); # no. of distinct timestamps censored or uncensored
@@ -130,8 +130,8 @@ select = t (colSums (table (seq (1, num_distinct), e_r_rev_agg)));
min_event_time = min (event_times);
max_event_time = max (event_times);
-T_Y = T_Y + (min_event_time * ppred (T_Y, min_event_time, "<"));
-T_Y = T_Y + (max_event_time * ppred (T_Y, max_event_time, ">"));
+T_Y = T_Y + (min_event_time * (T_Y < min_event_time));
+T_Y = T_Y + (max_event_time * (T_Y > max_event_time));
Ind = outer (T_Y, t (event_times), ">=");
Ind = table (seq (1, nrow (T_Y)), rowIndexMax (Ind), nrow (T_Y), num_distinct_event);
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9b9d019b/scripts/algorithms/Cox.dml
----------------------------------------------------------------------
diff --git a/scripts/algorithms/Cox.dml b/scripts/algorithms/Cox.dml
index 6da22ce..b3fe29f 100644
--- a/scripts/algorithms/Cox.dml
+++ b/scripts/algorithms/Cox.dml
@@ -159,7 +159,7 @@ num_timestamps = 1;
if (N == 1) {
RT = matrix (1, rows = 1, cols = 1);
} else {
- Idx[2:N,1] = ppred (X_orig[1:(N - 1),1], X_orig[2:N,1], "!=");
+ Idx[2:N,1] = (X_orig[1:(N - 1),1] != X_orig[2:N,1]);
num_timestamps = sum (Idx);
A = removeEmpty (target = diag (Idx), margin = "cols");
if (ncol (A) > 1) {
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9b9d019b/scripts/algorithms/CsplineCG.dml
----------------------------------------------------------------------
diff --git a/scripts/algorithms/CsplineCG.dml b/scripts/algorithms/CsplineCG.dml
index 2281270..8193561 100644
--- a/scripts/algorithms/CsplineCG.dml
+++ b/scripts/algorithms/CsplineCG.dml
@@ -167,7 +167,7 @@ interpSpline = function(
) return (double q) {
#first find the right knots for interpolation
- i = as.integer(nrow(X) - sum(ppred(X, x, ">=")) + 1)
+ i = as.integer(nrow(X) - sum(X >= x) + 1)
#calc the y as per the algo docs
t = (x - X[i-1,1]) / ( X[i,1] - X[i-1,1])
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9b9d019b/scripts/algorithms/CsplineDS.dml
----------------------------------------------------------------------
diff --git a/scripts/algorithms/CsplineDS.dml b/scripts/algorithms/CsplineDS.dml
index ff34b5e..d81c215 100644
--- a/scripts/algorithms/CsplineDS.dml
+++ b/scripts/algorithms/CsplineDS.dml
@@ -142,7 +142,7 @@ interpSpline = function(
) return (double q) {
#first find the right knots for interpolation
- i = as.integer(nrow(X) - sum(ppred(X, x, ">=")) + 1)
+ i = as.integer(nrow(X) - sum(X >= x) + 1)
#calc the y as per the algo docs
t = (x - X[i-1,1]) / ( X[i,1] - X[i-1,1])
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9b9d019b/scripts/algorithms/GLM-predict.dml
----------------------------------------------------------------------
diff --git a/scripts/algorithms/GLM-predict.dml b/scripts/algorithms/GLM-predict.dml
index f58d8d0..90d1f6b 100644
--- a/scripts/algorithms/GLM-predict.dml
+++ b/scripts/algorithms/GLM-predict.dml
@@ -171,8 +171,8 @@ if (fileY != " ")
#
# POWER DISTRIBUTIONS (GAUSSIAN, POISSON, GAMMA, ETC.)
#
- if (link_power == 0.0) {
- is_zero_Y = ppred (Y, 0.0, "==");
+ if (link_power == 0) {
+ is_zero_Y = (Y == 0);
lt_saturated = log (Y + is_zero_Y) - is_zero_Y / (1.0 - is_zero_Y);
} else {
lt_saturated = Y ^ link_power;
@@ -198,7 +198,7 @@ if (fileY != " ")
num_categories = ncol (beta) + 1;
if (min (Y) <= 0) {
# Category labels "0", "-1" etc. are converted into the baseline label
- Y = Y + (- Y + num_categories) * ppred (Y, 0, "<=");
+ Y = Y + (- Y + num_categories) * (Y <= 0);
}
Y_size = min (num_categories, max(Y));
Y_unsized = table (seq (1, num_records, 1), Y);
@@ -210,8 +210,8 @@ if (fileY != " ")
}
P = means;
- zero_Y = ppred (Y, 0.0, "==");
- zero_P = ppred (P, 0.0, "==");
+ zero_Y = (Y == 0);
+ zero_P = (P == 0);
ones_ctg = matrix (1, rows = ncol(Y), cols = 1);
logl_vec = rowSums (Y * log (P + zero_Y) );
@@ -224,10 +224,10 @@ if (fileY != " ")
means = means * (Y_counts %*% t(ones_ctg));
vars = vars * (Y_counts %*% t(ones_ctg));
- frac_below_5 = sum (ppred (means, 5, "<")) / (nrow (means) * ncol (means));
- frac_below_1 = sum (ppred (means, 1, "<")) / (nrow (means) * ncol (means));
+ frac_below_5 = sum (means < 5) / (nrow (means) * ncol (means));
+ frac_below_1 = sum (means < 1) / (nrow (means) * ncol (means));
- if (frac_below_5 > 0.2 | frac_below_1 > 0.0) {
+ if (frac_below_5 > 0.2 | frac_below_1 > 0) {
print ("WARNING: residual statistics are inaccurate here due to low cell means.");
}
@@ -341,7 +341,7 @@ glm_means_and_vars =
num_points = nrow (linear_terms);
if (dist_type == 1 & link_type == 1) {
# POWER DISTRIBUTION
- if (link_power == 0.0) {
+ if (link_power == 0) {
y_mean = exp (linear_terms);
} else { if (link_power == 1.0) {
y_mean = linear_terms;
@@ -350,7 +350,7 @@ glm_means_and_vars =
} else {
y_mean = linear_terms ^ (1.0 / link_power);
}}}
- if (var_power == 0.0) {
+ if (var_power == 0) {
var_function = matrix (1.0, rows = num_points, cols = 1);
} else { if (var_power == 1.0) {
var_function = y_mean;
@@ -362,10 +362,10 @@ glm_means_and_vars =
} else { if (dist_type == 2 & link_type >= 1 & link_type <= 5) {
# BINOMIAL/BERNOULLI DISTRIBUTION
y_prob = matrix (0.0, rows = num_points, cols = 2);
- if (link_type == 1 & link_power == 0.0) { # Binomial.log
+ if (link_type == 1 & link_power == 0) { # Binomial.log
y_prob [, 1] = exp (linear_terms);
y_prob [, 2] = 1.0 - y_prob [, 1];
- } else { if (link_type == 1 & link_power != 0.0) { # Binomial.power_nonlog
+ } else { if (link_type == 1 & link_power != 0) { # Binomial.power_nonlog
y_prob [, 1] = linear_terms ^ (1.0 / link_power);
y_prob [, 2] = 1.0 - y_prob [, 1];
} else { if (link_type == 2) { # Binomial.logit
@@ -373,7 +373,7 @@ glm_means_and_vars =
y_prob [, 1] = elt / (1.0 + elt);
y_prob [, 2] = 1.0 / (1.0 + elt);
} else { if (link_type == 3) { # Binomial.probit
- sign_lt = 2 * ppred (linear_terms, 0.0, ">=") - 1;
+ sign_lt = 2 * (linear_terms >= 0) - 1;
t_gp = 1.0 / (1.0 + abs (linear_terms) * 0.231641888); # 0.231641888 = 0.3275911 / sqrt (2.0)
erf_corr =
t_gp * ( 0.254829592
@@ -386,7 +386,7 @@ glm_means_and_vars =
y_prob = y_prob / 2;
} else { if (link_type == 4) { # Binomial.cloglog
elt = exp (linear_terms);
- is_too_small = ppred (10000000 + elt, 10000000, "==");
+ is_too_small = ((10000000 + elt) == 10000000);
y_prob [, 2] = exp (- elt);
y_prob [, 1] = (1 - is_too_small) * (1.0 - y_prob [, 2]) + is_too_small * elt * (1.0 - elt / 2);
} else { if (link_type == 5) { # Binomial.cauchit
@@ -416,25 +416,25 @@ glm_partial_loglikelihood_for_power_dist_and_link = # Assumes: dist_type == 1
{
num_records = nrow (Y);
if (var_power == 1.0) { # Poisson
- if (link_power == 0.0) { # Poisson.log
- is_natural_parameter_log_zero = ppred (linear_terms, -1.0/0.0, "==");
+ if (link_power == 0) { # Poisson.log
+ is_natural_parameter_log_zero = (linear_terms == -1.0/0.0);
natural_parameters = replace (target = linear_terms, pattern = -1.0/0.0, replacement = 0);
b_cumulant = exp (linear_terms);
} else { # Poisson.power_nonlog
- is_natural_parameter_log_zero = ppred (linear_terms, 0.0, "==");
+ is_natural_parameter_log_zero = (linear_terms == 0);
natural_parameters = log (linear_terms + is_natural_parameter_log_zero) / link_power;
b_cumulant = (linear_terms + is_natural_parameter_log_zero) ^ (1.0 / link_power) - is_natural_parameter_log_zero;
}
- is_minus_infinity = ppred (Y, 0, ">") * is_natural_parameter_log_zero;
+ is_minus_infinity = (Y > 0) * is_natural_parameter_log_zero;
log_l_part = Y * natural_parameters - b_cumulant - is_minus_infinity / (1 - is_minus_infinity);
} else {
- if (var_power == 2.0 & link_power == 0.0) { # Gamma.log
+ if (var_power == 2.0 & link_power == 0) { # Gamma.log
natural_parameters = - exp (- linear_terms);
b_cumulant = linear_terms;
} else { if (var_power == 2.0) { # Gamma.power_nonlog
natural_parameters = - linear_terms ^ (- 1.0 / link_power);
b_cumulant = log (linear_terms) / link_power;
- } else { if (link_power == 0.0) { # PowerDist.log
+ } else { if (link_power == 0) { # PowerDist.log
natural_parameters = exp (linear_terms * (1.0 - var_power)) / (1.0 - var_power);
b_cumulant = exp (linear_terms * (2.0 - var_power)) / (2.0 - var_power);
} else { # PowerDist.power_nonlog
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9b9d019b/scripts/algorithms/GLM.dml
----------------------------------------------------------------------
diff --git a/scripts/algorithms/GLM.dml b/scripts/algorithms/GLM.dml
index 16008e6..9cad79f 100644
--- a/scripts/algorithms/GLM.dml
+++ b/scripts/algorithms/GLM.dml
@@ -198,7 +198,7 @@ if (intercept_status == 2) # scale-&-shift X columns to mean 0, variance 1
{ # Important assumption: X [, num_features] = ones_r
avg_X_cols = t(colSums(X)) / num_records;
var_X_cols = (t(colSums (X ^ 2)) - num_records * (avg_X_cols ^ 2)) / (num_records - 1);
- is_unsafe = ppred (var_X_cols, 0.0, "<=");
+ is_unsafe = (var_X_cols <= 0);
scale_X = 1.0 / sqrt (var_X_cols * (1 - is_unsafe) + is_unsafe);
scale_X [num_features, 1] = 1;
shift_X = - avg_X_cols * scale_X;
@@ -233,7 +233,7 @@ if (max_iteration_CG == 0) {
if (distribution_type == 2 & ncol(Y) == 1)
{
- is_Y_negative = ppred (Y, bernoulli_No_label, "==");
+ is_Y_negative = (Y == bernoulli_No_label);
Y = append (1 - is_Y_negative, is_Y_negative);
count_Y_negative = sum (is_Y_negative);
if (count_Y_negative == 0) {
@@ -477,7 +477,7 @@ pearson_residual_sq = replace (target = pearson_residual_sq, pattern = 0.0/0.0,
if (num_records > num_features) {
estimated_dispersion = sum (pearson_residual_sq) / (num_records - num_features);
}
-if (dispersion <= 0.0) {
+if (dispersion <= 0) {
dispersion = estimated_dispersion;
}
deviance = deviance_nodisp / dispersion;
@@ -519,28 +519,28 @@ check_if_supported =
if (ncol_y == 1 & dist_type == 1 & link_type == 1)
{ # POWER DISTRIBUTION
is_supported = 1;
- if (var_power == 0.0 & link_power == -1.0) {print ("Gaussian.inverse"); } else {
- if (var_power == 0.0 & link_power == 0.0) {print ("Gaussian.log"); } else {
- if (var_power == 0.0 & link_power == 0.5) {print ("Gaussian.sqrt"); } else {
- if (var_power == 0.0 & link_power == 1.0) {print ("Gaussian.id"); } else {
- if (var_power == 0.0 ) {print ("Gaussian.power_nonlog"); } else {
+ if (var_power == 0 & link_power == -1.0) {print ("Gaussian.inverse"); } else {
+ if (var_power == 0 & link_power == 0) {print ("Gaussian.log"); } else {
+ if (var_power == 0 & link_power == 0.5) {print ("Gaussian.sqrt"); } else {
+ if (var_power == 0 & link_power == 1.0) {print ("Gaussian.id"); } else {
+ if (var_power == 0 ) {print ("Gaussian.power_nonlog"); } else {
if (var_power == 1.0 & link_power == -1.0) {print ("Poisson.inverse"); } else {
- if (var_power == 1.0 & link_power == 0.0) {print ("Poisson.log"); } else {
+ if (var_power == 1.0 & link_power == 0) {print ("Poisson.log"); } else {
if (var_power == 1.0 & link_power == 0.5) {print ("Poisson.sqrt"); } else {
if (var_power == 1.0 & link_power == 1.0) {print ("Poisson.id"); } else {
if (var_power == 1.0 ) {print ("Poisson.power_nonlog"); } else {
if (var_power == 2.0 & link_power == -1.0) {print ("Gamma.inverse"); } else {
- if (var_power == 2.0 & link_power == 0.0) {print ("Gamma.log"); } else {
+ if (var_power == 2.0 & link_power == 0) {print ("Gamma.log"); } else {
if (var_power == 2.0 & link_power == 0.5) {print ("Gamma.sqrt"); } else {
if (var_power == 2.0 & link_power == 1.0) {print ("Gamma.id"); } else {
if (var_power == 2.0 ) {print ("Gamma.power_nonlog"); } else {
if (var_power == 3.0 & link_power == -2.0) {print ("InvGaussian.1/mu^2"); } else {
if (var_power == 3.0 & link_power == -1.0) {print ("InvGaussian.inverse"); } else {
- if (var_power == 3.0 & link_power == 0.0) {print ("InvGaussian.log"); } else {
+ if (var_power == 3.0 & link_power == 0) {print ("InvGaussian.log"); } else {
if (var_power == 3.0 & link_power == 0.5) {print ("InvGaussian.sqrt"); } else {
if (var_power == 3.0 & link_power == 1.0) {print ("InvGaussian.id"); } else {
if (var_power == 3.0 ) {print ("InvGaussian.power_nonlog");}else{
- if ( link_power == 0.0) {print ("PowerDist.log"); } else {
+ if ( link_power == 0) {print ("PowerDist.log"); } else {
print ("PowerDist.power_nonlog");
} }}}}} }}}}} }}}}} }}}}} }}
if (ncol_y == 1 & dist_type == 2)
@@ -551,7 +551,7 @@ check_if_supported =
{ # BINOMIAL/BERNOULLI DISTRIBUTION
is_supported = 1;
if (link_type == 1 & link_power == -1.0) {print ("Binomial.inverse"); } else {
- if (link_type == 1 & link_power == 0.0) {print ("Binomial.log"); } else {
+ if (link_type == 1 & link_power == 0) {print ("Binomial.log"); } else {
if (link_type == 1 & link_power == 0.5) {print ("Binomial.sqrt"); } else {
if (link_type == 1 & link_power == 1.0) {print ("Binomial.id"); } else {
if (link_type == 1) {print ("Binomial.power_nonlog"); } else {
@@ -574,14 +574,14 @@ return (Matrix[double] beta, double saturated_log_l, int isNaN)
y_corr = Y [, 1];
if (dist_type == 2) {
n_corr = rowSums (Y);
- is_n_zero = ppred (n_corr, 0.0, "==");
+ is_n_zero = (n_corr == 0);
y_corr = Y [, 1] / (n_corr + is_n_zero) + (0.5 - Y [, 1]) * is_n_zero;
}
linear_terms = y_corr;
if (dist_type == 1 & link_type == 1) { # POWER DISTRIBUTION
- if (link_power == 0.0) {
- if (sum (ppred (y_corr, 0.0, "<")) == 0) {
- is_zero_y_corr = ppred (y_corr, 0.0, "==");
+ if (link_power == 0) {
+ if (sum (y_corr < 0) == 0) {
+ is_zero_y_corr = (y_corr == 0);
linear_terms = log (y_corr + is_zero_y_corr) - is_zero_y_corr / (1.0 - is_zero_y_corr);
} else { isNaN = 1; }
} else { if (link_power == 1.0) {
@@ -589,48 +589,48 @@ return (Matrix[double] beta, double saturated_log_l, int isNaN)
} else { if (link_power == -1.0) {
linear_terms = 1.0 / y_corr;
} else { if (link_power == 0.5) {
- if (sum (ppred (y_corr, 0.0, "<")) == 0) {
+ if (sum (y_corr < 0) == 0) {
linear_terms = sqrt (y_corr);
} else { isNaN = 1; }
- } else { if (link_power > 0.0) {
- if (sum (ppred (y_corr, 0.0, "<")) == 0) {
- is_zero_y_corr = ppred (y_corr, 0.0, "==");
+ } else { if (link_power > 0) {
+ if (sum (y_corr < 0) == 0) {
+ is_zero_y_corr = (y_corr == 0);
linear_terms = (y_corr + is_zero_y_corr) ^ link_power - is_zero_y_corr;
} else { isNaN = 1; }
} else {
- if (sum (ppred (y_corr, 0.0, "<=")) == 0) {
+ if (sum (y_corr <= 0) == 0) {
linear_terms = y_corr ^ link_power;
} else { isNaN = 1; }
}}}}}
}
if (dist_type == 2 & link_type >= 1 & link_type <= 5)
{ # BINOMIAL/BERNOULLI DISTRIBUTION
- if (link_type == 1 & link_power == 0.0) { # Binomial.log
- if (sum (ppred (y_corr, 0.0, "<")) == 0) {
- is_zero_y_corr = ppred (y_corr, 0.0, "==");
+ if (link_type == 1 & link_power == 0) { # Binomial.log
+ if (sum (y_corr < 0) == 0) {
+ is_zero_y_corr = (y_corr == 0);
linear_terms = log (y_corr + is_zero_y_corr) - is_zero_y_corr / (1.0 - is_zero_y_corr);
} else { isNaN = 1; }
- } else { if (link_type == 1 & link_power > 0.0) { # Binomial.power_nonlog pos
- if (sum (ppred (y_corr, 0.0, "<")) == 0) {
- is_zero_y_corr = ppred (y_corr, 0.0, "==");
+ } else { if (link_type == 1 & link_power > 0) { # Binomial.power_nonlog pos
+ if (sum (y_corr < 0) == 0) {
+ is_zero_y_corr = (y_corr == 0);
linear_terms = (y_corr + is_zero_y_corr) ^ link_power - is_zero_y_corr;
} else { isNaN = 1; }
} else { if (link_type == 1) { # Binomial.power_nonlog neg
- if (sum (ppred (y_corr, 0.0, "<=")) == 0) {
+ if (sum (y_corr <= 0) == 0) {
linear_terms = y_corr ^ link_power;
} else { isNaN = 1; }
} else {
- is_zero_y_corr = ppred (y_corr, 0.0, "<=");
- is_one_y_corr = ppred (y_corr, 1.0, ">=");
+ is_zero_y_corr = (y_corr <= 0);
+ is_one_y_corr = (y_corr >= 1.0);
y_corr = y_corr * (1.0 - is_zero_y_corr) * (1.0 - is_one_y_corr) + 0.5 * (is_zero_y_corr + is_one_y_corr);
if (link_type == 2) { # Binomial.logit
linear_terms = log (y_corr / (1.0 - y_corr))
+ is_one_y_corr / (1.0 - is_one_y_corr) - is_zero_y_corr / (1.0 - is_zero_y_corr);
} else { if (link_type == 3) { # Binomial.probit
- y_below_half = y_corr + (1.0 - 2.0 * y_corr) * ppred (y_corr, 0.5, ">");
+ y_below_half = y_corr + (1.0 - 2.0 * y_corr) * (y_corr > 0.5);
t = sqrt (- 2.0 * log (y_below_half));
approx_inv_Gauss_CDF = - t + (2.515517 + t * (0.802853 + t * 0.010328)) / (1.0 + t * (1.432788 + t * (0.189269 + t * 0.001308)));
- linear_terms = approx_inv_Gauss_CDF * (1.0 - 2.0 * ppred (y_corr, 0.5, ">"))
+ linear_terms = approx_inv_Gauss_CDF * (1.0 - 2.0 * (y_corr > 0.5))
+ is_one_y_corr / (1.0 - is_one_y_corr) - is_zero_y_corr / (1.0 - is_zero_y_corr);
} else { if (link_type == 4) { # Binomial.cloglog
linear_terms = log (- log (1.0 - y_corr))
@@ -647,11 +647,11 @@ return (Matrix[double] beta, double saturated_log_l, int isNaN)
glm_log_likelihood_part (linear_terms, Y, dist_type, var_power, link_type, link_power);
}
- if ((dist_type == 1 & link_type == 1 & link_power == 0.0) |
+ if ((dist_type == 1 & link_type == 1 & link_power == 0) |
(dist_type == 2 & link_type >= 2))
{
desired_eta = 0.0;
- } else { if (link_type == 1 & link_power == 0.0) {
+ } else { if (link_type == 1 & link_power == 0) {
desired_eta = log (0.5);
} else { if (link_type == 1) {
desired_eta = 0.5 ^ link_power;
@@ -661,7 +661,7 @@ return (Matrix[double] beta, double saturated_log_l, int isNaN)
beta = matrix (0.0, rows = ncol(X), cols = 1);
- if (desired_eta != 0.0) {
+ if (desired_eta != 0) {
if (icept_status == 1 | icept_status == 2) {
beta [nrow(beta), 1] = desired_eta;
} else {
@@ -712,7 +712,7 @@ glm_dist = function (Matrix[double] linear_terms, Matrix[double] Y,
if (dist_type == 1 & link_type == 1) { # POWER DISTRIBUTION
y_mean = zeros_r;
- if (link_power == 0.0) {
+ if (link_power == 0) {
y_mean = exp (linear_terms);
y_mean_pow = y_mean ^ (1 - var_power);
w = y_mean_pow * y_mean;
@@ -731,7 +731,7 @@ glm_dist = function (Matrix[double] linear_terms, Matrix[double] Y,
if (dist_type == 2 & link_type >= 1 & link_type <= 5)
{ # BINOMIAL/BERNOULLI DISTRIBUTION
if (link_type == 1) { # BINOMIAL.POWER LINKS
- if (link_power == 0.0) { # Binomial.log
+ if (link_power == 0) { # Binomial.log
vec1 = 1 / (exp (- linear_terms) - 1);
g_Y = Y [, 1] - Y [, 2] * vec1;
w = rowSums (Y) * vec1;
@@ -739,19 +739,19 @@ glm_dist = function (Matrix[double] linear_terms, Matrix[double] Y,
vec1 = zeros_r;
if (link_power == 0.5) {
vec1 = 1 / (1 - linear_terms ^ 2);
- } else { if (sum (ppred (linear_terms, 0.0, "<")) == 0) {
+ } else { if (sum (linear_terms < 0) == 0) {
vec1 = linear_terms ^ (- 2 + 1 / link_power) / (1 - linear_terms ^ (1 / link_power));
} else {isNaN = 1;}}
# We want a "zero-protected" version of
# vec2 = Y [, 1] / linear_terms;
- is_y_0 = ppred (Y [, 1], 0.0, "==");
+ is_y_0 = (Y [, 1] == 0);
vec2 = (Y [, 1] + is_y_0) / (linear_terms * (1 - is_y_0) + is_y_0) - is_y_0;
g_Y = (vec2 - Y [, 2] * vec1 * linear_terms) / link_power;
w = rowSums (Y) * vec1 / link_power ^ 2;
}
} else {
- is_LT_pos_infinite = ppred (linear_terms, 1.0/0.0, "==");
- is_LT_neg_infinite = ppred (linear_terms, -1.0/0.0, "==");
+ is_LT_pos_infinite = (linear_terms == 1.0/0.0);
+ is_LT_neg_infinite = (linear_terms == -1.0/0.0);
is_LT_infinite = is_LT_pos_infinite %*% one_zero + is_LT_neg_infinite %*% zero_one;
finite_linear_terms = replace (target = linear_terms, pattern = 1.0/0.0, replacement = 0);
finite_linear_terms = replace (target = finite_linear_terms, pattern = -1.0/0.0, replacement = 0);
@@ -762,7 +762,7 @@ glm_dist = function (Matrix[double] linear_terms, Matrix[double] Y,
g_Y = rowSums (Y * (Y_prob %*% flip_neg)); ### = y_residual;
w = rowSums (Y * (Y_prob %*% flip_pos) * Y_prob); ### = y_variance;
} else { if (link_type == 3) { # Binomial.probit
- is_lt_pos = ppred (linear_terms, 0.0, ">=");
+ is_lt_pos = (linear_terms >= 0);
t_gp = 1.0 / (1.0 + abs (finite_linear_terms) * 0.231641888); # 0.231641888 = 0.3275911 / sqrt (2.0)
pt_gp = t_gp * ( 0.254829592
+ t_gp * (-0.284496736 # "Handbook of Mathematical Functions", ed. by M. Abramowitz and I.A. Stegun,
@@ -777,7 +777,7 @@ glm_dist = function (Matrix[double] linear_terms, Matrix[double] Y,
} else { if (link_type == 4) { # Binomial.cloglog
the_exp = exp (linear_terms)
the_exp_exp = exp (- the_exp);
- is_too_small = ppred (10000000 + the_exp, 10000000, "==");
+ is_too_small = ((10000000 + the_exp) == 10000000);
the_exp_ratio = (1 - is_too_small) * (1 - the_exp_exp) / (the_exp + is_too_small) + is_too_small * (1 - the_exp / 2);
g_Y = (rowSums (Y) * the_exp_exp - Y [, 2]) / the_exp_ratio;
w = the_exp_exp * the_exp * rowSums (Y) / the_exp_ratio;
@@ -809,52 +809,52 @@ glm_log_likelihood_part = function (Matrix[double] linear_terms, Matrix[double]
b_cumulant = zeros_r;
natural_parameters = zeros_r;
is_natural_parameter_log_zero = zeros_r;
- if (var_power == 1.0 & link_power == 0.0) { # Poisson.log
+ if (var_power == 1.0 & link_power == 0) { # Poisson.log
b_cumulant = exp (linear_terms);
- is_natural_parameter_log_zero = ppred (linear_terms, -1.0/0.0, "==");
+ is_natural_parameter_log_zero = (linear_terms == -1.0/0.0);
natural_parameters = replace (target = linear_terms, pattern = -1.0/0.0, replacement = 0);
} else { if (var_power == 1.0 & link_power == 1.0) { # Poisson.id
- if (sum (ppred (linear_terms, 0.0, "<")) == 0) {
+ if (sum (linear_terms < 0) == 0) {
b_cumulant = linear_terms;
- is_natural_parameter_log_zero = ppred (linear_terms, 0.0, "==");
+ is_natural_parameter_log_zero = (linear_terms == 0);
natural_parameters = log (linear_terms + is_natural_parameter_log_zero);
} else {isNaN = 1;}
} else { if (var_power == 1.0 & link_power == 0.5) { # Poisson.sqrt
- if (sum (ppred (linear_terms, 0.0, "<")) == 0) {
+ if (sum (linear_terms < 0) == 0) {
b_cumulant = linear_terms ^ 2;
- is_natural_parameter_log_zero = ppred (linear_terms, 0.0, "==");
+ is_natural_parameter_log_zero = (linear_terms == 0);
natural_parameters = 2.0 * log (linear_terms + is_natural_parameter_log_zero);
} else {isNaN = 1;}
- } else { if (var_power == 1.0 & link_power > 0.0) { # Poisson.power_nonlog, pos
- if (sum (ppred (linear_terms, 0.0, "<")) == 0) {
- is_natural_parameter_log_zero = ppred (linear_terms, 0.0, "==");
+ } else { if (var_power == 1.0 & link_power > 0) { # Poisson.power_nonlog, pos
+ if (sum (linear_terms < 0) == 0) {
+ is_natural_parameter_log_zero = (linear_terms == 0);
b_cumulant = (linear_terms + is_natural_parameter_log_zero) ^ (1.0 / link_power) - is_natural_parameter_log_zero;
natural_parameters = log (linear_terms + is_natural_parameter_log_zero) / link_power;
} else {isNaN = 1;}
} else { if (var_power == 1.0) { # Poisson.power_nonlog, neg
- if (sum (ppred (linear_terms, 0.0, "<=")) == 0) {
+ if (sum (linear_terms <= 0) == 0) {
b_cumulant = linear_terms ^ (1.0 / link_power);
natural_parameters = log (linear_terms) / link_power;
} else {isNaN = 1;}
} else { if (var_power == 2.0 & link_power == -1.0) { # Gamma.inverse
- if (sum (ppred (linear_terms, 0.0, "<=")) == 0) {
+ if (sum (linear_terms <= 0) == 0) {
b_cumulant = - log (linear_terms);
natural_parameters = - linear_terms;
} else {isNaN = 1;}
} else { if (var_power == 2.0 & link_power == 1.0) { # Gamma.id
- if (sum (ppred (linear_terms, 0.0, "<=")) == 0) {
+ if (sum (linear_terms <= 0) == 0) {
b_cumulant = log (linear_terms);
natural_parameters = - 1.0 / linear_terms;
} else {isNaN = 1;}
- } else { if (var_power == 2.0 & link_power == 0.0) { # Gamma.log
+ } else { if (var_power == 2.0 & link_power == 0) { # Gamma.log
b_cumulant = linear_terms;
natural_parameters = - exp (- linear_terms);
} else { if (var_power == 2.0) { # Gamma.power_nonlog
- if (sum (ppred (linear_terms, 0.0, "<=")) == 0) {
+ if (sum (linear_terms <= 0) == 0) {
b_cumulant = log (linear_terms) / link_power;
natural_parameters = - linear_terms ^ (- 1.0 / link_power);
} else {isNaN = 1;}
- } else { if (link_power == 0.0) { # PowerDist.log
+ } else { if (link_power == 0) { # PowerDist.log
natural_parameters = exp (linear_terms * (1.0 - var_power)) / (1.0 - var_power);
b_cumulant = exp (linear_terms * (2.0 - var_power)) / (2.0 - var_power);
} else { # PowerDist.power_nonlog
@@ -867,7 +867,7 @@ glm_log_likelihood_part = function (Matrix[double] linear_terms, Matrix[double]
} else { if ( 2 * link_power == 1.0 - var_power) {
natural_parameters = linear_terms ^ 2 / (1.0 - var_power);
} else {
- if (sum (ppred (linear_terms, 0.0, "<=")) == 0) {
+ if (sum (linear_terms <= 0) == 0) {
power = (1.0 - var_power) / link_power;
natural_parameters = (linear_terms ^ power) / (1.0 - var_power);
} else {isNaN = 1;}
@@ -881,13 +881,13 @@ glm_log_likelihood_part = function (Matrix[double] linear_terms, Matrix[double]
} else { if ( 2 * link_power == 2.0 - var_power) {
b_cumulant = linear_terms ^ 2 / (2.0 - var_power);
} else {
- if (sum (ppred (linear_terms, 0.0, "<=")) == 0) {
+ if (sum (linear_terms <= 0) == 0) {
power = (2.0 - var_power) / link_power;
b_cumulant = (linear_terms ^ power) / (2.0 - var_power);
} else {isNaN = 1;}
}}}}
}}}}} }}}}}
- if (sum (is_natural_parameter_log_zero * abs (Y)) > 0.0) {
+ if (sum (is_natural_parameter_log_zero * abs (Y)) > 0) {
log_l = -1.0 / 0.0;
isNaN = 1;
}
@@ -905,8 +905,8 @@ glm_log_likelihood_part = function (Matrix[double] linear_terms, Matrix[double]
[Y_prob, isNaN] = binomial_probability_two_column (linear_terms, link_type, link_power);
if (isNaN == 0) {
- does_prob_contradict = ppred (Y_prob, 0.0, "<=");
- if (sum (does_prob_contradict * abs (Y)) == 0.0) {
+ does_prob_contradict = (Y_prob <= 0);
+ if (sum (does_prob_contradict * abs (Y)) == 0) {
log_l = sum (Y * log (Y_prob * (1 - does_prob_contradict) + does_prob_contradict));
if (log_l != log_l | (log_l == log_l + 1.0 & log_l == log_l * 2.0)) {
isNaN = 1;
@@ -949,18 +949,18 @@ binomial_probability_two_column =
Y_prob = zeros_r %*% ones_2;
if (link_type == 1) { # Binomial.power
- if (link_power == 0.0) { # Binomial.log
+ if (link_power == 0) { # Binomial.log
Y_prob = exp (linear_terms) %*% p_one_m_one + ones_r %*% zero_one;
} else { if (link_power == 0.5) { # Binomial.sqrt
Y_prob = (linear_terms ^ 2) %*% p_one_m_one + ones_r %*% zero_one;
} else { # Binomial.power_nonlog
- if (sum (ppred (linear_terms, 0.0, "<")) == 0) {
+ if (sum (linear_terms < 0) == 0) {
Y_prob = (linear_terms ^ (1.0 / link_power)) %*% p_one_m_one + ones_r %*% zero_one;
} else {isNaN = 1;}
}}
} else { # Binomial.non_power
- is_LT_pos_infinite = ppred (linear_terms, 1.0/0.0, "==");
- is_LT_neg_infinite = ppred (linear_terms, -1.0/0.0, "==");
+ is_LT_pos_infinite = (linear_terms == 1.0/0.0);
+ is_LT_neg_infinite = (linear_terms == -1.0/0.0);
is_LT_infinite = is_LT_pos_infinite %*% one_zero + is_LT_neg_infinite %*% zero_one;
finite_linear_terms = replace (target = linear_terms, pattern = 1.0/0.0, replacement = 0);
finite_linear_terms = replace (target = finite_linear_terms, pattern = -1.0/0.0, replacement = 0);
@@ -968,7 +968,7 @@ binomial_probability_two_column =
Y_prob = exp (finite_linear_terms) %*% one_zero + ones_r %*% zero_one;
Y_prob = Y_prob / (rowSums (Y_prob) %*% ones_2);
} else { if (link_type == 3) { # Binomial.probit
- lt_pos_neg = ppred (finite_linear_terms, 0.0, ">=") %*% p_one_m_one + ones_r %*% zero_one;
+ lt_pos_neg = (finite_linear_terms >= 0) %*% p_one_m_one + ones_r %*% zero_one;
t_gp = 1.0 / (1.0 + abs (finite_linear_terms) * 0.231641888); # 0.231641888 = 0.3275911 / sqrt (2.0)
pt_gp = t_gp * ( 0.254829592
+ t_gp * (-0.284496736 # "Handbook of Mathematical Functions", ed. by M. Abramowitz and I.A. Stegun,
@@ -980,7 +980,7 @@ binomial_probability_two_column =
} else { if (link_type == 4) { # Binomial.cloglog
the_exp = exp (finite_linear_terms);
the_exp_exp = exp (- the_exp);
- is_too_small = ppred (10000000 + the_exp, 10000000, "==");
+ is_too_small = ((10000000 + the_exp) == 10000000);
Y_prob [, 1] = (1 - is_too_small) * (1 - the_exp_exp) + is_too_small * the_exp * (1 - the_exp / 2);
Y_prob [, 2] = the_exp_exp;
} else { if (link_type == 5) { # Binomial.cauchit
@@ -1148,13 +1148,13 @@ return (double mantissa, int eee)
mantissa = 1.0;
d_eee = d_eee + 1;
}
- if (x_to_truncate < 0.0) {
+ if (x_to_truncate < 0) {
mantissa = - mantissa;
}
eee = 0;
pow_two = 1;
res_eee = abs (d_eee);
- while (res_eee != 0.0) {
+ while (res_eee != 0) {
new_res_eee = round (res_eee / 2.0 - 0.3);
if (new_res_eee * 2.0 < res_eee) {
eee = eee + pow_two;
@@ -1162,7 +1162,7 @@ return (double mantissa, int eee)
res_eee = new_res_eee;
pow_two = 2 * pow_two;
}
- if (d_eee < 0.0) {
+ if (d_eee < 0) {
eee = - eee;
}
} else { mantissa = x_to_truncate; }
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9b9d019b/scripts/algorithms/KM.dml
----------------------------------------------------------------------
diff --git a/scripts/algorithms/KM.dml b/scripts/algorithms/KM.dml
index fbfa917..d648c50 100644
--- a/scripts/algorithms/KM.dml
+++ b/scripts/algorithms/KM.dml
@@ -183,7 +183,7 @@ if (n_group_cols > 0) {
}
XG = X[,3:(3 + n_group_cols - 1)];
Idx = matrix (1, rows = num_records, cols = 1);
- Idx[2:num_records,] = rowMaxs (ppred (X[1:(num_records - 1),3:(2 + n_group_cols)], X[2:num_records,3:(2 + n_group_cols)], "!="));
+ Idx[2:num_records,] = rowMaxs (X[1:(num_records - 1),3:(2 + n_group_cols)] != X[2:num_records,3:(2 + n_group_cols)]);
num_groups = sum (Idx);
XG = replace (target = XG, pattern = 0, replacement = "Infinity");
@@ -219,7 +219,7 @@ if (n_stratum_cols > 0) {
}
XS = X[,(s_offset + 1):(s_offset + n_stratum_cols)];
Idx = matrix (1, rows = num_records, cols = 1);
- Idx[2:num_records,] = rowMaxs (ppred (X[1:(num_records - 1),(s_offset + 1):(s_offset + n_stratum_cols)], X[2:num_records,(s_offset + 1):(s_offset + n_stratum_cols)], "!="));
+ Idx[2:num_records,] = rowMaxs (X[1:(num_records - 1),(s_offset + 1):(s_offset + n_stratum_cols)] != X[2:num_records,(s_offset + 1):(s_offset + n_stratum_cols)]);
num_strata = sum (Idx);
XS = replace (target = XS, pattern = 0, replacement = "Infinity");
@@ -294,7 +294,7 @@ parfor (s in 1:num_strata, check = 0) {
if (range == 1) {
RT = matrix (1, rows = 1, cols = 1);
} else {
- Idx1[2:range,1] = ppred (X_cur[1:(range - 1),1], X_cur[2:range,1], "!=");
+ Idx1[2:range,1] = (X_cur[1:(range - 1),1] != X_cur[2:range,1]);
num_timestamps = sum (Idx1);
A1 = removeEmpty (target = diag (Idx1), margin = "cols");
if (ncol (A1) > 1) {
@@ -315,7 +315,7 @@ parfor (s in 1:num_strata, check = 0) {
n_event_all_stratum = aggregate (target = E, groups = RT, fn = "count"); # no. both censored and uncensored of events per stratum
Idx1 = cumsum (n_event_all_stratum);
time_stratum = table (seq (1, nrow (Idx1), 1), Idx1) %*% T; # distinct timestamps both censored and uncensored per stratum
- time_stratum_has_zero = sum (ppred (time_stratum, 0, "==")) > 0;
+ time_stratum_has_zero = sum (time_stratum == 0) > 0;
if (time_stratum_has_zero) {
time_stratum = replace (target = time_stratum, pattern = 0, replacement = "Infinity");
}
@@ -332,7 +332,7 @@ parfor (s in 1:num_strata, check = 0) {
parfor (g in 1:num_groups, check = 0) {
- group_ind = ppred (G, g, "==");
+ group_ind = (G == g);
KM_offset = (s - 1) * num_groups * 7 + (g - 1) * 7;
M_offset = (s - 1) * num_groups + g;
if (sum (group_ind) != 0) { # group g is present in the stratum s
@@ -347,7 +347,7 @@ parfor (s in 1:num_strata, check = 0) {
n_event = aggregate (target = E_cur, groups = RT, fn = "sum"); # no. of uncensored events per stratum per group
n_event_all = aggregate (target = group_ind, groups = RT, fn = "sum"); # no. of both censored and uncensored events per stratum per group
Idx1 = cumsum (n_event_all);
- event_occurred = ppred (n_event, 0, ">");
+ event_occurred = (n_event > 0);
if (time_stratum_has_zero) {
time = replace (target = time_stratum * event_occurred, pattern = "NaN", replacement = 0);
time = removeEmpty (target = time, margin = "rows");
@@ -369,7 +369,7 @@ parfor (s in 1:num_strata, check = 0) {
}
# Extract only rows corresponding to events, i.e., for which n_event is nonzero
- Idx1 = ppred (n_event, 0, "!=");
+ Idx1 = (n_event != 0);
KM_1 = matrix (0, rows = n_time_all2, cols = 2);
KM_1[,1] = n_risk;
KM_1[,2] = n_event;
@@ -421,7 +421,7 @@ parfor (s in 1:num_strata, check = 0) {
######## ESTIMATE MEDIAN OF SURVIVAL TIMES AND ITS 100(1-ALPHA)% CONFIDENCE INTERVAL
- p_5 = ppred (surv, 0.5, "<=");
+ p_5 = (surv <= 0.5);
pn_5 = sum (p_5);
#M_offset = (s - 1) * num_groups + g;
# if the estimated survivor function is larger than 0.5 for all timestamps median does not exist!
@@ -439,8 +439,8 @@ parfor (s in 1:num_strata, check = 0) {
t_5 = as.scalar (time[n_time - pn_5 + 1,1]);
}
- l_ind = ppred (CI_l, 0.5, "<=");
- r_ind = ppred (CI_r, 0.5, "<=");
+ l_ind = (CI_l <= 0.5);
+ r_ind = (CI_r <= 0.5);
l_ind_sum = sum (l_ind);
r_ind_sum = sum (r_ind);
l_min_ind = as.scalar (rowIndexMin (t(l_ind)));
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9b9d019b/scripts/algorithms/Kmeans-predict.dml
----------------------------------------------------------------------
diff --git a/scripts/algorithms/Kmeans-predict.dml b/scripts/algorithms/Kmeans-predict.dml
index 3823234..17e673a 100644
--- a/scripts/algorithms/Kmeans-predict.dml
+++ b/scripts/algorithms/Kmeans-predict.dml
@@ -144,7 +144,7 @@ if (fileX != " ") {
# Compute the means, as opposed to the centroids
cluster_sizes = t(colSums (P));
record_of_ones = matrix (1, rows = 1, cols = ncol (X));
- M = (t(P) %*% X) / ((cluster_sizes + ppred (cluster_sizes, 0, "==")) %*% record_of_ones);
+ M = (t(P) %*% X) / ((cluster_sizes + (cluster_sizes == 0)) %*% record_of_ones);
# Compute the WCSS for the means
wcss_means = sum ((X - P %*% M) ^ 2);
wcss_means_pc = 100.0 * wcss_means / total_ss;
@@ -323,7 +323,7 @@ return (Matrix[double] row_ids, Matrix[double] col_ids, Matrix[double] margins,
Matrix[double] max_counts, Matrix[double] rounded_percentages)
{
margins = rowSums (counts);
- select_positive = diag (ppred (margins, 0, ">"));
+ select_positive = diag (margins > 0);
select_positive = removeEmpty (target = select_positive, margin = "rows");
row_ids = select_positive %*% seq (1, nrow (margins), 1);
pos_counts = select_positive %*% counts;
@@ -331,9 +331,9 @@ return (Matrix[double] row_ids, Matrix[double] col_ids, Matrix[double] margins,
max_counts = rowMaxs (pos_counts);
one_per_column = matrix (1, rows = 1, cols = ncol (pos_counts));
max_counts_ppred = max_counts %*% one_per_column;
- is_max_count = ppred (pos_counts, max_counts_ppred, "==");
+ is_max_count = (pos_counts == max_counts_ppred);
aggr_is_max_count = t(cumsum (t(is_max_count)));
- col_ids = rowSums (ppred (aggr_is_max_count, 0, "==")) + 1;
+ col_ids = rowSums (aggr_is_max_count == 0) + 1;
rounded_percentages = round (1000000.0 * max_counts / pos_margins) / 10000.0;
}
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9b9d019b/scripts/algorithms/Kmeans.dml
----------------------------------------------------------------------
diff --git a/scripts/algorithms/Kmeans.dml b/scripts/algorithms/Kmeans.dml
index 8717473..85e13c5 100644
--- a/scripts/algorithms/Kmeans.dml
+++ b/scripts/algorithms/Kmeans.dml
@@ -99,7 +99,7 @@ for (i in 1 : num_centroids)
# probability ~ min_distances:
random_row = Rand (rows = 1, cols = num_runs, min = 0.0, max = 1.0);
threshold_matrix = random_row * cdf_min_distances [sample_block_size, ];
- centroid_ids = t(colSums (ppred (cdf_min_distances, threshold_matrix, "<"))) + 1;
+ centroid_ids = t(colSums (cdf_min_distances < threshold_matrix)) + 1;
# Place the selected centroids together, one per run, into a matrix:
centroid_placer = matrix (0, rows = num_runs, cols = (sample_block_size * num_runs));
@@ -165,12 +165,12 @@ parfor (run_index in 1 : num_runs, check = 0)
} else {
iter_count = iter_count + 1;
# Find the closest centroid for each record
- P = ppred (D, minD, "<=");
+ P = (D <= minD);
# If some records belong to multiple centroids, share them equally
P = P / rowSums (P);
# Compute the column normalization factor for P
P_denom = colSums (P);
- if (sum (ppred (P_denom, 0.0, "<=")) > 0) {
+ if (sum (P_denom <= 0) > 0) {
term_code = 3; # There is a "runaway" centroid with 0.0 denominator
} else {
C_old = C;
@@ -200,9 +200,9 @@ if (num_successful_runs > 0) {
worst_wcss = max (final_wcss_successful);
best_wcss = min (final_wcss_successful + (10 * worst_wcss + 10) * (1 - termination_bitmap [, 1]));
avg_wcss = sum (final_wcss_successful) / num_successful_runs;
- best_index_vector = ppred (final_wcss_successful, best_wcss, "==");
+ best_index_vector = (final_wcss_successful == best_wcss);
aggr_best_index_vector = cumsum (best_index_vector);
- best_index = as.integer (sum (ppred (aggr_best_index_vector, 0, "==")) + 1);
+ best_index = as.integer (sum (aggr_best_index_vector == 0) + 1);
print ("Successful runs: Best run is " + best_index + " with Centroid WCSS = " + best_wcss
+ "; Avg WCSS = " + avg_wcss + "; Worst WCSS = " + worst_wcss);
C = All_Centroids [(num_centroids * (best_index - 1) + 1) : (num_centroids * best_index), ];
@@ -211,9 +211,9 @@ if (num_successful_runs > 0) {
if (is_write_Y == 1) {
print ("Writing out the best-WCSS cluster labels...");
D = -2 * (X %*% t(C)) + t(rowSums (C ^ 2));
- P = ppred (D, rowMins (D), "<=");
+ P = (D <= rowMins (D));
aggr_P = t(cumsum (t(P)));
- Y = rowSums (ppred (aggr_P, 0, "==")) + 1
+ Y = rowSums (aggr_P == 0) + 1
write (Y, fileY, format=fmtCY);
}
print ("DONE.");
@@ -240,7 +240,7 @@ get_sample_maps = function (int num_records, int num_samples, int approx_sample_
sample_rec_ids = cumsum (sample_rec_ids); # (skip to next one) --> (skip to i-th one)
# Replace all sample record ids over "num_records" (i.e. out of range) by "num_records + 1":
- is_sample_rec_id_within_range = ppred (sample_rec_ids, num_records, "<=");
+ is_sample_rec_id_within_range = (sample_rec_ids <= num_records);
sample_rec_ids = sample_rec_ids * is_sample_rec_id_within_range
+ (num_records + 1) * (1 - is_sample_rec_id_within_range);
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9b9d019b/scripts/algorithms/LinearRegCG.dml
----------------------------------------------------------------------
diff --git a/scripts/algorithms/LinearRegCG.dml b/scripts/algorithms/LinearRegCG.dml
index ebfa5a4..5e7bb36 100644
--- a/scripts/algorithms/LinearRegCG.dml
+++ b/scripts/algorithms/LinearRegCG.dml
@@ -124,7 +124,7 @@ if (intercept_status == 2) # scale-&-shift X columns to mean 0, variance 1
{ # Important assumption: X [, m_ext] = ones_n
avg_X_cols = t(colSums(X)) / n;
var_X_cols = (t(colSums (X ^ 2)) - n * (avg_X_cols ^ 2)) / (n - 1);
- is_unsafe = ppred (var_X_cols, 0.0, "<=");
+ is_unsafe = (var_X_cols <= 0);
scale_X = 1.0 / sqrt (var_X_cols * (1 - is_unsafe) + is_unsafe);
scale_X [m_ext, 1] = 1;
shift_X = - avg_X_cols * scale_X;
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9b9d019b/scripts/algorithms/LinearRegDS.dml
----------------------------------------------------------------------
diff --git a/scripts/algorithms/LinearRegDS.dml b/scripts/algorithms/LinearRegDS.dml
index 0ec663a..eb85c60 100644
--- a/scripts/algorithms/LinearRegDS.dml
+++ b/scripts/algorithms/LinearRegDS.dml
@@ -106,7 +106,7 @@ if (intercept_status == 2) # scale-&-shift X columns to mean 0, variance 1
{ # Important assumption: X [, m_ext] = ones_n
avg_X_cols = t(colSums(X)) / n;
var_X_cols = (t(colSums (X ^ 2)) - n * (avg_X_cols ^ 2)) / (n - 1);
- is_unsafe = ppred (var_X_cols, 0.0, "<=");
+ is_unsafe = (var_X_cols <= 0);
scale_X = 1.0 / sqrt (var_X_cols * (1 - is_unsafe) + is_unsafe);
scale_X [m_ext, 1] = 1;
shift_X = - avg_X_cols * scale_X;
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9b9d019b/scripts/algorithms/MultiLogReg.dml
----------------------------------------------------------------------
diff --git a/scripts/algorithms/MultiLogReg.dml b/scripts/algorithms/MultiLogReg.dml
index 716678d..8dc5a76 100644
--- a/scripts/algorithms/MultiLogReg.dml
+++ b/scripts/algorithms/MultiLogReg.dml
@@ -114,7 +114,7 @@ if (intercept_status == 2) # scale-&-shift X columns to mean 0, variance 1
{ # Important assumption: X [, D] = matrix (1, rows = N, cols = 1)
avg_X_cols = t(colSums(X)) / N;
var_X_cols = (t(colSums (X ^ 2)) - N * (avg_X_cols ^ 2)) / (N - 1);
- is_unsafe = ppred (var_X_cols, 0.0, "<=");
+ is_unsafe = var_X_cols <= 0;
scale_X = 1.0 / sqrt (var_X_cols * (1 - is_unsafe) + is_unsafe);
scale_X [D, 1] = 1;
shift_X = - avg_X_cols * scale_X;
@@ -142,7 +142,7 @@ if (intercept_status == 2) # scale-&-shift X columns to mean 0, variance 1
if (min (Y_vec) <= 0) {
# Category labels "0", "-1" etc. are converted into the largest label
max_y = max (Y_vec);
- Y_vec = Y_vec + (- Y_vec + max_y + 1) * ppred (Y_vec , 0.0, "<=");
+ Y_vec = Y_vec + (- Y_vec + max_y + 1) * (Y_vec <= 0);
}
Y = table (seq (1, N, 1), Y_vec);
K = ncol (Y) - 1; # The number of non-baseline categories
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9b9d019b/scripts/algorithms/StepGLM.dml
----------------------------------------------------------------------
diff --git a/scripts/algorithms/StepGLM.dml b/scripts/algorithms/StepGLM.dml
index a8c8820..edc1346 100644
--- a/scripts/algorithms/StepGLM.dml
+++ b/scripts/algorithms/StepGLM.dml
@@ -100,7 +100,7 @@ X_orig = read (fileX);
Y = read (fileY);
if (distribution_type == 2 & ncol(Y) == 1) {
- is_Y_negative = ppred (Y, bernoulli_No_label, "==");
+ is_Y_negative = (Y == bernoulli_No_label);
Y = append (1 - is_Y_negative, is_Y_negative);
count_Y_negative = sum (is_Y_negative);
if (count_Y_negative == 0) {
@@ -275,7 +275,7 @@ glm = function (Matrix[Double] X, Matrix[Double] Y, Int intercept_status, Double
# Important assumption: X [, num_features] = ones_r
avg_X_cols = t(colSums(X)) / num_records;
var_X_cols = (t(colSums (X ^ 2)) - num_records * (avg_X_cols ^ 2)) / (num_records - 1);
- is_unsafe = ppred (var_X_cols, 0.0, "<=");
+ is_unsafe = (var_X_cols <= 0);
scale_X = 1.0 / sqrt (var_X_cols * (1 - is_unsafe) + is_unsafe);
scale_X [num_features, 1] = 1;
shift_X = - avg_X_cols * scale_X;
@@ -490,7 +490,7 @@ glm = function (Matrix[Double] X, Matrix[Double] Y, Int intercept_status, Double
}
if (intercept_status == 0 & num_features == 1) {
- p = sum (ppred (X, 1, "=="));
+ p = sum (X == 1);
if (p == num_records) {
beta_out = beta_out[1,];
}
@@ -522,7 +522,7 @@ glm = function (Matrix[Double] X, Matrix[Double] Y, Int intercept_status, Double
if (num_records > num_features) {
estimated_dispersion = sum (pearson_residual_sq) / (num_records - num_features);
}
- if (dispersion <= 0.0) {
+ if (dispersion <= 0) {
dispersion = estimated_dispersion;
}
deviance = deviance_nodisp / dispersion;
@@ -639,14 +639,14 @@ glm_initialize = function (Matrix[double] X, Matrix[double] Y, int dist_type, do
y_corr = Y [, 1];
if (dist_type == 2) {
n_corr = rowSums (Y);
- is_n_zero = ppred (n_corr, 0.0, "==");
+ is_n_zero = (n_corr == 0);
y_corr = Y [, 1] / (n_corr + is_n_zero) + (0.5 - Y [, 1]) * is_n_zero;
}
linear_terms = y_corr;
if (dist_type == 1 & link_type == 1) { # POWER DISTRIBUTION
- if (link_power == 0.0) {
- if (sum (ppred (y_corr, 0.0, "<")) == 0) {
- is_zero_y_corr = ppred (y_corr, 0.0, "==");
+ if (link_power == 0) {
+ if (sum (y_corr < 0) == 0) {
+ is_zero_y_corr = (y_corr == 0);
linear_terms = log (y_corr + is_zero_y_corr) - is_zero_y_corr / (1.0 - is_zero_y_corr);
} else { isNaN = 1; }
} else { if (link_power == 1.0) {
@@ -654,48 +654,48 @@ glm_initialize = function (Matrix[double] X, Matrix[double] Y, int dist_type, do
} else { if (link_power == -1.0) {
linear_terms = 1.0 / y_corr;
} else { if (link_power == 0.5) {
- if (sum (ppred (y_corr, 0.0, "<")) == 0) {
+ if (sum (y_corr < 0) == 0) {
linear_terms = sqrt (y_corr);
} else { isNaN = 1; }
- } else { if (link_power > 0.0) {
- if (sum (ppred (y_corr, 0.0, "<")) == 0) {
- is_zero_y_corr = ppred (y_corr, 0.0, "==");
+ } else { if (link_power > 0) {
+ if (sum (y_corr < 0) == 0) {
+ is_zero_y_corr = (y_corr == 0);
linear_terms = (y_corr + is_zero_y_corr) ^ link_power - is_zero_y_corr;
} else { isNaN = 1; }
} else {
- if (sum (ppred (y_corr, 0.0, "<=")) == 0) {
+ if (sum (y_corr <= 0) == 0) {
linear_terms = y_corr ^ link_power;
} else { isNaN = 1; }
}}}}}
}
if (dist_type == 2 & link_type >= 1 & link_type <= 5)
{ # BINOMIAL/BERNOULLI DISTRIBUTION
- if (link_type == 1 & link_power == 0.0) { # Binomial.log
- if (sum (ppred (y_corr, 0.0, "<")) == 0) {
- is_zero_y_corr = ppred (y_corr, 0.0, "==");
+ if (link_type == 1 & link_power == 0) { # Binomial.log
+ if (sum (y_corr < 0) == 0) {
+ is_zero_y_corr = (y_corr == 0);
linear_terms = log (y_corr + is_zero_y_corr) - is_zero_y_corr / (1.0 - is_zero_y_corr);
} else { isNaN = 1; }
- } else { if (link_type == 1 & link_power > 0.0) { # Binomial.power_nonlog pos
- if (sum (ppred (y_corr, 0.0, "<")) == 0) {
- is_zero_y_corr = ppred (y_corr, 0.0, "==");
+ } else { if (link_type == 1 & link_power > 0) { # Binomial.power_nonlog pos
+ if (sum (y_corr < 0) == 0) {
+ is_zero_y_corr = (y_corr == 0);
linear_terms = (y_corr + is_zero_y_corr) ^ link_power - is_zero_y_corr;
} else { isNaN = 1; }
} else { if (link_type == 1) { # Binomial.power_nonlog neg
- if (sum (ppred (y_corr, 0.0, "<=")) == 0) {
+ if (sum (y_corr <= 0) == 0) {
linear_terms = y_corr ^ link_power;
} else { isNaN = 1; }
} else {
- is_zero_y_corr = ppred (y_corr, 0.0, "<=");
- is_one_y_corr = ppred (y_corr, 1.0, ">=");
+ is_zero_y_corr = (y_corr <= 0);
+ is_one_y_corr = (y_corr >= 1.0);
y_corr = y_corr * (1.0 - is_zero_y_corr) * (1.0 - is_one_y_corr) + 0.5 * (is_zero_y_corr + is_one_y_corr);
if (link_type == 2) { # Binomial.logit
linear_terms = log (y_corr / (1.0 - y_corr))
+ is_one_y_corr / (1.0 - is_one_y_corr) - is_zero_y_corr / (1.0 - is_zero_y_corr);
} else { if (link_type == 3) { # Binomial.probit
- y_below_half = y_corr + (1.0 - 2.0 * y_corr) * ppred (y_corr, 0.5, ">");
+ y_below_half = y_corr + (1.0 - 2.0 * y_corr) * (y_corr > 0.5);
t = sqrt (- 2.0 * log (y_below_half));
approx_inv_Gauss_CDF = - t + (2.515517 + t * (0.802853 + t * 0.010328)) / (1.0 + t * (1.432788 + t * (0.189269 + t * 0.001308)));
- linear_terms = approx_inv_Gauss_CDF * (1.0 - 2.0 * ppred (y_corr, 0.5, ">"))
+ linear_terms = approx_inv_Gauss_CDF * (1.0 - 2.0 * (y_corr > 0.5))
+ is_one_y_corr / (1.0 - is_one_y_corr) - is_zero_y_corr / (1.0 - is_zero_y_corr);
} else { if (link_type == 4) { # Binomial.cloglog
linear_terms = log (- log (1.0 - y_corr))
@@ -712,11 +712,11 @@ glm_initialize = function (Matrix[double] X, Matrix[double] Y, int dist_type, do
glm_log_likelihood_part (linear_terms, Y, dist_type, var_power, link_type, link_power);
}
- if ((dist_type == 1 & link_type == 1 & link_power == 0.0) |
+ if ((dist_type == 1 & link_type == 1 & link_power == 0) |
(dist_type == 2 & link_type >= 2))
{
desired_eta = 0.0;
- } else { if (link_type == 1 & link_power == 0.0) {
+ } else { if (link_type == 1 & link_power == 0) {
desired_eta = log (0.5);
} else { if (link_type == 1) {
desired_eta = 0.5 ^ link_power;
@@ -726,7 +726,7 @@ glm_initialize = function (Matrix[double] X, Matrix[double] Y, int dist_type, do
beta = matrix (0.0, rows = ncol(X), cols = 1);
- if (desired_eta != 0.0) {
+ if (desired_eta != 0) {
if (icept_status == 1 | icept_status == 2) {
beta [nrow(beta), 1] = desired_eta;
} else {
@@ -777,7 +777,7 @@ glm_dist = function (Matrix[double] linear_terms, Matrix[double] Y,
if (dist_type == 1 & link_type == 1) { # POWER DISTRIBUTION
y_mean = zeros_r;
- if (link_power == 0.0) {
+ if (link_power == 0) {
y_mean = exp (linear_terms);
y_mean_pow = y_mean ^ (1 - var_power);
w = y_mean_pow * y_mean;
@@ -796,7 +796,7 @@ glm_dist = function (Matrix[double] linear_terms, Matrix[double] Y,
if (dist_type == 2 & link_type >= 1 & link_type <= 5)
{ # BINOMIAL/BERNOULLI DISTRIBUTION
if (link_type == 1) { # BINOMIAL.POWER LINKS
- if (link_power == 0.0) { # Binomial.log
+ if (link_power == 0) { # Binomial.log
vec1 = 1 / (exp (- linear_terms) - 1);
g_Y = Y [, 1] - Y [, 2] * vec1;
w = rowSums (Y) * vec1;
@@ -804,19 +804,19 @@ glm_dist = function (Matrix[double] linear_terms, Matrix[double] Y,
vec1 = zeros_r;
if (link_power == 0.5) {
vec1 = 1 / (1 - linear_terms ^ 2);
- } else { if (sum (ppred (linear_terms, 0.0, "<")) == 0) {
+ } else { if (sum (linear_terms < 0) == 0) {
vec1 = linear_terms ^ (- 2 + 1 / link_power) / (1 - linear_terms ^ (1 / link_power));
} else {isNaN = 1;}}
# We want a "zero-protected" version of
# vec2 = Y [, 1] / linear_terms;
- is_y_0 = ppred (Y [, 1], 0.0, "==");
+ is_y_0 = (Y [, 1] == 0);
vec2 = (Y [, 1] + is_y_0) / (linear_terms * (1 - is_y_0) + is_y_0) - is_y_0;
g_Y = (vec2 - Y [, 2] * vec1 * linear_terms) / link_power;
w = rowSums (Y) * vec1 / link_power ^ 2;
}
} else {
- is_LT_pos_infinite = ppred (linear_terms, 1.0/0.0, "==");
- is_LT_neg_infinite = ppred (linear_terms, -1.0/0.0, "==");
+ is_LT_pos_infinite = (linear_terms == 1.0/0.0);
+ is_LT_neg_infinite = (linear_terms == -1.0/0.0);
is_LT_infinite = is_LT_pos_infinite %*% one_zero + is_LT_neg_infinite %*% zero_one;
finite_linear_terms = replace (target = linear_terms, pattern = 1.0/0.0, replacement = 0);
finite_linear_terms = replace (target = finite_linear_terms, pattern = -1.0/0.0, replacement = 0);
@@ -827,7 +827,7 @@ glm_dist = function (Matrix[double] linear_terms, Matrix[double] Y,
g_Y = rowSums (Y * (Y_prob %*% flip_neg)); ### = y_residual;
w = rowSums (Y * (Y_prob %*% flip_pos) * Y_prob); ### = y_variance;
} else { if (link_type == 3) { # Binomial.probit
- is_lt_pos = ppred (linear_terms, 0.0, ">=");
+ is_lt_pos = (linear_terms >= 0);
t_gp = 1.0 / (1.0 + abs (finite_linear_terms) * 0.231641888); # 0.231641888 = 0.3275911 / sqrt (2.0)
pt_gp = t_gp * ( 0.254829592
+ t_gp * (-0.284496736 # "Handbook of Mathematical Functions", ed. by M. Abramowitz and I.A. Stegun,
@@ -842,7 +842,7 @@ glm_dist = function (Matrix[double] linear_terms, Matrix[double] Y,
} else { if (link_type == 4) { # Binomial.cloglog
the_exp = exp (linear_terms)
the_exp_exp = exp (- the_exp);
- is_too_small = ppred (10000000 + the_exp, 10000000, "==");
+ is_too_small = ((10000000 + the_exp) == 10000000);
the_exp_ratio = (1 - is_too_small) * (1 - the_exp_exp) / (the_exp + is_too_small) + is_too_small * (1 - the_exp / 2);
g_Y = (rowSums (Y) * the_exp_exp - Y [, 2]) / the_exp_ratio;
w = the_exp_exp * the_exp * rowSums (Y) / the_exp_ratio;
@@ -874,52 +874,52 @@ glm_log_likelihood_part = function (Matrix[double] linear_terms, Matrix[double]
b_cumulant = zeros_r;
natural_parameters = zeros_r;
is_natural_parameter_log_zero = zeros_r;
- if (var_power == 1.0 & link_power == 0.0) { # Poisson.log
+ if (var_power == 1.0 & link_power == 0) { # Poisson.log
b_cumulant = exp (linear_terms);
- is_natural_parameter_log_zero = ppred (linear_terms, -1.0/0.0, "==");
+ is_natural_parameter_log_zero = (linear_terms == -1.0/0.0);
natural_parameters = replace (target = linear_terms, pattern = -1.0/0.0, replacement = 0);
} else { if (var_power == 1.0 & link_power == 1.0) { # Poisson.id
- if (sum (ppred (linear_terms, 0.0, "<")) == 0) {
+ if (sum (linear_terms < 0) == 0) {
b_cumulant = linear_terms;
- is_natural_parameter_log_zero = ppred (linear_terms, 0.0, "==");
+ is_natural_parameter_log_zero = (linear_terms == 0);
natural_parameters = log (linear_terms + is_natural_parameter_log_zero);
} else {isNaN = 1;}
} else { if (var_power == 1.0 & link_power == 0.5) { # Poisson.sqrt
- if (sum (ppred (linear_terms, 0.0, "<")) == 0) {
+ if (sum (linear_terms < 0) == 0) {
b_cumulant = linear_terms ^ 2;
- is_natural_parameter_log_zero = ppred (linear_terms, 0.0, "==");
+ is_natural_parameter_log_zero = (linear_terms == 0);
natural_parameters = 2.0 * log (linear_terms + is_natural_parameter_log_zero);
} else {isNaN = 1;}
- } else { if (var_power == 1.0 & link_power > 0.0) { # Poisson.power_nonlog, pos
- if (sum (ppred (linear_terms, 0.0, "<")) == 0) {
- is_natural_parameter_log_zero = ppred (linear_terms, 0.0, "==");
+ } else { if (var_power == 1.0 & link_power > 0) { # Poisson.power_nonlog, pos
+ if (sum (linear_terms < 0) == 0) {
+ is_natural_parameter_log_zero = (linear_terms == 0);
b_cumulant = (linear_terms + is_natural_parameter_log_zero) ^ (1.0 / link_power) - is_natural_parameter_log_zero;
natural_parameters = log (linear_terms + is_natural_parameter_log_zero) / link_power;
} else {isNaN = 1;}
} else { if (var_power == 1.0) { # Poisson.power_nonlog, neg
- if (sum (ppred (linear_terms, 0.0, "<=")) == 0) {
+ if (sum (linear_terms <= 0) == 0) {
b_cumulant = linear_terms ^ (1.0 / link_power);
natural_parameters = log (linear_terms) / link_power;
} else {isNaN = 1;}
} else { if (var_power == 2.0 & link_power == -1.0) { # Gamma.inverse
- if (sum (ppred (linear_terms, 0.0, "<=")) == 0) {
+ if (sum (linear_terms <= 0) == 0) {
b_cumulant = - log (linear_terms);
natural_parameters = - linear_terms;
} else {isNaN = 1;}
} else { if (var_power == 2.0 & link_power == 1.0) { # Gamma.id
- if (sum (ppred (linear_terms, 0.0, "<=")) == 0) {
+ if (sum (linear_terms <= 0) == 0) {
b_cumulant = log (linear_terms);
natural_parameters = - 1.0 / linear_terms;
} else {isNaN = 1;}
- } else { if (var_power == 2.0 & link_power == 0.0) { # Gamma.log
+ } else { if (var_power == 2.0 & link_power == 0) { # Gamma.log
b_cumulant = linear_terms;
natural_parameters = - exp (- linear_terms);
} else { if (var_power == 2.0) { # Gamma.power_nonlog
- if (sum (ppred (linear_terms, 0.0, "<=")) == 0) {
+ if (sum (linear_terms <= 0) == 0) {
b_cumulant = log (linear_terms) / link_power;
natural_parameters = - linear_terms ^ (- 1.0 / link_power);
} else {isNaN = 1;}
- } else { if (link_power == 0.0) { # PowerDist.log
+ } else { if (link_power == 0) { # PowerDist.log
natural_parameters = exp (linear_terms * (1.0 - var_power)) / (1.0 - var_power);
b_cumulant = exp (linear_terms * (2.0 - var_power)) / (2.0 - var_power);
} else { # PowerDist.power_nonlog
@@ -932,7 +932,7 @@ glm_log_likelihood_part = function (Matrix[double] linear_terms, Matrix[double]
} else { if ( 2 * link_power == 1.0 - var_power) {
natural_parameters = linear_terms ^ 2 / (1.0 - var_power);
} else {
- if (sum (ppred (linear_terms, 0.0, "<=")) == 0) {
+ if (sum (linear_terms <= 0) == 0) {
power = (1.0 - var_power) / link_power;
natural_parameters = (linear_terms ^ power) / (1.0 - var_power);
} else {isNaN = 1;}
@@ -946,13 +946,13 @@ glm_log_likelihood_part = function (Matrix[double] linear_terms, Matrix[double]
} else { if ( 2 * link_power == 2.0 - var_power) {
b_cumulant = linear_terms ^ 2 / (2.0 - var_power);
} else {
- if (sum (ppred (linear_terms, 0.0, "<=")) == 0) {
+ if (sum (linear_terms <= 0) == 0) {
power = (2.0 - var_power) / link_power;
b_cumulant = (linear_terms ^ power) / (2.0 - var_power);
} else {isNaN = 1;}
}}}}
}}}}} }}}}}
- if (sum (is_natural_parameter_log_zero * abs (Y)) > 0.0) {
+ if (sum (is_natural_parameter_log_zero * abs (Y)) > 0) {
log_l = -1.0 / 0.0;
isNaN = 1;
}
@@ -970,8 +970,8 @@ glm_log_likelihood_part = function (Matrix[double] linear_terms, Matrix[double]
[Y_prob, isNaN] = binomial_probability_two_column (linear_terms, link_type, link_power);
if (isNaN == 0) {
- does_prob_contradict = ppred (Y_prob, 0.0, "<=");
- if (sum (does_prob_contradict * abs (Y)) == 0.0) {
+ does_prob_contradict = (Y_prob <= 0);
+ if (sum (does_prob_contradict * abs (Y)) == 0) {
log_l = sum (Y * log (Y_prob * (1 - does_prob_contradict) + does_prob_contradict));
if (log_l != log_l | (log_l == log_l + 1.0 & log_l == log_l * 2.0)) {
isNaN = 1;
@@ -1014,18 +1014,18 @@ binomial_probability_two_column =
Y_prob = zeros_r %*% ones_2;
if (link_type == 1) { # Binomial.power
- if (link_power == 0.0) { # Binomial.log
+ if (link_power == 0) { # Binomial.log
Y_prob = exp (linear_terms) %*% p_one_m_one + ones_r %*% zero_one;
} else { if (link_power == 0.5) { # Binomial.sqrt
Y_prob = (linear_terms ^ 2) %*% p_one_m_one + ones_r %*% zero_one;
} else { # Binomial.power_nonlog
- if (sum (ppred (linear_terms, 0.0, "<")) == 0) {
+ if (sum (linear_terms < 0) == 0) {
Y_prob = (linear_terms ^ (1.0 / link_power)) %*% p_one_m_one + ones_r %*% zero_one;
} else {isNaN = 1;}
}}
} else { # Binomial.non_power
- is_LT_pos_infinite = ppred (linear_terms, 1.0/0.0, "==");
- is_LT_neg_infinite = ppred (linear_terms, -1.0/0.0, "==");
+ is_LT_pos_infinite = (linear_terms == 1.0/0.0);
+ is_LT_neg_infinite = (linear_terms == -1.0/0.0);
is_LT_infinite = is_LT_pos_infinite %*% one_zero + is_LT_neg_infinite %*% zero_one;
finite_linear_terms = replace (target = linear_terms, pattern = 1.0/0.0, replacement = 0);
finite_linear_terms = replace (target = finite_linear_terms, pattern = -1.0/0.0, replacement = 0);
@@ -1033,7 +1033,7 @@ binomial_probability_two_column =
Y_prob = exp (finite_linear_terms) %*% one_zero + ones_r %*% zero_one;
Y_prob = Y_prob / (rowSums (Y_prob) %*% ones_2);
} else { if (link_type == 3) { # Binomial.probit
- lt_pos_neg = ppred (finite_linear_terms, 0.0, ">=") %*% p_one_m_one + ones_r %*% zero_one;
+ lt_pos_neg = (finite_linear_terms >= 0) %*% p_one_m_one + ones_r %*% zero_one;
t_gp = 1.0 / (1.0 + abs (finite_linear_terms) * 0.231641888); # 0.231641888 = 0.3275911 / sqrt (2.0)
pt_gp = t_gp * ( 0.254829592
+ t_gp * (-0.284496736 # "Handbook of Mathematical Functions", ed. by M. Abramowitz and I.A. Stegun,
@@ -1045,7 +1045,7 @@ binomial_probability_two_column =
} else { if (link_type == 4) { # Binomial.cloglog
the_exp = exp (finite_linear_terms);
the_exp_exp = exp (- the_exp);
- is_too_small = ppred (10000000 + the_exp, 10000000, "==");
+ is_too_small = ((10000000 + the_exp) == 10000000);
Y_prob [, 1] = (1 - is_too_small) * (1 - the_exp_exp) + is_too_small * the_exp * (1 - the_exp / 2);
Y_prob [, 2] = the_exp_exp;
} else { if (link_type == 5) { # Binomial.cauchit
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9b9d019b/scripts/algorithms/StepLinearRegDS.dml
----------------------------------------------------------------------
diff --git a/scripts/algorithms/StepLinearRegDS.dml b/scripts/algorithms/StepLinearRegDS.dml
index 79a2803..8c643c3 100644
--- a/scripts/algorithms/StepLinearRegDS.dml
+++ b/scripts/algorithms/StepLinearRegDS.dml
@@ -219,7 +219,7 @@ linear_regression = function (Matrix[Double] X, Matrix[Double] y, Double m_orig,
# Important assumption: X [, m_ext] = ones_n
avg_X_cols = t(colSums(X)) / n;
var_X_cols = (t(colSums (X ^ 2)) - n * (avg_X_cols ^ 2)) / (n - 1);
- is_unsafe = ppred (var_X_cols, 0.0, "<=");
+ is_unsafe = (var_X_cols <= 0);
scale_X = 1.0 / sqrt (var_X_cols * (1 - is_unsafe) + is_unsafe);
scale_X [m_ext, 1] = 1;
shift_X = - avg_X_cols * scale_X;
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9b9d019b/scripts/algorithms/Univar-Stats.dml
----------------------------------------------------------------------
diff --git a/scripts/algorithms/Univar-Stats.dml b/scripts/algorithms/Univar-Stats.dml
index 525118c..fd06d68 100644
--- a/scripts/algorithms/Univar-Stats.dml
+++ b/scripts/algorithms/Univar-Stats.dml
@@ -61,7 +61,7 @@ baseStats = matrix(0, rows=numBaseStats, cols=n);
# Compute max domain size among all categorical attributes
maxs = colMaxs(A);
-maxDomainSize = max( ppred(K, 1, ">") * maxs );
+maxDomainSize = max( (K > 1) * maxs );
maxDomain = as.integer(maxDomainSize);
parfor(i in 1:n, check=0) {
@@ -134,7 +134,7 @@ parfor(i in 1:n, check=0) {
mode = rowIndexMax(t(cat_counts));
mx = max(cat_counts)
- modeArr = ppred(cat_counts, mx, "==")
+ modeArr = (cat_counts == mx)
numModes = sum(modeArr);
# place the computed statistics in output matrices
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9b9d019b/scripts/algorithms/decision-tree-predict.dml
----------------------------------------------------------------------
diff --git a/scripts/algorithms/decision-tree-predict.dml b/scripts/algorithms/decision-tree-predict.dml
index 3447da6..c375c13 100644
--- a/scripts/algorithms/decision-tree-predict.dml
+++ b/scripts/algorithms/decision-tree-predict.dml
@@ -79,7 +79,7 @@ R_scale = matrix (0, rows = 1, cols = 1);
if (fileR != " ") {
R = read (fileR);
- dummy_coded = ppred (R[,2], R[,3], "!=");
+ dummy_coded = (R[,2] != R[,3]);
R_scale = removeEmpty (target = R[,2] * (1 - dummy_coded), margin = "rows");
R_cat = removeEmpty (target = R[,2:3] * dummy_coded, margin = "rows");
} else { # only scale features available
@@ -112,7 +112,7 @@ parfor (i in 1:num_records, check = 0) {
cur_end_ind = as.scalar (R_cat[cur_feature,2]);
cur_value = as.scalar (rowIndexMax(cur_sample[,cur_start_ind:cur_end_ind])); # as.scalar (cur_sample[,cur_feature]);
cur_offset = as.scalar (M[5,cur_node_pos]);
- value_found = sum (ppred (M[6:(6 + cur_offset - 1),cur_node_pos], cur_value, "=="));
+ value_found = sum (M[6:(6 + cur_offset - 1),cur_node_pos] == cur_value);
if (value_found) { # go to left branch
cur_node_pos = cur_node_pos + as.scalar (M[2,cur_node_pos]);
} else { # go to right branch
@@ -126,7 +126,7 @@ if (fileY != " ") {
Y_test = read (fileY);
num_classes = ncol (Y_test);
Y_test = rowSums (Y_test * t (seq (1, num_classes)));
- result = ppred (Y_test, Y_predicted, "==");
+ result = (Y_test == Y_predicted);
result = sum (result);
accuracy = result / num_records * 100;
acc_str = "Accuracy (%): " + accuracy;