You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by du...@apache.org on 2016/01/26 02:13:13 UTC
[49/55] [partial] incubator-systemml git commit: [SYSTEMML-482] [SYSTEMML-480] Adding a Git attributes file to enfore Unix-styled line endings, and normalizing all of the line endings.

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/scripts/algorithms/GLM-predict.dml
----------------------------------------------------------------------
diff --git a/scripts/algorithms/GLM-predict.dml b/scripts/algorithms/GLM-predict.dml
index 9edfaf2..5e998e3 100644
--- a/scripts/algorithms/GLM-predict.dml
+++ b/scripts/algorithms/GLM-predict.dml
@@ -1,444 +1,444 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# 
-# THIS SCRIPT APPLIES THE ESTIMATED PARAMETERS OF A GLM-TYPE REGRESSION TO A NEW (TEST) DATASET
-#
-# INPUT PARAMETERS:
-# ---------------------------------------------------------------------------------------------
-# NAME  TYPE   DEFAULT  MEANING
-# ---------------------------------------------------------------------------------------------
-# X     String  ---     Location to read the matrix X of records (feature vectors)
-# B     String  ---     Location to read GLM regression parameters (the betas), with dimensions
-#                           ncol(X)   x k: do not add intercept
-#                           ncol(X)+1 x k: add intercept as given by the last B-row
-#                           if k > 1, use only B[, 1] unless it is Multinomial Logit (dfam=3)
-# M     String  " "     Location to write the matrix of predicted response means/probabilities:
-#                           nrow(X) x 1  : for Power-type distributions (dfam=1)
-#                           nrow(X) x 2  : for Binomial distribution (dfam=2), column 2 is "No"
-#                           nrow(X) x k+1: for Multinomial Logit (dfam=3), col# k+1 is baseline
-# Y     String  " "     Location to read response matrix Y, with the following dimensions:
-#                           nrow(X) x 1  : for all distributions (dfam=1 or 2 or 3)
-#                           nrow(X) x 2  : for Binomial (dfam=2) given by (#pos, #neg) counts
-#                           nrow(X) x k+1: for Multinomial (dfam=3) given by category counts
-# O     String  " "     Location to write the printed statistics; by default is standard output
-# dfam  Int     1       GLM distribution family: 1 = Power, 2 = Binomial, 3 = Multinomial Logit
-# vpow  Double  0.0     Power for Variance defined as (mean)^power (ignored if dfam != 1):
-#                       0.0 = Gaussian, 1.0 = Poisson, 2.0 = Gamma, 3.0 = Inverse Gaussian
-# link  Int     0       Link function code: 0 = canonical (depends on distribution), 1 = Power,
-#                       2 = Logit, 3 = Probit, 4 = Cloglog, 5 = Cauchit; ignored if Multinomial
-# lpow  Double  1.0     Power for Link function defined as (mean)^power (ignored if link != 1):
-#                       -2.0 = 1/mu^2, -1.0 = reciprocal, 0.0 = log, 0.5 = sqrt, 1.0 = identity
-# disp  Double  1.0     Dispersion value, when available
-# fmt   String "text"   Matrix output format, usually "text" or "csv" (for matrices only)
-# ---------------------------------------------------------------------------------------------
-# OUTPUT: Matrix M of predicted means/probabilities, some statistics in CSV format (see below)
-# The statistics are printed one per each line, in the following CSV format:
-# NAME,[COLUMN],[SCALED],VALUE
-#   NAME   is the string identifier for the statistic, see the table below.
-#   COLUMN is an optional integer value that specifies the Y-column for per-column statistics;
-#          note that a Binomial/Multinomial one-column Y input is converted into multi-column.
-#   SCALED is an optional Boolean value (TRUE or FALSE) that tells us whether or not the input
-#          dispersion parameter (disp) scaling has been applied to this statistic.
-#   VALUE  is the value of the statistic.
-#
-# NAME                  COLUMN  SCALED  MEANING
-# ---------------------------------------------------------------------------------------------
-# LOGLHOOD_Z                      +     Log-Likelihood Z-score (in st.dev's from mean)
-# LOGLHOOD_Z_PVAL                 +     Log-Likelihood Z-score p-value
-# PEARSON_X2                      +     Pearson residual X^2 statistic
-# PEARSON_X2_BY_DF                +     Pearson X^2 divided by degrees of freedom
-# PEARSON_X2_PVAL                 +     Pearson X^2 p-value
-# DEVIANCE_G2                     +     Deviance from saturated model G^2 statistic
-# DEVIANCE_G2_BY_DF               +     Deviance G^2 divided by degrees of freedom
-# DEVIANCE_G2_PVAL                +     Deviance G^2 p-value
-# AVG_TOT_Y               +             Average of Y column for a single response value
-# STDEV_TOT_Y             +             St.Dev. of Y column for a single response value
-# AVG_RES_Y               +             Average of column residual, i.e. of Y - mean(Y|X)
-# STDEV_RES_Y             +             St.Dev. of column residual, i.e. of Y - mean(Y|X)
-# PRED_STDEV_RES          +       +     Model-predicted St.Dev. of column residual
-# PLAIN_R2                +             Plain R^2 of Y column residual with bias included
-# ADJUSTED_R2             +             Adjusted R^2 of Y column residual with bias included
-# PLAIN_R2_NOBIAS         +             Plain R^2 of Y column residual with bias subtracted
-# ADJUSTED_R2_NOBIAS      +             Adjusted R^2 of Y column residual with bias subtracted
-# ---------------------------------------------------------------------------------------------
-#
-# Example with distribution = "Poisson.log":
-# hadoop jar SystemML.jar -f GLM_HOME/GLM-predict.dml -nvargs dfam=1 vpow=1.0 link=1 lpow=0.0
-#   disp=3.0 fmt=csv X=INPUT_DIR/X B=INPUT_DIR/B Y=INPUT_DIR/Y M=OUTPUT_DIR/M O=OUTPUT_DIR/out.csv
-
-# Default values for input parameters:
-fileX = $X;
-fileB = $B;
-fileM = ifdef ($M, " ");
-fileY = ifdef ($Y, " ");
-fileO = ifdef ($O, " ");
-fmtM  = ifdef ($fmt, "text");
-
-dist_type  = ifdef ($dfam, 1);    # $dfam = 1;
-var_power  = ifdef ($vpow, 0.0);  # $vpow = 0.0;
-link_type  = ifdef ($link, 0);    # $link = 0;
-link_power = ifdef ($lpow, 1.0);  # $lpow = 1.0;
-dispersion = ifdef ($disp, 1.0);  # $disp = 1.0;
-
-var_power  = as.double (var_power);
-link_power = as.double (link_power); 
-dispersion = as.double (dispersion);
-
-if (dist_type == 3) {
-    link_type = 2;
-} else { if (link_type == 0) { # Canonical Link
-    if (dist_type == 1) {
-        link_type = 1;
-        link_power = 1.0 - var_power;
-    } else { if (dist_type == 2) {
-            link_type = 2;
-}}} }
-
-X = read (fileX);
-num_records  = nrow (X);
-num_features = ncol (X);
-
-B_full = read (fileB);
-if (dist_type == 3) {
-    beta =  B_full [1 : ncol (X),  ];
-    intercept = B_full [nrow(B_full),  ];
-} else {
-    beta =  B_full [1 : ncol (X), 1];
-    intercept = B_full [nrow(B_full), 1];
-}
-if (nrow (B_full) == ncol (X)) {
-    intercept = 0.0 * intercept;
-    is_intercept = FALSE;
-} else {
-    num_features = num_features + 1;
-    is_intercept = TRUE;
-}
-
-ones_rec = matrix (1, rows = num_records, cols = 1);
-linear_terms = X %*% beta + ones_rec %*% intercept;
-[means, vars] =
-    glm_means_and_vars (linear_terms, dist_type, var_power, link_type, link_power);
-    
-if (fileM != " ") {
-    write (means, fileM, format=fmtM);
-}
-
-if (fileY != " ")
-{
-    Y = read (fileY);
-    ones_ctg = matrix (1, rows = ncol(Y), cols = 1);
-    
-    # Statistics To Compute:
-    
-    Z_logl               = 0.0 / 0.0;
-    Z_logl_pValue        = 0.0 / 0.0;
-    X2_pearson           = 0.0 / 0.0;
-    df_pearson           = -1;
-    G2_deviance          = 0.0 / 0.0;
-    df_deviance          = -1;
-    X2_pearson_pValue    = 0.0 / 0.0;
-    G2_deviance_pValue   = 0.0 / 0.0;
-    Z_logl_scaled        = 0.0 / 0.0;
-    Z_logl_scaled_pValue = 0.0 / 0.0;
-    X2_scaled            = 0.0 / 0.0;
-    X2_scaled_pValue     = 0.0 / 0.0;
-    G2_scaled            = 0.0 / 0.0;
-    G2_scaled_pValue     = 0.0 / 0.0;
-    
-    if (dist_type == 1 & link_type == 1) {
-    #
-    # POWER DISTRIBUTIONS (GAUSSIAN, POISSON, GAMMA, ETC.)
-    #
-        if (link_power == 0.0) {
-            is_zero_Y = ppred (Y, 0.0, "==");
-            lt_saturated = log (Y + is_zero_Y) - is_zero_Y / (1.0 - is_zero_Y);
-        } else {
-            lt_saturated = Y ^ link_power;
-        }
-        Y_counts = ones_rec;
-
-        X2_pearson = sum ((Y - means) ^ 2 / vars);
-        df_pearson = num_records - num_features;
-
-        log_l_part = 
-            glm_partial_loglikelihood_for_power_dist_and_link (linear_terms, Y, var_power, link_power);
-        log_l_part_saturated = 
-            glm_partial_loglikelihood_for_power_dist_and_link (lt_saturated, Y, var_power, link_power);
-            
-        G2_deviance = 2 * sum (log_l_part_saturated) - 2 * sum (log_l_part);
-        df_deviance = num_records - num_features;
-        
-    } else { if (dist_type >= 2) {
-    #
-    # BINOMIAL AND MULTINOMIAL DISTRIBUTIONS
-    #
-        if (ncol (Y) == 1) {
-            num_categories = ncol (beta) + 1;
-            if (min (Y) <= 0) { 
-                # Category labels "0", "-1" etc. are converted into the baseline label
-                Y = Y + (- Y + num_categories) * ppred (Y, 0, "<=");
-            }
-            Y_size = min (num_categories, max(Y));
-            Y_unsized = table (seq (1, num_records, 1), Y);
-            Y = matrix (0, rows = num_records, cols = num_categories);
-            Y [, 1 : Y_size] = Y_unsized [, 1 : Y_size];
-            Y_counts = ones_rec;
-        } else {
-            Y_counts = rowSums (Y);
-        }
-        
-        P = means;
-        zero_Y = ppred (Y, 0.0, "==");
-        zero_P = ppred (P, 0.0, "==");
-        ones_ctg = matrix (1, rows = ncol(Y), cols = 1);
-        
-        logl_vec = rowSums (Y *  log (P + zero_Y)   );
-        ent1_vec = rowSums (P *  log (P + zero_P)   );
-        ent2_vec = rowSums (P * (log (P + zero_P))^2);
-        E_logl   = sum (Y_counts * ent1_vec);
-        V_logl   = sum (Y_counts * (ent2_vec - ent1_vec ^ 2));
-        Z_logl   = (sum (logl_vec) - E_logl) / sqrt (V_logl);
-        
-        means = means * (Y_counts %*% t(ones_ctg));
-        vars  = vars  * (Y_counts %*% t(ones_ctg));
-        
-        frac_below_5 = sum (ppred (means, 5, "<")) / (nrow (means) * ncol (means));
-        frac_below_1 = sum (ppred (means, 1, "<")) / (nrow (means) * ncol (means));
-        
-        if (frac_below_5 > 0.2 | frac_below_1 > 0.0) {
-            print ("WARNING: residual statistics are inaccurate here due to low cell means.");
-        }
-        
-        X2_pearson = sum ((Y - means) ^ 2 / means);
-        df_pearson = (num_records - num_features) * (ncol(Y) - 1);
-        
-        G2_deviance = 2 * sum (Y * log ((Y + zero_Y) / (means + zero_Y)));
-        df_deviance = (num_records - num_features) * (ncol(Y) - 1);
-    }}
-    
-    if (Z_logl == Z_logl) {
-        Z_logl_absneg = - abs (Z_logl);
-        Z_logl_pValue = 2.0 * pnorm(target = Z_logl_absneg);
-    }
-    if (X2_pearson == X2_pearson & df_pearson > 0) {
-        X2_pearson_pValue = pchisq(target = X2_pearson, df = df_pearson, lower.tail=FALSE);
-    }
-    if (G2_deviance == G2_deviance & df_deviance > 0) {
-        G2_deviance_pValue = pchisq(target = G2_deviance, df = df_deviance, lower.tail=FALSE);
-    }
-    
-    Z_logl_scaled = Z_logl / sqrt (dispersion);
-    X2_scaled = X2_pearson / dispersion;
-    G2_scaled = G2_deviance / dispersion;
-
-    if (Z_logl_scaled == Z_logl_scaled) {
-        Z_logl_scaled_absneg = - abs (Z_logl_scaled);
-        Z_logl_scaled_pValue = 2.0 * pnorm(target = Z_logl_scaled_absneg);
-    }
-    if (X2_scaled == X2_scaled & df_pearson > 0) {
-        X2_scaled_pValue = pchisq(target = X2_scaled, df = df_pearson, lower.tail=FALSE);
-    }
-    if (G2_scaled == G2_scaled & df_deviance > 0) {
-        G2_scaled_pValue = pchisq(target = G2_scaled, df = df_deviance, lower.tail=FALSE);
-    }
-    
-    avg_tot_Y = colSums (    Y    ) / sum (Y_counts);
-    avg_res_Y = colSums (Y - means) / sum (Y_counts);
-    
-    ss_avg_tot_Y = colSums ((    Y     - Y_counts %*% avg_tot_Y) ^ 2);
-    ss_res_Y     = colSums ((Y - means) ^ 2);
-    ss_avg_res_Y = colSums ((Y - means - Y_counts %*% avg_res_Y) ^ 2);
-    
-    df_ss_res_Y  = sum (Y_counts) - num_features;
-    if (is_intercept) {
-        df_ss_avg_res_Y = df_ss_res_Y;
-    } else {
-        df_ss_avg_res_Y = df_ss_res_Y - 1;
-    }
-    
-    var_tot_Y = ss_avg_tot_Y / (sum (Y_counts) - 1);
-    if (df_ss_avg_res_Y > 0) {
-        var_res_Y = ss_avg_res_Y / df_ss_avg_res_Y;
-    } else {
-        var_res_Y = matrix (0.0, rows = 1, cols = ncol (Y)) / 0.0;
-    }
-    plain_R2_nobias  = 1 - ss_avg_res_Y / ss_avg_tot_Y;
-    adjust_R2_nobias = 1 - var_res_Y / var_tot_Y;
-    plain_R2  = 1 - ss_res_Y / ss_avg_tot_Y;
-    if (df_ss_res_Y > 0) {
-        adjust_R2 = 1 - (ss_res_Y / df_ss_res_Y) / var_tot_Y;
-    } else {
-        adjust_R2 = matrix (0.0, rows = 1, cols = ncol (Y)) / 0.0;
-    }
-    
-    predicted_avg_var_res_Y = dispersion * colSums (vars) / sum (Y_counts);
-    
-    # PREPARING THE OUTPUT CSV STATISTICS FILE
-    
-    str = "LOGLHOOD_Z,,FALSE," + Z_logl;
-    str = append (str, "LOGLHOOD_Z_PVAL,,FALSE," + Z_logl_pValue);
-    str = append (str, "PEARSON_X2,,FALSE," + X2_pearson);
-    str = append (str, "PEARSON_X2_BY_DF,,FALSE," + (X2_pearson / df_pearson));
-    str = append (str, "PEARSON_X2_PVAL,,FALSE," + X2_pearson_pValue);
-    str = append (str, "DEVIANCE_G2,,FALSE," + G2_deviance);
-    str = append (str, "DEVIANCE_G2_BY_DF,,FALSE," + (G2_deviance / df_deviance));
-    str = append (str, "DEVIANCE_G2_PVAL,,FALSE," + G2_deviance_pValue);
-    str = append (str, "LOGLHOOD_Z,,TRUE," + Z_logl_scaled);
-    str = append (str, "LOGLHOOD_Z_PVAL,,TRUE," + Z_logl_scaled_pValue);
-    str = append (str, "PEARSON_X2,,TRUE," + X2_scaled);
-    str = append (str, "PEARSON_X2_BY_DF,,TRUE," + (X2_scaled / df_pearson));
-    str = append (str, "PEARSON_X2_PVAL,,TRUE," + X2_scaled_pValue);
-    str = append (str, "DEVIANCE_G2,,TRUE," + G2_scaled);
-    str = append (str, "DEVIANCE_G2_BY_DF,,TRUE," + (G2_scaled / df_deviance));
-    str = append (str, "DEVIANCE_G2_PVAL,,TRUE," + G2_scaled_pValue);
-
-    for (i in 1:ncol(Y)) {
-        str = append (str, "AVG_TOT_Y," + i + ",," + castAsScalar (avg_tot_Y [1, i]));
-        str = append (str, "STDEV_TOT_Y," + i + ",," + castAsScalar (sqrt (var_tot_Y [1, i])));
-        str = append (str, "AVG_RES_Y," + i + ",," + castAsScalar (avg_res_Y [1, i]));
-        str = append (str, "STDEV_RES_Y," + i + ",," + castAsScalar (sqrt (var_res_Y [1, i])));
-        str = append (str, "PRED_STDEV_RES," + i + ",TRUE," + castAsScalar (sqrt (predicted_avg_var_res_Y [1, i])));
-        str = append (str, "PLAIN_R2," + i + ",," + castAsScalar (plain_R2 [1, i]));
-        str = append (str, "ADJUSTED_R2," + i + ",," + castAsScalar (adjust_R2 [1, i]));
-        str = append (str, "PLAIN_R2_NOBIAS," + i + ",," + castAsScalar (plain_R2_nobias [1, i]));
-        str = append (str, "ADJUSTED_R2_NOBIAS," + i + ",," + castAsScalar (adjust_R2_nobias [1, i]));
-    }
-    
-    if (fileO != " ") {
-        write (str, fileO);
-    } else {
-        print (str);
-    }
-}
-
-glm_means_and_vars = 
-    function (Matrix[double] linear_terms, int dist_type, double var_power, int link_type, double link_power)
-    return (Matrix[double] means, Matrix[double] vars)
-    # NOTE: "vars" represents the variance without dispersion, i.e. the V(mu) function.
-{
-    num_points = nrow (linear_terms);
-    if (dist_type == 1 & link_type == 1) {
-    # POWER DISTRIBUTION
-        if          (link_power ==  0.0) {
-            y_mean = exp (linear_terms);
-        } else { if (link_power ==  1.0) {
-            y_mean = linear_terms;
-        } else { if (link_power == -1.0) {
-            y_mean = 1.0 / linear_terms;
-        } else {
-            y_mean = linear_terms ^ (1.0 / link_power);
-        }}}
-        if (var_power == 0.0) {
-            var_function = matrix (1.0, rows = num_points, cols = 1);
-        } else { if (var_power == 1.0) {
-            var_function = y_mean;
-        } else {
-            var_function = y_mean ^ var_power;
-        }}
-        means = y_mean;
-        vars = var_function;
-    } else { if (dist_type == 2 & link_type >= 1 & link_type <= 5) {
-    # BINOMIAL/BERNOULLI DISTRIBUTION
-        y_prob = matrix (0.0, rows = num_points, cols = 2);
-        if          (link_type == 1 & link_power == 0.0)  { # Binomial.log
-            y_prob [, 1]  = exp (linear_terms);
-            y_prob [, 2]  = 1.0 - y_prob [, 1];
-        } else { if (link_type == 1 & link_power != 0.0)  { # Binomial.power_nonlog
-            y_prob [, 1]  = linear_terms ^ (1.0 / link_power);
-            y_prob [, 2]  = 1.0 - y_prob [, 1];
-        } else { if (link_type == 2)                      { # Binomial.logit
-            elt = exp (linear_terms);
-            y_prob [, 1]  = elt / (1.0 + elt);
-            y_prob [, 2]  = 1.0 / (1.0 + elt);
-        } else { if (link_type == 3)                      { # Binomial.probit
-            sign_lt = 2 * ppred (linear_terms, 0.0, ">=") - 1;
-            t_gp = 1.0 / (1.0 + abs (linear_terms) * 0.231641888);  # 0.231641888 = 0.3275911 / sqrt (2.0)
-            erf_corr =
-                t_gp * ( 0.254829592 
-              + t_gp * (-0.284496736 # "Handbook of Mathematical Functions", ed. by M. Abramowitz and I.A. Stegun,
-              + t_gp * ( 1.421413741 # U.S. Nat-l Bureau of Standards, 10th print (Dec 1972), Sec. 7.1.26, p. 299
-              + t_gp * (-1.453152027 
-              + t_gp *   1.061405429)))) * sign_lt * exp (- (linear_terms ^ 2) / 2.0);
-            y_prob [, 1] = (1 + sign_lt) - erf_corr;
-            y_prob [, 2] = (1 - sign_lt) + erf_corr;
-            y_prob = y_prob / 2;
-        } else { if (link_type == 4)                      { # Binomial.cloglog
-            elt = exp (linear_terms);
-            is_too_small = ppred (10000000 + elt, 10000000, "==");
-            y_prob [, 2] = exp (- elt);
-            y_prob [, 1] = (1 - is_too_small) * (1.0 - y_prob [, 2]) + is_too_small * elt * (1.0 - elt / 2);
-        } else { if (link_type == 5)                      { # Binomial.cauchit
-            atan_linear_terms = atan (linear_terms);
-            y_prob [, 1] = 0.5 + atan_linear_terms / 3.1415926535897932384626433832795;
-            y_prob [, 2] = 0.5 - atan_linear_terms / 3.1415926535897932384626433832795;
-        }}}}}}
-        means = y_prob;
-        ones_ctg = matrix (1, rows = 2, cols = 1);
-        vars  = means * (means %*% (1 - diag (ones_ctg)));
-    } else { if (dist_type == 3) {
-    # MULTINOMIAL LOGIT DISTRIBUTION
-        elt = exp (linear_terms);
-        ones_pts = matrix (1, rows = num_points, cols = 1);
-        elt = append (elt, ones_pts);
-        ones_ctg = matrix (1, rows = ncol (elt), cols = 1);
-        means = elt / (rowSums (elt) %*% t(ones_ctg));
-        vars  = means * (means %*% (1 - diag (ones_ctg)));
-    } else {
-        means = matrix (0.0, rows = num_points, cols = 1);
-        vars  = matrix (0.0, rows = num_points, cols = 1);
-}   }}}
-
-glm_partial_loglikelihood_for_power_dist_and_link =   # Assumes: dist_type == 1 & link_type == 1
-    function (Matrix[double] linear_terms, Matrix[double] Y, double var_power, double link_power)
-    return (Matrix[double] log_l_part)
-{
-    num_records = nrow (Y);
-    if (var_power == 1.0) { # Poisson
-        if (link_power == 0.0)  { # Poisson.log
-            is_natural_parameter_log_zero = ppred (linear_terms, -1.0/0.0, "==");
-            natural_parameters = replace (target = linear_terms, pattern = -1.0/0.0, replacement = 0);
-            b_cumulant = exp (linear_terms);
-        } else {                  # Poisson.power_nonlog
-            is_natural_parameter_log_zero = ppred (linear_terms, 0.0, "==");
-            natural_parameters = log (linear_terms + is_natural_parameter_log_zero) / link_power;
-            b_cumulant = (linear_terms + is_natural_parameter_log_zero) ^ (1.0 / link_power) - is_natural_parameter_log_zero;
-        }
-        is_minus_infinity = ppred (Y, 0, ">") * is_natural_parameter_log_zero;
-        log_l_part = Y * natural_parameters - b_cumulant - is_minus_infinity / (1 - is_minus_infinity);
-    } else {
-        if (var_power == 2.0 & link_power == 0.0)  { # Gamma.log
-            natural_parameters = - exp (- linear_terms);
-            b_cumulant = linear_terms;
-        } else { if (var_power == 2.0)  { # Gamma.power_nonlog
-            natural_parameters = - linear_terms ^ (- 1.0 / link_power);
-            b_cumulant = log (linear_terms) / link_power;
-        } else { if (link_power == 0.0) { # PowerDist.log
-            natural_parameters = exp (linear_terms * (1.0 - var_power)) / (1.0 - var_power);
-            b_cumulant = exp (linear_terms * (2.0 - var_power)) / (2.0 - var_power);
-        } else {                          # PowerDist.power_nonlog
-            power_np = (1.0 - var_power) / link_power;
-            natural_parameters = (linear_terms ^ power_np) / (1.0 - var_power);
-            power_cu = (2.0 - var_power) / link_power;
-            b_cumulant = (linear_terms ^ power_cu) / (2.0 - var_power);
-        }}}
-        log_l_part = Y * natural_parameters - b_cumulant;
-}   }
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# 
+# THIS SCRIPT APPLIES THE ESTIMATED PARAMETERS OF A GLM-TYPE REGRESSION TO A NEW (TEST) DATASET
+#
+# INPUT PARAMETERS:
+# ---------------------------------------------------------------------------------------------
+# NAME  TYPE   DEFAULT  MEANING
+# ---------------------------------------------------------------------------------------------
+# X     String  ---     Location to read the matrix X of records (feature vectors)
+# B     String  ---     Location to read GLM regression parameters (the betas), with dimensions
+#                           ncol(X)   x k: do not add intercept
+#                           ncol(X)+1 x k: add intercept as given by the last B-row
+#                           if k > 1, use only B[, 1] unless it is Multinomial Logit (dfam=3)
+# M     String  " "     Location to write the matrix of predicted response means/probabilities:
+#                           nrow(X) x 1  : for Power-type distributions (dfam=1)
+#                           nrow(X) x 2  : for Binomial distribution (dfam=2), column 2 is "No"
+#                           nrow(X) x k+1: for Multinomial Logit (dfam=3), col# k+1 is baseline
+# Y     String  " "     Location to read response matrix Y, with the following dimensions:
+#                           nrow(X) x 1  : for all distributions (dfam=1 or 2 or 3)
+#                           nrow(X) x 2  : for Binomial (dfam=2) given by (#pos, #neg) counts
+#                           nrow(X) x k+1: for Multinomial (dfam=3) given by category counts
+# O     String  " "     Location to write the printed statistics; by default is standard output
+# dfam  Int     1       GLM distribution family: 1 = Power, 2 = Binomial, 3 = Multinomial Logit
+# vpow  Double  0.0     Power for Variance defined as (mean)^power (ignored if dfam != 1):
+#                       0.0 = Gaussian, 1.0 = Poisson, 2.0 = Gamma, 3.0 = Inverse Gaussian
+# link  Int     0       Link function code: 0 = canonical (depends on distribution), 1 = Power,
+#                       2 = Logit, 3 = Probit, 4 = Cloglog, 5 = Cauchit; ignored if Multinomial
+# lpow  Double  1.0     Power for Link function defined as (mean)^power (ignored if link != 1):
+#                       -2.0 = 1/mu^2, -1.0 = reciprocal, 0.0 = log, 0.5 = sqrt, 1.0 = identity
+# disp  Double  1.0     Dispersion value, when available
+# fmt   String "text"   Matrix output format, usually "text" or "csv" (for matrices only)
+# ---------------------------------------------------------------------------------------------
+# OUTPUT: Matrix M of predicted means/probabilities, some statistics in CSV format (see below)
+# The statistics are printed one per each line, in the following CSV format:
+# NAME,[COLUMN],[SCALED],VALUE
+#   NAME   is the string identifier for the statistic, see the table below.
+#   COLUMN is an optional integer value that specifies the Y-column for per-column statistics;
+#          note that a Binomial/Multinomial one-column Y input is converted into multi-column.
+#   SCALED is an optional Boolean value (TRUE or FALSE) that tells us whether or not the input
+#          dispersion parameter (disp) scaling has been applied to this statistic.
+#   VALUE  is the value of the statistic.
+#
+# NAME                  COLUMN  SCALED  MEANING
+# ---------------------------------------------------------------------------------------------
+# LOGLHOOD_Z                      +     Log-Likelihood Z-score (in st.dev's from mean)
+# LOGLHOOD_Z_PVAL                 +     Log-Likelihood Z-score p-value
+# PEARSON_X2                      +     Pearson residual X^2 statistic
+# PEARSON_X2_BY_DF                +     Pearson X^2 divided by degrees of freedom
+# PEARSON_X2_PVAL                 +     Pearson X^2 p-value
+# DEVIANCE_G2                     +     Deviance from saturated model G^2 statistic
+# DEVIANCE_G2_BY_DF               +     Deviance G^2 divided by degrees of freedom
+# DEVIANCE_G2_PVAL                +     Deviance G^2 p-value
+# AVG_TOT_Y               +             Average of Y column for a single response value
+# STDEV_TOT_Y             +             St.Dev. of Y column for a single response value
+# AVG_RES_Y               +             Average of column residual, i.e. of Y - mean(Y|X)
+# STDEV_RES_Y             +             St.Dev. of column residual, i.e. of Y - mean(Y|X)
+# PRED_STDEV_RES          +       +     Model-predicted St.Dev. of column residual
+# PLAIN_R2                +             Plain R^2 of Y column residual with bias included
+# ADJUSTED_R2             +             Adjusted R^2 of Y column residual with bias included
+# PLAIN_R2_NOBIAS         +             Plain R^2 of Y column residual with bias subtracted
+# ADJUSTED_R2_NOBIAS      +             Adjusted R^2 of Y column residual with bias subtracted
+# ---------------------------------------------------------------------------------------------
+#
+# Example with distribution = "Poisson.log":
+# hadoop jar SystemML.jar -f GLM_HOME/GLM-predict.dml -nvargs dfam=1 vpow=1.0 link=1 lpow=0.0
+#   disp=3.0 fmt=csv X=INPUT_DIR/X B=INPUT_DIR/B Y=INPUT_DIR/Y M=OUTPUT_DIR/M O=OUTPUT_DIR/out.csv
+
+# Default values for input parameters:
+fileX = $X;
+fileB = $B;
+fileM = ifdef ($M, " ");
+fileY = ifdef ($Y, " ");
+fileO = ifdef ($O, " ");
+fmtM  = ifdef ($fmt, "text");
+
+dist_type  = ifdef ($dfam, 1);    # $dfam = 1;
+var_power  = ifdef ($vpow, 0.0);  # $vpow = 0.0;
+link_type  = ifdef ($link, 0);    # $link = 0;
+link_power = ifdef ($lpow, 1.0);  # $lpow = 1.0;
+dispersion = ifdef ($disp, 1.0);  # $disp = 1.0;
+
+var_power  = as.double (var_power);
+link_power = as.double (link_power); 
+dispersion = as.double (dispersion);
+
+if (dist_type == 3) {
+    link_type = 2;
+} else { if (link_type == 0) { # Canonical Link
+    if (dist_type == 1) {
+        link_type = 1;
+        link_power = 1.0 - var_power;
+    } else { if (dist_type == 2) {
+            link_type = 2;
+}}} }
+
+X = read (fileX);
+num_records  = nrow (X);
+num_features = ncol (X);
+
+B_full = read (fileB);
+if (dist_type == 3) {
+    beta =  B_full [1 : ncol (X),  ];
+    intercept = B_full [nrow(B_full),  ];
+} else {
+    beta =  B_full [1 : ncol (X), 1];
+    intercept = B_full [nrow(B_full), 1];
+}
+if (nrow (B_full) == ncol (X)) {
+    intercept = 0.0 * intercept;
+    is_intercept = FALSE;
+} else {
+    num_features = num_features + 1;
+    is_intercept = TRUE;
+}
+
+ones_rec = matrix (1, rows = num_records, cols = 1);
+linear_terms = X %*% beta + ones_rec %*% intercept;
+[means, vars] =
+    glm_means_and_vars (linear_terms, dist_type, var_power, link_type, link_power);
+    
+if (fileM != " ") {
+    write (means, fileM, format=fmtM);
+}
+
+if (fileY != " ")
+{
+    Y = read (fileY);
+    ones_ctg = matrix (1, rows = ncol(Y), cols = 1);
+    
+    # Statistics To Compute:
+    
+    Z_logl               = 0.0 / 0.0;
+    Z_logl_pValue        = 0.0 / 0.0;
+    X2_pearson           = 0.0 / 0.0;
+    df_pearson           = -1;
+    G2_deviance          = 0.0 / 0.0;
+    df_deviance          = -1;
+    X2_pearson_pValue    = 0.0 / 0.0;
+    G2_deviance_pValue   = 0.0 / 0.0;
+    Z_logl_scaled        = 0.0 / 0.0;
+    Z_logl_scaled_pValue = 0.0 / 0.0;
+    X2_scaled            = 0.0 / 0.0;
+    X2_scaled_pValue     = 0.0 / 0.0;
+    G2_scaled            = 0.0 / 0.0;
+    G2_scaled_pValue     = 0.0 / 0.0;
+    
+    if (dist_type == 1 & link_type == 1) {
+    #
+    # POWER DISTRIBUTIONS (GAUSSIAN, POISSON, GAMMA, ETC.)
+    #
+        if (link_power == 0.0) {
+            is_zero_Y = ppred (Y, 0.0, "==");
+            lt_saturated = log (Y + is_zero_Y) - is_zero_Y / (1.0 - is_zero_Y);
+        } else {
+            lt_saturated = Y ^ link_power;
+        }
+        Y_counts = ones_rec;
+
+        X2_pearson = sum ((Y - means) ^ 2 / vars);
+        df_pearson = num_records - num_features;
+
+        log_l_part = 
+            glm_partial_loglikelihood_for_power_dist_and_link (linear_terms, Y, var_power, link_power);
+        log_l_part_saturated = 
+            glm_partial_loglikelihood_for_power_dist_and_link (lt_saturated, Y, var_power, link_power);
+            
+        G2_deviance = 2 * sum (log_l_part_saturated) - 2 * sum (log_l_part);
+        df_deviance = num_records - num_features;
+        
+    } else { if (dist_type >= 2) {
+    #
+    # BINOMIAL AND MULTINOMIAL DISTRIBUTIONS
+    #
+        if (ncol (Y) == 1) {
+            num_categories = ncol (beta) + 1;
+            if (min (Y) <= 0) { 
+                # Category labels "0", "-1" etc. are converted into the baseline label
+                Y = Y + (- Y + num_categories) * ppred (Y, 0, "<=");
+            }
+            Y_size = min (num_categories, max(Y));
+            Y_unsized = table (seq (1, num_records, 1), Y);
+            Y = matrix (0, rows = num_records, cols = num_categories);
+            Y [, 1 : Y_size] = Y_unsized [, 1 : Y_size];
+            Y_counts = ones_rec;
+        } else {
+            Y_counts = rowSums (Y);
+        }
+        
+        P = means;
+        zero_Y = ppred (Y, 0.0, "==");
+        zero_P = ppred (P, 0.0, "==");
+        ones_ctg = matrix (1, rows = ncol(Y), cols = 1);
+        
+        logl_vec = rowSums (Y *  log (P + zero_Y)   );
+        ent1_vec = rowSums (P *  log (P + zero_P)   );
+        ent2_vec = rowSums (P * (log (P + zero_P))^2);
+        E_logl   = sum (Y_counts * ent1_vec);
+        V_logl   = sum (Y_counts * (ent2_vec - ent1_vec ^ 2));
+        Z_logl   = (sum (logl_vec) - E_logl) / sqrt (V_logl);
+        
+        means = means * (Y_counts %*% t(ones_ctg));
+        vars  = vars  * (Y_counts %*% t(ones_ctg));
+        
+        frac_below_5 = sum (ppred (means, 5, "<")) / (nrow (means) * ncol (means));
+        frac_below_1 = sum (ppred (means, 1, "<")) / (nrow (means) * ncol (means));
+        
+        if (frac_below_5 > 0.2 | frac_below_1 > 0.0) {
+            print ("WARNING: residual statistics are inaccurate here due to low cell means.");
+        }
+        
+        X2_pearson = sum ((Y - means) ^ 2 / means);
+        df_pearson = (num_records - num_features) * (ncol(Y) - 1);
+        
+        G2_deviance = 2 * sum (Y * log ((Y + zero_Y) / (means + zero_Y)));
+        df_deviance = (num_records - num_features) * (ncol(Y) - 1);
+    }}
+    
+    if (Z_logl == Z_logl) {
+        Z_logl_absneg = - abs (Z_logl);
+        Z_logl_pValue = 2.0 * pnorm(target = Z_logl_absneg);
+    }
+    if (X2_pearson == X2_pearson & df_pearson > 0) {
+        X2_pearson_pValue = pchisq(target = X2_pearson, df = df_pearson, lower.tail=FALSE);
+    }
+    if (G2_deviance == G2_deviance & df_deviance > 0) {
+        G2_deviance_pValue = pchisq(target = G2_deviance, df = df_deviance, lower.tail=FALSE);
+    }
+    
+    Z_logl_scaled = Z_logl / sqrt (dispersion);
+    X2_scaled = X2_pearson / dispersion;
+    G2_scaled = G2_deviance / dispersion;
+
+    if (Z_logl_scaled == Z_logl_scaled) {
+        Z_logl_scaled_absneg = - abs (Z_logl_scaled);
+        Z_logl_scaled_pValue = 2.0 * pnorm(target = Z_logl_scaled_absneg);
+    }
+    if (X2_scaled == X2_scaled & df_pearson > 0) {
+        X2_scaled_pValue = pchisq(target = X2_scaled, df = df_pearson, lower.tail=FALSE);
+    }
+    if (G2_scaled == G2_scaled & df_deviance > 0) {
+        G2_scaled_pValue = pchisq(target = G2_scaled, df = df_deviance, lower.tail=FALSE);
+    }
+    
+    avg_tot_Y = colSums (    Y    ) / sum (Y_counts);
+    avg_res_Y = colSums (Y - means) / sum (Y_counts);
+    
+    ss_avg_tot_Y = colSums ((    Y     - Y_counts %*% avg_tot_Y) ^ 2);
+    ss_res_Y     = colSums ((Y - means) ^ 2);
+    ss_avg_res_Y = colSums ((Y - means - Y_counts %*% avg_res_Y) ^ 2);
+    
+    df_ss_res_Y  = sum (Y_counts) - num_features;
+    if (is_intercept) {
+        df_ss_avg_res_Y = df_ss_res_Y;
+    } else {
+        df_ss_avg_res_Y = df_ss_res_Y - 1;
+    }
+    
+    var_tot_Y = ss_avg_tot_Y / (sum (Y_counts) - 1);
+    if (df_ss_avg_res_Y > 0) {
+        var_res_Y = ss_avg_res_Y / df_ss_avg_res_Y;
+    } else {
+        var_res_Y = matrix (0.0, rows = 1, cols = ncol (Y)) / 0.0;
+    }
+    plain_R2_nobias  = 1 - ss_avg_res_Y / ss_avg_tot_Y;
+    adjust_R2_nobias = 1 - var_res_Y / var_tot_Y;
+    plain_R2  = 1 - ss_res_Y / ss_avg_tot_Y;
+    if (df_ss_res_Y > 0) {
+        adjust_R2 = 1 - (ss_res_Y / df_ss_res_Y) / var_tot_Y;
+    } else {
+        adjust_R2 = matrix (0.0, rows = 1, cols = ncol (Y)) / 0.0;
+    }
+    
+    predicted_avg_var_res_Y = dispersion * colSums (vars) / sum (Y_counts);
+    
+    # PREPARING THE OUTPUT CSV STATISTICS FILE
+    
+    str = "LOGLHOOD_Z,,FALSE," + Z_logl;
+    str = append (str, "LOGLHOOD_Z_PVAL,,FALSE," + Z_logl_pValue);
+    str = append (str, "PEARSON_X2,,FALSE," + X2_pearson);
+    str = append (str, "PEARSON_X2_BY_DF,,FALSE," + (X2_pearson / df_pearson));
+    str = append (str, "PEARSON_X2_PVAL,,FALSE," + X2_pearson_pValue);
+    str = append (str, "DEVIANCE_G2,,FALSE," + G2_deviance);
+    str = append (str, "DEVIANCE_G2_BY_DF,,FALSE," + (G2_deviance / df_deviance));
+    str = append (str, "DEVIANCE_G2_PVAL,,FALSE," + G2_deviance_pValue);
+    str = append (str, "LOGLHOOD_Z,,TRUE," + Z_logl_scaled);
+    str = append (str, "LOGLHOOD_Z_PVAL,,TRUE," + Z_logl_scaled_pValue);
+    str = append (str, "PEARSON_X2,,TRUE," + X2_scaled);
+    str = append (str, "PEARSON_X2_BY_DF,,TRUE," + (X2_scaled / df_pearson));
+    str = append (str, "PEARSON_X2_PVAL,,TRUE," + X2_scaled_pValue);
+    str = append (str, "DEVIANCE_G2,,TRUE," + G2_scaled);
+    str = append (str, "DEVIANCE_G2_BY_DF,,TRUE," + (G2_scaled / df_deviance));
+    str = append (str, "DEVIANCE_G2_PVAL,,TRUE," + G2_scaled_pValue);
+
+    for (i in 1:ncol(Y)) {
+        str = append (str, "AVG_TOT_Y," + i + ",," + castAsScalar (avg_tot_Y [1, i]));
+        str = append (str, "STDEV_TOT_Y," + i + ",," + castAsScalar (sqrt (var_tot_Y [1, i])));
+        str = append (str, "AVG_RES_Y," + i + ",," + castAsScalar (avg_res_Y [1, i]));
+        str = append (str, "STDEV_RES_Y," + i + ",," + castAsScalar (sqrt (var_res_Y [1, i])));
+        str = append (str, "PRED_STDEV_RES," + i + ",TRUE," + castAsScalar (sqrt (predicted_avg_var_res_Y [1, i])));
+        str = append (str, "PLAIN_R2," + i + ",," + castAsScalar (plain_R2 [1, i]));
+        str = append (str, "ADJUSTED_R2," + i + ",," + castAsScalar (adjust_R2 [1, i]));
+        str = append (str, "PLAIN_R2_NOBIAS," + i + ",," + castAsScalar (plain_R2_nobias [1, i]));
+        str = append (str, "ADJUSTED_R2_NOBIAS," + i + ",," + castAsScalar (adjust_R2_nobias [1, i]));
+    }
+    
+    if (fileO != " ") {
+        write (str, fileO);
+    } else {
+        print (str);
+    }
+}
+
+glm_means_and_vars = 
+    function (Matrix[double] linear_terms, int dist_type, double var_power, int link_type, double link_power)
+    return (Matrix[double] means, Matrix[double] vars)
+    # NOTE: "vars" represents the variance without dispersion, i.e. the V(mu) function.
+{
+    num_points = nrow (linear_terms);
+    if (dist_type == 1 & link_type == 1) {
+    # POWER DISTRIBUTION
+        if          (link_power ==  0.0) {
+            y_mean = exp (linear_terms);
+        } else { if (link_power ==  1.0) {
+            y_mean = linear_terms;
+        } else { if (link_power == -1.0) {
+            y_mean = 1.0 / linear_terms;
+        } else {
+            y_mean = linear_terms ^ (1.0 / link_power);
+        }}}
+        if (var_power == 0.0) {
+            var_function = matrix (1.0, rows = num_points, cols = 1);
+        } else { if (var_power == 1.0) {
+            var_function = y_mean;
+        } else {
+            var_function = y_mean ^ var_power;
+        }}
+        means = y_mean;
+        vars = var_function;
+    } else { if (dist_type == 2 & link_type >= 1 & link_type <= 5) {
+    # BINOMIAL/BERNOULLI DISTRIBUTION
+        y_prob = matrix (0.0, rows = num_points, cols = 2);
+        if          (link_type == 1 & link_power == 0.0)  { # Binomial.log
+            y_prob [, 1]  = exp (linear_terms);
+            y_prob [, 2]  = 1.0 - y_prob [, 1];
+        } else { if (link_type == 1 & link_power != 0.0)  { # Binomial.power_nonlog
+            y_prob [, 1]  = linear_terms ^ (1.0 / link_power);
+            y_prob [, 2]  = 1.0 - y_prob [, 1];
+        } else { if (link_type == 2)                      { # Binomial.logit
+            elt = exp (linear_terms);
+            y_prob [, 1]  = elt / (1.0 + elt);
+            y_prob [, 2]  = 1.0 / (1.0 + elt);
+        } else { if (link_type == 3)                      { # Binomial.probit
+            sign_lt = 2 * ppred (linear_terms, 0.0, ">=") - 1;
+            t_gp = 1.0 / (1.0 + abs (linear_terms) * 0.231641888);  # 0.231641888 = 0.3275911 / sqrt (2.0)
+            erf_corr =
+                t_gp * ( 0.254829592 
+              + t_gp * (-0.284496736 # "Handbook of Mathematical Functions", ed. by M. Abramowitz and I.A. Stegun,
+              + t_gp * ( 1.421413741 # U.S. Nat-l Bureau of Standards, 10th print (Dec 1972), Sec. 7.1.26, p. 299
+              + t_gp * (-1.453152027 
+              + t_gp *   1.061405429)))) * sign_lt * exp (- (linear_terms ^ 2) / 2.0);
+            y_prob [, 1] = (1 + sign_lt) - erf_corr;
+            y_prob [, 2] = (1 - sign_lt) + erf_corr;
+            y_prob = y_prob / 2;
+        } else { if (link_type == 4)                      { # Binomial.cloglog
+            elt = exp (linear_terms);
+            is_too_small = ppred (10000000 + elt, 10000000, "==");
+            y_prob [, 2] = exp (- elt);
+            y_prob [, 1] = (1 - is_too_small) * (1.0 - y_prob [, 2]) + is_too_small * elt * (1.0 - elt / 2);
+        } else { if (link_type == 5)                      { # Binomial.cauchit
+            atan_linear_terms = atan (linear_terms);
+            y_prob [, 1] = 0.5 + atan_linear_terms / 3.1415926535897932384626433832795;
+            y_prob [, 2] = 0.5 - atan_linear_terms / 3.1415926535897932384626433832795;
+        }}}}}}
+        means = y_prob;
+        ones_ctg = matrix (1, rows = 2, cols = 1);
+        vars  = means * (means %*% (1 - diag (ones_ctg)));
+    } else { if (dist_type == 3) {
+    # MULTINOMIAL LOGIT DISTRIBUTION
+        elt = exp (linear_terms);
+        ones_pts = matrix (1, rows = num_points, cols = 1);
+        elt = append (elt, ones_pts);
+        ones_ctg = matrix (1, rows = ncol (elt), cols = 1);
+        means = elt / (rowSums (elt) %*% t(ones_ctg));
+        vars  = means * (means %*% (1 - diag (ones_ctg)));
+    } else {
+        means = matrix (0.0, rows = num_points, cols = 1);
+        vars  = matrix (0.0, rows = num_points, cols = 1);
+}   }}}
+
+glm_partial_loglikelihood_for_power_dist_and_link =   # Assumes: dist_type == 1 & link_type == 1
+    function (Matrix[double] linear_terms, Matrix[double] Y, double var_power, double link_power)
+    return (Matrix[double] log_l_part)
+{
+    num_records = nrow (Y);
+    if (var_power == 1.0) { # Poisson
+        if (link_power == 0.0)  { # Poisson.log
+            is_natural_parameter_log_zero = ppred (linear_terms, -1.0/0.0, "==");
+            natural_parameters = replace (target = linear_terms, pattern = -1.0/0.0, replacement = 0);
+            b_cumulant = exp (linear_terms);
+        } else {                  # Poisson.power_nonlog
+            is_natural_parameter_log_zero = ppred (linear_terms, 0.0, "==");
+            natural_parameters = log (linear_terms + is_natural_parameter_log_zero) / link_power;
+            b_cumulant = (linear_terms + is_natural_parameter_log_zero) ^ (1.0 / link_power) - is_natural_parameter_log_zero;
+        }
+        is_minus_infinity = ppred (Y, 0, ">") * is_natural_parameter_log_zero;
+        log_l_part = Y * natural_parameters - b_cumulant - is_minus_infinity / (1 - is_minus_infinity);
+    } else {
+        if (var_power == 2.0 & link_power == 0.0)  { # Gamma.log
+            natural_parameters = - exp (- linear_terms);
+            b_cumulant = linear_terms;
+        } else { if (var_power == 2.0)  { # Gamma.power_nonlog
+            natural_parameters = - linear_terms ^ (- 1.0 / link_power);
+            b_cumulant = log (linear_terms) / link_power;
+        } else { if (link_power == 0.0) { # PowerDist.log
+            natural_parameters = exp (linear_terms * (1.0 - var_power)) / (1.0 - var_power);
+            b_cumulant = exp (linear_terms * (2.0 - var_power)) / (2.0 - var_power);
+        } else {                          # PowerDist.power_nonlog
+            power_np = (1.0 - var_power) / link_power;
+            natural_parameters = (linear_terms ^ power_np) / (1.0 - var_power);
+            power_cu = (2.0 - var_power) / link_power;
+            b_cumulant = (linear_terms ^ power_cu) / (2.0 - var_power);
+        }}}
+        log_l_part = Y * natural_parameters - b_cumulant;
+}   }