You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemds.apache.org by ba...@apache.org on 2022/06/10 14:22:29 UTC

[systemds] 02/05: [DOCS] Update builtin scripts docs

This is an automated email from the ASF dual-hosted git repository.

baunsgaard pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git

commit a0254e5d2c204a1f841226eb399620f2b1bd97e6
Author: baunsgaard <ba...@tugraz.at>
AuthorDate: Fri Jun 10 16:18:33 2022 +0200

    [DOCS] Update builtin scripts docs
---
 scripts/builtin/WoE.dml                    |   9 +--
 scripts/builtin/WoEApply.dml               |   8 +-
 scripts/builtin/abstain.dml                |  16 ++--
 scripts/builtin/confusionMatrix.dml        |   7 +-
 scripts/builtin/correctTypos.dml           |  12 +--
 scripts/builtin/correctTyposApply.dml      |  12 +--
 scripts/builtin/dbscanApply.dml            |   4 -
 scripts/builtin/denialConstraints.dml      |   4 +-
 scripts/builtin/fit_pipeline.dml           |   4 -
 scripts/builtin/fixInvalidLengthsApply.dml |   4 -
 scripts/builtin/glm.dml                    | 118 +++++++++++++++--------------
 scripts/builtin/glmPredict.dml             |  64 +++++++++-------
 scripts/builtin/hyperband.dml              |   6 +-
 scripts/builtin/img_brightness.dml         |   5 +-
 scripts/builtin/knn.dml                    |   4 +-
 scripts/builtin/matrixProfile.dml          |  14 ++--
 scripts/builtin/steplm.dml                 |  17 +++--
 17 files changed, 154 insertions(+), 154 deletions(-)

diff --git a/scripts/builtin/WoE.dml b/scripts/builtin/WoE.dml
index 003d9b48a6..7bc938eedd 100644
--- a/scripts/builtin/WoE.dml
+++ b/scripts/builtin/WoE.dml
@@ -30,13 +30,12 @@
 #
 # OUTPUT:
 # ------------------------------------------------
-# X              ---
-# Y              ---
-# entropyMatrix  ---
+# F              Weighted X matrix where the entropy mask is applied
+# entropyMatrix  An entropy matrix to apply to data
 # ------------------------------------------------
 
 m_WoE = function(Matrix[Double] X, Matrix[Double] Y, Matrix[Double] mask)
-return (Matrix[Double] X, Matrix[Double] Y, Matrix[Double] entropyMatrix) {
+return (Matrix[Double] F, Matrix[Double] entropyMatrix) {
   
   tempX = replace(target=X, pattern=NaN, replacement=1)
   entropyMatrix = matrix(0, rows=ncol(tempX), cols = max((tempX*mask)))
@@ -53,7 +52,7 @@ return (Matrix[Double] X, Matrix[Double] Y, Matrix[Double] entropyMatrix) {
     
     }
   }
-  X = WoEApply(X, Y, entropyMatrix)
+  F = WoEApply(X, Y, entropyMatrix)
 }
 
 
diff --git a/scripts/builtin/WoEApply.dml b/scripts/builtin/WoEApply.dml
index c27fae0d05..6f86a266d3 100644
--- a/scripts/builtin/WoEApply.dml
+++ b/scripts/builtin/WoEApply.dml
@@ -30,12 +30,12 @@
 #
 # OUTPUT:
 # ------------------------------------------------
-# X              ---
+# F              Weighted X matrix where the entropy mask is applied
 # ------------------------------------------------
 
 m_WoEApply = function(Matrix[Double] X, Matrix[Double] Y, Matrix[Double] entropyMatrix)
-return (Matrix[Double] X) {
-  
+return (Matrix[Double] F) {
+  F = matrix(1, nrow(X), ncol(X)) # allocate dense output matrix
   for(i in 1:ncol(X))
   {
     if(sum(abs(entropyMatrix[i])) > 0)
@@ -46,7 +46,7 @@ return (Matrix[Double] X) {
       resp = matrix(0, nrow(L), idx)
       resp = (resp + t(seq(1, idx))) == L
       resp = resp * entropy
-      X[, i] = rowSums(resp)
+      F[, i] = rowSums(resp)
     }  
   }
   
diff --git a/scripts/builtin/abstain.dml b/scripts/builtin/abstain.dml
index 6d9035101d..5a4c354327 100644
--- a/scripts/builtin/abstain.dml
+++ b/scripts/builtin/abstain.dml
@@ -24,16 +24,16 @@
 #
 # INPUT:
 # -------------------------------------------------------------------------------------
-# X            Location to read the matrix of feature vectors
-# Y            Location to read the matrix with category labels
-# threshold    ---
+# X            matrix of feature vectors
+# Y            matrix with category labels
+# threshold    threshold to clear; otherwise return X and Y unmodified
 # verbose      flag specifying if logging information should be printed
 # -------------------------------------------------------------------------------------
 #
 # OUTPUT:
 # -------------------------------------------------------------------------------------
-# Xout     ---
-# Yout     ---
+# Xout     abstained output X
+# Yout     abstained output Y
 # -------------------------------------------------------------------------------------
 
 m_abstain = function(Matrix[Double] X, Matrix[Double] Y, Double threshold, Boolean verbose = FALSE)
@@ -41,16 +41,14 @@ return (Matrix[Double] Xout, Matrix[Double] Yout)
 {
   Xout = X
   Yout = Y
-  # for(i in 1:100) {
   if(min(Y) != max(Y))
   {
-    betas = multiLogReg(X=X, Y=Y, icpt=1, reg=1e-4, maxi=100, maxii=0, verbose=FALSE)
+    betas = multiLogReg(X=X, Y=Y, icpt=1, reg=1e-4, maxi=100, maxii=0, verbose=verbose)
     [prob, yhat, accuracy] = multiLogRegPredict(X, betas, Y, FALSE)
-    # abstain = cbind(X, Y)
+    
     inc = ((yhat != Y) & (rowMaxs(prob) > threshold))
     if(sum(inc) > 0)
     {
-      # print("inc vector "+toString(inc))
       Xout = removeEmpty(target = X, margin = "rows", select = (inc == 0) )
       Yout = removeEmpty(target = Y, margin = "rows", select = (inc == 0) )
     }
diff --git a/scripts/builtin/confusionMatrix.dml b/scripts/builtin/confusionMatrix.dml
index a56a936cc9..c15b82621c 100644
--- a/scripts/builtin/confusionMatrix.dml
+++ b/scripts/builtin/confusionMatrix.dml
@@ -24,17 +24,14 @@
 # After which, it calculates and returns the sum of classifications
 # and the average of each true class.
 #
+# .. code-block:: txt
+#
 #                   True Labels
 #                     1    2
 #                 1   TP | FP
 #   Predictions      ----+----
 #                 2   FN | TN
 #
-# TP = True Positives
-# FP = False Positives
-# FN = False Negatives
-# TN = True Negatives
-#
 # INPUT:
 # --------------------------------------------------------------------------------
 # P     vector of Predictions
diff --git a/scripts/builtin/correctTypos.dml b/scripts/builtin/correctTypos.dml
index 2fddfaec1b..01d837a7a2 100644
--- a/scripts/builtin/correctTypos.dml
+++ b/scripts/builtin/correctTypos.dml
@@ -24,11 +24,13 @@
 # and simply swaps strings that do not occur often with similar strings that 
 # occur more often
 #
-# References:
-# Fred J. Damerau. 1964. 
-#   A technique for computer detection and correction of spelling errors. 
-#   Commun. ACM 7, 3 (March 1964), 171–176. 
-#   DOI:https://doi.org/10.1145/363958.363994
+# .. code-block:: txt
+#
+#   References:
+#   Fred J. Damerau. 1964. 
+#     A technique for computer detection and correction of spelling errors. 
+#     Commun. ACM 7, 3 (March 1964), 171–176. 
+#     DOI:https://doi.org/10.1145/363958.363994
 #
 # INPUT:
 # ----------------------------------------------------------------------------------------
diff --git a/scripts/builtin/correctTyposApply.dml b/scripts/builtin/correctTyposApply.dml
index 050bb8132b..3ca4635bfa 100644
--- a/scripts/builtin/correctTyposApply.dml
+++ b/scripts/builtin/correctTyposApply.dml
@@ -24,11 +24,13 @@
 # and simply swaps strings that do not occur often with similar strings that 
 # occur more often
 #
-# References:
-# Fred J. Damerau. 1964. 
-#   A technique for computer detection and correction of spelling errors. 
-#   Commun. ACM 7, 3 (March 1964), 171–176. 
-#   DOI:https://doi.org/10.1145/363958.363994
+# .. code-block:: txt
+#
+#   References:
+#   Fred J. Damerau. 1964. 
+#     A technique for computer detection and correction of spelling errors. 
+#     Commun. ACM 7, 3 (March 1964), 171–176. 
+#     DOI:https://doi.org/10.1145/363958.363994
 #
 # TODO: future: add parameter for list of words that are sure to be correct
 #
diff --git a/scripts/builtin/dbscanApply.dml b/scripts/builtin/dbscanApply.dml
index 4a7eb7e6ed..e3ab9723cb 100644
--- a/scripts/builtin/dbscanApply.dml
+++ b/scripts/builtin/dbscanApply.dml
@@ -23,8 +23,6 @@
 #
 # INPUT:
 # ---------------------------------------------
-# NAME           MEANING
-# ---------------------------------------------
 # X              The input Matrix to do outlier detection on.
 # clusterModel   Model of clusters to predict outliers against.
 # eps            Maximum distance between two points for one to be considered reachable for the other.
@@ -32,8 +30,6 @@
 #
 # OUTPUT:
 # ----------------------------------------------
-# NAME            MEANING
-# ----------------------------------------------
 # outlierPoints   Predicted outliers
 # ----------------------------------------------
 
diff --git a/scripts/builtin/denialConstraints.dml b/scripts/builtin/denialConstraints.dml
index 23453979e1..d2dc3cfb40 100644
--- a/scripts/builtin/denialConstraints.dml
+++ b/scripts/builtin/denialConstraints.dml
@@ -21,6 +21,8 @@
 
 # This function considers some constraints indicating statements that can NOT happen in the data (denial constraints).
 #
+# .. code-block:: txt
+#
 #   EXAMPLE:
 #   dataFrame:
 #
@@ -48,7 +50,7 @@
 #   4     variableCompare   TRUE       discipline          B              yrs.service    >          yrs.since.phd
 #
 #
-#   Example: explanation of constraint 2 --> it can't happen that one professor of rank Prof has more years of service than other, but lower salary.
+# Example: explanation of constraint 2 --> it can't happen that one professor of rank Prof has more years of service than other, but lower salary.
 #
 # INPUT:
 # ----------------------------------------------------------------------------------------------------
diff --git a/scripts/builtin/fit_pipeline.dml b/scripts/builtin/fit_pipeline.dml
index 4b4665e59e..96023f7b49 100644
--- a/scripts/builtin/fit_pipeline.dml
+++ b/scripts/builtin/fit_pipeline.dml
@@ -25,8 +25,6 @@
 #
 # INPUT:
 # -------------------------------------------------------------------------------
-# NAME             MEANING
-# -------------------------------------------------------------------------------
 # trainData        ---
 # testData         ---
 # metaData         ---
@@ -41,8 +39,6 @@
 #
 # OUTPUT:
 # ------------------------------------------------------------------------------------------------
-# NAME      MEANING
-# ------------------------------------------------------------------------------------------------
 # scores    ---
 # ------------------------------------------------------------------------------------------------
 
diff --git a/scripts/builtin/fixInvalidLengthsApply.dml b/scripts/builtin/fixInvalidLengthsApply.dml
index a8c10dc052..e566bcbc68 100644
--- a/scripts/builtin/fixInvalidLengthsApply.dml
+++ b/scripts/builtin/fixInvalidLengthsApply.dml
@@ -23,8 +23,6 @@
 #
 # INPUT:
 # ------------------------
-# NAME   MEANING
-# ------------------------
 # X      ---
 # mask   ---
 # ql     ---
@@ -33,8 +31,6 @@
 #
 # OUTPUT:
 # ------------------------
-# NAME  MEANING
-# ------------------------
 # out   ---
 # M     ---
 # ------------------------
diff --git a/scripts/builtin/glm.dml b/scripts/builtin/glm.dml
index c07a98337a..44b0c8cb1e 100644
--- a/scripts/builtin/glm.dml
+++ b/scripts/builtin/glm.dml
@@ -25,67 +25,71 @@
 # In addition, some GLM statistics are provided as console output by setting verbose=TRUE, one comma-separated name-value
 # pair per each line, as follows:
 #
-# ----------------------------------------------------------------------------------------------------------------------
-# TERMINATION_CODE      A positive integer indicating success/failure as follows:
-#                       1 = Converged successfully; 2 = Maximum number of iterations reached; 
-#                       3 = Input (X, Y) out of range; 4 = Distribution/link is not supported
-# BETA_MIN              Smallest beta value (regression coefficient), excluding the intercept
-# BETA_MIN_INDEX        Column index for the smallest beta value
-# BETA_MAX              Largest beta value (regression coefficient), excluding the intercept
-# BETA_MAX_INDEX        Column index for the largest beta value
-# INTERCEPT             Intercept value, or NaN if there is no intercept (if icpt=0)
-# DISPERSION            Dispersion used to scale deviance, provided as "disp" input parameter
-#                       or estimated (same as DISPERSION_EST) if the "disp" parameter is <= 0
-# DISPERSION_EST        Dispersion estimated from the dataset
-# DEVIANCE_UNSCALED     Deviance from the saturated model, assuming dispersion == 1.0
-# DEVIANCE_SCALED       Deviance from the saturated model, scaled by the DISPERSION value
-# ----------------------------------------------------------------------------------------------------------------------
+# .. code-block:: txt
 #
-# The Log file, when requested, contains the following per-iteration variables in CSV format,
-# each line containing triple (NAME, ITERATION, VALUE) with ITERATION = 0 for initial values:
-#
-# ----------------------------------------------------------------------------------------------------------------------
-# NUM_CG_ITERS          Number of inner (Conj.Gradient) iterations in this outer iteration
-# IS_TRUST_REACHED      1 = trust region boundary was reached, 0 = otherwise
-# POINT_STEP_NORM       L2-norm of iteration step from old point (i.e. "beta") to new point
-# OBJECTIVE             The loss function we minimize (i.e. negative partial log-likelihood)
-# OBJ_DROP_REAL         Reduction in the objective during this iteration, actual value
-# OBJ_DROP_PRED         Reduction in the objective predicted by a quadratic approximation
-# OBJ_DROP_RATIO        Actual-to-predicted reduction ratio, used to update the trust region
-# GRADIENT_NORM         L2-norm of the loss function gradient (NOTE: sometimes omitted)
-# LINEAR_TERM_MIN       The minimum value of X %*% beta, used to check for overflows
-# LINEAR_TERM_MAX       The maximum value of X %*% beta, used to check for overflows
-# IS_POINT_UPDATED      1 = new point accepted; 0 = new point rejected, old point restored
-# TRUST_DELTA           Updated trust region size, the "delta"
-# ----------------------------------------------------------------------------------------------------------------------
+#   --------------------------------------------------------------------------------------------
+#   TERMINATION_CODE      A positive integer indicating success/failure as follows:
+#                         1 = Converged successfully; 2 = Maximum number of iterations reached; 
+#                         3 = Input (X, Y) out of range; 4 = Distribution/link is not supported
+#   BETA_MIN              Smallest beta value (regression coefficient), excluding the intercept
+#   BETA_MIN_INDEX        Column index for the smallest beta value
+#   BETA_MAX              Largest beta value (regression coefficient), excluding the intercept
+#   BETA_MAX_INDEX        Column index for the largest beta value
+#   INTERCEPT             Intercept value, or NaN if there is no intercept (if icpt=0)
+#   DISPERSION            Dispersion used to scale deviance, provided as "disp" input parameter
+#                         or estimated (same as DISPERSION_EST) if the "disp" parameter is <= 0
+#   DISPERSION_EST        Dispersion estimated from the dataset
+#   DEVIANCE_UNSCALED     Deviance from the saturated model, assuming dispersion == 1.0
+#   DEVIANCE_SCALED       Deviance from the saturated model, scaled by the DISPERSION value
+#   --------------------------------------------------------------------------------------------
+#   
+#   The Log file, when requested, contains the following per-iteration variables in CSV format,
+#   each line containing triple (NAME, ITERATION, VALUE) with ITERATION = 0 for initial values:
+#   
+#   --------------------------------------------------------------------------------------------
+#   NUM_CG_ITERS          Number of inner (Conj.Gradient) iterations in this outer iteration
+#   IS_TRUST_REACHED      1 = trust region boundary was reached, 0 = otherwise
+#   POINT_STEP_NORM       L2-norm of iteration step from old point (i.e. "beta") to new point
+#   OBJECTIVE             The loss function we minimize (i.e. negative partial log-likelihood)
+#   OBJ_DROP_REAL         Reduction in the objective during this iteration, actual value
+#   OBJ_DROP_PRED         Reduction in the objective predicted by a quadratic approximation
+#   OBJ_DROP_RATIO        Actual-to-predicted reduction ratio, used to update the trust region
+#   GRADIENT_NORM         L2-norm of the loss function gradient (NOTE: sometimes omitted)
+#   LINEAR_TERM_MIN       The minimum value of X %*% beta, used to check for overflows
+#   LINEAR_TERM_MAX       The maximum value of X %*% beta, used to check for overflows
+#   IS_POINT_UPDATED      1 = new point accepted; 0 = new point rejected, old point restored
+#   TRUST_DELTA           Updated trust region size, the "delta"
+#   --------------------------------------------------------------------------------------------
 #
 # SOME OF THE SUPPORTED GLM DISTRIBUTION FAMILIES
 # AND LINK FUNCTIONS:
 #
-# dfam vpow link lpow  Distribution.link   nical?
-# ----------------------------------------------------------------------------------------------------------------------
-#  1   0.0   1  -1.0   Gaussian.inverse
-#  1   0.0   1   0.0   Gaussian.log
-#  1   0.0   1   1.0   Gaussian.id          Yes
-#  1   1.0   1   0.0   Poisson.log          Yes
-#  1   1.0   1   0.5   Poisson.sqrt
-#  1   1.0   1   1.0   Poisson.id
-#  1   2.0   1  -1.0   Gamma.inverse        Yes
-#  1   2.0   1   0.0   Gamma.log
-#  1   2.0   1   1.0   Gamma.id
-#  1   3.0   1  -2.0   InvGaussian.1/mu^2   Yes
-#  1   3.0   1  -1.0   InvGaussian.inverse
-#  1   3.0   1   0.0   InvGaussian.log
-#  1   3.0   1   1.0   InvGaussian.id
-#  1    *    1    *    AnyVariance.AnyLink
-# ----------------------------------------------------------------------------------------------------------------------
-#  2    *    1   0.0   Binomial.log
-#  2    *    1   0.5   Binomial.sqrt
-#  2    *    2    *    Binomial.logit       Yes
-#  2    *    3    *    Binomial.probit
-#  2    *    4    *    Binomial.cloglog
-#  2    *    5    *    Binomial.cauchit
-# ----------------------------------------------------------------------------------------------------------------------
+# .. code-block:: txt
+#
+#   dfam vpow link lpow  Distribution.link   nical?
+#   ---------------------------------------------------
+#    1   0.0   1  -1.0   Gaussian.inverse
+#    1   0.0   1   0.0   Gaussian.log
+#    1   0.0   1   1.0   Gaussian.id          Yes
+#    1   1.0   1   0.0   Poisson.log          Yes
+#    1   1.0   1   0.5   Poisson.sqrt
+#    1   1.0   1   1.0   Poisson.id
+#    1   2.0   1  -1.0   Gamma.inverse        Yes
+#    1   2.0   1   0.0   Gamma.log
+#    1   2.0   1   1.0   Gamma.id
+#    1   3.0   1  -2.0   InvGaussian.1/mu^2   Yes
+#    1   3.0   1  -1.0   InvGaussian.inverse
+#    1   3.0   1   0.0   InvGaussian.log
+#    1   3.0   1   1.0   InvGaussian.id
+#    1    *    1    *    AnyVariance.AnyLink
+#   ---------------------------------------------------
+#    2    *    1   0.0   Binomial.log
+#    2    *    1   0.5   Binomial.sqrt
+#    2    *    2    *    Binomial.logit       Yes
+#    2    *    3    *    Binomial.probit
+#    2    *    4    *    Binomial.cloglog
+#    2    *    5    *    Binomial.cauchit
+#   ---------------------------------------------------
 #
 # INPUT:
 # --------------------------------------------------------------------------------------------
@@ -111,7 +115,7 @@
 # mii      Maximum number of inner (Conjugate Gradient) iterations, 0 = no maximum
 # verbose  if the Algorithm should be verbose
 # ------------------------------------------------------------------------------------------
-# 
+#
 # OUTPUT:
 # --------------------------------------------------------------------------------------------
 # beta  Matrix beta, whose size depends on icpt:
diff --git a/scripts/builtin/glmPredict.dml b/scripts/builtin/glmPredict.dml
index 3c0e09c6ba..cde4e17ea8 100644
--- a/scripts/builtin/glmPredict.dml
+++ b/scripts/builtin/glmPredict.dml
@@ -22,35 +22,41 @@
 # Applies the estimated parameters of a GLM type regression to a new dataset
 #
 # Additional statistics are printed one per each line, in the following 
-# CSV format: NAME,[COLUMN],[SCALED],VALUE
-# ---
-# NAME   is the string identifier for the statistic, see the table below.
-# COLUMN is an optional integer value that specifies the Y-column for per-column statistics;
-#        note that a Binomial/Multinomial one-column Y input is converted into multi-column.
-# SCALED is an optional Boolean value (TRUE or FALSE) that tells us whether or not the input
-#          dispersion parameter (disp) scaling has been applied to this statistic.
-# VALUE  is the value of the statistic.
-# ---
-# NAME                  COLUMN  SCALED  MEANING
-# ---------------------------------------------------------------------------------------------
-# LOGLHOOD_Z                      +     Log-Likelihood Z-score (in st.dev's from mean)
-# LOGLHOOD_Z_PVAL                 +     Log-Likelihood Z-score p-value
-# PEARSON_X2                      +     Pearson residual X^2 statistic
-# PEARSON_X2_BY_DF                +     Pearson X^2 divided by degrees of freedom
-# PEARSON_X2_PVAL                 +     Pearson X^2 p-value
-# DEVIANCE_G2                     +     Deviance from saturated model G^2 statistic
-# DEVIANCE_G2_BY_DF               +     Deviance G^2 divided by degrees of freedom
-# DEVIANCE_G2_PVAL                +     Deviance G^2 p-value
-# AVG_TOT_Y               +             Average of Y column for a single response value
-# STDEV_TOT_Y             +             St.Dev. of Y column for a single response value
-# AVG_RES_Y               +             Average of column residual, i.e. of Y - mean(Y|X)
-# STDEV_RES_Y             +             St.Dev. of column residual, i.e. of Y - mean(Y|X)
-# PRED_STDEV_RES          +       +     Model-predicted St.Dev. of column residual
-# R2                      +             R^2 of Y column residual with bias included
-# ADJUSTED_R2             +             Adjusted R^2 of Y column residual with bias included
-# R2_NOBIAS               +             R^2 of Y column residual with bias subtracted
-# ADJUSTED_R2_NOBIAS      +             Adjusted R^2 of Y column residual with bias subtracted
-# ---------------------------------------------------------------------------------------------
+#
+# .. code-block:: txt
+#
+#   CSV format: NAME,[COLUMN],[SCALED],VALUE
+#   ---
+#   NAME   is the string identifier for the statistic, see the table below.
+#   COLUMN is an optional integer value that specifies the Y-column for per-column statistics;
+#          note that a Binomial/Multinomial one-column Y input is converted into multi-column.
+#   SCALED is an optional Boolean value (TRUE or FALSE) that tells us whether or not the input
+#            dispersion parameter (disp) scaling has been applied to this statistic.
+#   VALUE  is the value of the statistic.
+#   ---
+#
+# .. code-block:: txt
+#
+#   NAME                  COLUMN  SCALED  MEANING
+#   ---------------------------------------------------------------------------------------------
+#   LOGLHOOD_Z                      +     Log-Likelihood Z-score (in st.dev's from mean)
+#   LOGLHOOD_Z_PVAL                 +     Log-Likelihood Z-score p-value
+#   PEARSON_X2                      +     Pearson residual X^2 statistic
+#   PEARSON_X2_BY_DF                +     Pearson X^2 divided by degrees of freedom
+#   PEARSON_X2_PVAL                 +     Pearson X^2 p-value
+#   DEVIANCE_G2                     +     Deviance from saturated model G^2 statistic
+#   DEVIANCE_G2_BY_DF               +     Deviance G^2 divided by degrees of freedom
+#   DEVIANCE_G2_PVAL                +     Deviance G^2 p-value
+#   AVG_TOT_Y               +             Average of Y column for a single response value
+#   STDEV_TOT_Y             +             St.Dev. of Y column for a single response value
+#   AVG_RES_Y               +             Average of column residual, i.e. of Y - mean(Y|X)
+#   STDEV_RES_Y             +             St.Dev. of column residual, i.e. of Y - mean(Y|X)
+#   PRED_STDEV_RES          +       +     Model-predicted St.Dev. of column residual
+#   R2                      +             R^2 of Y column residual with bias included
+#   ADJUSTED_R2             +             Adjusted R^2 of Y column residual with bias included
+#   R2_NOBIAS               +             R^2 of Y column residual with bias subtracted
+#   ADJUSTED_R2_NOBIAS      +             Adjusted R^2 of Y column residual with bias subtracted
+#   ---------------------------------------------------------------------------------------------
 #
 # INPUT:
 # -------------------------------------------------------------------
diff --git a/scripts/builtin/hyperband.dml b/scripts/builtin/hyperband.dml
index 3c2614e41c..4eede73070 100644
--- a/scripts/builtin/hyperband.dml
+++ b/scripts/builtin/hyperband.dml
@@ -23,9 +23,9 @@
 # elimination. Through multiple parallel brackets and consecutive trials it will return the hyper parameter combination
 # which performed best on a validation dataset. A set of hyper parameter combinations is drawn from uniform distributions
 # with given ranges; Those make up the candidates for hyperband. Notes:
-#    hyperband is hard-coded for lmCG, and uses lmPredict for validation
-#    hyperband is hard-coded to use the number of iterations as a resource
-#    hyperband can only optimize continuous hyperparameters
+# hyperband is hard-coded for lmCG, and uses lmPredict for validation
+# hyperband is hard-coded to use the number of iterations as a resource
+# hyperband can only optimize continuous hyperparameters
 #
 # INPUT:
 # ------------------------------------------------------------------------------------------
diff --git a/scripts/builtin/img_brightness.dml b/scripts/builtin/img_brightness.dml
index 965c0641cc..100ccb7588 100644
--- a/scripts/builtin/img_brightness.dml
+++ b/scripts/builtin/img_brightness.dml
@@ -22,7 +22,6 @@
 # The img_brightness-function is an image data augmentation function. It changes the brightness of the image.
 #
 # INPUT:
-
 # -----------------------------------------------------------------------------------------
 # img_in       Input matrix/image
 # value        The amount of brightness to be changed for the image
@@ -31,9 +30,7 @@
 #
 # OUTPUT:
 # ----------------------------------------------------------------------------------------------------------------------
-# NAME         TYPE                         MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# img_out      Matrix[Double]               Output matrix/image
+# img_out  Output matrix/image
 # ----------------------------------------------------------------------------------------------------------------------
 
 m_img_brightness = function(Matrix[Double] img_in, Double value, Integer channel_max) return (Matrix[Double] img_out) {
diff --git a/scripts/builtin/knn.dml b/scripts/builtin/knn.dml
index 6492e777e3..19d4cfffb0 100644
--- a/scripts/builtin/knn.dml
+++ b/scripts/builtin/knn.dml
@@ -47,8 +47,8 @@
 #
 # OUTPUT:
 # ---------------------------------------------------------------------------------------------
-# NNR_matrix            ---
-# CL_matrix             ---
+# NNR_matrix            Applied clusters to X
+# CL_matrix             Cluster matrix
 # m_feature_importance  Feature importance value
 # ---------------------------------------------------------------------------------------------
 
diff --git a/scripts/builtin/matrixProfile.dml b/scripts/builtin/matrixProfile.dml
index ad1f8d6cf6..a06a8ce57e 100644
--- a/scripts/builtin/matrixProfile.dml
+++ b/scripts/builtin/matrixProfile.dml
@@ -22,12 +22,14 @@
 # Builtin function that computes the MatrixProfile of a time series efficiently
 # using the SCRIMP++ algorithm.
 #
-# References:
-# Yan Zhu et al.. 2018.
-#   Matrix Profile XI: SCRIMP++: Time Series Motif Discovery at Interactive Speeds.
-#   2018 IEEE International Conference on Data Mining (ICDM), 2018, pp. 837-846.
-#   DOI: 10.1109/ICDM.2018.00099.
-#   https://www.cs.ucr.edu/~eamonn/SCRIMP_ICDM_camera_ready_updated.pdf
+# .. code-block:: txt
+#
+#   References:
+#   Yan Zhu et al.. 2018.
+#     Matrix Profile XI: SCRIMP++: Time Series Motif Discovery at Interactive Speeds.
+#     2018 IEEE International Conference on Data Mining (ICDM), 2018, pp. 837-846.
+#     DOI: 10.1109/ICDM.2018.00099.
+#     https://www.cs.ucr.edu/~eamonn/SCRIMP_ICDM_camera_ready_updated.pdf
 #
 # INPUT:
 # ----------------------------------------------------------------------------------
diff --git a/scripts/builtin/steplm.dml b/scripts/builtin/steplm.dml
index 6ed2fbb530..164fd54195 100644
--- a/scripts/builtin/steplm.dml
+++ b/scripts/builtin/steplm.dml
@@ -23,13 +23,16 @@
 # This method iteratively runs what-if scenarios and greedily selects the next best feature
 # until the Akaike information criterion (AIC) does not improve anymore. Each configuration trains a regression model
 # via lm, which in turn calls either the closed form lmDS or iterative lmGC.
-# 
-# return: Matrix of regression parameters (the betas) and its size depend on icpt input value:
-#         OUTPUT SIZE:   OUTPUT CONTENTS:                HOW TO PREDICT Y FROM X AND B:
-# icpt=0: ncol(X)   x 1  Betas for X only                Y ~ X %*% B[1:ncol(X), 1], or just X %*% B
-# icpt=1: ncol(X)+1 x 1  Betas for X and intercept       Y ~ X %*% B[1:ncol(X), 1] + B[ncol(X)+1, 1]
-# icpt=2: ncol(X)+1 x 2  Col.1: betas for X & intercept  Y ~ X %*% B[1:ncol(X), 1] + B[ncol(X)+1, 1]
-#                        Col.2: betas for shifted/rescaled X and intercept
+#
+# .. code-block:: txt 
+#
+#   return: Matrix of regression parameters (the betas) and its size depend on icpt input value:
+#           OUTPUT SIZE:   OUTPUT CONTENTS:                HOW TO PREDICT Y FROM X AND B:
+#   icpt=0: ncol(X)   x 1  Betas for X only                Y ~ X %*% B[1:ncol(X), 1], or just X %*% B
+#   icpt=1: ncol(X)+1 x 1  Betas for X and intercept       Y ~ X %*% B[1:ncol(X), 1] + B[ncol(X)+1, 1]
+#   icpt=2: ncol(X)+1 x 2  Col.1: betas for X & intercept  Y ~ X %*% B[1:ncol(X), 1] + B[ncol(X)+1, 1]
+#                          Col.2: betas for shifted/rescaled X and intercept
+#
 # In addition, in the last run of linear regression some statistics are provided in CSV format, one comma-separated
 # name-value pair per each line, as follows:
 #