You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemds.apache.org by ba...@apache.org on 2022/06/10 14:22:29 UTC
[systemds] 02/05: [DOCS] Update builtin scripts docs
This is an automated email from the ASF dual-hosted git repository.
baunsgaard pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git
commit a0254e5d2c204a1f841226eb399620f2b1bd97e6
Author: baunsgaard <ba...@tugraz.at>
AuthorDate: Fri Jun 10 16:18:33 2022 +0200
[DOCS] Update builtin scripts docs
---
scripts/builtin/WoE.dml | 9 +--
scripts/builtin/WoEApply.dml | 8 +-
scripts/builtin/abstain.dml | 16 ++--
scripts/builtin/confusionMatrix.dml | 7 +-
scripts/builtin/correctTypos.dml | 12 +--
scripts/builtin/correctTyposApply.dml | 12 +--
scripts/builtin/dbscanApply.dml | 4 -
scripts/builtin/denialConstraints.dml | 4 +-
scripts/builtin/fit_pipeline.dml | 4 -
scripts/builtin/fixInvalidLengthsApply.dml | 4 -
scripts/builtin/glm.dml | 118 +++++++++++++++--------------
scripts/builtin/glmPredict.dml | 64 +++++++++-------
scripts/builtin/hyperband.dml | 6 +-
scripts/builtin/img_brightness.dml | 5 +-
scripts/builtin/knn.dml | 4 +-
scripts/builtin/matrixProfile.dml | 14 ++--
scripts/builtin/steplm.dml | 17 +++--
17 files changed, 154 insertions(+), 154 deletions(-)
diff --git a/scripts/builtin/WoE.dml b/scripts/builtin/WoE.dml
index 003d9b48a6..7bc938eedd 100644
--- a/scripts/builtin/WoE.dml
+++ b/scripts/builtin/WoE.dml
@@ -30,13 +30,12 @@
#
# OUTPUT:
# ------------------------------------------------
-# X ---
-# Y ---
-# entropyMatrix ---
+# F Weighted X matrix where the entropy mask is applied
+# entropyMatrix An entropy matrix to apply to data
# ------------------------------------------------
m_WoE = function(Matrix[Double] X, Matrix[Double] Y, Matrix[Double] mask)
-return (Matrix[Double] X, Matrix[Double] Y, Matrix[Double] entropyMatrix) {
+return (Matrix[Double] F, Matrix[Double] entropyMatrix) {
tempX = replace(target=X, pattern=NaN, replacement=1)
entropyMatrix = matrix(0, rows=ncol(tempX), cols = max((tempX*mask)))
@@ -53,7 +52,7 @@ return (Matrix[Double] X, Matrix[Double] Y, Matrix[Double] entropyMatrix) {
}
}
- X = WoEApply(X, Y, entropyMatrix)
+ F = WoEApply(X, Y, entropyMatrix)
}
diff --git a/scripts/builtin/WoEApply.dml b/scripts/builtin/WoEApply.dml
index c27fae0d05..6f86a266d3 100644
--- a/scripts/builtin/WoEApply.dml
+++ b/scripts/builtin/WoEApply.dml
@@ -30,12 +30,12 @@
#
# OUTPUT:
# ------------------------------------------------
-# X ---
+# F Weighted X matrix where the entropy mask is applied
# ------------------------------------------------
m_WoEApply = function(Matrix[Double] X, Matrix[Double] Y, Matrix[Double] entropyMatrix)
-return (Matrix[Double] X) {
-
+return (Matrix[Double] F) {
+ F = matrix(1, nRow(X), nCol(X)) # allocate dense output matrix
for(i in 1:ncol(X))
{
if(sum(abs(entropyMatrix[i])) > 0)
@@ -46,7 +46,7 @@ return (Matrix[Double] X) {
resp = matrix(0, nrow(L), idx)
resp = (resp + t(seq(1, idx))) == L
resp = resp * entropy
- X[, i] = rowSums(resp)
+ F[, i] = rowSums(resp)
}
}
diff --git a/scripts/builtin/abstain.dml b/scripts/builtin/abstain.dml
index 6d9035101d..5a4c354327 100644
--- a/scripts/builtin/abstain.dml
+++ b/scripts/builtin/abstain.dml
@@ -24,16 +24,16 @@
#
# INPUT:
# -------------------------------------------------------------------------------------
-# X Location to read the matrix of feature vectors
-# Y Location to read the matrix with category labels
-# threshold ---
+# X matrix of feature vectors
+# Y matrix with category labels
+# threshold probability threshold: misclassified rows whose maximum predicted probability exceeds it are removed, otherwise X and Y are returned unmodified
# verbose flag specifying if logging information should be printed
# -------------------------------------------------------------------------------------
#
# OUTPUT:
# -------------------------------------------------------------------------------------
-# Xout ---
-# Yout ---
+# Xout abstained output X
+# Yout abstained output Y
# -------------------------------------------------------------------------------------
m_abstain = function(Matrix[Double] X, Matrix[Double] Y, Double threshold, Boolean verbose = FALSE)
@@ -41,16 +41,14 @@ return (Matrix[Double] Xout, Matrix[Double] Yout)
{
Xout = X
Yout = Y
- # for(i in 1:100) {
if(min(Y) != max(Y))
{
- betas = multiLogReg(X=X, Y=Y, icpt=1, reg=1e-4, maxi=100, maxii=0, verbose=FALSE)
+ betas = multiLogReg(X=X, Y=Y, icpt=1, reg=1e-4, maxi=100, maxii=0, verbose=verbose)
[prob, yhat, accuracy] = multiLogRegPredict(X, betas, Y, FALSE)
- # abstain = cbind(X, Y)
+
inc = ((yhat != Y) & (rowMaxs(prob) > threshold))
if(sum(inc) > 0)
{
- # print("inc vector "+toString(inc))
Xout = removeEmpty(target = X, margin = "rows", select = (inc == 0) )
Yout = removeEmpty(target = Y, margin = "rows", select = (inc == 0) )
}
diff --git a/scripts/builtin/confusionMatrix.dml b/scripts/builtin/confusionMatrix.dml
index a56a936cc9..c15b82621c 100644
--- a/scripts/builtin/confusionMatrix.dml
+++ b/scripts/builtin/confusionMatrix.dml
@@ -24,17 +24,14 @@
# After which, it calculates and returns the sum of classifications
# and the average of each true class.
#
+# .. code-block:: txt
+#
# True Labels
# 1 2
# 1 TP | FP
# Predictions ----+----
# 2 FN | TN
#
-# TP = True Positives
-# FP = False Positives
-# FN = False Negatives
-# TN = True Negatives
-#
# INPUT:
# --------------------------------------------------------------------------------
# P vector of Predictions
diff --git a/scripts/builtin/correctTypos.dml b/scripts/builtin/correctTypos.dml
index 2fddfaec1b..01d837a7a2 100644
--- a/scripts/builtin/correctTypos.dml
+++ b/scripts/builtin/correctTypos.dml
@@ -24,11 +24,13 @@
# and simply swaps strings that do not occur often with similar strings that
# occur more often
#
-# References:
-# Fred J. Damerau. 1964.
-# A technique for computer detection and correction of spelling errors.
-# Commun. ACM 7, 3 (March 1964), 171–176.
-# DOI:https://doi.org/10.1145/363958.363994
+# .. code-block:: txt
+#
+# References:
+# Fred J. Damerau. 1964.
+# A technique for computer detection and correction of spelling errors.
+# Commun. ACM 7, 3 (March 1964), 171–176.
+# DOI:https://doi.org/10.1145/363958.363994
#
# INPUT:
# ----------------------------------------------------------------------------------------
diff --git a/scripts/builtin/correctTyposApply.dml b/scripts/builtin/correctTyposApply.dml
index 050bb8132b..3ca4635bfa 100644
--- a/scripts/builtin/correctTyposApply.dml
+++ b/scripts/builtin/correctTyposApply.dml
@@ -24,11 +24,13 @@
# and simply swaps strings that do not occur often with similar strings that
# occur more often
#
-# References:
-# Fred J. Damerau. 1964.
-# A technique for computer detection and correction of spelling errors.
-# Commun. ACM 7, 3 (March 1964), 171–176.
-# DOI:https://doi.org/10.1145/363958.363994
+# .. code-block:: txt
+#
+# References:
+# Fred J. Damerau. 1964.
+# A technique for computer detection and correction of spelling errors.
+# Commun. ACM 7, 3 (March 1964), 171–176.
+# DOI:https://doi.org/10.1145/363958.363994
#
# TODO: future: add parameter for list of words that are sure to be correct
#
diff --git a/scripts/builtin/dbscanApply.dml b/scripts/builtin/dbscanApply.dml
index 4a7eb7e6ed..e3ab9723cb 100644
--- a/scripts/builtin/dbscanApply.dml
+++ b/scripts/builtin/dbscanApply.dml
@@ -23,8 +23,6 @@
#
# INPUT:
# ---------------------------------------------
-# NAME MEANING
-# ---------------------------------------------
# X The input Matrix to do outlier detection on.
# clusterModel Model of clusters to predict outliers against.
# eps Maximum distance between two points for one to be considered reachable for the other.
@@ -32,8 +30,6 @@
#
# OUTPUT:
# ----------------------------------------------
-# NAME MEANING
-# ----------------------------------------------
# outlierPoints Predicted outliers
# ----------------------------------------------
diff --git a/scripts/builtin/denialConstraints.dml b/scripts/builtin/denialConstraints.dml
index 23453979e1..d2dc3cfb40 100644
--- a/scripts/builtin/denialConstraints.dml
+++ b/scripts/builtin/denialConstraints.dml
@@ -21,6 +21,8 @@
# This function considers some constraints indicating statements that can NOT happen in the data (denial constraints).
#
+# .. code-block:: txt
+#
# EXAMPLE:
# dataFrame:
#
@@ -48,7 +50,7 @@
# 4 variableCompare TRUE discipline B yrs.service > yrs.since.phd
#
#
-# Example: explanation of constraint 2 --> it can't happen that one professor of rank Prof has more years of service than other, but lower salary.
+# Example: explanation of constraint 2 --> it can't happen that one professor of rank Prof has more years of service than another, but a lower salary.
#
# INPUT:
# ----------------------------------------------------------------------------------------------------
diff --git a/scripts/builtin/fit_pipeline.dml b/scripts/builtin/fit_pipeline.dml
index 4b4665e59e..96023f7b49 100644
--- a/scripts/builtin/fit_pipeline.dml
+++ b/scripts/builtin/fit_pipeline.dml
@@ -25,8 +25,6 @@
#
# INPUT:
# -------------------------------------------------------------------------------
-# NAME MEANING
-# -------------------------------------------------------------------------------
# trainData ---
# testData ---
# metaData ---
@@ -41,8 +39,6 @@
#
# OUTPUT:
# ------------------------------------------------------------------------------------------------
-# NAME MEANING
-# ------------------------------------------------------------------------------------------------
# scores ---
# ------------------------------------------------------------------------------------------------
diff --git a/scripts/builtin/fixInvalidLengthsApply.dml b/scripts/builtin/fixInvalidLengthsApply.dml
index a8c10dc052..e566bcbc68 100644
--- a/scripts/builtin/fixInvalidLengthsApply.dml
+++ b/scripts/builtin/fixInvalidLengthsApply.dml
@@ -23,8 +23,6 @@
#
# INPUT:
# ------------------------
-# NAME MEANING
-# ------------------------
# X ---
# mask ---
# ql ---
@@ -33,8 +31,6 @@
#
# OUTPUT:
# ------------------------
-# NAME MEANING
-# ------------------------
# out ---
# M ---
# ------------------------
diff --git a/scripts/builtin/glm.dml b/scripts/builtin/glm.dml
index c07a98337a..44b0c8cb1e 100644
--- a/scripts/builtin/glm.dml
+++ b/scripts/builtin/glm.dml
@@ -25,67 +25,71 @@
# In addition, some GLM statistics are provided as console output by setting verbose=TRUE, one comma-separated name-value
# pair per each line, as follows:
#
-# ----------------------------------------------------------------------------------------------------------------------
-# TERMINATION_CODE A positive integer indicating success/failure as follows:
-# 1 = Converged successfully; 2 = Maximum number of iterations reached;
-# 3 = Input (X, Y) out of range; 4 = Distribution/link is not supported
-# BETA_MIN Smallest beta value (regression coefficient), excluding the intercept
-# BETA_MIN_INDEX Column index for the smallest beta value
-# BETA_MAX Largest beta value (regression coefficient), excluding the intercept
-# BETA_MAX_INDEX Column index for the largest beta value
-# INTERCEPT Intercept value, or NaN if there is no intercept (if icpt=0)
-# DISPERSION Dispersion used to scale deviance, provided as "disp" input parameter
-# or estimated (same as DISPERSION_EST) if the "disp" parameter is <= 0
-# DISPERSION_EST Dispersion estimated from the dataset
-# DEVIANCE_UNSCALED Deviance from the saturated model, assuming dispersion == 1.0
-# DEVIANCE_SCALED Deviance from the saturated model, scaled by the DISPERSION value
-# ----------------------------------------------------------------------------------------------------------------------
+# .. code-block:: txt
#
-# The Log file, when requested, contains the following per-iteration variables in CSV format,
-# each line containing triple (NAME, ITERATION, VALUE) with ITERATION = 0 for initial values:
-#
-# ----------------------------------------------------------------------------------------------------------------------
-# NUM_CG_ITERS Number of inner (Conj.Gradient) iterations in this outer iteration
-# IS_TRUST_REACHED 1 = trust region boundary was reached, 0 = otherwise
-# POINT_STEP_NORM L2-norm of iteration step from old point (i.e. "beta") to new point
-# OBJECTIVE The loss function we minimize (i.e. negative partial log-likelihood)
-# OBJ_DROP_REAL Reduction in the objective during this iteration, actual value
-# OBJ_DROP_PRED Reduction in the objective predicted by a quadratic approximation
-# OBJ_DROP_RATIO Actual-to-predicted reduction ratio, used to update the trust region
-# GRADIENT_NORM L2-norm of the loss function gradient (NOTE: sometimes omitted)
-# LINEAR_TERM_MIN The minimum value of X %*% beta, used to check for overflows
-# LINEAR_TERM_MAX The maximum value of X %*% beta, used to check for overflows
-# IS_POINT_UPDATED 1 = new point accepted; 0 = new point rejected, old point restored
-# TRUST_DELTA Updated trust region size, the "delta"
-# ----------------------------------------------------------------------------------------------------------------------
+# --------------------------------------------------------------------------------------------
+# TERMINATION_CODE A positive integer indicating success/failure as follows:
+# 1 = Converged successfully; 2 = Maximum number of iterations reached;
+# 3 = Input (X, Y) out of range; 4 = Distribution/link is not supported
+# BETA_MIN Smallest beta value (regression coefficient), excluding the intercept
+# BETA_MIN_INDEX Column index for the smallest beta value
+# BETA_MAX Largest beta value (regression coefficient), excluding the intercept
+# BETA_MAX_INDEX Column index for the largest beta value
+# INTERCEPT Intercept value, or NaN if there is no intercept (if icpt=0)
+# DISPERSION Dispersion used to scale deviance, provided as "disp" input parameter
+# or estimated (same as DISPERSION_EST) if the "disp" parameter is <= 0
+# DISPERSION_EST Dispersion estimated from the dataset
+# DEVIANCE_UNSCALED Deviance from the saturated model, assuming dispersion == 1.0
+# DEVIANCE_SCALED Deviance from the saturated model, scaled by the DISPERSION value
+# --------------------------------------------------------------------------------------------
+#
+# The Log file, when requested, contains the following per-iteration variables in CSV format,
+# each line containing triple (NAME, ITERATION, VALUE) with ITERATION = 0 for initial values:
+#
+# --------------------------------------------------------------------------------------------
+# NUM_CG_ITERS Number of inner (Conj.Gradient) iterations in this outer iteration
+# IS_TRUST_REACHED 1 = trust region boundary was reached, 0 = otherwise
+# POINT_STEP_NORM L2-norm of iteration step from old point (i.e. "beta") to new point
+# OBJECTIVE The loss function we minimize (i.e. negative partial log-likelihood)
+# OBJ_DROP_REAL Reduction in the objective during this iteration, actual value
+# OBJ_DROP_PRED Reduction in the objective predicted by a quadratic approximation
+# OBJ_DROP_RATIO Actual-to-predicted reduction ratio, used to update the trust region
+# GRADIENT_NORM L2-norm of the loss function gradient (NOTE: sometimes omitted)
+# LINEAR_TERM_MIN The minimum value of X %*% beta, used to check for overflows
+# LINEAR_TERM_MAX The maximum value of X %*% beta, used to check for overflows
+# IS_POINT_UPDATED 1 = new point accepted; 0 = new point rejected, old point restored
+# TRUST_DELTA Updated trust region size, the "delta"
+# --------------------------------------------------------------------------------------------
#
# SOME OF THE SUPPORTED GLM DISTRIBUTION FAMILIES
# AND LINK FUNCTIONS:
#
-# dfam vpow link lpow Distribution.link nical?
-# ----------------------------------------------------------------------------------------------------------------------
-# 1 0.0 1 -1.0 Gaussian.inverse
-# 1 0.0 1 0.0 Gaussian.log
-# 1 0.0 1 1.0 Gaussian.id Yes
-# 1 1.0 1 0.0 Poisson.log Yes
-# 1 1.0 1 0.5 Poisson.sqrt
-# 1 1.0 1 1.0 Poisson.id
-# 1 2.0 1 -1.0 Gamma.inverse Yes
-# 1 2.0 1 0.0 Gamma.log
-# 1 2.0 1 1.0 Gamma.id
-# 1 3.0 1 -2.0 InvGaussian.1/mu^2 Yes
-# 1 3.0 1 -1.0 InvGaussian.inverse
-# 1 3.0 1 0.0 InvGaussian.log
-# 1 3.0 1 1.0 InvGaussian.id
-# 1 * 1 * AnyVariance.AnyLink
-# ----------------------------------------------------------------------------------------------------------------------
-# 2 * 1 0.0 Binomial.log
-# 2 * 1 0.5 Binomial.sqrt
-# 2 * 2 * Binomial.logit Yes
-# 2 * 3 * Binomial.probit
-# 2 * 4 * Binomial.cloglog
-# 2 * 5 * Binomial.cauchit
-# ----------------------------------------------------------------------------------------------------------------------
+# .. code-block:: txt
+#
+# dfam vpow link lpow Distribution.link Canonical?
+# ---------------------------------------------------
+# 1 0.0 1 -1.0 Gaussian.inverse
+# 1 0.0 1 0.0 Gaussian.log
+# 1 0.0 1 1.0 Gaussian.id Yes
+# 1 1.0 1 0.0 Poisson.log Yes
+# 1 1.0 1 0.5 Poisson.sqrt
+# 1 1.0 1 1.0 Poisson.id
+# 1 2.0 1 -1.0 Gamma.inverse Yes
+# 1 2.0 1 0.0 Gamma.log
+# 1 2.0 1 1.0 Gamma.id
+# 1 3.0 1 -2.0 InvGaussian.1/mu^2 Yes
+# 1 3.0 1 -1.0 InvGaussian.inverse
+# 1 3.0 1 0.0 InvGaussian.log
+# 1 3.0 1 1.0 InvGaussian.id
+# 1 * 1 * AnyVariance.AnyLink
+# ---------------------------------------------------
+# 2 * 1 0.0 Binomial.log
+# 2 * 1 0.5 Binomial.sqrt
+# 2 * 2 * Binomial.logit Yes
+# 2 * 3 * Binomial.probit
+# 2 * 4 * Binomial.cloglog
+# 2 * 5 * Binomial.cauchit
+# ---------------------------------------------------
#
# INPUT:
# --------------------------------------------------------------------------------------------
@@ -111,7 +115,7 @@
# mii Maximum number of inner (Conjugate Gradient) iterations, 0 = no maximum
# verbose if the Algorithm should be verbose
# ------------------------------------------------------------------------------------------
-#
+#
# OUTPUT:
# --------------------------------------------------------------------------------------------
# beta Matrix beta, whose size depends on icpt:
diff --git a/scripts/builtin/glmPredict.dml b/scripts/builtin/glmPredict.dml
index 3c0e09c6ba..cde4e17ea8 100644
--- a/scripts/builtin/glmPredict.dml
+++ b/scripts/builtin/glmPredict.dml
@@ -22,35 +22,41 @@
# Applies the estimated parameters of a GLM type regression to a new dataset
#
# Additional statistics are printed one per each line, in the following
-# CSV format: NAME,[COLUMN],[SCALED],VALUE
-# ---
-# NAME is the string identifier for the statistic, see the table below.
-# COLUMN is an optional integer value that specifies the Y-column for per-column statistics;
-# note that a Binomial/Multinomial one-column Y input is converted into multi-column.
-# SCALED is an optional Boolean value (TRUE or FALSE) that tells us whether or not the input
-# dispersion parameter (disp) scaling has been applied to this statistic.
-# VALUE is the value of the statistic.
-# ---
-# NAME COLUMN SCALED MEANING
-# ---------------------------------------------------------------------------------------------
-# LOGLHOOD_Z + Log-Likelihood Z-score (in st.dev's from mean)
-# LOGLHOOD_Z_PVAL + Log-Likelihood Z-score p-value
-# PEARSON_X2 + Pearson residual X^2 statistic
-# PEARSON_X2_BY_DF + Pearson X^2 divided by degrees of freedom
-# PEARSON_X2_PVAL + Pearson X^2 p-value
-# DEVIANCE_G2 + Deviance from saturated model G^2 statistic
-# DEVIANCE_G2_BY_DF + Deviance G^2 divided by degrees of freedom
-# DEVIANCE_G2_PVAL + Deviance G^2 p-value
-# AVG_TOT_Y + Average of Y column for a single response value
-# STDEV_TOT_Y + St.Dev. of Y column for a single response value
-# AVG_RES_Y + Average of column residual, i.e. of Y - mean(Y|X)
-# STDEV_RES_Y + St.Dev. of column residual, i.e. of Y - mean(Y|X)
-# PRED_STDEV_RES + + Model-predicted St.Dev. of column residual
-# R2 + R^2 of Y column residual with bias included
-# ADJUSTED_R2 + Adjusted R^2 of Y column residual with bias included
-# R2_NOBIAS + R^2 of Y column residual with bias subtracted
-# ADJUSTED_R2_NOBIAS + Adjusted R^2 of Y column residual with bias subtracted
-# ---------------------------------------------------------------------------------------------
+#
+# .. code-block:: txt
+#
+# CSV format: NAME,[COLUMN],[SCALED],VALUE
+# ---
+# NAME is the string identifier for the statistic, see the table below.
+# COLUMN is an optional integer value that specifies the Y-column for per-column statistics;
+# note that a Binomial/Multinomial one-column Y input is converted into multi-column.
+# SCALED is an optional Boolean value (TRUE or FALSE) that tells us whether or not the input
+# dispersion parameter (disp) scaling has been applied to this statistic.
+# VALUE is the value of the statistic.
+# ---
+#
+# .. code-block:: txt
+#
+# NAME COLUMN SCALED MEANING
+# ---------------------------------------------------------------------------------------------
+# LOGLHOOD_Z + Log-Likelihood Z-score (in st.dev's from mean)
+# LOGLHOOD_Z_PVAL + Log-Likelihood Z-score p-value
+# PEARSON_X2 + Pearson residual X^2 statistic
+# PEARSON_X2_BY_DF + Pearson X^2 divided by degrees of freedom
+# PEARSON_X2_PVAL + Pearson X^2 p-value
+# DEVIANCE_G2 + Deviance from saturated model G^2 statistic
+# DEVIANCE_G2_BY_DF + Deviance G^2 divided by degrees of freedom
+# DEVIANCE_G2_PVAL + Deviance G^2 p-value
+# AVG_TOT_Y + Average of Y column for a single response value
+# STDEV_TOT_Y + St.Dev. of Y column for a single response value
+# AVG_RES_Y + Average of column residual, i.e. of Y - mean(Y|X)
+# STDEV_RES_Y + St.Dev. of column residual, i.e. of Y - mean(Y|X)
+# PRED_STDEV_RES + + Model-predicted St.Dev. of column residual
+# R2 + R^2 of Y column residual with bias included
+# ADJUSTED_R2 + Adjusted R^2 of Y column residual with bias included
+# R2_NOBIAS + R^2 of Y column residual with bias subtracted
+# ADJUSTED_R2_NOBIAS + Adjusted R^2 of Y column residual with bias subtracted
+# ---------------------------------------------------------------------------------------------
#
# INPUT:
# -------------------------------------------------------------------
diff --git a/scripts/builtin/hyperband.dml b/scripts/builtin/hyperband.dml
index 3c2614e41c..4eede73070 100644
--- a/scripts/builtin/hyperband.dml
+++ b/scripts/builtin/hyperband.dml
@@ -23,9 +23,9 @@
# elimination. Through multiple parallel brackets and consecutive trials it will return the hyper parameter combination
# which performed best on a validation dataset. A set of hyper parameter combinations is drawn from uniform distributions
# with given ranges; Those make up the candidates for hyperband. Notes:
-# hyperband is hard-coded for lmCG, and uses lmPredict for validation
-# hyperband is hard-coded to use the number of iterations as a resource
-# hyperband can only optimize continuous hyperparameters
+# hyperband is hard-coded for lmCG, and uses lmPredict for validation
+# hyperband is hard-coded to use the number of iterations as a resource
+# hyperband can only optimize continuous hyperparameters
#
# INPUT:
# ------------------------------------------------------------------------------------------
diff --git a/scripts/builtin/img_brightness.dml b/scripts/builtin/img_brightness.dml
index 965c0641cc..100ccb7588 100644
--- a/scripts/builtin/img_brightness.dml
+++ b/scripts/builtin/img_brightness.dml
@@ -22,7 +22,6 @@
# The img_brightness-function is an image data augmentation function. It changes the brightness of the image.
#
# INPUT:
-
# -----------------------------------------------------------------------------------------
# img_in Input matrix/image
# value The amount of brightness to be changed for the image
@@ -31,9 +30,7 @@
#
# OUTPUT:
# ----------------------------------------------------------------------------------------------------------------------
-# NAME TYPE MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# img_out Matrix[Double] Output matrix/image
+# img_out Output matrix/image
# ----------------------------------------------------------------------------------------------------------------------
m_img_brightness = function(Matrix[Double] img_in, Double value, Integer channel_max) return (Matrix[Double] img_out) {
diff --git a/scripts/builtin/knn.dml b/scripts/builtin/knn.dml
index 6492e777e3..19d4cfffb0 100644
--- a/scripts/builtin/knn.dml
+++ b/scripts/builtin/knn.dml
@@ -47,8 +47,8 @@
#
# OUTPUT:
# ---------------------------------------------------------------------------------------------
-# NNR_matrix ---
-# CL_matrix ---
+# NNR_matrix Applied clusters to X
+# CL_matrix Cluster matrix
# m_feature_importance Feature importance value
# ---------------------------------------------------------------------------------------------
diff --git a/scripts/builtin/matrixProfile.dml b/scripts/builtin/matrixProfile.dml
index ad1f8d6cf6..a06a8ce57e 100644
--- a/scripts/builtin/matrixProfile.dml
+++ b/scripts/builtin/matrixProfile.dml
@@ -22,12 +22,14 @@
# Builtin function that computes the MatrixProfile of a time series efficiently
# using the SCRIMP++ algorithm.
#
-# References:
-# Yan Zhu et al.. 2018.
-# Matrix Profile XI: SCRIMP++: Time Series Motif Discovery at Interactive Speeds.
-# 2018 IEEE International Conference on Data Mining (ICDM), 2018, pp. 837-846.
-# DOI: 10.1109/ICDM.2018.00099.
-# https://www.cs.ucr.edu/~eamonn/SCRIMP_ICDM_camera_ready_updated.pdf
+# .. code-block:: txt
+#
+# References:
+# Yan Zhu et al.. 2018.
+# Matrix Profile XI: SCRIMP++: Time Series Motif Discovery at Interactive Speeds.
+# 2018 IEEE International Conference on Data Mining (ICDM), 2018, pp. 837-846.
+# DOI: 10.1109/ICDM.2018.00099.
+# https://www.cs.ucr.edu/~eamonn/SCRIMP_ICDM_camera_ready_updated.pdf
#
# INPUT:
# ----------------------------------------------------------------------------------
diff --git a/scripts/builtin/steplm.dml b/scripts/builtin/steplm.dml
index 6ed2fbb530..164fd54195 100644
--- a/scripts/builtin/steplm.dml
+++ b/scripts/builtin/steplm.dml
@@ -23,13 +23,16 @@
# This method iteratively runs what-if scenarios and greedily selects the next best feature
# until the Akaike information criterion (AIC) does not improve anymore. Each configuration trains a regression model
# via lm, which in turn calls either the closed form lmDS or iterative lmCG.
-#
-# return: Matrix of regression parameters (the betas) and its size depend on icpt input value:
-# OUTPUT SIZE: OUTPUT CONTENTS: HOW TO PREDICT Y FROM X AND B:
-# icpt=0: ncol(X) x 1 Betas for X only Y ~ X %*% B[1:ncol(X), 1], or just X %*% B
-# icpt=1: ncol(X)+1 x 1 Betas for X and intercept Y ~ X %*% B[1:ncol(X), 1] + B[ncol(X)+1, 1]
-# icpt=2: ncol(X)+1 x 2 Col.1: betas for X & intercept Y ~ X %*% B[1:ncol(X), 1] + B[ncol(X)+1, 1]
-# Col.2: betas for shifted/rescaled X and intercept
+#
+# .. code-block:: txt
+#
+# return: Matrix of regression parameters (the betas), whose size depends on the icpt input value:
+# OUTPUT SIZE: OUTPUT CONTENTS: HOW TO PREDICT Y FROM X AND B:
+# icpt=0: ncol(X) x 1 Betas for X only Y ~ X %*% B[1:ncol(X), 1], or just X %*% B
+# icpt=1: ncol(X)+1 x 1 Betas for X and intercept Y ~ X %*% B[1:ncol(X), 1] + B[ncol(X)+1, 1]
+# icpt=2: ncol(X)+1 x 2 Col.1: betas for X & intercept Y ~ X %*% B[1:ncol(X), 1] + B[ncol(X)+1, 1]
+# Col.2: betas for shifted/rescaled X and intercept
+#
# In addition, in the last run of linear regression some statistics are provided in CSV format, one comma-separated
# name-value pair per each line, as follows:
#