You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemds.apache.org by ba...@apache.org on 2022/06/10 11:22:04 UTC
[systemds] branch main updated: [SYSTEMDS-3082] Generating builtin docs

This is an automated email from the ASF dual-hosted git repository.

baunsgaard pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git


The following commit(s) were added to refs/heads/main by this push:
     new 8d0195a666 [SYSTEMDS-3082] Generating builtin docs
8d0195a666 is described below

commit 8d0195a666d3871582b9691c2b97a65cee387780
Author: baunsgaard <ba...@tugraz.at>
AuthorDate: Wed Jun 8 23:46:06 2022 +0200

    [SYSTEMDS-3082] Generating builtin docs
    
    This commit changes the builtin docs to be more consistent with the
    actual builtins by removing the types and defaults from the docs, and
    only having such definitions in the functions themselves.
    Furthermore, this commit removes the old doc-generating parser that
    fit the old design of the docs, and modifies the Python docs parser to
    work and produce Python docs correctly.
    
    Closes #1632
---
 scripts/builtin/WoE.dml                            |  19 +-
 scripts/builtin/WoEApply.dml                       |  14 +
 scripts/builtin/_genBuiltinDocs.py                 | 370 ---------------------
 scripts/builtin/abstain.dml                        |  29 +-
 scripts/builtin/als.dml                            |  54 ++-
 scripts/builtin/alsCG.dml                          |  54 ++-
 scripts/builtin/alsDS.dml                          |  38 +--
 scripts/builtin/alsPredict.dml                     |  24 +-
 scripts/builtin/alsTopkPredict.dml                 |  29 +-
 scripts/builtin/apply_pipeline.dml                 |  38 +--
 scripts/builtin/arima.dml                          |  38 +--
 scripts/builtin/autoencoder_2layer.dml             |  62 ++--
 scripts/builtin/bandit.dml                         |  53 ++-
 scripts/builtin/bivar.dml                          |  34 +-
 scripts/builtin/components.dml                     |  36 +-
 scripts/builtin/confusionMatrix.dml                |  31 +-
 scripts/builtin/cor.dml                            |  18 +-
 scripts/builtin/correctTypos.dml                   |  25 +-
 scripts/builtin/correctTyposApply.dml              |  30 +-
 scripts/builtin/cox.dml                            | 108 +++---
 scripts/builtin/cspline.dml                        |  34 +-
 scripts/builtin/csplineCG.dml                      |  34 +-
 scripts/builtin/csplineDS.dml                      |  25 +-
 scripts/builtin/cvlm.dml                           |  30 +-
 scripts/builtin/dbscan.dml                         |  24 +-
 scripts/builtin/dbscanApply.dml                    |  31 +-
 scripts/builtin/decisionTree.dml                   |  66 ++--
 scripts/builtin/decisionTreePredict.dml            |  57 ++--
 scripts/builtin/deepWalk.dml                       |  30 +-
 scripts/builtin/denialConstraints.dml              |  60 ++--
 scripts/builtin/discoverFD.dml                     |  25 +-
 scripts/builtin/dist.dml                           |  20 +-
 scripts/builtin/dmv.dml                            |  25 +-
 scripts/builtin/ema.dml                            |  32 +-
 scripts/builtin/executePipeline.dml                |  54 ++-
 scripts/builtin/ffPredict.dml                      |  23 +-
 scripts/builtin/ffTrain.dml                        |  44 ++-
 scripts/builtin/fit_pipeline.dml                   |  41 ++-
 scripts/builtin/fixInvalidLengths.dml              |  28 +-
 scripts/builtin/fixInvalidLengthsApply.dml         |  32 +-
 scripts/builtin/frameSort.dml                      |  24 +-
 scripts/builtin/frequencyEncode.dml                |  18 +-
 scripts/builtin/frequencyEncodeApply.dml           |  14 +
 scripts/builtin/garch.dml                          |  48 ++-
 scripts/builtin/gaussianClassifier.dml             |  32 +-
 scripts/builtin/getAccuracy.dml                    |  22 +-
 scripts/builtin/glm.dml                            |  71 ++--
 scripts/builtin/glmPredict.dml                     |  66 ++--
 scripts/builtin/gmm.dml                            |  51 ++-
 scripts/builtin/gmmPredict.dml                     |  31 +-
 scripts/builtin/gnmf.dml                           |  26 +-
 scripts/builtin/gridSearch.dml                     |  59 ++--
 scripts/builtin/hospitalResidencyMatch.dml         |  52 ++-
 scripts/builtin/hyperband.dml                      |  39 +--
 scripts/builtin/img_brightness.dml                 |  18 +-
 scripts/builtin/img_crop.dml                       |  28 +-
 scripts/builtin/img_cutout.dml                     |  28 +-
 scripts/builtin/img_invert.dml                     |  22 +-
 scripts/builtin/img_mirror.dml                     |  23 +-
 scripts/builtin/img_posterize.dml                  |  22 +-
 scripts/builtin/img_rotate.dml                     |  22 +-
 scripts/builtin/img_sample_pairing.dml             |  24 +-
 scripts/builtin/img_shear.dml                      |  24 +-
 scripts/builtin/img_transform.dml                  |  26 +-
 scripts/builtin/img_translate.dml                  |  28 +-
 scripts/builtin/impurityMeasures.dml               |  46 ++-
 scripts/builtin/imputeByFD.dml                     |  28 +-
 scripts/builtin/imputeByFDApply.dml                |  26 +-
 scripts/builtin/imputeByMean.dml                   |  20 +-
 scripts/builtin/imputeByMeanApply.dml              |  20 +-
 scripts/builtin/imputeByMedian.dml                 |  26 +-
 scripts/builtin/imputeByMedianApply.dml            |  20 +-
 scripts/builtin/imputeByMode.dml                   |  18 +-
 scripts/builtin/imputeByModeApply.dml              |  20 +-
 scripts/builtin/intersect.dml                      |  22 +-
 scripts/builtin/km.dml                             | 122 ++++---
 scripts/builtin/kmeans.dml                         |  36 +-
 scripts/builtin/kmeansPredict.dml                  |  20 +-
 scripts/builtin/knn.dml                            |  63 ++--
 scripts/builtin/knnGraph.dml                       |  13 +-
 scripts/builtin/knnbf.dml                          |  22 +-
 scripts/builtin/l2svm.dml                          |  38 +--
 scripts/builtin/l2svmPredict.dml                   |  28 +-
 scripts/builtin/lasso.dml                          |  30 +-
 scripts/builtin/lenetPredict.dml                   |  28 +-
 scripts/builtin/lenetTrain.dml                     |  48 ++-
 scripts/builtin/lm.dml                             |  39 +--
 scripts/builtin/lmCG.dml                           |  37 +--
 scripts/builtin/lmDS.dml                           |  37 +--
 scripts/builtin/lmPredict.dml                      |  30 +-
 scripts/builtin/logSumExp.dml                      |  26 +-
 scripts/builtin/matrixProfile.dml                  |  32 +-
 scripts/builtin/mcc.dml                            |  26 +-
 scripts/builtin/mdedup.dml                         |  32 +-
 scripts/builtin/mice.dml                           |  36 +-
 scripts/builtin/miceApply.dml                      |  42 ++-
 scripts/builtin/msvm.dml                           |  36 +-
 scripts/builtin/msvmPredict.dml                    |  24 +-
 scripts/builtin/multiLogReg.dml                    |  36 +-
 scripts/builtin/multiLogRegPredict.dml             |  28 +-
 scripts/builtin/na_locf.dml                        |  24 +-
 scripts/builtin/naiveBayes.dml                     |  28 +-
 scripts/builtin/naiveBayesPredict.dml              |  24 +-
 scripts/builtin/normalize.dml                      |  22 +-
 scripts/builtin/normalizeApply.dml                 |  22 +-
 scripts/builtin/outlier.dml                        |  22 +-
 scripts/builtin/outlierByArima.dml                 |  42 ++-
 scripts/builtin/outlierByIQR.dml                   |  41 +--
 scripts/builtin/outlierByIQRApply.dml              |  35 +-
 scripts/builtin/outlierBySd.dml                    |  28 +-
 scripts/builtin/outlierBySdApply.dml               |  43 +--
 scripts/builtin/pca.dml                            |  30 +-
 scripts/builtin/pcaInverse.dml                     |  24 +-
 scripts/builtin/pcaTransform.dml                   |  24 +-
 scripts/builtin/pnmf.dml                           |  28 +-
 scripts/builtin/ppca.dml                           |  31 +-
 scripts/builtin/randomForest.dml                   |  86 +++--
 scripts/builtin/scale.dml                          |  26 +-
 scripts/builtin/scaleApply.dml                     |  22 +-
 scripts/builtin/scaleMinMax.dml                    |  18 +-
 scripts/builtin/selectByVarThresh.dml              |  20 +-
 scripts/builtin/setdiff.dml                        |  20 +-
 scripts/builtin/sherlock.dml                       |  38 +--
 scripts/builtin/sherlockPredict.dml                |  40 +--
 scripts/builtin/shortestPath.dml                   |  43 ++-
 scripts/builtin/sigmoid.dml                        |  18 +-
 scripts/builtin/slicefinder.dml                    |  46 ++-
 scripts/builtin/smote.dml                          |  26 +-
 scripts/builtin/softmax.dml                        |  20 +-
 scripts/builtin/split.dml                          |  50 ++-
 scripts/builtin/splitBalanced.dml                  |  30 +-
 scripts/builtin/stableMarriage.dml                 | 108 +++---
 scripts/builtin/statsNA.dml                        |  49 ++-
 scripts/builtin/steplm.dml                         |  41 ++-
 scripts/builtin/stratstats.dml                     | 117 ++++---
 scripts/builtin/symmetricDifference.dml            |  20 +-
 scripts/builtin/tSNE.dml                           |  36 +-
 scripts/builtin/toOneHot.dml                       |  22 +-
 scripts/builtin/tomeklink.dml                      |  28 +-
 scripts/builtin/topk_cleaning.dml                  |  30 --
 scripts/builtin/underSampling.dml                  |  24 +-
 scripts/builtin/union.dml                          |  20 +-
 scripts/builtin/unique.dml                         |  18 +-
 scripts/builtin/univar.dml                         |  22 +-
 scripts/builtin/vectorToCsv.dml                    |  18 +-
 scripts/builtin/winsorize.dml                      |  21 +-
 scripts/builtin/winsorizeApply.dml                 |  25 +-
 scripts/builtin/xdummy1.dml                        |  18 +-
 scripts/builtin/xdummy2.dml                        |  20 +-
 scripts/builtin/xgboost.dml                        |  71 ++--
 scripts/builtin/xgboostPredictClassification.dml   |  22 +-
 scripts/builtin/xgboostPredictRegression.dml       |  22 +-
 src/main/python/generator/generator.py             |  63 ++--
 src/main/python/generator/parser.py                | 102 +++---
 .../systemds/operator/algorithm/builtin/WoE.py     |   9 +
 .../operator/algorithm/builtin/WoEApply.py         |   9 +
 .../systemds/operator/algorithm/builtin/abstain.py |   9 +-
 .../systemds/operator/algorithm/builtin/als.py     |  26 +-
 .../systemds/operator/algorithm/builtin/alsCG.py   |  22 +-
 .../systemds/operator/algorithm/builtin/alsDS.py   |  16 +-
 .../operator/algorithm/builtin/alsPredict.py       |  12 +
 .../operator/algorithm/builtin/alsTopkPredict.py   |  12 +-
 .../operator/algorithm/builtin/apply_pipeline.py   |  18 +
 .../systemds/operator/algorithm/builtin/arima.py   |   8 +-
 .../algorithm/builtin/autoencoder_2layer.py        |  21 +-
 .../systemds/operator/algorithm/builtin/bandit.py  |  24 ++
 .../systemds/operator/algorithm/builtin/bivar.py   |  13 +-
 .../operator/algorithm/builtin/components.py       |  11 +-
 .../operator/algorithm/builtin/confusionMatrix.py  |  22 +-
 .../systemds/operator/algorithm/builtin/cor.py     |   7 +
 .../operator/algorithm/builtin/correctTypos.py     |  15 +-
 .../algorithm/builtin/correctTyposApply.py         |  19 +-
 .../systemds/operator/algorithm/builtin/cox.py     |  59 +++-
 .../systemds/operator/algorithm/builtin/cspline.py |  14 +-
 .../operator/algorithm/builtin/csplineCG.py        |  12 +-
 .../operator/algorithm/builtin/csplineDS.py        |  10 +-
 .../systemds/operator/algorithm/builtin/cvlm.py    |  11 +-
 .../systemds/operator/algorithm/builtin/dbscan.py  |   8 +-
 .../operator/algorithm/builtin/dbscanApply.py      |   9 +-
 .../operator/algorithm/builtin/decisionTree.py     |  30 +-
 .../algorithm/builtin/decisionTreePredict.py       |  38 ++-
 .../operator/algorithm/builtin/deepWalk.py         |   7 +-
 .../algorithm/builtin/denialConstraints.py         |  72 ++--
 .../operator/algorithm/builtin/discoverFD.py       |  12 +-
 .../systemds/operator/algorithm/builtin/dist.py    |   7 +
 .../systemds/operator/algorithm/builtin/dmv.py     |   8 +-
 .../systemds/operator/algorithm/builtin/ema.py     |  11 +-
 .../operator/algorithm/builtin/executePipeline.py  |  17 +-
 .../operator/algorithm/builtin/ffPredict.py        |   8 +-
 .../systemds/operator/algorithm/builtin/ffTrain.py |  14 +-
 .../operator/algorithm/builtin/fit_pipeline.py     |  19 ++
 .../algorithm/builtin/fixInvalidLengths.py         |  10 +
 .../algorithm/builtin/fixInvalidLengthsApply.py    |  11 +
 .../operator/algorithm/builtin/frameSort.py        |   9 +
 .../operator/algorithm/builtin/frequencyEncode.py  |   8 +
 .../algorithm/builtin/frequencyEncodeApply.py      |   8 +
 .../systemds/operator/algorithm/builtin/garch.py   |  12 +-
 .../algorithm/builtin/gaussianClassifier.py        |  13 +-
 .../operator/algorithm/builtin/getAccuracy.py      |   8 +-
 .../systemds/operator/algorithm/builtin/glm.py     |  77 ++++-
 .../operator/algorithm/builtin/glmPredict.py       |  51 ++-
 .../systemds/operator/algorithm/builtin/gmm.py     |  13 +-
 .../operator/algorithm/builtin/gmmPredict.py       |  11 +-
 .../systemds/operator/algorithm/builtin/gnmf.py    |  13 +-
 .../operator/algorithm/builtin/gridSearch.py       |  27 +-
 .../algorithm/builtin/hospitalResidencyMatch.py    |  50 ++-
 .../operator/algorithm/builtin/hyperband.py        |  22 +-
 .../operator/algorithm/builtin/img_brightness.py   |   7 +-
 .../operator/algorithm/builtin/img_crop.py         |   7 +-
 .../operator/algorithm/builtin/img_cutout.py       |   7 +-
 .../operator/algorithm/builtin/img_invert.py       |   7 +-
 .../operator/algorithm/builtin/img_mirror.py       |   8 +-
 .../operator/algorithm/builtin/img_posterize.py    |  10 +-
 .../operator/algorithm/builtin/img_rotate.py       |   8 +-
 .../algorithm/builtin/img_sample_pairing.py        |  10 +-
 .../operator/algorithm/builtin/img_shear.py        |   8 +-
 .../operator/algorithm/builtin/img_transform.py    |  10 +-
 .../operator/algorithm/builtin/img_translate.py    |   9 +-
 .../operator/algorithm/builtin/impurityMeasures.py |  20 ++
 .../operator/algorithm/builtin/imputeByFD.py       |   8 +-
 .../operator/algorithm/builtin/imputeByFDApply.py  |   7 +-
 .../operator/algorithm/builtin/imputeByMean.py     |   9 +
 .../algorithm/builtin/imputeByMeanApply.py         |   9 +
 .../operator/algorithm/builtin/imputeByMedian.py   |   9 +
 .../algorithm/builtin/imputeByMedianApply.py       |   9 +
 .../operator/algorithm/builtin/imputeByMode.py     |   8 +
 .../algorithm/builtin/imputeByModeApply.py         |   9 +
 .../operator/algorithm/builtin/intersect.py        |   8 +
 .../systemds/operator/algorithm/builtin/km.py      |  60 +++-
 .../systemds/operator/algorithm/builtin/kmeans.py  |   9 +-
 .../operator/algorithm/builtin/kmeansPredict.py    |   8 +
 .../systemds/operator/algorithm/builtin/knn.py     |  38 ++-
 .../operator/algorithm/builtin/knnGraph.py         |   8 +
 .../systemds/operator/algorithm/builtin/knnbf.py   |   9 +
 .../systemds/operator/algorithm/builtin/l2svm.py   |  15 +-
 .../operator/algorithm/builtin/l2svmPredict.py     |   9 +-
 .../systemds/operator/algorithm/builtin/lasso.py   |   9 +-
 .../operator/algorithm/builtin/lenetPredict.py     |   8 +-
 .../operator/algorithm/builtin/lenetTrain.py       |  12 +-
 .../systemds/operator/algorithm/builtin/lm.py      |  12 +-
 .../systemds/operator/algorithm/builtin/lmCG.py    |  11 +-
 .../systemds/operator/algorithm/builtin/lmDS.py    |  11 +-
 .../operator/algorithm/builtin/lmPredict.py        |  11 +-
 .../operator/algorithm/builtin/logSumExp.py        |  11 +-
 .../operator/algorithm/builtin/matrixProfile.py    |  18 +-
 .../systemds/operator/algorithm/builtin/mcc.py     |   9 +
 .../systemds/operator/algorithm/builtin/mdedup.py  |  13 +-
 .../systemds/operator/algorithm/builtin/mice.py    |  15 +-
 .../operator/algorithm/builtin/miceApply.py        |  19 +-
 .../systemds/operator/algorithm/builtin/msvm.py    |  11 +-
 .../operator/algorithm/builtin/msvmPredict.py      |   9 +
 .../operator/algorithm/builtin/multiLogReg.py      |  13 +-
 .../algorithm/builtin/multiLogRegPredict.py        |  10 +-
 .../systemds/operator/algorithm/builtin/na_locf.py |   8 +-
 .../operator/algorithm/builtin/naiveBayes.py       |  12 +-
 .../algorithm/builtin/naiveBayesPredict.py         |   9 +
 .../operator/algorithm/builtin/normalize.py        |   8 +
 .../operator/algorithm/builtin/normalizeApply.py   |  12 +
 .../systemds/operator/algorithm/builtin/outlier.py |   9 +-
 .../operator/algorithm/builtin/outlierByArima.py   |  11 +-
 .../operator/algorithm/builtin/outlierByIQR.py     |  10 +-
 .../algorithm/builtin/outlierByIQRApply.py         |  12 +-
 .../operator/algorithm/builtin/outlierBySd.py      |   9 +-
 .../operator/algorithm/builtin/outlierBySdApply.py |  11 +-
 .../systemds/operator/algorithm/builtin/pca.py     |   7 +-
 .../operator/algorithm/builtin/pcaInverse.py       |  12 +
 .../operator/algorithm/builtin/pcaTransform.py     |  12 +
 .../systemds/operator/algorithm/builtin/pnmf.py    |  13 +-
 .../systemds/operator/algorithm/builtin/ppca.py    |   9 +-
 .../operator/algorithm/builtin/randomForest.py     |  34 +-
 .../systemds/operator/algorithm/builtin/scale.py   |   7 +-
 .../operator/algorithm/builtin/scaleApply.py       |   9 +
 .../operator/algorithm/builtin/scaleMinMax.py      |   7 +
 .../algorithm/builtin/selectByVarThresh.py         |   8 +
 .../systemds/operator/algorithm/builtin/setdiff.py |   8 +
 .../operator/algorithm/builtin/sherlock.py         |  13 +
 .../operator/algorithm/builtin/sherlockPredict.py  |  23 ++
 .../operator/algorithm/builtin/shortestPath.py     |  26 +-
 .../systemds/operator/algorithm/builtin/sigmoid.py |   8 +-
 .../operator/algorithm/builtin/slicefinder.py      |  29 +-
 .../systemds/operator/algorithm/builtin/smote.py   |  11 +-
 .../systemds/operator/algorithm/builtin/softmax.py |   7 +
 .../systemds/operator/algorithm/builtin/split.py   |  12 +-
 .../operator/algorithm/builtin/splitBalanced.py    |   9 +-
 .../operator/algorithm/builtin/stableMarriage.py   |  55 ++-
 .../systemds/operator/algorithm/builtin/statsNA.py |  21 +-
 .../systemds/operator/algorithm/builtin/steplm.py  |  27 +-
 .../operator/algorithm/builtin/stratstats.py       |  60 +++-
 .../algorithm/builtin/symmetricDifference.py       |   8 +
 .../systemds/operator/algorithm/builtin/tSNE.py    |  11 +-
 .../operator/algorithm/builtin/toOneHot.py         |   7 +-
 .../operator/algorithm/builtin/tomeklink.py        |  10 +
 .../operator/algorithm/builtin/topk_cleaning.py    |   5 +-
 .../operator/algorithm/builtin/underSampling.py    |   8 +-
 .../systemds/operator/algorithm/builtin/union.py   |   6 +-
 .../systemds/operator/algorithm/builtin/unique.py  |   7 +
 .../systemds/operator/algorithm/builtin/univar.py  |  10 +-
 .../operator/algorithm/builtin/vectorToCsv.py      |   8 +
 .../operator/algorithm/builtin/winsorize.py        |   9 +-
 .../operator/algorithm/builtin/winsorizeApply.py   |  10 +
 .../systemds/operator/algorithm/builtin/xdummy1.py |   7 +
 .../systemds/operator/algorithm/builtin/xdummy2.py |   7 +
 .../systemds/operator/algorithm/builtin/xgboost.py |  32 +-
 .../builtin/xgboostPredictClassification.py        |  10 +
 .../algorithm/builtin/xgboostPredictRegression.py  |  10 +
 305 files changed, 4355 insertions(+), 3555 deletions(-)

diff --git a/scripts/builtin/WoE.dml b/scripts/builtin/WoE.dml
index d2e5a836f4..003d9b48a6 100644
--- a/scripts/builtin/WoE.dml
+++ b/scripts/builtin/WoE.dml
@@ -19,12 +19,21 @@
 #
 #-------------------------------------------------------------
 
-
-#######################################################################
 # function Weight of evidence / information gain
-# Inputs: The input dataset X, and  mask of the columns
-# Output: categorical columns are replaced with their frequencies
-#######################################################################
+#
+# INPUT:
+# --------------------------------------------------
+# X       ---
+# Y       ---
+# mask    ---
+# --------------------------------------------------
+#
+# OUTPUT:
+# ------------------------------------------------
+# X              ---
+# Y              ---
+# entropyMatrix  ---
+# ------------------------------------------------
 
 m_WoE = function(Matrix[Double] X, Matrix[Double] Y, Matrix[Double] mask)
 return (Matrix[Double] X, Matrix[Double] Y, Matrix[Double] entropyMatrix) {
diff --git a/scripts/builtin/WoEApply.dml b/scripts/builtin/WoEApply.dml
index ef84d896e6..c27fae0d05 100644
--- a/scripts/builtin/WoEApply.dml
+++ b/scripts/builtin/WoEApply.dml
@@ -19,6 +19,20 @@
 #
 #-------------------------------------------------------------
 
+# function Weight of evidence / information gain apply on new data
+#
+# INPUT:
+# --------------------------------------------------
+# X              ---
+# Y              ---
+# entropyMatrix  ---
+# --------------------------------------------------
+#
+# OUTPUT:
+# ------------------------------------------------
+# X              ---
+# ------------------------------------------------
+
 m_WoEApply = function(Matrix[Double] X, Matrix[Double] Y, Matrix[Double] entropyMatrix)
 return (Matrix[Double] X) {
   
diff --git a/scripts/builtin/_genBuiltinDocs.py b/scripts/builtin/_genBuiltinDocs.py
deleted file mode 100644
index abf5f16dd9..0000000000
--- a/scripts/builtin/_genBuiltinDocs.py
+++ /dev/null
@@ -1,370 +0,0 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-#-------------------------------------------------------------
-# This python script does take all files within the current directory and creates a corresponding markdown file out of the existing headers
-#   If there are specific parts of the header missing a warning within the terminal will be displayed
-#   The finished markdown file is then placed in the same directory
-#-------------------------------------------------------------
-# OUTPUTS
-#-------------------------------------------------------------
-#   Name                            Type    Default     Meaning
-#-------------------------------------------------------------
-#   SystemDs_Builtin_Markdown       .md    -------     A .md file of all files with a parsable header
-#-------------------------------------------------------------
-
-import re
-import os
-from mdutils.mdutils import MdUtils
-
-file_data_array = [] #Contains all valid header information
-
-incorrect_file_data = [] #Contains all file names with invalid header
-
-ALLOW_DIFFERENT_SIZED_HEADERS = False #Global FLAG to identify if Headers with only 3 arguments are valid or not
-
-THROW_EXCEPTION = False #Global FLAG to throw Exception on existing warnings - If set True no md file will be created unless all files have valid headers!
-
-
-#This class contains all necessary variables for the complete representation of the markdown file
-class File_data:
-    def __init__(self, inputParameterCount, inputParams, I_HEADER_MISSING, I_DESCRIPTION_MISSING, outputParameterCount, outputParams, O_HEADER_MISSING, O_DESCRIPTION_MISSING, description, additional, fileName):
-        self.param_count = inputParameterCount
-        self.param = inputParams
-        self.input_header_missing = I_HEADER_MISSING
-        self.input_description_missing = I_DESCRIPTION_MISSING
-        self.output_param_count = outputParameterCount
-        self.output = outputParams
-        self.output_header_missing = O_HEADER_MISSING
-        self.output_description_missing = O_DESCRIPTION_MISSING
-        self.description = description
-        self.additional = additional
-        self.fileName = fileName
-
-
-#class to make colored output possible
-class bcolors:
-    OKGREEN = '\033[92m'
-    WARNING = '\033[93m'
-    FAIL = '\033[91m'
-    ENDC = '\033[0m'
-    UNDERLINE = '\033[4m'
-
-
-#This functions simply iterates over all files, given at the specific location and creates the corresponding markdown files
-def callWithAbsoluteParameters():
-    directory = os.path.dirname(os.path.realpath(__file__))
-
-    for entry in os.scandir(directory):
-        if entry.path.endswith(".dml") and entry.is_file():
-            with open(entry.path) as f:
-                parseFile(f.readlines(), f.name)
-
-    cleaned_array = cleanFileDataArray()
-    createMarkDownFile(cleaned_array)
-
-
-#Function to start parsing the file, it skips the lincence agreement and then forwards the rest to the parse Information Function
-def parseFile(lines, fileName):
-    fileName = os.path.basename(fileName)
-    fileName = fileName.split('.')[0]
-
-    EMPTY_LINES = True
-    markDownArray = []
-    license_break_count = 0
-
-    lineBreakRegex = re.compile('#.[\-]+')  # Match all lines which look like #---- or # ---
-    emptyLineRegex = re.compile('^\s*$') # Matches all empty lines within the header
-    emptyCommentLineRegex = re.compile('^#\s{1,}$')  # Match lines whick look like this # followed by abritrary number of spaces
-    commentLineRegex = re.compile('^#.*$') #Matches any line with a # in front
-    functionStartRegex = re.compile('^[A-Za-z_]*\s*=+') #Because we know that the start of the function name must be within the range of a-Z we can simply search for it to find the start of the program
-
-    for line in lines:
-        if lineBreakRegex.match(line) and license_break_count < 2:  #Skip Apache License info
-            license_break_count += 1
-            continue
-        elif(license_break_count < 2):   #skip all lines within the apache license header
-            continue
-        elif(license_break_count >= 2) and EMPTY_LINES and (emptyLineRegex.match(line) or emptyCommentLineRegex.match(line)):
-            continue
-        elif functionStartRegex.match(line):
-            break
-        elif bool(commentLineRegex.match(line)) is False:
-            continue
-        else:
-            EMPTY_LINES = False
-            markDownArray.append(line)
-
-    if len(markDownArray) > 0: #If our markDownArray is empty we append the name to the incorrect file_data_array
-        parseInformation(markDownArray, fileName)
-    else:
-        incorrect_file_data.append(fileName)
-
-
-def parseInformation(markDownArray, fileName):
-    markDownArray = [e[1:] for e in markDownArray]  # First I cut the first character of all lines because it is simply a # character
-
-    INPUT = False #Then I start to set some global flags to identify the status of my program
-    INPUT_FINISHED = False
-    OUTPUT = False
-    OUTPUT_FINISHED = False
-    DESCRIPTION = True  #When I start parsing the File, the first line(s) should ALWAYS contain the description therefore I start with True
-    commentLineCount = 0 #To track how many comment_lines where found inside the headerRegex
-    additionalInfos = [] #If there are information which do not match the syntax or are not identifyable it is put inside this array
-    description = [] #contains the description of the corresponding file
-    I_DESCRIPTION_MISSING = False
-    I_HEADER_MISSING = False
-    inputParameterCount = 0
-    inputParameters = []
-    O_DESCRIPTION_MISSING = False
-    O_HEADER_MISSING = False
-    outputParameterCount = 0
-    outputParameters = []
-
-    commentLineRegex = re.compile('^#\s{0,}$')
-    emptyLineRegex = re.compile('^\s*$')
-    headerRegex = re.compile('\s{1,}[A-Za-z]{2,}')
-    inputStringRegex = re.compile('^#*\s{1,}input', flags=re.I)
-    outputStringRegex = re.compile('^#*\s{0,}(output|return)', flags=re.I)
-
-    for line in markDownArray:
-        if INPUT:
-            inputParameterCount, INPUT, INPUT_FINISHED, I_HEADER_MISSING, commentLineCount = parseParam(inputParameters, inputParameterCount, INPUT, INPUT_FINISHED, I_HEADER_MISSING, commentLineCount, additionalInfos, line)
-
-        if OUTPUT:
-            outputParameterCount, OUTPUT, OUTPUT_FINISHED, O_HEADER_MISSING, commentLineCount = parseParam(outputParameters, outputParameterCount, OUTPUT, OUTPUT_FINISHED, O_HEADER_MISSING, commentLineCount, additionalInfos, line)
-
-        if headerRegex.match(line) and DESCRIPTION:
-            if inputStringRegex.match(line):
-                DESCRIPTION = False
-                INPUT = True
-                continue
-
-            description.append(line)
-        else:
-            DESCRIPTION = False
-
-        if emptyLineRegex.match(line) or commentLineRegex.match(line):
-            continue
-
-        if inputStringRegex.match(line) and INPUT_FINISHED == False:
-            INPUT = True
-
-        if outputStringRegex.match(line) and OUTPUT_FINISHED is False:
-            OUTPUT = True
-
-    file_data_array.append(File_data(inputParameterCount, inputParameters, I_HEADER_MISSING, I_DESCRIPTION_MISSING, outputParameterCount, outputParameters, O_HEADER_MISSING, O_DESCRIPTION_MISSING, description, additionalInfos, fileName))
-
-
-def createMarkDownFile(file_array):
-    mdFile = MdUtils(file_name='_genBuiltinDocs_Out', title='Markdown Files for scripts')
-    mdFile.new_header(level=1, title='Overview')  # style is set 'atx' format by default.
-
-    for entry in file_array:
-        inputParameters = entry.param
-        outputParameters = entry.output
-        inputParameterCount = entry.param_count
-        outputParameterCount = entry.output_param_count
-        description = entry.description
-        additionalInfos = entry.additional
-        fileName = entry.fileName
-
-        #First I strip all the newline characters from the end of each string and replace the newlines within strings wit spaces
-        inputParameters = replaceNewlineCharacter(inputParameters)
-        outputParameters = replaceNewlineCharacter(outputParameters)
-        description = replaceNewlineCharacter(description)
-
-        titleString = fileName + "-Function"
-        mdFile.new_header(level=2, title=titleString)  # style is set 'atx' format by default.
-
-        for line in description:
-            mdFile.new_paragraph(line)
-
-        mdFile.new_header(level=3, title="Usage")
-        usage = fileName + "("
-
-        for argument in inputParameters[4::4]:
-            usage += (argument + ", ")
-
-        if len(inputParameters) > 0:
-            usage = usage[:-2] + ')'
-        else:
-            usage = usage + ')'
-
-        mdFile.insert_code(usage, language='python')
-        mdFile.new_header(level=3, title="Arguments")
-
-        if inputParameterCount > 0:
-            rows = int(len(inputParameters)/inputParameterCount)
-            mdFile.new_table(columns=inputParameterCount, rows=rows, text=inputParameters, text_align='center')
-
-        mdFile.new_header(level=3, title="Returns")
-
-        if outputParameterCount > 0:
-            rows = int(len(outputParameters) / outputParameterCount)
-            mdFile.new_table(columns=outputParameterCount, rows=rows, text=outputParameters, text_align='center')
-        else:
-            for line in additionalInfos:
-                mdFile.new_paragraph(line)
-
-    mdFile.create_md_file()
-
-
-def cleanFileDataArray():
-    #Here I go through all elements of the file_data_array check if everything is correct and if not create a file  or meaningfull output
-    new_file_data_array = []
-    missingValues = False
-    for entry in file_data_array:
-        missingParts = False
-        missingParameterString = ''
-        inputParameterCount = entry.param_count
-        I_HEADER_MISSING = entry.input_header_missing
-        I_DESCRIPTION_MISSING = entry.input_description_missing
-        outputParameterCount = entry.output_param_count
-        O_HEADER_MISSING = entry.output_header_missing
-        O_DESCRIPTION_MISSING = entry.output_description_missing
-        description = entry.description
-        fileName = entry.fileName
-
-        if len(description) == 0:
-            missingParts = True
-            missingParameterString += "There is no description of the function available!\n"
-
-        if I_HEADER_MISSING is True:
-            missingParts = True
-            missingParameterString += "There is either no input parameter header given or the syntax is incorrect!\n"
-
-        if I_DESCRIPTION_MISSING is True:
-            missingParts = True
-            missingParameterString += "There is either no dedicated Input section header or syntax is incorrect!\n"
-
-        if inputParameterCount == 0:
-            missingParts = True
-            missingParameterString += "There are either no parameters given within the file or syntax is incorrect!\n"
-
-        if O_DESCRIPTION_MISSING is True:
-            missingParts = True
-            missingParameterString += "There is either no dedicated Output section header or syntax is incorrect!\n"
-
-        if O_HEADER_MISSING is True:
-            missingParts = True
-            missingParameterString += "There is either no output parameter header given or the syntax is incorrect!\n"
-
-        if outputParameterCount == 0:
-            os.system('color')
-            missingParts = True
-            missingParameterString += "There are either no output parameters given within the file or syntax is incorrect!\n"
-
-        if missingParts:
-            missingValues = True
-            os.system('color')
-            print(f'{bcolors.WARNING}For the File: {bcolors.UNDERLINE}{bcolors.OKGREEN}' +  fileName + f'{bcolors.ENDC}{bcolors.WARNING} following errors occured: {bcolors.ENDC}')
-            print(missingParameterString)
-        else:
-            new_file_data_array.append(entry)
-
-    for entry in incorrect_file_data:
-        os.system('color')
-        print(f'{bcolors.WARNING}For the File: {bcolors.UNDERLINE}{bcolors.OKGREEN}' + entry + f'{bcolors.ENDC}{bcolors.WARNING} no header was found at all!{bcolors.ENDC}')
-
-    if len(incorrect_file_data) > 0:
-        missingValues = True
-
-    if missingValues and THROW_EXCEPTION is True:
-        raise Exception(f'{bcolors.FAIL}At least one file does not fit the required Syntax!{bcolors.ENDC}')
-
-    return new_file_data_array
-
-
-def parseParam(param_array, param_count, FLAG, FLAG_FIN, FLAG_HEADER_MISSING, comment_line_count, additional_infos, line):
-    lineBreakRegex = re.compile('.[\-]+')  # Match all lines which look like ---- or  ---
-    commentLineRegex = re.compile('^#\s{0,}$')
-    emptyLineRegex = re.compile('^\s*$')
-    outputStringRegex = re.compile('^#*\s{0,}(output|return)', flags=re.I)  # #\s{1,}.*input  <-- other possibility to find all input fields, problem here that it matches all lines with input in the test
-    simpleWordRegex = re.compile('[A-Za-z]+')
-    continuedParamDescribtionRegex = re.compile('^#+\s{10,}')  # if my parameter input matches this i obviously have a continuation of the param describtion
-    arbritraryNumberOfSpacesRegex = re.compile('\s{1,}')
-
-    if lineBreakRegex.match(line):
-        comment_line_count += 1
-        if comment_line_count > 2:
-            FLAG = False
-            comment_line_count = 0
-        return param_count, FLAG, FLAG_FIN, FLAG_HEADER_MISSING, comment_line_count
-
-    if comment_line_count == 1:
-        # current line should only contain parameter names
-        # In case there are no parameter names given but only the parameters, we set the parameter header missing Flag
-        if len(arbritraryNumberOfSpacesRegex.split(line)) >= 7:
-            FLAG_HEADER_MISSING = True
-            comment_line_count += 1
-            param_count = 4
-            return param_count, FLAG, FLAG_FIN, FLAG_HEADER_MISSING, comment_line_count
-
-        for parameter in line.split(' '):
-            if simpleWordRegex.match(parameter):
-                param_array.append(parameter)
-                param_count += 1
-
-        #Here I check if there are headers with a different amount of parameters if yes and the global flag is set to false I return a Warning
-        if param_count < 4 and ALLOW_DIFFERENT_SIZED_HEADERS is False:
-            FLAG_HEADER_MISSING = True
-            return param_count, FLAG, FLAG_FIN, FLAG_HEADER_MISSING, comment_line_count
-
-    if comment_line_count == 2:
-        if continuedParamDescribtionRegex.match(line):
-            splitted_line = continuedParamDescribtionRegex.split(line)
-            if (len(splitted_line)) == 2:
-                param_array[-1] += splitted_line[1]
-        elif commentLineRegex.match(line) or emptyLineRegex.match(line):
-            FLAG_FIN = True
-            FLAG = False
-            comment_line_count = 0
-        else:
-            splitParameterString(line, param_array, param_count)
-    else:
-        additional_infos.append(line)
-
-    if (outputStringRegex.match(line) or commentLineRegex.match(line)) and len(param_array) > 1:
-        FLAG = False
-        FLAG_FIN = True
-        comment_line_count = 0
-
-    return param_count, FLAG, FLAG_FIN, FLAG_HEADER_MISSING, comment_line_count
-
-
-def splitParameterString(line, array, size):
-    arbritraryNumberOfSpacesRegex = re.compile('\s{1,}')
-    #I know that the first occurence of [A-Za-z]\s is the first Parameter so I can split here
-    new_line = arbritraryNumberOfSpacesRegex.split(line, size)
-    if len(new_line) > size:
-        for i in range(size):
-            array.append(new_line[i+1])
-
-
-def replaceNewlineCharacter(array):
-    array = list(map(str.strip, array))
-    array = [s.replace("\n", " ") for s in array]
-    return array
-
-
-if __name__ == '__main__':
-    callWithAbsoluteParameters()
\ No newline at end of file
diff --git a/scripts/builtin/abstain.dml b/scripts/builtin/abstain.dml
index aaf5f43a7d..6d9035101d 100644
--- a/scripts/builtin/abstain.dml
+++ b/scripts/builtin/abstain.dml
@@ -19,25 +19,22 @@
 #
 #-------------------------------------------------------------
 
-# This function calls the multiLogReg-function in which solves Multinomial Logistic Regression using Trust Region method
+# This function calls the multiLogReg-function, which solves Multinomial
+# Logistic Regression using the Trust Region method
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME            TYPE                DEFAULT   MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# X               Matrix[Double]      ---       Location to read the matrix of feature vectors
-# Y               Matrix[Double]      ---       Location to read the matrix with category labels
-# threshold       Double              0.0       ---
-# verbose         Boolean             FALSE     flag specifying if logging information should be printed
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# -------------------------------------------------------------------------------------
+# X            Location to read the matrix of feature vectors
+# Y            Location to read the matrix with category labels
+# threshold    ---
+# verbose      flag specifying if logging information should be printed
+# -------------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME      TYPE             MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# Xout   Matrix[Double]   ---
-# Yout   Matrix[Double]   ---
-# ----------------------------------------------------------------------------------------------------------------------
+# -------------------------------------------------------------------------------------
+# Xout     ---
+# Yout     ---
+# -------------------------------------------------------------------------------------
 
 m_abstain = function(Matrix[Double] X, Matrix[Double] Y, Double threshold, Boolean verbose = FALSE)
 return (Matrix[Double] Xout, Matrix[Double] Yout)
diff --git a/scripts/builtin/als.dml b/scripts/builtin/als.dml
index c63323da6e..e7ab63f06b 100644
--- a/scripts/builtin/als.dml
+++ b/scripts/builtin/als.dml
@@ -23,37 +23,33 @@
 # using different implementations of the Alternating-Least-Squares (ALS) algorithm.
 # Matrices U and V are computed by minimizing a loss function (with regularization).
 #
-# INPUT   PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME    TYPE             DEFAULT    MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# X       Matrix[Double]   ---        Location to read the input matrix X to be factorized
-# rank    Integer          10         Rank of the factorization
-# regType String           "L2"       Regularization: 
-#                                      "L2" = L2 regularization;
-#                                         f (U, V) = 0.5 * sum (W * (U %*% V - X) ^ 2)
-#                                                  + 0.5 * reg * (sum (U ^ 2) + sum (V ^ 2))
-#                                      "wL2" = weighted L2 regularization
-#                                         f (U, V) = 0.5 * sum (W * (U %*% V - X) ^ 2)
-#                                                  + 0.5 * reg * (sum (U ^ 2 * row_nonzeros) 
-#                                                  + sum (V ^ 2 * col_nonzeros))
-# reg     Double           0.000001   Regularization parameter, no regularization if 0.0
-# maxi    Integer          50         Maximum number of iterations
-# check   Boolean          TRUE       Check for convergence after every iteration, i.e., updating U and V once
-# thr     Double           0.0001     Assuming check is set to TRUE, the algorithm stops and convergence is declared 
-#                                     if the decrease in loss in any two consecutive iterations falls below this threshold; 
-#                                     if check is FALSE thr is ignored
-# seed    Integer          1324521    The seed to random parts of the algorithm
-# verbose Boolean          TRUE       If the algorithm should run verbosely
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# -------------------------------------------------------------------------------------------
+# X        Location to read the input matrix X to be factorized
+# rank     Rank of the factorization
+# regType  Regularization: 
+#           "L2" = L2 regularization;
+#              f (U, V) = 0.5 * sum (W * (U %*% V - X) ^ 2)
+#                       + 0.5 * reg * (sum (U ^ 2) + sum (V ^ 2))
+#           "wL2" = weighted L2 regularization
+#              f (U, V) = 0.5 * sum (W * (U %*% V - X) ^ 2)
+#                       + 0.5 * reg * (sum (U ^ 2 * row_nonzeros) 
+#                       + sum (V ^ 2 * col_nonzeros))
+# reg      Regularization parameter, no regularization if 0.0
+# maxi     Maximum number of iterations
+# check    Check for convergence after every iteration, i.e., updating U and V once
+# thr      Assuming check is set to TRUE, the algorithm stops and convergence is declared 
+#          if the decrease in loss in any two consecutive iterations falls below this threshold; 
+#          if check is FALSE thr is ignored
+# seed     The seed for the random parts of the algorithm
+# verbose  If the algorithm should run verbosely
+# -------------------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME  TYPE           MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# U     Matrix         An m x r matrix where r is the factorization rank
-# V     Matrix         An m x r matrix where r is the factorization rank
-# ----------------------------------------------------------------------------------------------------------------------
+# -------------------------------------------------------------------------------------------
+# U     An m x r matrix where r is the factorization rank
+# V     An m x r matrix where r is the factorization rank
+# -------------------------------------------------------------------------------------------
 
 m_als = function(Matrix[Double] X, Integer rank = 10, String regType = "L2", Double reg = 0.000001,
   Integer maxi = 50, Boolean check = TRUE, Double thr = 0.0001, Integer seed = 1342516, Boolean verbose = TRUE)
diff --git a/scripts/builtin/alsCG.dml b/scripts/builtin/alsCG.dml
index 1a8697b4d8..7c4f546656 100644
--- a/scripts/builtin/alsCG.dml
+++ b/scripts/builtin/alsCG.dml
@@ -23,37 +23,33 @@
 # using the Alternating-Least-Squares (ALS) algorithm with conjugate gradient.
 # Matrices U and V are computed by minimizing a loss function (with regularization).
 #
-# INPUT   PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME    TYPE             DEFAULT    MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# X       Matrix[Double]   ---        Location to read the input matrix X to be factorized
-# rank    Integer          10         Rank of the factorization
-# regType String           "L2"       Regularization:
-#                                     "L2" = L2 regularization;
-#                                        f (U, V) = 0.5 * sum (W * (U %*% V - X) ^ 2)
-#                                                 + 0.5 * reg * (sum (U ^ 2) + sum (V ^ 2))
-#                                     "wL2" = weighted L2 regularization
-#                                        f (U, V) = 0.5 * sum (W * (U %*% V - X) ^ 2)
-#                                                 + 0.5 * reg * (sum (U ^ 2 * row_nonzeros)
-#                                                 + sum (V ^ 2 * col_nonzeros))
-# reg    Double            0.000001   Regularization parameter, no regularization if 0.0
-# maxi   Integer           50         Maximum number of iterations
-# check  Boolean           TRUE       Check for convergence after every iteration, i.e., updating U and V once
-# thr    Double            0.0001     Assuming check is set to TRUE, the algorithm stops and convergence is declared
-#                                     if the decrease in loss in any two consecutive iterations falls below this threshold;
-#                                     if check is FALSE thr is ignored
-# seed    Integer          1324521    The seed to random parts of the algorithm
-# verbose Boolean          TRUE       If the algorithm should run verbosely
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# --------------------------------------------------------------------------------------------
+# X         Location to read the input matrix X to be factorized
+# rank      Rank of the factorization
+# regType   Regularization:
+#           "L2" = L2 regularization;
+#              f (U, V) = 0.5 * sum (W * (U %*% V - X) ^ 2)
+#                       + 0.5 * reg * (sum (U ^ 2) + sum (V ^ 2))
+#           "wL2" = weighted L2 regularization
+#              f (U, V) = 0.5 * sum (W * (U %*% V - X) ^ 2)
+#                       + 0.5 * reg * (sum (U ^ 2 * row_nonzeros)
+#                       + sum (V ^ 2 * col_nonzeros))
+# reg       Regularization parameter, no regularization if 0.0
+# maxi      Maximum number of iterations
+# check     Check for convergence after every iteration, i.e., updating U and V once
+# thr       Assuming check is set to TRUE, the algorithm stops and convergence is declared
+#           if the decrease in loss in any two consecutive iterations falls below this threshold;
+#           if check is FALSE thr is ignored
+# seed      The seed for the random parts of the algorithm
+# verbose   If the algorithm should run verbosely
+# --------------------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME  TYPE       MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# U     Matrix[Double]     An m x r matrix where r is the factorization rank
-# V     Matrix[Double]     An m x r matrix where r is the factorization rank
-# ----------------------------------------------------------------------------------------------------------------------
+# --------------------------------------------------------------------------------------------
+# U     An m x r matrix where r is the factorization rank
+# V     An m x r matrix where r is the factorization rank
+# --------------------------------------------------------------------------------------------
 
 m_alsCG = function(Matrix[Double] X, Integer rank = 10, String regType = "L2", Double reg = 0.000001, Integer maxi = 50,
  Boolean check = TRUE, Double thr = 0.0001, Integer seed = 132521, Boolean verbose = TRUE)
diff --git a/scripts/builtin/alsDS.dml b/scripts/builtin/alsDS.dml
index 2a35d4fa52..636f0ef488 100644
--- a/scripts/builtin/alsDS.dml
+++ b/scripts/builtin/alsDS.dml
@@ -24,29 +24,25 @@
 # approximate factorization of a low-rank matrix V into two matrices L and R.
 # Matrices L and R are computed by minimizing a loss function (with regularization).
 #
-# INPUT   PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME    TYPE             DEFAULT    MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# X       Matrix[Double]   ---        Location to read the input matrix V to be factorized
-# rank    Integer          10         Rank of the factorization
-# reg     Double           0.000001   Regularization parameter, no regularization if 0.0
-# maxi    Integer          50         Maximum number of iterations
-# check   Boolean          FALSE      Check for convergence after every iteration, i.e., updating L and R once
-# thr     Double           0.0001     Assuming check is set to TRUE, the algorithm stops and convergence is declared
-#                                     if the decrease in loss in any two consecutive iterations falls below this threshold;
-#                                     if check is FALSE thr is ignored
-# seed    Integer          1324521    The seed to random parts of the algorithm
-# verbose Boolean          TRUE       If the algorithm should run verbosely
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# -------------------------------------------------------------------------------------------
+# X        Location to read the input matrix V to be factorized
+# rank     Rank of the factorization
+# reg      Regularization parameter, no regularization if 0.0
+# maxi     Maximum number of iterations
+# check    Check for convergence after every iteration, i.e., updating L and R once
+# thr      Assuming check is set to TRUE, the algorithm stops and convergence is declared
+#          if the decrease in loss in any two consecutive iterations falls below this threshold;
+#          if check is FALSE thr is ignored
+# seed     The seed for the random parts of the algorithm
+# verbose  If the algorithm should run verbosely
+# -------------------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME  TYPE               MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# U     Matrix[Double]     An m x r matrix where r is the factorization rank
-# V     Matrix[Double]     An m x r matrix where r is the factorization rank
-# ----------------------------------------------------------------------------------------------------------------------
+# -------------------------------------------------------------------------------------------
+# U     An m x r matrix where r is the factorization rank
+# V     An m x r matrix where r is the factorization rank
+# -------------------------------------------------------------------------------------------
 
 m_alsDS = function(Matrix[Double] X, Integer rank = 10, Double reg = 0.000001, 
   Integer maxi = 50, Boolean check = FALSE, Double thr = 0.0001, Integer seed = 321452, Boolean verbose = TRUE)
diff --git a/scripts/builtin/alsPredict.dml b/scripts/builtin/alsPredict.dml
index d6181f8434..18592147ef 100644
--- a/scripts/builtin/alsPredict.dml
+++ b/scripts/builtin/alsPredict.dml
@@ -23,22 +23,18 @@
 # using 2 factor matrices L and R. We assume that all users have rates 
 # at least once and all items have been rates at least once.
 #
-# INPUT   PARAMETERS:
-# ---------------------------------------------------------------------------------------------
-# NAME    TYPE             DEFAULT  MEANING
-# ---------------------------------------------------------------------------------------------
-# userIDs Matrix[Double]   ---      Column vector of user-ids (n x 1)
-# I       Matrix[Double]   ---      Indicator matrix user-id x user-id to exclude from scoring
-# L       Matrix[Double]   ---      The factor matrix L: user-id x feature-id
-# R       Matrix[Double]   ---      The factor matrix R: feature-id x item-id
-# ---------------------------------------------------------------------------------------------
+# INPUT: 
+# --------------------------------------------------------------------
+# userIDs  Column vector of user-ids (n x 1)
+# I        Indicator matrix user-id x user-id to exclude from scoring
+# L        The factor matrix L: user-id x feature-id
+# R        The factor matrix R: feature-id x item-id
+# --------------------------------------------------------------------
 #
 # OUTPUT:
-# ---------------------------------------------------------------------------------------------
-# NAME  TYPE             MEANING
-# ---------------------------------------------------------------------------------------------
-# Y     Matrix[Double]   The output user-id/item-id/score#
-# ---------------------------------------------------------------------------------------------
+# ----------------------------------------------------------------------------
+# Y     The output user-id/item-id/score#
+# ----------------------------------------------------------------------------
 
 m_alsPredict = function(Matrix[Double] userIDs, Matrix[Double] I, Matrix[Double] L, Matrix[Double] R)
   return (Matrix[Double] Y)
diff --git a/scripts/builtin/alsTopkPredict.dml b/scripts/builtin/alsTopkPredict.dml
index 90d7cd833e..0c68646daa 100644
--- a/scripts/builtin/alsTopkPredict.dml
+++ b/scripts/builtin/alsTopkPredict.dml
@@ -23,25 +23,20 @@
 # using 2 factor matrices L and R. We assume that all users have rates 
 # at least once and all items have been rates at least once.
 #
-# INPUT   PARAMETERS:
-# -----------------------------------------------------------------------------------------------------------------------------
-# NAME    TYPE             DEFAULT  MEANING
-# -----------------------------------------------------------------------------------------------------------------------------
-# userIDs Matrix[Double]   ---      Column vector of user-ids (n x 1)
-# I       Matrix[Double]   ---      Indicator matrix user-id x user-id to exclude from scoring
-# L       Matrix[Double]   ---      The factor matrix L: user-id x feature-id
-# R       Matrix[Double]   ---      The factor matrix R: feature-id x item-id
-# K       Int              5        The number of top-K items
-#
-# ------------------------------------------------------------------------------------------------------------------------------
+# INPUT:   
+# ----------------------------------------------------------------------------------------------------
+# userIDs  Column vector of user-ids (n x 1)
+# I        Indicator matrix user-id x user-id to exclude from scoring
+# L        The factor matrix L: user-id x feature-id
+# R        The factor matrix R: feature-id x item-id
+# K        The number of top-K items
+# -----------------------------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ------------------------------------------------------------------------------------------------------------------------------
-# NAME    TYPE             MEANING
-# ------------------------------------------------------------------------------------------------------------------------------
-# TopIxs  Matrix[Double]   A matrix containing the top-K item-ids with highest predicted ratings for the specified users (rows)
-# TopVals Matrix[Double]   A matrix containing the top-K predicted ratings for the specified users (rows)
-# ------------------------------------------------------------------------------------------------------------------------------
+# --------------------------------------------------------------------------------------------------------------
+# TopIxs   A matrix containing the top-K item-ids with highest predicted ratings for the specified users (rows)
+# TopVals  A matrix containing the top-K predicted ratings for the specified users (rows)
+# --------------------------------------------------------------------------------------------------------------
 
 m_alsTopkPredict = function(Matrix[Double] userIDs, Matrix[Double] I, Matrix[Double] L, Matrix[Double] R, Integer K = 5)
   return (Matrix[Double] TopIxs, Matrix[Double] TopVals)
diff --git a/scripts/builtin/apply_pipeline.dml b/scripts/builtin/apply_pipeline.dml
index d7cee4da3d..7c268206ea 100644
--- a/scripts/builtin/apply_pipeline.dml
+++ b/scripts/builtin/apply_pipeline.dml
@@ -22,31 +22,25 @@
 # This script will read the dirty and clean data, then it will apply the best pipeline on dirty data
 # and then will classify both cleaned dataset and check if the cleaned dataset is performing same as original dataset
 # in terms of classification accuracy
-
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME              TYPE               DEFAULT            MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# trainData         Frame[Unknown]      ---
-# testData          Frame[Unknown]      ---
-# metaData          Frame[Unknown]      as.frame("NULL")
-# lp                Frame[Unknown]      ---
-# pip               Frame[Unknown]      ---
-# hp                Frame[Unknown]      ---
-# evaluationFunc    String              ---
-# evalFunHp         Matrix[Double]      ---
-# isLastLabel       Boolean             TRUE
-# correctTypos      Boolean             FALSE
 #
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# --------------------------------------------------------------------------------
+# trainData         ---
+# testData          ---
+# metaData          ---
+# lp                ---
+# pip               ---
+# hp                ---
+# evaluationFunc    ---
+# evalFunHp         ---
+# isLastLabel       ---
+# correctTypos      ---
+# --------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME           TYPE             MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# scores         Matrix[Double]   ---
-# ----------------------------------------------------------------------------------------------------------------------
-
+# -----------------------------------------------------------------------------------------------
+# scores   ---
+# -----------------------------------------------------------------------------------------------
 
 source("scripts/builtin/topk_cleaning.dml") as topk;
 
diff --git a/scripts/builtin/arima.dml b/scripts/builtin/arima.dml
index e1408b6123..f2645fcbc4 100644
--- a/scripts/builtin/arima.dml
+++ b/scripts/builtin/arima.dml
@@ -21,29 +21,25 @@
 
 # Builtin function that implements ARIMA
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME              TYPE              DEFAULT   MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# X                 Matrix[Double]    ---       The input Matrix to apply Arima on.
-# max_func_invoc    Int               1000
-# p                 Int               0         non-seasonal AR order
-# d                 Int               0         non-seasonal differencing order
-# q                 Int               0         non-seasonal MA order
-# P                 Int               0         seasonal AR order
-# D                 Int               0         seasonal differencing order
-# Q                 Int               0         seasonal MA order
-# s                 Int               1         period in terms of number of time-steps
-# include_mean      Boolean           FALSE     center to mean 0, and include in result
-# solver            String            jacobi    solver, is either "cg" or "jacobi"
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# ------------------------------------------------------------------------------------------
+# X                 The input Matrix to apply Arima on.
+# max_func_invoc    ---
+# p                 non-seasonal AR order
+# d                 non-seasonal differencing order
+# q                 non-seasonal MA order
+# P                 seasonal AR order
+# D                 seasonal differencing order
+# Q                 seasonal MA order
+# s                 period in terms of number of time-steps
+# include_mean      center to mean 0, and include in result
+# solver            solver, is either "cg" or "jacobi"
+# ------------------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME              TYPE                     MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# best_point        Matrix[Double]           The calculated coefficients
-# ----------------------------------------------------------------------------------------------------------------------
+# ----------------------------------------------------------------------------------------
+# best_point   The calculated coefficients
+# ----------------------------------------------------------------------------------------
 
 m_arima = function(Matrix[Double] X, Integer max_func_invoc=1000, Integer p=0,
   Integer d=0, Integer q=0, Integer P=0, Integer D=0, Integer Q=0, Integer s=1,
diff --git a/scripts/builtin/autoencoder_2layer.dml b/scripts/builtin/autoencoder_2layer.dml
index 3028f6914c..ae2d30e67a 100644
--- a/scripts/builtin/autoencoder_2layer.dml
+++ b/scripts/builtin/autoencoder_2layer.dml
@@ -27,41 +27,37 @@
 # Currently, tanh is set to be the activation function. 
 # By re-implementing 'func' DML-bodied function, one can change the activation.
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME         TYPE             DEFAULT  MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# X            Matrix[Double]   ---      Filename where the input is stored
-# num_hidden1  Integer          ---      Number of neurons in the 1st hidden layer
-# num_hidden2  Integer          ---      Number of neurons in the 2nd hidden layer
-# max_epochs   Integer          ---      Number of epochs to train for
-# full_obj     Boolean          FALSE    If TRUE, Computes objective function value (squared-loss)
-#                                        at the end of each epoch. Note that, computing the full 
-#                                        objective can take a lot of time. 
-# batch_size   Integer          256      Mini-batch size (training parameter)
-# step         Double           1e-5     Initial step size (training parameter)
-# decay        Double           0.95     Decays step size after each epoch (training parameter)
-# mu           Double           0.9      Momentum parameter (training parameter)
-# W1_rand      Matrix[Double]   Empty    Weights might be initialized via input matrices
-# W2_rand      Matrix[Double]   Empty    
-# W3_rand      Matrix[Double]   Empty    
-# W4_rand      Matrix[Double]   Empty
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# ---------------------------------------------------------------------------------------------
+# X             Filename where the input is stored
+# num_hidden1   Number of neurons in the 1st hidden layer
+# num_hidden2   Number of neurons in the 2nd hidden layer
+# max_epochs    Number of epochs to train for
+# full_obj      If TRUE, computes objective function value (squared-loss)
+#               at the end of each epoch. Note that, computing the full 
+#               objective can take a lot of time. 
+# batch_size    Mini-batch size (training parameter)
+# step          Initial step size (training parameter)
+# decay         Decays step size after each epoch (training parameter)
+# mu            Momentum parameter (training parameter)
+# W1_rand       Weights might be initialized via input matrices
+# W2_rand       --- 
+# W3_rand       ---
+# W4_rand       ---
+# ---------------------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME      TYPE             MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# W1_out   Matrix[Double]    Matrix storing weights between input layer and 1st hidden layer
-# b1_out   Matrix[Double]    Matrix storing bias between input layer and 1st hidden layer
-# W2_out   Matrix[Double]    Matrix storing weights between 1st hidden layer and 2nd hidden layer
-# b2_out   Matrix[Double]    Matrix storing bias between 1st hidden layer and 2nd hidden layer
-# W3_out   Matrix[Double]    Matrix storing weights between 2nd hidden layer and 3rd hidden layer
-# b3_out   Matrix[Double]    Matrix storing bias between 2nd hidden layer and 3rd hidden layer
-# W4_out   Matrix[Double]    Matrix storing weights between 3rd hidden layer and output layer
-# b4_out   Matrix[Double]    Matrix storing bias between 3rd hidden layer and output layer
-# HIDDEN   Matrix[Double]    Matrix storing the hidden (2nd) layer representation if needed
-# ----------------------------------------------------------------------------------------------------------------------
+# ----------------------------------------------------------------------------------------------------
+# W1_out   Matrix storing weights between input layer and 1st hidden layer
+# b1_out   Matrix storing bias between input layer and 1st hidden layer
+# W2_out   Matrix storing weights between 1st hidden layer and 2nd hidden layer
+# b2_out   Matrix storing bias between 1st hidden layer and 2nd hidden layer
+# W3_out   Matrix storing weights between 2nd hidden layer and 3rd hidden layer
+# b3_out   Matrix storing bias between 2nd hidden layer and 3rd hidden layer
+# W4_out   Matrix storing weights between 3rd hidden layer and output layer
+# b4_out   Matrix storing bias between 3rd hidden layer and output layer
+# HIDDEN   Matrix storing the hidden (2nd) layer representation if needed
+# ----------------------------------------------------------------------------------------------------
 
 m_autoencoder_2layer = function(Matrix[Double] X, Integer num_hidden1, Integer num_hidden2, Integer max_epochs,
   Boolean full_obj = FALSE, Integer batch_size = 256, Double step = 1e-5, Double decay = 0.95, Double mu = 0.9,
diff --git a/scripts/builtin/bandit.dml b/scripts/builtin/bandit.dml
index 04e28517d8..43e3a1a9c5 100644
--- a/scripts/builtin/bandit.dml
+++ b/scripts/builtin/bandit.dml
@@ -19,37 +19,34 @@
 #
 #-------------------------------------------------------------
 
-# In The bandit function the objective is to find an arm that optimises a known functional of the unknown arm-reward distributions.
+# In the bandit function, the objective is to find an arm that optimizes
+# a known functional of the unknown arm-reward distributions.
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME              TYPE               DEFAULT            MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# X_train           Matrix[Double]     ---
-# Y_train           Matrix[Double]     ---
-# X_test            Matrix[Double]     ---
-# Y_test            Matrix[Double]     ---
-# metaList          List[Unknown]      ---
-# evaluationFunc    String             ---
-# evalFunHp         Matrix[Double]     ---
-# lp                Frame[Unknown]     ---
-# primitives        Frame[Unknown]     ---
-# params            Frame[Unknown]     ---
-# K                 Integer            3
-# R                 Integer            50
-# baseLineScore     Double
-# cv                Boolean
-# cvk               Integer            2
-# verbose           Boolean            TRUE
-# output            String             ""
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# -----------------------------------------------------------------------------------
+# X_train         ---
+# Y_train         ---
+# X_test          ---
+# Y_test          ---
+# metaList        ---
+# evaluationFunc  ---
+# evalFunHp       ---
+# lp              ---
+# primitives      ---
+# param           ---
+# k               ---
+# R               ---
+# baseLineScore   ---
+# cv              ---
+# cvk             ---
+# verbose         ---
+# output          ---
+# -----------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME      TYPE             MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# perf      Boolean
-# ----------------------------------------------------------------------------------------------------------------------
+# --------------------------------------
+# perf    ---
+# --------------------------------------
 
 m_bandit = function(Matrix[Double] X_train, Matrix[Double] Y_train, Matrix[Double] X_test, Matrix[Double] Y_test, List[Unknown] metaList,
   String evaluationFunc, Matrix[Double] evalFunHp, Frame[Unknown] lp, Matrix[Double] lpHp, Frame[Unknown] primitives, Frame[Unknown] param, Integer k = 3,
diff --git a/scripts/builtin/bivar.dml b/scripts/builtin/bivar.dml
index 27629da4b6..dbf36d3455 100644
--- a/scripts/builtin/bivar.dml
+++ b/scripts/builtin/bivar.dml
@@ -23,27 +23,23 @@
 # Given, index1 = {A_11, A_12, ... A_1m} and index2 = {A_21, A_22, ... A_2n}
 # compute bivariate stats for m*n pairs (A_1i, A_2j), (1<= i <=m) and (1<= j <=n).
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME           TYPE               DEFAULT  MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# X              Matrix[Double]     ---      Input matrix
-# S1             Matrix[Integer]    ---      First attribute set {A_11, A_12, ... A_1m}
-# S2             Matrix[Integer]    ---      Second attribute set {A_21, A_22, ... A_2n}
-# T1             Matrix[Integer]    ---      Kind for attributes in S1
-#                                            (kind=1 for scale, kind=2 for nominal, kind=3 for ordinal)
-# verbose        Boolean            ---      Print bivar stats
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# --------------------------------------------------------------------------------------
+# X          Input matrix
+# S1         First attribute set {A_11, A_12, ... A_1m}
+# S2         Second attribute set {A_21, A_22, ... A_2n}
+# T1         Kind for attributes in S1
+#            (kind=1 for scale, kind=2 for nominal, kind=3 for ordinal)
+# verbose    Print bivar stats
+# --------------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME                       TYPE       MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# basestats_scale_scale      Matrix     basestats_scale_scale as output with bivar stats
-# basestats_nominal_scale    Matrix     basestats_nominal_scale as output with bivar stats
-# basestats_nominal_nominal  Matrix     basestats_nominal_nominal as output with bivar stats
-# basestats_ordinal_ordinal  Matrix     basestats_ordinal_ordinal as output with bivar stats
-# ----------------------------------------------------------------------------------------------------------------------
+# -----------------------------------------------------------------------------------------------------------
+# basestats_scale_scale      basestats_scale_scale as output with bivar stats
+# basestats_nominal_scale    basestats_nominal_scale as output with bivar stats
+# basestats_nominal_nominal  basestats_nominal_nominal as output with bivar stats
+# basestats_ordinal_ordinal  basestats_ordinal_ordinal as output with bivar stats
+# -----------------------------------------------------------------------------------------------------------
 
 m_bivar = function(Matrix[Double] X, Matrix[Double] S1, Matrix[Double] S2, Matrix[Double] T1, Matrix[Double] T2, Boolean verbose)
   return (Matrix[Double] basestats_scale_scale, Matrix[Double] basestats_nominal_scale, Matrix[Double] basestats_nominal_nominal, Matrix[Double] basestats_ordinal_ordinal)
diff --git a/scripts/builtin/components.dml b/scripts/builtin/components.dml
index 20dafbc425..dfbe8bd700 100644
--- a/scripts/builtin/components.dml
+++ b/scripts/builtin/components.dml
@@ -24,28 +24,24 @@
 # where each component is identified by the maximum vertex ID
 # (i.e., row/column position of the input graph) 
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME      TYPE        DEFAULT   MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# X         Matrix      ---       Location to read the matrix of feature vectors
-# Y         Matrix      ---       Location to read the matrix with category labels
-# icpt      Integer     0         Intercept presence, shifting and rescaling X columns: 0 = no intercept,
-#                                 no shifting, no rescaling; 1 = add intercept, but neither shift nor rescale X;
-#                                 2 = add intercept, shift & rescale X columns to mean = 0, variance = 1
-# tol       Double      0.000001  tolerance ("epsilon")
-# reg       Double      0.0       regularization parameter (lambda = 1/C); intercept is not regularized
-# maxi      Integer     100       max. number of outer (Newton) iterations
-# maxii     Integer     0         max. number of inner (conjugate gradient) iterations, 0 = no max
-# verbose   Boolean     FALSE     flag specifying if logging information should be printed
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# -----------------------------------------------------------------------------------------------
+# X        Location to read the matrix of feature vectors
+# Y        Location to read the matrix with category labels
+# icpt     Intercept presence, shifting and rescaling X columns: 0 = no intercept,
+#          no shifting, no rescaling; 1 = add intercept, but neither shift nor rescale X;
+#          2 = add intercept, shift & rescale X columns to mean = 0, variance = 1
+# tol      tolerance ("epsilon")
+# reg      regularization parameter (lambda = 1/C); intercept is not regularized
+# maxi     max. number of outer (Newton) iterations
+# maxii    max. number of inner (conjugate gradient) iterations, 0 = no max
+# verbose  flag specifying if logging information should be printed
+# -----------------------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME     TYPE            MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# betas    Matrix[Double]  regression betas as output for prediction
-# ----------------------------------------------------------------------------------------------------------------------
+# ----------------------------------------------------------------------------------------------------
+# betas  regression betas as output for prediction
+# ----------------------------------------------------------------------------------------------------
 
 m_components = function(Matrix[Double] G, Integer maxi = 0, Boolean verbose = TRUE) 
   return (Matrix[Double] C) 
diff --git a/scripts/builtin/confusionMatrix.dml b/scripts/builtin/confusionMatrix.dml
index 9d291f58d7..a56a936cc9 100644
--- a/scripts/builtin/confusionMatrix.dml
+++ b/scripts/builtin/confusionMatrix.dml
@@ -24,23 +24,6 @@
 # After which, it calculates and returns the sum of classifications
 # and the average of each true class.
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME            TYPE            DEFAULT     MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# P               Matrix[Double]  ---         vector of Predictions
-# Y               Matrix[Double]  ---         vector of Golden standard One Hot Encoded; the one hot
-#                                             encoded vector of actual labels
-# ----------------------------------------------------------------------------------------------------------------------
-#
-# OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME            TYPE                 MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# confusionSum    Matrix[Double]       The Confusion Matrix Sums of classifications
-# confusionAvg    Matrix[Double]       The Confusion Matrix averages of each true class
-# ----------------------------------------------------------------------------------------------------------------------
-# Output is like:
 #                   True Labels
 #                     1    2
 #                 1   TP | FP
@@ -51,7 +34,19 @@
 # FP = False Positives
 # FN = False Negatives
 # TN = True Negatives
-# ----------------------------------------------------------------------------------------------------------------------
+#
+# INPUT:
+# --------------------------------------------------------------------------------
+# P     vector of Predictions
+# Y     vector of Golden standard One Hot Encoded; the one hot
+#       encoded vector of actual labels
+# --------------------------------------------------------------------------------
+#
+# OUTPUT:
+# ------------------------------------------------------------------------------------------------
+# confusionSum   The Confusion Matrix Sums of classifications
+# confusionAvg   The Confusion Matrix averages of each true class
+# ------------------------------------------------------------------------------------------------
 
 m_confusionMatrix = function(Matrix[Double] P, Matrix[Double] Y)
   return(Matrix[Double] confusionSum, Matrix[Double] confusionAvg)
diff --git a/scripts/builtin/cor.dml b/scripts/builtin/cor.dml
index d6a52826dc..8720f93cab 100644
--- a/scripts/builtin/cor.dml
+++ b/scripts/builtin/cor.dml
@@ -21,19 +21,15 @@
 
 # This Function compute correlation matrix
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME            TYPE            DEFAULT     MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# X               Matrix[Double]  ---         A Matrix Input to compute the correlation on
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# --------------------------------------------------------------------------------
+# X     A Matrix Input to compute the correlation on
+# --------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME            TYPE                        MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# Y               Matrix[Double]              Correlation matrix of the input matrix
-# ----------------------------------------------------------------------------------------------------------------------
+# --------------------------------------------------------------------------------
+# Y     Correlation matrix of the input matrix
+# --------------------------------------------------------------------------------
 
 m_cor = function(Matrix[Double] X) return (Matrix[Double] Y) {
   Xc = X - colMeans(X);
diff --git a/scripts/builtin/correctTypos.dml b/scripts/builtin/correctTypos.dml
index 6773808180..2fddfaec1b 100644
--- a/scripts/builtin/correctTypos.dml
+++ b/scripts/builtin/correctTypos.dml
@@ -30,23 +30,18 @@
 #   Commun. ACM 7, 3 (March 1964), 171–176. 
 #   DOI:https://doi.org/10.1145/363958.363994
 #
-# INPUT PARAMETERS:
-# ------------------------------------------------------------------------------------------------------------------------
-# NAME                       TYPE             DEFAULT  MEANING
-# ------------------------------------------------------------------------------------------------------------------------
-# strings                    Frame[String]    ---      The nx1 input frame of corrupted strings
-# frequency_threshold        Double           0.05     Strings that occur above this frequency level will not be corrected
-# distance_threshold         integer          2        Max distance at which strings are considered similar
-# is_verbose                 Boolean          FALSE    Print debug information
-#
-# ------------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# ----------------------------------------------------------------------------------------
+# strings              The nx1 input frame of corrupted strings
+# frequency_threshold  Strings that occur above this frequency level will not be corrected
+# distance_threshold   Max distance at which strings are considered similar
+# is_verbose           Print debug information
+# ----------------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ------------------------------------------------------------------------------------------------------------------------
-# NAME     TYPE                    MEANING
-# ------------------------------------------------------------------------------------------------------------------------
-# Y        Frame[String]           Corrected nx1 output frame
-# ------------------------------------------------------------------------------------------------------------------------
+# ---------------------------------------------------------------------------------------------
+# Y     Corrected nx1 output frame
+# ---------------------------------------------------------------------------------------------
 
 # TODO: future: add parameter for list of words that are sure to be correct
 
diff --git a/scripts/builtin/correctTyposApply.dml b/scripts/builtin/correctTyposApply.dml
index 3b8a8b734d..050bb8132b 100644
--- a/scripts/builtin/correctTyposApply.dml
+++ b/scripts/builtin/correctTyposApply.dml
@@ -30,27 +30,23 @@
 #   Commun. ACM 7, 3 (March 1964), 171–176. 
 #   DOI:https://doi.org/10.1145/363958.363994
 #
-# INPUT PARAMETERS:
-# ------------------------------------------------------------------------------------------------------------------------
-# NAME                       TYPE             DEFAULT  MEANING
-# ------------------------------------------------------------------------------------------------------------------------
-# strings                    Frame[String]    ---      The nx1 input frame of corrupted strings
-# nullMask                   Matrix[Double]   ---      ---
-# frequency_threshold        Double           0.05     Strings that occur above this frequency level will not be corrected
-# distance_threshold         integer          2        Max distance at which strings are considered similar
-# distance matrix            Matrix[Double]         
-# dict                       Frame[String]        
+# TODO: future: add parameter for list of words that are sure to be correct
 #
-# ------------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# ----------------------------------------------------------------------------------------
+# strings              The nx1 input frame of corrupted strings
+# nullMask             ---
+# frequency_threshold  Strings that occur above this frequency level will not be corrected
+# distance_threshold   Max distance at which strings are considered similar
+# distance_matrix      ---
+# dict                 ---
+# ----------------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ------------------------------------------------------------------------------------------------------------------------
-# NAME     TYPE                    MEANING
-# ------------------------------------------------------------------------------------------------------------------------
-# Y        Frame[String]           Corrected nx1 output frame
-# ------------------------------------------------------------------------------------------------------------------------
+# ---------------------------------------------------------------------------------------------
+# Y     Corrected nx1 output frame
+# ---------------------------------------------------------------------------------------------
 
-# TODO: future: add parameter for list of words that are sure to be correct
 
 s_correctTyposApply = function(Frame[String] strings, Double frequency_threshold = 0.05, Integer distance_threshold = 2, Matrix[Double] distance_matrix, Frame[Unknown] dict)
   return (Frame[String] Y)
diff --git a/scripts/builtin/cox.dml b/scripts/builtin/cox.dml
index 68672d6339..779d193082 100644
--- a/scripts/builtin/cox.dml
+++ b/scripts/builtin/cox.dml
@@ -23,65 +23,61 @@
 # The Breslow method is used for handling ties and the regression parameters 
 # are computed using trust region newton method with conjugate gradient 
 # 
-# INPUT   PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME    TYPE     DEFAULT    MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# X       Matrix   ---        Location to read the input matrix X containing the survival data 
-#                             containing the following information
-#                             1: timestamps 
-#                             2: whether an event occurred (1) or data is censored (0)
-#                             3: feature vectors
-# TE      Matrix   ---        Column indices of X as a column vector which contain timestamp 
-#                             (first row) and event information (second row)
-# F       Matrix   ---        Column indices of X as a column vector which are to be used for 
-#                             fitting the Cox model
-# R       Matrix   ---        If factors (categorical variables) are available in the input matrix
-#                             X, location to read matrix R containing the start and end indices of
-#                             the factors in X
-#                             R[,1]: start indices
-#                             R[,2]: end indices
-#                             Alternatively, user can specify the indices of the baseline level of
-#                             each factor which needs to be removed from X; in this case the start
-#                             and end indices corresponding to the baseline level need to be the same;
-#                             if R is not provided by default all variables are considered to be continuous 
-# alpha   Double   0.05       Parameter to compute a 100*(1-alpha)% confidence interval for the betas  
-# tol     Double   0.000001   Tolerance ("epsilon")
-# moi     Int      100        Max. number of outer (Newton) iterations
-# mii     Int      0          Max. number of inner (conjugate gradient) iterations, 0 = no max   
+# INPUT:
+# -------------------------------------------------------------------------------------------------
+# X      Location to read the input matrix X containing the survival data 
+#        containing the following information
+#        1: timestamps 
+#        2: whether an event occurred (1) or data is censored (0)
+#        3: feature vectors
+# TE     Column indices of X as a column vector which contain timestamp 
+#        (first row) and event information (second row)
+# F      Column indices of X as a column vector which are to be used for 
+#        fitting the Cox model
+# R      If factors (categorical variables) are available in the input matrix
+#        X, location to read matrix R containing the start and end indices of
+#        the factors in X
+#        R[,1]: start indices
+#        R[,2]: end indices
+#        Alternatively, user can specify the indices of the baseline level of
+#        each factor which needs to be removed from X; in this case the start
+#        and end indices corresponding to the baseline level need to be the same;
+#        if R is not provided by default all variables are considered to be continuous 
+# alpha  Parameter to compute a 100*(1-alpha)% confidence interval for the betas  
+# tol    Tolerance ("epsilon")
+# moi    Max. number of outer (Newton) iterations
+# mii    Max. number of inner (conjugate gradient) iterations, 0 = no max   
 #
-# ----------------------------------------------------------------------------------------------------------------------
+# -------------------------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME      TYPE             MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# M         Matrix[Double]   A D x 7 matrix M, where D denotes the number of covariates, with the following schema:
-#                            M[,1]: betas
-#                            M[,2]: exp(betas)
-#                            M[,3]: standard error of betas
-#                            M[,4]: Z
-#                            M[,5]: P-value
-#                            M[,6]: lower 100*(1-alpha)% confidence interval of betas
-#                            M[,7]: upper 100*(1-alpha)% confidence interval of betas
-# S,T       Matrix[Double]    Two matrices containing a summary of some statistics of the fitted model:
-#                            1 - File S with the following format
-#                            - row 1: no. of observations
-#                            - row 2: no. of events
-#                            - row 3: log-likelihood
-#                            - row 4: AIC
-#                            - row 5: Rsquare (Cox & Snell)
-#                            - row 6: max possible Rsquare
-#                            2 - File T with the following format
-#                            - row 1: Likelihood ratio test statistic, degree of freedom, P-value
-#                            - row 2: Wald test statistic, degree of freedom, P-value
-#                            - row 3: Score (log-rank) test statistic, degree of freedom, P-value
-# RT,XO,COV Matrix[Double]   Additionally, the following matrices are stored (needed for prediction)
-#                            1- A column matrix RT that contains the order-preserving recoded timestamps from X
-#                            2- Matrix XO which is matrix X with sorted timestamps
-#                            3- Variance-covariance matrix of the betas COV
-#                            4- A column matrix MF that contains the column indices of X with the baseline factors removed (if available)
-# ----------------------------------------------------------------------------------------------------------------------
+# ------------------------------------------------------------------------------------------------------
+# M          A D x 7 matrix M, where D denotes the number of covariates, with the following schema:
+#            M[,1]: betas
+#            M[,2]: exp(betas)
+#            M[,3]: standard error of betas
+#            M[,4]: Z
+#            M[,5]: P-value
+#            M[,6]: lower 100*(1-alpha)% confidence interval of betas
+#            M[,7]: upper 100*(1-alpha)% confidence interval of betas
+# S,T        Two matrices containing a summary of some statistics of the fitted model:
+#            1 - File S with the following format
+#            - row 1: no. of observations
+#            - row 2: no. of events
+#            - row 3: log-likelihood
+#            - row 4: AIC
+#            - row 5: Rsquare (Cox & Snell)
+#            - row 6: max possible Rsquare
+#            2 - File T with the following format
+#            - row 1: Likelihood ratio test statistic, degree of freedom, P-value
+#            - row 2: Wald test statistic, degree of freedom, P-value
+#            - row 3: Score (log-rank) test statistic, degree of freedom, P-value
+# RT,XO,COV  Additionally, the following matrices are stored (needed for prediction)
+#            1- A column matrix RT that contains the order-preserving recoded timestamps from X
+#            2- Matrix XO which is matrix X with sorted timestamps
+#            3- Variance-covariance matrix of the betas COV
+#            4- A column matrix MF that contains the column indices of X with the baseline factors removed (if available)
+# ------------------------------------------------------------------------------------------------------
 
 m_cox = function(Matrix[Double] X, Matrix[Double] TE, Matrix[Double] F, Matrix[Double] R,
     Double alpha = 0.05, Double tol = 0.000001, Integer moi = 100, Integer mii = 0)
diff --git a/scripts/builtin/cspline.dml b/scripts/builtin/cspline.dml
index f3d376e098..eb8e1fdc5d 100644
--- a/scripts/builtin/cspline.dml
+++ b/scripts/builtin/cspline.dml
@@ -24,27 +24,23 @@
 # Algorithms: implement https://en.wikipedia.org/wiki/Spline_interpolation#Algorithm_to_find_the_interpolating_cubic_spline
 # It use natural spline with q1''(x0) == qn''(xn) == 0.0
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME  TYPE           DEFAULT   MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# X      Matrix[Double]  ---       1-column matrix of x values knots. It is assumed that x values are
-#                                  monotonically increasing and there is no duplicates points in X
-# Y      Matrix[Double]  ---       1-column matrix of corresponding y values knots
-# inp_x  Double          ---       the given input x, for which the cspline will find predicted y
-# mode   String          "DS"      Specifies the method for cspline (DS - Direct Solve, CG - Conjugate Gradient)
-# tol    Double          -1.0      Tolerance (epsilon); conjugate graduent procedure terminates early if
-#                                  L2 norm of the beta-residual is less than tolerance * its initial norm
-# maxi   Integer         -1        Maximum number of conjugate gradient iterations, 0 = no maximum
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# ---------------------------------------------------------------------------------------------
+# X      1-column matrix of x values knots. It is assumed that x values are
+#        monotonically increasing and there are no duplicate points in X
+# Y      1-column matrix of corresponding y values knots
+# inp_x  the given input x, for which the cspline will find predicted y
+# mode   Specifies the method for cspline (DS - Direct Solve, CG - Conjugate Gradient)
+# tol    Tolerance (epsilon); conjugate gradient procedure terminates early if
+#        L2 norm of the beta-residual is less than tolerance * its initial norm
+# maxi   Maximum number of conjugate gradient iterations, 0 = no maximum
+# ---------------------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME      TYPE             MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# pred_Y Matrix[Double]      Predicted value
-# K      Matrix[Double]      Matrix of k parameters
-# ----------------------------------------------------------------------------------------------------------------------
+# ---------------------------------------------------------------------------------------------------
+# pred_Y  Predicted value
+# K       Matrix of k parameters
+# ---------------------------------------------------------------------------------------------------
 
 m_cspline = function(Matrix[Double] X, Matrix[Double] Y, Double inp_x, 
 String mode = "DS", Double tol = -1.0, Integer maxi = -1)
diff --git a/scripts/builtin/csplineCG.dml b/scripts/builtin/csplineCG.dml
index 422a8ea049..37d557b8a1 100644
--- a/scripts/builtin/csplineCG.dml
+++ b/scripts/builtin/csplineCG.dml
@@ -19,28 +19,24 @@
 #
 #-------------------------------------------------------------
 
-# Builtin that solves cubic spline interpolation using conjucate gradient algorithm
+# Builtin that solves cubic spline interpolation using conjugate gradient algorithm
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME  TYPE           DEFAULT   MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# X     Matrix[Double]  ---      1-column matrix of x values knots. It is assumed that x values are
-#                                  monotonically increasing and there is no duplicates points in X
-# Y     Matrix[Double]  ---      1-column matrix of corresponding y values knots
-# inp_x Double          ---      the given input x, for which the cspline will find predicted y.
-# tol   Double          0.000001 Tolerance (epsilon); conjugate graduent procedure terminates early if
-#                                L2 norm of the beta-residual is less than tolerance * its initial norm
-# maxi  Integer         0        Maximum number of conjugate gradient iterations, 0 = no maximum
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# ----------------------------------------------------------------------------------------------
+# X      1-column matrix of x values knots. It is assumed that x values are
+#        monotonically increasing and there are no duplicate points in X
+# Y      1-column matrix of corresponding y values knots
+# inp_x  the given input x, for which the cspline will find predicted y.
+# tol    Tolerance (epsilon); conjugate gradient procedure terminates early if
+#        L2 norm of the beta-residual is less than tolerance * its initial norm
+# maxi   Maximum number of conjugate gradient iterations, 0 = no maximum
+# ----------------------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME   TYPE             MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# pred_Y Matrix[Double]   Predicted value
-# K      Matrix[Double]   Matrix of k parameters
-# ----------------------------------------------------------------------------------------------------------------------
+# ------------------------------------------------------------------------------------------------------
+# pred_Y  Predicted value
+# K       Matrix of k parameters
+# ------------------------------------------------------------------------------------------------------
 
 m_csplineCG = function (Matrix[Double] X, Matrix[Double] Y, Double inp_x, Double tol = 0.000001, Integer maxi = 0) 
   return (Matrix[Double] pred_Y, Matrix[Double] K) 
diff --git a/scripts/builtin/csplineDS.dml b/scripts/builtin/csplineDS.dml
index 87ff58e67f..647be16d0f 100644
--- a/scripts/builtin/csplineDS.dml
+++ b/scripts/builtin/csplineDS.dml
@@ -21,24 +21,19 @@
 
 # Builtin that solves cubic spline interpolation using a direct solver.
 #
-# INPUT PARAMETERS:
+# INPUT:
+# ------------------------------------------------------------------
+# X       1-column matrix of x values knots. It is assumed that x values are
+#         monotonically increasing and there are no duplicate points in X
+# Y       1-column matrix of corresponding y values knots
+# inp_x   the given input x, for which the cspline will find predicted y.
 # --------------------------------------------------------------------------------------------
-# NAME   TYPE              DEFAULT  MEANING
-# --------------------------------------------------------------------------------------------
-# X      Matrix[Double]    ---      1-column matrix of x values knots. It is assumed that x values are
-#                                   monotonically increasing and there is no duplicates points in X
-# Y      Matrix[Double]    ---      1-column matrix of corresponding y values knots
-# inp_x  Double            ---      the given input x, for which the cspline will find predicted y.
-#
-# ----------------------------------------------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME   TYPE             MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# pred_y Matrix[Double]   Predicted value
-# K      Matrix[Double]   Matrix of k parameters
-# ----------------------------------------------------------------------------------------------------------------------
+# ------------------------------------------------------------------------------------------------------
+# pred_Y  Predicted value
+# K       Matrix of k parameters
+# ------------------------------------------------------------------------------------------------------
 
 m_csplineDS = function (Matrix[Double] X, Matrix[Double] Y, Double inp_x) 
   return (Matrix[Double] pred_Y, Matrix[Double] K) 
diff --git a/scripts/builtin/cvlm.dml b/scripts/builtin/cvlm.dml
index acf2caedbc..b30dccb0a7 100644
--- a/scripts/builtin/cvlm.dml
+++ b/scripts/builtin/cvlm.dml
@@ -23,25 +23,21 @@
 # validation method. It uses lm and lmPredict functions to solve the linear regression and to predict the class of a
 # feature vector with no intercept, shifting, and rescaling.
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME            TYPE              DEFAULT  MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# X               Matrix[Double]    ---      Recorded Data set into matrix
-# y               Matrix[Double]    ---      1-column matrix of response values.
-# k               Integer           ---      Number of subsets needed, It should always be more than 1 and less than nrow(X)
-# icpt            Integer           0        Intercept presence, shifting and rescaling the columns of X
-# reg             Double            1e-7     Regularization constant (lambda) for L2-regularization. set to nonzero for
-#                                            highly dependant/sparse/numerous features
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# ----------------------------------------------------------------------------------
+# X      Recorded Data set into matrix
+# y      1-column matrix of response values.
+# k      Number of subsets needed. It should always be more than 1 and less than nrow(X)
+# icpt   Intercept presence, shifting and rescaling the columns of X
+# reg    Regularization constant (lambda) for L2-regularization. set to nonzero for
+#        highly dependent/sparse/numerous features
+# ----------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME            TYPE                  MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# y_predict       Matrix[Double]        Response values
-# allbeta         Matrix[Double]        Validated data set
-# ----------------------------------------------------------------------------------------------------------------------
+# --------------------------------------------------------------------------------------------
+# y_predict   Response values
+# allbeta     Validated data set
+# --------------------------------------------------------------------------------------------
 
 m_cvlm = function(Matrix[Double] X, Matrix[Double] y, Integer k, Integer icpt = 0, Double reg = 1e-7)
     return (Matrix[Double] y_predict, Matrix[Double] allbeta)
diff --git a/scripts/builtin/dbscan.dml b/scripts/builtin/dbscan.dml
index 9e1d5101f0..69c6887e67 100644
--- a/scripts/builtin/dbscan.dml
+++ b/scripts/builtin/dbscan.dml
@@ -21,22 +21,18 @@
 
 # Implements the DBSCAN clustering algorithm using Euclidian distance matrix
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME      TYPE             DEFAULT    MEANING
-# ----------------------------------------------------------------------------
-# X         Matrix[Double]   ---        The input Matrix to do DBSCAN on.
-# eps       Double           0.5        Maximum distance between two points for one to be considered reachable for the other.
-# minPts    Int              5          Number of points in a neighborhood for a point to be considered as a core point
-#                                       (includes the point itself).
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# -------------------------------------------------
+# X          The input Matrix to do DBSCAN on.
+# eps        Maximum distance between two points for one to be considered reachable for the other.
+# minPts     Number of points in a neighborhood for a point to be considered as a core point
+#            (includes the point itself).
+# -------------------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME            TYPE                  MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# clusterMembers  Matrix[Double]        clustering Matrix
-# ----------------------------------------------------------------------------------------------------------------------
+# ------------------------------------------------------------------------------------------------
+# clusterMembers  clustering Matrix
+# ------------------------------------------------------------------------------------------------
 
 m_dbscan = function (Matrix[Double] X, Double eps = 0.5, Integer minPts = 5)
     return (Matrix[Double] X, Matrix[Double] clusterModel, Double eps)
diff --git a/scripts/builtin/dbscanApply.dml b/scripts/builtin/dbscanApply.dml
index e55ee8bd99..4a7eb7e6ed 100644
--- a/scripts/builtin/dbscanApply.dml
+++ b/scripts/builtin/dbscanApply.dml
@@ -18,23 +18,24 @@
 # under the License.
 #
 #-------------------------------------------------------------
-#
+
 # Implements the outlier detection/prediction algorithm using a DBScan model
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------
-# NAME              TYPE             DEFAULT    MEANING
-# ----------------------------------------------------------------------------
-# X             Matrix[Double]   ---        The input Matrix to do outlier detection on.
-# clusterModel      Matrix[Double]   ---        Model of clusters to predict outliers against.
-# eps               Double           0.5        Maximum distance between two points for one to be considered reachable for the other.
-
-# OUTPUT PARAMETERS:
-# ----------------------------------------------------------------------------
-# NAME              TYPE             DEFAULT    MEANING
-# ----------------------------------------------------------------------------
-# outlierPoints     Matrix[Double]   ---        Predicted outliers
-
+# INPUT:
+# ---------------------------------------------
+# NAME           MEANING
+# ---------------------------------------------
+# X              The input Matrix to do outlier detection on.
+# clusterModel   Model of clusters to predict outliers against.
+# eps            Maximum distance between two points for one to be considered reachable for the other.
+# ---------------------------------------------
+#
+# OUTPUT:
+# ----------------------------------------------
+# NAME            MEANING
+# ----------------------------------------------
+# outlierPoints   Predicted outliers
+# ----------------------------------------------
 
 m_dbscanApply = function (Matrix[Double] X, Matrix[Double] clusterModel, Double eps)
   return (Matrix[Double] cluster, Matrix[Double] outlierPoints)
diff --git a/scripts/builtin/decisionTree.dml b/scripts/builtin/decisionTree.dml
index e2f1e5a5a9..fc436545c3 100644
--- a/scripts/builtin/decisionTree.dml
+++ b/scripts/builtin/decisionTree.dml
@@ -21,43 +21,39 @@
 
 # Builtin script implementing classification trees with scale and categorical features
 #
-# INPUT   PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME    TYPE             DEFAULT  MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# X       Matrix[Double]   ---      Feature matrix X; note that X needs to be both recoded and dummy coded
-# Y       Matrix[Double]   ---      Label matrix Y; note that Y needs to be both recoded and dummy coded
-# R       Matrix[Double]   " "      Matrix R which for each feature in X contains the following information
-#                                   - R[1,]: Row Vector which indicates if feature vector is scalar or categorical. 1 indicates
-#                                   a scalar feature vector, other positive Integers indicate the number of categories
-#                                   If R is not provided by default all variables are assumed to be scale
-# bins    Integer          20       Number of equiheight bins per scale feature to choose thresholds
-# depth   Integer          25       Maximum depth of the learned tree
-# verbose Boolean          FALSE    boolean specifying if the algorithm should print information while executing
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# ----------------------------------------------------------------------------------------------
+# X         Feature matrix X; note that X needs to be both recoded and dummy coded
+# Y         Label matrix Y; note that Y needs to be both recoded and dummy coded
+# R         Matrix R which for each feature in X contains the following information
+#           - R[1,]: Row Vector which indicates if feature vector is scalar or categorical. 1 indicates
+#           a scalar feature vector, other positive Integers indicate the number of categories
+#           If R is not provided by default all variables are assumed to be scale
+# bins      Number of equiheight bins per scale feature to choose thresholds
+# depth     Maximum depth of the learned tree
+# verbose   boolean specifying if the algorithm should print information while executing
+# ----------------------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME   TYPE                       MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# M      Matrix[Double]             Matrix M where each column corresponds to a node in the learned tree and each row
-#                                   contains the following information:
-#                                   M[1,j]: id of node j (in a complete binary tree)
-#                                   M[2,j]: Offset (no. of columns) to left child of j if j is an internal node, otherwise 0
-#                                   M[3,j]: Feature index of the feature (scale feature id if the feature is scale or
-#                                   categorical feature id if the feature is categorical)
-#                                   that node j looks at if j is an internal node, otherwise 0
-#                                   M[4,j]: Type of the feature that node j looks at if j is an internal node: holds
-#                                   the same information as R input vector
-#                                   M[5,j]: If j is an internal node: 1 if the feature chosen for j is scale,
-#                                   otherwise the size of the subset of values
-#                                   stored in rows 6,7,... if j is categorical
-#                                   If j is a leaf node: number of misclassified samples reaching at node j
-#                                   M[6:,j]: If j is an internal node: Threshold the example's feature value is compared
-#                                   to is stored at M[6,j] if the feature chosen for j is scale,
-#                                   otherwise if the feature chosen for j is categorical rows 6,7,... depict the value subset chosen for j
-#                                   If j is a leaf node 1 if j is impure and the number of samples at j > threshold, otherwise 0
-# ----------------------------------------------------------------------------------------------------------------------
+# -------------------------------------------------------------------------------------------
+# M      Matrix M where each column corresponds to a node in the learned tree and each row
+#        contains the following information:
+#        M[1,j]: id of node j (in a complete binary tree)
+#        M[2,j]: Offset (no. of columns) to left child of j if j is an internal node, otherwise 0
+#        M[3,j]: Feature index of the feature (scale feature id if the feature is scale or
+#        categorical feature id if the feature is categorical)
+#        that node j looks at if j is an internal node, otherwise 0
+#        M[4,j]: Type of the feature that node j looks at if j is an internal node: holds
+#        the same information as R input vector
+#        M[5,j]: If j is an internal node: 1 if the feature chosen for j is scale,
+#        otherwise the size of the subset of values
+#        stored in rows 6,7,... if j is categorical
+#        If j is a leaf node: number of misclassified samples reaching node j
+#        M[6:,j]: If j is an internal node: Threshold the example's feature value is compared
+#        to is stored at M[6,j] if the feature chosen for j is scale,
+#        otherwise if the feature chosen for j is categorical rows 6,7,... depict the value subset chosen for j
+#        If j is a leaf node: 1 if j is impure and the number of samples at j > threshold, otherwise 0
+# -------------------------------------------------------------------------------------------
 
 m_decisionTree = function(
   Matrix[Double] X,
diff --git a/scripts/builtin/decisionTreePredict.dml b/scripts/builtin/decisionTreePredict.dml
index 48c7f6f08e..b98ffef19b 100644
--- a/scripts/builtin/decisionTreePredict.dml
+++ b/scripts/builtin/decisionTreePredict.dml
@@ -23,39 +23,34 @@
 # Builtin script implementing prediction based on classification trees with scale features using prediction methods of the
 # Hummingbird paper (https://www.usenix.org/system/files/osdi20-nakandala.pdf).
 #
-# INPUT   PARAMETERS:
-# ---------------------------------------------------------------------------------------------
-#  NAME      TYPE                   MEANING
-# ---------------------------------------------------------------------------------------------
-#  M         Matrix[Double]         Decision tree matrix M, as generated by scripts/builtin/decisionTree.dml, where each column corresponds 
-#                                   to a node in the learned tree and each row contains the following information:
-#                                   M[1,j]: id of node j (in a complete binary tree)
-#                                   M[2,j]: Offset (no. of columns) to left child of j if j is an internal node, otherwise 0
-#                                   M[3,j]: Feature index of the feature (scale feature id if the feature is scale or
-#                                   categorical feature id if the feature is categorical)
-#                                   that node j looks at if j is an internal node, otherwise 0
-#                                   M[4,j]: Type of the feature that node j looks at if j is an internal node: holds
-#                                   the same information as R input vector
-#                                   M[5,j]: If j is an internal node: 1 if the feature chosen for j is scale,
-#                                   otherwise the size of the subset of values
-#                                   stored in rows 6,7,... if j is categorical
-#                                   If j is a leaf node: number of misclassified samples reaching at node j
-#                                   M[6:,j]: If j is an internal node: Threshold the example's feature value is compared
-#                                   to is stored at M[6,j] if the feature chosen for j is scale,
-#                                   otherwise if the feature chosen for j is categorical rows 6,7,... depict the value subset chosen for j
-#                                   If j is a leaf node 1 if j is impure and the number of samples at j > threshold, otherwise 0
+# INPUT:
+# ------------------------------------------------------------------------
+# M           Decision tree matrix M, as generated by scripts/builtin/decisionTree.dml, where each column corresponds 
+#             to a node in the learned tree and each row contains the following information:
+#             M[1,j]: id of node j (in a complete binary tree)
+#             M[2,j]: Offset (no. of columns) to left child of j if j is an internal node, otherwise 0
+#             M[3,j]: Feature index of the feature (scale feature id if the feature is scale or
+#             categorical feature id if the feature is categorical)
+#             that node j looks at if j is an internal node, otherwise 0
+#             M[4,j]: Type of the feature that node j looks at if j is an internal node: holds
+#             the same information as R input vector
+#             M[5,j]: If j is an internal node: 1 if the feature chosen for j is scale,
+#             otherwise the size of the subset of values
+#             stored in rows 6,7,... if j is categorical
+#             If j is a leaf node: number of misclassified samples reaching node j
+#             M[6:,j]: If j is an internal node: Threshold the example's feature value is compared
+#             to is stored at M[6,j] if the feature chosen for j is scale,
+#             otherwise if the feature chosen for j is categorical rows 6,7,... depict the value subset chosen for j
+#             If j is a leaf node: 1 if j is impure and the number of samples at j > threshold, otherwise 0
+# X           Feature matrix X
+# strategy    Prediction strategy, can be one of ["GEMM", "TT", "PTT"], referring to "Generic matrix multiplication", 
+#             "Tree traversal", and "Perfect tree traversal", respectively
+# ----------------------------------------------------------------------
 #
-#  X         Matrix[Double]        Feature matrix X
-#
-#  strategy  String                Prediction strategy, can be one of ["GEMM", "TT", "PTT"], referring to "Generic matrix multiplication", 
-#                                     "Tree traversal", and "Perfect tree traversal", respectively
-# -------------------------------------------------------------------------------------------
 # OUTPUT:
-# ---------------------------------------------------------------------------------------------
-#  NAME     TYPE                    MEANING
-# ---------------------------------------------------------------------------------------------
-#  Y        Matrix[Double]          Matrix containing the predicted labels for X 
-# ---------------------------------------------------------------------------------------------
+# ------------------------------------------------------------------
+# Y     Matrix containing the predicted labels for X 
+# ------------------------------------------------------------------
 
 m_decisionTreePredict = function(Matrix[Double] M, Matrix[Double] X, String strategy)
   return (Matrix[Double] Y) 
diff --git a/scripts/builtin/deepWalk.dml b/scripts/builtin/deepWalk.dml
index 447ee6e51e..3579c3f83a 100644
--- a/scripts/builtin/deepWalk.dml
+++ b/scripts/builtin/deepWalk.dml
@@ -21,25 +21,21 @@
 
 # This script performs DeepWalk on a given graph (https://arxiv.org/pdf/1403.6652.pdf)
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME      TYPE                DEFAULT     MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# Graph     Matrix[Double]      ---         adjacency matrix of a graph (n x n)
-# w         Integer             ---         window size
-# d         Integer             ---         embedding size
-# gamma     Integer             ---         walks per vertex
-# t         Integer             ---         walk length
-# alpha     Double              0.025       learning rate
-# beta      Double              0.9         factor for decreasing learning rate
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# ------------------------------------------------------------------------------------
+# Graph   adjacency matrix of a graph (n x n)
+# w       window size
+# d       embedding size
+# gamma   walks per vertex
+# t       walk length
+# alpha   learning rate
+# beta    factor for decreasing learning rate
+# ------------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME    TYPE                      MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# Phi     Matrix[Double]            matrix of vertex/word representation (n x d)
-# ----------------------------------------------------------------------------------------------------------------------
+# ------------------------------------------------------------------------------------------
+# Phi   matrix of vertex/word representation (n x d)
+# ------------------------------------------------------------------------------------------
 
 source("scripts/staging/entity-resolution/primitives/postprocessing.dml") as post;
 
diff --git a/scripts/builtin/denialConstraints.dml b/scripts/builtin/denialConstraints.dml
index 3d221ab650..23453979e1 100644
--- a/scripts/builtin/denialConstraints.dml
+++ b/scripts/builtin/denialConstraints.dml
@@ -21,30 +21,6 @@
 
 # This function considers some constraints indicating statements that can NOT happen in the data (denial constraints).
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME              TYPE      DEFAULT MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# dataFrame         Frame      ---    frame which columns represent the variables of the data and the rows correspond
-#                                     to different tuples or instances.
-#                                     Recommended to have a column indexing the instances from 1 to N (N=number of instances).
-# constraintsFrame  Frame      ---    frame with fixed columns and each row representing one constraint.
-#                                     1. idx: (double) index of the constraint, from 1 to M (number of constraints)
-#                                       2. constraint.type: (string) The constraints can be of 3 different kinds:
-#                                           - variableCompare: for each instance, it will compare the values of two variables (with a relation <, > or =).
-#                                           - valueCompare: for each instance, it will compare a fixed value and a variable value (with a relation <, > or =).
-#                                           - instanceCompare: for every couple of instances, it will compare the relation between two variables, 
-#                                              ie  if the value of the variable 1 in instance 1 is lower/higher than the value of variable 1 in instance 2, 
-#                                             then the value of of variable 2 in instance 2 can't be lower/higher than the value of variable 2 in instance 2.
-#                                       3. group.by: (boolean) if TRUE only one group of data (defined by a variable option) will be considered for the constraint.
-#                                       4. group.variable: (string, only if group.by TRUE) name of the variable (column in dataFrame) that will divide our data in groups.
-#                                       5. group.option: (only if group.by TRUE) option of the group.variable that defines the group to consider.
-#                                       6. variable1: (string) first variable to compare (name of column in dataFrame).
-#                                       7. relation: (string) can be < , > or = in the case of variableCompare and valueCompare, and < >, < < , > < or > > 
-#                                         in the case of instanceCompare
-#                                       8. variable2: (string) second variable to compare (name of column in dataFrame) or fixed value for the case of valueCompare.
-#                                        
-# ----------------------------------------------------------------------------------------------------------------------
 #   EXAMPLE:
 #   dataFrame:
 #
@@ -74,17 +50,35 @@
 #
 #   Example: explanation of constraint 2 --> it can't happen that one professor of rank Prof has more years of service than other, but lower salary.
 #
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# ----------------------------------------------------------------------------------------------------
+# dataFrame         frame whose columns represent the variables of the data and the rows correspond
+#                   to different tuples or instances.
+#                   Recommended to have a column indexing the instances from 1 to N (N=number of instances).
+# constraintsFrame  frame with fixed columns and each row representing one constraint.
+#                   1. idx: (double) index of the constraint, from 1 to M (number of constraints)
+#                   2. constraint.type: (string) The constraints can be of 3 different kinds:
+#                       - variableCompare: for each instance, it will compare the values of two variables (with a relation <, > or =).
+#                       - valueCompare: for each instance, it will compare a fixed value and a variable value (with a relation <, > or =).
+#                       - instanceCompare: for every couple of instances, it will compare the relation between two variables, 
+#                         i.e., if the value of variable 1 in instance 1 is lower/higher than the value of variable 1 in instance 2,
+#                         then the value of variable 2 in instance 1 can't be lower/higher than the value of variable 2 in instance 2.
+#                   3. group.by: (boolean) if TRUE only one group of data (defined by a variable option) will be considered for the constraint.
+#                   4. group.variable: (string, only if group.by TRUE) name of the variable (column in dataFrame) that will divide our data in groups.
+#                   5. group.option: (only if group.by TRUE) option of the group.variable that defines the group to consider.
+#                   6. variable1: (string) first variable to compare (name of column in dataFrame).
+#                   7. relation: (string) can be < , > or = in the case of variableCompare and valueCompare, and < >, < < , > < or > > 
+#                     in the case of instanceCompare
+#                   8. variable2: (string) second variable to compare (name of column in dataFrame) or fixed value for the case of valueCompare.
+# ----------------------------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME              TYPE                   MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# WrongInstances    Matrix[double]         Matrix of 2 columns.
-#                                         - First column shows the indexes of dataFrame that are wrong.
-#                                         - Second column shows the index of the denial constraint that is fulfilled
-#                                          If there are no wrong instances to show (0 constrains fulfilled) --> WrongInstances=matrix(0,1,2)
-# ----------------------------------------------------------------------------------------------------------------------
+# ----------------------------------------------------------------------------------------------
+# WrongInstances   Matrix of 2 columns.
+#                  - First column shows the indexes of dataFrame that are wrong.
+#                  - Second column shows the index of the denial constraint that is fulfilled
+#                  If there are no wrong instances to show (0 constraints fulfilled) --> WrongInstances=matrix(0,1,2)
+# ----------------------------------------------------------------------------------------------
 
 s_denialConstraints = function(Frame[Unknown] dataFrame, Frame[Unknown] constraintsFrame)
 return(Matrix[double] WrongInstances)
diff --git a/scripts/builtin/discoverFD.dml b/scripts/builtin/discoverFD.dml
index a1a4044c5c..9bc18e3c06 100644
--- a/scripts/builtin/discoverFD.dml
+++ b/scripts/builtin/discoverFD.dml
@@ -21,23 +21,18 @@
 
 # Implements builtin for finding functional dependencies
 # 
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME            TYPE              DEFAULT  MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# X               Matrix[Double]    --       Input Matrix X, encoded Matrix if data is categorical
-# Mask            Matrix[Double]    --       A row vector for interested features i.e. Mask =[1, 0, 1]
-#                                             will exclude the second column from processing
-# threshold       Matrix[Double]    --       threshold value in interval [0, 1] for robust FDs 
-#
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# --------------------------------------------------------------------------------------
+# X          Input Matrix X, encoded Matrix if data is categorical
+# Mask       A row vector selecting the features of interest, e.g., Mask = [1, 0, 1]
+#            will exclude the second column from processing
+# threshold  threshold value in interval [0, 1] for robust FDs 
+# --------------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME            TYPE             MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# FD              Matrix[Double]   matrix of functional dependencies
-# ----------------------------------------------------------------------------------------------------------------------
+# -------------------------------------------------------------------------------------------
+# FD    matrix of functional dependencies
+# -------------------------------------------------------------------------------------------
 
 m_discoverFD = function(Matrix[Double] X, Matrix[Double] Mask, Double threshold)
   return(Matrix[Double] FD)
diff --git a/scripts/builtin/dist.dml b/scripts/builtin/dist.dml
index 136efbdcae..197f9d8b07 100644
--- a/scripts/builtin/dist.dml
+++ b/scripts/builtin/dist.dml
@@ -19,21 +19,17 @@
 #
 #-------------------------------------------------------------
 
-# Returns Euclidian distance matrix (distances between N n-dimensional points)
+# Returns Euclidean distance matrix (distances between N n-dimensional points)
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME       TYPE              DEFAULT  MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# X          Matrix[Double]    ---      ---
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# --------------------------------------------------------------------------------
+# X       Matrix to calculate the distance inside
+# --------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME       TYPE               MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# Y          Matrix[Double]     Euclidian distance matrix
-# ----------------------------------------------------------------------------------------------------------------------
+# -----------------------------------------------------------------------------------------------
+# Y      Euclidean distance matrix
+# -----------------------------------------------------------------------------------------------
 
 m_dist = function(Matrix[Double] X) return (Matrix[Double] Y) {
   G = X %*% t(X);
diff --git a/scripts/builtin/dmv.dml b/scripts/builtin/dmv.dml
index 77528b7169..271147834b 100644
--- a/scripts/builtin/dmv.dml
+++ b/scripts/builtin/dmv.dml
@@ -21,23 +21,18 @@
 
 # The dmv-function is used to find disguised missing values utilising syntactical pattern recognition.
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME        TYPE           DEFAULT  MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# X           Frame[String]   ---     Input Frame
-# threshold   Double          0.8     Threshold value in interval [0, 1] for dominant pattern per column (e.g., 0.8 means
-#                                     that 80% of the entries per column must adhere this pattern to be dominant)
-# replace     String          "NA"    The string disguised missing values are replaced with
-#
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# ---------------------------------------------------------------------------------------------
+# X          Input Frame
+# threshold  Threshold value in interval [0, 1] for dominant pattern per column (e.g., 0.8 means
+#            that 80% of the entries per column must adhere to this pattern to be dominant)
+# replace    The string disguised missing values are replaced with
+# ---------------------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME      TYPE             MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# Y         Frame[String]    Frame X including detected disguised missing values
-# ----------------------------------------------------------------------------------------------------------------------
+# -------------------------------------------------------------------------------------------------
+# Y     Frame X including detected disguised missing values
+# -------------------------------------------------------------------------------------------------
 
 s_dmv = function(Frame[String] X, Double threshold=0.8, String replace="NA") return (Frame[String] Y) {
   if( threshold < 0 | threshold > 1 )
diff --git a/scripts/builtin/ema.dml b/scripts/builtin/ema.dml
index 508c146f36..4f9c2ec7f2 100644
--- a/scripts/builtin/ema.dml
+++ b/scripts/builtin/ema.dml
@@ -21,26 +21,22 @@
 
 # This function imputes values with exponential moving average (single, double or triple).
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME        TYPE           DEFAULT  MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# X           Frame[Double]  ---      Frame that contains timeseries data that needs to be imputed
-#                                     search_iterations	Integer	--	Budget iterations for parameter optimisation,
-#                                     used if parameters weren't set
-# mode        String         ---      Type of EMA method. Either "single", "double" or "triple"
-# freq        Double         ---      Seasonality when using triple EMA.
-# alpha       Double         ---      alpha- value for EMA
-# beta        Double         ---      beta- value for EMA
-# gamma       Double         ---      gamma- value for EMA
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# ------------------------------------------------------------------------------------------
+# X       Frame that contains time series data that needs to be imputed
+# search_iterations  Budget iterations for parameter optimization,
+#                    used if parameters weren't set
+# mode    Type of EMA method. Either "single", "double" or "triple"
+# freq    Seasonality when using triple EMA.
+# alpha   alpha- value for EMA
+# beta    beta- value for EMA
+# gamma   gamma- value for EMA
+# ------------------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME       TYPE              MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# M          Frame[Double]     Frame with EMA results
-# ----------------------------------------------------------------------------------------------------------------------
+# -----------------------------------------------------------------------------------------------
+# M     Frame with EMA results
+# -----------------------------------------------------------------------------------------------
 
 # TODO: convert to DML builtin using cumsumprod(data, alpha) 
 
diff --git a/scripts/builtin/executePipeline.dml b/scripts/builtin/executePipeline.dml
index 9eae1a8a74..d6729e72c5 100644
--- a/scripts/builtin/executePipeline.dml
+++ b/scripts/builtin/executePipeline.dml
@@ -21,37 +21,33 @@
 
 # This function execute pipeline.
 #
-# INPUT   PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME                TYPE               DEFAULT  MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# logical             Frame[String]      NULL     ---
-# pipeline            Frame[String]      ---      ---
-# X                   Matrix[Double]     ---      ---
-# Y                   Matrix[Double]     ---      ---
-# Xtest               Matrix[Double]     ---      ---
-# Ytest               Matrix[Double]     ---      ---
-# metaList            List[Unknown]      ---      ---
-# hyperParameters     Matrix[Double]     ---      ---
-# hpForPruning        Matrix[Double]      0       ---
-# changesByOp         Matrix[Double]      0       ---
-# flagsCount          Integer            ---      ---
-# test                Boolean             FALSE   ---
-# verbose             Boolean            ---      ---
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# ----------------------------------------------------------------------------------------
+# logical           ---
+# pipeline          ---
+# X                 ---
+# Y                 ---
+# Xtest             ---
+# Ytest             ---
+# metaList          ---
+# hyperParameters   ---
+# hpForPruning      ---
+# changesByOp       ---
+# flagsCount        ---
+# test              ---
+# verbose           ---
+# ----------------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME                TYPE                       MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# X                   Matrix[Double]             ---
-# Y                   Matrix[Double]             ---
-# Xtest               Matrix[Double]             ---
-# Ytest               Matrix[Double]             ---
-# t2                  Double                     ---
-# hpForPruning        Matrix[Double]             ---
-# changesByOp         Matrix[Double]             ---
-# ----------------------------------------------------------------------------------------------------------------------
+# --------------------------------------------------------------------------------------
+# X              ---
+# Y              ---
+# Xtest          ---
+# Ytest          ---
+# t2             ---
+# hpForPruning   ---
+# changesByOp    ---
+# --------------------------------------------------------------------------------------
 
 s_executePipeline = function(Frame[String] pipeline, Matrix[Double] Xtrain,  Matrix[Double] Ytrain, 
   Matrix[Double] Xtest,  Matrix[Double] Ytest, List[Unknown] metaList, Matrix[Double] hyperParameters, Matrix[Double] hpForPruning = as.matrix(0),
diff --git a/scripts/builtin/ffPredict.dml b/scripts/builtin/ffPredict.dml
index 98a21def5b..4a3b49617d 100644
--- a/scripts/builtin/ffPredict.dml
+++ b/scripts/builtin/ffPredict.dml
@@ -21,22 +21,17 @@
 
 # This builtin function makes prediction given data and trained feedforward neural network model
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME        TYPE            DEFAULT   MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# Model       List[unknown]    ---      Trained ff neural network model
-# X           Matrix[Double]   ---      Data used for making predictions
-# batch_size  Integer          128      Batch size
-#
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# --------------------------------------------------------------------------------------------
+# Model       Trained ff neural network model
+# X           Data used for making predictions
+# batch_size  Batch size
+# --------------------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME        TYPE                      MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# pred        Double                    Predicted value
-# ----------------------------------------------------------------------------------------------------------------------
+# ---------------------------------------------------------------------------------------
+# pred   Predicted value
+# ---------------------------------------------------------------------------------------
 
 source("nn/layers/feedForward.dml") as ff_pass
 
diff --git a/scripts/builtin/ffTrain.dml b/scripts/builtin/ffTrain.dml
index 351538521c..e3e7833008 100644
--- a/scripts/builtin/ffTrain.dml
+++ b/scripts/builtin/ffTrain.dml
@@ -21,33 +21,29 @@
 
 # This builtin function trains simple feed-forward neural network. The architecture of the
 # networks is: affine1 -> relu -> dropout -> affine2 -> configurable output activation function.
-# Hidden layer has 128 neurons. Dropout rate is 0.35. Input and ouptut sizes are inferred from X and Y.
+# Hidden layer has 128 neurons. Dropout rate is 0.35. Input and output sizes are inferred from X and Y.
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME              TYPE              DEFAULT   MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# X                 Matrix[Double]    ---       Training data
-# Y                 Matrix[Double]    ---       Labels/Target values
-# batch_size        Integer           64        Batch size
-# epochs            Integer           20        Number of epochs
-# learning_rate     Double            0.003     Learning rate
-# out_activation    String            ---       User specified ouptut activation function. Possible values:
-#                                               "sigmoid", "relu", "lrelu", "tanh", "softmax", "logits" (no activation).
-# loss_fcn          String            ---       User specified loss function. Possible values:
-#                                               "l1", "l2", "log_loss", "logcosh_loss", "cel" (cross-entropy loss).
-# shuffle           Boolean           FALSE     Flag which indicates if dataset should be shuffled or not
-# validation_split  Double            0.0       Fraction of training set used as validation set
-# seed              Integer           -1        Seed for model initialization
-# verbose           Boolean           FALSE     Flag which indicates if function should print to stdout
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# ------------------------------------------------------------------------------------------
+# X                 Training data
+# Y                 Labels/Target values
+# batch_size        Batch size
+# epochs            Number of epochs
+# learning_rate     Learning rate
+# out_activation    User specified output activation function. Possible values:
+#                   "sigmoid", "relu", "lrelu", "tanh", "softmax", "logits" (no activation).
+# loss_fcn          User specified loss function. Possible values:
+#                   "l1", "l2", "log_loss", "logcosh_loss", "cel" (cross-entropy loss).
+# shuffle           Flag which indicates if dataset should be shuffled or not
+# validation_split  Fraction of training set used as validation set
+# seed              Seed for model initialization
+# verbose           Flag which indicates if function should print to stdout
+# ------------------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME              TYPE                        MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# model             List[unknown]               Trained model which can be used in ffPredict
-# ----------------------------------------------------------------------------------------------------------------------
+# -------------------------------------------------------------------------------
+# model  Trained model which can be used in ffPredict
+# -------------------------------------------------------------------------------
 
 source("nn/layers/affine.dml") as affine
 source("nn/layers/dropout.dml") as dropout
diff --git a/scripts/builtin/fit_pipeline.dml b/scripts/builtin/fit_pipeline.dml
index d67af62739..4b4665e59e 100644
--- a/scripts/builtin/fit_pipeline.dml
+++ b/scripts/builtin/fit_pipeline.dml
@@ -22,30 +22,29 @@
 # This script will read the dirty and clean data, then it will apply the best pipeline on dirty data
 # and then will classify both cleaned dataset and check if the cleaned dataset is performing same as original dataset
 # in terms of classification accuracy
-
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME              TYPE               DEFAULT            MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# trainData         Frame[Unknown]      ---
-# testData          Frame[Unknown]      ---
-# metaData          Frame[Unknown]      as.frame("NULL")
-# lp                Frame[Unknown]      ---
-# pip               Frame[Unknown]      ---
-# hp                Frame[Unknown]      ---
-# evaluationFunc    String              ---
-# evalFunHp         Matrix[Double]      ---
-# isLastLabel       Boolean             TRUE
-# correctTypos      Boolean             FALSE
 #
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# -------------------------------------------------------------------------------
+# NAME             MEANING
+# -------------------------------------------------------------------------------
+# trainData        ---
+# testData         ---
+# metaData         ---
+# lp               ---
+# pip              ---
+# hp               ---
+# evaluationFunc   ---
+# evalFunHp        ---
+# isLastLabel      ---
+# correctTypos     ---
+# -------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME           TYPE             MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# scores         Matrix[Double]   ---
-# ----------------------------------------------------------------------------------------------------------------------
+# ------------------------------------------------------------------------------------------------
+# NAME      MEANING
+# ------------------------------------------------------------------------------------------------
+# scores    ---
+# ------------------------------------------------------------------------------------------------
 
 source("scripts/pipelines/scripts/utils.dml") as utils;
 source("scripts/builtin/topk_cleaning.dml") as topk;
diff --git a/scripts/builtin/fixInvalidLengths.dml b/scripts/builtin/fixInvalidLengths.dml
index f6ac2c1569..0e9ec8ba14 100644
--- a/scripts/builtin/fixInvalidLengths.dml
+++ b/scripts/builtin/fixInvalidLengths.dml
@@ -19,25 +19,21 @@
 #
 #-------------------------------------------------------------
 
-# Fix invalid lenghts
+# Fix invalid lengths
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME     TYPE              DEFAULT   MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# F1       Frame[Unknown]    ---
-# mask     Matrix[Double]    ---
-# ql       Double            0.05
-# qu       Double            0.99
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# -------------------
+# F1    ---
+# mask  ---
+# ql    ---
+# qu    ---
+# -------------------
 #
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME  TYPE              MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# out   Frame[Unknown]    ---
-# M     Matrix[Double]    ---
-# ----------------------------------------------------------------------------------------------------------------------
+# -------------------
+# out   ---
+# M     ---
+# -------------------
 
 s_fixInvalidLengths = function(Frame[Unknown] F1, Matrix[Double] mask, Double ql = 0.05, Double qu = 0.99)
 return (Frame[Unknown] X, Matrix[Double] mask, Matrix[Double] qLow, Matrix[Double] qUp)
diff --git a/scripts/builtin/fixInvalidLengthsApply.dml b/scripts/builtin/fixInvalidLengthsApply.dml
index 99ca0b692d..a8c10dc052 100644
--- a/scripts/builtin/fixInvalidLengthsApply.dml
+++ b/scripts/builtin/fixInvalidLengthsApply.dml
@@ -19,25 +19,25 @@
 #
 #-------------------------------------------------------------
 
-# Fix invalid lenghts
+# Fix invalid lengths
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME     TYPE              DEFAULT   MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# X       Frame[Unknown]     ---
-# mask     Matrix[Double]    ---
-# ql       Double            0.05
-# qu       Double            0.99
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# ------------------------
+# NAME   MEANING
+# ------------------------
+# X      ---
+# mask   ---
+# ql     ---
+# qu     ---
+# ------------------------
 #
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME  TYPE              MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# out   Frame[Unknown]    ---
-# M     Matrix[Double]    ---
-# ----------------------------------------------------------------------------------------------------------------------
+# ------------------------
+# NAME  MEANING
+# ------------------------
+# out   ---
+# M     ---
+# ------------------------
 
 s_fixInvalidLengthsApply = function(Frame[Unknown] X, Matrix[Double] mask, Matrix[Double] qLow, Matrix[Double] qUp)
 return (Frame[Unknown] X)
diff --git a/scripts/builtin/frameSort.dml b/scripts/builtin/frameSort.dml
index cf447b4282..fa85a28946 100644
--- a/scripts/builtin/frameSort.dml
+++ b/scripts/builtin/frameSort.dml
@@ -22,28 +22,24 @@
 # Related to [SYSTEMDS-2662] dependency function for cleaning pipelines
 # Built-in for sorting frames
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME      TYPE             DEFAULT     MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# F         Frame[String]     ---       Data frame of string values
-# mask      Matrix[Double]   ---        matrix for identifying string columns
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# -------------------------------------------------------------------------------------
+# F     Data frame of string values
+# mask  matrix for identifying string columns
+# -------------------------------------------------------------------------------------
 #
 # OUTPUT:
-#----------------------------------------------------------------------------------------------------------------------
-# NAME      TYPE                         MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# f_odered  Frame[String]                sorted dataset by column 1 in decreasing order
-# ----------------------------------------------------------------------------------------------------------------------
+# -----------------------------------------------------------------------------------------
+# f_ordered  sorted dataset by column 1 in decreasing order
+# -----------------------------------------------------------------------------------------
 
 s_frameSort = function(Frame[String] F, Matrix[Double] mask, Boolean orderDesc = TRUE)
-return (Frame[String] f_odered)
+return (Frame[String] f_ordered)
 {
   index = vectorToCsv(mask)
   # recode logical pipelines for easy handling
   jspecR = "{ids:true, recode:["+index+"]}";
   [X, M] = transformencode(target=F, spec=jspecR);
   ordered = order(target = X, by = 1, decreasing=orderDesc, index.return=FALSE)
-  f_odered = transformdecode(target=ordered, spec=jspecR, meta=M);
+  f_ordered = transformdecode(target=ordered, spec=jspecR, meta=M);
 }
diff --git a/scripts/builtin/frequencyEncode.dml b/scripts/builtin/frequencyEncode.dml
index b34ec17921..3eb475ba14 100644
--- a/scripts/builtin/frequencyEncode.dml
+++ b/scripts/builtin/frequencyEncode.dml
@@ -19,13 +19,19 @@
 #
 #-------------------------------------------------------------
 
-#######################################################################
 # function frequency conversion
-# Inputs: The input dataset X, and  mask of the columns
-# Output: categorical columns are replaced with their frequencies
-#######################################################################
-
-
+#
+# INPUT:
+# -------------------------------------------------------------------------------------
+# X     dataset x
+# mask  mask of the columns for frequency conversion
+# -------------------------------------------------------------------------------------
+#
+# OUTPUT:
+# -----------------------------------------------------------------------------------------
+# X          categorical columns are replaced with their frequencies
+# freqCount  the frequency counts for the different categoricals
+# -----------------------------------------------------------------------------------------
 
 m_frequencyEncode = function(Matrix[Double] X, Matrix[Double] mask)
 return (Matrix[Double] X, Matrix[Double] freqCount) {
diff --git a/scripts/builtin/frequencyEncodeApply.dml b/scripts/builtin/frequencyEncodeApply.dml
index a7e6a67db4..a6fddd16d3 100644
--- a/scripts/builtin/frequencyEncodeApply.dml
+++ b/scripts/builtin/frequencyEncodeApply.dml
@@ -19,6 +19,20 @@
 #
 #-------------------------------------------------------------
 
+# Apply frequency encoding using previously computed frequency counts
+#
+# INPUT:
+# -------------------------------------------------------------------------------------
+# X          dataset x
+# freqCount  the frequency counts for the different categoricals
+# -------------------------------------------------------------------------------------
+#
+# OUTPUT:
+# -----------------------------------------------------------------------------------------
+# X          categorical columns are replaced with their frequencies given by freqCount
+# -----------------------------------------------------------------------------------------
+
+
 m_frequencyEncodeApply = function(Matrix[Double] X, Matrix[Double] freqCount)
 return (Matrix[Double] X) {
 
diff --git a/scripts/builtin/garch.dml b/scripts/builtin/garch.dml
index b523b59ae5..58375de551 100644
--- a/scripts/builtin/garch.dml
+++ b/scripts/builtin/garch.dml
@@ -22,36 +22,32 @@
 # This is a builtin function that implements GARCH(1,1), a statistical model used in analyzing time-series data where the variance
 # error is believed to be serially autocorrelated
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME              TYPE              DEFAULT   MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# X                 Matrix[Double]    ---       The input Matrix to apply Arima on.
-# kmax              Integer           ---       Number of iterations
-# momentum          Double            ---       Momentum for momentum-gradient descent (set to 0 to deactivate)
-# start_stepsize    Double            ---       Initial gradient-descent stepsize
-# end_stepsize      Double            ---       gradient-descent stepsize at end (linear descent)
-# start_vicinity    Double            ---       proportion of randomness of restart-location for gradient descent at beginning
-# end_vicinity      Double            ---       same at end (linear decay)
-# sim_seed          Integer           ---       seed for simulation of process on fitted coefficients
-# verbose           Boolean           ---       verbosity, comments during fitting
-# ----------------------------------------------------------------------------------------------------------------------
-#
-# OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME              TYPE               MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# fitted_X          Matrix[Double]     simulated garch(1,1) process on fitted coefficients
-# fitted_var_hist   Matrix[Double]     variances of simulated fitted process
-# best_a0           Double             onstant term of fitted process
-# best_arch_coef    Double             1-st arch-coefficient of fitted process
-# best_var_coef     Double             1-st garch-coefficient of fitted process
-# ----------------------------------------------------------------------------------------------------------------------
-#
 # COMMENTS
 # This has some drawbacks: slow convergence of optimization (sort of simulated annealing/gradient descent)
 # TODO: use BFGS or BHHH if it is available (this are go to methods)
 # TODO: (only then) extend to garch(p,q); otherwise the search space is way too big for the current method
+#
+# INPUT:
+# -----------------------------------------------------------------------------------------
+# X                The input Matrix to apply GARCH on.
+# kmax             Number of iterations
+# momentum         Momentum for momentum-gradient descent (set to 0 to deactivate)
+# start_stepsize   Initial gradient-descent stepsize
+# end_stepsize     gradient-descent stepsize at end (linear descent)
+# start_vicinity   proportion of randomness of restart-location for gradient descent at beginning
+# end_vicinity     same at end (linear decay)
+# sim_seed         seed for simulation of process on fitted coefficients
+# verbose          verbosity, comments during fitting
+# -----------------------------------------------------------------------------------------
+#
+# OUTPUT:
+# --------------------------------------------------------------------------------------------------
+# fitted_X         simulated garch(1,1) process on fitted coefficients
+# fitted_var_hist  variances of simulated fitted process
+# best_a0          constant term of fitted process
+# best_arch_coef   1-st arch-coefficient of fitted process
+# best_var_coef    1-st garch-coefficient of fitted process
+# --------------------------------------------------------------------------------------------------
 
 m_garch = function(Matrix[Double] X, Integer kmax, Double momentum, Double start_stepsize, Double end_stepsize, Double start_vicinity,
   Double end_vicinity, Integer sim_seed, Boolean verbose)
diff --git a/scripts/builtin/gaussianClassifier.dml b/scripts/builtin/gaussianClassifier.dml
index bfdbfc59c5..849f6d5b90 100644
--- a/scripts/builtin/gaussianClassifier.dml
+++ b/scripts/builtin/gaussianClassifier.dml
@@ -22,29 +22,25 @@
 # Computes the parameters needed for Gaussian Classification.
 # Thus it computes the following per class: the prior probability,
 # the inverse covariance matrix, the mean per feature and the determinant
-# of the covariance matrix. Furthermore (if not explicitely defined), it
+# of the covariance matrix. Furthermore (if not explicitly defined), it
 # adds some small smoothing value along the variances, to prevent
 # numerical errors / instabilities.
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME                  TYPE               DEFAULT  MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# D                     Matrix[Double]     ---      Input matrix (training set)
-# C                     Matrix[Double]     ---      Target vector
-# varSmoothing          Double             1e-9     Smoothing factor for variances
-# verbose               Boolean            TRUE     Print accuracy of the training set
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# ----------------------------------------------------------------------------------
+# D             Input matrix (training set)
+# C             Target vector
+# varSmoothing  Smoothing factor for variances
+# verbose       Print accuracy of the training set
+# ----------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME                  TYPE                        MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# classPriors           Matrix[Double]              Vector storing the class prior probabilities
-# classMeans            Matrix[Double]              Matrix storing the means of the classes
-# classInvCovariances   List[Unknown]               List of inverse covariance matrices
-# determinants          Matrix[Double]              Vector storing the determinants of the classes
-# ----------------------------------------------------------------------------------------------------------------------
+# ------------------------------------------------------------------------------------------
+# classPriors           Vector storing the class prior probabilities
+# classMeans            Matrix storing the means of the classes
+# classInvCovariances   List of inverse covariance matrices
+# determinants          Vector storing the determinants of the classes
+# ------------------------------------------------------------------------------------------
 
 m_gaussianClassifier = function(Matrix[Double] D, Matrix[Double] C, Double varSmoothing=1e-9, Boolean verbose = TRUE)
   return (Matrix[Double] classPriors, Matrix[Double] classMeans,
diff --git a/scripts/builtin/getAccuracy.dml b/scripts/builtin/getAccuracy.dml
index 5e385349fc..6738d36e98 100644
--- a/scripts/builtin/getAccuracy.dml
+++ b/scripts/builtin/getAccuracy.dml
@@ -21,21 +21,17 @@
 
 # This builtin function compute the weighted and simple accuracy for given predictions
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME            TYPE              DEFAULT   MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# y               Matrix[Double]    ---       Ground truth (Actual Labels)
-# yhat            Matrix[Double]    ---       Predictions (Predicted labels)
-# isWeighted      Boolean           FALSE     Flag for weighted or non-weighted accuracy calculation
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# --------------------------------------------------------------------------------------
+# y           Ground truth (Actual Labels)
+# yhat        Predictions (Predicted labels)
+# isWeighted  Flag for weighted or non-weighted accuracy calculation
+# --------------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME            TYPE                MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# accuracy        Double              accuracy of the predicted labels
-# ----------------------------------------------------------------------------------------------------------------------
+# --------------------------------------------------------------------------------------------
+# accuracy  accuracy of the predicted labels
+# --------------------------------------------------------------------------------------------
 
 m_getAccuracy = function(Matrix[Double] y, Matrix[Double] yhat, Boolean isWeighted = FALSE)
 return (Double accuracy)
diff --git a/scripts/builtin/glm.dml b/scripts/builtin/glm.dml
index cec5975338..c07a98337a 100644
--- a/scripts/builtin/glm.dml
+++ b/scripts/builtin/glm.dml
@@ -22,45 +22,9 @@
 # This script solves GLM regression using NEWTON/FISHER scoring with trust regions. The glm-function is a flexible
 # generalization of ordinary linear regression that allows for response variables that have error distribution models.
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME  TYPE             DEFAULT     MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# X     Matrix[Double]    ---        matrix X of feature vectors
-# Y     Matrix[Double]    ---        matrix Y with either 1 or 2 columns:
-#                                    if dfam = 2, Y is 1-column Bernoulli or 2-column Binomial (#pos, #neg)
-# dfam  Int               1          Distribution family code: 1 = Power, 2 = Binomial
-# vpow  Double            0.0        Power for Variance defined as (mean)^power (ignored if dfam != 1):
-#                                    0.0 = Gaussian, 1.0 = Poisson, 2.0 = Gamma, 3.0 = Inverse Gaussian
-# link  Int               0          Link function code: 0 = canonical (depends on distribution),
-#                                    1 = Power, 2 = Logit, 3 = Probit, 4 = Cloglog, 5 = Cauchit
-# lpow  Double            1.0        Power for Link function defined as (mean)^power (ignored if link != 1):
-#                                    -2.0 = 1/mu^2, -1.0 = reciprocal, 0.0 = log, 0.5 = sqrt, 1.0 = identity
-# yneg  Double            0.0        Response value for Bernoulli "No" label, usually 0.0 or -1.0
-# icpt  Int               0          Intercept presence, X columns shifting and rescaling:
-#                                    0 = no intercept, no shifting, no rescaling;
-#                                    1 = add intercept, but neither shift nor rescale X;
-#                                    2 = add intercept, shift & rescale X columns to mean = 0, variance = 1
-# reg   Double            0.0        Regularization parameter (lambda) for L2 regularization
-# tol   Double            0.000001   Tolerance (epsilon)
-# disp  Double            0.0        (Over-)dispersion value, or 0.0 to estimate it from data
-# moi   Int               200        Maximum number of outer (Newton / Fisher Scoring) iterations
-# mii   Int               0          Maximum number of inner (Conjugate Gradient) iterations, 0 = no maximum
-#
-# ----------------------------------------------------------------------------------------------------------------------
-# 
-# OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME   TYPE                     MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# beta   Matrix[Double]           Matrix beta, whose size depends on icpt:
-#                                 icpt=0: ncol(X) x 1;  icpt=1: (ncol(X) + 1) x 1;  icpt=2: (ncol(X) + 1) x 2
-#-----------------------------------------------------------------------------------------------------------------------
-#
 # In addition, some GLM statistics are provided as console output by setting verbose=TRUE, one comma-separated name-value
 # pair per each line, as follows:
 #
-# NAME                  MEANING
 # ----------------------------------------------------------------------------------------------------------------------
 # TERMINATION_CODE      A positive integer indicating success/failure as follows:
 #                       1 = Converged successfully; 2 = Maximum number of iterations reached; 
@@ -80,7 +44,6 @@
 # The Log file, when requested, contains the following per-iteration variables in CSV format,
 # each line containing triple (NAME, ITERATION, VALUE) with ITERATION = 0 for initial values:
 #
-# NAME                  MEANING
 # ----------------------------------------------------------------------------------------------------------------------
 # NUM_CG_ITERS          Number of inner (Conj.Gradient) iterations in this outer iteration
 # IS_TRUST_REACHED      1 = trust region boundary was reached, 0 = otherwise
@@ -98,8 +61,7 @@
 #
 # SOME OF THE SUPPORTED GLM DISTRIBUTION FAMILIES
 # AND LINK FUNCTIONS:
-# ----------------------------------------------------------------------------------------------------------------------
-# INPUT PARAMETERS:    MEANING:            Cano-
+#
 # dfam vpow link lpow  Distribution.link   nical?
 # ----------------------------------------------------------------------------------------------------------------------
 #  1   0.0   1  -1.0   Gaussian.inverse
@@ -124,6 +86,37 @@
 #  2    *    4    *    Binomial.cloglog
 #  2    *    5    *    Binomial.cauchit
 # ----------------------------------------------------------------------------------------------------------------------
+#
+# INPUT:
+# --------------------------------------------------------------------------------------------
+# X        matrix X of feature vectors
+# Y        matrix Y with either 1 or 2 columns:
+#          if dfam = 2, Y is 1-column Bernoulli or 2-column Binomial (#pos, #neg)
+# dfam     Distribution family code: 1 = Power, 2 = Binomial
+# vpow     Power for Variance defined as (mean)^power (ignored if dfam != 1):
+#          0.0 = Gaussian, 1.0 = Poisson, 2.0 = Gamma, 3.0 = Inverse Gaussian
+# link     Link function code: 0 = canonical (depends on distribution),
+#          1 = Power, 2 = Logit, 3 = Probit, 4 = Cloglog, 5 = Cauchit
+# lpow     Power for Link function defined as (mean)^power (ignored if link != 1):
+#          -2.0 = 1/mu^2, -1.0 = reciprocal, 0.0 = log, 0.5 = sqrt, 1.0 = identity
+# yneg     Response value for Bernoulli "No" label, usually 0.0 or -1.0
+# icpt     Intercept presence, X columns shifting and rescaling:
+#          0 = no intercept, no shifting, no rescaling;
+#          1 = add intercept, but neither shift nor rescale X;
+#          2 = add intercept, shift & rescale X columns to mean = 0, variance = 1
+# reg      Regularization parameter (lambda) for L2 regularization
+# tol      Tolerance (epsilon)
+# disp     (Over-)dispersion value, or 0.0 to estimate it from data
+# moi      Maximum number of outer (Newton / Fisher Scoring) iterations
+# mii      Maximum number of inner (Conjugate Gradient) iterations, 0 = no maximum
+# verbose  if the Algorithm should be verbose
+# ------------------------------------------------------------------------------------------
+# 
+# OUTPUT:
+# --------------------------------------------------------------------------------------------
+# beta  Matrix beta, whose size depends on icpt:
+#       icpt=0: ncol(X) x 1;  icpt=1: (ncol(X) + 1) x 1;  icpt=2: (ncol(X) + 1) x 2
+#---------------------------------------------------------------------------------------------
 
 m_glm = function(Matrix[Double] X, Matrix[Double] Y, Integer dfam=1, 
   Double vpow=0.0, Integer link=0, Double lpow=1.0, Double yneg=0.0,
diff --git a/scripts/builtin/glmPredict.dml b/scripts/builtin/glmPredict.dml
index 484fdf4e47..3c0e09c6ba 100644
--- a/scripts/builtin/glmPredict.dml
+++ b/scripts/builtin/glmPredict.dml
@@ -19,41 +19,8 @@
 #
 #-------------------------------------------------------------
 
-# THIS SCRIPT APPLIES THE ESTIMATED PARAMETERS OF A GLM-TYPE REGRESSION TO A NEW (TEST) DATASET
+# Applies the estimated parameters of a GLM type regression to a new dataset
 #
-# INPUTS PARAMETERS:
-# ---------------------------------------------------------------------------------------------
-# NAME      TYPE            DEFAULT  MEANING
-# ---------------------------------------------------------------------------------------------
-# X         Matrix[Double]  ---      Matrix X of records (feature vectors)
-# B         Matrix[Double]  ---      GLM regression parameters (the betas), with dimensions
-#                                    ncol(X)   x k: do not add intercept
-#                                    ncol(X)+1 x k: add intercept as given by the last B-row
-#                                    if k > 1, use only B[, 1] unless it is Multinomial Logit (dfam=3)
-# ytest     Matrix[Double]  " "      Response matrix Y, with the following dimensions:
-#                                    nrow(X) x 1  : for all distributions (dfam=1 or 2 or 3)
-#                                    nrow(X) x 2  : for Binomial (dfam=2) given by (#pos, #neg) counts
-#                                    nrow(X) x k+1: for Multinomial (dfam=3) given by category counts
-# dfam      Int             1        GLM distribution family: 1 = Power, 2 = Binomial, 3 = Multinomial Logit
-# vpow      Double          0.0      Power for Variance defined as (mean)^power (ignored if dfam != 1):
-#                                    0.0 = Gaussian, 1.0 = Poisson, 2.0 = Gamma, 3.0 = Inverse Gaussian
-# link      Int             0        Link function code: 0 = canonical (depends on distribution), 1 = Power,
-#                                    2 = Logit, 3 = Probit, 4 = Cloglog, 5 = Cauchit; ignored if Multinomial
-# lpow      Double          1.0      Power for Link function defined as (mean)^power (ignored if link != 1):
-#                                    -2.0 = 1/mu^2, -1.0 = reciprocal, 0.0 = log, 0.5 = sqrt, 1.0 = identity
-# disp      Double          1.0      Dispersion value, when available
-# verbose   Boolean         TRUE     Print statistics to stdout
-# ---------------------------------------------------------------------------------------------
-
-# OUTPUTS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME  TYPE                MEANING
-# ---------------------------------------------------------------------------------------------
-# M     Matrix[Double]      Matrix M of predicted means/probabilities:
-#                           nrow(X) x 1  : for Power-type distributions (dfam=1)
-#                           nrow(X) x 2  : for Binomial distribution (dfam=2), column 2 is "No"
-#                           nrow(X) x k+1: for Multinomial Logit (dfam=3), col# k+1 is baseline
-# ---------------------------------------------------------------------------------------------
 # Additional statistics are printed one per each line, in the following 
 # CSV format: NAME,[COLUMN],[SCALED],VALUE
 # ---
@@ -84,7 +51,36 @@
 # R2_NOBIAS               +             R^2 of Y column residual with bias subtracted
 # ADJUSTED_R2_NOBIAS      +             Adjusted R^2 of Y column residual with bias subtracted
 # ---------------------------------------------------------------------------------------------
-
+#
+# INPUT:
+# -------------------------------------------------------------------
+# X        Matrix X of records (feature vectors)
+# B        GLM regression parameters (the betas), with dimensions
+#          ncol(X)   x k: do not add intercept
+#          ncol(X)+1 x k: add intercept as given by the last B-row
+#          if k > 1, use only B[, 1] unless it is Multinomial Logit (dfam=3)
+# ytest    Response matrix Y, with the following dimensions:
+#          nrow(X) x 1  : for all distributions (dfam=1 or 2 or 3)
+#          nrow(X) x 2  : for Binomial (dfam=2) given by (#pos, #neg) counts
+#          nrow(X) x k+1: for Multinomial (dfam=3) given by category counts
+# dfam     GLM distribution family: 1 = Power, 2 = Binomial, 3 = Multinomial Logit
+# vpow     Power for Variance defined as (mean)^power (ignored if dfam != 1):
+#          0.0 = Gaussian, 1.0 = Poisson, 2.0 = Gamma, 3.0 = Inverse Gaussian
+# link     Link function code: 0 = canonical (depends on distribution), 1 = Power,
+#          2 = Logit, 3 = Probit, 4 = Cloglog, 5 = Cauchit; ignored if Multinomial
+# lpow     Power for Link function defined as (mean)^power (ignored if link != 1):
+#          -2.0 = 1/mu^2, -1.0 = reciprocal, 0.0 = log, 0.5 = sqrt, 1.0 = identity
+# disp     Dispersion value, when available
+# verbose  Print statistics to stdout
+# -------------------------------------------------------------------
+#
+# OUTPUT:
+# -------------------------------------------------------------------------
+# M     Matrix M of predicted means/probabilities:
+#       nrow(X) x 1  : for Power-type distributions (dfam=1)
+#       nrow(X) x 2  : for Binomial distribution (dfam=2), column 2 is "No"
+#       nrow(X) x k+1: for Multinomial Logit (dfam=3), col# k+1 is baseline
+# -------------------------------------------------------------------------
 
 
 m_glmPredict = function(Matrix[Double] X, Matrix[Double] B, Matrix[Double] ytest=matrix(0,0,0),
diff --git a/scripts/builtin/gmm.dml b/scripts/builtin/gmm.dml
index 62e9931691..4624e5e2af 100644
--- a/scripts/builtin/gmm.dml
+++ b/scripts/builtin/gmm.dml
@@ -22,36 +22,31 @@
 # The gmm-function implements builtin Gaussian Mixture Model with four different types of covariance matrices
 # i.e., VVV, EEE, VVI, VII and two initialization methods namely "kmeans" and "random".
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME            TYPE             DEFAULT     MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# X               Matrix[Double]   ---         Matrix X
-# n_components    Integer           3          Number of n_components in the Gaussian mixture model
-# model           String           "VVV"       "VVV": unequal variance (full),each component has its own general covariance matrix
-#                                              "EEE": equal variance (tied), all components share the same general covariance matrix
-#                                              "VVI": spherical, unequal volume (diag), each component has its own diagonal
-#                                              covariance matrix
-#                                              "VII": spherical, equal volume (spherical), each component has its own single variance
-# init_param      String           "kmeans"    initialize weights with "kmeans" or "random"
-# iterations      Integer          100         Number of iterations
-# reg_covar       Double           1e-6        regularization parameter for covariance matrix
-# tol             Double           0.000001    tolerance value for convergence
-#
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# ---------------------------------------------------------------------------------------
+# X             Matrix X
+# n_components  Number of components in the Gaussian mixture model
+# model         "VVV": unequal variance (full),each component has its own general covariance matrix
+#               "EEE": equal variance (tied), all components share the same general covariance matrix
+#               "VVI": spherical, unequal volume (diag), each component has its own diagonal
+#               covariance matrix
+#               "VII": spherical, equal volume (spherical), each component has its own single variance
+# init_params   initialize weights with "kmeans" or "random"
+# iter          Number of iterations
+# reg_covar     regularization parameter for covariance matrix
+# tol           tolerance value for convergence
+# ---------------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME            TYPE                 MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# labels          Matrix[Double]       Prediction matrix
-# predict_prob    Matrix[Double]       Probability of the predictions
-# df              Integer              Number of estimated parameters
-# bic             Double               Bayesian information criterion for best iteration
-# mu              Matrix[Double]       fitted clusters mean
-# weight          Matrix[Double]       A matrix whose [i,k]th entry is the probability that observation i in the test data
-#                                      belongs to the kth class
-# ----------------------------------------------------------------------------------------------------------------------
+# -----------------------------------------------------------------------------------------------
+# labels        Prediction matrix
+# predict_prob  Probability of the predictions
+# df            Number of estimated parameters
+# bic           Bayesian information criterion for best iteration
+# mu            fitted clusters mean
+# weight        A matrix whose [i,k]th entry is the probability that observation i in the test data
+#               belongs to the kth class
+# -----------------------------------------------------------------------------------------------
 
 m_gmm = function(Matrix[Double] X, Integer n_components = 3, String model = "VVV", String init_params = "kmeans", 
   Integer iter = 100, Double reg_covar = 1e-6, Double tol = 0.000001, Integer seed = -1, Boolean verbose = FALSE )
diff --git a/scripts/builtin/gmmPredict.dml b/scripts/builtin/gmmPredict.dml
index 54bc4db273..550cf6edfc 100644
--- a/scripts/builtin/gmmPredict.dml
+++ b/scripts/builtin/gmmPredict.dml
@@ -21,27 +21,22 @@
 
 # This function is a Prediction function for a Gaussian Mixture Model (gmm).
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME                   TYPE             DEFAULT   MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# X                      Matrix[Double]   ---       Matrix X (instances to be clustered)
-# weight                 Matrix[Double]   ---       Weight of learned model
-# mu                     Matrix[Double]   ---       fitted clusters mean
-# precisions_cholesky    Matrix[Double]   ---       fitted precision matrix for each mixture
-# model                  String           ---       fitted model
+# compute posterior probabilities for new instances given the variance and mean of fitted data
 #
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# ------------------------------------------------------------------------------------------
+# X                     Matrix X (instances to be clustered)
+# weight                Weight of learned model
+# mu                    fitted clusters mean
+# precisions_cholesky   fitted precision matrix for each mixture
+# model                 fitted model
+# ------------------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME                   TYPE        MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# predict                Double      predicted cluster labels
-# posterior_prob         Double      probabilities of belongingness
-# ----------------------------------------------------------------------------------------------------------------------
-#
-# compute posterior probabilities for new instances given the variance and mean of fitted data
+# ---------------------------------------------------------------------------------------------------
+# predict         predicted cluster labels
+# posterior_prob  probabilities of belongingness
+# ---------------------------------------------------------------------------------------------------
 
 m_gmmPredict = function(Matrix[Double] X, Matrix[Double] weight,
   Matrix[Double] mu, Matrix[Double] precisions_cholesky, String model = "VVV")
diff --git a/scripts/builtin/gnmf.dml b/scripts/builtin/gnmf.dml
index 18dad219e5..abd6659c80 100644
--- a/scripts/builtin/gnmf.dml
+++ b/scripts/builtin/gnmf.dml
@@ -28,23 +28,19 @@
 # Distributed nonnegative matrix factorization for web-scale dyadic
 # data analysis on mapreduce. WWW 2010: 681-690]
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME      TYPE               DEFAULT   MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# X         Matrix[Double]     ---       Matrix of feature vectors.
-# rnk       Integer            ---       Number of components into which matrix X is to be factored
-# eps       Double             1e-8      Tolerance
-# maxi      Integer            10        Maximum number of conjugate gradient iterations
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# --------------------------------------------------------------------------------------
+# X      Matrix of feature vectors.
+# rnk    Number of components into which matrix X is to be factored
+# eps    Tolerance
+# maxi   Maximum number of conjugate gradient iterations
+# --------------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME     TYPE                         MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# W        Matrix[Double]               List of pattern matrices, one for each repetition
-# H        Matrix[Double]               List of amplitude matrices, one for each repetition
-# ----------------------------------------------------------------------------------------------------------------------
+# --------------------------------------------------------------------------------------
+# W     List of pattern matrices, one for each repetition
+# H     List of amplitude matrices, one for each repetition
+# --------------------------------------------------------------------------------------
 
 m_gnmf = function(Matrix[Double] X, Integer rnk, Double eps = 1e-8, Integer maxi = 10)
   return (Matrix[Double] W, Matrix[Double] H) 
diff --git a/scripts/builtin/gridSearch.dml b/scripts/builtin/gridSearch.dml
index 8e53502257..9ef50895a9 100644
--- a/scripts/builtin/gridSearch.dml
+++ b/scripts/builtin/gridSearch.dml
@@ -22,40 +22,35 @@
 # The gridSearch-function is used to find the optimal hyper-parameters of a model which results in the most
 # accurate predictions. This function takes train and eval functions by name.
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME         TYPE               DEFAULT    MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# X            Matrix[Double]     ---        Input feature matrix
-# y            Matrix[Double]     ---        Input Matrix of vectors.
-# train        String             ---        Name ft of the train function to call via ft(trainArgs)
-# predict      String             ---        Name fp of the loss function to call via fp((predictArgs,B))
-# numB         Integer            ---        Maximum number of parameters in model B (pass the max because the size
-#                                            may vary with parameters like icpt or multi-class classification)
-# params       List[String]       ---        List of varied hyper-parameter names
-# dataArgs     List[String]       ---        List of data parameters (to identify data parameters by name i.e. list("X", "Y"))
-# paramValues  List[Unknown]      ---        List of matrices providing the parameter values as
-#                                            columnvectors for position-aligned hyper-parameters in 'params'
-# trainArgs    List[Unknown]      ---        named List of arguments to pass to the 'train' function, where
-#                                            gridSearch replaces enumerated hyper-parameter by name, if
-#                                            not provided or an empty list, the lm parameters are used
-# predictArgs  List[Unknown]      ---        List of arguments to pass to the 'predict' function, where
-#                                            gridSearch appends the trained models at the end, if
-#                                            not provided or an empty list, list(X, y) is used instead
-# cv           Boolean            FALSE      flag enabling k-fold cross validation, otherwise training loss
-# cvk          Integet            5          if cv=TRUE, specifies the the number of folds, otherwise ignored
-# verbose      Boolean            TRUE       flag for verbose debug output
-#
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# ----------------------------------------------------------------------------------------
+# X            Input feature matrix
+# y            Input Matrix of vectors.
+# train        Name ft of the train function to call via ft(trainArgs)
+# predict      Name fp of the loss function to call via fp((predictArgs,B))
+# numB         Maximum number of parameters in model B (pass the max because the size
+#              may vary with parameters like icpt or multi-class classification)
+# params       List of varied hyper-parameter names
+# dataArgs     List of data parameters (to identify data parameters by name, e.g. list("X", "Y"))
+# paramValues  List of matrices providing the parameter values as
+#              column vectors for position-aligned hyper-parameters in 'params'
+# trainArgs    named List of arguments to pass to the 'train' function, where
+#              gridSearch replaces enumerated hyper-parameter by name, if
+#              not provided or an empty list, the lm parameters are used
+# predictArgs  List of arguments to pass to the 'predict' function, where
+#              gridSearch appends the trained models at the end, if
+#              not provided or an empty list, list(X, y) is used instead
+# cv           flag enabling k-fold cross validation, otherwise training loss
+# cvk          if cv=TRUE, specifies the number of folds, otherwise ignored
+# verbose      flag for verbose debug output
+# ----------------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME         TYPE                         MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# B            Matrix[Double]               Matrix[Double]the trained model with minimal loss (by the 'predict' function)
-#                                           Multi-column models are returned as a column-major linearized column vector
-# opt          Matrix[Double]               one-row frame w/ optimal hyperparameters (by 'params' position)
-#-----------------------------------------------------------------------------------------------------------------------
+# ----------------------------------------------------------------------------------
+# B     the trained model with minimal loss (by the 'predict' function)
+#       Multi-column models are returned as a column-major linearized column vector
+# opt   one-row frame w/ optimal hyper-parameters (by 'params' position)
+#-----------------------------------------------------------------------------------
 
 m_gridSearch = function(Matrix[Double] X, Matrix[Double] y, String train, String predict,
     Integer numB=ncol(X), List[String] params, List[Unknown] paramValues,
diff --git a/scripts/builtin/hospitalResidencyMatch.dml b/scripts/builtin/hospitalResidencyMatch.dml
index 0d5c640829..848350e510 100644
--- a/scripts/builtin/hospitalResidencyMatch.dml
+++ b/scripts/builtin/hospitalResidencyMatch.dml
@@ -21,33 +21,6 @@
 
 # This script computes a solution for the hospital residency match problem.
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME                  TYPE            DEFAULT MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# R                     Matrix[Double]  ---     Residents matrix R.
-#                                               It must be an ORDERED  matrix.
-# H                     Matrix[Double]  ---     Hospitals matrix H.
-#                                               It must be an UNORDRED matrix.
-# capacity              Matrix[Double]  ---     capacity of Hospitals matrix C.
-#                                               It must be a [n*1] matrix with non zero values.
-#                                               i.e. the leftmost value in a row is the most preferred partner's index.
-#                                               i.e. the leftmost value in a row in P is the preference value for the acceptor
-#                                               with index 1 and vice-versa (higher is better).
-# verbose               Boolean         False   If the operation is verbose
-# OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME                  TYPE                    MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# residencyMatch        Matrix[Double]          Result Matrix
-#                                               If cell [i,j] is non-zero, it means that Resident i has matched with Hospital j.
-#                                               Further, if cell [i,j] is non-zero, it holds the preference value that led to the match.
-# hospitalMatch         Matrix[Double]          Result Matrix
-#                                               If cell [i,j] is non-zero, it means that Resident i has matched with Hospital j.
-#                                               Further, if cell [i,j] is non-zero, it holds the preference value that led to the match.
-#
-#
-# ----------------------------------------------------------------------------------------------------------------------
 # Residents.mtx:
 # 2.0,1.0,3.0
 # 1.0,2.0,3.0
@@ -82,7 +55,30 @@
 # Resident 1 has matched with Hospital 3 (since [1,3] is non-zero) at a preference level of 2.0.
 # Resident 2 has matched with Hospital 1 (since [2,1] is non-zero) at a preference level of 1.0.
 # Resident 3 has matched with Hospital 2 (since [3,2] is non-zero) at a preference level of 2.0.
-# ----------------------------------------------------------------------------------------------------------------------
+#
+# INPUT:
+# ----------------------------------------------------------------------------------
+# R         Residents matrix R.
+#           It must be an ORDERED  matrix.
+# H         Hospitals matrix H.
+#           It must be an UNORDERED matrix.
+# capacity  capacity of Hospitals matrix C.
+#           It must be a [n*1] matrix with non zero values.
+#           i.e. the leftmost value in a row is the most preferred partner's index.
+#           i.e. the leftmost value in a row in P is the preference value for the acceptor
+#           with index 1 and vice-versa (higher is better).
+# verbose   If the operation is verbose
+# ----------------------------------------------------------------------------------
+#
+# OUTPUT:
+# -----------------------------------------------------------------------------------------
+# residencyMatch   Result Matrix
+#                  If cell [i,j] is non-zero, it means that Resident i has matched with Hospital j.
+#                  Further, if cell [i,j] is non-zero, it holds the preference value that led to the match.
+# hospitalMatch    Result Matrix
+#                  If cell [i,j] is non-zero, it means that Resident i has matched with Hospital j.
+#                  Further, if cell [i,j] is non-zero, it holds the preference value that led to the match.
+# -----------------------------------------------------------------------------------------
 
 m_hospitalResidencyMatch = function(Matrix[Double] R, Matrix[Double] H, Matrix[Double] capacity, Boolean verbose = FALSE)
   return (Matrix[Double] residencyMatch, Matrix[Double] hospitalMatch)
diff --git a/scripts/builtin/hyperband.dml b/scripts/builtin/hyperband.dml
index 6830c7fc7b..3c2614e41c 100644
--- a/scripts/builtin/hyperband.dml
+++ b/scripts/builtin/hyperband.dml
@@ -27,30 +27,25 @@
 #    hyperband is hard-coded to use the number of iterations as a resource
 #    hyperband can only optimize continuous hyperparameters
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME        TYPE              DEFAULT    MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# X_train     Matrix[Double]    ---        Input Matrix of training vectors
-# y_train     Matrix[Double]    ---        Labels for training vectors
-# X_val       Matrix[Double]    ---        Input Matrix of validation vectors
-# y_val       Matrix[Double]    ---        Labels for validation vectors
-# params      List[String]      ---        List of parameters to optimize
-# paramRanges Matrix[Double]    ---        The min and max values for the uniform distributions to draw from.
-#                                          One row per hyper parameter, first column specifies min, second column max value.
-# R           Scalar[int]       81         Controls number of candidates evaluated
-# eta         Scalar[int]       3          Determines fraction of candidates to keep after each trial
-# verbose     Boolean           TRUE       If TRUE print messages are activated
-#
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# ------------------------------------------------------------------------------------------
+# X_train      Input Matrix of training vectors
+# y_train      Labels for training vectors
+# X_val        Input Matrix of validation vectors
+# y_val        Labels for validation vectors
+# params       List of parameters to optimize
+# paramRanges  The min and max values for the uniform distributions to draw from.
+#              One row per hyper parameter, first column specifies min, second column max value.
+# R            Controls number of candidates evaluated
+# eta          Determines fraction of candidates to keep after each trial
+# verbose      If TRUE print messages are activated
+# ------------------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME                TYPE                 MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# bestWeights         Matrix[Double]       1-column matrix of weights of best performing candidate
-# bestHyperParams     Frame[Unknown]       hyper parameters of best performing candidate
-# ----------------------------------------------------------------------------------------------------------------------
+# ----------------------------------------------------------------------------------------------
+# bestWeights      1-column matrix of weights of best performing candidate
+# bestHyperParams  hyper parameters of best performing candidate
+# ----------------------------------------------------------------------------------------------
 
 m_hyperband = function(Matrix[Double] X_train, Matrix[Double] y_train,
   Matrix[Double] X_val, Matrix[Double] y_val, List[String] params, 
diff --git a/scripts/builtin/img_brightness.dml b/scripts/builtin/img_brightness.dml
index 60a14c29d7..965c0641cc 100644
--- a/scripts/builtin/img_brightness.dml
+++ b/scripts/builtin/img_brightness.dml
@@ -19,17 +19,15 @@
 #
 #-------------------------------------------------------------
 
-# The img_brightness-function is an image data augumentation function. It changes the brightness of the image.
+# The img_brightness-function is an image data augmentation function. It changes the brightness of the image.
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME         TYPE              DEFAULT    MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# img_in       Matrix[Double]    ---        Input matrix/image
-# value        Double            ---        The amount of brightness to be changed for the image
-# channel_max  Integer           ---        Maximum value of the brightness of the image
-#
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+#
+# -----------------------------------------------------------------------------------------
+# img_in       Input matrix/image
+# value        The amount of brightness to be changed for the image
+# channel_max  Maximum value of the brightness of the image
+# -----------------------------------------------------------------------------------------
 #
 # OUTPUT:
 # ----------------------------------------------------------------------------------------------------------------------
diff --git a/scripts/builtin/img_crop.dml b/scripts/builtin/img_crop.dml
index e2ee34a052..e85301f8bb 100644
--- a/scripts/builtin/img_crop.dml
+++ b/scripts/builtin/img_crop.dml
@@ -19,25 +19,21 @@
 #
 #-------------------------------------------------------------
 
-# The img_crop-function is an image data augumentation function. It cuts out a subregion of an image.
+# The img_crop-function is an image data augmentation function. It cuts out a subregion of an image.
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME      TYPE               DEFAULT   MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# img_in    Matrix[Double]     ---       Input matrix/image
-# w         Integer            ---       The width of the subregion required
-# h         Integer            ---       The height of the subregion required
-# x_offset  Integer            ---       The horizontal coordinate in the image to begin the crop operation
-# y_offset  Integer            ---       The vertical coordinate in the image to begin the crop operation
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# ----------------------------------------------------------------------------------------
+# img_in    Input matrix/image
+# w         The width of the subregion required
+# h         The height of the subregion required
+# x_offset  The horizontal coordinate in the image to begin the crop operation
+# y_offset  The vertical coordinate in the image to begin the crop operation
+# ----------------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME     TYPE                MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# img_out  Matrix[Double]      Cropped matrix/image
-# ----------------------------------------------------------------------------------------------------------------------
+# --------------------------------------------------------------------------------------------------
+# img_out  Cropped matrix/image
+# --------------------------------------------------------------------------------------------------
 
 m_img_crop = function(Matrix[Double] img_in, Integer w, Integer h, Integer x_offset, Integer y_offset) return (Matrix[Double] img_out) {
   # crop - cut out a subregion of an image. Adapted from image_utils.dml
diff --git a/scripts/builtin/img_cutout.dml b/scripts/builtin/img_cutout.dml
index b9042fd7aa..cd3f432cd0 100644
--- a/scripts/builtin/img_cutout.dml
+++ b/scripts/builtin/img_cutout.dml
@@ -21,24 +21,20 @@
 
 # Image Cutout function replaces a rectangular section of an image with a constant value.
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME        TYPE            DEFAULT  MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# img_in      Matrix[Double]  ---      Input image as 2D matrix with top left corner at [1, 1]
-# x           Int             ---      Column index of the top left corner of the rectangle (starting at 1)
-# y           Int             ---      Row index of the top left corner of the rectangle (starting at 1)
-# width       Int             ---      Width of the rectangle (must be positive)
-# height      Int             ---      Height of the rectangle (must be positive)
-# fill_value  Double          ---      The value to set for the rectangle
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# ---------------------------------------------------------------------------------------------
+# img_in      Input image as 2D matrix with top left corner at [1, 1]
+# x           Column index of the top left corner of the rectangle (starting at 1)
+# y           Row index of the top left corner of the rectangle (starting at 1)
+# width       Width of the rectangle (must be positive)
+# height      Height of the rectangle (must be positive)
+# fill_value  The value to set for the rectangle
+# ---------------------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME        TYPE                     MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# img_out     Matrix[Double]           Output image as 2D matrix with top left corner at [1, 1]
-# ----------------------------------------------------------------------------------------------------------------------
+# ------------------------------------------------------------------------------------------
+# img_out  Output image as 2D matrix with top left corner at [1, 1]
+# ------------------------------------------------------------------------------------------
 
 m_img_cutout = function(Matrix[Double] img_in, Integer x, Integer y, Integer width, Integer height, Double fill_value) return (Matrix[Double] img_out) {
   rows = nrow(img_in)
diff --git a/scripts/builtin/img_invert.dml b/scripts/builtin/img_invert.dml
index b243cfa655..c52f5bed3a 100644
--- a/scripts/builtin/img_invert.dml
+++ b/scripts/builtin/img_invert.dml
@@ -19,22 +19,18 @@
 #
 #-------------------------------------------------------------
 
-# This is an image data augumentation function. It inverts an image.
+# This is an image data augmentation function. It inverts an image.
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME       TYPE            DEFAULT  MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# img_in     Matrix[Double]  ---      Input image
-# max_value  Double          ---      The maximum value pixels can have
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# ---------------------------------------------------------------------------------------------
+# img_in     Input image
+# max_value  The maximum value pixels can have
+# ---------------------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME       TYPE                     MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# img_out    Matrix[Double]           Output image
-# ----------------------------------------------------------------------------------------------------------------------
+# -------------------------------------------------------------------------------------------
+# img_out  Output image
+# -------------------------------------------------------------------------------------------
 
 m_img_invert = function(Matrix[Double] img_in, Double max_value) return (Matrix[Double] img_out) {
   img_out = max_value - img_in
diff --git a/scripts/builtin/img_mirror.dml b/scripts/builtin/img_mirror.dml
index 04311102d5..a8836f6fd2 100644
--- a/scripts/builtin/img_mirror.dml
+++ b/scripts/builtin/img_mirror.dml
@@ -19,22 +19,19 @@
 #
 #-------------------------------------------------------------
 
-# This function is an image data augumentation function. It flips an image on the X (horizontal) or Y (vertical) axis.
+# This function is an image data augmentation function.
+# It flips an image on the X (horizontal) or Y (vertical) axis.
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME       TYPE            DEFAULT  MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# img_in     Matrix[Double]  ---      Input matrix/image
-# max_value  Double          ---      The maximum value pixels can have
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# ---------------------------------------------------------------------------------------------
+# img_in           Input matrix/image
+# horizontal_axis  If TRUE, flip on the X (horizontal) axis, otherwise on the Y (vertical) axis
+# ---------------------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME       TYPE                     MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# img_out    Matrix[Double]           Flipped matrix/image
-# ----------------------------------------------------------------------------------------------------------------------
+# -------------------------------------------------------------------------------------------
+# img_out  Flipped matrix/image
+# -------------------------------------------------------------------------------------------
 
 m_img_mirror = function(Matrix[Double] img_in, Boolean horizontal_axis) return (Matrix[Double] img_out) {
   # flip an image on the x (horizontal) or y (vertical) axis
diff --git a/scripts/builtin/img_posterize.dml b/scripts/builtin/img_posterize.dml
index 8953191e65..91578b9c76 100644
--- a/scripts/builtin/img_posterize.dml
+++ b/scripts/builtin/img_posterize.dml
@@ -22,21 +22,17 @@
 # The Image Posterize function limits pixel values to 2^bits different values in the range [0, 255].
 # Assumes the input image can attain values in the range [0, 255].
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME       TYPE           DEFAULT  MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# img_in     Matrix[Double] ---      Input image
-# bits       Int            ---      The number of bits keep for the values.
-#                                    1 means black and white, 8 means every integer between 0 and 255.
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# -------------------------------------------------------------------------------------------
+# img_in  Input image
+# bits    The number of bits to keep for the values.
+#         1 means black and white, 8 means every integer between 0 and 255.
+# -------------------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME       TYPE                   MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# img_out    Matrix[Double]         Output image
-# ----------------------------------------------------------------------------------------------------------------------
+# ---------------------------------------------------------------------------------------------
+# img_out  Output image
+# ---------------------------------------------------------------------------------------------
 
 m_img_posterize = function(Matrix[Double] img_in, Integer bits) return (Matrix[Double] img_out) {
   img_out = (img_in %/% 2^(8 - bits)) * (2^(8 - bits))
diff --git a/scripts/builtin/img_rotate.dml b/scripts/builtin/img_rotate.dml
index a06737c233..c49826c210 100644
--- a/scripts/builtin/img_rotate.dml
+++ b/scripts/builtin/img_rotate.dml
@@ -22,21 +22,17 @@
 # The Image Rotate function rotates the input image counter-clockwise around the center.
 # Uses nearest neighbor sampling.
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME       TYPE            DEFAULT MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# img_in     Matrix[Double]  ---     Input image as 2D matrix with top left corner at [1, 1]
-# radians    Double          ---     The value by which to rotate in radian.
-# fill_value Double          ---     The background color revealed by the rotation
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# -----------------------------------------------------------------------------------------------
+# img_in      Input image as 2D matrix with top left corner at [1, 1]
+# radians     The value by which to rotate in radians.
+# fill_value  The background color revealed by the rotation
+# -----------------------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME       TYPE                    MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# img_out    Matrix[Double]          Output image as 2D matrix with top left corner at [1, 1]
-# ----------------------------------------------------------------------------------------------------------------------
+# ---------------------------------------------------------------------------------------------
+# img_out   Output image as 2D matrix with top left corner at [1, 1]
+# ---------------------------------------------------------------------------------------------
 
 m_img_rotate = function(Matrix[Double] img_in, Double radians, Double fill_value) return (Matrix[Double] img_out) {
   # Translation matrix for moving the origin to the center of the image
diff --git a/scripts/builtin/img_sample_pairing.dml b/scripts/builtin/img_sample_pairing.dml
index 949dc1092d..99147b2555 100644
--- a/scripts/builtin/img_sample_pairing.dml
+++ b/scripts/builtin/img_sample_pairing.dml
@@ -21,22 +21,18 @@
 
 # The image sample pairing function blends two images together.
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME       TYPE             DEFAULT MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# img_in1    Matrix[Double]   ---     First input image
-# img_in2    Matrix[Double]   ---     Second input image
-# weight     Double           ---     The weight given to the second image.
-#                                     0 means only img_in1, 1 means only img_in2 will be visible
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# -------------------------------------------------------------------------------------------
+# img_in1  First input image
+# img_in2  Second input image
+# weight   The weight given to the second image.
+#          0 means only img_in1, 1 means only img_in2 will be visible
+# -------------------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME       TYPE                    MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# img_out    Matrix[Double]          Output image
-# ----------------------------------------------------------------------------------------------------------------------
+# --------------------------------------------------------------------------------------------
+# img_out  Output image
+# --------------------------------------------------------------------------------------------
 
 m_img_sample_pairing = function(Matrix[Double] img_in1, Matrix[Double] img_in2, Double weight) return (Matrix[Double] img_out) {
   if (weight < 0 | 1 < weight) {
diff --git a/scripts/builtin/img_shear.dml b/scripts/builtin/img_shear.dml
index 745a25472d..2cf00592a6 100644
--- a/scripts/builtin/img_shear.dml
+++ b/scripts/builtin/img_shear.dml
@@ -22,22 +22,18 @@
 # This function applies a shearing transformation to an image.
 # Uses nearest neighbor sampling.
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME       TYPE             DEFAULT  MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# img_in     Matrix[Double]   ---      Input image as 2D matrix with top left corner at [1, 1]
-# shear_x    Double           ---      Shearing factor for horizontal shearing
-# shear_y    Double           ---      Shearing factor for vertical shearing
-# fill_value Double           ---      The background color revealed by the shearing
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# ---------------------------------------------------------------------------------------------
+# img_in      Input image as 2D matrix with top left corner at [1, 1]
+# shear_x     Shearing factor for horizontal shearing
+# shear_y     Shearing factor for vertical shearing
+# fill_value  The background color revealed by the shearing
+# ---------------------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME       TYPE                      MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# img_out    Matrix[Double]            Output image as 2D matrix with top left corner at [1, 1]
-# ----------------------------------------------------------------------------------------------------------------------
+# ------------------------------------------------------------------------------------------
+# img_out  Output image as 2D matrix with top left corner at [1, 1]
+# ------------------------------------------------------------------------------------------
 
 m_img_shear = function(Matrix[Double] img_in, Double shear_x, Double shear_y, Double fill_value) return (Matrix[Double] img_out) {
   img_out = img_transform(img_in, ncol(img_in), nrow(img_in), 1, shear_x, 0, shear_y, 1, 0, fill_value)
diff --git a/scripts/builtin/img_transform.dml b/scripts/builtin/img_transform.dml
index 13571ae166..84eee6379e 100644
--- a/scripts/builtin/img_transform.dml
+++ b/scripts/builtin/img_transform.dml
@@ -23,23 +23,19 @@
 # Optionally resizes the image (without scaling).
 # Uses nearest neighbor sampling.
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME            TYPE            DEFAULT MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# img_in          Matrix[Double]  ---     Input image as 2D matrix with top left corner at [1, 1]
-# out_w           Integer         ---     Width of the output image
-# out_h           Integer         ---     Height of the output image
-# a,b,c,d,e,f     Double          ---     The first two rows of the affine matrix in row-major order
-# fill_value      Double          ---     The background of the image
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# -------------------------------------------------------------------------------------------
+# img_in       Input image as 2D matrix with top left corner at [1, 1]
+# out_w        Width of the output image
+# out_h        Height of the output image
+# a,b,c,d,e,f  The first two rows of the affine matrix in row-major order
+# fill_value   The background of the image
+# -------------------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME       TYPE                         MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# img_out    Matrix                       Output image as 2D matrix with top left corner at [1, 1]
-# ----------------------------------------------------------------------------------------------------------------------
+# ---------------------------------------------------------------------------------------
+# img_out  Output image as 2D matrix with top left corner at [1, 1]
+# ---------------------------------------------------------------------------------------
 
 m_img_transform = function(Matrix[Double] img_in, Integer out_w, Integer out_h, Double a, Double b, Double c, Double d,
  Double e, Double f, Double fill_value) return (Matrix[Double] img_out) {
diff --git a/scripts/builtin/img_translate.dml b/scripts/builtin/img_translate.dml
index 68c3aca102..9bf2664d33 100644
--- a/scripts/builtin/img_translate.dml
+++ b/scripts/builtin/img_translate.dml
@@ -23,24 +23,20 @@
 # Optionally resizes the image (without scaling).
 # Uses nearest neighbor sampling.
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME       TYPE             DEFAULT MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# img_in     Matrix[Double]   ---     Input image as 2D matrix with top left corner at [1, 1]
-# offset_x   Double           ---     The distance to move the image in x direction
-# offset_y   Double           ---     The distance to move the image in y direction
-# out_w      Int              ---     Width of the output image
-# out_h      Int              ---     Height of the output image
-# fill_value Double           ---     The background of the image
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# ----------------------------------------------------------------------------------------------
+# img_in      Input image as 2D matrix with top left corner at [1, 1]
+# offset_x    The distance to move the image in x direction
+# offset_y    The distance to move the image in y direction
+# out_w       Width of the output image
+# out_h       Height of the output image
+# fill_value  The background of the image
+# ----------------------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME       TYPE                    MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# img_out    Matrix[Double]          Output image as 2D matrix with top left corner at [1, 1]
-# ----------------------------------------------------------------------------------------------------------------------
+# --------------------------------------------------------------------------------------------
+# img_out  Output image as 2D matrix with top left corner at [1, 1]
+# --------------------------------------------------------------------------------------------
 
 m_img_translate = function(Matrix[Double] img_in, Double offset_x, Double offset_y, Integer out_w, Integer out_h, Double fill_value)
  return (Matrix[Double] img_out) {
diff --git a/scripts/builtin/impurityMeasures.dml b/scripts/builtin/impurityMeasures.dml
index 860bc629f6..d62bdd04f5 100644
--- a/scripts/builtin/impurityMeasures.dml
+++ b/scripts/builtin/impurityMeasures.dml
@@ -22,31 +22,27 @@
 # This function computes the measure of impurity for the given dataset based on the passed method (gini or entropy).
 # The current version expects the target vector to contain only 0 or 1 values.
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME                  TYPE               DEFAULT     MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# X                     Matrix[Double]      ---        Feature matrix.
-# Y                     Matrix[Double]      ---        Target vector containing 0 and 1 values.
-# R                     Matrix[Double]      ---        Vector indicating whether a feature is categorical or continuous.
-#                                                      1 denotes a continuous feature, 2 denotes a categorical feature.
-# n_bins                Integer             20         Number of bins for binning in case of scale features.
-# method                String              ---        String indicating the method to use; either "entropy" or "gini".
-# ----------------------------------------------------------------------------------------------------------------------
-
-# Output(s)
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME                  TYPE               DEFAULT     MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# IM                    Matrix[Double]      ---        (1 x ncol(X)) row vector containing information/gini gain for
-#                                                      each feature of the dataset.
-#                                                      In case of gini, the values denote the gini gains, i.e. how much
-#                                                      impurity was removed with the respective split. The higher the
-#                                                      value, the better the split.
-#                                                      In case of entropy, the values denote the information gain, i.e.
-#                                                      how much entropy was removed. The higher the information gain,
-#                                                      the better the split.
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# --------------------------------------------------------------------------
+# X        Feature matrix.
+# Y        Target vector containing 0 and 1 values.
+# R        Vector indicating whether a feature is categorical or continuous.
+#          1 denotes a continuous feature, 2 denotes a categorical feature.
+# n_bins   Number of bins for binning in case of scale features.
+# method   String indicating the method to use; either "entropy" or "gini".
+# --------------------------------------------------------------------------
+#
+# OUTPUT:
+# ------------------------------------------------------------------------
+# IM     (1 x ncol(X)) row vector containing information/gini gain for
+#        each feature of the dataset.
+#        In case of gini, the values denote the gini gains, i.e. how much
+#        impurity was removed with the respective split. The higher the
+#        value, the better the split.
+#        In case of entropy, the values denote the information gain, i.e.
+#        how much entropy was removed. The higher the information gain,
+#        the better the split.
+# ------------------------------------------------------------------------
 
 m_impurityMeasures = function(Matrix[Double] X, Matrix[Double] Y, Matrix[Double] R, Integer n_bins = 20, String method)
   return (Matrix[Double] IM)
diff --git a/scripts/builtin/imputeByFD.dml b/scripts/builtin/imputeByFD.dml
index da553e1c72..2f078c056a 100644
--- a/scripts/builtin/imputeByFD.dml
+++ b/scripts/builtin/imputeByFD.dml
@@ -20,24 +20,20 @@
 #-------------------------------------------------------------
 
 # Implements builtin for imputing missing values from observed values (if exist) using robust functional dependencies
-
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME            TYPE             DEFAULT   MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# X               Matrix[Double]   ---       Vector X, source attribute of functional dependency
-# Y               Matrix[Double]   ---       Vector Y, target attribute of functional dependency and imputation
-# threshold       Double           ---       threshold value in interval [0, 1] for robust FDs
-# verbose         Boolean          FALSE     flag for printing verbose debug output
-# ----------------------------------------------------------------------------------------------------------------------
+#
+# INPUT:
+# --------------------------------------------------------------------------------------
+# X          Vector X, source attribute of functional dependency
+# Y          Vector Y, target attribute of functional dependency and imputation
+# threshold  threshold value in interval [0, 1] for robust FDs
+# verbose    flag for printing verbose debug output
+# --------------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME            TYPE                       MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# Y               Matrix[Double]             Vector Y, with missing values mapped to a new max value
-# Y_imp           Matrix[Double]             Vector Y, with imputed missing values
-# ----------------------------------------------------------------------------------------------------------------------
+# ----------------------------------------------------------------------------------
+# Y      Vector Y, with missing values mapped to a new max value
+# Y_imp  Vector Y, with imputed missing values
+# ----------------------------------------------------------------------------------
 
 m_imputeByFD = function(Matrix[Double] X, Matrix[Double] Y, Double threshold, Boolean verbose = FALSE)
   return(Matrix[Double] Y, Matrix[Double] Y_imp)
diff --git a/scripts/builtin/imputeByFDApply.dml b/scripts/builtin/imputeByFDApply.dml
index 0f4d495959..80f7bbe49e 100644
--- a/scripts/builtin/imputeByFDApply.dml
+++ b/scripts/builtin/imputeByFDApply.dml
@@ -20,23 +20,19 @@
 #-------------------------------------------------------------
 
 # Implements builtin for imputing missing values from observed values (if exist) using robust functional dependencies
-
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME            TYPE             DEFAULT   MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# X               Matrix[Double]   ---       Matrix X
-# source          Integer          ---       source attribute to use for imputation and error correction
-# target          Integer          ---       attribute to be fixed
-# threshold       Double           ---       threshold value in interval [0, 1] for robust FDs
-# ----------------------------------------------------------------------------------------------------------------------
+#
+# INPUT:
+# --------------------------------------------------------------------------------------
+# X          Matrix X
+# source     source attribute to use for imputation and error correction
+# target     attribute to be fixed
+# threshold  threshold value in interval [0, 1] for robust FDs
+# --------------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME            TYPE                       MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# X               Matrix[Double]             Matrix with possible imputations
-# ----------------------------------------------------------------------------------------------------------------------
+# ---------------------------------------------------------------------------------
+# imputed_Y  Matrix with possible imputations
+# ---------------------------------------------------------------------------------
 
 m_imputeByFDApply = function(Matrix[Double] X, Matrix[Double] Y_imp)
   return(Matrix[Double] imputed_Y)
diff --git a/scripts/builtin/imputeByMean.dml b/scripts/builtin/imputeByMean.dml
index 210f6508be..0d14496037 100644
--- a/scripts/builtin/imputeByMean.dml
+++ b/scripts/builtin/imputeByMean.dml
@@ -22,20 +22,16 @@
 # impute the data by mean value and if the feature is categorical then by mode value
 # Related to [SYSTEMDS-2662] dependency function for cleaning pipelines
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME       TYPE              DEFAULT     MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# X          Matrix[Double]    ---         Data Matrix (Recoded Matrix for categorical features)
-# mask       Matrix[Double]    ---         A 0/1 row vector for identifying numeric (0) and categorical features (1)
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# -------------------------------------------------------------------------------------
+# X       Data Matrix (Recoded Matrix for categorical features)
+# mask    A 0/1 row vector for identifying numeric (0) and categorical features (1)
+# -------------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME      TYPE                           MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# X         Matrix[Double]                 imputed dataset
-# ----------------------------------------------------------------------------------------------------------------------
+# -----------------------------------------------------------------------------------
+# X     imputed dataset
+# -----------------------------------------------------------------------------------
 
 m_imputeByMean = function(Matrix[Double] X, Matrix[Double] mask)
 return(Matrix[Double] X, Matrix[Double] imputedVec)
diff --git a/scripts/builtin/imputeByMeanApply.dml b/scripts/builtin/imputeByMeanApply.dml
index 41fb2b559d..4633f63430 100644
--- a/scripts/builtin/imputeByMeanApply.dml
+++ b/scripts/builtin/imputeByMeanApply.dml
@@ -22,20 +22,16 @@
 # impute the data by mean value and if the feature is categorical then by mode value
 # Related to [SYSTEMDS-2662] dependency function for cleaning pipelines
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME                     TYPE              DEFAULT     MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# X                      Matrix[Double]    ---         Data Matrix (Recoded Matrix for categorical features)
-# imputationVector       Matrix[Double]    ---         column mean vector
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# ------------------------------------------------------------------------------------
+# X                  Data Matrix (Recoded Matrix for categorical features)
+# imputedVec         column mean vector
+# ------------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME      TYPE                           MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# X         Matrix[Double]                 imputed dataset
-# ----------------------------------------------------------------------------------------------------------------------
+# -----------------------------------------------------------------------------------
+# X     imputed dataset
+# -----------------------------------------------------------------------------------
 
 m_imputeByMeanApply = function(Matrix[Double] X, Matrix[Double] imputedVec)
 return(Matrix[Double] X)
diff --git a/scripts/builtin/imputeByMedian.dml b/scripts/builtin/imputeByMedian.dml
index c40c9b15b8..74e871e70b 100644
--- a/scripts/builtin/imputeByMedian.dml
+++ b/scripts/builtin/imputeByMedian.dml
@@ -20,23 +20,19 @@
 #-------------------------------------------------------------
 
 # Related to [SYSTEMDS-2662] dependency function for cleaning pipelines
-
+#
 # impute the data by median value and if the feature is categorical then by mode value
-
-# INPUT PARAMETERS:
-# ---------------------------------------------------------------------------------------------
-# NAME            TYPE    DEFAULT     MEANING
-# ---------------------------------------------------------------------------------------------
-# X               Double    ---        Data Matrix (Recoded Matrix for categorical features)
-# mask           Double    ---        A 0/1 row vector for identifying numeric (0) and categorical features (1)
-# ---------------------------------------------------------------------------------------------
- 
-
-#Output(s)
-# ---------------------------------------------------------------------------------------------
-# NAME                  TYPE    DEFAULT     MEANING
+#
+# INPUT:
+# ---------------------------------------------------------------
+# X     Data Matrix (Recoded Matrix for categorical features)
+# mask  A 0/1 row vector for identifying numeric (0) and categorical features (1)
+# ---------------------------------------------------------------
+#
+# OUTPUT:
+# ---------------------------------------------------------------
+# X     imputed dataset
 # ---------------------------------------------------------------------------------------------
-# X               Double   ---        imputed dataset
 
 
 
diff --git a/scripts/builtin/imputeByMedianApply.dml b/scripts/builtin/imputeByMedianApply.dml
index 6ed5e76e55..99b064470c 100644
--- a/scripts/builtin/imputeByMedianApply.dml
+++ b/scripts/builtin/imputeByMedianApply.dml
@@ -22,20 +22,16 @@
 # impute the data by median value and if the feature is categorical then by mode value
 # Related to [SYSTEMDS-2662] dependency function for cleaning pipelines
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME                     TYPE              DEFAULT     MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# X                      Matrix[Double]    ---         Data Matrix (Recoded Matrix for categorical features)
-# imputationVector       Matrix[Double]    ---         column median vector
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# ------------------------------------------------------------------------------------
+# X                  Data Matrix (Recoded Matrix for categorical features)
+# imputedVec         column median vector
+# ------------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME      TYPE                           MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# X         Matrix[Double]                 imputed dataset
-# ----------------------------------------------------------------------------------------------------------------------
+# -----------------------------------------------------------------------------------
+# X     imputed dataset
+# -----------------------------------------------------------------------------------
 
 m_imputeByMedianApply = function(Matrix[Double] X, Matrix[Double] imputedVec)
 return(Matrix[Double] X)
diff --git a/scripts/builtin/imputeByMode.dml b/scripts/builtin/imputeByMode.dml
index ad26148ca1..c6f97e5bda 100644
--- a/scripts/builtin/imputeByMode.dml
+++ b/scripts/builtin/imputeByMode.dml
@@ -22,19 +22,15 @@
 # This function impute the data by mode value
 # Related to [SYSTEMDS-2902] dependency function for cleaning pipelines
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME       TYPE             DEFAULT     MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# X          Matrix[Double]   ---         Data Matrix (Recoded Matrix for categorical features)
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# ------------------------------------------------------------------------------------
+# X     Data Matrix (Recoded Matrix for categorical features)
+# ------------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME       TYPE                       MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# X          Matrix[Double]             imputed dataset
-# ----------------------------------------------------------------------------------------------------------------------
+# --------------------------------------------------------------------------------------
+# X     imputed dataset
+# --------------------------------------------------------------------------------------
 
 m_imputeByMode = function(Matrix[Double] X)
 return(Matrix[Double] X, Matrix[Double] imputedVec)
diff --git a/scripts/builtin/imputeByModeApply.dml b/scripts/builtin/imputeByModeApply.dml
index 8fe0ab68a3..32ff54ed84 100644
--- a/scripts/builtin/imputeByModeApply.dml
+++ b/scripts/builtin/imputeByModeApply.dml
@@ -22,20 +22,16 @@
 # impute the data by most frequent value (recoded data only)
 # Related to [SYSTEMDS-2662] dependency function for cleaning pipelines
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME                     TYPE              DEFAULT     MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# X                      Matrix[Double]    ---         Data Matrix (Recoded Matrix for categorical features)
-# imputationVector       Matrix[Double]    ---         column mean vector
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# ------------------------------------------------------------------------------------
+# X                  Data Matrix (Recoded Matrix for categorical features)
+# imputedVec         column mode vector (most frequent value per column)
+# ------------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME      TYPE                           MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# X         Matrix[Double]                 imputed dataset
-# ----------------------------------------------------------------------------------------------------------------------
+# -----------------------------------------------------------------------------------
+# X     imputed dataset
+# -----------------------------------------------------------------------------------
 
 m_imputeByModeApply = function(Matrix[Double] X, Matrix[Double] imputedVec)
 return(Matrix[Double] X)
diff --git a/scripts/builtin/intersect.dml b/scripts/builtin/intersect.dml
index f4ab3a549e..554902ebed 100644
--- a/scripts/builtin/intersect.dml
+++ b/scripts/builtin/intersect.dml
@@ -21,20 +21,16 @@
 
 # Implements set intersection for numeric data
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME         TYPE            DEFAULT     MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# X            Matrix[Double]  ---         matrix X, set A
-# Y            Matrix[Double]  ---         matrix Y, set B
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# -----------------------------------------------------------------------------------
+# X     matrix X, set A
+# Y     matrix Y, set B
+# -----------------------------------------------------------------------------------
 #
-# Output(s)
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME         TYPE                       MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# R            Matrix[Double]             intersection matrix, set of intersecting items
-# ----------------------------------------------------------------------------------------------------------------------
+# OUTPUT:
+# ------------------------------------------------------------------------------------
+# R     intersection matrix, set of intersecting items
+# ------------------------------------------------------------------------------------
 
 m_intersect = function(Matrix[Double] X, Matrix[Double] Y)
   return(Matrix[Double] R)
diff --git a/scripts/builtin/km.dml b/scripts/builtin/km.dml
index 32194b3673..b4c2afc7c2 100644
--- a/scripts/builtin/km.dml
+++ b/scripts/builtin/km.dml
@@ -21,71 +21,67 @@
 
 # Builtin function that implements the analysis of survival data with KAPLAN-MEIER estimates
 #
-# INPUT   PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME      TYPE             DEFAULT     MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# X         Matrix[Double]   ---         Input matrix X containing the survival data:
-#                                        timestamps, whether event occurred (1) or data is censored (0), and a
-#                                        number of factors (categorical features) for grouping and/or stratifying
-# TE        Matrix[Double]   ---         Column indices of X which contain timestamps (first entry) and event
-#                                        information (second entry)
-# GI        Matrix[Double]   ---         Column indices of X corresponding to the factors to be used for grouping
-# SI        Matrix[Double]   ---         Column indices of X corresponding to the factors to be used for stratifying
-# alpha     Double           0.05        Parameter to compute 100*(1-alpha)% confidence intervals for the survivor
-#                                        function and its median
-# err_type  String                       "greenwood" Parameter to specify the error type according to "greenwood" (the default) or "peto"
-# conf_type String           "log"       Parameter to modify the confidence interval; "plain" keeps the lower and
-#                                        upper bound of the confidence interval unmodified, "log" (the default)
-#                                        corresponds to logistic transformation and "log-log" corresponds to the
-#                                        complementary log-log transformation
-# test_type String           "none"      If survival data for multiple groups is available specifies which test to
-#                                        perform for comparing survival data across multiple groups: "none" (the default)
-#                                        "log-rank" or "wilcoxon" test
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# ------------------------------------------------------------------------------------------
+# X          Input matrix X containing the survival data:
+#            timestamps, whether event occurred (1) or data is censored (0), and a
+#            number of factors (categorical features) for grouping and/or stratifying
+# TE         Column indices of X which contain timestamps (first entry) and event
+#            information (second entry)
+# GI         Column indices of X corresponding to the factors to be used for grouping
+# SI         Column indices of X corresponding to the factors to be used for stratifying
+# alpha      Parameter to compute 100*(1-alpha)% confidence intervals for the survivor
+#            function and its median
+# err_type   Parameter to specify the error type according to "greenwood" (the default) or "peto"
+# conf_type  Parameter to modify the confidence interval; "plain" keeps the lower and
+#            upper bound of the confidence interval unmodified, "log" (the default)
+#            corresponds to logistic transformation and "log-log" corresponds to the
+#            complementary log-log transformation
+# test_type  If survival data for multiple groups is available specifies which test to
+#            perform for comparing survival data across multiple groups: "none" (the default)
+#            "log-rank" or "wilcoxon" test
+# ------------------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME           TYPE                    MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# O              Matrix[Double]          Matrix KM whose dimension depends on the number of groups (denoted by g) and
-#                                        strata (denoted by s) in the data:
-#                                        each collection of 7 consecutive columns in KM corresponds to a unique
-#                                        combination of groups and strata in the data with the following schema
-#                                        1. col: timestamp
-#                                        2. col: no. at risk
-#                                        3. col: no. of events
-#                                        4. col: Kaplan-Meier estimate of survivor function surv
-#                                        5. col: standard error of surv
-#                                        6. col: lower 100*(1-alpha)% confidence interval for surv
-#                                        7. col: upper 100*(1-alpha)% confidence interval for surv
-# M              Matrix[Double]          Matrix M whose dimension depends on the number of groups (g) and strata (s) in
-#                                        the data (k denotes the number of factors used for grouping  ,i.e., ncol(GI) and
-#                                        l denotes the number of factors used for stratifying, i.e., ncol(SI))
-#                                        M[,1:k]: unique combination of values in the k factors used for grouping
-#                                        M[,(k+1):(k+l)]: unique combination of values in the l factors used for stratifying
-#                                        M[,k+l+1]: total number of records
-#                                        M[,k+l+2]: total number of events
-#                                        M[,k+l+3]: median of surv
-#                                        M[,k+l+4]: lower 100*(1-alpha)% confidence interval of the median of surv
-#                                        M[,k+l+5]: upper 100*(1-alpha)% confidence interval of the median of surv
-#                                        If the number of groups and strata is equal to 1, M will have 4 columns with
-#                                        M[,1]: total number of events
-#                                        M[,2]: median of surv
-#                                        M[,3]: lower 100*(1-alpha)% confidence interval of the median of surv
-#                                        M[,4]: upper 100*(1-alpha)% confidence interval of the median of surv
-# T_GROUPS_OE    Matrix[Double]          If survival data from multiple groups available and ttype=log-rank or wilcoxon,
-#                                        a 1 x 4 matrix T and an g x 5 matrix T_GROUPS_OE with
-#                                        T_GROUPS_OE[,1] = no. of events
-#                                        T_GROUPS_OE[,2] = observed value (O)
-#                                        T_GROUPS_OE[,3] = expected value (E)
-#                                        T_GROUPS_OE[,4] = (O-E)^2/E
-#                                        T_GROUPS_OE[,5] = (O-E)^2/V
-#                                        T[1,1] = no. of groups
-#                                        T[1,2] = degree of freedom for Chi-squared distributed test statistic
-#                                        T[1,3] = test statistic
-#                                        T[1,4] = P-value
-# ----------------------------------------------------------------------------------------------------------------------
+# --------------------------------------------------------------------------------------------
+# O            Matrix KM whose dimension depends on the number of groups (denoted by g) and
+#              strata (denoted by s) in the data:
+#              each collection of 7 consecutive columns in KM corresponds to a unique
+#              combination of groups and strata in the data with the following schema
+#              1. col: timestamp
+#              2. col: no. at risk
+#              3. col: no. of events
+#              4. col: Kaplan-Meier estimate of survivor function surv
+#              5. col: standard error of surv
+#              6. col: lower 100*(1-alpha)% confidence interval for surv
+#              7. col: upper 100*(1-alpha)% confidence interval for surv
+# M            Matrix M whose dimension depends on the number of groups (g) and strata (s) in
+#              the data (k denotes the number of factors used for grouping, i.e., ncol(GI) and
+#              l denotes the number of factors used for stratifying, i.e., ncol(SI))
+#              M[,1:k]: unique combination of values in the k factors used for grouping
+#              M[,(k+1):(k+l)]: unique combination of values in the l factors used for stratifying
+#              M[,k+l+1]: total number of records
+#              M[,k+l+2]: total number of events
+#              M[,k+l+3]: median of surv
+#              M[,k+l+4]: lower 100*(1-alpha)% confidence interval of the median of surv
+#              M[,k+l+5]: upper 100*(1-alpha)% confidence interval of the median of surv
+#              If the number of groups and strata is equal to 1, M will have 4 columns with
+#              M[,1]: total number of events
+#              M[,2]: median of surv
+#              M[,3]: lower 100*(1-alpha)% confidence interval of the median of surv
+#              M[,4]: upper 100*(1-alpha)% confidence interval of the median of surv
+# T_GROUPS_OE  If survival data from multiple groups is available and test_type is "log-rank" or "wilcoxon",
+#              a 1 x 4 matrix T and a g x 5 matrix T_GROUPS_OE with
+#              T_GROUPS_OE[,1] = no. of events
+#              T_GROUPS_OE[,2] = observed value (O)
+#              T_GROUPS_OE[,3] = expected value (E)
+#              T_GROUPS_OE[,4] = (O-E)^2/E
+#              T_GROUPS_OE[,5] = (O-E)^2/V
+#              T[1,1] = no. of groups
+#              T[1,2] = degree of freedom for Chi-squared distributed test statistic
+#              T[1,3] = test statistic
+#              T[1,4] = P-value
+# --------------------------------------------------------------------------------------------
 
 m_km = function(Matrix[Double] X, Matrix[Double] TE, Matrix[Double] GI, Matrix[Double] SI,
     Double alpha = 0.05, String err_type = "greenwood", String conf_type = "log", String test_type = "none")
diff --git a/scripts/builtin/kmeans.dml b/scripts/builtin/kmeans.dml
index 1e7e9dfabe..7fdd320a16 100644
--- a/scripts/builtin/kmeans.dml
+++ b/scripts/builtin/kmeans.dml
@@ -21,28 +21,24 @@
 
 # Builtin function that implements the k-Means clustering algorithm
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME                              TYPE              DEFAULT  MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# X                                 Matrix[Double]    ---      The input Matrix to do KMeans on.
-# k                                 Int               10       Number of centroids
-# runs                              Int               10       Number of runs (with different initial centroids)
-# max_iter                          Int               1000     Maximum number of iterations per run
-# eps                               Double            0.000001 Tolerance (epsilon) for WCSS change ratio
-# is_verbose                        Boolean           FALSE    do not print per-iteration stats
-# avg_sample_size_per_centroid      Int               50       Average number of records per centroid in data samples
-# seed                              Int               -1       The seed used for initial sampling. If set to -1
-#                                                              random seeds are selected.
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# ---------------------------------------------------------------------------------------
+# X                             The input Matrix to do KMeans on.
+# k                             Number of centroids
+# runs                          Number of runs (with different initial centroids)
+# max_iter                      Maximum number of iterations per run
+# eps                           Tolerance (epsilon) for WCSS change ratio
+# is_verbose                    If TRUE, print per-iteration stats (FALSE: do not print)
+# avg_sample_size_per_centroid  Average number of records per centroid in data samples
+# seed                          The seed used for initial sampling. If set to -1
+#                               random seeds are selected.
+# ---------------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME                              TYPE                       MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# Y                                 Matrix[Double]             The mapping of records to centroids
-# C                                 Matrix[Double]             The output matrix with the centroids
-# ----------------------------------------------------------------------------------------------------------------------
+# ---------------------------------------------------------------
+# Y     The mapping of records to centroids
+# C     The output matrix with the centroids
+# ---------------------------------------------------------------
 
 m_kmeans = function(Matrix[Double] X, Integer k = 10, Integer runs = 10, Integer max_iter = 1000,
     Double eps = 0.000001, Boolean is_verbose = FALSE, Integer avg_sample_size_per_centroid = 50,
diff --git a/scripts/builtin/kmeansPredict.dml b/scripts/builtin/kmeansPredict.dml
index 7e80e97282..29dc395b80 100644
--- a/scripts/builtin/kmeansPredict.dml
+++ b/scripts/builtin/kmeansPredict.dml
@@ -21,20 +21,16 @@
 
 # Builtin function that does predictions based on a set of centroids provided.
 #
-# INPUT PARAMETERS:
-# -----------------------------------------------------------------------------
-# NAME     TYPE              DEFAULT  MEANING
-# -----------------------------------------------------------------------------
-# X        Matrix[Double]    ---      The input Matrix to do KMeans on.
-# C        Matrix[Double]    ---      The input Centroids to map X onto.
-# -----------------------------------------------------------------------------
+# INPUT:
+# -----------------------------------------------
+# X     The input Matrix to do KMeans on.
+# C     The input Centroids to map X onto.
+# -----------------------------------------------
 #
 # OUTPUT:
-# -----------------------------------------------------------------------------
-# NAME     TYPE                MEANING
-# -----------------------------------------------------------------------------
-# Y        Matrix[Double]      The mapping of records to centroids
-# -----------------------------------------------------------------------------
+# ------------------------------------------------------
+# Y     The mapping of records to centroids
+# ------------------------------------------------------
 
 m_kmeansPredict = function(Matrix[Double] X, Matrix[Double] C)
   return (Matrix[Double] Y)
diff --git a/scripts/builtin/knn.dml b/scripts/builtin/knn.dml
index 8a24713b75..6492e777e3 100644
--- a/scripts/builtin/knn.dml
+++ b/scripts/builtin/knn.dml
@@ -20,40 +20,37 @@
 #-------------------------------------------------------------
 
 # This script implements KNN (K Nearest Neighbor) algorithm.
-
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME                  TYPE             DEFAULT     OPTIONAL     MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# Train                 Matrix[Double]   ---         N           The input matrix as features
-# Test                  Matrix[Double]   ---         N           The input matrix for nearest neighbor search
-# CL                    Matrix[Double]   ---         Y           The input matrix as target
-# CL_T                  Integer          0           Y           The target type of matrix CL whether
-#                                                                columns in CL are continuous ( =1 ) or
-#                                                                categorical ( =2 ) or not specified ( =0 )
-# trans_continuous      Boolean          FALSE       Y           Option flag for continuous feature transformed to [-1,1]:
-#                                                                FALSE = do not transform continuous variable;
-#                                                                TRUE = transform continuous variable;
-# k_value               int              5           Y           k value for KNN, ignore if select_k enable
-# select_k              Boolean          FALSE       Y           Use k selection algorithm to estimate k (TRUE means yes)
-# k_min                 int              1           Y           Min k value(  available if select_k = 1 )
-# k_max                 int              100         Y           Max k value(  available if select_k = 1 )
-# select_feature        Boolean          FALSE       Y           Use feature selection algorithm to select feature (TRUE means yes)
-# feature_max           int              10          Y           Max feature selection
-# interval              int              1000        Y           Interval value for K selecting (  available if select_k = 1 )
-# feature_importance    Boolean          FALSE       Y           Use feature importance algorithm to estimate each feature
-#                                                                (TRUE means yes)
-# predict_con_tg        int              0           Y           Continuous  target predict function: mean(=0) or median(=1)
-# START_SELECTED        Matrix[Double]   Empty       Y           feature selection initinal value
-# ----------------------------------------------------------------------------------------------------------------------
+#
+# INPUT:
+# ---------------------------------------------------------------------------
+# Train               The input matrix as features
+# Test                The input matrix for nearest neighbor search
+# CL                  The input matrix as target
+# CL_T                The target type of matrix CL whether
+#                     columns in CL are continuous ( =1 ) or
+#                     categorical ( =2 ) or not specified ( =0 )
+# trans_continuous    Option flag for continuous feature transformed to [-1,1]:
+#                     FALSE = do not transform continuous variable;
+#                     TRUE = transform continuous variable;
+# k_value             k value for KNN, ignored if select_k is enabled
+# select_k            Use k selection algorithm to estimate k (TRUE means yes)
+# k_min               Min k value(  available if select_k = 1 )
+# k_max               Max k value(  available if select_k = 1 )
+# select_feature      Use feature selection algorithm to select feature (TRUE means yes)
+# feature_max         Max feature selection
+# interval            Interval value for K selecting (  available if select_k = 1 )
+# feature_importance  Use feature importance algorithm to estimate each feature
+#                     (TRUE means yes)
+# predict_con_tg      Continuous  target predict function: mean(=0) or median(=1)
+# START_SELECTED      feature selection initial value
+# ---------------------------------------------------------------------------
+#
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME                    TYPE                    MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# NNR_matrix              Matrix[Double]         ---
-# CL_matrix               Matrix[Double]         ---
-# m_feature_importance    Matrix[Double]         Feature importance value
-# ----------------------------------------------------------------------------------------------------------------------
+# ---------------------------------------------------------------------------------------------
+# NNR_matrix            ---
+# CL_matrix             ---
+# m_feature_importance  Feature importance value
+# ---------------------------------------------------------------------------------------------
 
 m_knn = function(
     Matrix[Double] Train,
diff --git a/scripts/builtin/knnGraph.dml b/scripts/builtin/knnGraph.dml
index 36155d0921..0ad2618163 100644
--- a/scripts/builtin/knnGraph.dml
+++ b/scripts/builtin/knnGraph.dml
@@ -19,7 +19,18 @@
 #
 #-------------------------------------------------------------
 
-# Builtin for k nearest neighbour graph construction
+# Builtin for k nearest neighbor graph construction
+#
+# INPUT:
+# --------------------------
+# X        ---
+# k        ---
+# --------------------------
+#
+# OUTPUT:
+# ------------------------
+# graph  ---
+# ------------------------
 
 m_knnGraph = function(Matrix[double] X, integer k) return (Matrix[double] graph) {
   distances = dist(X);
diff --git a/scripts/builtin/knnbf.dml b/scripts/builtin/knnbf.dml
index 4c9540a8e8..76d88cdf75 100644
--- a/scripts/builtin/knnbf.dml
+++ b/scripts/builtin/knnbf.dml
@@ -21,21 +21,17 @@
 
 # This script implements KNN (K Nearest Neighbor) algorithm.
 #
-# INPUT PARAMETERS:
-# -----------------------------------------------------------
-# NAME           TYPE              DEFAULT  MEANING
-# -----------------------------------------------------------
-# X              Matrix[Double]    ---      ---
-# T              Matrix[Double]    ---      ---
-# k_value        Integer           5        ---
-# -----------------------------------------------------------
+# INPUT:
+# --------------------------
+# X        ---
+# T        ---
+# k_value  ---
+# --------------------------
 #
 # OUTPUT:
-# -----------------------------------------------------------
-# NAME           TYPE                       MEANING
-# -----------------------------------------------------------
-# NNR            Matrix[Double]             ---
-# -----------------------------------------------------------
+# ------------------------
+# NNR    ---
+# ------------------------
 
 m_knnbf = function(Matrix[Double] X, Matrix[Double] T, Integer k_value = 5) 
   return(Matrix[Double] NNR)
diff --git a/scripts/builtin/l2svm.dml b/scripts/builtin/l2svm.dml
index 9d3fca7d15..cdcc1ba4df 100644
--- a/scripts/builtin/l2svm.dml
+++ b/scripts/builtin/l2svm.dml
@@ -21,29 +21,25 @@
 
 # Builtin function Implements binary-class SVM with squared slack variables
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME            TYPE            DEFAULT     MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# X               Matrix[Double]  ---         matrix X of feature vectors
-# Y               Matrix[Double]  ---         matrix Y of class labels have to be a single column
-# intercept       Boolean         False       No Intercept ( If set to TRUE then a constant bias column is added to X)
-# epsilon         Double          0.001       Procedure terminates early if the reduction in objective function value is less
-#                                             than epsilon (tolerance) times the initial objective function value.
-# reg             Double          1.0         Regularization parameter (reg) for L2 regularization
-# maxIterations   Int             100         Maximum number of conjugate gradient iterations
-# maxii           Int             20          -
-# verbose         Boolean         FALSE       Set to true if one wants print statements updating on loss.
-# columnId        Int             -1          The column Id used if one wants to add a ID to the print statement, Specificly
-#                                             usefull when L2SVM is used in MSVM.
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# -----------------------------------------------------------------------------------------
+# X              matrix X of feature vectors
+# Y              matrix Y of class labels have to be a single column
+# intercept      If set to TRUE then a constant bias column is added to X (FALSE: no intercept)
+# epsilon        Procedure terminates early if the reduction in objective function value is less
+#                than epsilon (tolerance) times the initial objective function value.
+# reg            Regularization parameter (reg) for L2 regularization
+# maxIterations  Maximum number of conjugate gradient iterations
+# maxii          max inner for loop iterations
+# verbose        Set to true if one wants print statements updating on loss.
+# columnId       The column Id used if one wants to add an ID to the print statement,
+#                e.g. used when L2SVM is used in MSVM.
+# -----------------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME            TYPE               MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# model           Matrix[Double]     model matrix
-# ----------------------------------------------------------------------------------------------------------------------
+# ------------------------------------------------------------------------------------------
+# model  the trained model
+# ------------------------------------------------------------------------------------------
 
 m_l2svm = function(Matrix[Double] X, Matrix[Double] Y, Boolean intercept = FALSE,
     Double epsilon = 0.001, Double reg = 1, Integer maxIterations = 100, 
diff --git a/scripts/builtin/l2svmPredict.dml b/scripts/builtin/l2svmPredict.dml
index 960ffd919a..88b1497c06 100644
--- a/scripts/builtin/l2svmPredict.dml
+++ b/scripts/builtin/l2svmPredict.dml
@@ -20,24 +20,20 @@
 #-------------------------------------------------------------
 
 # Builtin function Implements binary-class SVM with squared slack variables.
-
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME            TYPE            DEFAULT     MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# X               Matrix[Double]  ---         matrix X of feature vectors to classify
-# W               Matrix[Double]  ---         matrix of the trained variables
-# verbose         Boolean         FALSE       Set to true if one wants print statements.
-# ----------------------------------------------------------------------------------------------------------------------
+#
+# INPUT:
+# -----------------------------------------------------------------------------------
+# X        matrix X of feature vectors to classify
+# W        matrix of the trained variables
+# verbose  Set to true if one wants print statements.
+# -----------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME            TYPE                MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# YRaw            Matrix[Double]      Classification Labels Raw, meaning not modified to clean
-#                                     Labeles of 1's and -1's
-# Y               Matrix[Double]      Classification Labels Maxed to ones and zeros.
-# ----------------------------------------------------------------------------------------------------------------------
+# ----------------------------------------------------------------------------------------
+# YRaw  Classification Labels Raw, meaning not modified to clean
+#       labels of 1's and -1's
+# Y     Classification Labels Maxed to ones and zeros.
+# ----------------------------------------------------------------------------------------
 
 m_l2svmPredict = function(Matrix[Double] X, Matrix[Double] W, Boolean verbose = FALSE)
   return(Matrix[Double] YRaw, Matrix[Double] Y)
diff --git a/scripts/builtin/lasso.dml b/scripts/builtin/lasso.dml
index 8bb6e19c7a..16f3c02dcf 100644
--- a/scripts/builtin/lasso.dml
+++ b/scripts/builtin/lasso.dml
@@ -22,25 +22,21 @@
 # Builtin function for the SpaRSA algorithm to perform lasso regression
 # (SpaRSA .. Sparse Reconstruction by Separable Approximation)
 #
-# INPUTS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME            TYPE            DEFAULT     MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# X               Matrix[Double]   ---         input feature matrix
-# y               Matrix[Double]   ---         matrix Y columns of the design matrix
-# tol             Double           1e-15       target convergence tolerance
-# M               Integer          5           history length
-# tau             Double           1           regularization component
-# maxi            Integer          100         maximum number of iterations until convergence
-# verbose         Boolean          True        if the builtin should be verbose
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# -----------------------------------------------------------------------------------
+# X        input feature matrix
+# y        matrix Y columns of the design matrix
+# tol      target convergence tolerance
+# M        history length
+# tau      regularization component
+# maxi     maximum number of iterations until convergence
+# verbose  if the builtin should be verbose
+# -----------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME            TYPE                        MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# w               Matrix[Double]              model matrix
-# ----------------------------------------------------------------------------------------------------------------------
+# --------------------------------------------------------------------------------
+# w     model matrix
+# --------------------------------------------------------------------------------
 
 m_lasso = function(Matrix[Double] X, Matrix[Double] y, Double tol = 1e-15, 
   Integer M = 5, Double tau = 1, Integer maxi = 100, Boolean verbose = TRUE)
diff --git a/scripts/builtin/lenetPredict.dml b/scripts/builtin/lenetPredict.dml
index ca184b0260..12243c925b 100644
--- a/scripts/builtin/lenetPredict.dml
+++ b/scripts/builtin/lenetPredict.dml
@@ -21,24 +21,20 @@
 
 # This builtin function makes prediction given data and trained LeNet model
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME        TYPE           DEFAULT   MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# model       List[unknown]   ---      Trained LeNet model
-# X           Matrix[Double]  ---      Input data matrix, of shape (N, C*Hin*Win)
-# C           Integer         ---      Number of input channels
-# Hin         Integer         ---      Input height
-# Win         Integer         ---      Input width
-# batch_size  Integer         ---      Batch size
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# ---------------------------------------------------------------------------------------------
+# model       Trained LeNet model
+# X           Input data matrix, of shape (N, C*Hin*Win)
+# C           Number of input channels
+# Hin         Input height
+# Win         Input width
+# batch_size  Batch size
+# ---------------------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME        TYPE                     MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# probs       Matrix[Double]           Predicted values
-# ----------------------------------------------------------------------------------------------------------------------
+# ----------------------------------------------------------------------------------------
+# probs  Predicted values
+# ----------------------------------------------------------------------------------------
 
 source("nn/layers/lenetForwardPass.dml") as lenet_fw
 
diff --git a/scripts/builtin/lenetTrain.dml b/scripts/builtin/lenetTrain.dml
index b82acf3d06..2ddd9e764a 100644
--- a/scripts/builtin/lenetTrain.dml
+++ b/scripts/builtin/lenetTrain.dml
@@ -22,34 +22,30 @@
 # This builtin function trains LeNet CNN. The architecture of the
 # networks is:conv1 -> relu1 -> pool1 -> conv2 -> relu2 -> pool2 ->
 # affine3 -> relu3 -> affine4 -> softmax
-
-# INPUT PARAMETERS:
-# --------------------------------------------------------------------------------------------
-# NAME              TYPE              DEFAULT   MEANING
-# --------------------------------------------------------------------------------------------
-# X                 Matrix[Double]    ---       Input data matrix, of shape (N, C*Hin*Win)
-# Y                 Matrix[Double]    ---       Target matrix, of shape (N, K)
-# X_val             Matrix[Double]    ---       Validation data matrix, of shape (N, C*Hin*Win)
-# Y_val             Matrix[Double]    ---       Validation target matrix, of shape (N, K)
-# C                 Integer           ---       Number of input channels (dimensionality of input depth)
-# Hin               Integer           ---       Input width
-# Win               Integer           ---       Input height
-# batch_size        Integer           64        Batch size
-# epochs            Integer           20        Number of epochs
-# lr                Double            0.01      Learning rate
-# mu                Double            0.9       Momentum value
-# decay             Double            0.95      Learning rate decay
-# reg               Double            5e-04     Regularization strength
-# seed              Integer           -1        Seed for model initialization
-# verbose           Boolean           FALSE     Flag indicates if function should print to stdout
-# ----------------------------------------------------------------------------------------------------------------------
+#
+# INPUT:
+# ----------------------------------------------------------
+# X           Input data matrix, of shape (N, C*Hin*Win)
+# Y           Target matrix, of shape (N, K)
+# X_val       Validation data matrix, of shape (N, C*Hin*Win)
+# Y_val       Validation target matrix, of shape (N, K)
+# C           Number of input channels (dimensionality of input depth)
+# Hin         Input height
+# Win         Input width
+# batch_size  Batch size
+# epochs      Number of epochs
+# lr          Learning rate
+# mu          Momentum value
+# decay       Learning rate decay
+# reg         Regularization strength
+# seed        Seed for model initialization
+# verbose     Flag indicates if function should print to stdout
+# ------------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME              TYPE                        MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# model             List[unknown]               Trained model which can be used in lenetPredict
-# ----------------------------------------------------------------------------------------------------------------------
+# -------------------------------------------------------------------------------
+# model  Trained model which can be used in lenetPredict
+# -------------------------------------------------------------------------------
 
 source("nn/layers/affine.dml") as affine
 source("nn/layers/conv2d_builtin.dml") as conv2d
diff --git a/scripts/builtin/lm.dml b/scripts/builtin/lm.dml
index 2ffba76e97..9c8fe5b401 100644
--- a/scripts/builtin/lm.dml
+++ b/scripts/builtin/lm.dml
@@ -21,27 +21,24 @@
 
 # The lm-function solves linear regression using either the direct solve method or the conjugate gradient
 # algorithm depending on the input size of the matrices (See lmDS-function and lmCG-function respectively).
-
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME          TYPE              DEFAULT  MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# X             Matrix[Double]    ---      Matrix of feature vectors.
-# y             Matrix[Double]    ---      1-column matrix of response values.
-# icpt          Integer           0        Intercept presence, shifting and rescaling the columns of X
-# reg           Double            1e-7     Regularization constant (lambda) for L2-regularization. set to nonzero
-#                                          for highly dependant/sparse/numerous features
-# tol           Double            1e-7     Tolerance (epsilon); conjugate gradient procedure terminates early if L2
-#                                          norm of the beta-residual is less than tolerance * its initial norm
-# maxi          Integer           0        Maximum number of conjugate gradient iterations. 0 = no maximum
-# verbose       Boolean           TRUE     If TRUE print messages are activated
-# ----------------------------------------------------------------------------------------------------------------------
-# OUTPUT
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME          TYPE              MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# B             Matrix[Double]    The model fit
-# ----------------------------------------------------------------------------------------------------------------------
+#
+# INPUT:
+# --------------------------------------------------------------------------------------
+# X        Matrix of feature vectors.
+# y        1-column matrix of response values.
+# icpt     Intercept presence, shifting and rescaling the columns of X
+# reg      Regularization constant (lambda) for L2-regularization. set to nonzero
+#          for highly dependent/sparse/numerous features
+# tol      Tolerance (epsilon); conjugate gradient procedure terminates early if L2
+#          norm of the beta-residual is less than tolerance * its initial norm
+# maxi     Maximum number of conjugate gradient iterations. 0 = no maximum
+# verbose  If TRUE print messages are activated
+# --------------------------------------------------------------------------------------
+#
+# OUTPUT:
+# --------------------------------------------------------------------------------------------
+# B     The model fit
+# --------------------------------------------------------------------------------------------
 
 m_lm = function(Matrix[Double] X, Matrix[Double] y, Integer icpt = 0, Double reg = 1e-7, Double tol = 1e-7, Integer maxi = 0, Boolean verbose = TRUE)
     return (Matrix[Double] B) {
diff --git a/scripts/builtin/lmCG.dml b/scripts/builtin/lmCG.dml
index 73fcc18e47..cbc6870899 100644
--- a/scripts/builtin/lmCG.dml
+++ b/scripts/builtin/lmCG.dml
@@ -21,26 +21,23 @@
 
 # The lmCG function solves linear regression using the conjugate gradient algorithm
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME          TYPE              DEFAULT  MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# X             Matrix[Double]    ---      Matrix of feature vectors.
-# y             Matrix[Double]    ---      1-column matrix of response values.
-# icpt          Integer           0        Intercept presence, shifting and rescaling the columns of X
-# reg           Double            1e-7     Regularization constant (lambda) for L2-regularization. set to nonzero
-#                                          for highly dependant/sparse/numerous features
-# tol           Double            1e-7     Tolerance (epsilon); conjugate gradient procedure terminates early if L2
-#                                          norm of the beta-residual is less than tolerance * its initial norm
-# maxi          Integer           0        Maximum number of conjugate gradient iterations. 0 = no maximum
-# verbose       Boolean           TRUE     If TRUE print messages are activated
-# ----------------------------------------------------------------------------------------------------------------------
-# OUTPUT
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME          TYPE             MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# B             Matrix[Double]   The model fit
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# --------------------------------------------------------------------------------------
+# X        Matrix of feature vectors.
+# y        1-column matrix of response values.
+# icpt     Intercept presence, shifting and rescaling the columns of X
+# reg      Regularization constant (lambda) for L2-regularization. set to nonzero
+#          for highly dependent/sparse/numerous features
+# tol      Tolerance (epsilon); conjugate gradient procedure terminates early if L2
+#          norm of the beta-residual is less than tolerance * its initial norm
+# maxi     Maximum number of conjugate gradient iterations. 0 = no maximum
+# verbose  If TRUE print messages are activated
+# --------------------------------------------------------------------------------------
+#
+# OUTPUT:
+# ---------------------------------------------------------------------------------------------
+# B     The model fit
+# ---------------------------------------------------------------------------------------------
 
 m_lmCG = function(Matrix[Double] X, Matrix[Double] y, Integer icpt = 0, Double reg = 1e-7, Double tol = 1e-7,
  Integer maxi = 0, Boolean verbose = TRUE) return (Matrix[Double] B) {
diff --git a/scripts/builtin/lmDS.dml b/scripts/builtin/lmDS.dml
index b5d4b41aac..d32e5cbe7a 100644
--- a/scripts/builtin/lmDS.dml
+++ b/scripts/builtin/lmDS.dml
@@ -21,26 +21,23 @@
 
 # The lmDC function solves linear regression using the direct solve method
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME          TYPE              DEFAULT  MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# X             Matrix[Double]    ---      Matrix of feature vectors.
-# y             Matrix[Double]    ---      1-column matrix of response values.
-# icpt          Integer           0        Intercept presence, shifting and rescaling the columns of X
-# reg           Double            1e-7     Regularization constant (lambda) for L2-regularization. set to nonzero
-#                                          for highly dependant/sparse/numerous features
-# tol           Double            1e-7     Tolerance (epsilon); conjugate gradient procedure terminates early if L2
-#                                          norm of the beta-residual is less than tolerance * its initial norm
-# maxi          Integer           0        Maximum number of conjugate gradient iterations. 0 = no maximum
-# verbose       Boolean           TRUE     If TRUE print messages are activated
-# ----------------------------------------------------------------------------------------------------------------------
-# OUTPUT
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME          TYPE             MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# B             Matrix[Double]   The model fit
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# --------------------------------------------------------------------------------------
+# X        Matrix of feature vectors.
+# y        1-column matrix of response values.
+# icpt     Intercept presence, shifting and rescaling the columns of X
+# reg      Regularization constant (lambda) for L2-regularization. set to nonzero
+#          for highly dependent/sparse/numerous features
+# tol      Tolerance (epsilon); conjugate gradient procedure terminates early if L2
+#          norm of the beta-residual is less than tolerance * its initial norm
+# maxi     Maximum number of conjugate gradient iterations. 0 = no maximum
+# verbose  If TRUE print messages are activated
+# --------------------------------------------------------------------------------------
+#
+# OUTPUT:
+# ---------------------------------------------------------------------------------------------
+# B     The model fit
+# ---------------------------------------------------------------------------------------------
 
 m_lmDS = function(Matrix[Double] X, Matrix[Double] y, Integer icpt = 0, Double reg = 1e-7,
  Boolean verbose = TRUE) return (Matrix[Double] B) {
diff --git a/scripts/builtin/lmPredict.dml b/scripts/builtin/lmPredict.dml
index 20f8ca4722..d3f9a3e130 100644
--- a/scripts/builtin/lmPredict.dml
+++ b/scripts/builtin/lmPredict.dml
@@ -21,24 +21,20 @@
 
 # The lmPredict-function predicts the class of a feature vector
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME          TYPE              DEFAULT  MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# X             Matrix[Double]    ---      Matrix of feature vectors
-# B             Matrix[Double]    ---      1-column matrix of weights.
-# ytest         Matrix[Double]    ---      test labels, used only for verbose output. can be set to matrix(0,1,1)
-#                                          if verbose output is not wanted
-# icpt          Integer           0        Intercept presence, shifting and rescaling the columns of X
-# verbose       Boolean           TRUE     If TRUE print messages are activated
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# --------------------------------------------------------------------------------------
+# X        Matrix of feature vectors
+# B        1-column matrix of weights.
+# ytest    test labels, used only for verbose output. can be set to matrix(0,1,1)
+#          if verbose output is not wanted
+# icpt     Intercept presence, shifting and rescaling the columns of X
+# verbose  If TRUE print messages are activated
+# --------------------------------------------------------------------------------------
 #
-# OUTPUT
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME          TYPE                       MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# yhat          Matrix[Double]             1-column matrix of classes
-# ----------------------------------------------------------------------------------------------------------------------
+# OUTPUT:
+# -----------------------------------------------------------------------------------
+# yhat  1-column matrix of classes
+# -----------------------------------------------------------------------------------
 
 m_lmPredict = function(Matrix[Double] X, Matrix[Double] B, 
   Matrix[Double] ytest = matrix(0,1,1), Integer icpt = 0, Boolean verbose = FALSE) 
diff --git a/scripts/builtin/logSumExp.dml b/scripts/builtin/logSumExp.dml
index ad886844db..6df5c5a3a3 100644
--- a/scripts/builtin/logSumExp.dml
+++ b/scripts/builtin/logSumExp.dml
@@ -21,22 +21,18 @@
 
 # Built-in LOGSUMEXP
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME       TYPE             DEFAULT     MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# X          Matrix[Double]   ---         matrix M
-# margin     String           none        if the logsumexp of rows is required set margin = "row"
-#                                         if the logsumexp of columns is required set margin = "col"
-#                                         if set to "none" then a single scalar is returned computing logsumexp of matrix
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# --------------------------------------------------------------------------------------
+# M       matrix to perform Log sum exp on.
+# margin  if the logsumexp of rows is required set margin = "row"
+#         if the logsumexp of columns is required set margin = "col"
+#         if set to "none" then a single scalar is returned computing logsumexp of matrix
+# --------------------------------------------------------------------------------------
 #
-# OUTPUT
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME       TYPE                        MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# output     Matrix[Double]              a 1*1 matrix, row vector or column vector depends on margin value
-# ----------------------------------------------------------------------------------------------------------------------
+# OUTPUT:
+# ---------------------------------------------------------------------------------------
+# output  a 1*1 matrix, row vector or column vector depends on margin value
+# ---------------------------------------------------------------------------------------
 
 m_logSumExp = function(Matrix[Double] M, String margin = "none")
 return(Matrix[Double] output)
diff --git a/scripts/builtin/matrixProfile.dml b/scripts/builtin/matrixProfile.dml
index 715704f725..ad1f8d6cf6 100644
--- a/scripts/builtin/matrixProfile.dml
+++ b/scripts/builtin/matrixProfile.dml
@@ -29,25 +29,21 @@
 #   DOI: 10.1109/ICDM.2018.00099.
 #   https://www.cs.ucr.edu/~eamonn/SCRIMP_ICDM_camera_ready_updated.pdf
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME                     TYPE              DEFAULT  MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# ts                       Matrix[Double]    ---      Time series to profile
-# window_size              Integer           4        Sliding window size
-# sample_percent           Double            1.0      Degree of approximation
-#                                                     between zero and one (1
-#                                                     computes the exact solution)
-# is_verbose               Boolean           False    Print debug information
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# ----------------------------------------------------------------------------------
+# ts              Time series to profile
+# window_size     Sliding window size
+# sample_percent  Degree of approximation
+#                 between zero and one (1
+#                 computes the exact solution)
+# is_verbose      Print debug information
+# ----------------------------------------------------------------------------------
 #
-# OUTPUT
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME                    TYPE                       MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# profile                 Matrix[Double]            The computed matrix profile
-# profile_index           Matrix[Double]            Indices of least distances
-# ----------------------------------------------------------------------------------------------------------------------
+# OUTPUT:
+# -----------------------------------------------------------------------------------
+# profile        The computed matrix profile
+# profile_index  Indices of least distances
+# -----------------------------------------------------------------------------------
 
 m_matrixProfile = function(Matrix[Double] ts, Integer window_size=4, Double sample_percent=1.0, Boolean is_verbose=FALSE)
   return(Matrix[Double] profile, Matrix[Double] profile_index)
diff --git a/scripts/builtin/mcc.dml b/scripts/builtin/mcc.dml
index 644ec37a00..d61fb3badd 100644
--- a/scripts/builtin/mcc.dml
+++ b/scripts/builtin/mcc.dml
@@ -21,21 +21,17 @@
 
 # Built-in function mcc: Matthews' Correlation Coefficient for binary classification evaluation
 #
-# INPUT PARAMETERS:
-# ---------------------------------------------------------------------------------------------
-# NAME            TYPE               DEFAULT     MEANING
-# ---------------------------------------------------------------------------------------------
-# predictions     Matrix[Integer]      ---     Vector of predicted 0/1 values. 
-#                                                 (requires setting 'labels' parameter)
-# labels          Matrix[Integer]      ---     Vector of 0/1 labels.
-# ---------------------------------------------------------------------------------------------
- 
-#Output(s)
-# ---------------------------------------------------------------------------------------------
-# NAME            TYPE    DEFAULT     MEANING
-# ---------------------------------------------------------------------------------------------
-# mattCC          Double    ---       Matthews' Correlation Coefficient
-# ---------------------------------------------------------------------------------------------
+# INPUT:
+# -------------------------------------------------------------
+# predictions  Vector of predicted 0/1 values.
+#              (requires setting 'labels' parameter)
+# labels       Vector of 0/1 labels.
+# -------------------------------------------------------------
+#
+# OUTPUT:
+# -----------------------------------------------------------------
+# mattCC  Matthews' Correlation Coefficient
+# -----------------------------------------------------------------
 
 m_mcc = function(Matrix[Double] predictions, Matrix[Double] labels)
 return (Double mattCC)
diff --git a/scripts/builtin/mdedup.dml b/scripts/builtin/mdedup.dml
index 9edf4aa15d..5c40c12ce5 100644
--- a/scripts/builtin/mdedup.dml
+++ b/scripts/builtin/mdedup.dml
@@ -22,25 +22,21 @@
 # Implements builtin for deduplication using matching dependencies (e.g. Street 0.95, City 0.90 -> ZIP 1.0)
 # and Jaccard distance.
 # 
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME            TYPE              DEFAULT     MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# X               Frame             ---         Input Frame X
-# LHSfeatures     Matrix[Integer]   ---         A matrix 1xd with numbers of columns for MDs
-#                                               (e.g. Street 0.95, City 0.90 -> ZIP 1.0)
-# LHSthreshold    Matrix[Double]    ---         A matrix 1xd with threshold values in interval [0, 1] for MDs
-# RHSfeatures     Matrix[Integer]   ---         A matrix 1xd with numbers of columns for MDs
-# RHSthreshold    Matrix[Double]    ---         A matrix 1xd with threshold values in interval [0, 1] for MDs
-# verbose         Boolean           ---         To print the output
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# --------------------------------------------------------------------------------------
+# X             Input Frame X
+# LHSfeatures   A matrix 1xd with numbers of columns for MDs
+#               (e.g. Street 0.95, City 0.90 -> ZIP 1.0)
+# LHSthreshold  A matrix 1xd with threshold values in interval [0, 1] for MDs
+# RHSfeatures   A matrix 1xd with numbers of columns for MDs
+# RHSthreshold  A matrix 1xd with threshold values in interval [0, 1] for MDs
+# verbose       To print the output
+# --------------------------------------------------------------------------------------
 #
-# Output:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME            TYPE                         MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# MD              Matrix[Double]               Matrix nx1 of duplicates
-# ----------------------------------------------------------------------------------------------------------------------
+# OUTPUT:
+# -------------------------------------------------------------------------------
+# MD    Matrix nx1 of duplicates
+# -------------------------------------------------------------------------------
 
 s_mdedup = function(Frame[String] X, Matrix[Double] LHSfeatures, Matrix[Double] LHSthreshold,
     Matrix[Double] RHSfeatures, Matrix[Double] RHSthreshold, Boolean verbose)
diff --git a/scripts/builtin/mice.dml b/scripts/builtin/mice.dml
index b44b944263..24ffaccf5b 100644
--- a/scripts/builtin/mice.dml
+++ b/scripts/builtin/mice.dml
@@ -21,29 +21,25 @@
 
 # This Builtin function implements multiple imputation using Chained Equations (MICE)
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME            TYPE              DEFAULT    MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# X               Matrix[Double]    ---        Data Matrix (Recoded Matrix for categorical features)
-# cMask           Matrix[Double]    ---        A 0/1 row vector for identifying numeric (0) and categorical features (1)
-# iter            Integer           3          Number of iteration for multiple imputations
-# threshold       Double            0.8        confidence value [0, 1] for robust imputation, values will only be imputed
-#                                              if the predicted value has probability greater than threshold,
-#                                              only applicable for categorical data
-# verbose         Boolean           FALSE      Boolean value.
-# ----------------------------------------------------------------------------------------------------------------------
-#
-# OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME            TYPE                         MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# output          Matrix[Double]               imputed dataset
-# ----------------------------------------------------------------------------------------------------------------------
-#
 # Assumption missing value are represented with empty string i.e ",," in CSV file  
 # variables with suffix n are storing continuos/numeric data and variables with 
 # suffix c are storing categorical data
+#
+# INPUT:
+# ------------------------------------------------------------------------------------
+# X          Data Matrix (Recoded Matrix for categorical features)
+# cMask      A 0/1 row vector for identifying numeric (0) and categorical features (1)
+# iter       Number of iterations for multiple imputations
+# threshold  confidence value [0, 1] for robust imputation, values will only be imputed
+#            if the predicted value has probability greater than threshold,
+#            only applicable for categorical data
+# verbose    Boolean value.
+# ------------------------------------------------------------------------------------
+#
+# OUTPUT:
+# ---------------------------------------------------------------------------------
+# output  imputed dataset
+# ---------------------------------------------------------------------------------
 
 m_mice= function(Matrix[Double] X, Matrix[Double] cMask, Integer iter = 3, 
   Double threshold = 0.8, Boolean verbose = FALSE)
diff --git a/scripts/builtin/miceApply.dml b/scripts/builtin/miceApply.dml
index 5c0d116896..448310ef3c 100644
--- a/scripts/builtin/miceApply.dml
+++ b/scripts/builtin/miceApply.dml
@@ -21,32 +21,28 @@
 
 # This Builtin function implements multiple imputation using Chained Equations (MICE)
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME            TYPE              DEFAULT    MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# X              Matrix[Double]    ---       Data Matrix (Recoded Matrix for categorical features)
-# mtea           Matrix[Double]    ---       A meta matrix with each rows storing values 1) mask of original matrix, 
-#                                              2) information of columns with missing values on  original data 0 for no missing value in column and 1 otherwise
-#                                              3) dist values in each columns in original data 1 for continuous columns and colMax for categorical
-# threshold      Double            0.8       confidence value [0, 1] for robust imputation, values will only be imputed
-#                                              if the predicted value has probability greater than threshold,
-#                                              only applicable for categorical data
-# dM             Frame[Unknown]                meta frame from OHE on original data
-# betaList       List[Unknown]    --         List of machine learning models trained for each column imputation
-# verbose        Boolean           FALSE     Boolean value.
-# ----------------------------------------------------------------------------------------------------------------------
-#
-# OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME            TYPE                         MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# output          Matrix[Double]               imputed dataset
-# ----------------------------------------------------------------------------------------------------------------------
-#
 # Assumption missing value are represented with empty string i.e ",," in CSV file  
 # variables with suffix n are storing continuos/numeric data and variables with 
 # suffix c are storing categorical data
+#
+# INPUT:
+# --------------------------------------------------------------------------------------
+# X          Data Matrix (Recoded Matrix for categorical features)
+# meta       A meta matrix with each row storing values 1) mask of original matrix,
+#              2) information of columns with missing values on original data 0 for no missing value in column and 1 otherwise
+#              3) dist values in each columns in original data 1 for continuous columns and colMax for categorical
+# threshold  confidence value [0, 1] for robust imputation, values will only be imputed
+#              if the predicted value has probability greater than threshold,
+#              only applicable for categorical data
+# dM         meta frame from OHE on original data
+# betaList   List of machine learning models trained for each column imputation
+# verbose    Boolean value.
+# --------------------------------------------------------------------------------------
+#
+# OUTPUT:
+# ---------------------------------------------------------------------------------
+# output  imputed dataset
+# ---------------------------------------------------------------------------------
 
 m_miceApply = function(Matrix[Double] X, Matrix[Double] meta, Double threshold, Frame[String] dM, List[Unknown] betaList)
   return(Matrix[Double] output)
diff --git a/scripts/builtin/msvm.dml b/scripts/builtin/msvm.dml
index 3f2dbde51e..4bf904f822 100644
--- a/scripts/builtin/msvm.dml
+++ b/scripts/builtin/msvm.dml
@@ -19,30 +19,26 @@
 #
 #-------------------------------------------------------------
 
-# Implements builtin multiclass SVM with squared slack variables, 
+# Implements builtin multi-class SVM with squared slack variables, 
 # learns one-against-the-rest binary-class classifiers by making a function call to l2SVM
 #
-# INPUT PARAMETERS:
-#-----------------------------------------------------------------------------------------------------------------------
-# NAME            TYPE            DEFAULT     MEANING
-#-----------------------------------------------------------------------------------------------------------------------
-# X               Matrix[Double]  ---         matrix X of feature vectors
-# Y               Matrix[Double]  ---         matrix Y of class labels
-# intercept       Boolean         False       No Intercept ( If set to TRUE then a constant bias column is added to X)
-# num_classes     integer         10          Number of classes
-# epsilon         Double          0.001       Procedure terminates early if the reduction in objective function
-#                                             value is less than epsilon (tolerance) times the initial objective function value.
-# reg             Double          1.0         Regularization parameter (lambda) for L2 regularization
-# maxIterations   Int             100         Maximum number of conjugate gradient iterations
-# verbose         Boolean         False       Set to true to print while training.
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+#------------------------------------------------------------------------------------------
+# X              matrix X of feature vectors
+# Y              matrix Y of class labels
+# intercept      No Intercept ( If set to TRUE then a constant bias column is added to X)
+# num_classes    Number of classes
+# epsilon        Procedure terminates early if the reduction in objective function
+#                value is less than epsilon (tolerance) times the initial objective function value.
+# reg            Regularization parameter (lambda) for L2 regularization
+# maxIterations  Maximum number of conjugate gradient iterations
+# verbose        Set to true to print while training.
+# -----------------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME            TYPE                       MEANING
-#-----------------------------------------------------------------------------------------------------------------------
-# model           Matrix[Double]             model matrix
-#-----------------------------------------------------------------------------------------------------------------------
+#-----------------------------------------------------------------------------------
+# model  model matrix
+#-----------------------------------------------------------------------------------
 
 m_msvm = function(Matrix[Double] X, Matrix[Double] Y, Boolean intercept = FALSE,
     Double epsilon = 0.001, Double reg = 1.0, Integer maxIterations = 100,
diff --git a/scripts/builtin/msvmPredict.dml b/scripts/builtin/msvmPredict.dml
index d34d086edd..f869d7dc05 100644
--- a/scripts/builtin/msvmPredict.dml
+++ b/scripts/builtin/msvmPredict.dml
@@ -21,22 +21,18 @@
 
 # This Scripts helps in applying an trained MSVM
 #
-# INPUT PARAMETERS:
-#-----------------------------------------------------------------------------------------------------------------------
-# NAME            TYPE               DEFAULT     MEANING
-#-----------------------------------------------------------------------------------------------------------------------
-# X               Matrix[Double]     ---         matrix X of feature vectors to classify
-# W               Matrix[Double]     ---         matrix of the trained variables
-#-----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+#------------------------------------------------------------------------------
+# X     matrix X of feature vectors to classify
+# W     matrix of the trained variables
+#------------------------------------------------------------------------------
 #
 # OUTPUT:
-#-----------------------------------------------------------------------------------------------------------------------
-# NAME            TYPE                          MEANING
-#-----------------------------------------------------------------------------------------------------------------------
-# YRaw            Matrix[Double]                Classification Labels Raw, meaning not modified to clean
-#                                               Labeles of 1's and -1's
-# Y               Matrix[Double]                Classification Labels Maxed to ones and zeros.
-#-----------------------------------------------------------------------------------------------------------------------
+#-------------------------------------------------------------------------------
+# YRaw  Classification Labels Raw, meaning not modified to clean
+#       Labels of 1's and -1's
+# Y     Classification Labels Maxed to ones and zeros.
+#-------------------------------------------------------------------------------
 
 m_msvmPredict = function(Matrix[Double] X, Matrix[Double] W)
   return(Matrix[Double] YRaw, Matrix[Double] Y)
diff --git a/scripts/builtin/multiLogReg.dml b/scripts/builtin/multiLogReg.dml
index d6412e86bf..21c87a35e4 100644
--- a/scripts/builtin/multiLogReg.dml
+++ b/scripts/builtin/multiLogReg.dml
@@ -24,28 +24,24 @@
 # The largest label represents the baseline category; if label -1 or 0 is present, then it is
 # the baseline label (and it is converted to the largest label).
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME      TYPE                DEFAULT   MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# X         Matrix[Double]      ---       Location to read the matrix of feature vectors
-# Y         Matrix[Double]      ---       Location to read the matrix with category labels
-# icpt      Integer             0         Intercept presence, shifting and rescaling X columns: 0 = no intercept,
-#                                         no shifting, no rescaling; 1 = add intercept, but neither shift nor
-#                                         rescale X; 2 = add intercept, shift & rescale X columns to mean = 0, variance = 1
-# tol       Double              0.000001  tolerance ("epsilon")
-# reg       Double              0.0       regularization parameter (lambda = 1/C); intercept is not regularized
-# maxi      Integer             100       max. number of outer (Newton) iterations
-# maxii     Integer             0         max. number of inner (conjugate gradient) iterations, 0 = no max
-# verbose   Boolean             FALSE     flag specifying if logging information should be printed
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# ---------------------------------------------------------------------------------------
+# X        Location to read the matrix of feature vectors
+# Y        Location to read the matrix with category labels
+# icpt     Intercept presence, shifting and rescaling X columns: 0 = no intercept,
+#          no shifting, no rescaling; 1 = add intercept, but neither shift nor
+#          rescale X; 2 = add intercept, shift & rescale X columns to mean = 0, variance = 1
+# tol      tolerance ("epsilon")
+# reg      regularization parameter (lambda = 1/C); intercept is not regularized
+# maxi     max. number of outer (Newton) iterations
+# maxii    max. number of inner (conjugate gradient) iterations, 0 = no max
+# verbose  flag specifying if logging information should be printed
+# ---------------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME      TYPE                         MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# betas     Matrix[Double]               regression betas as output for prediction
-# ----------------------------------------------------------------------------------------------------------------------
+# --------------------------------------------------------------------------------------
+# betas  regression betas as output for prediction
+# --------------------------------------------------------------------------------------
 
 m_multiLogReg = function(Matrix[Double] X, Matrix[Double] Y, Int icpt = 2, 
   Double tol=1e-6, Double reg=1.0, Int maxi=100, Int maxii=20, Boolean verbose=TRUE)
diff --git a/scripts/builtin/multiLogRegPredict.dml b/scripts/builtin/multiLogRegPredict.dml
index b17d8ca326..dc5c0332ab 100644
--- a/scripts/builtin/multiLogRegPredict.dml
+++ b/scripts/builtin/multiLogRegPredict.dml
@@ -22,24 +22,20 @@
 # THIS SCRIPT APPLIES THE ESTIMATED PARAMETERS OF MULTINOMIAL LOGISTIC REGRESSION TO A NEW (TEST) DATASET
 # Matrix M of predicted means/probabilities, some statistics in CSV format (see below)
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME          TYPE                 DEFAULT  MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# X             Matrix[Double]       ---      Data Matrix X
-# B             Matrix[Double]       ---      Regression parameters betas
-# Y             Matrix[Double]       ---      Response vector Y
-# verbose       Boolean              FALSE    flag specifying if logging information should be printed
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# -----------------------------------------------------------------------------------
+# X        Data Matrix X
+# B        Regression parameters betas
+# Y        Response vector Y
+# verbose  flag specifying if logging information should be printed
+# -----------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME           TYPE                        MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# M              Matrix[Double]              Matrix M of predicted means/probabilities
-# predicted_Y    Matrix[Double]              Predicted response vector
-# accuracy       Double                      scalar value of accuracy
-# ----------------------------------------------------------------------------------------------------------------------
+# ----------------------------------------------------------------------------------------
+# M            Matrix M of predicted means/probabilities
+# predicted_Y  Predicted response vector
+# accuracy     scalar value of accuracy
+# ----------------------------------------------------------------------------------------
 
 m_multiLogRegPredict = function(Matrix[Double] X, Matrix[Double] B, Matrix[Double] Y, Boolean verbose = FALSE)
   return(Matrix[Double] M, Matrix[Double] predicted_Y, Double accuracy)
diff --git a/scripts/builtin/na_locf.dml b/scripts/builtin/na_locf.dml
index 30f7572723..0a6f7c6350 100644
--- a/scripts/builtin/na_locf.dml
+++ b/scripts/builtin/na_locf.dml
@@ -21,22 +21,18 @@
 
 # Builtin function for imputing missing values using forward fill and backward fill techniques
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME            TYPE              DEFAULT    MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# X               Matrix[Double]    ---        Matrix X
-# option          String            "locf"     String "locf" (last observation moved forward) to do forward fill
-#                                   String     "nocb" (next observation carried backward) to do backward fill
-# verbose         Boolean           FALSE      to print output on screen
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# ----------------------------------------------------------------------------------
+# X        Matrix X
+# option   String "locf" (last observation moved forward) to do forward fill
+#          "nocb" (next observation carried backward) to do backward fill
+# verbose  to print output on screen
+# ----------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME            TYPE                         MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# output          Matrix[Double]               Matrix with no missing values
-# ----------------------------------------------------------------------------------------------------------------------
+# ---------------------------------------------------------------------------------
+# output  Matrix with no missing values
+# ---------------------------------------------------------------------------------
 
 m_na_locf = function(Matrix[Double] X, String option = "locf", Boolean verbose = FALSE)
   return(Matrix[Double] output)
diff --git a/scripts/builtin/naiveBayes.dml b/scripts/builtin/naiveBayes.dml
index 888de142fd..d6e2028a5d 100644
--- a/scripts/builtin/naiveBayes.dml
+++ b/scripts/builtin/naiveBayes.dml
@@ -21,23 +21,19 @@
 
 # The naiveBayes-function computes the class conditional probabilities and class priors.
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME      TYPE              DEFAULT    MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# D         Matrix[Double]    ---        One dimensional column matrix with N rows.
-# C         Matrix[Double]    ---        One dimensional column matrix with N rows.
-# Laplace   Double            1          Any Double value.
-# Verbose   Boolean           TRUE       Boolean value.
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# ----------------------------------------------------------------------------------------
+# D        One dimensional column matrix with N rows.
+# C        One dimensional column matrix with N rows.
+# laplace  Any Double value.
+# verbose  Boolean value.
+# ----------------------------------------------------------------------------------------
 #
-# OUTPUT
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME                TYPE               MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# prior               Matrix[Double]     Class priors, One dimensional column matrix with N rows.
-# classConditionals   Matrix[Double]     Class conditional probabilites, One dimensional column matrix with N rows.
-# ----------------------------------------------------------------------------------------------------------------------
+# OUTPUT:
+# --------------------------------------------------------------------------------------------------
+# prior              Class priors, One dimensional column matrix with N rows.
+# classConditionals  Class conditional probabilities, One dimensional column matrix with N rows.
+# --------------------------------------------------------------------------------------------------
 
 m_naiveBayes = function(Matrix[Double] D,
   Matrix[Double] C, Double laplace = 1, Boolean verbose = TRUE)
diff --git a/scripts/builtin/naiveBayesPredict.dml b/scripts/builtin/naiveBayesPredict.dml
index 7efb965cf3..2d3615eff4 100644
--- a/scripts/builtin/naiveBayesPredict.dml
+++ b/scripts/builtin/naiveBayesPredict.dml
@@ -21,22 +21,18 @@
 
 # The naiveBaysePredict-function predicts the scoring with a naive Bayes model.
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME      TYPE             DEFAULT   MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# X         Matrix[Double]   ---       Matrix of test data with N rows.
-# P         Matrix[Double]   ---       Class priors, One dimensional column matrix with N rows.
-# C         Matrix[Double]   ---       Class conditional probabilities, matrix with N rows
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# ---------------------------------------------------------------------------------------
+# X     Matrix of test data with N rows.
+# P     Class priors, One dimensional column matrix with N rows.
+# C     Class conditional probabilities, matrix with N rows
+# ---------------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME      TYPE                       MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# Y         Matrix[Double]             A matrix containing the top-K item-ids with highest predicted ratings.
-# YRaw      Matrix[Double]             A matrix containing predicted ratings.
-# ----------------------------------------------------------------------------------------------------------------------
+# ---------------------------------------------------------------------------------------
+# Y     A matrix containing the top-K item-ids with highest predicted ratings.
+# YRaw  A matrix containing predicted ratings.
+# ---------------------------------------------------------------------------------------
 
 m_naiveBayesPredict = function(Matrix[Double] X, Matrix[Double] P, Matrix[Double] C)
  return (Matrix[Double] YRaw, Matrix[Double] Y)
diff --git a/scripts/builtin/normalize.dml b/scripts/builtin/normalize.dml
index 9e724045e3..1f136757aa 100644
--- a/scripts/builtin/normalize.dml
+++ b/scripts/builtin/normalize.dml
@@ -22,21 +22,17 @@
 # Min-max normalization (a.k.a. min-max scaling) to range [0,1]. For matrices 
 # of positive values, this normalization preserves the input sparsity.
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME      TYPE             DEFAULT   MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# X         Matrix[Double]   ---       Input feature matrix of shape n-by-m
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# ---------------------------------------------------------------------------------------
+# X     Input feature matrix of shape n-by-m
+# ---------------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME      TYPE                       MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# Y         Matrix[Double]             Modified output feature matrix of shape n-by-m
-# cmin      Matrix[Double]             Colunm minima of shape 1-by-m
-# cmax      Matrix[Double]             Column maxima of shape 1-by-m
-# ----------------------------------------------------------------------------------------------------------------------
+# ---------------------------------------------------------------------------------------
+# Y     Modified output feature matrix of shape n-by-m
+# cmin  Column minima of shape 1-by-m
+# cmax  Column maxima of shape 1-by-m
+# ---------------------------------------------------------------------------------------
 
 m_normalize = function(Matrix[Double] X)
   return (Matrix[Double] Y, Matrix[Double] cmin, Matrix[Double] cmax)
diff --git a/scripts/builtin/normalizeApply.dml b/scripts/builtin/normalizeApply.dml
index e63f65a7d2..e4c247ead4 100644
--- a/scripts/builtin/normalizeApply.dml
+++ b/scripts/builtin/normalizeApply.dml
@@ -24,21 +24,17 @@
 # preserves the input sparsity. The validity of the provided min-max range
 # and post-processing is under control of the caller. 
 #
-# INPUT PARAMETERS:
-# ------------------------------------------------------------------------------
-# NAME     TYPE     DEFAULT   MEANING
-# ------------------------------------------------------------------------------
-# X        Matrix[Double]    ---      Input feature matrix of shape n-by-m
-# cmin     Matrix[Double]    ---      Colunm minima of shape 1-by-m
-# cmax     Matrix[Double]    ---      Column maxima of shape 1-by-m
-# ------------------------------------------------------------------------------
+# INPUT:
+# ------------------------------------------------
+# X     Input feature matrix of shape n-by-m
+# cmin  Column minima of shape 1-by-m
+# cmax  Column maxima of shape 1-by-m
+# ------------------------------------------------
 #
 # OUTPUT:
-# ------------------------------------------------------------------------------
-# NAME     TYPE                       MEANING
-# ------------------------------------------------------------------------------
-# Y        Matrix[Double]             Modified output feature matrix of shape n-by-m
-# ------------------------------------------------------------------------------
+# ------------------------------------------------
+# Y     Modified output feature matrix of shape n-by-m
+# ------------------------------------------------
 
 
 m_normalizeApply = function(Matrix[Double] X, Matrix[Double] cmin, Matrix[Double] cmax)
diff --git a/scripts/builtin/outlier.dml b/scripts/builtin/outlier.dml
index 1026052376..0e0c4f50f8 100644
--- a/scripts/builtin/outlier.dml
+++ b/scripts/builtin/outlier.dml
@@ -22,21 +22,17 @@
 # This outlier-function takes a matrix data set as input from where it determines
 # which point(s) have the largest difference from mean.
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME      TYPE              DEFAULT   MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# X         Matrix[Double]    ---       Matrix of Recoded dataset for outlier evaluation
-# opposite  Boolean           ---       (1)TRUE for evaluating outlier from upper quartile range,
-#                                       (0)FALSE for evaluating outlier from lower quartile range
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# ------------------------------------------------------------------------------------------
+# X         Matrix of Recoded dataset for outlier evaluation
+# opposite  (1)TRUE for evaluating outlier from upper quartile range,
+#           (0)FALSE for evaluating outlier from lower quartile range
+# ------------------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME      TYPE                       MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# Y         Matrix[Double]             matrix indicating outlier values
-# ----------------------------------------------------------------------------------------------------------------------
+# ---------------------------------------------------------------------------------------
+# Y     matrix indicating outlier values
+# ---------------------------------------------------------------------------------------
 
 m_outlier = function(Matrix[Double] X, Boolean opposite) return (Matrix[Double] Y) {
   # determine if largest value has largest diff from mean
diff --git a/scripts/builtin/outlierByArima.dml b/scripts/builtin/outlierByArima.dml
index 8142860728..9292ad5d62 100644
--- a/scripts/builtin/outlierByArima.dml
+++ b/scripts/builtin/outlierByArima.dml
@@ -22,31 +22,27 @@
 # Built-in function for detecting and repairing outliers in time series, by training an ARIMA model
 # and classifying values that are more than k standard-deviations away from the predicated values as outliers.
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME            TYPE              DEFAULT  MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# X               Matrix[Double]    ---      Matrix X
-# k               Matrix[Double]    3        threshold values 1, 2, 3 for 68%, 95%, 99.7% respectively (3-sigma rule)
-# repairMethod    Integer           1        values: 0 = delete rows having outliers, 1 = replace outliers as zeros
-#                                                    2 = replace outliers as missing values 
-# p               Int               0        non-seasonal AR order
-# d               Int               0        non-seasonal differencing order
-# q               Int               0        non-seasonal MA order
-# P               Int               0        seasonal AR order
-# D               Int               0        seasonal differencing order
-# Q               Int               0        seasonal MA order
-# s               Int               1        period in terms of number of time-steps
-# include_mean    Bool              FALSE
-# solver          String            "jacobi" solver, is either "cg" or "jacobi"
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# -------------------------------------------------------------------------------------------
+# X               Matrix X
+# k               threshold values 1, 2, 3 for 68%, 95%, 99.7% respectively (3-sigma rule)
+# repairMethod    values: 0 = delete rows having outliers, 1 = replace outliers as zeros
+#                         2 = replace outliers as missing values 
+# p               non-seasonal AR order
+# d               non-seasonal differencing order
+# q               non-seasonal MA order
+# P               seasonal AR order
+# D               seasonal differencing order
+# Q               seasonal MA order
+# s               period in terms of number of time-steps
+# include_mean    If the mean should be included
+# solver          solver, is either "cg" or "jacobi"
+# -------------------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME            TYPE               MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# X_corrected     Matrix[Double]     Matrix X with no outliers
-# ----------------------------------------------------------------------------------------------------------------------
+# -------------------------------------------------------------------------------------------------
+# X_corrected   Matrix X with no outliers
+# -------------------------------------------------------------------------------------------------
 
 m_outlierByArima = function(Matrix[Double] X, Double k = 3, Integer repairMethod = 1, Integer p=0, Integer d=0,
   Integer q=0, Integer P=0, Integer D=0, Integer Q=0, Integer s=1, Boolean include_mean=FALSE, String solver="jacobi")
diff --git a/scripts/builtin/outlierByIQR.dml b/scripts/builtin/outlierByIQR.dml
index 754642dde6..7abf43c065 100644
--- a/scripts/builtin/outlierByIQR.dml
+++ b/scripts/builtin/outlierByIQR.dml
@@ -21,34 +21,23 @@
 
 # Builtin function for detecting and repairing outliers using standard deviation 
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME            TYPE             DEFAULT     MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# X               Matrix[Double]   ---       Matrix X
-# k               Double           1.5       a constant used to discern outliers k*IQR
-# isIterative     Boolean          TRUE      iterative repair or single repair
-# repairMethod    Integer          1         values: 0 = delete rows having outliers,
-#                                               1 = replace outliers with zeros
-#                                               2 = replace outliers as missing values
-# max_iterations  Integer          0         values: 0 = arbitrary number of iteraition until all outliers are removed,
-#                                            n = any constant defined by user
-# verbose         Boolean          FALSE     flag specifying if logging information should be printed
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# -------------------------------------------------------------------------------------------
+# X               Matrix X
+# k               a constant used to discern outliers k*IQR
+# isIterative     iterative repair or single repair
+# repairMethod    values: 0 = delete rows having outliers,
+#                    1 = replace outliers with zeros
+#                    2 = replace outliers as missing values
+# max_iterations  values: 0 = arbitrary number of iterations until all outliers are removed,
+#                 n = any constant defined by user
+# verbose         flag specifying if logging information should be printed
+# -------------------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME            TYPE                       MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# Y               Matrix[Double]             Matrix X with no outliers
-# ----------------------------------------------------------------------------------------------------------------------
-
-
-#Output(s)
-# ---------------------------------------------------------------------------------------------
-# NAME            TYPE    DEFAULT     MEANING
-# ---------------------------------------------------------------------------------------------
-# Y               Double   ---       Matrix X with no outliers
+# ---------------------------------------------------------------------------------
+# Y     Matrix X with no outliers
+# ---------------------------------------------------------------------------------
 
 m_outlierByIQR = function(Matrix[Double] X, Double k =1.5, Integer repairMethod = 1, 
   Integer max_iterations, Boolean verbose = TRUE) 
diff --git a/scripts/builtin/outlierByIQRApply.dml b/scripts/builtin/outlierByIQRApply.dml
index f2ee947fb1..507b8340e8 100644
--- a/scripts/builtin/outlierByIQRApply.dml
+++ b/scripts/builtin/outlierByIQRApply.dml
@@ -21,33 +21,22 @@
 
 # Builtin function for repairing outliers by IQR 
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME            TYPE             DEFAULT     MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# X               Matrix[Double]   ---      Matrix X
-# Q1              Matrix[Double]   ---      first quartile
-# Q3              Matrix[Double]   ---      third quartile
-# IQR             Matrix[Double]   ---      Inter-quartile range
-# k               Double           --       a constant used to discern outliers k*IQR
-# repairMethod    Integer          1         values: 0 = delete rows having outliers,
-#                                               1 = replace outliers with zeros
-#                                               2 = replace outliers as missing values
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# ------------------------------------------------------------------------------------------
+# X             Matrix X
+# Q1            first quartile
+# Q3            third quartile
+# IQR           Inter-quartile range
+# k             a constant used to discern outliers k*IQR
+# repairMethod   values: 0 = delete rows having outliers,
+#                   1 = replace outliers with zeros
+#                   2 = replace outliers as missing values
+# ------------------------------------------------------------------------------------------
 #
 # OUTPUT:
 # ----------------------------------------------------------------------------------------------------------------------
-# NAME            TYPE                       MEANING
+# Y    Matrix X with no outliers
 # ----------------------------------------------------------------------------------------------------------------------
-# Y               Matrix[Double]             Matrix X with no outliers
-# ----------------------------------------------------------------------------------------------------------------------
-
-
-#Output(s)
-# ---------------------------------------------------------------------------------------------
-# NAME            TYPE    DEFAULT     MEANING
-# ---------------------------------------------------------------------------------------------
-# Y               Double   ---       Matrix X with no outliers
 
 m_outlierByIQRApply = function(Matrix[Double] X, Matrix[Double] Q1, Matrix[Double] Q3, Matrix[Double] IQR, Double k, Integer repairMethod) 
   return(Matrix[Double] Y) 
diff --git a/scripts/builtin/outlierBySd.dml b/scripts/builtin/outlierBySd.dml
index e5220e035d..0e7a192f3f 100644
--- a/scripts/builtin/outlierBySd.dml
+++ b/scripts/builtin/outlierBySd.dml
@@ -21,24 +21,20 @@
 
 # Builtin function for detecting and repairing outliers using standard deviation
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME            TYPE              DEFAULT     MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# X               Matrix[Double]    ---         Matrix X
-# k               Double            3           threshold values 1, 2, 3 for 68%, 95%, 99.7% respectively (3-sigma rule)
-# repairMethod    Integer           1           values: 0 = delete rows having outliers, 1 = replace outliers as  zeros
-#                                               2 = replace outliers as missing values
-# max_iterations  Integer           0           values: 0 = arbitrary number of iteration until all outliers are removed,
-#                                               n = any constant defined by user
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# ----------------------------------------------------------------------------------------
+# X               Matrix X
+# k               threshold values 1, 2, 3 for 68%, 95%, 99.7% respectively (3-sigma rule)
+# repairMethod    values: 0 = delete rows having outliers, 1 = replace outliers as zeros
+#                 2 = replace outliers as missing values
+# max_iterations  values: 0 = arbitrary number of iterations until all outliers are removed,
+#                 n = any constant defined by user
+# ----------------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME           TYPE                          MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# Y              Matrix[Double]                Matrix X with no outliers
-# ----------------------------------------------------------------------------------------------------------------------
+# -------------------------------------------------------------------------------
+# Y     Matrix X with no outliers
+# -------------------------------------------------------------------------------
 
 m_outlierBySd = function(Matrix[Double] X, Double k = 3, Integer repairMethod = 1, 
   Integer max_iterations, Boolean verbose = TRUE) 
diff --git a/scripts/builtin/outlierBySdApply.dml b/scripts/builtin/outlierBySdApply.dml
index 6b11a39eeb..e098cf6489 100644
--- a/scripts/builtin/outlierBySdApply.dml
+++ b/scripts/builtin/outlierBySdApply.dml
@@ -21,35 +21,24 @@
 
 # Builtin function for detecting and repairing outliers using standard deviation 
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME            TYPE             DEFAULT     MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# X               Matrix[Double]   ---       Matrix X
-# colMean         Matrix[Double]   ---       Matrix X
-# k               Double           1.5       a constant used to discern outliers k*IQR
-# isIterative     Boolean          TRUE      iterative repair or single repair
-# repairMethod    Integer          1         values: 0 = delete rows having outliers,
-#                                               1 = replace outliers with zeros
-#                                               2 = replace outliers as missing values
-# max_iterations  Integer          0         values: 0 = arbitrary number of iteraition until all outliers are removed,
-#                                            n = any constant defined by user
-# verbose         Boolean          FALSE     flag specifying if logging information should be printed
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# -------------------------------------------------------------------------------------------
+# X               Matrix X
+# colMean         Matrix of column means
+# k               a constant used to discern outliers k*IQR
+# isIterative     iterative repair or single repair
+# repairMethod    values: 0 = delete rows having outliers,
+#                    1 = replace outliers with zeros
+#                    2 = replace outliers as missing values
+# max_iterations  values: 0 = arbitrary number of iterations until all outliers are removed,
+#                 n = any constant defined by user
+# verbose         flag specifying if logging information should be printed
+# -------------------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME            TYPE                       MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# Y               Matrix[Double]             Matrix X with no outliers
-# ----------------------------------------------------------------------------------------------------------------------
-
-
-#Output(s)
-# ---------------------------------------------------------------------------------------------
-# NAME            TYPE    DEFAULT     MEANING
-# ---------------------------------------------------------------------------------------------
-# Y               Double   ---       Matrix X with no outliers
+# ---------------------------------------------------------------------------------
+# Y     Matrix X with no outliers
+# ---------------------------------------------------------------------------------
 
 m_outlierBySdApply = function(Matrix[Double] X, Matrix[Double] colMean, Matrix[Double] colSD, Double k, Integer repairMethod) 
   return(Matrix[Double] Y) 
diff --git a/scripts/builtin/pca.dml b/scripts/builtin/pca.dml
index 0ae5c3d56b..8a8e0a4bd7 100644
--- a/scripts/builtin/pca.dml
+++ b/scripts/builtin/pca.dml
@@ -21,25 +21,21 @@
 
 # The function Principal Component Analysis (PCA) is used for dimensionality reduction
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME     TYPE             DEFAULT  MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# X        Matrix[Double]   ---      Input feature matrix
-# K        Int              2        Number of reduced dimensions (i.e., columns)
-# Center   Boolean          TRUE     Indicates whether or not to center the feature matrix
-# Scale    Boolean          TRUE     Indicates whether or not to scale the feature matrix
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# -------------------------------------------------------------------------------------------
+# X       Input feature matrix
+# K       Number of reduced dimensions (i.e., columns)
+# center  Indicates whether or not to center the feature matrix
+# scale   Indicates whether or not to scale the feature matrix
+# -------------------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME        TYPE                  MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# Xout        Matrix[Double]        Output feature matrix with K columns
-# Clusters        Matrix[Double]        Output dominant eigen vectors (can be used for projections)
-# Centering   Matrix[Double]        The column means of the input, subtracted to construct the PCA
-# ScaleFactor Matrix[Double]        The Scaling of the values, to make each dimension same size.
-# ----------------------------------------------------------------------------------------------------------------------
+# -------------------------------------------------------------------------------------------------
+# Xout         Output feature matrix with K columns
+# Clusters     Output dominant eigen vectors (can be used for projections)
+# Centering    The column means of the input, subtracted to construct the PCA
+# ScaleFactor  The Scaling of the values, to make each dimension same size.
+# -------------------------------------------------------------------------------------------------
 
 m_pca = function(Matrix[Double] X, Integer K=2, Boolean center=TRUE, Boolean scale=TRUE)
   return (Matrix[Double] Xout, Matrix[Double] Clusters, Matrix[Double] Centering, Matrix[Double] ScaleFactor) 
diff --git a/scripts/builtin/pcaInverse.dml b/scripts/builtin/pcaInverse.dml
index ff8f662b76..4b16ec6a90 100644
--- a/scripts/builtin/pcaInverse.dml
+++ b/scripts/builtin/pcaInverse.dml
@@ -23,22 +23,18 @@
 # This methods allows to reconstruct an approximation of the original matrix, and is useful for
 # calculating how much information is lost in the PCA.
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME          TYPE             DEFAULT       MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# Y             Matrix[Double]   ---           Input features that have PCA applied to them
-# Clusters      Matrix[Double]   ---           The previous PCA components computed
-# Centering     Matrix[Double]   empty matrix  The column means of the PCA model, subtracted to construct the PCA
-# ScaleFactor   Matrix[Double]   empty matrix  The scaling of each dimension in the PCA model
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# --------------------------------------------------------------------------------------
+# Y            Input features that have PCA applied to them
+# Clusters     The previous PCA components computed
+# Centering    The column means of the PCA model, subtracted to construct the PCA
+# ScaleFactor  The scaling of each dimension in the PCA model
+# --------------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME          TYPE                      MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# X             Matrix[Double]            Output feature matrix reconstructing and approximation of the original matrix
-----------------------------------------------------------------------------------------------------------------------
+# ------------------------------------------------------------------------------------
+# X     Output feature matrix reconstructing an approximation of the original matrix
+# ------------------------------------------------------------------------------------
 
 m_pcaInverse = function(Matrix[Double] Y, Matrix[Double] Clusters, 
   Matrix[Double] Centering = matrix(0, rows= 0, cols=0), 
diff --git a/scripts/builtin/pcaTransform.dml b/scripts/builtin/pcaTransform.dml
index 07bdbee51e..db741cf53d 100644
--- a/scripts/builtin/pcaTransform.dml
+++ b/scripts/builtin/pcaTransform.dml
@@ -23,22 +23,18 @@
 # This method is used to transpose data, which the PCA model was not trained on. To validate how good
 # The PCA is, and to apply in production. 
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME          TYPE             DEFAULT         MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# X             Matrix[Double]    ---            Input feature matrix
-# Clusters      Matrix[Double]    ---            The previously computed principal components
-# Centering     Matrix[Double]    empty matrix   The column means of the PCA model, subtracted to construct the PCA
-# ScaleFactor   Matrix[Double]    empty matrix   The scaling of each dimension in the PCA model
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# ------------------------------------------------------------------------------------
+# X            Input feature matrix
+# Clusters     The previously computed principal components
+# Centering    The column means of the PCA model, subtracted to construct the PCA
+# ScaleFactor  The scaling of each dimension in the PCA model
+# ------------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME          TYPE              MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# Y             Matrix[Double]    Output feature matrix dimensionally reduced by PCA
-# ----------------------------------------------------------------------------------------------------------------------
+# --------------------------------------------------------------------------------------------
+# Y     Output feature matrix dimensionally reduced by PCA
+# --------------------------------------------------------------------------------------------
 
 m_pcaTransform = function(Matrix[Double] X, Matrix[Double] Clusters,
   Matrix[Double] Centering = matrix(0, rows= 0, cols=0), 
diff --git a/scripts/builtin/pnmf.dml b/scripts/builtin/pnmf.dml
index 3213d85317..721ab7232b 100644
--- a/scripts/builtin/pnmf.dml
+++ b/scripts/builtin/pnmf.dml
@@ -27,24 +27,20 @@
 # Distributed nonnegative matrix factorization for web-scale dyadic 
 # data analysis on mapreduce. WWW 2010: 681-690]
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME        TYPE              DEFAULT  MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# X           Matrix[Double]    ---      Matrix of feature vectors.
-# rnk         Integer           ---      Number of components into which matrix X is to be factored.
-# eps         Double            10^-8    Tolerance
-# maxi        Integer           10       Maximum number of conjugate gradient iterations.
-# verbose     Boolean           TRUE     If TRUE, 'iter' and 'obj' are printed.
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# ----------------------------------------------------------------------------------------
+# X        Matrix of feature vectors.
+# rnk      Number of components into which matrix X is to be factored.
+# eps      Tolerance
+# maxi     Maximum number of conjugate gradient iterations.
+# verbose  If TRUE, 'iter' and 'obj' are printed.
+# ----------------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME       TYPE                         MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# W          Matrix[Double]               List of pattern matrices, one for each repetition.
-# H          Matrix[Double]               List of amplitude matrices, one for each repetition.
-# ----------------------------------------------------------------------------------------------------------------------
+# ------------------------------------------------------------------------------------
+# W     List of pattern matrices, one for each repetition.
+# H     List of amplitude matrices, one for each repetition.
+# ------------------------------------------------------------------------------------
 
 m_pnmf = function(Matrix[Double] X, Integer rnk, Double eps = 1e-8, Integer maxi = 10, Boolean verbose=TRUE) 
   return (Matrix[Double] W, Matrix[Double] H) 
diff --git a/scripts/builtin/ppca.dml b/scripts/builtin/ppca.dml
index 7d09b34bcd..b209ba8441 100644
--- a/scripts/builtin/ppca.dml
+++ b/scripts/builtin/ppca.dml
@@ -23,26 +23,21 @@
 # It is based on paper: sPCA: Scalable Principal Component Analysis for Big Data on Distributed
 # Platforms. Tarek Elgamal et.al.
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME      TYPE            DEFAULT  MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# X         Matrix[Double]  ---      n x m input feature matrix
-# k         Integer         ---      indicates dimension of the new vector space constructed from eigen vectors
-# maxi      Integer         ---      maximum number of iterations until convergence
-# tolobj    Double          0.00001  objective function tolerance value to stop ppca algorithm
-# tolrecerr Double          0.02     reconstruction error tolerance value to stop the algorithm
-# verbose   Boolen          TRUE     verbose debug output
-#
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# ----------------------------------------------------------------------------------------------
+# X          n x m input feature matrix
+# k          indicates dimension of the new vector space constructed from eigen vectors
+# maxi       maximum number of iterations until convergence
+# tolobj     objective function tolerance value to stop ppca algorithm
+# tolrecerr  reconstruction error tolerance value to stop the algorithm
+# verbose    verbose debug output
+# ----------------------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME     TYPE              MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# Xout     Matrix[Double]    Output feature matrix with K columns
-# Mout     Matrix[Double]    Output dominant eigen vectors (can be used for projections)
-# ----------------------------------------------------------------------------------------------------------------------
+# -------------------------------------------------------------------------------------------------
+# Xout  Output feature matrix with K columns
+# Mout  Output dominant eigen vectors (can be used for projections)
+# -------------------------------------------------------------------------------------------------
 
 m_ppca = function(Matrix[Double] X, Integer K=2, Integer maxi = 10, 
   Double tolobj = 0.00001, Double tolrecerr = 0.02, Boolean verbose = TRUE)
diff --git a/scripts/builtin/randomForest.dml b/scripts/builtin/randomForest.dml
index 7b80cbcfc5..bf26703508 100644
--- a/scripts/builtin/randomForest.dml
+++ b/scripts/builtin/randomForest.dml
@@ -21,53 +21,49 @@
 
 # This script implement classification random forest with both scale and categorical features.
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME            TYPE             DEFAULT      MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# X               Matrix[Double]   ---          Feature matrix X; note that X needs to be both recoded and dummy coded
-# Y               Matrix[Double]   ---          Label matrix Y; note that Y needs to be both recoded and dummy coded
-# R               Matrix[Double]   " "          Matrix which for each feature in X contains the following information
-#                                               - R[,1]: column ids       TODO pass recorded and binned
-#                                               - R[,2]: start indices
-#                                               - R[,3]: end indices
-#                                               If R is not provided by default all variables are assumed to be scale
-# bins            Int              20           Number of equiheight bins per scale feature to choose thresholds
-# depth           Int              25           Maximum depth of the learned tree
-# num_leaf        Int              10           Number of samples when splitting stops and a leaf node is added
-# num_samples     Int              3000         Number of samples at which point we switch to in-memory subtree building
-# num_trees       Int              10           Number of trees to be learned in the random forest model
-# subsamp_rate    Double           1.0          Parameter controlling the size of each tree in the forest; samples are selected from a
-#                                               Poisson distribution with parameter subsamp_rate (the default value is 1.0)
-# feature_subset  Double           0.5          Parameter that controls the number of feature used as candidates for splitting at each tree node
-#                                               as a power of number of features in the dataset;
-#                                               by default square root of features (i.e., feature_subset = 0.5) are used at each tree node
-# impurity        String           "Gini"       Impurity measure: entropy or Gini (the default)
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# ----------------------------------------------------------------------------------------
+# X               Feature matrix X; note that X needs to be both recoded and dummy coded
+# Y               Label matrix Y; note that Y needs to be both recoded and dummy coded
+# R               Matrix which for each feature in X contains the following information
+#                 - R[,1]: column ids       TODO pass recorded and binned
+#                 - R[,2]: start indices
+#                 - R[,3]: end indices
+#                 If R is not provided by default all variables are assumed to be scale
+# bins            Number of equiheight bins per scale feature to choose thresholds
+# depth           Maximum depth of the learned tree
+# num_leaf        Number of samples when splitting stops and a leaf node is added
+# num_samples     Number of samples at which point we switch to in-memory subtree building
+# num_trees       Number of trees to be learned in the random forest model
+# subsamp_rate    Parameter controlling the size of each tree in the forest; samples are selected from a
+#                 Poisson distribution with parameter subsamp_rate (the default value is 1.0)
+# feature_subset  Parameter that controls the number of features used as candidates for splitting at each tree node
+#                 as a power of number of features in the dataset;
+#                 by default square root of features (i.e., feature_subset = 0.5) are used at each tree node
+# impurity        Impurity measure: entropy or Gini (the default)
+# ----------------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME            TYPE             MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# M               Matrix[Double]   Matrix M containing the learned tree, where each column corresponds to a node
-#                                  in the learned tree and each row contains the following information:
-#                                    M[1,j]: id of node j (in a complete binary tree)
-#                                    M[2,j]: tree id to which node j belongs
-#                                    M[3,j]: Offset (no. of columns) to left child of j
-#                                    M[4,j]: Feature index of the feature that node j looks at if j is an internal node, otherwise 0
-#                                    M[5,j]: Type of the feature that node j looks at if j is an internal node: 1 for scale and 2
-#                                    for categorical features,
-#                                    otherwise the label that leaf node j is supposed to predict
-#                                    M[6,j]: 1 if j is an internal node and the feature chosen for j is scale, otherwise the
-#                                    size of the subset of values
-#                                    stored in rows 7,8,... if j is categorical
-#                                    M[7:,j]: Only applicable for internal nodes. Threshold the example's feature value is
-#                                    compared to is stored at M[7,j] if the feature chosen for j is scale;
-#                                    If the feature chosen for j is categorical rows 7,8,... depict the value subset chosen for j
-# C               Matrix[Double]   Matrix C containing the number of times samples are chosen in each tree of the random forest
-# S_map           Matrix[Double]   Mappings from scale feature ids to global feature ids
-# C_map           Matrix[Double]   Mappings from categorical feature ids to global feature ids
-# ----------------------------------------------------------------------------------------------------------------------
+# --------------------------------------------------------------------------------------------
+# M      Matrix M containing the learned tree, where each column corresponds to a node
+#        in the learned tree and each row contains the following information:
+#          M[1,j]: id of node j (in a complete binary tree)
+#          M[2,j]: tree id to which node j belongs
+#          M[3,j]: Offset (no. of columns) to left child of j
+#          M[4,j]: Feature index of the feature that node j looks at if j is an internal node, otherwise 0
+#          M[5,j]: Type of the feature that node j looks at if j is an internal node: 1 for scale and 2
+#          for categorical features,
+#          otherwise the label that leaf node j is supposed to predict
+#          M[6,j]: 1 if j is an internal node and the feature chosen for j is scale, otherwise the
+#          size of the subset of values
+#          stored in rows 7,8,... if j is categorical
+#          M[7:,j]: Only applicable for internal nodes. Threshold the example's feature value is
+#          compared to is stored at M[7,j] if the feature chosen for j is scale;
+#          If the feature chosen for j is categorical rows 7,8,... depict the value subset chosen for j
+# C      Matrix C containing the number of times samples are chosen in each tree of the random forest
+# S_map  Mappings from scale feature ids to global feature ids
+# C_map  Mappings from categorical feature ids to global feature ids
+# --------------------------------------------------------------------------------------------
 
 m_randomForest = function(Matrix[Double] X, Matrix[Double] Y, Matrix[Double] R, 
     Integer bins = 20, Integer depth = 25, Integer num_leaf = 10, Integer num_samples = 3000, 
diff --git a/scripts/builtin/scale.dml b/scripts/builtin/scale.dml
index 63a5f7fd87..1fb88009da 100644
--- a/scripts/builtin/scale.dml
+++ b/scripts/builtin/scale.dml
@@ -21,23 +21,19 @@
 
 # This function scales and center individual features in the input matrix (column wise.) using z-score to scale the values.
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME         TYPE              DEFAULT  MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# X            Matrix[Double]    ---      Input feature matrix
-# center       Boolean           TRUE     Indicates whether or not to center the feature matrix
-# scale        Boolean           TRUE     Indicates whether or not to scale the feature matrix
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# --------------------------------------------------------------------------------------
+# X       Input feature matrix
+# center  Indicates whether or not to center the feature matrix
+# scale   Indicates whether or not to scale the feature matrix
+# --------------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME         TYPE                       MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# Y            Matrix[Double]             Output feature matrix with K columns
-# Centering    Matrix[Double]             The column means of the input, subtracted if Center was TRUE
-# ScaleFactor  Matrix[Double]             The Scaling of the values, to make each dimension have similar value ranges
-# ----------------------------------------------------------------------------------------------------------------------
+# -------------------------------------------------------------------------------------------
+# Y            Output feature matrix with K columns
+# Centering    The column means of the input, subtracted if Center was TRUE
+# ScaleFactor  The Scaling of the values, to make each dimension have similar value ranges
+# -------------------------------------------------------------------------------------------
 
 m_scale = function(Matrix[Double] X, Boolean center=TRUE, Boolean scale=TRUE) 
   return (Matrix[Double] out, Matrix[Double] Centering, Matrix[Double] ScaleFactor) 
diff --git a/scripts/builtin/scaleApply.dml b/scripts/builtin/scaleApply.dml
index 358f356c10..18f5d729c4 100644
--- a/scripts/builtin/scaleApply.dml
+++ b/scripts/builtin/scaleApply.dml
@@ -21,21 +21,17 @@
 
 # This function scales and center individual features in the input matrix (column wise.) using the input matrices.
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME         TYPE              DEFAULT   MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# X            Matrix[Double]    ---       Input feature matrix
-# Centering    Matrix[Double]    ---       The column means to subtract from X (not done if empty)
-# ScaleFactor  Matrix[Double]    ---       The column scaling to multiply with X (not done if empty)
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# ------------------------------------------------------------------------------------------
+# X            Input feature matrix
+# Centering    The column means to subtract from X (not done if empty)
+# ScaleFactor  The column scaling to multiply with X (not done if empty)
+# ------------------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME       TYPE                         MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# Y          Matrix[Double]               Output feature matrix with K columns
-# ----------------------------------------------------------------------------------------------------------------------
+# ------------------------------------------------------------------------------------
+# Y     Output feature matrix with K columns
+# ------------------------------------------------------------------------------------
 
 m_scaleApply = function(Matrix[Double] X, Matrix[Double] Centering, Matrix[Double] ScaleFactor) 
   return (Matrix[Double] Y) 
diff --git a/scripts/builtin/scaleMinMax.dml b/scripts/builtin/scaleMinMax.dml
index 095e0d6b48..df2c75d459 100644
--- a/scripts/builtin/scaleMinMax.dml
+++ b/scripts/builtin/scaleMinMax.dml
@@ -21,19 +21,15 @@
 
 # This function performs min-max normalization (rescaling to [0,1]).
 #
-# INPUT PARAMETERS:
-# ------------------------------------------------------------------------------
-# NAME         TYPE              DEFAULT  MEANING
-# ------------------------------------------------------------------------------
-# X            Matrix[Double]    ---      Input feature matrix
-# ------------------------------------------------------------------------------
+# INPUT:
+# ---------------------------------------------
+# X      Input feature matrix
+# ---------------------------------------------
 #
 # OUTPUT:
-# ------------------------------------------------------------------------------
-# NAME         TYPE                       MEANING
-# ------------------------------------------------------------------------------
-# Y            Matrix[Double]             Scaled output matrix
-# ------------------------------------------------------------------------------
+# --------------------------------------------
+# Y     Scaled output matrix
+# --------------------------------------------
 
 m_scaleMinMax = function(Matrix[Double] X)
   return (Matrix[Double] Y)
diff --git a/scripts/builtin/selectByVarThresh.dml b/scripts/builtin/selectByVarThresh.dml
index 66d06f1395..2431db0228 100644
--- a/scripts/builtin/selectByVarThresh.dml
+++ b/scripts/builtin/selectByVarThresh.dml
@@ -21,20 +21,16 @@
 
 # This function drops feature with <= thresh variance (by default drop constants).
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME          TYPE            DEFAULT   MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# X             Matrix[Double]  ---       Matrix of feature vectors.
-# thresh        Double          0
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# ------------------------------------------------------------------------------------------
+# X           Matrix of feature vectors.
+# thresh      Variance threshold; features with variance <= thresh are dropped
+# ------------------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME         TYPE                        MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# Xp           Matrix[Double]             Matrix of feature vectors with <= thresh variance.
-# ----------------------------------------------------------------------------------------------------------------------
+# -------------------------------------------------------------------------------------
+# Xp     Matrix of feature vectors with > thresh variance.
+# -------------------------------------------------------------------------------------
 
 m_selectByVarThresh = function(Matrix[Double] X, Double thresh = 0)
   return (Matrix[Double] Xp, Matrix[Double] I)
diff --git a/scripts/builtin/setdiff.dml b/scripts/builtin/setdiff.dml
index 27721de79c..14a5456feb 100644
--- a/scripts/builtin/setdiff.dml
+++ b/scripts/builtin/setdiff.dml
@@ -21,20 +21,16 @@
 
 # Builtin function that implements difference operation on vectors
 #
-# INPUT PARAMETERS:
-# ---------------------------------------------------------------------------------------------
-# NAME            TYPE            DEFAULT     MEANING
-# ---------------------------------------------------------------------------------------------
-# X               Matrix[Double]  ---         input vector
-# Y               Matrix[Double]  ---         input vector
-# ---------------------------------------------------------------------------------------------
+# INPUT:
+# -------------------------------------------------------
+# X     input vector
+# Y     input vector
+# -------------------------------------------------------
 #
 # OUTPUT:
-# ---------------------------------------------------------------------------------------------
-# NAME            TYPE              MEANING
-# ---------------------------------------------------------------------------------------------
-# R               Matrix[Double]    vector with all elements that are present in X but not in Y
-# ---------------------------------------------------------------------------------------------
+# -----------------------------------------------------------------
+# R     vector with all elements that are present in X but not in Y
+# -----------------------------------------------------------------
 
 m_setdiff = function(Matrix[double] X, Matrix[double] Y)
   return (matrix[double] R)
diff --git a/scripts/builtin/sherlock.dml b/scripts/builtin/sherlock.dml
index 86b989075f..01430b6843 100644
--- a/scripts/builtin/sherlock.dml
+++ b/scripts/builtin/sherlock.dml
@@ -28,29 +28,25 @@
 # Split feature matrix into four different feature categories and train neural networks on the
 # respective single features. Then combine all trained features to train final neural network.
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME         TYPE            DEFAULT   MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# X_train      Matrix[Double]  ---       maxtrix of feature vectors
-# y_train      Matrix[Double]  ---       matrix Y of class labels of semantic data type
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# ----------------------------------------------------------------------------------------
+# X_train  matrix of feature vectors
+# y_train  matrix Y of class labels of semantic data type
+# ----------------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME         TYPE                       MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# cW           Matrix[Double]             weights (parameters) matrices for character distribtions
-# cb           Matrix[Double]             biases vectors for character distribtions
-# wW           Matrix[Double]             weights (parameters) matrices for word embeddings
-# wb           Matrix[Double]             biases vectors for word embeddings
-# pW           Matrix[Double]             weights (parameters) matrices for paragraph vectors
-# pb           Matrix[Double]             biases vectors for paragraph vectors
-# sW           Matrix[Double]             weights (parameters) matrices for global statistics
-# sb           Matrix[Double]             biases vectors for global statistics
-# fW           Matrix[Double]             weights (parameters) matrices for  combining all trained features (final)
-# fb           Matrix[Double]             biases vectors for combining all trained features (final)
-# ----------------------------------------------------------------------------------------------------------------------
+# ------------------------------------------------------------------------------------
+# cW    weights (parameters) matrices for character distributions
+# cb    biases vectors for character distributions
+# wW    weights (parameters) matrices for word embeddings
+# wb    biases vectors for word embeddings
+# pW    weights (parameters) matrices for paragraph vectors
+# pb    biases vectors for paragraph vectors
+# sW    weights (parameters) matrices for global statistics
+# sb    biases vectors for global statistics
+# fW    weights (parameters) matrices for  combining all trained features (final)
+# fb    biases vectors for combining all trained features (final)
+# ------------------------------------------------------------------------------------
 
 source("scripts/nn/examples/sherlockNet.dml") as sherlockNet
 
diff --git a/scripts/builtin/sherlockPredict.dml b/scripts/builtin/sherlockPredict.dml
index e69d0f2615..7fe94198dd 100644
--- a/scripts/builtin/sherlockPredict.dml
+++ b/scripts/builtin/sherlockPredict.dml
@@ -26,30 +26,26 @@
 # [Hulsebos, Madelon, et al. "Sherlock: A deep learning approach to semantic data type detection."
 # Proceedings of the 25th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining.
 # 2019.]
-
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME       TYPE        DEFAULT   MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# X     Matrix[Double]   ---       matrix of values which are to be classified
-# cW    Matrix[Double]   ---       weights (parameters) matrices for character distribtions
-# cb    Matrix[Double]   ---       biases vectors for character distribtions
-# wW    Matrix[Double]   ---       weights (parameters) matrices for word embeddings
-# wb    Matrix[Double]   ---       biases vectors for word embeddings
-# pW    Matrix[Double]   ---       weights (parameters) matrices for paragraph vectors
-# pb    Matrix[Double]   ---       biases vectors for paragraph vectors
-# sW    Matrix[Double]   ---       weights (parameters) matrices for global statistics
-# sb    Matrix[Double]   ---       biases vectors for global statistics
-# fW    Matrix[Double]   ---       weights (parameters) matrices for  combining all trained features (final)
-# fb    Matrix[Double]   ---       biases vectors for combining all trained features (final)
-# ----------------------------------------------------------------------------------------------------------------------
+#
+# INPUT:
+# -------------------------------------------------------------------------------------------
+# X     matrix of values which are to be classified
+# cW    weights (parameters) matrices for character distributions
+# cb    biases vectors for character distributions
+# wW    weights (parameters) matrices for word embeddings
+# wb    biases vectors for word embeddings
+# pW    weights (parameters) matrices for paragraph vectors
+# pb    biases vectors for paragraph vectors
+# sW    weights (parameters) matrices for global statistics
+# sb    biases vectors for global statistics
+# fW    weights (parameters) matrices for  combining all trained features (final)
+# fb    biases vectors for combining all trained features (final)
+# -------------------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME   TYPE                        MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# probs  Matrix[Double]              class probabilities of shape (N, K)
-# ----------------------------------------------------------------------------------------------------------------------
+# ------------------------------------------------------------------------------------------
+# probs  class probabilities of shape (N, K)
+# ------------------------------------------------------------------------------------------
 
 source("scripts/nn/examples/sherlockNet.dml") as sherlockNet
 
diff --git a/scripts/builtin/shortestPath.dml b/scripts/builtin/shortestPath.dml
index abbdd1c133..3eecf45426 100644
--- a/scripts/builtin/shortestPath.dml
+++ b/scripts/builtin/shortestPath.dml
@@ -25,32 +25,27 @@
 # James C. Dehnert, Ikkan Horn, Naty Leiser and Grzegorz Czajkowski:
 # Pregel: A System for Large-Scale Graph Processing, SIGMOD 2010
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME        TYPE              DEFAULT  MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# G           Matrix[Double]    ---      adjacency matrix of the labeled graph: Such graph can be directed
-#                                        (G is symmetric) or undirected (G is not symmetric).
-#                                        The values of G can be 0/1 (just specifying whether the nodes
-#                                        are connected or not) or integer values (representing the weight
-#                                        of the edges or the distances between nodes, 0 if not connected).
-# maxi        Integer           0        Integer max number of iterations accepted (0 for FALSE, i.e.
-#                                        max number of iterations not defined)
-# sourceNode  Integer                    node index to calculate the shortest paths to all other nodes.
-# verbose     Boolean           FALSE    flag for verbose debug output
-#
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# -------------------------------------------------------------------------------------------
+# G           adjacency matrix of the labeled graph: Such a graph can be directed
+#             (G is not necessarily symmetric) or undirected (G is symmetric).
+#             The values of G can be 0/1 (just specifying whether the nodes
+#             are connected or not) or integer values (representing the weight
+#             of the edges or the distances between nodes, 0 if not connected).
+# maxi        Integer max number of iterations accepted (0 for FALSE, i.e.
+#             max number of iterations not defined)
+# sourceNode  node index to calculate the shortest paths to all other nodes.
+# verbose     flag for verbose debug output
+# -------------------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME        TYPE                       MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# C           Matrix[Double]             Output matrix (double) of minimum distances (shortest-path) between
-#                                        vertices: The value of the ith row and the jth column of the output
-#                                        matrix is the minimum distance shortest-path from vertex i to vertex j.
-#                                        When the value of the minimum distance is infinity, the two nodes are
-#                                        not connected.
-# ----------------------------------------------------------------------------------------------------------------------
+# --------------------------------------------------------------------------------------
+# C      Output matrix (double) of minimum distances (shortest-path) between
+#        vertices: The value of the ith row and the jth column of the output
+#        matrix is the minimum distance shortest-path from vertex i to vertex j.
+#        When the value of the minimum distance is infinity, the two nodes are
+#        not connected.
+# --------------------------------------------------------------------------------------
 
 m_shortestPath = function(Matrix[Double] G, Integer maxi = 0, Integer sourceNode, Boolean verbose = FALSE) 
   return (Matrix[Double] C) 
diff --git a/scripts/builtin/sigmoid.dml b/scripts/builtin/sigmoid.dml
index 7f5c4415ec..5c4d726810 100644
--- a/scripts/builtin/sigmoid.dml
+++ b/scripts/builtin/sigmoid.dml
@@ -22,19 +22,15 @@
 # The Sigmoid function is a type of activation function, and also defined as a squashing function which limit the
 # output to a range between 0 and 1, which will make these functions useful in the prediction of probabilities.
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME    TYPE             DEFAULT  MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# X       Matrix[Double]   ---      Matrix of feature vectors.
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# -------------------------------------------------------------------------------------------
+# X      Matrix of feature vectors.
+# -------------------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME   TYPE                       MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# Y      Matrix[Double]             1-column matrix of weights.
-# ----------------------------------------------------------------------------------------------------------------------
+# ------------------------------------------------------------------------------------------
+# Y     Matrix of element-wise sigmoid values, with the same shape as X.
+# ------------------------------------------------------------------------------------------
 
 m_sigmoid = function(Matrix[Double] X) return (Matrix[Double] Y) {
   Y = 1 / (1 + exp(-X));
diff --git a/scripts/builtin/slicefinder.dml b/scripts/builtin/slicefinder.dml
index b7454845ca..3005c8c764 100644
--- a/scripts/builtin/slicefinder.dml
+++ b/scripts/builtin/slicefinder.dml
@@ -19,38 +19,34 @@
 #
 #-------------------------------------------------------------
 
-# This builtin function imlements SliceLine, a linear-algebra-based
+# This builtin function implements SliceLine, a linear-algebra-based
 # ML model debugging technique for finding the top-k data slices where
 # a trained models performs significantly worse than on the overall 
 # dataset. For a detailed description and experimental results, see:
 # Svetlana Sagadeeva, Matthias Boehm: SliceLine: Fast, Linear-Algebra-based Slice Finding for ML Model Debugging.(SIGMOD 2021)
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME          TYPE            DEFAULT   MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# X             Matrix[Double]  ---       Recoded dataset into Matrix
-# e             Matrix[Double]  ---       Trained model
-# k             Integer         1         Number of subsets required
-# maxL          Integer                   maximum level L (conjunctions of L predicates), 0 unlimited
-# minSup        Integer                   minimum support (min number of rows per slice)
-# alpha         Double                    weight [0,1]: 0 only size, 1 only error
-# tpEval        Boolean                   flag for task-parallel slice evaluation,
-#                                         otherwise data-parallel
-# tpBlksz       Integer                   block size for task-parallel execution (num slices)
-# selFeat       Boolean                   flag for removing one-hot-encoded features that don't satisfy
-#                                         the initial minimum-support constraint and/or have zero error
-# verbose       Boolean                   flag for verbose debug output
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# ---------------------------------------------------------------------------------------
+# X        Recoded dataset into Matrix
+# e        Trained model
+# k        Number of subsets required
+# maxL     maximum level L (conjunctions of L predicates), 0 unlimited
+# minSup   minimum support (min number of rows per slice)
+# alpha    weight [0,1]: 0 only size, 1 only error
+# tpEval   flag for task-parallel slice evaluation,
+#          otherwise data-parallel
+# tpBlksz  block size for task-parallel execution (num slices)
+# selFeat  flag for removing one-hot-encoded features that don't satisfy
+#          the initial minimum-support constraint and/or have zero error
+# verbose  flag for verbose debug output
+# ---------------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME         TYPE                  MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# TK           Matrix[Double]        top-k slices (k x ncol(X) if successful)
-# TKC          Matrix[Double]        score, size, error of slices (k x 3)
-# D            Matrix[Double]        debug matrix, populated with enumeration stats if verbose
-# ----------------------------------------------------------------------------------------------------------------------
+# -----------------------------------------------------------------------------------------
+# TK    top-k slices (k x ncol(X) if successful)
+# TKC   score, size, error of slices (k x 3)
+# D     debug matrix, populated with enumeration stats if verbose
+# -----------------------------------------------------------------------------------------
 
 m_slicefinder = function(Matrix[Double] X, Matrix[Double] e, Int k = 4, 
     Int maxL = 0, Int minSup = 32, Double alpha = 0.5, Boolean tpEval = TRUE, 
diff --git a/scripts/builtin/smote.dml b/scripts/builtin/smote.dml
index fd482fabba..830c62410c 100644
--- a/scripts/builtin/smote.dml
+++ b/scripts/builtin/smote.dml
@@ -22,23 +22,19 @@
 # Builtin function for handing class imbalance using Synthetic Minority Over-sampling Technique (SMOTE)
 # by Nitesh V. Chawla et. al. In Journal of Artificial Intelligence Research 16 (2002). 321–357
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME        TYPE             DEFAULT     MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# X           Matrix[Double]   ---         Matrix of minority class samples
-# mask        Matrix[Double]   ---         0/1 mask vector where 0 represent numeric value and 1 represent categorical value
-# s           Integer          25          Amount of SMOTE (percentage of oversampling), integral multiple of 100
-# k           Integer          1           Number of nearest neighbour
-# verbose     Boolean          FALSE       if the algorithm should be verbose
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# --------------------------------------------------------------------------------------
+# X        Matrix of minority class samples
+# mask     0/1 mask vector where 0 represents a numeric value and 1 represents a categorical value
+# s        Amount of SMOTE (percentage of oversampling), integral multiple of 100
+# k        Number of nearest neighbors
+# verbose  if the algorithm should be verbose
+# --------------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME        TYPE                         MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# Y           Matrix[Double]               Matrix of (N/100)-1 * nrow(X) synthetic minority class samples
-# ----------------------------------------------------------------------------------------------------------------------
+# -----------------------------------------------------------------------------------
+# Y     Matrix of (N/100)-1 * nrow(X) synthetic minority class samples
+# -----------------------------------------------------------------------------------
 
 m_smote = function(Matrix[Double] X, Matrix[Double] mask, Integer s = 200, Integer k = 1, Boolean verbose = FALSE)
 return (Matrix[Double] Y) {
diff --git a/scripts/builtin/softmax.dml b/scripts/builtin/softmax.dml
index 401532f4d3..500299e6ba 100644
--- a/scripts/builtin/softmax.dml
+++ b/scripts/builtin/softmax.dml
@@ -19,21 +19,17 @@
 #
 #-------------------------------------------------------------
 
-# This is a softmax classifier,forward function Computes the forward pass for a softmax classifier.
+# Performs softmax on the given input matrix.
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME          TYPE            DEFAULT   MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# S             Matrix[Double]  ---       Inputs of shape (N, D).
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# ------------------------------------------------------------------------------------
+# S     Inputs of shape (N, D).
+# ------------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME         TYPE                       MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# P            Matrix[Double]            Outputs of shape (N, D).
-# ----------------------------------------------------------------------------------------------------------------------
+# -------------------------------------------------------------------------------------
+# P     Outputs of shape (N, D).
+# -------------------------------------------------------------------------------------
 
 source("nn/layers/softmax.dml") as sm
 
diff --git a/scripts/builtin/split.dml b/scripts/builtin/split.dml
index 78a51cb67f..8e7f0980df 100644
--- a/scripts/builtin/split.dml
+++ b/scripts/builtin/split.dml
@@ -21,29 +21,25 @@
 
 # This function split input data X and Y into contiguous or samples train/test sets
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME   TYPE              DEFAULT  MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# X       Matrix[Double]   ---      Input feature matrix
-# Y       Matrix[Double]   ---      Input Labels
-# f       Double           0.7      Train set fraction [0,1]
-# cont    Boolean          TRUE     contiuous splits, otherwise sampled
-# seed    Integer          -1       The seed to reandomly select rows in sampled mode
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# ------------------------------------------------------------------------------------------
+# X     Input feature matrix
+# Y     Input Labels
+# f     Train set fraction [0,1]
+# cont  contiguous splits, otherwise sampled
+# seed  The seed to randomly select rows in sampled mode
+# ------------------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME    TYPE                      MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# Xtrain  Matrix[Double]            Train split of feature matrix
-# Xtest   Matrix[Double]            Test split of feature matrix
-# ytrain  Matrix[Double]            Train split of label matrix
-# ytest   Matrix[Double]            Test split of label matrix
-# ----------------------------------------------------------------------------------------------------------------------
+# --------------------------------------------------------------------------------------------
+# X_Train  Train split of feature matrix
+# X_Test   Test split of feature matrix
+# Y_Train  Train split of label matrix
+# Y_Test   Test split of label matrix
+# --------------------------------------------------------------------------------------------
 
 m_split = function(Matrix[Double] X, Matrix[Double] Y, Double f=0.7, Boolean cont=TRUE, Integer seed=-1)
-  return (Matrix[Double] Xtrain, Matrix[Double] Xtest, Matrix[Double] Ytrain, Matrix[Double] Ytest) 
+  return (Matrix[Double] X_Train, Matrix[Double] X_Test, Matrix[Double] Y_Train, Matrix[Double] Y_Test) 
 {
   # basic sanity checks
   if( f <= 0 | f >= 1 )
@@ -53,10 +49,10 @@ m_split = function(Matrix[Double] X, Matrix[Double] Y, Double f=0.7, Boolean con
 
   # contiguous train/test splits
   if( cont ) {
-    Xtrain = X[1:f*nrow(X),];
-    Ytrain = Y[1:f*nrow(X),];
-    Xtest = X[(nrow(Xtrain)+1):nrow(X),];
-    Ytest = Y[(nrow(Xtrain)+1):nrow(X),];
+    X_Train = X[1:f*nrow(X),];
+    Y_Train = Y[1:f*nrow(X),];
+    X_Test = X[(nrow(X_Train)+1):nrow(X),];
+    Y_Test = Y[(nrow(X_Train)+1):nrow(X),];
   }
   # sampled train/test splits
   else {
@@ -64,9 +60,9 @@ m_split = function(Matrix[Double] X, Matrix[Double] Y, Double f=0.7, Boolean con
     # extract tuples via permutation (selection) matrix multiply
     # or directly via removeEmpty by selection vector
     I = rand(rows=nrow(X), cols=1, seed=seed) <= f;
-    Xtrain = removeEmpty(target=X, margin="rows", select=I);
-    Ytrain = removeEmpty(target=Y, margin="rows", select=I);
-    Xtest = removeEmpty(target=X, margin="rows", select=(I==0));
-    Ytest = removeEmpty(target=Y, margin="rows", select=(I==0));
+    X_Train = removeEmpty(target=X, margin="rows", select=I);
+    Y_Train = removeEmpty(target=Y, margin="rows", select=I);
+    X_Test = removeEmpty(target=X, margin="rows", select=(I==0));
+    Y_Test = removeEmpty(target=Y, margin="rows", select=(I==0));
   }
 }
diff --git a/scripts/builtin/splitBalanced.dml b/scripts/builtin/splitBalanced.dml
index da314d905c..bb1d86bce8 100644
--- a/scripts/builtin/splitBalanced.dml
+++ b/scripts/builtin/splitBalanced.dml
@@ -22,25 +22,21 @@
 # This functions split input data X and Y into contiguous balanced ratio
 # Related to [SYSTEMDS-2902] dependency function for cleaning pipelines
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME    TYPE              DEFAULT  MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# X       Matrix[Double]    ---      Input feature matrix
-# Y       Matrix[Double]    ---      Input Labels
-# f       Double            0.7      Train set fraction [0,1]
-# verbose Boolean           FALSE    print available
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# --------------------------------------------------------------------------------------------
+# X           Input feature matrix
+# Y           Input Labels
+# splitRatio  Train set fraction [0,1]
+# verbose     flag for verbose output
+# --------------------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME      TYPE                    MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# X_train   Matrix[Double]          Train split of feature matrix
-# X_test    Matrix[Double]          Test split of feature matrix
-# y_train   Matrix[Double]          Train split of label matrix
-# y_test    Matrix[Double]          Test split of label matrix
-# ----------------------------------------------------------------------------------------------------------------------
+# ---------------------------------------------------------------------------------------------
+# X_train  Train split of feature matrix
+# X_test   Test split of feature matrix
+# y_train  Train split of label matrix
+# y_test   Test split of label matrix
+# ---------------------------------------------------------------------------------------------
 
 m_splitBalanced = function(Matrix[Double] X, Matrix[Double] Y, Double splitRatio = 0.7, Boolean verbose = FALSE)
 return (Matrix[Double] X_train, Matrix[Double] y_train, Matrix[Double] X_test, 
diff --git a/scripts/builtin/stableMarriage.dml b/scripts/builtin/stableMarriage.dml
index 3eb493c440..ea74a10537 100644
--- a/scripts/builtin/stableMarriage.dml
+++ b/scripts/builtin/stableMarriage.dml
@@ -21,63 +21,63 @@
 
 # This script computes a solution for the stable marriage problem.
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME          TYPE               DEFAULT     MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# P             Matrix[Double]     ---         proposer matrix P.
-#                                              It must be a square matrix with no zeros.
-# A             Matrix[Double]     ---         acceptor matrix A.
-#                                              It must be a square matrix with no zeros.
-# ordered       Boolean     TRUE               If true, P and A are assumed to be ordered,
-#                                              i.e. the leftmost value in a row is the most preferred partner's index.
-#                                              i.e. the leftmost value in a row in P is the preference value for the acceptor with
-#                                              index 1 and vice-versa (higher is better).
-# ----------------------------------------------------------------------------------------------------------------------
+# result description:
 #
-# OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME           TYPE                          MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# result_matrix  Matrix[Double]                Result Matrix
-#                                              If cell [i,j] is non-zero, it means that acceptor i has matched with
-#                                              proposer j. Further, if cell [i,j] is non-zero, it holds the preference
-#                                              value that led to the match.
-#                                              Proposers.mtx:
-#                                              2.0,1.0,3.0
-#                                              1.0,2.0,3.0
-#                                              1.0,3.0,2.0
-#
-#                                              Since ordered=TRUE, this means that proposer 1 (row 1) likes acceptor 2
-#                                              the most, followed by acceptor 1 and acceptor 3.
-#                                              If ordered=FALSE, this would mean that proposer 1 (row 1) likes acceptor 3
-#                                              the most (since the value at [1,3] is the row max),
-#                                              followed by acceptor 1 (2.0 preference value) and acceptor 2 (1.0 preference value).
-#
-#                                              Acceptors.mtx:
-#                                              3.0,1.0,2.0
-#                                              2.0,1.0,3.0
-#                                              3.0,2.0,1.0
+# If cell [i,j] is non-zero, it means that acceptor i has matched with
+# proposer j. Further, if cell [i,j] is non-zero, it holds the preference
+# value that led to the match.
+# Proposers.mtx:
+# 2.0,1.0,3.0
+# 1.0,2.0,3.0
+# 1.0,3.0,2.0
+# 
+# Since ordered=TRUE, this means that proposer 1 (row 1) likes acceptor 2
+# the most, followed by acceptor 1 and acceptor 3.
+# If ordered=FALSE, this would mean that proposer 1 (row 1) likes acceptor 3
+# the most (since the value at [1,3] is the row max),
+# followed by acceptor 1 (2.0 preference value) and acceptor 2 (1.0 preference value).
+# 
+# Acceptors.mtx:
+# 3.0,1.0,2.0
+# 2.0,1.0,3.0
+# 3.0,2.0,1.0
+# 
+# Since ordered=TRUE, this means that acceptor 1 (row 1) likes proposer 3
+# the most, followed by proposer 1 and proposer 2.
+# If ordered=FALSE, this would mean that acceptor 1 (row 1) likes proposer 1
+# the most (since the value at [1,1] is the row max),
+# followed by proposer 3 (2.0 preference value) and proposer 2
+# (1.0 preference value).
+# 
+# Output.mtx (assuming ordered=TRUE):
+# 0.0,0.0,3.0
+# 0.0,3.0,0.0
+# 1.0,0.0,0.0
+# 
+# Acceptor 1 has matched with proposer 3 (since [1,3] is non-zero) at a
+# preference level of 3.0.
+# Acceptor 2 has matched with proposer 2 (since [2,2] is non-zero) at a
+# preference level of 3.0.
+# Acceptor 3 has matched with proposer 1 (since [3,1] is non-zero) at a
+# preference level of 1.0.
 #
-#                                              Since ordered=TRUE, this means that acceptor 1 (row 1) likes proposer 3
-#                                              the most, followed by proposer 1 and proposer 2.
-#                                              If ordered=FALSE, this would mean that acceptor 1 (row 1) likes proposer 1
-#                                              the most (since the value at [1,1] is the row max),
-#                                              followed by proposer 3 (2.0 preference value) and proposer 2
-#                                              (1.0 preference value).
+# INPUT:
+# ----------------------------------------------------------------------------------
+# P        proposer matrix P.
+#          It must be a square matrix with no zeros.
+# A        acceptor matrix A.
+#          It must be a square matrix with no zeros.
+# ordered  If true, P and A are assumed to be ordered,
+#          i.e. the leftmost value in a row is the most preferred partner's index.
+#          If false, the leftmost value in a row in P is the preference value for the acceptor with
+#          index 1 and vice-versa (higher is better).
+# verbose  if the algorithm should print verbosely
+# ----------------------------------------------------------------------------------
 #
-#                                              Output.mtx (assuming ordered=TRUE):
-#                                              0.0,0.0,3.0
-#                                              0.0,3.0,0.0
-#                                              1.0,0.0,0.0
-#
-#                                              Acceptor 1 has matched with proposer 3 (since [1,3] is non-zero) at a
-#                                              preference level of 3.0.
-#                                              Acceptor 2 has matched with proposer 2 (since [2,2] is non-zero) at a
-#                                              preference level of 3.0.
-#                                              Acceptor 3 has matched with proposer 1 (since [3,1] is non-zero) at a
-#                                              preference level of 1.0.
-# ----------------------------------------------------------------------------------------------------------------------
+# OUTPUT:
+# ----------------------------------------------------------------------------------------
+# result_matrix  Result Matrix
+# ----------------------------------------------------------------------------------------
 
 m_stableMarriage = function(Matrix[Double] P, Matrix[Double] A, Boolean ordered = TRUE, Boolean verbose = FALSE)
   return (Matrix[Double] result_matrix)
diff --git a/scripts/builtin/statsNA.dml b/scripts/builtin/statsNA.dml
index 5543221d32..c94862f72a 100644
--- a/scripts/builtin/statsNA.dml
+++ b/scripts/builtin/statsNA.dml
@@ -21,33 +21,32 @@
 
 # The statsNA-function Print summary stats about the distribution of missing values in a univariate time series.
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME    TYPE               DEFAULT      MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# X       Matrix[Double]     ---          Numeric Vector ('vector') object containing NAs
-# bins    Integer            4            Split number for bin stats. Number of bins the time series gets
-#                                         divided into. For each bin information about amount/percentage of
-#                                         missing values is printed.
-# verbose Boolean            TRUE         Print detailed information.
-#                                         For print_only = TRUE, the missing value stats are printed with
-#                                         more information ("Stats for Bins" and "overview NA series").
-# ----------------------------------------------------------------------------------------------------------------------
+# result matrix contains the following:
+#  1. Length of time series (including NAs)
+#  2. Number of Missing Values (NAs)
+#  3. Percentage of Missing Values (#2/#1)
+#  4. Number of Gaps (consisting of one or more consecutive NAs)
+#  5. Average Gap Size - Average size of consecutive NAs for the NA gaps
+#  6. Longest NA gap - Longest series of consecutive missing values
+#  7. Most frequent gap size - Most frequently occurring gap size
+#  8. Gap size accounting for most NAs
+#
+#
+# INPUT:
+# ---------------------------------------------------------------------------------------
+# X        Numeric Vector ('vector') object containing NAs
+# bins     Split number for bin stats. Number of bins the time series gets
+#          divided into. For each bin information about amount/percentage of
+#          missing values is printed.
+# verbose  Print detailed information.
+#          For verbose = TRUE, the missing value stats are printed with
+#          more information ("Stats for Bins" and "overview NA series").
+# ---------------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME    TYPE              MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# stats   Matrix[Double]    Column vector where each row correspond to following,
-#                           1. Length of time series (including NAs)
-#                           2. Number of Missing Values (NAs)
-#                           3. Percentage of Missing Values (#2/#1)
-#                           4. Number of Gaps (consisting of one or more consecutive NAs)
-#                           5. Average Gap Size - Average size of consecutive NAs for the NA gaps
-#                           6. Longest NA gap - Longest series of consecutive missing values
-#                           7. Most frequent gap size - Most frequently occurring gap size
-#                           8. Gap size accounting for most NAs
-# ----------------------------------------------------------------------------------------------------------------------
+# ---------------------------------------------------------------------------------------------------
+# stats  Column vector where each row corresponds to one of the values described above
+# ---------------------------------------------------------------------------------------------------
 
 m_statsNA = function(Matrix[Double] X, Integer bins = 4, Boolean verbose = TRUE)
   return(Matrix[Double] stats) 
diff --git a/scripts/builtin/steplm.dml b/scripts/builtin/steplm.dml
index f325770569..6ed2fbb530 100644
--- a/scripts/builtin/steplm.dml
+++ b/scripts/builtin/steplm.dml
@@ -23,7 +23,8 @@
 # This method iteratively runs what-if scenarios and greedily selects the next best feature
 # until the Akaike information criterion (AIC) does not improve anymore. Each configuration trains a regression model
 # via lm, which in turn calls either the closed form lmDS or iterative lmGC.
-# OUTPUT: Matrix of regression parameters (the betas) and its size depend on icpt input value:
+# 
+# return: Matrix of regression parameters (the betas); its size depends on the icpt input value:
 #         OUTPUT SIZE:   OUTPUT CONTENTS:                HOW TO PREDICT Y FROM X AND B:
 # icpt=0: ncol(X)   x 1  Betas for X only                Y ~ X %*% B[1:ncol(X), 1], or just X %*% B
 # icpt=1: ncol(X)+1 x 1  Betas for X and intercept       Y ~ X %*% B[1:ncol(X), 1] + B[ncol(X)+1, 1]
@@ -32,29 +33,25 @@
 # In addition, in the last run of linear regression some statistics are provided in CSV format, one comma-separated
 # name-value pair per each line, as follows:
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME        TYPE            DEFAULT  MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# X           Matrix[Double]  ---      Location (on HDFS) to read the matrix X of feature vectors
-# Y           Matrix[Double]  ---      Location (on HDFS) to read the 1-column matrix Y of response values
-# icpt        Integer         0        Intercept presence, shifting and rescaling the columns of X:
-#                                      0 = no intercept, no shifting, no rescaling;
-#                                      1 = add intercept, but neither shift nor rescale X;
-#                                      2 = add intercept, shift & rescale X columns to mean = 0, variance = 1
-# reg         Double          1e-7     learning rate
-# tol         Double          1e-7     Tolerance threashold to train until achieved
-# maxi        Integer         0        maximum iterations 0 means until tolerange is reached
-# verbose     Boolean         TRUE     If the algorithm should be verbose
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# ------------------------------------------------------------------------------------------
+# X        Matrix X of feature vectors
+# y        1-column matrix y of response values
+# icpt     Intercept presence, shifting and rescaling the columns of X:
+#          0 = no intercept, no shifting, no rescaling;
+#          1 = add intercept, but neither shift nor rescale X;
+#          2 = add intercept, shift & rescale X columns to mean = 0, variance = 1
+# reg      learning rate
+# tol      Tolerance threshold to train until achieved
+# maxi     maximum iterations 0 means until tolerance is reached
+# verbose  If the algorithm should be verbose
+# ------------------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME       TYPE              MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# B          Matrix[Double]    Matrix of regression parameters (the betas) and its size depend on icpt input value.
-# S          Matrix[Double]    Matrix of selected features ordered as computed by the algorithm.
-# ----------------------------------------------------------------------------------------------------------------------
+# -----------------------------------------------------------------------------------------------
+# B     Matrix of regression parameters (the betas); its size depends on the icpt input value.
+# S     Matrix of selected features ordered as computed by the algorithm.
+# -----------------------------------------------------------------------------------------------
 
 m_steplm = function(Matrix[Double] X, Matrix[Double] y, Integer icpt = 0,
   Double reg = 1e-7, Double tol = 1e-7, Integer maxi = 0, Boolean verbose = TRUE)
diff --git a/scripts/builtin/stratstats.dml b/scripts/builtin/stratstats.dml
index 9fe45e6924..4319809ed1 100644
--- a/scripts/builtin/stratstats.dml
+++ b/scripts/builtin/stratstats.dml
@@ -22,67 +22,66 @@
 # The stratstats.dml script computes common bivariate statistics, such as correlation, slope, and their p-value,
 # in parallel for many pairs of input variables in the presence of a confounding categorical variable.
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME   TYPE           DEFAULT  MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# X     Matrix[Double]  ---     Matrix X that has all 1-st covariates
-# Y     Matrix[Double]  empty   Matrix Y that has all 2-nd covariates
-#                               the default value empty means "use X in place of Y"
-# S     Matrix[Double]  empty   Matrix S that has the stratum column
-#                               the default value empty means "use X in place of S"
-# Xcid  Matrix[Double]  empty   1-st covariate X-column indices
-#                               the default value empty means "use columns 1 : ncol(X)"
-# Ycid  Matrix[Double]  empty   2-nd covariate Y-column indices
-#                               the default value empty means "use columns 1 : ncol(Y)"
-# Scid  Int             1       Column index of the stratum column in S
-# ----------------------------------------------------------------------------------------------------------------------
+# The output matrix contains one row per each distinct pair
+# (1st covariate, 2nd covariate) and
+# 40 columns containing the following information:
+# Col 01: 1st covariate X-column number
+# Col 02: 1st covariate global presence count
+# Col 03: 1st covariate global mean
+# Col 04: 1st covariate global standard deviation
+# Col 05: 1st covariate stratified standard deviation
+# Col 06: R-squared, 1st covariate vs. strata
+# Col 07: adjusted R-squared, 1st covariate vs. strata
+# Col 08: P-value, 1st covariate vs. strata
+# Col 09-10: Reserved
+# Col 11: 2nd covariate Y-column number
+# Col 12: 2nd covariate global presence count
+# Col 13: 2nd covariate global mean
+# Col 14: 2nd covariate global standard deviation
+# Col 15: 2nd covariate stratified standard deviation
+# Col 16: R-squared, 2nd covariate vs. strata
+# Col 17: adjusted R-squared, 2nd covariate vs. strata
+# Col 18: P-value, 2nd covariate vs. strata
+# Col 19-20: Reserved
+# Col 21: Global 1st & 2nd covariate presence count
+# Col 22: Global regression slope (2nd vs. 1st covariate)
+# Col 23: Global regression slope standard deviation
+# Col 24: Global correlation = +/- sqrt(R-squared)
+# Col 25: Global residual standard deviation
+# Col 26: Global R-squared
+# Col 27: Global adjusted R-squared
+# Col 28: Global P-value for hypothesis "slope = 0"
+# Col 29-30: Reserved
+# Col 31: Stratified 1st & 2nd covariate presence count
+# Col 32: Stratified regression slope (2nd vs. 1st covariate)
+# Col 33: Stratified regression slope standard deviation
+# Col 34: Stratified correlation = +/- sqrt(R-squared)
+# Col 35: Stratified residual standard deviation
+# Col 36: Stratified R-squared
+# Col 37: Stratified adjusted R-squared
+# Col 38: Stratified P-value for hypothesis "slope = 0"
+# Col 39: Number of strata with at least two counted points
+# Col 40: Reserved
+#
+#
+# INPUT:
+# ----------------------------------------------------------------------------------------------
+# X     Matrix X that has all 1-st covariates
+# Y     Matrix Y that has all 2-nd covariates
+#       the default value empty means "use X in place of Y"
+# S     Matrix S that has the stratum column
+#       the default value empty means "use X in place of S"
+# Xcid  1-st covariate X-column indices
+#       the default value empty means "use columns 1 : ncol(X)"
+# Ycid  2-nd covariate Y-column indices
+#       the default value empty means "use columns 1 : ncol(Y)"
+# Scid  Column index of the stratum column in S
+# ----------------------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME       TYPE                  MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# OutMtx     Matrix[Double]        Output matrix, one row per each distinct pair
-#                                  (1st covariante, 2nd covariante)
-#                                  40 columns containing the following information:
-#                                  Col 01: 1st covariate X-column number
-#                                  Col 02: 1st covariate global presence count
-#                                  Col 03: 1st covariate global mean
-#                                  Col 04: 1st covariate global standard deviation
-#                                  Col 05: 1st covariate stratified standard deviation
-#                                  Col 06: R-squared, 1st covariate vs. strata
-#                                  Col 07: adjusted R-squared, 1st covariate vs. strata
-#                                  Col 08: P-value, 1st covariate vs. strata
-#                                  Col 09-10: Reserved
-#                                  Col 11: 2nd covariate Y-column number
-#                                  Col 12: 2nd covariate global presence count
-#                                  Col 13: 2nd covariate global mean
-#                                  Col 14: 2nd covariate global standard deviation
-#                                  Col 15: 2nd covariate stratified standard deviation
-#                                  Col 16: R-squared, 2nd covariate vs. strata
-#                                  Col 17: adjusted R-squared, 2nd covariate vs. strata
-#                                  Col 18: P-value, 2nd covariate vs. strata
-#                                  Col 19-20: Reserved
-#                                  Col 21: Global 1st & 2nd covariate presence count
-#                                  Col 22: Global regression slope (2nd vs. 1st covariate)
-#                                  Col 23: Global regression slope standard deviation
-#                                  Col 24: Global correlation = +/- sqrt(R-squared)
-#                                  Col 25: Global residual standard deviation
-#                                  Col 26: Global R-squared
-#                                  Col 27: Global adjusted R-squared
-#                                  Col 28: Global P-value for hypothesis "slope = 0"
-#                                  Col 29-30: Reserved
-#                                  Col 31: Stratified 1st & 2nd covariate presence count
-#                                  Col 32: Stratified regression slope (2nd vs. 1st covariate)
-#                                  Col 33: Stratified regression slope standard deviation
-#                                  Col 34: Stratified correlation = +/- sqrt(R-squared)
-#                                  Col 35: Stratified residual standard deviation
-#                                  Col 36: Stratified R-squared
-#                                  Col 37: Stratified adjusted R-squared
-#                                  Col 38: Stratified P-value for hypothesis "slope = 0"
-#                                  Col 39: Number of strata with at least two counted points
-#                                  Col 40: Reserved
-# ----------------------------------------------------------------------------------------------------------------------
+# ----------------------------------------------------------------------------------------------
+# OutMtx   Output matrix, one row per each distinct pair
+# ----------------------------------------------------------------------------------------------
 
 m_stratstats = function(Matrix[Double] X, Matrix[Double] Y = matrix(0.0, rows=1,cols=1),
   Matrix[Double] S = matrix(0.0, rows=1,cols=1), Matrix[Double] Xcid = matrix(0.0, rows=1,cols=1),
diff --git a/scripts/builtin/symmetricDifference.dml b/scripts/builtin/symmetricDifference.dml
index 7d875eaafd..c5ef7245ec 100644
--- a/scripts/builtin/symmetricDifference.dml
+++ b/scripts/builtin/symmetricDifference.dml
@@ -21,20 +21,16 @@
 
 # Builtin function that implements symmetric difference set-operation on vectors
 #
-# INPUT PARAMETERS:
-# ---------------------------------------------------------------------------------------------
-# NAME            TYPE            DEFAULT     MEANING
-# ---------------------------------------------------------------------------------------------
-# X               Matrix[Double]  ---         input vector
-# Y               Matrix[Double]  ---         input vector
-# ---------------------------------------------------------------------------------------------
+# INPUT:
+# -------------------------------------------------------
+# X     input vector
+# Y     input vector
+# -------------------------------------------------------
 #
 # OUTPUT:
-# ---------------------------------------------------------------------------------------------
-# NAME            TYPE                MEANING
-# ---------------------------------------------------------------------------------------------
-# R               Matrix[Double]      vector with all elements in X and Y but not in both
-# ---------------------------------------------------------------------------------------------
+# ---------------------------------------------------------------
+# R     vector with all elements in X and Y but not in both
+# ---------------------------------------------------------------
 
 m_symmetricDifference = function(Matrix[Double] X, Matrix[Double] Y)
     return (matrix[double] R)
diff --git a/scripts/builtin/tSNE.dml b/scripts/builtin/tSNE.dml
index e9ab7d5879..131ab1013c 100644
--- a/scripts/builtin/tSNE.dml
+++ b/scripts/builtin/tSNE.dml
@@ -22,28 +22,24 @@
 # This function performs dimensionality reduction using tSNE algorithm based on
 # the paper: Visualizing Data using t-SNE, Maaten et. al.
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME           TYPE              DEFAULT  MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# X              Matrix[Double]    ---      Data Matrix of shape
-#                                           (number of data points, input dimensionality)
-# reduced_dims   Integer           2        Output dimensionality
-# perplexity     Integer           30       Perplexity Parameter
-# lr             Double            300.     Learning rate
-# momentum       Double            0.9      Momentum Parameter
-# max_iter       Integer           1000     Number of iterations
-# seed           Integer           -1       The seed used for initial values.
-#                                           If set to -1 random seeds are selected.
-# is_verbose     Boolean           FALSE    Print debug information
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# -------------------------------------------------------------------------------------------
+# X              Data Matrix of shape
+#                (number of data points, input dimensionality)
+# reduced_dims   Output dimensionality
+# perplexity     Perplexity Parameter
+# lr             Learning rate
+# momentum       Momentum Parameter
+# max_iter       Number of iterations
+# seed           The seed used for initial values.
+#                If set to -1 random seeds are selected.
+# is_verbose     Print debug information
+# -------------------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME           TYPE               MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# Y              Matrix[Double]      Data Matrix of shape (number of data points, reduced_dims)
-# ----------------------------------------------------------------------------------------------------------------------
+# -------------------------------------------------------------------------------------------
+# Y      Data Matrix of shape (number of data points, reduced_dims)
+# -------------------------------------------------------------------------------------------
 
 m_tSNE = function(Matrix[Double] X, Integer reduced_dims = 2, Integer perplexity = 30,
   Double lr = 300., Double momentum = 0.9, Integer max_iter = 1000, Integer seed = -1, Boolean is_verbose = FALSE)
diff --git a/scripts/builtin/toOneHot.dml b/scripts/builtin/toOneHot.dml
index fd76e89299..2232cdc97b 100644
--- a/scripts/builtin/toOneHot.dml
+++ b/scripts/builtin/toOneHot.dml
@@ -19,22 +19,18 @@
 #
 #-------------------------------------------------------------
 
-# The toOneHot-function encodes unordered categorical vector to multiple binarized vectors.
+# The toOneHot-function encodes unordered categorical vector to multiple binary vectors.
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME          TYPE            DEFAULT   MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# X             Matrix[Double]  ---       Vector with N integer entries between 1 and numClasses
-# numclasses    int             ---       Number of columns, must be be greater than or equal to largest value in X
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# ------------------------------------------------------------------------------------------
+# X           Vector with N integer entries between 1 and numClasses
+# numClasses  Number of columns, must be greater than or equal to largest value in X
+# ------------------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME         TYPE                       MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# Y            Matrix[Double]             One-hot-encoded matrix with shape (N, numClasses)
-# ----------------------------------------------------------------------------------------------------------------------
+# ------------------------------------------------------------------------------------
+# Y     One-hot-encoded matrix with shape (N, numClasses)
+# ------------------------------------------------------------------------------------
 
 m_toOneHot = function(Matrix[Double] X, integer numClasses)
         return (Matrix[Double] Y) {
diff --git a/scripts/builtin/tomeklink.dml b/scripts/builtin/tomeklink.dml
index d4898ff44f..e9853caa32 100644
--- a/scripts/builtin/tomeklink.dml
+++ b/scripts/builtin/tomeklink.dml
@@ -19,26 +19,22 @@
 #
 #-------------------------------------------------------------
 
-# The tomekLink-function performs undersampling by removing Tomek's links for imbalanced multiclass problems
+# The tomekLink-function performs under sampling by removing Tomek's links for imbalanced multi-class problems
 # Computes TOMEK links and drops them from data matrix and label vector.
-# Drops only the majarity label and corresponding point of TOMEK links.
+# Drops only the majority label and corresponding point of TOMEK links.
 #
-# INPUT  PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME   TYPE            DEFAULT   MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# X      Matrix[Double]  ---       Data Matrix (nxm)
-# y      Matrix[Double]  ---       Label Matrix (nx1), greater than zero
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# -------------------------------------------------------------------------------------------
+# X     Data Matrix (nxm)
+# y     Label Matrix (nx1), greater than zero
+# -------------------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME       TYPE                 MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# X_under    Matrix[Double]       Data Matrix without Tomek links
-# y_under    Matrix[Double]       Labels corresponding to undersampled data
-# drop_idx   Matrix[Double]       Indices of dropped rows/labels wrt input
-# ----------------------------------------------------------------------------------------------------------------------
+# ------------------------------------------------------------------------------------------------
+# X_under   Data Matrix without Tomek links
+# y_under   Labels corresponding to under sampled data
+# drop_idx  Indices of dropped rows/labels wrt input
+# ------------------------------------------------------------------------------------------------
 
 m_tomeklink = function(Matrix[Double] X, Matrix[Double] y)
 return (Matrix[Double] X_under, Matrix[Double] y_under, Matrix[Double] drop_idx)
diff --git a/scripts/builtin/topk_cleaning.dml b/scripts/builtin/topk_cleaning.dml
index b322eaf6b1..47902b5431 100644
--- a/scripts/builtin/topk_cleaning.dml
+++ b/scripts/builtin/topk_cleaning.dml
@@ -21,37 +21,7 @@
 
 # This function cleans top-K item (where K is given as input)for a given list of users.
 # metaData[3, ncol(X)] : metaData[1] stores mask, metaData[2] stores schema, metaData[3] stores FD mask
-#
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME            TYPE               DEFAULT    MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# dataTrain       Frame[Unknown]     ---
-# dataTest        Frame[Unknown]     NULL
-# metaData        Frame[Unknown]     NULL
-# primitives      Frame[Unknown]     ---
-# parameters      Frame[Unknown]     ---
-# cmr             Matrix[Double]     Matrix
-# evaluationFunc  String             ---
-# evalFunHp       Matrix[Double]     ---
-# topK            Integer            5
-# resource_val    Integer            20
-# sample          Double             0.1
-# cv              Boolean            TRUE
-# cvk             Integer            2
-# isLastLabel     Boolean            TRUE
-# correctTypos    Boolean            FALSE
-# output          String             ---
-# ----------------------------------------------------------------------------------------------------------------------
-#
-# OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME            TYPE              MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# perf            Boolean
-# ----------------------------------------------------------------------------------------------------------------------
 
-# metaData[3, ncol(X)] : metaData[1] stores mask, metaData[2] stores schema, metaData[3] stores FD mask
 source("scripts/pipelines/scripts/utils.dml") as utils;
 source("scripts/pipelines/scripts/enumerateLogical.dml") as lg;
 source("scripts/builtin/bandit.dml") as bandit;
diff --git a/scripts/builtin/underSampling.dml b/scripts/builtin/underSampling.dml
index e56d7a2ab4..48d601ed73 100644
--- a/scripts/builtin/underSampling.dml
+++ b/scripts/builtin/underSampling.dml
@@ -21,22 +21,18 @@
 
 # Builtin to perform random under sampling on data.
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME    TYPE            DEFAULT   MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# X       Matrix[Double]  ---       X data to sample from
-# Y       Matrix[Double]  ---       Y data to sample from it will sample the same rows from x.
-# ratio   Double          ---       The ratio to sample
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# -------------------------------------------------------------------------------------------
+# X      X data to sample from
+# Y      Y data to sample from; the same rows are sampled as from X.
+# ratio  The ratio to sample
+# -------------------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME         TYPE                       MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# X         Matrix[Double]             The undersample data X
-# Y         Matrix[Double]             
-# ----------------------------------------------------------------------------------------------------------------------
+# -----------------------------------------------------------------------------------------
+# X       The under-sampled data X
+# Y       The under-sampled data Y
+# -----------------------------------------------------------------------------------------
 
 m_underSampling = function(Matrix[Double] X, Matrix[Double] Y, Double ratio)
 return(Matrix[Double] X, Matrix[Double] Y)
diff --git a/scripts/builtin/union.dml b/scripts/builtin/union.dml
index ff191c2cf6..10ed9b2863 100644
--- a/scripts/builtin/union.dml
+++ b/scripts/builtin/union.dml
@@ -21,20 +21,16 @@
 
 # Builtin function that implements union operation on vectors
 #
-# INPUT PARAMETERS:
-# ---------------------------------------------------------------------------------------------
-# NAME            TYPE    DEFAULT     MEANING
-# ---------------------------------------------------------------------------------------------
-# X               Matrix  ---         input vector
-# Y               Matrix  ---         input vector
-# ---------------------------------------------------------------------------------------------
+# INPUT:
+# ---------------------------------------------------------------
+# X     input vector
+# Y     input vector
+# ---------------------------------------------------------------
 #
 # OUTPUT:
-# ---------------------------------------------------------------------------------------------
-# NAME            TYPE     MEANING
-# ---------------------------------------------------------------------------------------------
-# R               Matrix   matrix with all unique rows existing in X and Y
-# ---------------------------------------------------------------------------------------------
+# --------------------------------------------------------------------------
+# R     matrix with all unique rows existing in X and Y
+# --------------------------------------------------------------------------
 
 m_union = function(Matrix[Double] X, Matrix[Double] Y)
   return (matrix[double] R)
diff --git a/scripts/builtin/unique.dml b/scripts/builtin/unique.dml
index 491ac20d3a..57e01949b6 100644
--- a/scripts/builtin/unique.dml
+++ b/scripts/builtin/unique.dml
@@ -21,19 +21,15 @@
 
 # Builtin function that implements unique operation on vectors
 #
-# INPUT PARAMETERS:
-# ---------------------------------------------------------------------------------------------
-# NAME            TYPE            DEFAULT     MEANING
-# ---------------------------------------------------------------------------------------------
-# X               Matrix[Double]  ---         input vector
-# ---------------------------------------------------------------------------------------------
+# INPUT:
+# -------------------------------------------------------
+# X     input vector
+# -------------------------------------------------------
 #
 # OUTPUT:
-# ---------------------------------------------------------------------------------------------
-# NAME            TYPE            MEANING
-# ---------------------------------------------------------------------------------------------
-# R               Matrix[Double]  matrix with only unique rows
-# ---------------------------------------------------------------------------------------------
+# -------------------------------------------------------------------
+# R     matrix with only unique rows
+# -------------------------------------------------------------------
 
 m_unique = function(matrix[double] X)
   return (matrix[double] R)
diff --git a/scripts/builtin/univar.dml b/scripts/builtin/univar.dml
index 8d2ac00540..06ef013a94 100644
--- a/scripts/builtin/univar.dml
+++ b/scripts/builtin/univar.dml
@@ -21,21 +21,17 @@
 
 # Computes univariate statistics for all attributes in a given data set
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME           TYPE               DEFAULT  MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# X              Matrix[Double]     ---      Input matrix of the shape (N, D)
-# TYPES          Matrix[Integer]    ---      Matrix of the shape (1, D) with features types:
-#                                            1 for scale, 2 for nominal, 3 for ordinal
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# ----------------------------------------------------------------------------------
+# X      Input matrix of the shape (N, D)
+# types  Matrix of the shape (1, D) with feature types:
+#        1 for scale, 2 for nominal, 3 for ordinal
+# ----------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME          TYPE                         MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# univarStats   Matrix[Double]               univariate statistics for all attributes
-# ----------------------------------------------------------------------------------------------------------------------
+# ----------------------------------------------------------------------------------------
+# univarStats  univariate statistics for all attributes
+# ----------------------------------------------------------------------------------------
 
 m_univar = function(Matrix[Double] X, Matrix[Double] types)
 return(Matrix[Double] univarStats)
diff --git a/scripts/builtin/vectorToCsv.dml b/scripts/builtin/vectorToCsv.dml
index 8f64053d79..9a28cbb1b4 100644
--- a/scripts/builtin/vectorToCsv.dml
+++ b/scripts/builtin/vectorToCsv.dml
@@ -22,19 +22,15 @@
 # This builtin function  convert vector into csv string such as [1 0 0 1 1 0 1] = "1,4,5,7"
 # Related to [SYSTEMDS-2662] dependency function for cleaning pipelines
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME       TYPE             DEFAULT     MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# mask       Matrix[Double]   ---         Data vector (having 0 for excluded indexes)
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# ------------------------------------------------------------------------------------
+# mask  Data vector (having 0 for excluded indexes)
+# ------------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME       TYPE                        MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# indexes    String                      indexes
-# ----------------------------------------------------------------------------------------------------------------------
+# ----------------------------------------------------------------------------------------
+# indexes  String of comma-separated indexes of the non-zero entries in mask
+# ----------------------------------------------------------------------------------------
 
 m_vectorToCsv = function(Matrix[Double] mask)
 return (String indexes){
diff --git a/scripts/builtin/winsorize.dml b/scripts/builtin/winsorize.dml
index eddb1dafa5..311f1e5bf3 100644
--- a/scripts/builtin/winsorize.dml
+++ b/scripts/builtin/winsorize.dml
@@ -23,21 +23,16 @@
 # lower quartile range of the given data then it replaces any value that falls outside this range
 # (less than lower quartile range or more than upper quartile range).
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME      TYPE             DEFAULT         MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# X         Matrix[Double]   ---            Input feature matrix
-# verbose   Boolean          FALSE          To print output on screen
-#
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# -----------------------------------------------------------------------------------------
+# X            Input feature matrix
+# verbose      To print output on screen
+# -----------------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME     TYPE                             MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# Y        Matrix[Double]                   Matrix without outlier values
-# ----------------------------------------------------------------------------------------------------------------------
+# -----------------------------------------------------------------------------------
+# Y      Matrix without outlier values
+# -----------------------------------------------------------------------------------
 
 m_winsorize = function(Matrix[Double] X, Double ql = 0.05, Double qu = 0.95, Boolean verbose) 
 return (Matrix[Double] Y, Matrix[Double] qLower, Matrix[Double] qUpper) {
diff --git a/scripts/builtin/winsorizeApply.dml b/scripts/builtin/winsorizeApply.dml
index 968a0da273..527d103f51 100644
--- a/scripts/builtin/winsorizeApply.dml
+++ b/scripts/builtin/winsorizeApply.dml
@@ -19,25 +19,20 @@
 #
 #-------------------------------------------------------------
 
-# winsorizeApply takes the upper and lower quantile values per colunm, and
+# winsorizeApply takes the upper and lower quantile values per column, and
 # remove outliers by replacing them with these upper and lower bound values.
 #
-# INPUT PARAMETERS:
-# ------------------------------------------------------------------------------
-# NAME     TYPE             DEFAULT   MEANING
-# ------------------------------------------------------------------------------
-# X        Matrix[Double]   ---       Input feature matrix
-# qLower   Matrix[Double]   ---       row vector of upper bounds per column
-# qUpper   Matrix[Double]   ---       row vector of lower bounds per column 
-#
-# ------------------------------------------------------------------------------
+# INPUT:
+# --------------------------------------------------
+# X       Input feature matrix
+# qLower  row vector of lower bounds per column
+# qUpper  row vector of upper bounds per column 
+# --------------------------------------------------
 #
 # OUTPUT:
-# ------------------------------------------------------------------------------
-# NAME     TYPE                       MEANING
-# ------------------------------------------------------------------------------
-# Y        Matrix[Double]             Matrix without outlier values
-# ------------------------------------------------------------------------------
+# ------------------------------------------------
+# Y     Matrix without outlier values
+# ------------------------------------------------
 
 
 m_winsorizeApply = function(Matrix[Double] X,  Matrix[Double] qLower, Matrix[Double] qUpper)
diff --git a/scripts/builtin/xdummy1.dml b/scripts/builtin/xdummy1.dml
index 5dbcbdd763..04e7e39121 100644
--- a/scripts/builtin/xdummy1.dml
+++ b/scripts/builtin/xdummy1.dml
@@ -21,19 +21,15 @@
 
 # This builtin function is here for debugging purposes
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------
-# NAME       TYPE              DEFAULT  MEANING
-# ----------------------------------------------------
-# X          Matrix[Double]    ---      test input
-# ----------------------------------------------------
+# INPUT:
+# ---------------------
+# X      test input
+# ---------------------
 #
 # OUTPUT:
-# ----------------------------------------------------
-# NAME       TYPE              MEANING
-# ----------------------------------------------------
-# Y          Matrix[Double]    test result
-# ----------------------------------------------------
+# -----------------------------
+# Y     test result
+# -----------------------------
 
 m_xdummyTypo = function(Matrix[Double] X) return (Matrix[Double] Y) {
   Y = cor(X);
diff --git a/scripts/builtin/xdummy2.dml b/scripts/builtin/xdummy2.dml
index 9fe9ee062e..b579720114 100644
--- a/scripts/builtin/xdummy2.dml
+++ b/scripts/builtin/xdummy2.dml
@@ -21,20 +21,16 @@
 
 # This builtin function is here for debugging purposes
 #
-# INPUT PARAMETERS:
-# ------------------------------------------------------
-# NAME        TYPE              DEFAULT  MEANING
-# ------------------------------------------------------
-# X           Matrix[Double]    ---      Debug input
-# ------------------------------------------------------
+# INPUT:
+# ----------------------
+# X      Debug input
+# ----------------------
 #
 # OUTPUT:
-# ------------------------------------------------------
-# NAME       TYPE             MEANING
-# ------------------------------------------------------
-# Y          Matrix[Double]   ---
-# Z          Matrix[Double]   ---
-# ------------------------------------------------------
+# --------------------------------
+# Y     ---
+# Z     ---
+# --------------------------------
 
 m_xdummyTypo = function(Matrix[Double] X) return (Matrix[Double] Y, Matrix[Double] Z) {
   Y = cor(X);
diff --git a/scripts/builtin/xgboost.dml b/scripts/builtin/xgboost.dml
index 6cd61ed810..d22b037fea 100644
--- a/scripts/builtin/xgboost.dml
+++ b/scripts/builtin/xgboost.dml
@@ -22,46 +22,43 @@
 # XGBoost is a decision-tree-based ensemble Machine Learning algorithm that uses a gradient boosting. This xgboost
 # implementation supports classification and regression and is capable of working with categorical and scalar features.
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME          TYPE              DEFAULT     MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# X             Matrix[Double]    ---         Feature matrix X; note that X needs to be both recoded and dummy coded
-# y             Matrix[Double]    ---         Label matrix y; note that y needs to be both recoded and dummy coded
-# R             Matrix[Double]    Matrix      Matrix R; 1xn vector which for each feature in X contains the following information
-#                                             - R[,1]: 1 (scalar feature)
-#                                             - R[,2]: 2 (categorical feature)
-#                                             Feature 1 is a scalar feature and features 2 is a categorical feature
-#                                             If R is not provided by default all variables are assumed to be scale (1)
-# sml_type      Integer           1           Supervised machine learning type: 1 = Regression(default), 2 = Classification
-# num_trees     Integer           7           Number of trees to be created in the xgboost model
-# learning_rate Double            0.3         Alias: eta. After each boosting step the learning rate controls the weights of the new predictions
-# max_depth     Integer           6           Maximum depth of a tree. Increasing this value will make the model more complex and more likely to overfit
-# lambda        Double            0.0         L2 regularization term on weights. Increasing this value will make model more conservative and reduce amount of leaves of a tree
+# Output explained:
+# Each column of M corresponds to a node in the learned tree (the first node is the init prediction) and each row contains
+# the following information:
+# M[1,j]: id of node j (in a complete binary tree)
+# M[2,j]: tree id to which node j belongs
+# M[3,j]: Offset (no. of columns) to left child of j if j is an internal node, otherwise 0
+# M[4,j]: Feature index of the feature (scale feature id if the feature is
+# scale or categorical feature id if the feature is categorical)
+# that node j looks at if j is an internal node, otherwise 0
+# M[5,j]: Type of the feature that node j looks at if j is an internal node.
+# if leaf = 0, if scalar = 1, if categorical = 2
+# M[6:,j]: If j is an internal node: Threshold the example's feature value is
+# compared to is stored at M[6,j] if the feature chosen for j is scale,
+# otherwise if the feature chosen for j is categorical rows 6,7,... depict
+# the value subset chosen for j
+# If j is a leaf node 1 if j is impure and the number of samples at j > threshold, otherwise 0
 #
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# -----------------------------------------------------------------------------------------
+# X              Feature matrix X; note that X needs to be both recoded and dummy coded
+# y              Label matrix y; note that y needs to be both recoded and dummy coded
+# R              Matrix R; 1xn vector which for each feature in X contains the following information
+#                - R[,1]: 1 (scalar feature)
+#                - R[,2]: 2 (categorical feature)
+#                Feature 1 is a scalar feature and feature 2 is a categorical feature
+#                If R is not provided by default all variables are assumed to be scale (1)
+# sml_type       Supervised machine learning type: 1 = Regression(default), 2 = Classification
+# num_trees      Number of trees to be created in the xgboost model
+# learning_rate  Alias: eta. After each boosting step the learning rate controls the weights of the new predictions
+# max_depth      Maximum depth of a tree. Increasing this value will make the model more complex and more likely to overfit
+# lambda         L2 regularization term on weights. Increasing this value will make model more conservative and reduce amount of leaves of a tree
+# -----------------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME           TYPE                      MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# M              Matrix[Double]            Matrix M where each column corresponds to a node in the learned tree
-#                                          (the first node is the init prediction) and each row contains
-#                                          the following information:
-#                                          M[1,j]: id of node j (in a complete binary tree)
-#                                          M[2,j]: tree id to which node j belongs
-#                                          M[3,j]: Offset (no. of columns) to left child of j if j is an internal node, otherwise 0
-#                                          M[4,j]: Feature index of the feature (scale feature id if the feature is
-#                                          scale or categorical feature id if the feature is categorical)
-#                                          that node j looks at if j is an internal node, otherwise 0
-#                                          M[5,j]: Type of the feature that node j looks at if j is an internal node.
-#                                          if leaf = 0, if scalar = 1, if categorical = 2
-#                                          M[6:,j]: If j is an internal node: Threshold the example's feature value is
-#                                          compared to is stored at M[6,j] if the feature chosen for j is scale,
-#                                          otherwise if the feature chosen for j is categorical rows 6,7,... depict
-#                                          the value subset chosen for j
-#                                          If j is a leaf node 1 if j is impure and the number of samples at j > threshold, otherwise 0
-# ----------------------------------------------------------------------------------------------------------------------
+# -----------------------------------------------------------------------------------
+# M     Matrix M where each column corresponds to a node in the learned tree
+# -----------------------------------------------------------------------------------
 
 m_xgboost = function(Matrix[Double] X, Matrix[Double] y, 
   Matrix[Double] R = matrix(1,rows=1,cols=nrow(X)), Integer sml_type = 1, Integer num_trees = 7, 
diff --git a/scripts/builtin/xgboostPredictClassification.dml b/scripts/builtin/xgboostPredictClassification.dml
index 19b4c8cd3b..06aa23a57a 100644
--- a/scripts/builtin/xgboostPredictClassification.dml
+++ b/scripts/builtin/xgboostPredictClassification.dml
@@ -22,21 +22,17 @@
 # XGBoost is a decision-tree-based ensemble Machine Learning algorithm that uses a gradient boosting. This xgboost
 # implementation supports classification  and is capable of working with categorical features.
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME                 TYPE             DEFAULT   MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# X                    Matrix[Double]   ---       Matrix of feature vectors we want to predict (X_test)
-# M                    Matrix[Double]   ---       The model created at xgboost
-# learning_rate        Matrix[Double]   0.3       The learning rate used in the model
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# -------------------------------------------------------------------------------------
+# X              Matrix of feature vectors we want to predict (X_test)
+# M              The model created at xgboost
+# learning_rate  The learning rate used in the model
+# -------------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME                TYPE                       MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# P                   Matrix[Double]             The predictions of the samples using the given xgboost model. (y_prediction)
-# ----------------------------------------------------------------------------------------------------------------------
+# -----------------------------------------------------------------------------
+# P     The predictions of the samples using the given xgboost model. (y_prediction)
+# -----------------------------------------------------------------------------
 
 m_xgboostPredictClassification = function(Matrix[Double] X, Matrix[Double] M, Double learning_rate = 0.3)
   return (Matrix[Double] P) {
diff --git a/scripts/builtin/xgboostPredictRegression.dml b/scripts/builtin/xgboostPredictRegression.dml
index c0f3c7ddf2..6170cd7b94 100644
--- a/scripts/builtin/xgboostPredictRegression.dml
+++ b/scripts/builtin/xgboostPredictRegression.dml
@@ -22,21 +22,17 @@
 # XGBoost is a decision-tree-based ensemble Machine Learning algorithm that uses a gradient boosting. This xgboost
 # implementation supports regression.
 #
-# INPUT PARAMETERS:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME               TYPE              DEFAULT  MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# X                  Matrix[Double]     ---      Matrix of feature vectors we want to predict (X_test)
-# M                  Matrix[Double]     ---      The model created at xgboost
-# learning_rate      Matrix[Double]     0.3      The learning rate used in the model
-# ----------------------------------------------------------------------------------------------------------------------
+# INPUT:
+# ---------------------------------------------------------------------------------------
+# X               Matrix of feature vectors we want to predict (X_test)
+# M               The model created at xgboost
+# learning_rate   The learning rate used in the model
+# ---------------------------------------------------------------------------------------
 #
 # OUTPUT:
-# ----------------------------------------------------------------------------------------------------------------------
-# NAME              TYPE                         MEANING
-# ----------------------------------------------------------------------------------------------------------------------
-# P                 Matrix[Double]               The predictions of the samples using the given xgboost model. (y_prediction)
-# ----------------------------------------------------------------------------------------------------------------------
+# -----------------------------------------------------------------------------
+# P     The predictions of the samples using the given xgboost model. (y_prediction)
+# -----------------------------------------------------------------------------
 
 m_xgboostPredictRegression = function(Matrix[Double] X, Matrix[Double] M, Double learning_rate = 0.3)
   return (Matrix[Double] P)
diff --git a/src/main/python/generator/generator.py b/src/main/python/generator/generator.py
index 2a441c5cbb..f23f31350d 100644
--- a/src/main/python/generator/generator.py
+++ b/src/main/python/generator/generator.py
@@ -155,30 +155,21 @@ class PythonAPIFunctionGenerator(object):
             newline_spacing = "\n" + " " * (nameLength + 5)
 
             for param in parameters:
-                # map data types
-                # pattern = self.__class__.type_mapping_pattern
-                # print(param)
+         
                 param[1] = self.replace_types(param[1])
-                # print(param)
+    
                 if "[" in param[1] or "[" in param[0]:
                     raise AttributeError(
                         "Failed parsing param" + str(param) + "\n" + str(parameters))
                 if param[2] is not None:
                     has_optional = True
-                    # result.append("{nl}{name}: {typ},".format(
-                    #     result=result, name=param[0], typ=param[1],
-                    #     nl=newline_spacing))
+               
                 else:
-                    # has_optional = False
                     result.append("{nl}{name}: {typ},".format(
                         result=result, name=param[0], typ=param[1],
                         nl=newline_spacing))
             if len(result) == 0:
                 result = ""
-                # if has_optional:
-                # result = u"{kwargs}".format(
-                #     result=result, kwargs=self.__class__.kwargs_parameter_string,
-                #     nl=newline_spacing)
             else:
                 result[0] = result[0][len(newline_spacing):]
                 result[-1] = result[-1][:-1]
@@ -188,7 +179,6 @@ class PythonAPIFunctionGenerator(object):
                         result=result, kwargs=self.__class__.kwargs_parameter_string,
                         nl=newline_spacing)
 
-            # print("\n\n" +str(parameters) + "\n\n " +result)
             return result
         except Exception as e:
             raise AttributeError("Failed Formatting parameter strings: " +
@@ -264,7 +254,7 @@ class PythonAPIFunctionGenerator(object):
         output_nodes = "\n    output_nodes = ["
         for idx, value in enumerate(return_values):
             output_type = re.search(pattern, value[1])[0].upper()
-            # print(output_type)
+
             output_type = output_type.lower()
 
             if output_type == "matrix":
@@ -306,7 +296,7 @@ class PythonAPIDocumentationGenerator(object):
     def __init__(self):
         super(PythonAPIDocumentationGenerator, self).__init__()
 
-    def generate_documentation(self, data: dict) -> str:
+    def generate_documentation(self, header_data: dict, data: dict):
         """
         Generates function header for PythonAPI
         @param data:
@@ -317,31 +307,36 @@ class PythonAPIDocumentationGenerator(object):
             }
         @return: function header including '\"\"\"' at start and end
         """
-        input_param = self.header_parameter_string(data["parameters"])
-        output_param = self.header_return_string(data["return_values"])
-
-        if(len(input_param) < 1 and len(output_param) < 1):
-            return ""
-        return '"""' + \
-            input_param + self.__class__.return_str.format(meaning=output_param.lower()) + \
-            "    " + '"""'
+        description = header_data["description"].replace("\n", "\n    ")
+        input_param = self.header_parameter_string(header_data["parameters"])
+        output_param = self.header_return_string(header_data["return_values"])
+
+        if description == "":
+            data['function_header'] = ""
+        elif header_data["return_values"] == []:
+            data['function_header'] = '"""\n    ' + description + '"""'
+        else:
+            res_str = "\n    :return: \'OperationNode\' containing {meaning} \n".format(
+                meaning=output_param.lower())
+            data['function_header'] = '"""\n    ' + description + \
+                input_param + res_str + '    """'
 
     def header_parameter_string(self, parameter: dict) -> str:
-        parameter_str = ""
+        parameter_str = "\n    "
         for param in parameter:
             parameter_str += self.__class__.param_str.format(
-                pname=param[0], meaning=param[3])
+                pname=param[0], meaning=param[1])
 
         return parameter_str
 
     def header_return_string(self, parameter: dict) -> str:
-        meaning_str = ""
-
+        meaning_str = "\n        "
+        first = True
         for param in parameter:
-            if len(meaning_str) > 0:
-                meaning_str += " & " + param[3]
+            if first:
+                meaning_str += param[1]
             else:
-                meaning_str += param[3]
+                meaning_str += "\n        & " + param[1]
 
         return meaning_str
 
@@ -377,8 +372,12 @@ if __name__ == "__main__":
             header_data = f_parser.parse_header(dml_file)
             data = f_parser.parse_function(dml_file)
             f_parser.check_parameters(header_data, data)
-            data['function_header'] = doc_generator.generate_documentation(
-                header_data)
+            doc_generator.generate_documentation(header_data, data)
+
+            if data['function_header'] == "":
+                print("[WARNING] in : \'{file_name}\' failed parsing docs.".format(
+                    file_name=dml_file))
+
             script_content = fun_generator.generate_function(data)
         except Exception as e:
             print("[ERROR] error in : \'{file_name}\' \n{err} \n{trace}.".format(
diff --git a/src/main/python/generator/parser.py b/src/main/python/generator/parser.py
index 72b5b73671..a1162a1623 100644
--- a/src/main/python/generator/parser.py
+++ b/src/main/python/generator/parser.py
@@ -145,23 +145,6 @@ class FunctionParser(object):
         #     import generator
         #     raise AttributeError("Failed parsing " + param + " " + generator.format_exception(e))
 
-    def get_header_parameters(self, param_str: str):
-        parameters = list()
-        pattern = re.compile(
-            self.__class__.header_parameter_pattern, flags=re.I)
-
-        for param_line in [s for s in param_str.split("\n") if s]:
-            match = pattern.match(param_line)
-            try:
-                parameters.append((match.group(1), match.group(
-                    2), match.group(3), match.group(4)))
-            except Exception as e:
-                if re.search(pattern=self.__class__.divider_pattern, string=param_line, flags=re.I | re.M) is not None:
-                    continue
-                return parameters
-
-        return parameters
-
     def parse_header(self, path: str):
         """
         @param path: path of file to parse
@@ -173,41 +156,74 @@ class FunctionParser(object):
                 'return_values': [('retval1', 'description'),...]
             }
         """
-        try:
-            h_input = self.find_header_input_params(path)
-            input_parameters = self.get_header_parameters(h_input)
 
-            h_output = self.find_header_output_params(path)
-            output_parameters = self.get_header_parameters(h_output)
-        except AttributeError as e:
+        description = ""
+        h_input = ""
+        h_output = ""
+        in_input = False
+        in_output = False
+        with open(path, 'r') as f:
+            for _ in range(22):
+                line = f.readline()
+            while line[0] == '#':
+                if "# INPUT:" in line:
+                    in_input = True
+                    # skip two lines
+                    line = f.readline()
+                    line = f.readline()
+                elif "# OUTPUT:" in line:
+                    in_input = False
+                    in_output = True
+                    # skip two lines
+                    line = f.readline()
+                    line = f.readline()
+
+                if in_output:
+                    if "----------" not in line:
+                        h_output += line[2:]
+                elif in_input:
+                    if "----------" not in line:
+                        h_input += line[2:]
+                else:
+                    description += line[2:]
+                line = f.readline()
+
+        if description == "" or h_input == "" or h_output == "":
             file_name = os.path.basename(path)
             print("[WARNING] Could not parse header in file \'{file_name}\'.".format(
                 file_name=file_name))
             input_parameters = []
             output_parameters = []
-        data = {'function_name': None, 'parameters': input_parameters,
+        else:
+            input_parameters = self.parse_input_output_string(h_input)
+            output_parameters = self.parse_input_output_string(h_output)
+       
+
+        data = {'description': description,
+                'parameters': input_parameters,
                 'return_values': output_parameters}
         return data
 
-    def find_header_input_params(self, path: str):
-        with open(path, 'r') as f:
-            content = f.read()
-        start = re.search(pattern=self.__class__.header_input_pattern,
-                          string=content, flags=re.I | re.M).end()
-        end = re.search(pattern=self.__class__.header_output_pattern,
-                        string=content, flags=re.I | re.M).start()
-        header = content[start:end]
-        return header
-
-    def find_header_output_params(self, path: str):
-        with open(path, 'r') as f:
-            content = f.read()
-        start = re.search(pattern=self.__class__.header_output_pattern,
-                          string=content, flags=re.I | re.M).end()
-        end = re.search(pattern=self.__class__.function_pattern,
-                        string=content, flags=re.I | re.M).start()
-        header = content[start:end]
-        return header
+    def parse_input_output_string(self, data: str):
+        """
+            parse the data into a list of tuples containing
+            a parameter and a description
+        """
+        ret = []
+        for line in data.split("\n"):
+            if line:
+                if line[0] == " ":
+                    prev = ret[-1]
+                    n = (prev[0], prev[1] +"\n        " + line.strip())
+                    ret[-1] = n
+                    # ret[-1][1] += line.strip()
+                else:
+                    vd = line.split("  ", 1)
+                    ret.append((vd[0].strip(),vd[1].strip()))                
+        
+        return ret
+
+
 
     def find_function_definition(self, path: str):
         with open(path, 'r') as f:
diff --git a/src/main/python/systemds/operator/algorithm/builtin/WoE.py b/src/main/python/systemds/operator/algorithm/builtin/WoE.py
index d4c97d5c1a..f16b878cc4 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/WoE.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/WoE.py
@@ -32,7 +32,16 @@ from systemds.utils.consts import VALID_INPUT_TYPES
 def WoE(X: Matrix,
         Y: Matrix,
         mask: Matrix):
+    """
+    function Weight of evidence / information gain
     
+    
+    :param X: ---
+    :param Y: ---
+    :param mask: ---
+    :return: 'OperationNode' containing 
+        --------- 
+    """
     params_dict = {'X': X, 'Y': Y, 'mask': mask}
     
     vX_0 = Matrix(X.sds_context, '')
diff --git a/src/main/python/systemds/operator/algorithm/builtin/WoEApply.py b/src/main/python/systemds/operator/algorithm/builtin/WoEApply.py
index 7029db36a7..d6299bb9f3 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/WoEApply.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/WoEApply.py
@@ -32,7 +32,16 @@ from systemds.utils.consts import VALID_INPUT_TYPES
 def WoEApply(X: Matrix,
              Y: Matrix,
              entropyMatrix: Matrix):
+    """
+    function Weight of evidence / information gain apply on new data
     
+    
+    :param X: ---
+    :param Y: ---
+    :param entropyMatrix: ---
+    :return: 'OperationNode' containing 
+        --- 
+    """
     params_dict = {'X': X, 'Y': Y, 'entropyMatrix': entropyMatrix}
     return Matrix(X.sds_context,
         'WoEApply',
diff --git a/src/main/python/systemds/operator/algorithm/builtin/abstain.py b/src/main/python/systemds/operator/algorithm/builtin/abstain.py
index d8dfee640d..ee8611a44e 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/abstain.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/abstain.py
@@ -34,9 +34,16 @@ def abstain(X: Matrix,
             threshold: float,
             **kwargs: Dict[str, VALID_INPUT_TYPES]):
     """
+    This function calls the multiLogReg-function, which solves Multinomial
+    Logistic Regression using Trust Region method
+    
+    
+    :param X: Location to read the matrix of feature vectors
+    :param Y: Location to read the matrix with category labels
     :param threshold: ---
     :param verbose: flag specifying if logging information should be printed
-    :return: 'OperationNode' containing  
+    :return: 'OperationNode' containing 
+        ------ 
     """
     params_dict = {'X': X, 'Y': Y, 'threshold': threshold}
     params_dict.update(kwargs)
diff --git a/src/main/python/systemds/operator/algorithm/builtin/als.py b/src/main/python/systemds/operator/algorithm/builtin/als.py
index 5357ea6a81..f067d94953 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/als.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/als.py
@@ -32,15 +32,31 @@ from systemds.utils.consts import VALID_INPUT_TYPES
 def als(X: Matrix,
         **kwargs: Dict[str, VALID_INPUT_TYPES]):
     """
+    This script computes an approximate factorization of a low-rank matrix X into two matrices U and V
+    using different implementations of the Alternating-Least-Squares (ALS) algorithm.
+    Matrices U and V are computed by minimizing a loss function (with regularization).
+    
+    
+    :param X: Location to read the input matrix X to be factorized
     :param rank: Rank of the factorization
-    :param regType: Regularization: 
+    :param regType: Regularization:
+        "L2" = L2 regularization;
+        f (U, V) = 0.5 * sum (W * (U %*% V - X) ^ 2)
+        + 0.5 * reg * (sum (U ^ 2) + sum (V ^ 2))
+        "wL2" = weighted L2 regularization
+        f (U, V) = 0.5 * sum (W * (U %*% V - X) ^ 2)
+        + 0.5 * reg * (sum (U ^ 2 * row_nonzeros)
+        + sum (V ^ 2 * col_nonzeros))
     :param reg: Regularization parameter, no regularization if 0.0
     :param maxi: Maximum number of iterations
     :param check: Check for convergence after every iteration, i.e., updating U and V once
-    :param thr: Assuming check is set to TRUE, the algorithm stops and convergence is declared 
-    :param if: in loss in any two consecutive iterations falls below this threshold; 
-    :param if: FALSE thr is ignored
-    :return: 'OperationNode' containing m x r matrix where r is the factorization rank & m x r matrix where r is the factorization rank 
+    :param thr: Assuming check is set to TRUE, the algorithm stops and convergence is declared
+        if the decrease in loss in any two consecutive iterations falls below this threshold;
+        if check is FALSE thr is ignored
+    :param seed: The seed to random parts of the algorithm
+    :param verbose: If the algorithm should run verbosely
+    :return: 'OperationNode' containing 
+        an m x r matrix where r is the factorization rank & an m x r matrix where r is the factorization rank 
     """
     params_dict = {'X': X}
     params_dict.update(kwargs)
diff --git a/src/main/python/systemds/operator/algorithm/builtin/alsCG.py b/src/main/python/systemds/operator/algorithm/builtin/alsCG.py
index bde4133bc6..80095faeeb 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/alsCG.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/alsCG.py
@@ -32,15 +32,31 @@ from systemds.utils.consts import VALID_INPUT_TYPES
 def alsCG(X: Matrix,
           **kwargs: Dict[str, VALID_INPUT_TYPES]):
     """
+    This script computes an approximate factorization of a low-rank matrix X into two matrices U and V
+    using the Alternating-Least-Squares (ALS) algorithm with conjugate gradient.
+    Matrices U and V are computed by minimizing a loss function (with regularization).
+    
+    
+    :param X: Location to read the input matrix X to be factorized
     :param rank: Rank of the factorization
     :param regType: Regularization:
+        "L2" = L2 regularization;
+        f (U, V) = 0.5 * sum (W * (U %*% V - X) ^ 2)
+        + 0.5 * reg * (sum (U ^ 2) + sum (V ^ 2))
+        "wL2" = weighted L2 regularization
+        f (U, V) = 0.5 * sum (W * (U %*% V - X) ^ 2)
+        + 0.5 * reg * (sum (U ^ 2 * row_nonzeros)
+        + sum (V ^ 2 * col_nonzeros))
     :param reg: Regularization parameter, no regularization if 0.0
     :param maxi: Maximum number of iterations
     :param check: Check for convergence after every iteration, i.e., updating U and V once
     :param thr: Assuming check is set to TRUE, the algorithm stops and convergence is declared
-    :param if: in loss in any two consecutive iterations falls below this threshold;
-    :param if: FALSE thr is ignored
-    :return: 'OperationNode' containing  
+        if the decrease in loss in any two consecutive iterations falls below this threshold;
+        if check is FALSE thr is ignored
+    :param seed: The seed to random parts of the algorithm
+    :param verbose: If the algorithm should run verbosely
+    :return: 'OperationNode' containing 
+        an m x r matrix where r is the factorization rank; an m x r matrix where r is the factorization rank
     """
     params_dict = {'X': X}
     params_dict.update(kwargs)
diff --git a/src/main/python/systemds/operator/algorithm/builtin/alsDS.py b/src/main/python/systemds/operator/algorithm/builtin/alsDS.py
index cba1d29f88..fba79a30aa 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/alsDS.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/alsDS.py
@@ -32,14 +32,24 @@ from systemds.utils.consts import VALID_INPUT_TYPES
 def alsDS(X: Matrix,
           **kwargs: Dict[str, VALID_INPUT_TYPES]):
     """
+    Alternating-Least-Squares (ALS) algorithm using a direct solve method for
+    individual least squares problems (reg="L2"). This script computes an 
+    approximate factorization of a low-rank matrix V into two matrices L and R.
+    Matrices L and R are computed by minimizing a loss function (with regularization).
+    
+    
+    :param X: Location to read the input matrix V to be factorized
     :param rank: Rank of the factorization
     :param reg: Regularization parameter, no regularization if 0.0
     :param maxi: Maximum number of iterations
     :param check: Check for convergence after every iteration, i.e., updating L and R once
     :param thr: Assuming check is set to TRUE, the algorithm stops and convergence is declared
-    :param if: in loss in any two consecutive iterations falls below this threshold;
-    :param if: FALSE thr is ignored
-    :return: 'OperationNode' containing  
+        if the decrease in loss in any two consecutive iterations falls below this threshold;
+        if check is FALSE thr is ignored
+    :param seed: The seed to random parts of the algorithm
+    :param verbose: If the algorithm should run verbosely
+    :return: 'OperationNode' containing 
+        an m x r matrix where r is the factorization rank; an m x r matrix where r is the factorization rank
     """
     params_dict = {'X': X}
     params_dict.update(kwargs)
diff --git a/src/main/python/systemds/operator/algorithm/builtin/alsPredict.py b/src/main/python/systemds/operator/algorithm/builtin/alsPredict.py
index 3886c30819..38ddcbaf64 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/alsPredict.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/alsPredict.py
@@ -33,7 +33,19 @@ def alsPredict(userIDs: Matrix,
                I: Matrix,
                L: Matrix,
                R: Matrix):
+    """
+    This script computes the rating/scores for a given list of userIDs 
+    using 2 factor matrices L and R. We assume that all users have rated
+    at least once and all items have been rated at least once.
     
+    
+    :param userIDs: Column vector of user-ids (n x 1)
+    :param I: Indicator matrix user-id x user-id to exclude from scoring
+    :param L: The factor matrix L: user-id x feature-id
+    :param R: The factor matrix R: feature-id x item-id
+    :return: 'OperationNode' containing 
+        the output user-id/item-id/score
+    """
     params_dict = {'userIDs': userIDs, 'I': I, 'L': L, 'R': R}
     return Matrix(userIDs.sds_context,
         'alsPredict',
diff --git a/src/main/python/systemds/operator/algorithm/builtin/alsTopkPredict.py b/src/main/python/systemds/operator/algorithm/builtin/alsTopkPredict.py
index f31007d2cb..86eb1a9483 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/alsTopkPredict.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/alsTopkPredict.py
@@ -35,8 +35,18 @@ def alsTopkPredict(userIDs: Matrix,
                    R: Matrix,
                    **kwargs: Dict[str, VALID_INPUT_TYPES]):
     """
+    This script computes the top-K rating/scores for a given list of userIDs 
+    using 2 factor matrices L and R. We assume that all users have rated
+    at least once and all items have been rated at least once.
+    
+    
+    :param userIDs: Column vector of user-ids (n x 1)
+    :param I: Indicator matrix user-id x user-id to exclude from scoring
+    :param L: The factor matrix L: user-id x feature-id
+    :param R: The factor matrix R: feature-id x item-id
     :param K: The number of top-K items
-    :return: 'OperationNode' containing  
+    :return: 'OperationNode' containing 
+        a matrix containing the top-k item-ids with highest predicted ratings for the specified users (rows); a matrix containing the top-k predicted ratings for the specified users (rows)
     """
     params_dict = {'userIDs': userIDs, 'I': I, 'L': L, 'R': R}
     params_dict.update(kwargs)
diff --git a/src/main/python/systemds/operator/algorithm/builtin/apply_pipeline.py b/src/main/python/systemds/operator/algorithm/builtin/apply_pipeline.py
index fa52482eae..7b232ccd6b 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/apply_pipeline.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/apply_pipeline.py
@@ -36,7 +36,25 @@ def apply_pipeline(testData: Frame,
                    exState: List,
                    iState: List,
                    **kwargs: Dict[str, VALID_INPUT_TYPES]):
+    """
+    This script will read the dirty and clean data, then it will apply the best pipeline on dirty data
+    and then will classify both cleaned dataset and check if the cleaned dataset is performing same as original dataset
+    in terms of classification accuracy
     
+    
+    :param trainData: ---
+    :param testData: ---
+    :param metaData: ---
+    :param lp: ---
+    :param pip: ---
+    :param hp: ---
+    :param evaluationFunc: ---
+    :param evalFunHp: ---
+    :param isLastLabel: ---
+    :param correctTypos: ---
+    :return: 'OperationNode' containing 
+        --- 
+    """
     params_dict = {'testData': testData, 'pip': pip, 'applyFunc': applyFunc, 'hp': hp, 'exState': exState, 'iState': iState}
     params_dict.update(kwargs)
     return Matrix(testData.sds_context,
diff --git a/src/main/python/systemds/operator/algorithm/builtin/arima.py b/src/main/python/systemds/operator/algorithm/builtin/arima.py
index b731100baa..38669c7b69 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/arima.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/arima.py
@@ -32,6 +32,11 @@ from systemds.utils.consts import VALID_INPUT_TYPES
 def arima(X: Matrix,
           **kwargs: Dict[str, VALID_INPUT_TYPES]):
     """
+    Builtin function that implements ARIMA
+    
+    
+    :param X: The input Matrix to apply Arima on.
+    :param max_func_invoc: ---
     :param p: non-seasonal AR order
     :param d: non-seasonal differencing order
     :param q: non-seasonal MA order
@@ -41,7 +46,8 @@ def arima(X: Matrix,
     :param s: period in terms of number of time-steps
     :param include_mean: center to mean 0, and include in result
     :param solver: solver, is either "cg" or "jacobi"
-    :return: 'OperationNode' containing  
+    :return: 'OperationNode' containing 
+        the calculated coefficients 
     """
     params_dict = {'X': X}
     params_dict.update(kwargs)
diff --git a/src/main/python/systemds/operator/algorithm/builtin/autoencoder_2layer.py b/src/main/python/systemds/operator/algorithm/builtin/autoencoder_2layer.py
index 3f3a061170..b6012ef345 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/autoencoder_2layer.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/autoencoder_2layer.py
@@ -35,17 +35,32 @@ def autoencoder_2layer(X: Matrix,
                        max_epochs: int,
                        **kwargs: Dict[str, VALID_INPUT_TYPES]):
     """
+    Trains a 2-layer autoencoder with minibatch SGD and step-size decay.
+    If invoked with H1 > H2 then it becomes a 'bowtie' structured autoencoder
+    Weights are initialized using Glorot & Bengio (2010) AISTATS initialization.
+    The script standardizes the input before training (can be turned off).
+    Also, it randomly reshuffles rows before training.
+    Currently, tanh is set to be the activation function. 
+    By re-implementing 'func' DML-bodied function, one can change the activation.
+    
+    
+    :param X: Filename where the input is stored
     :param num_hidden1: Number of neurons in the 1st hidden layer
     :param num_hidden2: Number of neurons in the 2nd hidden layer
     :param max_epochs: Number of epochs to train for
     :param full_obj: If TRUE, Computes objective function value (squared-loss)
-    :param at: of each epoch. Note that, computing the full 
-    :param objective: a lot of time. 
+        at the end of each epoch. Note that, computing the full
+        objective can take a lot of time.
     :param batch_size: Mini-batch size (training parameter)
     :param step: Initial step size (training parameter)
     :param decay: Decays step size after each epoch (training parameter)
     :param mu: Momentum parameter (training parameter)
-    :return: 'OperationNode' containing  
+    :param W1_rand: Weights might be initialized via input matrices
+    :param W2_rand: ---
+    :param W3_rand: ---
+    :param W4_rand: ---
+    :return: 'OperationNode' containing 
+        matrix storing weights between input layer and 1st hidden layer; matrix storing bias between input layer and 1st hidden layer; matrix storing weights between 1st hidden layer and 2nd hidden layer; matrix storing bias between 1st hidden layer and 2nd hidden layer; matrix storing weights between 2nd hidden layer and 3rd hidden layer; matrix storing bias between 2nd hidden layer and 3rd hidden layer; matrix storing weights between 3rd hidden layer and output layer; matrix storing bias between 3rd [...]
     """
     params_dict = {'X': X, 'num_hidden1': num_hidden1, 'num_hidden2': num_hidden2, 'max_epochs': max_epochs}
     params_dict.update(kwargs)
diff --git a/src/main/python/systemds/operator/algorithm/builtin/bandit.py b/src/main/python/systemds/operator/algorithm/builtin/bandit.py
index 4adf73c760..64ea2a1a3d 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/bandit.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/bandit.py
@@ -43,7 +43,31 @@ def bandit(X_train: Matrix,
            baseLineScore: float,
            cv: bool,
            **kwargs: Dict[str, VALID_INPUT_TYPES]):
+    """
+    In the bandit function the objective is to find an arm that optimizes
+    a known functional of the unknown arm-reward distributions.
     
+    
+    :param X_train: ---
+    :param Y_train: ---
+    :param X_test: ---
+    :param Y_test: ---
+    :param metaList: ---
+    :param evaluationFunc: ---
+    :param evalFunHp: ---
+    :param lp: ---
+    :param primitives: ---
+    :param params: ---
+    :param K: ---
+    :param R: ---
+    :param baseLineScore: ---
+    :param cv: ---
+    :param cvk: ---
+    :param verbose: ---
+    :param output: ---
+    :return: 'OperationNode' containing 
+        --- 
+    """
     params_dict = {'X_train': X_train, 'Y_train': Y_train, 'X_test': X_test, 'Y_test': Y_test, 'metaList': metaList, 'evaluationFunc': evaluationFunc, 'evalFunHp': evalFunHp, 'lp': lp, 'lpHp': lpHp, 'primitives': primitives, 'param': param, 'baseLineScore': baseLineScore, 'cv': cv}
     params_dict.update(kwargs)
     
diff --git a/src/main/python/systemds/operator/algorithm/builtin/bivar.py b/src/main/python/systemds/operator/algorithm/builtin/bivar.py
index a84fc4e090..b2e02a5408 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/bivar.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/bivar.py
@@ -36,8 +36,19 @@ def bivar(X: Matrix,
           T2: Matrix,
           verbose: bool):
     """
+    For a given pair of attribute sets, compute bivariate statistics between all attribute pairs.
+    Given, index1 = {A_11, A_12, ... A_1m} and index2 = {A_21, A_22, ... A_2n}
+    compute bivariate stats for m*n pairs (A_1i, A_2j), (1<= i <=m) and (1<= j <=n).
+    
+    
+    :param X: Input matrix
+    :param S1: First attribute set {A_11, A_12, ... A_1m}
+    :param S2: Second attribute set {A_21, A_22, ... A_2n}
+    :param T1: Kind for attributes in S1
+        (kind=1 for scale, kind=2 for nominal, kind=3 for ordinal)
     :param verbose: Print bivar stats
-    :return: 'OperationNode' containing as output with bivar stats & as output with bivar stats & as output with bivar stats & as output with bivar stats 
+    :return: 'OperationNode' containing 
+        basestats_scale_scale as output with bivar stats; basestats_nominal_scale as output with bivar stats; basestats_nominal_nominal as output with bivar stats; basestats_ordinal_ordinal as output with bivar stats
     """
     params_dict = {'X': X, 'S1': S1, 'S2': S2, 'T1': T1, 'T2': T2, 'verbose': verbose}
     
diff --git a/src/main/python/systemds/operator/algorithm/builtin/components.py b/src/main/python/systemds/operator/algorithm/builtin/components.py
index c617d92482..6133a4bf1e 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/components.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/components.py
@@ -32,15 +32,24 @@ from systemds.utils.consts import VALID_INPUT_TYPES
 def components(G: Matrix,
                **kwargs: Dict[str, VALID_INPUT_TYPES]):
     """
+    Computes the connected components of a graph and returns a
+    vector indicating the assignment of vertices to components,
+    where each component is identified by the maximum vertex ID
+    (i.e., row/column position of the input graph) 
+    
+    
     :param X: Location to read the matrix of feature vectors
     :param Y: Location to read the matrix with category labels
     :param icpt: Intercept presence, shifting and rescaling X columns: 0 = no intercept,
+        no shifting, no rescaling; 1 = add intercept, but neither shift nor rescale X;
+        2 = add intercept, shift & rescale X columns to mean = 0, variance = 1
     :param tol: tolerance ("epsilon")
     :param reg: regularization parameter (lambda = 1/C); intercept is not regularized
     :param maxi: max. number of outer (Newton) iterations
     :param maxii: max. number of inner (conjugate gradient) iterations, 0 = no max
     :param verbose: flag specifying if logging information should be printed
-    :return: 'OperationNode' containing  
+    :return: 'OperationNode' containing 
+        regression betas as output for prediction 
     """
     params_dict = {'G': G}
     params_dict.update(kwargs)
diff --git a/src/main/python/systemds/operator/algorithm/builtin/confusionMatrix.py b/src/main/python/systemds/operator/algorithm/builtin/confusionMatrix.py
index 91581644f1..f2183706b8 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/confusionMatrix.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/confusionMatrix.py
@@ -32,8 +32,26 @@ from systemds.utils.consts import VALID_INPUT_TYPES
 def confusionMatrix(P: Matrix,
                     Y: Matrix):
     """
-    :param encoded: actual labels
-    :return: 'OperationNode' containing  
+    Accepts a vector for prediction and a one-hot-encoded matrix
+    Then it computes the max value of each vector and compare them
+    After which, it calculates and returns the sum of classifications
+    and the average of each true class.
+                      True Labels
+                        1    2
+                    1   TP | FP
+      Predictions      ----+----
+                    2   FN | TN
+    TP = True Positives
+    FP = False Positives
+    FN = False Negatives
+    TN = True Negatives
+    
+    
+    :param P: vector of Predictions
+    :param Y: vector of Golden standard One Hot Encoded; the one hot
+        encoded vector of actual labels
+    :return: 'OperationNode' containing 
+        the confusion matrix sums of classifications; the confusion matrix averages of each true class
     """
     params_dict = {'P': P, 'Y': Y}
     
diff --git a/src/main/python/systemds/operator/algorithm/builtin/cor.py b/src/main/python/systemds/operator/algorithm/builtin/cor.py
index ddc4c8912a..bb5a87c29d 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/cor.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/cor.py
@@ -30,7 +30,14 @@ from systemds.utils.consts import VALID_INPUT_TYPES
 
 
 def cor(X: Matrix):
+    """
+    This function computes the correlation matrix
     
+    
+    :param X: A Matrix Input to compute the correlation on
+    :return: 'OperationNode' containing 
+        correlation matrix of the input matrix 
+    """
     params_dict = {'X': X}
     return Matrix(X.sds_context,
         'cor',
diff --git a/src/main/python/systemds/operator/algorithm/builtin/correctTypos.py b/src/main/python/systemds/operator/algorithm/builtin/correctTypos.py
index acbd0f9448..59ad8e1d5c 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/correctTypos.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/correctTypos.py
@@ -32,10 +32,23 @@ from systemds.utils.consts import VALID_INPUT_TYPES
 def correctTypos(strings: Frame,
                  **kwargs: Dict[str, VALID_INPUT_TYPES]):
     """
+    Corrects corrupted frames of strings
+    This algorithm operates on the assumption that most strings are correct
+    and simply swaps strings that do not occur often with similar strings that 
+    occur more often
+    References:
+    Fred J. Damerau. 1964. 
+      A technique for computer detection and correction of spelling errors. 
+      Commun. ACM 7, 3 (March 1964), 171–176. 
+      DOI:https://doi.org/10.1145/363958.363994
+    
+    
+    :param strings: The nx1 input frame of corrupted strings
     :param frequency_threshold: Strings that occur above this frequency level will not be corrected
     :param distance_threshold: Max distance at which strings are considered similar
     :param is_verbose: Print debug information
-    :return: 'OperationNode' containing  
+    :return: 'OperationNode' containing 
+        corrected nx1 output frame 
     """
     params_dict = {'strings': strings}
     params_dict.update(kwargs)
diff --git a/src/main/python/systemds/operator/algorithm/builtin/correctTyposApply.py b/src/main/python/systemds/operator/algorithm/builtin/correctTyposApply.py
index 3aa4c0e2d4..c93a4cc418 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/correctTyposApply.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/correctTyposApply.py
@@ -34,9 +34,26 @@ def correctTyposApply(strings: Frame,
                       dict: Frame,
                       **kwargs: Dict[str, VALID_INPUT_TYPES]):
     """
+    Corrects corrupted frames of strings
+    This algorithm operates on the assumption that most strings are correct
+    and simply swaps strings that do not occur often with similar strings that 
+    occur more often
+    References:
+    Fred J. Damerau. 1964. 
+      A technique for computer detection and correction of spelling errors. 
+      Commun. ACM 7, 3 (March 1964), 171–176. 
+      DOI:https://doi.org/10.1145/363958.363994
+    TODO: future: add parameter for list of words that are sure to be correct
+    
+    
+    :param strings: The nx1 input frame of corrupted strings
+    :param nullMask: ---
     :param frequency_threshold: Strings that occur above this frequency level will not be corrected
     :param distance_threshold: Max distance at which strings are considered similar
-    :return: 'OperationNode' containing  
+    :param distance_matrix: ---
+    :param dict: ---
+    :return: 'OperationNode' containing 
+        corrected nx1 output frame 
     """
     params_dict = {'strings': strings, 'distance_matrix': distance_matrix, 'dict': dict}
     params_dict.update(kwargs)
diff --git a/src/main/python/systemds/operator/algorithm/builtin/cox.py b/src/main/python/systemds/operator/algorithm/builtin/cox.py
index 0b9bfb1c48..b00c511a3a 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/cox.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/cox.py
@@ -35,21 +35,58 @@ def cox(X: Matrix,
         R: Matrix,
         **kwargs: Dict[str, VALID_INPUT_TYPES]):
     """
-    :param X: Location to read the input matrix X containing the survival data 
-    :param containing: information
-    :param TE: Column indices of X as a column vector which contain timestamp 
-    :param F: Column indices of X as a column vector which are to be used for 
-    :param fitting: model
+    This script fits a cox Proportional hazard regression model.
+    The Breslow method is used for handling ties and the regression parameters 
+    are computed using trust region newton method with conjugate gradient 
+    
+    
+    
+    :param X: Location to read the input matrix X containing the survival data
+        containing the following information
+        1: timestamps
+        2: whether an event occurred (1) or data is censored (0)
+        3: feature vectors
+    :param TE: Column indices of X as a column vector which contain timestamp
+        (first row) and event information (second row)
+    :param F: Column indices of X as a column vector which are to be used for
+        fitting the Cox model
     :param R: If factors (categorical variables) are available in the input matrix
-    :param the: X
-    :param each: needs to be removed from X; in this case the start
-    :param and: corresponding to the baseline level need to be the same;
-    :param if: not provided by default all variables are considered to be continuous 
-    :param alpha: Parameter to compute a 100*(1-alpha)% confidence interval for the betas  
+        X, location to read matrix R containing the start and end indices of
+        the factors in X
+        R[,1]: start indices
+        R[,2]: end indices
+        Alternatively, user can specify the indices of the baseline level of
+        each factor which needs to be removed from X; in this case the start
+        and end indices corresponding to the baseline level need to be the same;
+        if R is not provided by default all variables are considered to be continuous
+    :param alpha: Parameter to compute a 100*(1-alpha)% confidence interval for the betas
     :param tol: Tolerance ("epsilon")
     :param moi: Max. number of outer (Newton) iterations
     :param mii: Max. number of inner (conjugate gradient) iterations, 0 = no max
-    :return: 'OperationNode' containing matrix rt that contains the order-preserving recoded timestamps from x & which is matrix x with sorted timestamps & matrix mf that contains the column indices of x with the baseline factors removed (if available) 
+    :return: 'OperationNode' containing 
+        a d x 7 matrix m, where d denotes the number of covariates, with the following schema:
+        m[,1]: betas
+        m[,2]: exp(betas)
+        m[,3]: standard error of betas
+        m[,4]: z
+        m[,5]: p-value
+        m[,6]: lower 100*(1-alpha)% confidence interval of betas
+        m[,7]: upper 100*(1-alpha)% confidence interval of betas; two matrices containing a summary of some statistics of the fitted model:
+        1 - file s with the following format
+        - row 1: no. of observations
+        - row 2: no. of events
+        - row 3: log-likelihood
+        - row 4: aic
+        - row 5: rsquare (cox & snell)
+        - row 6: max possible rsquare
+        2 - file t with the following format
+        - row 1: likelihood ratio test statistic, degree of freedom, p-value
+        - row 2: wald test statistic, degree of freedom, p-value
+        - row 3: score (log-rank) test statistic, degree of freedom, p-value; additionally, the following matrices are stored (needed for prediction)
+        1- a column matrix rt that contains the order-preserving recoded timestamps from x
+        2- matrix xo which is matrix x with sorted timestamps
+        3- variance-covariance matrix of the betas cov
+        4- a column matrix mf that contains the column indices of x with the baseline factors removed (if available) 
     """
     params_dict = {'X': X, 'TE': TE, 'F': F, 'R': R}
     params_dict.update(kwargs)
diff --git a/src/main/python/systemds/operator/algorithm/builtin/cspline.py b/src/main/python/systemds/operator/algorithm/builtin/cspline.py
index a84c1edb12..e84f9b01d5 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/cspline.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/cspline.py
@@ -34,13 +34,21 @@ def cspline(X: Matrix,
             inp_x: float,
             **kwargs: Dict[str, VALID_INPUT_TYPES]):
     """
-    :param monotonically: there is no duplicates points in X
+    Solves Cubic Spline Interpolation
+    Algorithms: implement https://en.wikipedia.org/wiki/Spline_interpolation#Algorithm_to_find_the_interpolating_cubic_spline
+    It uses a natural spline with q1''(x0) == qn''(xn) == 0.0
+    
+    
+    :param X: 1-column matrix of x values knots. It is assumed that x values are
+        monotonically increasing and there are no duplicate points in X
+    :param Y: 1-column matrix of corresponding y values knots
     :param inp_x: the given input x, for which the cspline will find predicted y
     :param mode: Specifies the method for cspline (DS - Direct Solve, CG - Conjugate Gradient)
     :param tol: Tolerance (epsilon); conjugate graduent procedure terminates early if
-    :param L2: the beta-residual is less than tolerance * its initial norm
+        L2 norm of the beta-residual is less than tolerance * its initial norm
     :param maxi: Maximum number of conjugate gradient iterations, 0 = no maximum
-    :return: 'OperationNode' containing  
+    :return: 'OperationNode' containing 
+        predicted value; matrix of k parameters
     """
     params_dict = {'X': X, 'Y': Y, 'inp_x': inp_x}
     params_dict.update(kwargs)
diff --git a/src/main/python/systemds/operator/algorithm/builtin/csplineCG.py b/src/main/python/systemds/operator/algorithm/builtin/csplineCG.py
index 75cc5c451f..1e5e52929a 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/csplineCG.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/csplineCG.py
@@ -34,12 +34,18 @@ def csplineCG(X: Matrix,
               inp_x: float,
               **kwargs: Dict[str, VALID_INPUT_TYPES]):
     """
-    :param monotonically: there is no duplicates points in X
+    Builtin that solves cubic spline interpolation using conjugate gradient algorithm
+    
+    
+    :param X: 1-column matrix of x values knots. It is assumed that x values are
+        monotonically increasing and there are no duplicate points in X
+    :param Y: 1-column matrix of corresponding y values knots
     :param inp_x: the given input x, for which the cspline will find predicted y.
     :param tol: Tolerance (epsilon); conjugate graduent procedure terminates early if
-    :param L2: the beta-residual is less than tolerance * its initial norm
+        L2 norm of the beta-residual is less than tolerance * its initial norm
     :param maxi: Maximum number of conjugate gradient iterations, 0 = no maximum
-    :return: 'OperationNode' containing  
+    :return: 'OperationNode' containing 
+        predicted value; matrix of k parameters
     """
     params_dict = {'X': X, 'Y': Y, 'inp_x': inp_x}
     params_dict.update(kwargs)
diff --git a/src/main/python/systemds/operator/algorithm/builtin/csplineDS.py b/src/main/python/systemds/operator/algorithm/builtin/csplineDS.py
index 19454be261..dfdfa536df 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/csplineDS.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/csplineDS.py
@@ -33,9 +33,15 @@ def csplineDS(X: Matrix,
               Y: Matrix,
               inp_x: float):
     """
-    :param monotonically: there is no duplicates points in X
+    Builtin that solves cubic spline interpolation using a direct solver.
+    
+    
+    :param X: 1-column matrix of x values knots. It is assumed that x values are
+        monotonically increasing and there are no duplicate points in X
+    :param Y: 1-column matrix of corresponding y values knots
     :param inp_x: the given input x, for which the cspline will find predicted y.
-    :return: 'OperationNode' containing  
+    :return: 'OperationNode' containing 
+        predicted value; matrix of k parameters
     """
     params_dict = {'X': X, 'Y': Y, 'inp_x': inp_x}
     
diff --git a/src/main/python/systemds/operator/algorithm/builtin/cvlm.py b/src/main/python/systemds/operator/algorithm/builtin/cvlm.py
index 5a82de48b6..35f8e519e5 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/cvlm.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/cvlm.py
@@ -34,10 +34,19 @@ def cvlm(X: Matrix,
          k: int,
          **kwargs: Dict[str, VALID_INPUT_TYPES]):
     """
+    The cvlm-function is used for cross-validation of the provided data model. This function follows a non-exhaustive cross
+    validation method. It uses lm and lmPredict functions to solve the linear regression and to predict the class of a
+    feature vector with no intercept, shifting, and rescaling.
+    
+    
+    :param X: Recorded Data set into matrix
+    :param y: 1-column matrix of response values.
     :param k: Number of subsets needed, It should always be more than 1 and less than nrow(X)
     :param icpt: Intercept presence, shifting and rescaling the columns of X
     :param reg: Regularization constant (lambda) for L2-regularization. set to nonzero for
-    :return: 'OperationNode' containing  
+        highly dependent/sparse/numerous features
+    :return: 'OperationNode' containing 
+        response values; validated data set
     """
     params_dict = {'X': X, 'y': y, 'k': k}
     params_dict.update(kwargs)
diff --git a/src/main/python/systemds/operator/algorithm/builtin/dbscan.py b/src/main/python/systemds/operator/algorithm/builtin/dbscan.py
index 0bc08656bb..541b272a58 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/dbscan.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/dbscan.py
@@ -32,9 +32,15 @@ from systemds.utils.consts import VALID_INPUT_TYPES
 def dbscan(X: Matrix,
            **kwargs: Dict[str, VALID_INPUT_TYPES]):
     """
+    Implements the DBSCAN clustering algorithm using Euclidean distance matrix
+    
+    
+    :param X: The input Matrix to do DBSCAN on.
     :param eps: Maximum distance between two points for one to be considered reachable for the other.
     :param minPts: Number of points in a neighborhood for a point to be considered as a core point
-    :return: 'OperationNode' containing  
+        (includes the point itself).
+    :return: 'OperationNode' containing 
+        clustering matrix 
     """
     params_dict = {'X': X}
     params_dict.update(kwargs)
diff --git a/src/main/python/systemds/operator/algorithm/builtin/dbscanApply.py b/src/main/python/systemds/operator/algorithm/builtin/dbscanApply.py
index e63b18cbf9..0d254e79cc 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/dbscanApply.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/dbscanApply.py
@@ -33,8 +33,15 @@ def dbscanApply(X: Matrix,
                 clusterModel: Matrix,
                 eps: float):
     """
+    Implements the outlier detection/prediction algorithm using a DBScan model
+    
+    
+    :param NAME: MEANING
+    :param X: The input Matrix to do outlier detection on.
+    :param clusterModel: Model of clusters to predict outliers against.
     :param eps: Maximum distance between two points for one to be considered reachable for the other.
-    :return: 'OperationNode' containing  
+    :return: 'OperationNode' containing 
+        predicted outliers 
     """
     params_dict = {'X': X, 'clusterModel': clusterModel, 'eps': eps}
     
diff --git a/src/main/python/systemds/operator/algorithm/builtin/decisionTree.py b/src/main/python/systemds/operator/algorithm/builtin/decisionTree.py
index 94da3da45c..9b321898e2 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/decisionTree.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/decisionTree.py
@@ -34,12 +34,36 @@ def decisionTree(X: Matrix,
                  R: Matrix,
                  **kwargs: Dict[str, VALID_INPUT_TYPES]):
     """
-    :param a: vector, other positive Integers indicate the number of categories
-    :param If: not provided by default all variables are assumed to be scale
+    Builtin script implementing classification trees with scale and categorical features
+    
+    
+    :param X: Feature matrix X; note that X needs to be both recoded and dummy coded
+    :param Y: Label matrix Y; note that Y needs to be both recoded and dummy coded
+    :param R: Matrix R which for each feature in X contains the following information
+        - R[1,]: Row Vector which indicates if feature vector is scalar or categorical. 1 indicates
+        a scalar feature vector, other positive Integers indicate the number of categories
+        If R is not provided by default all variables are assumed to be scale
     :param bins: Number of equiheight bins per scale feature to choose thresholds
     :param depth: Maximum depth of the learned tree
     :param verbose: boolean specifying if the algorithm should print information while executing
-    :return: 'OperationNode' containing information: & if the feature is categorical) & looks at if j is an internal node, otherwise 0 & as r input vector & of the subset of values & 6,7,... if j is categorical & a leaf node: number of misclassified samples reaching at node j & at m[6,j] if the feature chosen for j is scale, & feature chosen for j is categorical rows 6,7,... depict the value subset chosen for j & a leaf node 1 if j is impure and the number of samples at j > threshold, ot [...]
+    :return: 'OperationNode' containing 
+        matrix m where each column corresponds to a node in the learned tree and each row
+        contains the following information:
+        m[1,j]: id of node j (in a complete binary tree)
+        m[2,j]: offset (no. of columns) to left child of j if j is an internal node, otherwise 0
+        m[3,j]: feature index of the feature (scale feature id if the feature is scale or
+        categorical feature id if the feature is categorical)
+        that node j looks at if j is an internal node, otherwise 0
+        m[4,j]: type of the feature that node j looks at if j is an internal node: holds
+        the same information as r input vector
+        m[5,j]: if j is an internal node: 1 if the feature chosen for j is scale,
+        otherwise the size of the subset of values
+        stored in rows 6,7,... if j is categorical
+        if j is a leaf node: number of misclassified samples reaching at node j
+        m[6:,j]: if j is an internal node: threshold the example's feature value is compared
+        to is stored at m[6,j] if the feature chosen for j is scale,
+        otherwise if the feature chosen for j is categorical rows 6,7,... depict the value subset chosen for j
+        if j is a leaf node 1 if j is impure and the number of samples at j > threshold, otherwise 0 
     """
     params_dict = {'X': X, 'Y': Y, 'R': R}
     params_dict.update(kwargs)
diff --git a/src/main/python/systemds/operator/algorithm/builtin/decisionTreePredict.py b/src/main/python/systemds/operator/algorithm/builtin/decisionTreePredict.py
index 7880edaa1f..7c485fa3c8 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/decisionTreePredict.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/decisionTreePredict.py
@@ -33,18 +33,32 @@ def decisionTreePredict(M: Matrix,
                         X: Matrix,
                         strategy: str):
     """
-    :param to: in the learned tree and each row contains the following information:
-    :param categorical: if the feature is categorical)
-    :param that: looks at if j is an internal node, otherwise 0
-    :param the: as R input vector
-    :param otherwise: of the subset of values
-    :param stored: 6,7,... if j is categorical
-    :param If: a leaf node: number of misclassified samples reaching at node j
-    :param to: at M[6,j] if the feature chosen for j is scale,
-    :param otherwise: feature chosen for j is categorical rows 6,7,... depict the value subset chosen for j
-    :param If: a leaf node 1 if j is impure and the number of samples at j > threshold, otherwise 0
-    :param strategy: strategy, can be one of ["GEMM", "TT", "PTT"], referring to "Generic matrix multiplication", 
-    :return: 'OperationNode' containing  
+    Builtin script implementing prediction based on classification trees with scale features using prediction methods of the
+    Hummingbird paper (https://www.usenix.org/system/files/osdi20-nakandala.pdf).
+    
+    
+    :param M: Decision tree matrix M, as generated by scripts/builtin/decisionTree.dml, where each column corresponds
+        to a node in the learned tree and each row contains the following information:
+        M[1,j]: id of node j (in a complete binary tree)
+        M[2,j]: Offset (no. of columns) to left child of j if j is an internal node, otherwise 0
+        M[3,j]: Feature index of the feature (scale feature id if the feature is scale or
+        categorical feature id if the feature is categorical)
+        that node j looks at if j is an internal node, otherwise 0
+        M[4,j]: Type of the feature that node j looks at if j is an internal node: holds
+        the same information as R input vector
+        M[5,j]: If j is an internal node: 1 if the feature chosen for j is scale,
+        otherwise the size of the subset of values
+        stored in rows 6,7,... if j is categorical
+        If j is a leaf node: number of misclassified samples reaching at node j
+        M[6:,j]: If j is an internal node: Threshold the example's feature value is compared
+        to is stored at M[6,j] if the feature chosen for j is scale,
+        otherwise if the feature chosen for j is categorical rows 6,7,... depict the value subset chosen for j
+        If j is a leaf node 1 if j is impure and the number of samples at j > threshold, otherwise 0
+    :param X: Feature matrix X
+    :param strategy: Prediction strategy, can be one of ["GEMM", "TT", "PTT"], referring to "Generic matrix multiplication",
+        "Tree traversal", and "Perfect tree traversal", respectively
+    :return: 'OperationNode' containing 
+        matrix containing the predicted labels for x 
     """
     params_dict = {'M': M, 'X': X, 'strategy': strategy}
     return Matrix(M.sds_context,
diff --git a/src/main/python/systemds/operator/algorithm/builtin/deepWalk.py b/src/main/python/systemds/operator/algorithm/builtin/deepWalk.py
index 1f5b106ade..3295690676 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/deepWalk.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/deepWalk.py
@@ -36,13 +36,18 @@ def deepWalk(Graph: Matrix,
              t: int,
              **kwargs: Dict[str, VALID_INPUT_TYPES]):
     """
+    This script performs DeepWalk on a given graph (https://arxiv.org/pdf/1403.6652.pdf)
+    
+    
+    :param Graph: adjacency matrix of a graph (n x n)
     :param w: window size
     :param d: embedding size
     :param gamma: walks per vertex
     :param t: walk length
     :param alpha: learning rate
     :param beta: factor for decreasing learning rate
-    :return: 'OperationNode' containing  
+    :return: 'OperationNode' containing 
+        matrix of vertex/word representation (n x d) 
     """
     params_dict = {'Graph': Graph, 'w': w, 'd': d, 'gamma': gamma, 't': t}
     params_dict.update(kwargs)
diff --git a/src/main/python/systemds/operator/algorithm/builtin/denialConstraints.py b/src/main/python/systemds/operator/algorithm/builtin/denialConstraints.py
index b2bb53c59b..ab46e7fac8 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/denialConstraints.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/denialConstraints.py
@@ -32,32 +32,56 @@ from systemds.utils.consts import VALID_INPUT_TYPES
 def denialConstraints(dataFrame: Frame,
                       constraintsFrame: Frame):
     """
+    This function considers some constraints indicating statements that can NOT happen in the data (denial constraints).
+      EXAMPLE:
+      dataFrame:
+           rank       discipline   yrs.since.phd   yrs.service   sex      salary
+      1    Prof       B            19              18            Male     139750
+      2    Prof       B            20              16            Male     173200
+      3    AsstProf   B            3               3             Male     79750.56
+      4    Prof       B            45              39            Male     115000
+      5    Prof       B            40              40            Male     141500
+      6    AssocProf  B            6               6             Male     97000
+      7    Prof       B            30              23            Male     175000
+      8    Prof       B            45              45            Male     147765
+      9    Prof       B            21              20            Male     119250
+      10   Prof       B            18              18            Female   129000
+      11   AssocProf  B            12              8             Male     119800
+      12   AsstProf   B            7               2             Male     79800
+      13   AsstProf   B            1               1             Male     77700
+      constraintsFrame:
+         
+      idx   constraint.type   group.by   group.variable      group.option   variable1      relation   variable2
+      1     variableCompare   FALSE                                         yrs.since.phd  <          yrs.service
+      2     instanceCompare   TRUE       rank                Prof           yrs.service    ><         salary
+      3     valueCompare      FALSE                                         salary         =          78182
+      4     variableCompare   TRUE       discipline          B              yrs.service    >          yrs.since.phd
+      Example: explanation of constraint 2 --> it can't happen that one professor of rank Prof has more years of service than other, but lower salary.
+    
+    
     :param dataFrame: frame which columns represent the variables of the data and the rows correspond
-    :param to: or instances.
-    :param Recommended: a column indexing the instances from 1 to N (N=number of instances).
+        to different tuples or instances.
+        Recommended to have a column indexing the instances from 1 to N (N=number of instances).
     :param constraintsFrame: frame with fixed columns and each row representing one constraint.
-    :param ie: value of the variable 1 in instance 1 is lower/higher than the value of variable 1 in instance 2, 
-    :param then: of of variable 2 in instance 2 can't be lower/higher than the value of variable 2 in instance 2.
-    :param in: of instanceCompare
-    :param rank: yrs.service   sex      salary
-    :param 1: 19              18            Male     139750
-    :param 2: 20              16            Male     173200
-    :param 3: 3               3             Male     79750.56
-    :param 4: 45              39            Male     115000
-    :param 5: 40              40            Male     141500
-    :param 6: 6               6             Male     97000
-    :param 7: 30              23            Male     175000
-    :param 8: 45              45            Male     147765
-    :param 9: 21              20            Male     119250
-    :param 10: 18              18            Female   129000
-    :param 11: 12              8             Male     119800
-    :param 12: 7               2             Male     79800
-    :param 13: 1               1             Male     77700
-    :param 1: yrs.since.phd  <          yrs.service
-    :param 2: rank                Prof           yrs.service    ><         salary
-    :param 3: salary         =          78182
-    :param 4: discipline          B              yrs.service    >          yrs.since.phd
-    :return: 'OperationNode' containing shows the indexes of dataframe that are wrong. & shows the index of the denial constraint that is fulfilled & no wrong instances to show (0 constrains fulfilled) --> wronginstances=matrix(0,1,2) 
+        1. idx: (double) index of the constraint, from 1 to M (number of constraints)
+        2. constraint.type: (string) The constraints can be of 3 different kinds:
+        - variableCompare: for each instance, it will compare the values of two variables (with a relation <, > or =).
+        - valueCompare: for each instance, it will compare a fixed value and a variable value (with a relation <, > or =).
+        - instanceCompare: for every couple of instances, it will compare the relation between two variables,
+        ie  if the value of the variable 1 in instance 1 is lower/higher than the value of variable 1 in instance 2,
+        then the value of variable 2 in instance 2 can't be lower/higher than the value of variable 2 in instance 2.
+        3. group.by: (boolean) if TRUE only one group of data (defined by a variable option) will be considered for the constraint.
+        4. group.variable: (string, only if group.by TRUE) name of the variable (column in dataFrame) that will divide our data in groups.
+        5. group.option: (only if group.by TRUE) option of the group.variable that defines the group to consider.
+        6. variable1: (string) first variable to compare (name of column in dataFrame).
+        7. relation: (string) can be < , > or = in the case of variableCompare and valueCompare, and < >, < < , > < or > >
+        in the case of instanceCompare
+        8. variable2: (string) second variable to compare (name of column in dataFrame) or fixed value for the case of valueCompare.
+    :return: 'OperationNode' containing 
+        matrix of 2 columns.
+        - first column shows the indexes of dataframe that are wrong.
+        - second column shows the index of the denial constraint that is fulfilled
+        if there are no wrong instances to show (0 constraints fulfilled) --> wronginstances=matrix(0,1,2) 
     """
     params_dict = {'dataFrame': dataFrame, 'constraintsFrame': constraintsFrame}
     return Matrix(dataFrame.sds_context,
diff --git a/src/main/python/systemds/operator/algorithm/builtin/discoverFD.py b/src/main/python/systemds/operator/algorithm/builtin/discoverFD.py
index ca2f35bf3a..11d2b7eb4e 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/discoverFD.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/discoverFD.py
@@ -33,8 +33,16 @@ def discoverFD(X: Matrix,
                Mask: Matrix,
                threshold: float):
     """
-    :param will: second column from processing
-    :return: 'OperationNode' containing  
+    Implements builtin for finding functional dependencies
+    
+    
+    
+    :param X: Input Matrix X, encoded Matrix if data is categorical
+    :param Mask: A row vector for interested features i.e. Mask =[1, 0, 1]
+        will exclude the second column from processing
+    :param threshold: threshold value in interval [0, 1] for robust FDs
+    :return: 'OperationNode' containing 
+        matrix of functional dependencies 
     """
     params_dict = {'X': X, 'Mask': Mask, 'threshold': threshold}
     return Matrix(X.sds_context,
diff --git a/src/main/python/systemds/operator/algorithm/builtin/dist.py b/src/main/python/systemds/operator/algorithm/builtin/dist.py
index 6122871497..933bfe0de9 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/dist.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/dist.py
@@ -30,7 +30,14 @@ from systemds.utils.consts import VALID_INPUT_TYPES
 
 
 def dist(X: Matrix):
+    """
+    Returns Euclidean distance matrix (distances between N n-dimensional points)
     
+    
+    :param X: Matrix to calculate the distance inside
+    :return: 'OperationNode' containing 
+        euclidean distance matrix 
+    """
     params_dict = {'X': X}
     return Matrix(X.sds_context,
         'dist',
diff --git a/src/main/python/systemds/operator/algorithm/builtin/dmv.py b/src/main/python/systemds/operator/algorithm/builtin/dmv.py
index 2a6eaa4952..c95b9535cc 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/dmv.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/dmv.py
@@ -32,9 +32,15 @@ from systemds.utils.consts import VALID_INPUT_TYPES
 def dmv(X: Frame,
         **kwargs: Dict[str, VALID_INPUT_TYPES]):
     """
+    The dmv-function is used to find disguised missing values utilising syntactical pattern recognition.
+    
+    
+    :param X: Input Frame
     :param threshold: Threshold value in interval [0, 1] for dominant pattern per column (e.g., 0.8 means
+        that 80% of the entries per column must adhere to this pattern to be dominant)
     :param replace: The string disguised missing values are replaced with
-    :return: 'OperationNode' containing  
+    :return: 'OperationNode' containing 
+        frame x including detected disguised missing values 
     """
     params_dict = {'X': X}
     params_dict.update(kwargs)
diff --git a/src/main/python/systemds/operator/algorithm/builtin/ema.py b/src/main/python/systemds/operator/algorithm/builtin/ema.py
index ab4ba1a675..485bc60463 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/ema.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/ema.py
@@ -37,14 +37,19 @@ def ema(X: Frame,
         beta: float,
         gamma: float):
     """
-    :param search_iterations: Budget iterations for parameter optimisation,
-    :param used: weren't set
+    This function imputes values with exponential moving average (single, double or triple).
+    
+    
+    :param X: Frame that contains time series data that needs to be imputed
+    :param search_iterations: Budget iterations for parameter optimization,
+        used if parameters weren't set
     :param mode: Type of EMA method. Either "single", "double" or "triple"
     :param freq: Seasonality when using triple EMA.
     :param alpha: alpha- value for EMA
     :param beta: beta- value for EMA
     :param gamma: gamma- value for EMA
-    :return: 'OperationNode' containing  
+    :return: 'OperationNode' containing 
+        frame with ema results 
     """
     params_dict = {'X': X, 'search_iterations': search_iterations, 'mode': mode, 'freq': freq, 'alpha': alpha, 'beta': beta, 'gamma': gamma}
     return Matrix(X.sds_context,
diff --git a/src/main/python/systemds/operator/algorithm/builtin/executePipeline.py b/src/main/python/systemds/operator/algorithm/builtin/executePipeline.py
index 31235e5910..52a885f182 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/executePipeline.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/executePipeline.py
@@ -40,9 +40,24 @@ def executePipeline(pipeline: Frame,
                     verbose: bool,
                     **kwargs: Dict[str, VALID_INPUT_TYPES]):
     """
+    This function executes a pipeline.
+    
+    
+    :param logical: ---
+    :param pipeline: ---
+    :param X: ---
+    :param Y: ---
+    :param Xtest: ---
+    :param Ytest: ---
+    :param metaList: ---
+    :param hyperParameters: ---
+    :param hpForPruning: ---
+    :param changesByOp: ---
     :param flagsCount: ---
     :param test: ---
-    :return: 'OperationNode' containing  
+    :param verbose: ---
+    :return: 'OperationNode' containing 
+        --------------------- 
     """
     params_dict = {'pipeline': pipeline, 'Xtrain': Xtrain, 'Ytrain': Ytrain, 'Xtest': Xtest, 'Ytest': Ytest, 'metaList': metaList, 'hyperParameters': hyperParameters, 'flagsCount': flagsCount, 'verbose': verbose}
     params_dict.update(kwargs)
diff --git a/src/main/python/systemds/operator/algorithm/builtin/ffPredict.py b/src/main/python/systemds/operator/algorithm/builtin/ffPredict.py
index 3c6244dc1e..e262b747e6 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/ffPredict.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/ffPredict.py
@@ -33,8 +33,14 @@ def ffPredict(model: List,
               X: Matrix,
               **kwargs: Dict[str, VALID_INPUT_TYPES]):
     """
+    This builtin function makes prediction given data and trained feedforward neural network model
+    
+    
+    :param model: Trained ff neural network model
+    :param X: Data used for making predictions
     :param batch_size: Batch size
-    :return: 'OperationNode' containing value 
+    :return: 'OperationNode' containing 
+        predicted value 
     """
     params_dict = {'model': model, 'X': X}
     params_dict.update(kwargs)
diff --git a/src/main/python/systemds/operator/algorithm/builtin/ffTrain.py b/src/main/python/systemds/operator/algorithm/builtin/ffTrain.py
index be01b16a55..366493c136 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/ffTrain.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/ffTrain.py
@@ -35,16 +35,26 @@ def ffTrain(X: Matrix,
             loss_fcn: str,
             **kwargs: Dict[str, VALID_INPUT_TYPES]):
     """
+    This builtin function trains simple feed-forward neural network. The architecture of the
+    networks is: affine1 -> relu -> dropout -> affine2 -> configurable output activation function.
+    Hidden layer has 128 neurons. Dropout rate is 0.35. Input and output sizes are inferred from X and Y.
+    
+    
+    :param X: Training data
+    :param Y: Labels/Target values
     :param batch_size: Batch size
     :param epochs: Number of epochs
     :param learning_rate: Learning rate
-    :param out_activation: User specified ouptut activation function. Possible values:
+    :param out_activation: User specified output activation function. Possible values:
+        "sigmoid", "relu", "lrelu", "tanh", "softmax", "logits" (no activation).
     :param loss_fcn: User specified loss function. Possible values:
+        "l1", "l2", "log_loss", "logcosh_loss", "cel" (cross-entropy loss).
     :param shuffle: Flag which indicates if dataset should be shuffled or not
     :param validation_split: Fraction of training set used as validation set
     :param seed: Seed for model initialization
     :param verbose: Flag which indicates if function should print to stdout
-    :return: 'OperationNode' containing by the model & by the model 
+    :return: 'OperationNode' containing 
+        trained model which can be used in ffpredict 
     """
     params_dict = {'X': X, 'Y': Y, 'out_activation': out_activation, 'loss_fcn': loss_fcn}
     params_dict.update(kwargs)
diff --git a/src/main/python/systemds/operator/algorithm/builtin/fit_pipeline.py b/src/main/python/systemds/operator/algorithm/builtin/fit_pipeline.py
index 34bee247be..9acea1623a 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/fit_pipeline.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/fit_pipeline.py
@@ -37,7 +37,26 @@ def fit_pipeline(trainData: Frame,
                  evaluationFunc: str,
                  evalFunHp: Matrix,
                  **kwargs: Dict[str, VALID_INPUT_TYPES]):
+    """
+    This script will read the dirty and clean data, then it will apply the best pipeline on dirty data
+    and then will classify both cleaned dataset and check if the cleaned dataset is performing same as original dataset
+    in terms of classification accuracy
     
+    
+    :param NAME: MEANING
+    :param trainData: ---
+    :param testData: ---
+    :param metaData: ---
+    :param lp: ---
+    :param pip: ---
+    :param hp: ---
+    :param evaluationFunc: ---
+    :param evalFunHp: ---
+    :param isLastLabel: ---
+    :param correctTypos: ---
+    :return: 'OperationNode' containing 
+        meaning--- 
+    """
     params_dict = {'trainData': trainData, 'testData': testData, 'pip': pip, 'applyFunc': applyFunc, 'hp': hp, 'evaluationFunc': evaluationFunc, 'evalFunHp': evalFunHp}
     params_dict.update(kwargs)
     
diff --git a/src/main/python/systemds/operator/algorithm/builtin/fixInvalidLengths.py b/src/main/python/systemds/operator/algorithm/builtin/fixInvalidLengths.py
index d70ca4a42c..a9730be4dc 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/fixInvalidLengths.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/fixInvalidLengths.py
@@ -32,7 +32,17 @@ from systemds.utils.consts import VALID_INPUT_TYPES
 def fixInvalidLengths(F1: Frame,
                       mask: Matrix,
                       **kwargs: Dict[str, VALID_INPUT_TYPES]):
+    """
+    Fix invalid lengths
     
+    
+    :param F1: ---
+    :param mask: ---
+    :param ql: ---
+    :param qu: ---
+    :return: 'OperationNode' containing 
+        ------ 
+    """
     params_dict = {'F1': F1, 'mask': mask}
     params_dict.update(kwargs)
     
diff --git a/src/main/python/systemds/operator/algorithm/builtin/fixInvalidLengthsApply.py b/src/main/python/systemds/operator/algorithm/builtin/fixInvalidLengthsApply.py
index 2fa9c5f748..31440f9f98 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/fixInvalidLengthsApply.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/fixInvalidLengthsApply.py
@@ -33,7 +33,18 @@ def fixInvalidLengthsApply(X: Frame,
                            mask: Matrix,
                            qLow: Matrix,
                            qUp: Matrix):
+    """
+    Fix invalid lengths
     
+    
+    :param NAME: MEANING
+    :param X: ---
+    :param mask: ---
+    :param ql: ---
+    :param qu: ---
+    :return: 'OperationNode' containing 
+        meaning------ 
+    """
     params_dict = {'X': X, 'mask': mask, 'qLow': qLow, 'qUp': qUp}
     return Matrix(X.sds_context,
         'fixInvalidLengthsApply',
diff --git a/src/main/python/systemds/operator/algorithm/builtin/frameSort.py b/src/main/python/systemds/operator/algorithm/builtin/frameSort.py
index 1199c8529c..692d925a48 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/frameSort.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/frameSort.py
@@ -32,7 +32,16 @@ from systemds.utils.consts import VALID_INPUT_TYPES
 def frameSort(F: Frame,
               mask: Matrix,
               **kwargs: Dict[str, VALID_INPUT_TYPES]):
+    """
+    Related to [SYSTEMDS-2662] dependency function for cleaning pipelines
+    Built-in for sorting frames
     
+    
+    :param F: Data frame of string values
+    :param mask: matrix for identifying string columns
+    :return: 'OperationNode' containing 
+        sorted dataset by column 1 in decreasing order 
+    """
     params_dict = {'F': F, 'mask': mask}
     params_dict.update(kwargs)
     return Matrix(F.sds_context,
diff --git a/src/main/python/systemds/operator/algorithm/builtin/frequencyEncode.py b/src/main/python/systemds/operator/algorithm/builtin/frequencyEncode.py
index 471dfe879a..3ae4a8cfc0 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/frequencyEncode.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/frequencyEncode.py
@@ -31,7 +31,15 @@ from systemds.utils.consts import VALID_INPUT_TYPES
 
 def frequencyEncode(X: Matrix,
                     mask: Matrix):
+    """
+    function frequency conversion
     
+    
+    :param X: dataset x
+    :param mask: mask of the columns for frequency conversion
+    :return: 'OperationNode' containing 
+        categorical columns are replaced with their frequencies, the frequency counts for the different categoricals 
+    """
     params_dict = {'X': X, 'mask': mask}
     
     vX_0 = Matrix(X.sds_context, '')
diff --git a/src/main/python/systemds/operator/algorithm/builtin/frequencyEncodeApply.py b/src/main/python/systemds/operator/algorithm/builtin/frequencyEncodeApply.py
index da1370a872..17d7e3118b 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/frequencyEncodeApply.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/frequencyEncodeApply.py
@@ -31,7 +31,15 @@ from systemds.utils.consts import VALID_INPUT_TYPES
 
 def frequencyEncodeApply(X: Matrix,
                          freqCount: Matrix):
+    """
+    frequency code apply
     
+    
+    :param X: dataset x
+    :param freqCount: the frequency counts for the different categoricals
+    :return: 'OperationNode' containing 
+        categorical columns are replaced with their frequencies given 
+    """
     params_dict = {'X': X, 'freqCount': freqCount}
     return Matrix(X.sds_context,
         'frequencyEncodeApply',
diff --git a/src/main/python/systemds/operator/algorithm/builtin/garch.py b/src/main/python/systemds/operator/algorithm/builtin/garch.py
index dab118eef1..9d647c2249 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/garch.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/garch.py
@@ -39,6 +39,15 @@ def garch(X: Matrix,
           sim_seed: int,
           verbose: bool):
     """
+    This is a builtin function that implements GARCH(1,1), a statistical model used in analyzing time-series data where the variance
+    error is believed to be serially autocorrelated
+    COMMENTS
+    This has some drawbacks: slow convergence of optimization (sort of simulated annealing/gradient descent)
+    TODO: use BFGS or BHHH if it is available (these are the go-to methods)
+    TODO: (only then) extend to garch(p,q); otherwise the search space is way too big for the current method
+    
+    
+    :param X: The input Matrix to apply GARCH on.
     :param kmax: Number of iterations
     :param momentum: Momentum for momentum-gradient descent (set to 0 to deactivate)
     :param start_stepsize: Initial gradient-descent stepsize
@@ -47,7 +56,8 @@ def garch(X: Matrix,
     :param end_vicinity: same at end (linear decay)
     :param sim_seed: seed for simulation of process on fitted coefficients
     :param verbose: verbosity, comments during fitting
-    :return: 'OperationNode' containing term of fitted process & arch-coefficient of fitted process & garch-coefficient of fitted process & drawbacks: slow convergence of optimization (sort of simulated annealing/gradient descent) 
+    :return: 'OperationNode' containing 
+        simulated garch(1,1) process on fitted coefficients; variances of simulated fitted process; constant term of fitted process; 1-st arch-coefficient of fitted process; 1-st garch-coefficient of fitted process
     """
     params_dict = {'X': X, 'kmax': kmax, 'momentum': momentum, 'start_stepsize': start_stepsize, 'end_stepsize': end_stepsize, 'start_vicinity': start_vicinity, 'end_vicinity': end_vicinity, 'sim_seed': sim_seed, 'verbose': verbose}
     
diff --git a/src/main/python/systemds/operator/algorithm/builtin/gaussianClassifier.py b/src/main/python/systemds/operator/algorithm/builtin/gaussianClassifier.py
index 5f35c9557e..976af9e91e 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/gaussianClassifier.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/gaussianClassifier.py
@@ -33,9 +33,20 @@ def gaussianClassifier(D: Matrix,
                        C: Matrix,
                        **kwargs: Dict[str, VALID_INPUT_TYPES]):
     """
+    Computes the parameters needed for Gaussian Classification.
+    Thus it computes the following per class: the prior probability,
+    the inverse covariance matrix, the mean per feature and the determinant
+    of the covariance matrix. Furthermore (if not explicitly defined), it
+    adds some small smoothing value along the variances, to prevent
+    numerical errors / instabilities.
+    
+    
+    :param D: Input matrix (training set)
+    :param C: Target vector
     :param varSmoothing: Smoothing factor for variances
     :param verbose: Print accuracy of the training set
-    :return: 'OperationNode' containing  
+    :return: 'OperationNode' containing 
+        vector storing the class prior probabilities; matrix storing the means of the classes; list of inverse covariance matrices; vector storing the determinants of the classes
     """
     params_dict = {'D': D, 'C': C}
     params_dict.update(kwargs)
diff --git a/src/main/python/systemds/operator/algorithm/builtin/getAccuracy.py b/src/main/python/systemds/operator/algorithm/builtin/getAccuracy.py
index c1bffc1754..4af6063a2a 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/getAccuracy.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/getAccuracy.py
@@ -33,8 +33,14 @@ def getAccuracy(y: Matrix,
                 yhat: Matrix,
                 **kwargs: Dict[str, VALID_INPUT_TYPES]):
     """
+    This builtin function computes the weighted and simple accuracy for given predictions
+    
+    
+    :param y: Ground truth (Actual Labels)
+    :param yhat: Predictions (Predicted labels)
     :param isWeighted: Flag for weighted or non-weighted accuracy calculation
-    :return: 'OperationNode' containing of the predicted labels 
+    :return: 'OperationNode' containing 
+        accuracy of the predicted labels 
     """
     params_dict = {'y': y, 'yhat': yhat}
     params_dict.update(kwargs)
diff --git a/src/main/python/systemds/operator/algorithm/builtin/glm.py b/src/main/python/systemds/operator/algorithm/builtin/glm.py
index a15ea9ec2a..24af5b4d54 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/glm.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/glm.py
@@ -33,18 +33,93 @@ def glm(X: Matrix,
         Y: Matrix,
         **kwargs: Dict[str, VALID_INPUT_TYPES]):
     """
+    This script solves GLM regression using NEWTON/FISHER scoring with trust regions. The glm-function is a flexible
+    generalization of ordinary linear regression that allows for response variables that have error distribution models.
+    In addition, some GLM statistics are provided as console output by setting verbose=TRUE, one comma-separated name-value
+    pair per each line, as follows:
+    ----------------------------------------------------------------------------------------------------------------------
+    TERMINATION_CODE      A positive integer indicating success/failure as follows:
+                          1 = Converged successfully; 2 = Maximum number of iterations reached; 
+                          3 = Input (X, Y) out of range; 4 = Distribution/link is not supported
+    BETA_MIN              Smallest beta value (regression coefficient), excluding the intercept
+    BETA_MIN_INDEX        Column index for the smallest beta value
+    BETA_MAX              Largest beta value (regression coefficient), excluding the intercept
+    BETA_MAX_INDEX        Column index for the largest beta value
+    INTERCEPT             Intercept value, or NaN if there is no intercept (if icpt=0)
+    DISPERSION            Dispersion used to scale deviance, provided as "disp" input parameter
+                          or estimated (same as DISPERSION_EST) if the "disp" parameter is <= 0
+    DISPERSION_EST        Dispersion estimated from the dataset
+    DEVIANCE_UNSCALED     Deviance from the saturated model, assuming dispersion == 1.0
+    DEVIANCE_SCALED       Deviance from the saturated model, scaled by the DISPERSION value
+    ----------------------------------------------------------------------------------------------------------------------
+    The Log file, when requested, contains the following per-iteration variables in CSV format,
+    each line containing triple (NAME, ITERATION, VALUE) with ITERATION = 0 for initial values:
+    ----------------------------------------------------------------------------------------------------------------------
+    NUM_CG_ITERS          Number of inner (Conj.Gradient) iterations in this outer iteration
+    IS_TRUST_REACHED      1 = trust region boundary was reached, 0 = otherwise
+    POINT_STEP_NORM       L2-norm of iteration step from old point (i.e. "beta") to new point
+    OBJECTIVE             The loss function we minimize (i.e. negative partial log-likelihood)
+    OBJ_DROP_REAL         Reduction in the objective during this iteration, actual value
+    OBJ_DROP_PRED         Reduction in the objective predicted by a quadratic approximation
+    OBJ_DROP_RATIO        Actual-to-predicted reduction ratio, used to update the trust region
+    GRADIENT_NORM         L2-norm of the loss function gradient (NOTE: sometimes omitted)
+    LINEAR_TERM_MIN       The minimum value of X %*% beta, used to check for overflows
+    LINEAR_TERM_MAX       The maximum value of X %*% beta, used to check for overflows
+    IS_POINT_UPDATED      1 = new point accepted; 0 = new point rejected, old point restored
+    TRUST_DELTA           Updated trust region size, the "delta"
+    ----------------------------------------------------------------------------------------------------------------------
+    SOME OF THE SUPPORTED GLM DISTRIBUTION FAMILIES
+    AND LINK FUNCTIONS:
+    dfam vpow link lpow  Distribution.link   Canonical?
+    ----------------------------------------------------------------------------------------------------------------------
+     1   0.0   1  -1.0   Gaussian.inverse
+     1   0.0   1   0.0   Gaussian.log
+     1   0.0   1   1.0   Gaussian.id          Yes
+     1   1.0   1   0.0   Poisson.log          Yes
+     1   1.0   1   0.5   Poisson.sqrt
+     1   1.0   1   1.0   Poisson.id
+     1   2.0   1  -1.0   Gamma.inverse        Yes
+     1   2.0   1   0.0   Gamma.log
+     1   2.0   1   1.0   Gamma.id
+     1   3.0   1  -2.0   InvGaussian.1/mu^2   Yes
+     1   3.0   1  -1.0   InvGaussian.inverse
+     1   3.0   1   0.0   InvGaussian.log
+     1   3.0   1   1.0   InvGaussian.id
+     1    *    1    *    AnyVariance.AnyLink
+    ----------------------------------------------------------------------------------------------------------------------
+     2    *    1   0.0   Binomial.log
+     2    *    1   0.5   Binomial.sqrt
+     2    *    2    *    Binomial.logit       Yes
+     2    *    3    *    Binomial.probit
+     2    *    4    *    Binomial.cloglog
+     2    *    5    *    Binomial.cauchit
+    ----------------------------------------------------------------------------------------------------------------------
+    
+    
+    :param X: matrix X of feature vectors
+    :param Y: matrix Y with either 1 or 2 columns:
+        if dfam = 2, Y is 1-column Bernoulli or 2-column Binomial (#pos, #neg)
     :param dfam: Distribution family code: 1 = Power, 2 = Binomial
     :param vpow: Power for Variance defined as (mean)^power (ignored if dfam != 1):
+        0.0 = Gaussian, 1.0 = Poisson, 2.0 = Gamma, 3.0 = Inverse Gaussian
     :param link: Link function code: 0 = canonical (depends on distribution),
+        1 = Power, 2 = Logit, 3 = Probit, 4 = Cloglog, 5 = Cauchit
     :param lpow: Power for Link function defined as (mean)^power (ignored if link != 1):
+        -2.0 = 1/mu^2, -1.0 = reciprocal, 0.0 = log, 0.5 = sqrt, 1.0 = identity
     :param yneg: Response value for Bernoulli "No" label, usually 0.0 or -1.0
     :param icpt: Intercept presence, X columns shifting and rescaling:
+        0 = no intercept, no shifting, no rescaling;
+        1 = add intercept, but neither shift nor rescale X;
+        2 = add intercept, shift & rescale X columns to mean = 0, variance = 1
     :param reg: Regularization parameter (lambda) for L2 regularization
     :param tol: Tolerance (epsilon)
     :param disp: (Over-)dispersion value, or 0.0 to estimate it from data
     :param moi: Maximum number of outer (Newton / Fisher Scoring) iterations
     :param mii: Maximum number of inner (Conjugate Gradient) iterations, 0 = no maximum
-    :return: 'OperationNode' containing line, as follows: & integer indicating success/failure as follows: & value (regression coefficient), excluding the intercept & for the smallest beta value & value (regression coefficient), excluding the intercept & for the largest beta value & or nan if there is no intercept (if icpt=0) & to scale deviance, provided as "disp" input parameter & from the dataset & the saturated model, assuming dispersion == 1.0 & the saturated model, scaled by the di [...]
+    :param verbose: if the Algorithm should be verbose
+    :return: 'OperationNode' containing 
+        matrix beta, whose size depends on icpt:
+        icpt=0: ncol(x) x 1;  icpt=1: (ncol(x) + 1) x 1;  icpt=2: (ncol(x) + 1) x 2 
     """
     params_dict = {'X': X, 'Y': Y}
     params_dict.update(kwargs)
diff --git a/src/main/python/systemds/operator/algorithm/builtin/glmPredict.py b/src/main/python/systemds/operator/algorithm/builtin/glmPredict.py
index 38d7ef570f..234078c868 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/glmPredict.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/glmPredict.py
@@ -33,13 +33,62 @@ def glmPredict(X: Matrix,
                B: Matrix,
                **kwargs: Dict[str, VALID_INPUT_TYPES]):
     """
+    Applies the estimated parameters of a GLM type regression to a new dataset
+    Additional statistics are printed one per each line, in the following 
+    CSV format: NAME,[COLUMN],[SCALED],VALUE
+    ---
+    NAME   is the string identifier for the statistic, see the table below.
+    COLUMN is an optional integer value that specifies the Y-column for per-column statistics;
+           note that a Binomial/Multinomial one-column Y input is converted into multi-column.
+    SCALED is an optional Boolean value (TRUE or FALSE) that tells us whether or not the input
+             dispersion parameter (disp) scaling has been applied to this statistic.
+    VALUE  is the value of the statistic.
+    ---
+    NAME                  COLUMN  SCALED  MEANING
+    ---------------------------------------------------------------------------------------------
+    LOGLHOOD_Z                      +     Log-Likelihood Z-score (in st.dev's from mean)
+    LOGLHOOD_Z_PVAL                 +     Log-Likelihood Z-score p-value
+    PEARSON_X2                      +     Pearson residual X^2 statistic
+    PEARSON_X2_BY_DF                +     Pearson X^2 divided by degrees of freedom
+    PEARSON_X2_PVAL                 +     Pearson X^2 p-value
+    DEVIANCE_G2                     +     Deviance from saturated model G^2 statistic
+    DEVIANCE_G2_BY_DF               +     Deviance G^2 divided by degrees of freedom
+    DEVIANCE_G2_PVAL                +     Deviance G^2 p-value
+    AVG_TOT_Y               +             Average of Y column for a single response value
+    STDEV_TOT_Y             +             St.Dev. of Y column for a single response value
+    AVG_RES_Y               +             Average of column residual, i.e. of Y - mean(Y|X)
+    STDEV_RES_Y             +             St.Dev. of column residual, i.e. of Y - mean(Y|X)
+    PRED_STDEV_RES          +       +     Model-predicted St.Dev. of column residual
+    R2                      +             R^2 of Y column residual with bias included
+    ADJUSTED_R2             +             Adjusted R^2 of Y column residual with bias included
+    R2_NOBIAS               +             R^2 of Y column residual with bias subtracted
+    ADJUSTED_R2_NOBIAS      +             Adjusted R^2 of Y column residual with bias subtracted
+    ---------------------------------------------------------------------------------------------
+    
+    
+    :param X: Matrix X of records (feature vectors)
+    :param B: GLM regression parameters (the betas), with dimensions
+        ncol(X)   x k: do not add intercept
+        ncol(X)+1 x k: add intercept as given by the last B-row
+        if k > 1, use only B[, 1] unless it is Multinomial Logit (dfam=3)
+    :param ytest: Response matrix Y, with the following dimensions:
+        nrow(X) x 1  : for all distributions (dfam=1 or 2 or 3)
+        nrow(X) x 2  : for Binomial (dfam=2) given by (#pos, #neg) counts
+        nrow(X) x k+1: for Multinomial (dfam=3) given by category counts
     :param dfam: GLM distribution family: 1 = Power, 2 = Binomial, 3 = Multinomial Logit
     :param vpow: Power for Variance defined as (mean)^power (ignored if dfam != 1):
+        0.0 = Gaussian, 1.0 = Poisson, 2.0 = Gamma, 3.0 = Inverse Gaussian
     :param link: Link function code: 0 = canonical (depends on distribution), 1 = Power,
+        2 = Logit, 3 = Probit, 4 = Cloglog, 5 = Cauchit; ignored if Multinomial
     :param lpow: Power for Link function defined as (mean)^power (ignored if link != 1):
+        -2.0 = 1/mu^2, -1.0 = reciprocal, 0.0 = log, 0.5 = sqrt, 1.0 = identity
     :param disp: Dispersion value, when available
     :param verbose: Print statistics to stdout
-    :return: 'OperationNode' containing printed one per each line, in the following  & string identifier for the statistic, see the table below. & optional integer value that specifies the y-column for per-column statistics; & binomial/multinomial one-column y input is converted into multi-column. & optional boolean value (true or false) that tells us whether or not the input & value of the statistic. & meaning 
+    :return: 'OperationNode' containing 
+        matrix m of predicted means/probabilities:
+        nrow(x) x 1  : for power-type distributions (dfam=1)
+        nrow(x) x 2  : for binomial distribution (dfam=2), column 2 is "no"
+        nrow(x) x k+1: for multinomial logit (dfam=3), col# k+1 is baseline 
     """
     params_dict = {'X': X, 'B': B}
     params_dict.update(kwargs)
diff --git a/src/main/python/systemds/operator/algorithm/builtin/gmm.py b/src/main/python/systemds/operator/algorithm/builtin/gmm.py
index e2f74fab8f..726938d159 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/gmm.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/gmm.py
@@ -32,13 +32,24 @@ from systemds.utils.consts import VALID_INPUT_TYPES
 def gmm(X: Matrix,
         **kwargs: Dict[str, VALID_INPUT_TYPES]):
     """
+    The gmm-function implements builtin Gaussian Mixture Model with four different types of covariance matrices
+    i.e., VVV, EEE, VVI, VII and two initialization methods namely "kmeans" and "random".
+    
+    
+    :param X: Matrix X
     :param n_components: Number of n_components in the Gaussian mixture model
     :param model: "VVV": unequal variance (full),each component has its own general covariance matrix
+        "EEE": equal variance (tied), all components share the same general covariance matrix
+        "VVI": spherical, unequal volume (diag), each component has its own diagonal
+        covariance matrix
+        "VII": spherical, equal volume (spherical), each component has its own single variance
     :param init_param: initialize weights with "kmeans" or "random"
     :param iterations: Number of iterations
     :param reg_covar: regularization parameter for covariance matrix
     :param tol: tolerance value for convergence
-    :return: 'OperationNode' containing of estimated parameters & information criterion for best iteration & kth class 
+    :return: 'OperationNode' containing 
+        prediction matrix; probability of the predictions; number of estimated parameters; bayesian information criterion for best iteration; fitted clusters mean; a matrix whose [i,k]th entry is the probability that observation i in the test data
+        belongs to the kth class 
     """
     params_dict = {'X': X}
     params_dict.update(kwargs)
diff --git a/src/main/python/systemds/operator/algorithm/builtin/gmmPredict.py b/src/main/python/systemds/operator/algorithm/builtin/gmmPredict.py
index a920d94590..749ed39d67 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/gmmPredict.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/gmmPredict.py
@@ -35,8 +35,17 @@ def gmmPredict(X: Matrix,
                precisions_cholesky: Matrix,
                **kwargs: Dict[str, VALID_INPUT_TYPES]):
     """
+    This function is a Prediction function for a Gaussian Mixture Model (gmm).
+    It computes posterior probabilities for new instances given the variance and mean of fitted data.
+    
+    
+    :param X: Matrix X (instances to be clustered)
+    :param weight: Weight of learned model
+    :param mu: fitted clusters mean
+    :param precisions_cholesky: fitted precision matrix for each mixture
     :param model: fitted model
-    :return: 'OperationNode' containing cluster labels & of belongingness & for new instances given the variance and mean of fitted data 
+    :return: 'OperationNode' containing 
+        predicted cluster labels; probabilities of belongingness
     """
     params_dict = {'X': X, 'weight': weight, 'mu': mu, 'precisions_cholesky': precisions_cholesky}
     params_dict.update(kwargs)
diff --git a/src/main/python/systemds/operator/algorithm/builtin/gnmf.py b/src/main/python/systemds/operator/algorithm/builtin/gnmf.py
index 7d1d0cd408..29fc19c3a5 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/gnmf.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/gnmf.py
@@ -33,10 +33,21 @@ def gnmf(X: Matrix,
          rnk: int,
          **kwargs: Dict[str, VALID_INPUT_TYPES]):
     """
+    The gnmf-function does Gaussian Non-Negative Matrix Factorization. In this, a matrix X is factorized into two
+    matrices W and H, such that all three matrices have no negative elements. This non-negativity makes the resulting
+    matrices easier to inspect.
+    References:
+    [Chao Liu, Hung-chih Yang, Jinliang Fan, Li-Wei He, Yi-Min Wang:
+    Distributed nonnegative matrix factorization for web-scale dyadic
+    data analysis on mapreduce. WWW 2010: 681-690]
+    
+    
+    :param X: Matrix of feature vectors.
     :param rnk: Number of components into which matrix X is to be factored
     :param eps: Tolerance
     :param maxi: Maximum number of conjugate gradient iterations
-    :return: 'OperationNode' containing  
+    :return: 'OperationNode' containing 
+        list of pattern matrices, one for each repetition; list of amplitude matrices, one for each repetition
     """
     params_dict = {'X': X, 'rnk': rnk}
     params_dict.update(kwargs)
diff --git a/src/main/python/systemds/operator/algorithm/builtin/gridSearch.py b/src/main/python/systemds/operator/algorithm/builtin/gridSearch.py
index 42304818b9..5eb5709654 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/gridSearch.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/gridSearch.py
@@ -37,19 +37,32 @@ def gridSearch(X: Matrix,
                paramValues: List,
                **kwargs: Dict[str, VALID_INPUT_TYPES]):
     """
+    The gridSearch-function is used to find the optimal hyper-parameters of a model which results in the most
+    accurate predictions. This function takes train and eval functions by name.
+    
+    
+    :param X: Input feature matrix
+    :param y: Input Matrix of vectors.
     :param train: Name ft of the train function to call via ft(trainArgs)
     :param predict: Name fp of the loss function to call via fp((predictArgs,B))
     :param numB: Maximum number of parameters in model B (pass the max because the size
-    :param may: parameters like icpt or multi-class classification)
-    :param columnvectors: hyper-parameters in 'params'
-    :param gridSearch: hyper-parameter by name, if
-    :param not: an empty list, the lm parameters are used
-    :param gridSearch: trained models at the end, if
-    :param not: an empty list, list(X, y) is used instead
+        may vary with parameters like icpt or multi-class classification)
+    :param params: List of varied hyper-parameter names
+    :param dataArgs: List of data parameters (to identify data parameters by name i.e. list("X", "Y"))
+    :param paramValues: List of matrices providing the parameter values as
+        columnvectors for position-aligned hyper-parameters in 'params'
+    :param trainArgs: named List of arguments to pass to the 'train' function, where
+        gridSearch replaces enumerated hyper-parameter by name, if
+        not provided or an empty list, the lm parameters are used
+    :param predictArgs: List of arguments to pass to the 'predict' function, where
+        gridSearch appends the trained models at the end, if
+        not provided or an empty list, list(X, y) is used instead
     :param cv: flag enabling k-fold cross validation, otherwise training loss
     :param cvk: if cv=TRUE, specifies the the number of folds, otherwise ignored
     :param verbose: flag for verbose debug output
-    :return: 'OperationNode' containing returned as a column-major linearized column vector 
+    :return: 'OperationNode' containing 
+        matrix[double]: the trained model with minimal loss (by the 'predict' function),
+        multi-column models are returned as a column-major linearized column vector; one-row frame w/ optimal hyper-parameters (by 'params' position)
     """
     params_dict = {'X': X, 'y': y, 'train': train, 'predict': predict, 'params': params, 'paramValues': paramValues}
     params_dict.update(kwargs)
diff --git a/src/main/python/systemds/operator/algorithm/builtin/hospitalResidencyMatch.py b/src/main/python/systemds/operator/algorithm/builtin/hospitalResidencyMatch.py
index edfdf86a4e..5db64203ac 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/hospitalResidencyMatch.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/hospitalResidencyMatch.py
@@ -34,12 +34,52 @@ def hospitalResidencyMatch(R: Matrix,
                            capacity: Matrix,
                            **kwargs: Dict[str, VALID_INPUT_TYPES]):
     """
-    :param It: an ORDERED  matrix.
-    :param It: an UNORDRED matrix.
-    :param It: a [n*1] matrix with non zero values.
-    :param with: and vice-versa (higher is better).
+    This script computes a solution for the hospital residency match problem.
+    Residents.mtx:
+    2.0,1.0,3.0
+    1.0,2.0,3.0
+    1.0,2.0,0.0
+    Since it is an ORDERED  matrix, this means that Resident 1 (row 1) likes hospital 2 the most, followed by hospital 1 and hospital 3.
+    If it was UNORDERED, this would mean that resident 1 (row 1) likes hospital 3 the most (since the value at [1,3] is the row max),
+    followed by hospital 1 (2.0 preference value) and hospital 2 (1.0 preference value).
+    Hospitals.mtx:
+    2.0,1.0,0.0
+    0.0,1.0,2.0
+    1.0,2.0,0.0
+    Since it is an UNORDERED matrix this means that Hospital 1 (row 1) likes Resident 1 the most (since the value at [1,1] is the row max).
+    capacity.mtx
+    1.0
+    1.0
+    1.0
+    residencyMatch.mtx
+    2.0,0.0,0.0
+    1.0,0.0,0.0
+    0.0,2.0,0.0
+    hospitalMatch.mtx
+    0.0,1.0,0.0
+    0.0,0.0,2.0
+    1.0,0.0,0.0
+    Resident 1 has matched with Hospital 3 (since [1,3] is non-zero) at a preference level of 2.0.
+    Resident 2 has matched with Hospital 1 (since [2,1] is non-zero) at a preference level of 1.0.
+    Resident 3 has matched with Hospital 2 (since [3,2] is non-zero) at a preference level of 2.0.
+    
+    
+    :param R: Residents matrix R.
+        It must be an ORDERED  matrix.
+    :param H: Hospitals matrix H.
+        It must be an UNORDERED matrix.
+    :param capacity: capacity of Hospitals matrix C.
+        It must be a [n*1] matrix with non zero values.
+        i.e. the leftmost value in a row is the most preferred partner's index.
+        i.e. the leftmost value in a row in P is the preference value for the acceptor
+        with index 1 and vice-versa (higher is better).
     :param verbose: If the operation is verbose
-    :return: 'OperationNode' containing an ordered  matrix, this means that resident 1 (row 1) likes hospital 2 the most, followed by hospital 1 and hospital 3. & unordered, this would mean that resident 1 (row 1) likes hospital 3 the most (since the value at [1,3] is the row max), & 1 (2.0 preference value) and hospital 2 (1.0 preference value). & an unordered matrix this means that hospital 1 (row 1) likes resident 1 the most (since the value at [1,1] is the row max). & matched with ho [...]
+    :return: 'OperationNode' containing 
+        result matrix
+        if cell [i,j] is non-zero, it means that resident i has matched with hospital j.
+        further, if cell [i,j] is non-zero, it holds the preference value that led to the match; result matrix
+        if cell [i,j] is non-zero, it means that resident i has matched with hospital j.
+        further, if cell [i,j] is non-zero, it holds the preference value that led to the match. 
     """
     params_dict = {'R': R, 'H': H, 'capacity': capacity}
     params_dict.update(kwargs)
diff --git a/src/main/python/systemds/operator/algorithm/builtin/hyperband.py b/src/main/python/systemds/operator/algorithm/builtin/hyperband.py
index 727bb51bec..ea3ad26085 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/hyperband.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/hyperband.py
@@ -37,9 +37,27 @@ def hyperband(X_train: Matrix,
               paramRanges: Matrix,
               **kwargs: Dict[str, VALID_INPUT_TYPES]):
     """
-    :param One: hyper parameter, first column specifies min, second column max value.
+    The hyperband-function is used for hyper parameter optimization and is based on multi-armed bandits and early
+    elimination. Through multiple parallel brackets and consecutive trials it will return the hyper parameter combination
+    which performed best on a validation dataset. A set of hyper parameter combinations is drawn from uniform distributions
+    with given ranges; Those make up the candidates for hyperband. Notes:
+       hyperband is hard-coded for lmCG, and uses lmPredict for validation
+       hyperband is hard-coded to use the number of iterations as a resource
+       hyperband can only optimize continuous hyperparameters
+    
+    
+    :param X_train: Input Matrix of training vectors
+    :param y_train: Labels for training vectors
+    :param X_val: Input Matrix of validation vectors
+    :param y_val: Labels for validation vectors
+    :param params: List of parameters to optimize
+    :param paramRanges: The min and max values for the uniform distributions to draw from.
+        One row per hyper parameter, first column specifies min, second column max value.
+    :param R: Controls number of candidates evaluated
+    :param eta: Determines fraction of candidates to keep after each trial
     :param verbose: If TRUE print messages are activated
-    :return: 'OperationNode' containing  
+    :return: 'OperationNode' containing 
+        1-column matrix of weights of best performing candidate; hyper parameters of best performing candidate
     """
     params_dict = {'X_train': X_train, 'y_train': y_train, 'X_val': X_val, 'y_val': y_val, 'params': params, 'paramRanges': paramRanges}
     params_dict.update(kwargs)
diff --git a/src/main/python/systemds/operator/algorithm/builtin/img_brightness.py b/src/main/python/systemds/operator/algorithm/builtin/img_brightness.py
index 5a0158a7c4..2aba1a5b16 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/img_brightness.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/img_brightness.py
@@ -33,9 +33,14 @@ def img_brightness(img_in: Matrix,
                    value: float,
                    channel_max: int):
     """
+    The img_brightness-function is an image data augmentation function. It changes the brightness of the image.
+    
+    
+    :param img_in: Input matrix/image
     :param value: The amount of brightness to be changed for the image
     :param channel_max: Maximum value of the brightness of the image
-    :return: 'OperationNode' containing  
+    :return: 'OperationNode' containing 
+        output matrix/image
     """
     params_dict = {'img_in': img_in, 'value': value, 'channel_max': channel_max}
     return Matrix(img_in.sds_context,
diff --git a/src/main/python/systemds/operator/algorithm/builtin/img_crop.py b/src/main/python/systemds/operator/algorithm/builtin/img_crop.py
index e224464da3..a5affa8ac1 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/img_crop.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/img_crop.py
@@ -35,11 +35,16 @@ def img_crop(img_in: Matrix,
              x_offset: int,
              y_offset: int):
     """
+    The img_crop-function is an image data augmentation function. It cuts out a subregion of an image.
+    
+    
+    :param img_in: Input matrix/image
     :param w: The width of the subregion required
     :param h: The height of the subregion required
     :param x_offset: The horizontal coordinate in the image to begin the crop operation
     :param y_offset: The vertical coordinate in the image to begin the crop operation
-    :return: 'OperationNode' containing  
+    :return: 'OperationNode' containing 
+        cropped matrix/image 
     """
     params_dict = {'img_in': img_in, 'w': w, 'h': h, 'x_offset': x_offset, 'y_offset': y_offset}
     return Matrix(img_in.sds_context,
diff --git a/src/main/python/systemds/operator/algorithm/builtin/img_cutout.py b/src/main/python/systemds/operator/algorithm/builtin/img_cutout.py
index b1c6bb4cff..de2e15026c 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/img_cutout.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/img_cutout.py
@@ -36,12 +36,17 @@ def img_cutout(img_in: Matrix,
                height: int,
                fill_value: float):
     """
+    Image Cutout function replaces a rectangular section of an image with a constant value.
+    
+    
+    :param img_in: Input image as 2D matrix with top left corner at [1, 1]
     :param x: Column index of the top left corner of the rectangle (starting at 1)
     :param y: Row index of the top left corner of the rectangle (starting at 1)
     :param width: Width of the rectangle (must be positive)
     :param height: Height of the rectangle (must be positive)
     :param fill_value: The value to set for the rectangle
-    :return: 'OperationNode' containing  
+    :return: 'OperationNode' containing 
+        output image as 2d matrix with top left corner at [1, 1] 
     """
     params_dict = {'img_in': img_in, 'x': x, 'y': y, 'width': width, 'height': height, 'fill_value': fill_value}
     return Matrix(img_in.sds_context,
diff --git a/src/main/python/systemds/operator/algorithm/builtin/img_invert.py b/src/main/python/systemds/operator/algorithm/builtin/img_invert.py
index 0e327352c2..a88c0f4339 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/img_invert.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/img_invert.py
@@ -32,8 +32,13 @@ from systemds.utils.consts import VALID_INPUT_TYPES
 def img_invert(img_in: Matrix,
                max_value: float):
     """
+    This is an image data augmentation function. It inverts an image.
+    
+    
+    :param img_in: Input image
     :param max_value: The maximum value pixels can have
-    :return: 'OperationNode' containing  
+    :return: 'OperationNode' containing 
+        output image 
     """
     params_dict = {'img_in': img_in, 'max_value': max_value}
     return Matrix(img_in.sds_context,
diff --git a/src/main/python/systemds/operator/algorithm/builtin/img_mirror.py b/src/main/python/systemds/operator/algorithm/builtin/img_mirror.py
index 659dce8ab4..8dc15c5830 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/img_mirror.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/img_mirror.py
@@ -32,8 +32,14 @@ from systemds.utils.consts import VALID_INPUT_TYPES
 def img_mirror(img_in: Matrix,
                horizontal_axis: bool):
     """
+    This function is an image data augmentation function.
+    It flips an image on the X (horizontal) or Y (vertical) axis.
+    
+    
+    :param img_in: Input matrix/image
     :param horizontal_axis: If true, the image is flipped on the X (horizontal) axis; otherwise on the Y (vertical) axis
-    :return: 'OperationNode' containing  
+    :return: 'OperationNode' containing 
+        flipped matrix/image 
     """
     params_dict = {'img_in': img_in, 'horizontal_axis': horizontal_axis}
     return Matrix(img_in.sds_context,
diff --git a/src/main/python/systemds/operator/algorithm/builtin/img_posterize.py b/src/main/python/systemds/operator/algorithm/builtin/img_posterize.py
index d5c6e46b52..083824e37e 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/img_posterize.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/img_posterize.py
@@ -32,9 +32,15 @@ from systemds.utils.consts import VALID_INPUT_TYPES
 def img_posterize(img_in: Matrix,
                   bits: int):
     """
+    The Image Posterize function limits pixel values to 2^bits different values in the range [0, 255].
+    Assumes the input image can attain values in the range [0, 255].
+    
+    
+    :param img_in: Input image
     :param bits: The number of bits to keep for the values.
-    :param 1: and white, 8 means every integer between 0 and 255.
-    :return: 'OperationNode' containing  
+        1 means black and white, 8 means every integer between 0 and 255.
+    :return: 'OperationNode' containing 
+        output image 
     """
     params_dict = {'img_in': img_in, 'bits': bits}
... 2421 lines suppressed ...