You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by gw...@apache.org on 2017/05/12 17:23:27 UTC
incubator-systemml git commit: [SYSTEMML-1605] Add zeppelin sample using new MLContext

Repository: incubator-systemml
Updated Branches:
  refs/heads/master 6adcb369d -> 345e1ab43


[SYSTEMML-1605] Add zeppelin sample using new MLContext

Added example for Linear Regression Conjugate Gradient using new
MLContext and removed previous Linear Regression example based on
deprecated MLContext.

Closes #494.


Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/345e1ab4
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/345e1ab4
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/345e1ab4

Branch: refs/heads/master
Commit: 345e1ab432df4e7efec9d96f50f95c397f1eafb6
Parents: 6adcb36
Author: Glenn Weidner <gw...@us.ibm.com>
Authored: Fri May 12 10:19:38 2017 -0700
Committer: Glenn Weidner <gw...@us.ibm.com>
Committed: Fri May 12 10:19:38 2017 -0700

----------------------------------------------------------------------
 samples/zeppelin-notebooks/2AZ2AQ12B/note.json  | 392 -------------------
 .../SystemML_LinearRegCG.json                   |   1 +
 2 files changed, 1 insertion(+), 392 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/345e1ab4/samples/zeppelin-notebooks/2AZ2AQ12B/note.json
----------------------------------------------------------------------
diff --git a/samples/zeppelin-notebooks/2AZ2AQ12B/note.json b/samples/zeppelin-notebooks/2AZ2AQ12B/note.json
deleted file mode 100644
index 174e9f7..0000000
--- a/samples/zeppelin-notebooks/2AZ2AQ12B/note.json
+++ /dev/null
@@ -1,392 +0,0 @@
-{
-  "paragraphs": [
-    {
-      "text": "// Trigger Spark Startup\nsc",
-      "dateUpdated": "Oct 12, 2015 10:47:23 AM",
-      "config": {
-        "colWidth": 12.0,
-        "graph": {
-          "mode": "table",
-          "height": 300.0,
-          "optionOpen": false,
-          "keys": [],
-          "values": [],
-          "groups": [],
-          "scatter": {}
-        },
-        "editorMode": "ace/mode/scala"
-      },
-      "settings": {
-        "params": {},
-        "forms": {}
-      },
-      "jobName": "paragraph_1444073218763_655887574",
-      "id": "20151005-122658_592219673",
-      "result": {
-        "code": "SUCCESS",
-        "type": "TEXT",
-        "msg": "res8: org.apache.spark.SparkContext \u003d org.apache.spark.SparkContext@6ce70bf3\n"
-      },
-      "dateCreated": "Oct 5, 2015 12:26:58 PM",
-      "dateStarted": "Oct 12, 2015 10:47:23 AM",
-      "dateFinished": "Oct 12, 2015 10:47:23 AM",
-      "status": "FINISHED",
-      "progressUpdateIntervalMs": 500
-    },
-    {
-      "text": "// Generate data\nimport org.apache.spark.mllib.util.LinearDataGenerator\n\nval numRows \u003d 10000\nval numCols \u003d 1000\nval rawData \u003d LinearDataGenerator.generateLinearRDD(sc, numRows, numCols, 1).toDF()\n\n// Repartition into a more parallelism-friendly number of partitions\nval data \u003d rawData.repartition(64).cache()",
-      "dateUpdated": "Oct 12, 2015 10:49:12 AM",
-      "config": {
-        "colWidth": 12.0,
-        "graph": {
-          "mode": "table",
-          "height": 300.0,
-          "optionOpen": false,
-          "keys": [],
-          "values": [],
-          "groups": [],
-          "scatter": {}
-        },
-        "editorMode": "ace/mode/scala"
-      },
-      "settings": {
-        "params": {},
-        "forms": {}
-      },
-      "jobName": "paragraph_1444067726904_-135213052",
-      "id": "20151005-105526_1974722763",
-      "result": {
-        "code": "SUCCESS",
-        "type": "TEXT",
-        "msg": "import org.apache.spark.mllib.util.LinearDataGenerator\nnumRows: Int \u003d 10000\nnumCols: Int \u003d 1000\nrawData: org.apache.spark.sql.DataFrame \u003d [label: double, features: vector]\ndata: org.apache.spark.sql.DataFrame \u003d [label: double, features: vector]\n"
-      },
-      "dateCreated": "Oct 5, 2015 10:55:26 AM",
-      "dateStarted": "Oct 12, 2015 10:49:12 AM",
-      "dateFinished": "Oct 12, 2015 10:49:13 AM",
-      "status": "FINISHED",
-      "progressUpdateIntervalMs": 500
-    },
-    {
-      "text": "// Spark ML\nimport org.apache.spark.ml.regression.LinearRegression\n\n// Model Settings\nval maxIters \u003d 100\nval reg \u003d 0\nval elasticNetParam \u003d 0  // L2 reg\n\n// Fit the model\nval lr \u003d new LinearRegression()\n  .setMaxIter(maxIters)\n  .setRegParam(reg)\n  .setElasticNetParam(elasticNetParam)\nval start \u003d System.currentTimeMillis()\nval model \u003d lr.fit(data)\nval trainingTime \u003d (System.currentTimeMillis() - start).toDouble / 1000.0\n\n// Summarize the model over the training set and gather some metrics\nval trainingSummary \u003d model.summary\nval r2 \u003d trainingSummary.r2\nval iters \u003d trainingSummary.totalIterations\nval trainingTimePerIter \u003d trainingTime / iters",
-      "dateUpdated": "Oct 12, 2015 10:47:23 AM",
-      "config": {
-        "colWidth": 12.0,
-        "graph": {
-          "mode": "table",
-          "height": 300.0,
-          "optionOpen": false,
-          "keys": [],
-          "values": [],
-          "groups": [],
-          "scatter": {}
-        },
-        "editorMode": "ace/mode/scala",
-        "tableHide": false
-      },
-      "settings": {
-        "params": {},
-        "forms": {}
-      },
-      "jobName": "paragraph_1444072136657_1600671053",
-      "id": "20151005-120856_674927719",
-      "result": {
-        "code": "SUCCESS",
-        "type": "TEXT",
-        "msg": "import org.apache.spark.ml.regression.LinearRegression\nmaxIters: Int \u003d 100\nreg: Int \u003d 0\nelasticNetParam: Int \u003d 0\nlr: org.apache.spark.ml.regression.LinearRegression \u003d linReg_a7f51d676562\nstart: Long \u003d 1444672044647\nmodel: org.apache.spark.ml.regression.LinearRegressionModel \u003d linReg_a7f51d676562\ntrainingTime: Double \u003d 12.985\ntrainingSummary: org.apache.spark.ml.regression.LinearRegressionTrainingSummary \u003d org.apache.spark.ml.regression.LinearRegressionTrainingSummary@227ba28b\nr2: Double \u003d 0.9677118209276552\niters: Int \u003d 17\ntrainingTimePerIter: Double \u003d 0.7638235294117647\n"
-      },
-      "dateCreated": "Oct 5, 2015 12:08:56 PM",
-      "dateStarted": "Oct 12, 2015 10:47:24 AM",
-      "dateFinished": "Oct 12, 2015 10:47:38 AM",
-      "status": "FINISHED",
-      "progressUpdateIntervalMs": 500
-    },
-    {
-      "text": "// Print statistics\nprintln(s\"R2: ${r2}\")\nprintln(s\"Iterations: ${iters}\")\nprintln(s\"Training time per iter: ${trainingTimePerIter} seconds\")",
-      "dateUpdated": "Oct 12, 2015 10:47:23 AM",
-      "config": {
-        "colWidth": 12.0,
-        "graph": {
-          "mode": "table",
-          "height": 300.0,
-          "optionOpen": false,
-          "keys": [],
-          "values": [],
-          "groups": [],
-          "scatter": {}
-        },
-        "editorMode": "ace/mode/scala"
-      },
-      "settings": {
-        "params": {},
-        "forms": {}
-      },
-      "jobName": "paragraph_1444440910099_1714338510",
-      "id": "20151009-183510_1200043993",
-      "result": {
-        "code": "SUCCESS",
-        "type": "TEXT",
-        "msg": "R2: 0.9677118209276552\nIterations: 17\nTraining time per iter: 0.7638235294117647 seconds\n"
-      },
-      "dateCreated": "Oct 9, 2015 6:35:10 PM",
-      "dateStarted": "Oct 12, 2015 10:47:24 AM",
-      "dateFinished": "Oct 12, 2015 10:47:38 AM",
-      "status": "FINISHED",
-      "progressUpdateIntervalMs": 500
-    },
-    {
-      "text": "// SystemML kernels\nval linearReg \u003d\n\"\"\"\n#\n# THIS SCRIPT SOLVES LINEAR REGRESSION USING THE CONJUGATE GRADIENT ALGORITHM\n#\n# INPUT PARAMETERS:\n# --------------------------------------------------------------------------------------------\n# NAME  TYPE   DEFAULT  MEANING\n# --------------------------------------------------------------------------------------------\n# X     String  ---     Matrix X of feature vectors\n# Y     String  ---     1-column Matrix Y of response values\n# icpt  Int      0      Intercept presence, shifting and rescaling the columns of X:\n#                       0 \u003d no intercept, no shifting, no rescaling;\n#                       1 \u003d add intercept, but neither shift nor rescale X;\n#                       2 \u003d add intercept, shift \u0026 rescale X columns to mean \u003d 0, variance \u003d 1\n# reg   Double 0.000001 Regularization constant (lambda) for L2-regularization; set to nonzero\n#                       for hi
 ghly dependend/sparse/numerous features\n# tol   Double 0.000001 Tolerance (epsilon); conjugate graduent procedure terminates early if\n#                       L2 norm of the beta-residual is less than tolerance * its initial norm\n# maxi  Int      0      Maximum number of conjugate gradient iterations, 0 \u003d no maximum\n# --------------------------------------------------------------------------------------------\n#\n# OUTPUT:\n# B Estimated regression parameters (the betas) to store\n#\n# Note: Matrix of regression parameters (the betas) and its size depend on icpt input value:\n#         OUTPUT SIZE:   OUTPUT CONTENTS:                HOW TO PREDICT Y FROM X AND B:\n# icpt\u003d0: ncol(X)   x 1  Betas for X only                Y ~ X %*% B[1:ncol(X), 1], or just X %*% B\n# icpt\u003d1: ncol(X)+1 x 1  Betas for X and intercept       Y ~ X %*% B[1:ncol(X), 1] + B[ncol(X)+1, 1]\n# icpt\u003d2: ncol(X)+1 x 2  Col.1: betas for X \u0026 intercept  Y ~ X %*% B[1:ncol(X), 1] + B[ncol(X)
 +1, 1]\n#                        Col.2: betas for shifted/rescaled X and intercept\n#\n\nfileX \u003d \"\";\nfileY \u003d \"\";\nfileB \u003d \"\";\n\nintercept_status \u003d ifdef ($icpt, 0);     # $icpt\u003d0;\ntolerance \u003d ifdef ($tol, 0.000001);      # $tol\u003d0.000001;\nmax_iteration \u003d ifdef ($maxi, 0);        # $maxi\u003d0;\nregularization \u003d ifdef ($reg, 0.000001); # $reg\u003d0.000001;\n\nX \u003d read (fileX);\ny \u003d read (fileY);\n\nn \u003d nrow (X);\nm \u003d ncol (X);\nones_n \u003d matrix (1, rows \u003d n, cols \u003d 1);\nzero_cell \u003d matrix (0, rows \u003d 1, cols \u003d 1);\n\n# Introduce the intercept, shift and rescale the columns of X if needed\n\nm_ext \u003d m;\nif (intercept_status \u003d\u003d 1 | intercept_status \u003d\u003d 2)  # add the intercept column\n{\n    X \u003d append (X, ones_n);\n    m_ext \u003d ncol (X);\n}\n\nscale_lambda \u003d matrix (1, rows \u003d m_ext, cols \u003d 1);\nif (intercept_status \u003d\u003d 1 | inte
 rcept_status \u003d\u003d 2)\n{\n    scale_lambda [m_ext, 1] \u003d 0;\n}\n\nif (intercept_status \u003d\u003d 2)  # scale-\u0026-shift X columns to mean 0, variance 1\n{                           # Important assumption: X [, m_ext] \u003d ones_n\n    avg_X_cols \u003d t(colSums(X)) / n;\n    var_X_cols \u003d (t(colSums (X ^ 2)) - n * (avg_X_cols ^ 2)) / (n - 1);\n    is_unsafe \u003d ppred (var_X_cols, 0.0, \"\u003c\u003d\");\n    scale_X \u003d 1.0 / sqrt (var_X_cols * (1 - is_unsafe) + is_unsafe);\n    scale_X [m_ext, 1] \u003d 1;\n    shift_X \u003d - avg_X_cols * scale_X;\n    shift_X [m_ext, 1] \u003d 0;\n} else {\n    scale_X \u003d matrix (1, rows \u003d m_ext, cols \u003d 1);\n    shift_X \u003d matrix (0, rows \u003d m_ext, cols \u003d 1);\n}\n\n# Henceforth, if intercept_status \u003d\u003d 2, we use \"X %*% (SHIFT/SCALE TRANSFORM)\"\n# instead of \"X\".  However, in order to preserve the sparsity of X,\n# we apply the transform associatively to some other part of the ex
 pression\n# in which it occurs.  To avoid materializing a large matrix, we rewrite it:\n#\n# ssX_A  \u003d (SHIFT/SCALE TRANSFORM) %*% A    --- is rewritten as:\n# ssX_A  \u003d diag (scale_X) %*% A;\n# ssX_A [m_ext, ] \u003d ssX_A [m_ext, ] + t(shift_X) %*% A;\n#\n# tssX_A \u003d t(SHIFT/SCALE TRANSFORM) %*% A   --- is rewritten as:\n# tssX_A \u003d diag (scale_X) %*% A + shift_X %*% A [m_ext, ];\n\nlambda \u003d scale_lambda * regularization;\nbeta_unscaled \u003d matrix (0, rows \u003d m_ext, cols \u003d 1);\n\nif (max_iteration \u003d\u003d 0) {\n    max_iteration \u003d m_ext;\n}\ni \u003d 0;\n\n# BEGIN THE CONJUGATE GRADIENT ALGORITHM\nr \u003d - t(X) %*% y;\n\nif (intercept_status \u003d\u003d 2) {\n    r \u003d scale_X * r + shift_X %*% r [m_ext, ];\n}\n\np \u003d - r;\nnorm_r2 \u003d sum (r ^ 2);\nnorm_r2_initial \u003d norm_r2;\nnorm_r2_target \u003d norm_r2_initial * tolerance ^ 2;\n\nwhile (i \u003c max_iteration \u0026 norm_r2 \u003e norm_r2_target)\n{\n    if (intercep
 t_status \u003d\u003d 2) {\n        ssX_p \u003d scale_X * p;\n        ssX_p [m_ext, ] \u003d ssX_p [m_ext, ] + t(shift_X) %*% p;\n    } else {\n        ssX_p \u003d p;\n    }\n\n    q \u003d t(X) %*% (X %*% ssX_p);\n\n    if (intercept_status \u003d\u003d 2) {\n        q \u003d scale_X * q + shift_X %*% q [m_ext, ];\n    }\n\n    q \u003d q + lambda * p;\n    a \u003d norm_r2 / sum (p * q);\n    beta_unscaled \u003d beta_unscaled + a * p;\n    r \u003d r + a * q;\n    old_norm_r2 \u003d norm_r2;\n    norm_r2 \u003d sum (r ^ 2);\n    p \u003d -r + (norm_r2 / old_norm_r2) * p;\n    i \u003d i + 1;\n}\n# END THE CONJUGATE GRADIENT ALGORITHM\n\nif (intercept_status \u003d\u003d 2) {\n    beta \u003d scale_X * beta_unscaled;\n    beta [m_ext, ] \u003d beta [m_ext, ] + t(shift_X) %*% beta_unscaled;\n} else {\n    beta \u003d beta_unscaled;\n}\n\n# Output statistics\navg_tot \u003d sum (y) / n;\nss_tot \u003d sum (y ^ 2);\nss_avg_tot \u003d ss_tot - n * avg_tot ^ 2;\nvar_tot \u003d ss_avg
 _tot / (n - 1);\ny_residual \u003d y - X %*% beta;\navg_res \u003d sum (y_residual) / n;\nss_res \u003d sum (y_residual ^ 2);\nss_avg_res \u003d ss_res - n * avg_res ^ 2;\n\nR2_temp \u003d 1 - ss_res / ss_avg_tot\nR2 \u003d matrix(R2_temp, rows\u003d1, cols\u003d1)\nwrite(R2, \"\")\n\ntotalIters \u003d matrix(i, rows\u003d1, cols\u003d1)\nwrite(totalIters, \"\")\n\n# Prepare the output matrix\nif (intercept_status \u003d\u003d 2) {\n    beta_out \u003d append (beta, beta_unscaled);\n} else {\n    beta_out \u003d beta;\n}\n\nwrite (beta_out, fileB);\n\"\"\"",
-      "dateUpdated": "Oct 12, 2015 10:47:23 AM",
-      "config": {
-        "colWidth": 12.0,
-        "graph": {
-          "mode": "table",
-          "height": 300.0,
-          "optionOpen": false,
-          "keys": [],
-          "values": [],
-          "groups": [],
-          "scatter": {}
-        },
-        "editorMode": "ace/mode/scala",
-        "tableHide": true,
-        "editorHide": false
-      },
-      "settings": {
-        "params": {},
-        "forms": {}
-      },
-      "jobName": "paragraph_1444067747401_589515959",
-      "id": "20151005-105547_1888511498",
-      "result": {
-        "code": "SUCCESS",
-        "type": "TEXT",
-        "msg": "linearReg: String \u003d \n\"\n#\n# THIS SCRIPT SOLVES LINEAR REGRESSION USING THE CONJUGATE GRADIENT ALGORITHM\n#\n# INPUT PARAMETERS:\n# --------------------------------------------------------------------------------------------\n# NAME  TYPE   DEFAULT  MEANING\n# --------------------------------------------------------------------------------------------\n# X     String  ---     Matrix X of feature vectors\n# Y     String  ---     1-column Matrix Y of response values\n# icpt  Int      0      Intercept presence, shifting and rescaling the columns of X:\n#                       0 \u003d no intercept, no shifting, no rescaling;\n#                       1 \u003d add intercept, but neither shift nor rescale X;\n#                       2 \u003d add intercept, shift \u0026 rescale X columns to mean \u003d 0, variance \u003d 1\n# reg   Doub..."
-      },
-      "dateCreated": "Oct 5, 2015 10:55:47 AM",
-      "dateStarted": "Oct 12, 2015 10:47:38 AM",
-      "dateFinished": "Oct 12, 2015 10:47:38 AM",
-      "status": "FINISHED",
-      "progressUpdateIntervalMs": 500
-    },
-    {
-      "text": "// SystemML Kernels\nval linearRegFull \u003d\n\"\"\"\n#-------------------------------------------------------------\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n#-------------------------------------------------------------\n\n#\n# THIS SCRIPT SOLVES LINEAR REGRESSION USING THE CONJUGATE GRADIENT ALGORITHM\n#\n# INPUT PARAMETERS:\n# --------------------------------------------------------------------------------------------\n# NAME  TYPE   DEFAULT  MEANIN
 G\n# --------------------------------------------------------------------------------------------\n# X     String  ---     Location (on HDFS) to read the matrix X of feature vectors\n# Y     String  ---     Location (on HDFS) to read the 1-column matrix Y of response values\n# B     String  ---     Location to store estimated regression parameters (the betas)\n# O     String  \" \"     Location to write the printed statistics; by default is standard output\n# Log   String  \" \"     Location to write per-iteration variables for log/debugging purposes\n# icpt  Int      0      Intercept presence, shifting and rescaling the columns of X:\n#                       0 \u003d no intercept, no shifting, no rescaling;\n#                       1 \u003d add intercept, but neither shift nor rescale X;\n#                       2 \u003d add intercept, shift \u0026 rescale X columns to mean \u003d 0, variance \u003d 1\n# reg   Double 0.000001 Regularization constant (lambda) for L2-regularization; 
 set to nonzero\n#                       for highly dependend/sparse/numerous features\n# tol   Double 0.000001 Tolerance (epsilon); conjugate graduent procedure terminates early if\n#                       L2 norm of the beta-residual is less than tolerance * its initial norm\n# maxi  Int      0      Maximum number of conjugate gradient iterations, 0 \u003d no maximum\n# fmt   String \"text\"   Matrix output format for B (the betas) only, usually \"text\" or \"csv\"\n# --------------------------------------------------------------------------------------------\n# OUTPUT: Matrix of regression parameters (the betas) and its size depend on icpt input value:\n#         OUTPUT SIZE:   OUTPUT CONTENTS:                HOW TO PREDICT Y FROM X AND B:\n# icpt\u003d0: ncol(X)   x 1  Betas for X only                Y ~ X %*% B[1:ncol(X), 1], or just X %*% B\n# icpt\u003d1: ncol(X)+1 x 1  Betas for X and intercept       Y ~ X %*% B[1:ncol(X), 1] + B[ncol(X)+1, 1]\n# icpt\u003d2: ncol(X)+1 x 2  C
 ol.1: betas for X \u0026 intercept  Y ~ X %*% B[1:ncol(X), 1] + B[ncol(X)+1, 1]\n#                        Col.2: betas for shifted/rescaled X and intercept\n#\n# In addition, some regression statistics are provided in CSV format, one comma-separated\n# name-value pair per each line, as follows:\n#\n# NAME                  MEANING\n# -------------------------------------------------------------------------------------\n# AVG_TOT_Y             Average of the response value Y\n# STDEV_TOT_Y           Standard Deviation of the response value Y\n# AVG_RES_Y             Average of the residual Y - pred(Y|X), i.e. residual bias\n# STDEV_RES_Y           Standard Deviation of the residual Y - pred(Y|X)\n# DISPERSION            GLM-style dispersion, i.e. residual sum of squares / # deg. fr.\n# PLAIN_R2              Plain R^2 of residual with bias included vs. total average\n# ADJUSTED_R2           Adjusted R^2 of residual with bias included vs. total average\n# PLAIN_R2_NOBIAS       Plain R^2
  of residual with bias subtracted vs. total average\n# ADJUSTED_R2_NOBIAS    Adjusted R^2 of residual with bias subtracted vs. total average\n# PLAIN_R2_VS_0         * Plain R^2 of residual with bias included vs. zero constant\n# ADJUSTED_R2_VS_0      * Adjusted R^2 of residual with bias included vs. zero constant\n# -------------------------------------------------------------------------------------\n# * The last two statistics are only printed if there is no intercept (icpt\u003d0)\n#\n# The Log file, when requested, contains the following per-iteration variables in CSV\n# format, each line containing triple (NAME, ITERATION, VALUE) with ITERATION \u003d 0 for\n# initial values:\n#\n# NAME                  MEANING\n# -------------------------------------------------------------------------------------\n# CG_RESIDUAL_NORM      L2-norm of Conj.Grad.residual, which is A %*% beta - t(X) %*% y\n#                           where A \u003d t(X) %*% X + diag (lambda), or a similar quantit
 y\n# CG_RESIDUAL_RATIO     Ratio of current L2-norm of Conj.Grad.residual over the initial\n# -------------------------------------------------------------------------------------\n#\n# HOW TO INVOKE THIS SCRIPT - EXAMPLE:\n# hadoop jar SystemML.jar -f LinearRegCG.dml -nvargs X\u003dINPUT_DIR/X Y\u003dINPUT_DIR/Y B\u003dOUTPUT_DIR/B\n#     O\u003dOUTPUT_DIR/Out icpt\u003d2 reg\u003d1.0 tol\u003d0.001 maxi\u003d100 fmt\u003dcsv Log\u003dOUTPUT_DIR/log\n\nfileX \u003d \"\";\nfileY \u003d \"\";\nfileB \u003d \"\";\nfileO \u003d ifdef ($O, \" \");\nfileLog \u003d ifdef ($Log, \" \");\nfmtB \u003d ifdef ($fmt, \"text\");\n\nintercept_status \u003d ifdef ($icpt, 0);     # $icpt\u003d0;\ntolerance \u003d ifdef ($tol, 0.000001);      # $tol\u003d0.000001;\nmax_iteration \u003d ifdef ($maxi, 0);        # $maxi\u003d0;\nregularization \u003d ifdef ($reg, 0.000001); # $reg\u003d0.000001;\n\nprint (\"BEGIN LINEAR REGRESSION SCRIPT\");\nprint (\"Reading X and Y...\");\nX \u003d read (fileX);\ny 
 \u003d read (fileY);\n\nn \u003d nrow (X);\nm \u003d ncol (X);\nones_n \u003d matrix (1, rows \u003d n, cols \u003d 1);\nzero_cell \u003d matrix (0, rows \u003d 1, cols \u003d 1);\n\n# Introduce the intercept, shift and rescale the columns of X if needed\n\nm_ext \u003d m;\nif (intercept_status \u003d\u003d 1 | intercept_status \u003d\u003d 2)  # add the intercept column\n{\n    X \u003d append (X, ones_n);\n    m_ext \u003d ncol (X);\n}\n\nscale_lambda \u003d matrix (1, rows \u003d m_ext, cols \u003d 1);\nif (intercept_status \u003d\u003d 1 | intercept_status \u003d\u003d 2)\n{\n    scale_lambda [m_ext, 1] \u003d 0;\n}\n\nif (intercept_status \u003d\u003d 2)  # scale-\u0026-shift X columns to mean 0, variance 1\n{                           # Important assumption: X [, m_ext] \u003d ones_n\n    avg_X_cols \u003d t(colSums(X)) / n;\n    var_X_cols \u003d (t(colSums (X ^ 2)) - n * (avg_X_cols ^ 2)) / (n - 1);\n    is_unsafe \u003d ppred (var_X_cols, 0.0, \"\u003c\u003d\");\n    scale_
 X \u003d 1.0 / sqrt (var_X_cols * (1 - is_unsafe) + is_unsafe);\n    scale_X [m_ext, 1] \u003d 1;\n    shift_X \u003d - avg_X_cols * scale_X;\n    shift_X [m_ext, 1] \u003d 0;\n} else {\n    scale_X \u003d matrix (1, rows \u003d m_ext, cols \u003d 1);\n    shift_X \u003d matrix (0, rows \u003d m_ext, cols \u003d 1);\n}\n\n# Henceforth, if intercept_status \u003d\u003d 2, we use \"X %*% (SHIFT/SCALE TRANSFORM)\"\n# instead of \"X\".  However, in order to preserve the sparsity of X,\n# we apply the transform associatively to some other part of the expression\n# in which it occurs.  To avoid materializing a large matrix, we rewrite it:\n#\n# ssX_A  \u003d (SHIFT/SCALE TRANSFORM) %*% A    --- is rewritten as:\n# ssX_A  \u003d diag (scale_X) %*% A;\n# ssX_A [m_ext, ] \u003d ssX_A [m_ext, ] + t(shift_X) %*% A;\n#\n# tssX_A \u003d t(SHIFT/SCALE TRANSFORM) %*% A   --- is rewritten as:\n# tssX_A \u003d diag (scale_X) %*% A + shift_X %*% A [m_ext, ];\n\nlambda \u003d scale_lambda * regulariza
 tion;\nbeta_unscaled \u003d matrix (0, rows \u003d m_ext, cols \u003d 1);\n\nif (max_iteration \u003d\u003d 0) {\n    max_iteration \u003d m_ext;\n}\ni \u003d 0;\n\n# BEGIN THE CONJUGATE GRADIENT ALGORITHM\nprint (\"Running the CG algorithm...\");\n\nr \u003d - t(X) %*% y;\n\nif (intercept_status \u003d\u003d 2) {\n    r \u003d scale_X * r + shift_X %*% r [m_ext, ];\n}\n\np \u003d - r;\nnorm_r2 \u003d sum (r ^ 2);\nnorm_r2_initial \u003d norm_r2;\nnorm_r2_target \u003d norm_r2_initial * tolerance ^ 2;\nprint (\"||r|| initial value \u003d \" + sqrt (norm_r2_initial) + \",  target value \u003d \" + sqrt (norm_r2_target));\nlog_str \u003d \"CG_RESIDUAL_NORM,0,\" + sqrt (norm_r2_initial);\nlog_str \u003d append (log_str, \"CG_RESIDUAL_RATIO,0,1.0\");\n\nwhile (i \u003c max_iteration \u0026 norm_r2 \u003e norm_r2_target)\n{\n    if (intercept_status \u003d\u003d 2) {\n        ssX_p \u003d scale_X * p;\n        ssX_p [m_ext, ] \u003d ssX_p [m_ext, ] + t(shift_X) %*% p;\n    } else {\n    
     ssX_p \u003d p;\n    }\n    \n    q \u003d t(X) %*% (X %*% ssX_p);\n\n    if (intercept_status \u003d\u003d 2) {\n        q \u003d scale_X * q + shift_X %*% q [m_ext, ];\n    }\n\n\tq \u003d q + lambda * p;\n\ta \u003d norm_r2 / sum (p * q);\n\tbeta_unscaled \u003d beta_unscaled + a * p;\n\tr \u003d r + a * q;\n\told_norm_r2 \u003d norm_r2;\n\tnorm_r2 \u003d sum (r ^ 2);\n\tp \u003d -r + (norm_r2 / old_norm_r2) * p;\n\ti \u003d i + 1;\n\tprint (\"Iteration \" + i + \":  ||r|| / ||r init|| \u003d \" + sqrt (norm_r2 / norm_r2_initial));\n\tlog_str \u003d append (log_str, \"CG_RESIDUAL_NORM,\"  + i + \",\" + sqrt (norm_r2));\n    log_str \u003d append (log_str, \"CG_RESIDUAL_RATIO,\" + i + \",\" + sqrt (norm_r2 / norm_r2_initial));\n}\n\nif (i \u003e\u003d max_iteration) {\n    print (\"Warning: the maximum number of iterations has been reached.\");\n}\nprint (\"The CG algorithm is done.\");\n# END THE CONJUGATE GRADIENT ALGORITHM\n\nif (intercept_status \u003d\u003d 2) {\n    beta
  \u003d scale_X * beta_unscaled;\n    beta [m_ext, ] \u003d beta [m_ext, ] + t(shift_X) %*% beta_unscaled;\n} else {\n    beta \u003d beta_unscaled;\n}\n\nprint (\"Computing the statistics...\");\n\navg_tot \u003d sum (y) / n;\nss_tot \u003d sum (y ^ 2);\nss_avg_tot \u003d ss_tot - n * avg_tot ^ 2;\nvar_tot \u003d ss_avg_tot / (n - 1);\ny_residual \u003d y - X %*% beta;\navg_res \u003d sum (y_residual) / n;\nss_res \u003d sum (y_residual ^ 2);\nss_avg_res \u003d ss_res - n * avg_res ^ 2;\n\nplain_R2 \u003d 1 - ss_res / ss_avg_tot;\nif (n \u003e m_ext) {\n    dispersion  \u003d ss_res / (n - m_ext);\n    adjusted_R2 \u003d 1 - dispersion / (ss_avg_tot / (n - 1));\n} else {\n    dispersion  \u003d 0.0 / 0.0;\n    adjusted_R2 \u003d 0.0 / 0.0;\n}\n\nplain_R2_nobias \u003d 1 - ss_avg_res / ss_avg_tot;\ndeg_freedom \u003d n - m - 1;\nif (deg_freedom \u003e 0) {\n    var_res \u003d ss_avg_res / deg_freedom;\n    adjusted_R2_nobias \u003d 1 - var_res / (ss_avg_tot / (n - 1));\n} else {\n  
   var_res \u003d 0.0 / 0.0;\n    adjusted_R2_nobias \u003d 0.0 / 0.0;\n    print (\"Warning: zero or negative number of degrees of freedom.\");\n}\n\nplain_R2_vs_0 \u003d 1 - ss_res / ss_tot;\nif (n \u003e m) {\n    adjusted_R2_vs_0 \u003d 1 - (ss_res / (n - m)) / (ss_tot / n);\n} else {\n    adjusted_R2_vs_0 \u003d 0.0 / 0.0;\n}\n\nstr \u003d \"AVG_TOT_Y,\" + avg_tot;                                    #  Average of the response value Y\nstr \u003d append (str, \"STDEV_TOT_Y,\" + sqrt (var_tot));             #  Standard Deviation of the response value Y\nstr \u003d append (str, \"AVG_RES_Y,\" + avg_res);                      #  Average of the residual Y - pred(Y|X), i.e. residual bias\nstr \u003d append (str, \"STDEV_RES_Y,\" + sqrt (var_res));             #  Standard Deviation of the residual Y - pred(Y|X)\nstr \u003d append (str, \"DISPERSION,\" + dispersion);                  #  GLM-style dispersion, i.e. residual sum of squares / # d.f.\nstr \u003d append (str, \"PLAIN_R2,\" + 
 plain_R2);                      #  Plain R^2 of residual with bias included vs. total average\nstr \u003d append (str, \"ADJUSTED_R2,\" + adjusted_R2);                #  Adjusted R^2 of residual with bias included vs. total average\nstr \u003d append (str, \"PLAIN_R2_NOBIAS,\" + plain_R2_nobias);        #  Plain R^2 of residual with bias subtracted vs. total average\nstr \u003d append (str, \"ADJUSTED_R2_NOBIAS,\" + adjusted_R2_nobias);  #  Adjusted R^2 of residual with bias subtracted vs. total average\nif (intercept_status \u003d\u003d 0) {\n    str \u003d append (str, \"PLAIN_R2_VS_0,\" + plain_R2_vs_0);        #  Plain R^2 of residual with bias included vs. zero constant\n    str \u003d append (str, \"ADJUSTED_R2_VS_0,\" + adjusted_R2_vs_0);  #  Adjusted R^2 of residual with bias included vs. zero constant\n}\n\nif (fileO !\u003d \" \") {\n    write (str, fileO);\n} else {\n    print (str);\n}\n\n# Prepare the output matrix\nprint (\"Writing the output matrix...\");\n\nif (inter
 cept_status \u003d\u003d 2) {\n    beta_out \u003d append (beta, beta_unscaled);\n} else {\n    beta_out \u003d beta;\n}\nwrite (beta_out, fileB, format\u003dfmtB);\n\nif (fileLog !\u003d \" \") {\n    write (log_str, fileLog);\n}\nprint (\"END LINEAR REGRESSION SCRIPT\");\n\"\"\"",
-      "dateUpdated": "Oct 12, 2015 10:47:23 AM",
-      "config": {
-        "colWidth": 12.0,
-        "graph": {
-          "mode": "table",
-          "height": 300.0,
-          "optionOpen": false,
-          "keys": [],
-          "values": [],
-          "groups": [],
-          "scatter": {}
-        },
-        "editorMode": "ace/mode/scala",
-        "editorHide": true,
-        "tableHide": true
-      },
-      "settings": {
-        "params": {},
-        "forms": {}
-      },
-      "jobName": "paragraph_1444365784575_-1898057033",
-      "id": "20151008-214304_2123048601",
-      "result": {
-        "code": "SUCCESS",
-        "type": "TEXT",
-        "msg": "linearRegFull: String \u003d \n\"\n#-------------------------------------------------------------\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n#-------------------------------------------------------------\n\n#\n# THIS SCRIPT SOLVES LINEAR REGRESSION USING THE CONJU..."
-      },
-      "dateCreated": "Oct 8, 2015 9:43:04 PM",
-      "dateStarted": "Oct 12, 2015 10:47:38 AM",
-      "dateFinished": "Oct 12, 2015 10:47:38 AM",
-      "status": "FINISHED",
-      "progressUpdateIntervalMs": 500
-    },
-    {
-      "text": "// Helper functions\nimport org.apache.sysml.api.MLOutput\n\ndef getScalar(outputs: MLOutput, symbol: String): Any \u003d\n    outputs.getDF(sqlContext, symbol).first()(1)\n    \ndef getScalarDouble(outputs: MLOutput, symbol: String): Double \u003d \n    getScalar(outputs, symbol).asInstanceOf[Double]\n    \ndef getScalarInt(outputs: MLOutput, symbol: String): Int \u003d\n    getScalarDouble(outputs, symbol).toInt",
-      "dateUpdated": "Oct 12, 2015 10:47:23 AM",
-      "config": {
-        "colWidth": 12.0,
-        "graph": {
-          "mode": "table",
-          "height": 300.0,
-          "optionOpen": false,
-          "keys": [],
-          "values": [],
-          "groups": [],
-          "scatter": {}
-        },
-        "editorMode": "ace/mode/scala"
-      },
-      "settings": {
-        "params": {},
-        "forms": {}
-      },
-      "jobName": "paragraph_1444428901112_1314919146",
-      "id": "20151009-151501_511642642",
-      "result": {
-        "code": "SUCCESS",
-        "type": "TEXT",
-        "msg": "import org.apache.sysml.api.MLOutput\ngetScalar: (outputs: org.apache.sysml.api.MLOutput, symbol: String)Any\ngetScalarDouble: (outputs: org.apache.sysml.api.MLOutput, symbol: String)Double\ngetScalarInt: (outputs: org.apache.sysml.api.MLOutput, symbol: String)Int\n"
-      },
-      "dateCreated": "Oct 9, 2015 3:15:01 PM",
-      "dateStarted": "Oct 12, 2015 10:47:38 AM",
-      "dateFinished": "Oct 12, 2015 10:47:39 AM",
-      "status": "FINISHED",
-      "progressUpdateIntervalMs": 500
-    },
-    {
-      "text": "// Imports\nimport org.apache.sysml.api.MLContext\nimport org.apache.sysml.runtime.instructions.spark.utils.{RDDConverterUtilsExt \u003d\u003e RDDConverterUtils}\nimport org.apache.sysml.runtime.matrix.MatrixCharacteristics;\n\n// Create SystemML context\nval ml \u003d new MLContext(sc)\n\n// Convert data to proper format\nval mcX \u003d new MatrixCharacteristics(numRows, numCols, 1000, 1000)\nval mcY \u003d new MatrixCharacteristics(numRows, 1, 1000, 1000)\nval X \u003d RDDConverterUtils.vectorDataFrameToBinaryBlock(sc, data, mcX, false, \"features\")\nval y \u003d RDDConverterUtils.dataFrameToBinaryBlock(sc, data.select(\"label\"), mcY, false)\n// val y \u003d data.select(\"label\")\n\n// Cache\nval X2 \u003d X.cache()\nval y2 \u003d y.cache()\nval cnt1 \u003d X2.count()\nval cnt2 \u003d y2.count() ",
-      "dateUpdated": "Oct 12, 2015 10:47:23 AM",
-      "config": {
-        "colWidth": 12.0,
-        "graph": {
-          "mode": "table",
-          "height": 300.0,
-          "optionOpen": false,
-          "keys": [],
-          "values": [],
-          "groups": [],
-          "scatter": {}
-        },
-        "editorMode": "ace/mode/scala"
-      },
-      "settings": {
-        "params": {},
-        "forms": {}
-      },
-      "jobName": "paragraph_1444068596053_-555488546",
-      "id": "20151005-110956_169115151",
-      "result": {
-        "code": "SUCCESS",
-        "type": "TEXT",
-        "msg": "import org.apache.sysml.api.MLContext\nimport org.apache.sysml.runtime.instructions.spark.utils.{RDDConverterUtilsExt\u003d\u003eRDDConverterUtils}\nimport org.apache.sysml.runtime.matrix.MatrixCharacteristics\nml: org.apache.sysml.api.MLContext \u003d org.apache.sysml.api.MLContext@38d59245\nmcX: org.apache.sysml.runtime.matrix.MatrixCharacteristics \u003d [10000 x 1000, nnz\u003d-1, blocks (1000 x 1000)]\nmcY: org.apache.sysml.runtime.matrix.MatrixCharacteristics \u003d [10000 x 1, nnz\u003d-1, blocks (1000 x 1000)]\nX: org.apache.spark.api.java.JavaPairRDD[org.apache.sysml.runtime.matrix.data.MatrixIndexes,org.apache.sysml.runtime.matrix.data.MatrixBlock] \u003d org.apache.spark.api.java.JavaPairRDD@b5a86e3\ny: org.apache.spark.api.java.JavaPairRDD[org.apache.sysml.runtime.matrix.data.MatrixIndexes,org.apache.sysml.runtime.matrix.data.MatrixBlock] \u003d org.apache.spark.api.java.JavaPairRDD@56377665\nX2: org.apache.spark.api.java.JavaPairRDD[org.apache.sysml.runt
 ime.matrix.data.MatrixIndexes,org.apache.sysml.runtime.matrix.data.MatrixBlock] \u003d org.apache.spark.api.java.JavaPairRDD@650f29d2\ny2: org.apache.spark.api.java.JavaPairRDD[org.apache.sysml.runtime.matrix.data.MatrixIndexes,org.apache.sysml.runtime.matrix.data.MatrixBlock] \u003d org.apache.spark.api.java.JavaPairRDD@334857a8\ncnt1: Long \u003d 10\ncnt2: Long \u003d 10\n"
-      },
-      "dateCreated": "Oct 5, 2015 11:09:56 AM",
-      "dateStarted": "Oct 12, 2015 10:47:39 AM",
-      "dateFinished": "Oct 12, 2015 10:47:43 AM",
-      "status": "FINISHED",
-      "progressUpdateIntervalMs": 500
-    },
-    {
-      "text": "// Register inputs \u0026 outputs\nml.reset()  \nml.registerInput(\"X\", X, numRows, numCols)\nml.registerInput(\"y\", y, numRows, 1)\n// ml.registerInput(\"y\", y)\nml.registerOutput(\"beta_out\")\nml.registerOutput(\"R2\")\nml.registerOutput(\"totalIters\")\n\n// Run the script\nval start \u003d System.currentTimeMillis()\nval outputs \u003d ml.executeScript(linearReg)\nval trainingTime \u003d (System.currentTimeMillis() - start).toDouble / 1000.0\n\n// Get outputs\nval B \u003d outputs.getDF(sqlContext, \"beta_out\").sort(\"ID\").drop(\"ID\")\nval r2 \u003d getScalarDouble(outputs, \"R2\")\nval iters \u003d getScalarInt(outputs, \"totalIters\")\nval trainingTimePerIter \u003d trainingTime / iters",
-      "dateUpdated": "Oct 12, 2015 10:48:10 AM",
-      "config": {
-        "colWidth": 12.0,
-        "graph": {
-          "mode": "table",
-          "height": 300.0,
-          "optionOpen": false,
-          "keys": [],
-          "values": [],
-          "groups": [],
-          "scatter": {}
-        },
-        "editorMode": "ace/mode/scala"
-      },
-      "settings": {
-        "params": {},
-        "forms": {}
-      },
-      "jobName": "paragraph_1444671982719_328618024",
-      "id": "20151012-104622_1349641375",
-      "result": {
-        "code": "SUCCESS",
-        "type": "TEXT",
-        "msg": "start: Long \u003d 1444672090620\noutputs: org.apache.sysml.api.MLOutput \u003d org.apache.sysml.api.MLOutput@5d2c22d0\ntrainingTime: Double \u003d 1.176\nB: org.apache.spark.sql.DataFrame \u003d [C1: double]\nr2: Double \u003d 0.9677079547216473\niters: Int \u003d 12\ntrainingTimePerIter: Double \u003d 0.09799999999999999\n"
-      },
-      "dateCreated": "Oct 12, 2015 10:46:22 AM",
-      "dateStarted": "Oct 12, 2015 10:48:10 AM",
-      "dateFinished": "Oct 12, 2015 10:48:12 AM",
-      "status": "FINISHED",
-      "progressUpdateIntervalMs": 500
-    },
-    {
-      "text": "// Print statistics\nprintln(s\"R2: ${r2}\")\nprintln(s\"Iterations: ${iters}\")\nprintln(s\"Training time per iter: ${trainingTimePerIter} seconds\")\nB.describe().show()",
-      "dateUpdated": "Oct 12, 2015 10:47:23 AM",
-      "config": {
-        "colWidth": 12.0,
-        "graph": {
-          "mode": "table",
-          "height": 300.0,
-          "optionOpen": false,
-          "keys": [],
-          "values": [],
-          "groups": [],
-          "scatter": {}
-        },
-        "tableHide": false,
-        "editorHide": false,
-        "editorMode": "ace/mode/scala"
-      },
-      "settings": {
-        "params": {},
-        "forms": {}
-      },
-      "jobName": "paragraph_1444081221119_-327699254",
-      "id": "20151005-144021_55411373",
-      "result": {
-        "code": "SUCCESS",
-        "type": "TEXT",
-        "msg": "R2: 0.9677079547216473\nIterations: 12\nTraining time per iter: 0.2334166666666667 seconds\n+-------+-------------------+\n|summary|                 C1|\n+-------+-------------------+\n|  count|               1000|\n|   mean| 0.0184500840658385|\n| stddev| 0.2764750319432085|\n|    min|-0.5426068958986378|\n|    max| 0.5225309861616542|\n+-------+-------------------+\n\n"
-      },
-      "dateCreated": "Oct 5, 2015 2:40:21 PM",
-      "dateStarted": "Oct 12, 2015 10:47:43 AM",
-      "dateFinished": "Oct 12, 2015 10:47:50 AM",
-      "status": "FINISHED",
-      "progressUpdateIntervalMs": 500
-    },
-    {
-      "dateUpdated": "Oct 12, 2015 10:47:23 AM",
-      "config": {
-        "colWidth": 12.0,
-        "graph": {
-          "mode": "table",
-          "height": 300.0,
-          "optionOpen": false,
-          "keys": [],
-          "values": [],
-          "groups": [],
-          "scatter": {}
-        },
-        "editorMode": "ace/mode/scala"
-      },
-      "settings": {
-        "params": {},
-        "forms": {}
-      },
-      "jobName": "paragraph_1444422131984_536286492",
-      "id": "20151009-132211_1399012872",
-      "result": {
-        "code": "SUCCESS",
-        "type": "TEXT"
-      },
-      "dateCreated": "Oct 9, 2015 1:22:11 PM",
-      "dateStarted": "Oct 12, 2015 10:47:47 AM",
-      "dateFinished": "Oct 12, 2015 10:47:50 AM",
-      "status": "FINISHED",
-      "progressUpdateIntervalMs": 500
-    }
-  ],
-  "name": "SystemML - Linear Regression",
-  "id": "2AZ2AQ12B",
-  "angularObjects": {
-    "2AZ9BN82Z": [],
-    "2AZWVF2GZ": [],
-    "2AZ78WFZ8": [],
-    "2B16GQAY6": [],
-    "2AXNVS2AP": [],
-    "2AXQ86QRG": [],
-    "2AZR88MK4": [],
-    "2AY2CE5DY": [],
-    "2AZRRUZZU": [],
-    "2AY16128C": [],
-    "2AX3MFKQ2": [],
-    "2AWQWADKQ": [],
-    "2AYCYP1MW": [],
-    "2AZYXB1NC": []
-  },
-  "config": {},
-  "info": {}
-}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/345e1ab4/samples/zeppelin-notebooks/SystemML_LinearRegCG.json
----------------------------------------------------------------------
diff --git a/samples/zeppelin-notebooks/SystemML_LinearRegCG.json b/samples/zeppelin-notebooks/SystemML_LinearRegCG.json
new file mode 100644
index 0000000..1dc91cb
--- /dev/null
+++ b/samples/zeppelin-notebooks/SystemML_LinearRegCG.json
@@ -0,0 +1 @@
+{"paragraphs":[{"user":"anonymous","config":{"colWidth":12,"enabled":true,"results":{},"editorSetting":{"language":"scala"},"editorMode":"ace/mode/scala"},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1494545879068_-1034995073","id":"20170511-163759_12734001","dateCreated":"2017-05-11T16:37:59-0700","status":"FINISHED","progressUpdateIntervalMs":500,"focus":true,"$$hashKey":"object:8101","text":"%dep\r\nz.load(\"org.apache.systemml:systemml:0.14.0-incubating\")","dateUpdated":"2017-05-11T16:38:53-0700","dateFinished":"2017-05-11T16:38:54-0700","dateStarted":"2017-05-11T16:38:53-0700","results":{"code":"SUCCESS","msg":[{"type":"TEXT","data":"res1: org.apache.zeppelin.dep.Dependency = org.apache.zeppelin.dep.Dependency@7e785cff\n"}]}},{"text":"sc.version","user":"anonymous","dateUpdated":"2017-05-11T16:38:53-0700","config":{"colWidth":12,"editorMode":"ace/mode/scala","results":{},"enabled":true,"editorSetting":{"language":"scala"}},"settings":{"params":{},"form
 s":{}},"results":{"code":"SUCCESS","msg":[{"type":"TEXT","data":"\nres0: String = 2.1.0\n"}]},"apps":[],"jobName":"paragraph_1494543525010_-885655540","id":"20170511-144323_52625604","dateCreated":"2017-05-11T15:58:45-0700","dateStarted":"2017-05-11T16:38:54-0700","dateFinished":"2017-05-11T16:39:07-0700","status":"FINISHED","progressUpdateIntervalMs":500,"focus":true,"$$hashKey":"object:7688"},{"text":"import org.apache.sysml.api.mlcontext._","user":"anonymous","dateUpdated":"2017-05-11T16:38:53-0700","config":{"colWidth":12,"editorMode":"ace/mode/scala","results":{},"enabled":true,"editorSetting":{"language":"scala"}},"settings":{"params":{},"forms":{}},"results":{"code":"SUCCESS","msg":[{"type":"TEXT","data":"\nimport org.apache.sysml.api.mlcontext._\n"}]},"apps":[],"jobName":"paragraph_1494543525011_-886040289","id":"20170511-144349_335242548","dateCreated":"2017-05-11T15:58:45-0700","dateStarted":"2017-05-11T16:38:54-0700","dateFinished":"2017-05-11T16:39:08-0700","status":"FIN
 ISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:7689"},{"text":"val ml = new MLContext(sc)","user":"anonymous","dateUpdated":"2017-05-11T16:38:53-0700","config":{"colWidth":12,"editorMode":"ace/mode/scala","results":{},"enabled":true,"editorSetting":{"language":"scala"}},"settings":{"params":{},"forms":{}},"results":{"code":"SUCCESS","msg":[{"type":"TEXT","data":"\nml: org.apache.sysml.api.mlcontext.MLContext = org.apache.sysml.api.mlcontext.MLContext@403d1309\n"}]},"apps":[],"jobName":"paragraph_1494543525011_-886040289","id":"20170511-144600_1148672764","dateCreated":"2017-05-11T15:58:45-0700","dateStarted":"2017-05-11T16:39:08-0700","dateFinished":"2017-05-11T16:39:08-0700","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:7690"},{"text":"ml.info","user":"anonymous","dateUpdated":"2017-05-11T16:38:53-0700","config":{"colWidth":12,"editorMode":"ace/mode/scala","results":{},"enabled":true,"editorSetting":{"language":"scala"}},"settings":{"params":{},"
 forms":{}},"results":{"code":"SUCCESS","msg":[{"type":"TEXT","data":"\n\n\n\n\n\n\n\n\n\n\n\nres1: org.apache.sysml.api.mlcontext.ProjectInfo =\nArchiver-Version: Plexus Archiver\nArtifact-Id: systemml\nBuild-Jdk: 1.8.0_121\nBuild-Time: 2017-04-19 21:45:10 UTC\nBuilt-By: asurve\nCreated-By: Apache Maven 3.3.9\nGroup-Id: org.apache.systemml\nMain-Class: org.apache.sysml.api.DMLScript\nManifest-Version: 1.0\nMinimum-Recommended-Spark-Version: 2.1.0\nVersion: 0.14.0-incubating\n"}]},"apps":[],"jobName":"paragraph_1494543525012_-887964033","id":"20170511-145343_677848491","dateCreated":"2017-05-11T15:58:45-0700","dateStarted":"2017-05-11T16:39:08-0700","dateFinished":"2017-05-11T16:39:08-0700","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:7691"},{"text":"import org.apache.spark.mllib.util.LinearDataGenerator\nimport org.apache.spark.mllib.linalg.Vector\nimport org.apache.spark.sql._\nimport org.apache.spark.sql.types.{StructType,StructField,DoubleType,StringType
 ,IntegerType}","user":"anonymous","dateUpdated":"2017-05-11T16:38:54-0700","config":{"colWidth":12,"editorMode":"ace/mode/scala","results":{},"enabled":true,"editorSetting":{"language":"scala"}},"settings":{"params":{},"forms":{}},"results":{"code":"SUCCESS","msg":[{"type":"TEXT","data":"\nimport org.apache.spark.mllib.util.LinearDataGenerator\n\nimport org.apache.spark.mllib.linalg.Vector\n\nimport org.apache.spark.sql._\n\nimport org.apache.spark.sql.types.{StructType, StructField, DoubleType, StringType, IntegerType}\n"}]},"apps":[],"jobName":"paragraph_1494543525012_-887964033","id":"20170511-145414_1435992614","dateCreated":"2017-05-11T15:58:45-0700","dateStarted":"2017-05-11T16:39:08-0700","dateFinished":"2017-05-11T16:39:09-0700","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:7692"},{"text":"val nRows = 1000; val nCols = 20","user":"anonymous","dateUpdated":"2017-05-11T16:38:54-0700","config":{"colWidth":12,"editorMode":"ace/mode/scala","results":{},"e
 nabled":true,"editorSetting":{"language":"scala"}},"settings":{"params":{},"forms":{}},"results":{"code":"SUCCESS","msg":[{"type":"TEXT","data":"\n\nnRows: Int = 1000\nnCols: Int = 20\n"}]},"apps":[],"jobName":"paragraph_1494543525012_-887964033","id":"20170511-145542_2060715456","dateCreated":"2017-05-11T15:58:45-0700","dateStarted":"2017-05-11T16:39:09-0700","dateFinished":"2017-05-11T16:39:10-0700","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:7693"},{"text":"val data = LinearDataGenerator.generateLinearRDD(sc, nRows, nCols, 0.001).toDF\nval dataX = data.select(\"features\").rdd.map{ v => Row.fromSeq(v(0).asInstanceOf[Vector].toArray)}\nval schemaX = StructType((1 to nCols).map { i => StructField(\"C\" + i, DoubleType, true) } )","user":"anonymous","dateUpdated":"2017-05-11T16:38:54-0700","config":{"colWidth":12,"editorMode":"ace/mode/scala","results":{},"enabled":true,"editorSetting":{"language":"scala"}},"settings":{"params":{},"forms":{}},"results":{"c
 ode":"SUCCESS","msg":[{"type":"TEXT","data":"\ndata: org.apache.spark.sql.DataFrame = [label: double, features: vector]\n\ndataX: org.apache.spark.rdd.RDD[org.apache.spark.sql.Row] = MapPartitionsRDD[6] at map at <console>:41\n\nschemaX: org.apache.spark.sql.types.StructType = StructType(StructField(C1,DoubleType,true), StructField(C2,DoubleType,true), StructField(C3,DoubleType,true), StructField(C4,DoubleType,true), StructField(C5,DoubleType,true), StructField(C6,DoubleType,true), StructField(C7,DoubleType,true), StructField(C8,DoubleType,true), StructField(C9,DoubleType,true), StructField(C10,DoubleType,true), StructField(C11,DoubleType,true), StructField(C12,DoubleType,true), StructField(C13,DoubleType,true), StructField(C14,DoubleType,true), StructField(C15,DoubleType,true), StructField(C16,DoubleType,true), StructField(C17,DoubleType,true), StructField(C18,DoubleType,true), StructField(C19,DoubleType,true), StructField(C20,DoubleType,true))\n"}]},"apps":[],"jobName":"paragraph_
 1494543525013_-888348782","id":"20170511-145736_384788188","dateCreated":"2017-05-11T15:58:45-0700","dateStarted":"2017-05-11T16:39:10-0700","dateFinished":"2017-05-11T16:39:14-0700","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:7694"},{"text":"val X = spark.createDataFrame(dataX,schemaX)\nval y = data.select(\"label\")","user":"anonymous","dateUpdated":"2017-05-11T16:38:54-0700","config":{"colWidth":12,"editorMode":"ace/mode/scala","results":{},"enabled":true,"editorSetting":{"language":"scala"}},"settings":{"params":{},"forms":{}},"results":{"code":"SUCCESS","msg":[{"type":"TEXT","data":"\nX: org.apache.spark.sql.DataFrame = [C1: double, C2: double ... 18 more fields]\n\ny: org.apache.spark.sql.DataFrame = [label: double]\n"}]},"apps":[],"jobName":"paragraph_1494543525013_-888348782","id":"20170511-150007_1200854412","dateCreated":"2017-05-11T15:58:45-0700","dateStarted":"2017-05-11T16:39:10-0700","dateFinished":"2017-05-11T16:39:15-0700","status":"FINISHE
 D","progressUpdateIntervalMs":500,"$$hashKey":"object:7695"},{"text":"val LinRegCgDML = ScriptFactory.dmlFromResource(\"/scripts/algorithms/LinearRegCG.dml\")","user":"anonymous","dateUpdated":"2017-05-11T16:38:54-0700","config":{"colWidth":12,"editorMode":"ace/mode/scala","results":{},"enabled":true,"editorSetting":{"language":"scala"}},"settings":{"params":{},"forms":{}},"results":{"code":"SUCCESS","msg":[{"type":"TEXT","data":"\n\n\n\n\n\nLinRegCgDML: org.apache.sysml.api.mlcontext.Script =\nInputs:\nNone\n\nOutputs:\nNone\n"}]},"apps":[],"jobName":"paragraph_1494543525014_-887194535","id":"20170511-150116_450421311","dateCreated":"2017-05-11T15:58:45-0700","dateStarted":"2017-05-11T16:39:14-0700","dateFinished":"2017-05-11T16:39:15-0700","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:7696"},{"text":"val LinRegCg = LinRegCgDML.in(\"X\", X).in(\"y\", y).out(\"beta_out\")","user":"anonymous","dateUpdated":"2017-05-11T16:38:54-0700","config":{"colWidth":12,"e
 ditorMode":"ace/mode/scala","results":{},"enabled":true,"editorSetting":{"language":"scala"}},"settings":{"params":{},"forms":{}},"results":{"code":"SUCCESS","msg":[{"type":"TEXT","data":"\n\n\n\n\n\n\nLinRegCg: org.apache.sysml.api.mlcontext.Script =\nInputs:\n  [1] (Dataset as Matrix) X: [C1: double, C2: double ... 18 more fields]\n  [2] (Dataset as Matrix) y: [label: double]\n\nOutputs:\n  [1] beta_out\n"}]},"apps":[],"jobName":"paragraph_1494543525014_-887194535","id":"20170511-150143_1437310013","dateCreated":"2017-05-11T15:58:45-0700","dateStarted":"2017-05-11T16:39:15-0700","dateFinished":"2017-05-11T16:39:17-0700","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:7697"},{"text":"val res = ml.execute(LinRegCg)","user":"anonymous","dateUpdated":"2017-05-11T16:38:54-0700","config":{"tableHide":false,"editorSetting":{"language":"scala"},"colWidth":12,"editorMode":"ace/mode/scala","results":{},"enabled":true},"settings":{"params":{},"forms":{}},"results":{"co
 de":"SUCCESS","msg":[{"type":"TEXT","data":"\n\nres: org.apache.sysml.api.mlcontext.MLResults =\n  [1] (Matrix) beta_out: MatrixObject: scratch_space//_p10760_172.16.189.91//_t0/temp19_77, [20 x 1, nnz=20, blocks (1000 x 1000)], binaryblock, dirty\n"}]},"apps":[],"jobName":"paragraph_1494543525015_-887579284","id":"20170511-150218_888522247","dateCreated":"2017-05-11T15:58:45-0700","dateStarted":"2017-05-11T16:39:16-0700","dateFinished":"2017-05-11T16:39:19-0700","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:7698"},{"text":"z.show(res.getDataFrame(\"beta_out\").sort(\"__INDEX\"))","user":"anonymous","dateUpdated":"2017-05-11T16:38:55-0700","config":{"colWidth":12,"editorMode":"ace/mode/scala","results":{},"enabled":true,"editorSetting":{"language":"scala"}},"settings":{"params":{},"forms":{}},"results":{"code":"SUCCESS","msg":[{"type":"TABLE","data":"__INDEX\tC1\n1.0\t0.22758923148653765\n2.0\t0.18325331678413073\n3.0\t-0.19132020477989198\n4.0\t-0.222978646
 6645596\n5.0\t0.1655320728089567\n6.0\t0.4034581825273427\n7.0\t-0.13119697771587355\n8.0\t-0.22422151776382496\n9.0\t-0.03640812970196472\n10.0\t0.28305615669741196\n11.0\t0.41935757104525073\n12.0\t-0.06348947505950103\n13.0\t0.24988033284537162\n14.0\t-0.11344894712988449\n15.0\t-0.32272205772821355\n16.0\t0.09442021705906962\n17.0\t-0.29017864948719196\n18.0\t0.32589380033203724\n19.0\t-0.32768681591496096\n20.0\t0.08744302957224545\n"}]},"apps":[],"jobName":"paragraph_1494543525016_-889503029","id":"20170511-150414_1824148477","dateCreated":"2017-05-11T15:58:45-0700","dateStarted":"2017-05-11T16:39:17-0700","dateFinished":"2017-05-11T16:39:20-0700","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:7699"},{"text":"","user":"anonymous","dateUpdated":"2017-05-11T16:18:22-0700","config":{"colWidth":12,"enabled":true,"results":{},"editorSetting":{"language":"scala"},"editorMode":"ace/mode/scala"},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph
 _1494543525016_-889503029","id":"20170511-154623_2118188057","dateCreated":"2017-05-11T15:58:45-0700","status":"FINISHED","errorMessage":"","progressUpdateIntervalMs":500,"$$hashKey":"object:7700"}],"name":"SystemML_LinearRegCG","id":"2CFKY21GZ","angularObjects":{"2CEM2EBHQ:shared_process":[]},"config":{"looknfeel":"default","personalizedMode":"false"},"info":{}}
\ No newline at end of file