Posted to commits@zeppelin.apache.org by zj...@apache.org on 2020/02/09 14:24:25 UTC
[zeppelin] branch master updated: [ZEPPELIN-4560] Restructure and
improve spark tutorial note
This is an automated email from the ASF dual-hosted git repository.
zjffdu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/zeppelin.git
The following commit(s) were added to refs/heads/master by this push:
new 3f2bf2c [ZEPPELIN-4560] Restructure and improve spark tutorial note
3f2bf2c is described below
commit 3f2bf2cb6579ab61cdfbb43c66f37fae7ffe8b02
Author: Jeff Zhang <zj...@apache.org>
AuthorDate: Sun Jan 19 17:53:16 2020 +0800
[ZEPPELIN-4560] Restructure and improve spark tutorial note
### What is this PR for?
This PR restructures and improves the Spark tutorials. The Spark tutorial notes are now:
* Basic Features (Spark)
* Spark MlLib
* Spark SQL (PySpark)
* Spark SQL (Scala)
* SparkR Shiny App
* SparkR Basics
### What type of PR is it?
[ Improvement | Documentation ]
### Todos
* [ ] - Task
### What is the Jira issue?
* https://issues.apache.org/jira/browse/ZEPPELIN-4560
### How should this be tested?
* No test needed
### Screenshots (if appropriate)
### Questions:
* Do the license files need to be updated? No
* Are there breaking changes for older versions? No
* Does this need documentation? No
Author: Jeff Zhang <zj...@apache.org>
Closes #3631 from zjffdu/ZEPPELIN-4560 and squashes the following commits:
ed2bc4c4e [Jeff Zhang] [ZEPPELIN-4560] Restructure and improve spark tutorial note
---
.../Spark Basic Features_2A94M5J1Z.zpln} | 203 +++-
notebook/Spark Tutorial/Spark MlLib_2EZFM3GJA.zpln | 476 ++++++++
.../Spark SQL (PySpark)_2EWM84JXA.zpln | 1184 ++++++++++++++++++++
.../Spark SQL (Scala)_2EYUV26VR.zpln | 1034 +++++++++++++++++
.../Spark Tutorial/SparkR Basics_2BWJFTXKM.zpln | 1119 ++++++++++++++++++
.../Spark Tutorial/SparkR Shiny App_2F1CHQ4TT.zpln | 217 ++++
6 files changed, 4175 insertions(+), 58 deletions(-)
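For context, each `.zpln` file touched below is a JSON note document: a note object with an `id`, a `name`, and a list of `paragraphs`, each carrying its source `text`, execution `status`, and timestamps. A minimal sketch of inspecting such a note with Python's standard library (the sample note dict here is hypothetical, but the field names match those visible in the diff):

```python
import json

# Hypothetical minimal .zpln note, using field names that appear in the
# diff below: a note has an id, a name, and a list of paragraph objects.
note_json = """
{
  "name": "Spark Basic Features",
  "id": "2A94M5J1Z",
  "defaultInterpreterGroup": "spark",
  "paragraphs": [
    {
      "title": "Load data into table",
      "text": "%spark\\nval x = 1",
      "status": "FINISHED"
    },
    {
      "text": "%sql\\nselect age, count(1) value from bank group by age",
      "status": "READY"
    }
  ]
}
"""

note = json.loads(note_json)

# Print each paragraph's title (if any) and its execution status.
for p in note["paragraphs"]:
    print(p.get("title", "<untitled>"), "-", p["status"])
```

Running this prints one line per paragraph (`Load data into table - FINISHED`, then `<untitled> - READY`); the same pattern works against a real `.zpln` file read from disk.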
diff --git a/notebook/Zeppelin Tutorial/Basic Features (Spark)_2A94M5J1Z.zpln b/notebook/Spark Tutorial/Spark Basic Features_2A94M5J1Z.zpln
similarity index 68%
rename from notebook/Zeppelin Tutorial/Basic Features (Spark)_2A94M5J1Z.zpln
rename to notebook/Spark Tutorial/Spark Basic Features_2A94M5J1Z.zpln
index 4deba4a..a6d29da 100644
--- a/notebook/Zeppelin Tutorial/Basic Features (Spark)_2A94M5J1Z.zpln
+++ b/notebook/Spark Tutorial/Spark Basic Features_2A94M5J1Z.zpln
@@ -3,7 +3,7 @@
{
"text": "%md\n## Welcome to Zeppelin.\n##### This is a live tutorial, you can run the code yourself. (Shift-Enter to Run)",
"user": "anonymous",
- "dateUpdated": "Dec 17, 2016 3:32:15 PM",
+ "dateUpdated": "2016-12-17 15:32:15.000",
"config": {
"colWidth": 12.0,
"editorHide": true,
@@ -42,19 +42,19 @@
]
},
"apps": [],
+ "progressUpdateIntervalMs": 500,
"jobName": "paragraph_1423836981412_-1007008116",
"id": "20150213-231621_168813393",
- "dateCreated": "Feb 13, 2015 11:16:21 PM",
- "dateStarted": "Dec 17, 2016 3:32:15 PM",
- "dateFinished": "Dec 17, 2016 3:32:18 PM",
- "status": "FINISHED",
- "progressUpdateIntervalMs": 500
+ "dateCreated": "2015-02-13 23:16:21.000",
+ "dateStarted": "2016-12-17 15:32:15.000",
+ "dateFinished": "2016-12-17 15:32:18.000",
+ "status": "FINISHED"
},
{
"title": "Load data into table",
"text": "import org.apache.commons.io.IOUtils\nimport java.net.URL\nimport java.nio.charset.Charset\n\n// Zeppelin creates and injects sc (SparkContext) and sqlContext (HiveContext or SqlContext)\n// So you don\u0027t need create them manually\n\n// load bank data\nval bankText \u003d sc.parallelize(\n IOUtils.toString(\n new URL(\"https://s3.amazonaws.com/apache-zeppelin/tutorial/bank/bank.csv\"),\n Charset.forName(\"utf8\")).split(\"\\n\"))\n\ncase class Bank(age [...]
"user": "anonymous",
- "dateUpdated": "Dec 17, 2016 3:30:09 PM",
+ "dateUpdated": "2020-01-21 22:58:52.064",
"config": {
"colWidth": 12.0,
"title": true,
@@ -71,8 +71,11 @@
],
"editorSetting": {
"language": "scala",
- "editOnDblClick": false
- }
+ "editOnDblClick": false,
+ "completionKey": "TAB",
+ "completionSupport": true
+ },
+ "fontSize": 9.0
},
"settings": {
"params": {},
@@ -83,23 +86,23 @@
"msg": [
{
"type": "TEXT",
- "data": "import org.apache.commons.io.IOUtils\nimport java.net.URL\nimport java.nio.charset.Charset\nbankText: org.apache.spark.rdd.RDD[String] \u003d ParallelCollectionRDD[36] at parallelize at \u003cconsole\u003e:43\ndefined class Bank\nbank: org.apache.spark.sql.DataFrame \u003d [age: int, job: string ... 3 more fields]\nwarning: there were 1 deprecation warning(s); re-run with -deprecation for details\n"
+ "data": "\u001b[33mwarning: \u001b[0mthere was one deprecation warning; re-run with -deprecation for details\nimport sqlContext.implicits._\nimport org.apache.commons.io.IOUtils\nimport java.net.URL\nimport java.nio.charset.Charset\n\u001b[1m\u001b[34mbankText\u001b[0m: \u001b[1m\u001b[32morg.apache.spark.rdd.RDD[String]\u001b[0m \u003d ParallelCollectionRDD[0] at parallelize at \u003cconsole\u003e:24\ndefined class Bank\n\u001b[1m\u001b[34mbank\u001b[0m: \u001b[1m\u001b[32mo [...]
}
]
},
"apps": [],
+ "progressUpdateIntervalMs": 500,
"jobName": "paragraph_1423500779206_-1502780787",
"id": "20150210-015259_1403135953",
- "dateCreated": "Feb 10, 2015 1:52:59 AM",
- "dateStarted": "Dec 17, 2016 3:30:09 PM",
- "dateFinished": "Dec 17, 2016 3:30:58 PM",
- "status": "FINISHED",
- "progressUpdateIntervalMs": 500
+ "dateCreated": "2015-02-10 01:52:59.000",
+ "dateStarted": "2020-01-21 22:58:52.084",
+ "dateFinished": "2020-01-21 22:59:18.740",
+ "status": "FINISHED"
},
{
"text": "%sql \nselect age, count(1) value\nfrom bank \nwhere age \u003c 30 \ngroup by age \norder by age",
"user": "anonymous",
- "dateUpdated": "Mar 17, 2017 12:18:02 PM",
+ "dateUpdated": "2020-01-19 16:58:04.490",
"config": {
"colWidth": 4.0,
"results": [
@@ -107,7 +110,31 @@
"graph": {
"mode": "multiBarChart",
"height": 366.0,
- "optionOpen": false
+ "optionOpen": false,
+ "setting": {
+ "multiBarChart": {
+ "rotate": {
+ "degree": "-45"
+ },
+ "xLabelStatus": "default"
+ }
+ },
+ "commonSetting": {},
+ "keys": [
+ {
+ "name": "age",
+ "index": 0.0,
+ "aggr": "sum"
+ }
+ ],
+ "groups": [],
+ "values": [
+ {
+ "name": "value",
+ "index": 1.0,
+ "aggr": "sum"
+ }
+ ]
},
"helium": {}
}
@@ -117,7 +144,8 @@
"language": "sql",
"editOnDblClick": false
},
- "editorMode": "ace/mode/sql"
+ "editorMode": "ace/mode/sql",
+ "fontSize": 9.0
},
"settings": {
"params": {},
@@ -133,18 +161,18 @@
]
},
"apps": [],
+ "progressUpdateIntervalMs": 500,
"jobName": "paragraph_1423500782552_-1439281894",
"id": "20150210-015302_1492795503",
- "dateCreated": "Feb 10, 2015 1:53:02 AM",
- "dateStarted": "Dec 17, 2016 3:30:13 PM",
- "dateFinished": "Dec 17, 2016 3:31:04 PM",
- "status": "FINISHED",
- "progressUpdateIntervalMs": 500
+ "dateCreated": "2015-02-10 01:53:02.000",
+ "dateStarted": "2016-12-17 15:30:13.000",
+ "dateFinished": "2016-12-17 15:31:04.000",
+ "status": "FINISHED"
},
{
"text": "%sql \nselect age, count(1) value \nfrom bank \nwhere age \u003c ${maxAge\u003d30} \ngroup by age \norder by age",
"user": "anonymous",
- "dateUpdated": "Mar 17, 2017 12:17:39 PM",
+ "dateUpdated": "2020-01-19 16:58:04.541",
"config": {
"colWidth": 4.0,
"results": [
@@ -152,7 +180,31 @@
"graph": {
"mode": "multiBarChart",
"height": 294.0,
- "optionOpen": false
+ "optionOpen": false,
+ "setting": {
+ "multiBarChart": {
+ "rotate": {
+ "degree": "-45"
+ },
+ "xLabelStatus": "default"
+ }
+ },
+ "commonSetting": {},
+ "keys": [
+ {
+ "name": "age",
+ "index": 0.0,
+ "aggr": "sum"
+ }
+ ],
+ "groups": [],
+ "values": [
+ {
+ "name": "value",
+ "index": 1.0,
+ "aggr": "sum"
+ }
+ ]
},
"helium": {}
}
@@ -162,7 +214,8 @@
"language": "sql",
"editOnDblClick": false
},
- "editorMode": "ace/mode/sql"
+ "editorMode": "ace/mode/sql",
+ "fontSize": 9.0
},
"settings": {
"params": {
@@ -170,6 +223,7 @@
},
"forms": {
"maxAge": {
+ "type": "TextBox",
"name": "maxAge",
"defaultValue": "30",
"hidden": false
@@ -186,18 +240,18 @@
]
},
"apps": [],
+ "progressUpdateIntervalMs": 500,
"jobName": "paragraph_1423720444030_-1424110477",
"id": "20150212-145404_867439529",
- "dateCreated": "Feb 12, 2015 2:54:04 PM",
- "dateStarted": "Dec 17, 2016 3:30:58 PM",
- "dateFinished": "Dec 17, 2016 3:31:07 PM",
- "status": "FINISHED",
- "progressUpdateIntervalMs": 500
+ "dateCreated": "2015-02-12 14:54:04.000",
+ "dateStarted": "2016-12-17 15:30:58.000",
+ "dateFinished": "2016-12-17 15:31:07.000",
+ "status": "FINISHED"
},
{
"text": "%sql \nselect age, count(1) value \nfrom bank \nwhere marital\u003d\"${marital\u003dsingle,single|divorced|married}\" \ngroup by age \norder by age",
"user": "anonymous",
- "dateUpdated": "Mar 17, 2017 12:18:18 PM",
+ "dateUpdated": "2020-01-19 16:58:04.590",
"config": {
"colWidth": 4.0,
"results": [
@@ -205,7 +259,31 @@
"graph": {
"mode": "stackedAreaChart",
"height": 280.0,
- "optionOpen": false
+ "optionOpen": false,
+ "setting": {
+ "stackedAreaChart": {
+ "rotate": {
+ "degree": "-45"
+ },
+ "xLabelStatus": "default"
+ }
+ },
+ "commonSetting": {},
+ "keys": [
+ {
+ "name": "age",
+ "index": 0.0,
+ "aggr": "sum"
+ }
+ ],
+ "groups": [],
+ "values": [
+ {
+ "name": "value",
+ "index": 1.0,
+ "aggr": "sum"
+ }
+ ]
},
"helium": {}
}
@@ -215,7 +293,9 @@
"language": "sql",
"editOnDblClick": false
},
- "editorMode": "ace/mode/sql"
+ "editorMode": "ace/mode/sql",
+ "fontSize": 9.0,
+ "runOnSelectionChange": true
},
"settings": {
"params": {
@@ -223,8 +303,7 @@
},
"forms": {
"marital": {
- "name": "marital",
- "defaultValue": "single",
+ "type": "Select",
"options": [
{
"value": "single"
@@ -236,6 +315,8 @@
"value": "married"
}
],
+ "name": "marital",
+ "defaultValue": "single",
"hidden": false
}
}
@@ -250,18 +331,18 @@
]
},
"apps": [],
+ "progressUpdateIntervalMs": 500,
"jobName": "paragraph_1423836262027_-210588283",
"id": "20150213-230422_1600658137",
- "dateCreated": "Feb 13, 2015 11:04:22 PM",
- "dateStarted": "Dec 17, 2016 3:31:05 PM",
- "dateFinished": "Dec 17, 2016 3:31:09 PM",
- "status": "FINISHED",
- "progressUpdateIntervalMs": 500
+ "dateCreated": "2015-02-13 23:04:22.000",
+ "dateStarted": "2016-12-17 15:31:05.000",
+ "dateFinished": "2016-12-17 15:31:09.000",
+ "status": "FINISHED"
},
{
"text": "%md\n## Congratulations, it\u0027s done.\n##### You can create your own notebook in \u0027Notebook\u0027 menu. Good luck!",
"user": "anonymous",
- "dateUpdated": "Dec 17, 2016 3:30:24 PM",
+ "dateUpdated": "2016-12-17 15:30:24.000",
"config": {
"colWidth": 12.0,
"editorHide": true,
@@ -296,18 +377,18 @@
]
},
"apps": [],
+ "progressUpdateIntervalMs": 500,
"jobName": "paragraph_1423836268492_216498320",
"id": "20150213-230428_1231780373",
- "dateCreated": "Feb 13, 2015 11:04:28 PM",
- "dateStarted": "Dec 17, 2016 3:30:24 PM",
- "dateFinished": "Dec 17, 2016 3:30:29 PM",
- "status": "FINISHED",
- "progressUpdateIntervalMs": 500
+ "dateCreated": "2015-02-13 23:04:28.000",
+ "dateStarted": "2016-12-17 15:30:24.000",
+ "dateFinished": "2016-12-17 15:30:29.000",
+ "status": "FINISHED"
},
{
"text": "%md\n\nAbout bank data\n\n```\nCitation Request:\n This dataset is public available for research. The details are described in [Moro et al., 2011]. \n Please include this citation if you plan to use this database:\n\n [Moro et al., 2011] S. Moro, R. Laureano and P. Cortez. Using Data Mining for Bank Direct Marketing: An Application of the CRISP-DM Methodology. \n In P. Novais et al. (Eds.), Proceedings of the European Simulation and Modelling Conference - ESM\u00272011 [...]
"user": "anonymous",
- "dateUpdated": "Dec 17, 2016 3:30:34 PM",
+ "dateUpdated": "2016-12-17 15:30:34.000",
"config": {
"colWidth": 12.0,
"editorHide": true,
@@ -342,13 +423,13 @@
]
},
"apps": [],
+ "progressUpdateIntervalMs": 500,
"jobName": "paragraph_1427420818407_872443482",
"id": "20150326-214658_12335843",
- "dateCreated": "Mar 26, 2015 9:46:58 PM",
- "dateStarted": "Dec 17, 2016 3:30:34 PM",
- "dateFinished": "Dec 17, 2016 3:30:34 PM",
- "status": "FINISHED",
- "progressUpdateIntervalMs": 500
+ "dateCreated": "2015-03-26 21:46:58.000",
+ "dateStarted": "2016-12-17 15:30:34.000",
+ "dateFinished": "2016-12-17 15:30:34.000",
+ "status": "FINISHED"
},
{
"config": {},
@@ -357,20 +438,26 @@
"forms": {}
},
"apps": [],
+ "progressUpdateIntervalMs": 500,
"jobName": "paragraph_1435955447812_-158639899",
"id": "20150703-133047_853701097",
- "dateCreated": "Jul 3, 2015 1:30:47 PM",
- "status": "READY",
- "progressUpdateIntervalMs": 500
+ "dateCreated": "2015-07-03 13:30:47.000",
+ "status": "READY"
}
],
"name": "Basic Features (Spark)",
"id": "2A94M5J1Z",
+ "defaultInterpreterGroup": "spark",
+ "permissions": {},
+ "noteParams": {},
+ "noteForms": {},
"angularObjects": {
"2C73DY9P9:shared_process": []
},
"config": {
- "looknfeel": "default"
+ "looknfeel": "default",
+ "isZeppelinNotebookCronEnable": true
},
- "info": {}
+ "info": {},
+ "path": "/Spark Tutorial/Basic Features (Spark)"
}
\ No newline at end of file
diff --git a/notebook/Spark Tutorial/Spark MlLib_2EZFM3GJA.zpln b/notebook/Spark Tutorial/Spark MlLib_2EZFM3GJA.zpln
new file mode 100644
index 0000000..7151807
--- /dev/null
+++ b/notebook/Spark Tutorial/Spark MlLib_2EZFM3GJA.zpln
@@ -0,0 +1,476 @@
+{
+ "paragraphs": [
+ {
+ "title": "Introduction",
+ "text": "%md\n\nThis is a tutorial of how to use Spark MLlib in Zeppelin, we have 2 examples in this note:\n\n* Linear regression, we generate some random data and use a linear regression to fit this data. We use bokeh here to visualize the data and the fitted model. Besides training, we also visualize the loss value over iteration.\n* Logstic regression, we use the offical `sample_binary_classification_data` of spark as the training data. Besides training, we also visualize the l [...]
+ "user": "anonymous",
+ "dateUpdated": "2020-01-19 17:49:49.825",
+ "config": {
+ "runOnSelectionChange": true,
+ "title": true,
+ "checkEmpty": true,
+ "colWidth": 12.0,
+ "fontSize": 9.0,
+ "enabled": true,
+ "results": {},
+ "editorSetting": {
+ "language": "text",
+ "editOnDblClick": false,
+ "completionKey": "TAB",
+ "completionSupport": true
+ },
+ "editorMode": "ace/mode/text",
+ "editorHide": true
+ },
+ "settings": {
+ "params": {},
+ "forms": {}
+ },
+ "results": {
+ "code": "SUCCESS",
+ "msg": [
+ {
+ "type": "HTML",
+ "data": "\u003cdiv class\u003d\"markdown-body\"\u003e\n\u003cp\u003eThis is a tutorial of how to use Spark MLlib in Zeppelin, we have 2 examples in this note:\u003c/p\u003e\n\u003cul\u003e\n\u003cli\u003eLinear regression, we generate some random data and use a linear regression to fit this data. We use bokeh here to visualize the data and the fitted model. Besides training, we also visualize the loss value over iteration.\u003c/li\u003e\n\u003cli\u003eLogstic regression, we [...]
+ }
+ ]
+ },
+ "apps": [],
+ "progressUpdateIntervalMs": 500,
+ "jobName": "paragraph_1579424966763_-908073771",
+ "id": "paragraph_1579424966763_-908073771",
+ "dateCreated": "2020-01-19 17:09:26.763",
+ "dateStarted": "2020-01-19 17:49:38.749",
+ "dateFinished": "2020-01-19 17:49:38.760",
+ "status": "FINISHED"
+ },
+ {
+ "title": "Initialize bokeh for visualization",
+ "text": "%spark.pyspark\n\n\nimport bkzep\nimport numpy as np\nfrom bokeh.io import output_notebook, show\nfrom bokeh.plotting import figure\n\noutput_notebook(notebook_type\u003d\u0027zeppelin\u0027)\n\n",
+ "user": "anonymous",
+ "dateUpdated": "2020-02-06 17:32:42.152",
+ "config": {
+ "colWidth": 12.0,
+ "editorMode": "ace/mode/python",
+ "results": {},
+ "enabled": true,
+ "editorSetting": {
+ "language": "python",
+ "editOnDblClick": false,
+ "completionKey": "TAB",
+ "completionSupport": true
+ },
+ "fontSize": 9.0,
+ "runOnSelectionChange": true,
+ "title": true,
+ "checkEmpty": true
+ },
+ "settings": {
+ "params": {},
+ "forms": {}
+ },
+ "results": {
+ "code": "SUCCESS",
+ "msg": [
+ {
+ "type": "HTML",
+ "data": "\n \u003cdiv class\u003d\"bk-root\"\u003e\n \u003ca href\u003d\"https://bokeh.pydata.org\" target\u003d\"_blank\" class\u003d\"bk-logo bk-logo-small bk-logo-notebook\"\u003e\u003c/a\u003e\n \u003cspan id\u003d\"1001\"\u003eLoading BokehJS ...\u003c/span\u003e\n \u003c/div\u003e\n\n"
+ },
+ {
+ "type": "HTML",
+ "data": "\u003cscript type\u003d\"text/javascript\"\u003e\n(function(root) {\n function now() {\n return new Date();\n }\n\n var force \u003d true;\n\n if (typeof root._bokeh_onload_callbacks \u003d\u003d\u003d \"undefined\" || force \u003d\u003d\u003d true) {\n root._bokeh_onload_callbacks \u003d [];\n root._bokeh_is_loading \u003d undefined;\n }\n\n \n\n \n if (typeof (root._bokeh_timeout) \u003d\u003d\u003d \"undefined\" || force \u003d\u003d\u003d true) { [...]
+ }
+ ]
+ },
+ "apps": [],
+ "progressUpdateIntervalMs": 500,
+ "jobName": "paragraph_1578387637676_907388241",
+ "id": "20170621-112503_896367416",
+ "dateCreated": "2020-01-07 17:00:37.676",
+ "dateStarted": "2020-02-06 17:32:42.158",
+ "dateFinished": "2020-02-06 17:32:48.154",
+ "status": "FINISHED"
+ },
+ {
+ "title": "Generate Data",
+ "text": "%spark.pyspark\n\nimport numpy as np\nfrom bokeh.io import output_notebook, show\nfrom bokeh.plotting import figure\n\n\nnum \u003d 1000\nx \u003d np.linspace(0, 10, num)\ny \u003d 2 * x + np.random.normal(0,4, num)\n\np \u003d figure()\np.circle(x, y)\nshow(p)",
+ "user": "anonymous",
+ "dateUpdated": "2020-02-06 17:32:49.175",
+ "config": {
+ "editorSetting": {
+ "language": "python",
+ "editOnDblClick": false,
+ "completionKey": "TAB",
+ "completionSupport": true
+ },
+ "colWidth": 6.0,
+ "editorMode": "ace/mode/python",
+ "title": true,
+ "results": {},
+ "enabled": true,
+ "fontSize": 9.0,
+ "runOnSelectionChange": true,
+ "checkEmpty": true
+ },
+ "settings": {
+ "params": {},
+ "forms": {}
+ },
+ "results": {
+ "code": "SUCCESS",
+ "msg": [
+ {
+ "type": "HTML",
+ "data": "\n\n\n\n\n\n \u003cdiv class\u003d\"bk-root\" id\u003d\"5796ac13-2a0b-4818-bde3-53c6f6d63706\" data-root-id\u003d\"1002\"\u003e\u003c/div\u003e\n\n"
+ },
+ {
+ "type": "HTML",
+ "data": "\u003cscript type\u003d\"text/javascript\"\u003e(function(root) {\n function embed_document(root) {\n \n var docs_json \u003d {\"5bcb0995-0594-4e13-a66c-232301b9ac8b\":{\"roots\":{\"references\":[{\"attributes\":{\"below\":[{\"id\":\"1011\",\"type\":\"LinearAxis\"}],\"center\":[{\"id\":\"1015\",\"type\":\"Grid\"},{\"id\":\"1020\",\"type\":\"Grid\"}],\"left\":[{\"id\":\"1016\",\"type\":\"LinearAxis\"}],\"renderers\":[{\"id\":\"1037\",\"type\":\"GlyphRenderer\"}], [...]
+ }
+ ]
+ },
+ "apps": [],
+ "progressUpdateIntervalMs": 500,
+ "jobName": "paragraph_1578387637677_-677869302",
+ "id": "20170621-112549_1048642377",
+ "dateCreated": "2020-01-07 17:00:37.677",
+ "dateStarted": "2020-02-06 17:32:49.210",
+ "dateFinished": "2020-02-06 17:32:49.446",
+ "status": "FINISHED"
+ },
+ {
+ "title": "Linear Regression",
+ "text": "%spark.pyspark\n\n\nfrom pyspark.ml.regression import LinearRegression\nimport pandas as pd\nfrom pyspark.ml.linalg import DenseVector, Vectors, VectorUDT\nfrom pyspark.sql.functions import udf\nfrom pyspark.sql.types import UserDefinedType, StringType\n\nto_vector \u003d udf(lambda x: Vectors.dense(x), VectorUDT())\n\ndf \u003d pd.DataFrame({\u0027features\u0027: x, \u0027label\u0027: y})\ntraining \u003d spark.createDataFrame(df).withColumn(\u0027features\u0027, to_vecto [...]
+ "user": "anonymous",
+ "dateUpdated": "2020-02-06 17:32:50.665",
+ "config": {
+ "editorSetting": {
+ "language": "python",
+ "editOnDblClick": false,
+ "completionKey": "TAB",
+ "completionSupport": true
+ },
+ "colWidth": 6.0,
+ "editorMode": "ace/mode/python",
+ "title": true,
+ "results": {},
+ "enabled": true,
+ "fontSize": 9.0,
+ "runOnSelectionChange": true,
+ "checkEmpty": true
+ },
+ "settings": {
+ "params": {},
+ "forms": {}
+ },
+ "results": {
+ "code": "SUCCESS",
+ "msg": [
+ {
+ "type": "TEXT",
+ "data": "Coefficients: [1.8614528996941055]\nIntercept: 0.7649881218726886\nnumIterations: 3\nobjectiveHistory: [0.49949999999999994, 0.4246037680574619, 0.1855160673747118]\n+-------------------+\n| residuals|\n+-------------------+\n| 0.7273003952948311|\n|-1.0819930705271357|\n| 6.062463081770692|\n| -2.500318041753202|\n| -5.423033254038138|\n|-5.3530888966425465|\n| -8.505553026370603|\n| -4.229368966305803|\n|-2.9683254567772837|\n| -1.606198361582193|\n| 6.0 [...]
+ }
+ ]
+ },
+ "apps": [],
+ "progressUpdateIntervalMs": 500,
+ "jobName": "paragraph_1578387637679_-1199864234",
+ "id": "20170621-120434_191947265",
+ "dateCreated": "2020-01-07 17:00:37.679",
+ "dateStarted": "2020-02-06 17:32:50.670",
+ "dateFinished": "2020-02-06 17:32:55.588",
+ "status": "FINISHED"
+ },
+ {
+ "title": "Linear Regression",
+ "text": "%spark.pyspark\n\n\np \u003d figure()\np.circle(x, y)\n\ny_predict \u003d x * lrModel.coefficients + (lrModel.intercept)\np.line(x, y_predict, color\u003d\u0027red\u0027, line_width\u003d3)\n\nshow(p)",
+ "user": "anonymous",
+ "dateUpdated": "2020-02-06 17:32:59.103",
+ "config": {
+ "editorSetting": {
+ "language": "python",
+ "editOnDblClick": false,
+ "completionKey": "TAB",
+ "completionSupport": true
+ },
+ "colWidth": 6.0,
+ "editorMode": "ace/mode/python",
+ "title": true,
+ "results": {},
+ "enabled": true,
+ "fontSize": 9.0,
+ "runOnSelectionChange": true,
+ "checkEmpty": true
+ },
+ "settings": {
+ "params": {},
+ "forms": {}
+ },
+ "results": {
+ "code": "SUCCESS",
+ "msg": [
+ {
+ "type": "HTML",
+ "data": "\n\n\n\n\n\n \u003cdiv class\u003d\"bk-root\" id\u003d\"2fc464e9-475f-42ee-b903-8ea913a6bf48\" data-root-id\u003d\"1093\"\u003e\u003c/div\u003e\n\n"
+ },
+ {
+ "type": "HTML",
+ "data": "\u003cscript type\u003d\"text/javascript\"\u003e(function(root) {\n function embed_document(root) {\n \n var docs_json \u003d {\"9dc37edf-f381-4505-b27a-79c925d75416\":{\"roots\":{\"references\":[{\"attributes\":{\"below\":[{\"id\":\"1102\",\"type\":\"LinearAxis\"}],\"center\":[{\"id\":\"1106\",\"type\":\"Grid\"},{\"id\":\"1111\",\"type\":\"Grid\"}],\"left\":[{\"id\":\"1107\",\"type\":\"LinearAxis\"}],\"renderers\":[{\"id\":\"1128\",\"type\":\"GlyphRenderer\"},{ [...]
+ }
+ ]
+ },
+ "apps": [],
+ "progressUpdateIntervalMs": 500,
+ "jobName": "paragraph_1578387637679_-622761908",
+ "id": "20170621-120503_149885741",
+ "dateCreated": "2020-01-07 17:00:37.679",
+ "dateStarted": "2020-02-06 17:32:59.111",
+ "dateFinished": "2020-02-06 17:32:59.348",
+ "status": "FINISHED"
+ },
+ {
+ "title": "Loss",
+ "text": "%spark.pyspark\n\np \u003d figure()\ntrainingSummary.objectiveHistory\np.line(range(len(trainingSummary.objectiveHistory)), trainingSummary.objectiveHistory, color\u003d\u0027blue\u0027, line_width\u003d3)\nshow(p)\n\n\n\n",
+ "user": "anonymous",
+ "dateUpdated": "2020-02-06 17:33:00.975",
+ "config": {
+ "editorSetting": {
+ "language": "python",
+ "editOnDblClick": false,
+ "completionKey": "TAB",
+ "completionSupport": true
+ },
+ "colWidth": 6.0,
+ "editorMode": "ace/mode/python",
+ "title": true,
+ "results": {},
+ "enabled": true,
+ "fontSize": 9.0,
+ "runOnSelectionChange": true,
+ "checkEmpty": true
+ },
+ "settings": {
+ "params": {},
+ "forms": {}
+ },
+ "results": {
+ "code": "SUCCESS",
+ "msg": [
+ {
+ "type": "HTML",
+ "data": "\n\n\n\n\n\n \u003cdiv class\u003d\"bk-root\" id\u003d\"647265e5-85b1-48fb-b0a3-31471b5412ed\" data-root-id\u003d\"1201\"\u003e\u003c/div\u003e\n\n"
+ },
+ {
+ "type": "HTML",
+ "data": "\u003cscript type\u003d\"text/javascript\"\u003e(function(root) {\n function embed_document(root) {\n \n var docs_json \u003d {\"3c275376-9866-400a-9866-f1bb83b9bbae\":{\"roots\":{\"references\":[{\"attributes\":{\"below\":[{\"id\":\"1210\",\"type\":\"LinearAxis\"}],\"center\":[{\"id\":\"1214\",\"type\":\"Grid\"},{\"id\":\"1219\",\"type\":\"Grid\"}],\"left\":[{\"id\":\"1215\",\"type\":\"LinearAxis\"}],\"renderers\":[{\"id\":\"1236\",\"type\":\"GlyphRenderer\"}], [...]
+ }
+ ]
+ },
+ "apps": [],
+ "progressUpdateIntervalMs": 500,
+ "jobName": "paragraph_1578387637681_1726402393",
+ "id": "20170621-120529_381271006",
+ "dateCreated": "2020-01-07 17:00:37.681",
+ "dateStarted": "2020-02-06 17:33:00.979",
+ "dateFinished": "2020-02-06 17:33:01.160",
+ "status": "FINISHED"
+ },
+ {
+ "title": "Download data for logics regression",
+ "text": "%sh\n\ncd /tmp\nwget https://github.com/apache/spark/raw/master/data/mllib/sample_binary_classification_data.txt\n",
+ "user": "anonymous",
+ "dateUpdated": "2020-02-06 17:33:09.327",
+ "config": {
+ "runOnSelectionChange": true,
+ "title": true,
+ "checkEmpty": true,
+ "colWidth": 12.0,
+ "fontSize": 9.0,
+ "enabled": true,
+ "results": {},
+ "editorSetting": {
+ "language": "sh",
+ "editOnDblClick": false,
+ "completionKey": "TAB",
+ "completionSupport": false
+ },
+ "editorMode": "ace/mode/sh"
+ },
+ "settings": {
+ "params": {},
+ "forms": {}
+ },
+ "results": {
+ "code": "SUCCESS",
+ "msg": [
+ {
+ "type": "TEXT",
+ "data": "--2020-02-06 17:33:10-- https://github.com/apache/spark/raw/master/data/mllib/sample_binary_classification_data.txt\nResolving github.com (github.com)... 52.192.72.89\nConnecting to github.com (github.com)|52.192.72.89|:443... connected.\nHTTP request sent, awaiting response... 302 Found\nLocation: https://raw.githubusercontent.com/apache/spark/master/data/mllib/sample_binary_classification_data.txt [following]\n--2020-02-06 17:33:11-- https://raw.githubusercontent [...]
+ }
+ ]
+ },
+ "apps": [],
+ "progressUpdateIntervalMs": 500,
+ "jobName": "paragraph_1579425300864_833813189",
+ "id": "paragraph_1579425300864_833813189",
+ "dateCreated": "2020-01-19 17:15:00.864",
+ "dateStarted": "2020-02-06 17:33:09.332",
+ "dateFinished": "2020-02-06 17:33:16.226",
+ "status": "FINISHED"
+ },
+ {
+ "title": "Logistic Regression",
+ "text": "%spark.pyspark\n\nfrom pyspark.ml.classification import LogisticRegression\nfrom pyspark.ml.classification import NaiveBayes\nfrom pyspark.sql import SparkSession\nfrom pyspark.sql.functions import udf\nfrom pyspark.sql.types import DoubleType\n\n# Load data\ndataset \u003d spark.read.format(\"libsvm\").load(\"file:///tmp/sample_binary_classification_data.txt\")\ndataset \u003d dataset.randomSplit([0.7,0.3])\ntrainSet, testSet \u003d (dataset[0], dataset[1])\ntrainSet.cach [...]
+ "user": "anonymous",
+ "dateUpdated": "2020-02-06 17:33:38.974",
+ "config": {
+ "editorSetting": {
+ "language": "python",
+ "editOnDblClick": false,
+ "completionKey": "TAB",
+ "completionSupport": true
+ },
+ "colWidth": 6.0,
+ "editorMode": "ace/mode/python",
+ "title": true,
+ "results": {},
+ "enabled": true,
+ "fontSize": 9.0,
+ "runOnSelectionChange": true,
+ "checkEmpty": true
+ },
+ "settings": {
+ "params": {},
+ "forms": {}
+ },
+ "results": {
+ "code": "SUCCESS",
+ "msg": [
+ {
+ "type": "TEXT",
+ "data": "1.0"
+ }
+ ]
+ },
+ "apps": [],
+ "progressUpdateIntervalMs": 500,
+ "jobName": "paragraph_1578387637682_-1775952874",
+ "id": "20170621-120800_296677966",
+ "dateCreated": "2020-01-07 17:00:37.682",
+ "dateStarted": "2020-02-06 17:33:38.980",
+ "dateFinished": "2020-02-06 17:33:41.157",
+ "status": "FINISHED"
+ },
+ {
+ "title": "Loss",
+ "text": "%spark.pyspark\n\np \u003d figure()\np.line(x\u003drange(len(objectiveHistory1)), y\u003dobjectiveHistory1, color\u003d\u0027blue\u0027, line_width\u003d2, legend\u003d\u0027Loss of LR1\u0027)\n\nshow(p)",
+ "user": "anonymous",
+ "dateUpdated": "2020-02-06 17:33:45.592",
+ "config": {
+ "editorSetting": {
+ "language": "python",
+ "editOnDblClick": false,
+ "completionKey": "TAB",
+ "completionSupport": true
+ },
+ "colWidth": 6.0,
+ "editorMode": "ace/mode/python",
+ "title": true,
+ "results": {
+ "0": {
+ "graph": {
+ "mode": "table",
+ "height": 300.0,
+ "optionOpen": false,
+ "setting": {
+ "table": {
+ "tableGridState": {},
+ "tableColumnTypeState": {
+ "names": {
+ "Col1": "string",
+ "Col2": "string"
+ },
+ "updated": false
+ },
+ "tableOptionSpecHash": "[{\"name\":\"useFilter\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable filter for columns\"},{\"name\":\"showPagination\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable pagination for better navigation\"},{\"name\":\"showAggregationFooter\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable a footer [...]
+ "tableOptionValue": {
+ "useFilter": false,
+ "showPagination": false,
+ "showAggregationFooter": false
+ },
+ "updated": false,
+ "initialized": false
+ }
+ },
+ "commonSetting": {}
+ }
+ }
+ },
+ "enabled": true,
+ "fontSize": 9.0,
+ "runOnSelectionChange": true,
+ "checkEmpty": true
+ },
+ "settings": {
+ "params": {},
+ "forms": {}
+ },
+ "results": {
+ "code": "SUCCESS",
+ "msg": [
+ {
+ "type": "HTML",
+ "data": "\n\n\n\n\n\n \u003cdiv class\u003d\"bk-root\" id\u003d\"9710f9d2-4d4a-4bd4-857b-c7ea6c849866\" data-root-id\u003d\"1392\"\u003e\u003c/div\u003e\n\n"
+ },
+ {
+ "type": "HTML",
+ "data": "\u003cscript type\u003d\"text/javascript\"\u003e(function(root) {\n function embed_document(root) {\n \n var docs_json \u003d {\"657f0457-a7a8-40b7-9b69-ca6a0947d281\":{\"roots\":{\"references\":[{\"attributes\":{\"below\":[{\"id\":\"1401\",\"type\":\"LinearAxis\"}],\"center\":[{\"id\":\"1405\",\"type\":\"Grid\"},{\"id\":\"1410\",\"type\":\"Grid\"},{\"id\":\"1436\",\"type\":\"Legend\"}],\"left\":[{\"id\":\"1406\",\"type\":\"LinearAxis\"}],\"renderers\":[{\"id\": [...]
+ }
+ ]
+ },
+ "apps": [],
+ "progressUpdateIntervalMs": 500,
+ "jobName": "paragraph_1578387637682_-1012332713",
+ "id": "20170621-121651_569823333",
+ "dateCreated": "2020-01-07 17:00:37.682",
+ "dateStarted": "2020-02-06 17:33:45.597",
+ "dateFinished": "2020-02-06 17:33:45.769",
+ "status": "FINISHED"
+ },
+ {
+ "title": "",
+ "text": "%spark.pyspark\n",
+ "user": "anonymous",
+ "dateUpdated": "2020-01-07 17:00:37.682",
+ "config": {},
+ "settings": {
+ "params": {},
+ "forms": {}
+ },
+ "apps": [],
+ "progressUpdateIntervalMs": 500,
+ "jobName": "paragraph_1578387637682_-181840828",
+ "id": "20170621-155011_1790753917",
+ "dateCreated": "2020-01-07 17:00:37.682",
+ "status": "READY"
+ }
+ ],
+ "name": "Spark MlLib",
+ "id": "2EZFM3GJA",
+ "defaultInterpreterGroup": "spark",
+ "version": "0.9.0-SNAPSHOT",
+ "permissions": {
+ "owners": [],
+ "runners": [],
+ "readers": [],
+ "writers": []
+ },
+ "noteParams": {},
+ "noteForms": {},
+ "angularObjects": {},
+ "config": {
+ "isZeppelinNotebookCronEnable": true
+ },
+ "info": {},
+ "path": "/Spark Tutorial/Spark MlLib"
+}
\ No newline at end of file
diff --git a/notebook/Spark Tutorial/Spark SQL (PySpark)_2EWM84JXA.zpln b/notebook/Spark Tutorial/Spark SQL (PySpark)_2EWM84JXA.zpln
new file mode 100644
index 0000000..6a6bd7b
--- /dev/null
+++ b/notebook/Spark Tutorial/Spark SQL (PySpark)_2EWM84JXA.zpln
@@ -0,0 +1,1184 @@
+{
+ "paragraphs": [
+ {
+ "title": "Introduction",
+ "text": "%md\n\nThis is a tutorial for Spark SQL in PySpark (based on Spark 2.x). First we need to clarifiy serveral concetps of Spark SQL\n\n* **SparkSession** - This is the entry point of Spark SQL, you use `SparkSession` to create DataFrame/Dataset, register UDF, query table and etc.\n* **DataFrame** - There\u0027s no Dataset in PySpark, but only DataFrame. The DataFrame of PySpark is very similar with DataFrame concept of Pandas, but is distributed. \n",
+ "user": "anonymous",
+ "dateUpdated": "2020-01-21 15:46:33.852",
+ "config": {
+ "tableHide": false,
+ "editorSetting": {
+ "language": "markdown",
+ "editOnDblClick": true,
+ "completionKey": "TAB",
+ "completionSupport": false
+ },
+ "colWidth": 12.0,
+ "editorMode": "ace/mode/markdown",
+ "fontSize": 9.0,
+ "editorHide": true,
+ "results": {},
+ "enabled": true,
+ "runOnSelectionChange": true,
+ "title": true,
+ "checkEmpty": true
+ },
+ "settings": {
+ "params": {},
+ "forms": {}
+ },
+ "results": {
+ "code": "SUCCESS",
+ "msg": [
+ {
+ "type": "HTML",
+ "data": "\u003cdiv class\u003d\"markdown-body\"\u003e\n\u003cp\u003eThis is a tutorial for Spark SQL in PySpark (based on Spark 2.x). First we need to clarifiy serveral concetps of Spark SQL\u003c/p\u003e\n\u003cul\u003e\n\u003cli\u003e\u003cstrong\u003eSparkSession\u003c/strong\u003e - This is the entry point of Spark SQL, you use \u003ccode\u003eSparkSession\u003c/code\u003e to create DataFrame/Dataset, register UDF, query table and etc.\u003c/li\u003e\n\u003cli\u003e\u0 [...]
+ }
+ ]
+ },
+ "apps": [],
+ "progressUpdateIntervalMs": 500,
+ "jobName": "paragraph_1578387700849_2035279674",
+ "id": "20180530-101118_380906698",
+ "dateCreated": "2020-01-07 17:01:40.849",
+ "dateStarted": "2020-01-21 15:46:33.862",
+ "dateFinished": "2020-01-21 15:46:35.134",
+ "status": "FINISHED"
+ },
+ {
+ "title": "Create DataFrame",
+ "text": "%md\n\nThere are 2 ways to create a DataFrame\n\n* Use SparkSession to create a DataFrame directly. You can create a DataFrame from an RDD, a Python list, etc.\n* Use DataFrameReader to create a Dataset/DataFrame from the many kinds of storage supported by Spark, such as HDFS, JDBC, etc.",
+ "user": "anonymous",
+ "dateUpdated": "2020-01-21 15:46:35.190",
+ "config": {
+ "tableHide": false,
+ "editorSetting": {
+ "language": "markdown",
+ "editOnDblClick": true,
+ "completionKey": "TAB",
+ "completionSupport": false
+ },
+ "colWidth": 12.0,
+ "editorMode": "ace/mode/markdown",
+ "fontSize": 9.0,
+ "editorHide": true,
+ "title": true,
+ "results": {},
+ "enabled": true,
+ "runOnSelectionChange": true,
+ "checkEmpty": true
+ },
+ "settings": {
+ "params": {},
+ "forms": {}
+ },
+ "results": {
+ "code": "SUCCESS",
+ "msg": [
+ {
+ "type": "HTML",
+ "data": "\u003cdiv class\u003d\"markdown-body\"\u003e\n\u003cp\u003eThere are 2 ways to create a DataFrame\u003c/p\u003e\n\u003cul\u003e\n\u003cli\u003eUse SparkSession to create a DataFrame directly. You can create a DataFrame from an RDD, a Python list, etc.\u003c/li\u003e\n\u003cli\u003eUse DataFrameReader to create a Dataset/DataFrame from the many kinds of storage supported by Spark, such as HDFS, JDBC, etc.\u003c/li\u003e\n\u003c/ul\u003e\n\n\u003c/d [...]
+ }
+ ]
+ },
+ "apps": [],
+ "progressUpdateIntervalMs": 500,
+ "jobName": "paragraph_1578387700850_-1934281275",
+ "id": "20180530-101515_948520659",
+ "dateCreated": "2020-01-07 17:01:40.850",
+ "dateStarted": "2020-01-21 15:46:35.204",
+ "dateFinished": "2020-01-21 15:46:35.222",
+ "status": "FINISHED"
+ },
+ {
+ "title": "Prerequisites",
+ "text": "%md\n\n**It is strongly recommended to run the following %spark.conf paragraph first to make sure the correct configuration is used.**",
+ "user": "anonymous",
+ "dateUpdated": "2020-01-21 15:46:35.304",
+ "config": {
+ "tableHide": false,
+ "editorSetting": {
+ "language": "markdown",
+ "editOnDblClick": true,
+ "completionKey": "TAB",
+ "completionSupport": false
+ },
+ "colWidth": 12.0,
+ "editorMode": "ace/mode/markdown",
+ "fontSize": 9.0,
+ "editorHide": true,
+ "title": false,
+ "results": {},
+ "enabled": true,
+ "runOnSelectionChange": true,
+ "checkEmpty": true
+ },
+ "settings": {
+ "params": {},
+ "forms": {}
+ },
+ "results": {
+ "code": "SUCCESS",
+ "msg": [
+ {
+ "type": "HTML",
+ "data": "\u003cdiv class\u003d\"markdown-body\"\u003e\n\u003cp\u003e\u003cstrong\u003eIt is strongly recommended to run the following %spark.conf paragraph first to make sure the correct configuration is used.\u003c/strong\u003e\u003c/p\u003e\n\n\u003c/div\u003e"
+ }
+ ]
+ },
+ "apps": [],
+ "progressUpdateIntervalMs": 500,
+ "jobName": "paragraph_1578387700850_-1709355244",
+ "id": "20180530-110023_1756702033",
+ "dateCreated": "2020-01-07 17:01:40.850",
+ "dateStarted": "2020-01-21 15:46:35.314",
+ "dateFinished": "2020-01-21 15:46:35.326",
+ "status": "FINISHED"
+ },
+ {
+ "title": "Spark Configuration",
+ "text": "%spark.conf\n\n# It is strongly recommended to set SPARK_HOME explicitly instead of using the embedded Spark of Zeppelin, as the embedded Spark is limited in functionality and can only run in local mode.\n# SPARK_HOME /Users/jzhang/Java/lib/spark-2.3.0-bin-hadoop2.7\n\n# Uncomment the following line if you want to use yarn-cluster mode (It is recommended to use yarn-cluster mode from Zeppelin 0.8, as the driver will run on the remote host of yarn cluster which can mi [...]
+ "user": "anonymous",
+ "dateUpdated": "2020-01-21 15:46:35.418",
+ "config": {
+ "editorSetting": {
+ "language": "text",
+ "editOnDblClick": false,
+ "completionKey": "TAB",
+ "completionSupport": true
+ },
+ "colWidth": 12.0,
+ "editorMode": "ace/mode/text",
+ "fontSize": 9.0,
+ "results": {},
+ "enabled": true,
+ "runOnSelectionChange": true,
+ "title": true,
+ "checkEmpty": true
+ },
+ "settings": {
+ "params": {},
+ "forms": {}
+ },
+ "results": {
+ "code": "SUCCESS",
+ "msg": []
+ },
+ "apps": [],
+ "progressUpdateIntervalMs": 500,
+ "jobName": "paragraph_1578387700850_-1532509261",
+ "id": "20180530-110007_162886838",
+ "dateCreated": "2020-01-07 17:01:40.850",
+ "dateStarted": "2020-01-21 15:46:35.428",
+ "dateFinished": "2020-01-21 15:46:35.453",
+ "status": "FINISHED"
+ },
+ {
+ "title": "Create Dataset/DataFrame via SparkSession",
+ "text": "%spark.pyspark\n\n# create DataFrame from a python list. Spark can infer the schema for you.\ndf1 \u003d spark.createDataFrame([(1, \"andy\", 20, \"USA\"), (2, \"jeff\", 23, \"China\"), (3, \"james\", 18, \"USA\")]).toDF(\"id\", \"name\", \"age\", \"country\")\ndf1.printSchema()\ndf1.show()\n\n# create DataFrame from a pandas dataframe\ndf2 \u003d spark.createDataFrame(df1.toPandas())\ndf2.printSchema()\ndf2.show()\n",
+ "user": "anonymous",
+ "dateUpdated": "2020-01-21 15:46:35.640",
+ "config": {
+ "editorSetting": {
+ "language": "python",
+ "editOnDblClick": false,
+ "completionKey": "TAB",
+ "completionSupport": true
+ },
+ "colWidth": 6.0,
+ "editorMode": "ace/mode/python",
+ "fontSize": 9.0,
+ "title": true,
+ "results": {},
+ "enabled": true,
+ "runOnSelectionChange": true,
+ "checkEmpty": true
+ },
+ "settings": {
+ "params": {},
+ "forms": {}
+ },
+ "results": {
+ "code": "SUCCESS",
+ "msg": [
+ {
+ "type": "TEXT",
+ "data": "+---+-----+---+-------+\n| id| name|age|country|\n+---+-----+---+-------+\n| 1| andy| 20| USA|\n| 2| jeff| 23| China|\n| 3|james| 18| USA|\n+---+-----+---+-------+\n\n+---+-----+---+-------+\n| id| name|age|country|\n+---+-----+---+-------+\n| 1| andy| 20| USA|\n| 2| jeff| 23| China|\n| 3|james| 18| USA|\n+---+-----+---+-------+\n\n"
+ }
+ ]
+ },
+ "apps": [],
+ "progressUpdateIntervalMs": 500,
+ "jobName": "paragraph_1578387700850_1345292725",
+ "id": "20180530-101750_1491737301",
+ "dateCreated": "2020-01-07 17:01:40.850",
+ "dateStarted": "2020-01-21 15:46:35.647",
+ "dateFinished": "2020-01-21 15:46:57.355",
+ "status": "FINISHED"
+ },
+ {
+ "title": "Create DataFrame via DataFrameReader",
+ "text": "%spark.pyspark\n\n# Read data from a json file\n# link for this people.json (https://github.com/apache/spark/blob/master/examples/src/main/resources/people.json)\n# Make sure you have this file on the local file system or HDFS\ndf1 \u003d spark.read.json(\"file:///Users/jzhang/Java/lib/spark-2.4.3-bin-hadoop2.7/examples/src/main/resources/people.json\")\ndf1.printSchema()\ndf1.show()\n\n# Read data from a csv file. You can customize it via spark.read.options. E.g. In the following [...]
+ "user": "anonymous",
+ "dateUpdated": "2020-01-21 15:46:57.398",
+ "config": {
+ "lineNumbers": false,
+ "editorSetting": {
+ "language": "python",
+ "editOnDblClick": false,
+ "completionKey": "TAB",
+ "completionSupport": true
+ },
+ "colWidth": 6.0,
+ "editorMode": "ace/mode/python",
+ "fontSize": 9.0,
+ "title": true,
+ "results": {},
+ "enabled": true,
+ "runOnSelectionChange": true,
+ "checkEmpty": true
+ },
+ "settings": {
+ "params": {},
+ "forms": {}
+ },
+ "results": {
+ "code": "SUCCESS",
+ "msg": [
+ {
+ "type": "TEXT",
+ "data": "root\n |-- age: long (nullable \u003d true)\n |-- name: string (nullable \u003d true)\n\n+----+-------+\n| age| name|\n+----+-------+\n|null|Michael|\n| 30| Andy|\n| 19| Justin|\n+----+-------+\n\nroot\n |-- name: string (nullable \u003d true)\n |-- age: string (nullable \u003d true)\n |-- job: string (nullable \u003d true)\n\n+-----+---+---------+\n| name|age| job|\n+-----+---+---------+\n|Jorge| 30|Developer|\n| Bob| 32|Developer|\n+-----+---+---------+ [...]
+ }
+ ]
+ },
+ "apps": [],
+ "progressUpdateIntervalMs": 500,
+ "jobName": "paragraph_1578387700850_581443636",
+ "id": "20180530-101930_1495479697",
+ "dateCreated": "2020-01-07 17:01:40.850",
+ "dateStarted": "2020-01-21 15:46:57.417",
+ "dateFinished": "2020-01-21 15:47:00.449",
+ "status": "FINISHED"
+ },
+ {
+ "title": "Add New Column",
+ "text": "%spark.pyspark\n\n# withColumn can be used to add a new Column\ndf1 \u003d spark.createDataFrame([(1, \"andy\", 20, \"USA\"), (2, \"jeff\", 23, \"China\"), (3, \"james\", 18, \"USA\")]).toDF(\"id\", \"name\", \"age\", \"country\")\n\ndf2 \u003d df1.withColumn(\"age2\", df1[\"age\"] + 1)\ndf2.show()\n\n# the new column replaces an existing column if the new column name is the same as an existing column name\ndf3 \u003d df1.withColumn(\"age\", df1[\"age\"] + 1)\ndf3.show()\ [...]
+ "user": "anonymous",
+ "dateUpdated": "2020-01-21 15:47:00.454",
+ "config": {
+ "editorSetting": {
+ "language": "python",
+ "editOnDblClick": false,
+ "completionKey": "TAB",
+ "completionSupport": true
+ },
+ "colWidth": 6.0,
+ "editorMode": "ace/mode/python",
+ "fontSize": 9.0,
+ "title": true,
+ "results": {},
+ "enabled": true,
+ "runOnSelectionChange": true,
+ "checkEmpty": true
+ },
+ "settings": {
+ "params": {},
+ "forms": {}
+ },
+ "results": {
+ "code": "SUCCESS",
+ "msg": [
+ {
+ "type": "TEXT",
+ "data": "+---+-----+---+-------+----+\n| id| name|age|country|age2|\n+---+-----+---+-------+----+\n| 1| andy| 20| USA| 21|\n| 2| jeff| 23| China| 24|\n| 3|james| 18| USA| 19|\n+---+-----+---+-------+----+\n\n+---+-----+---+-------+\n| id| name|age|country|\n+---+-----+---+-------+\n| 1| andy| 21| USA|\n| 2| jeff| 24| China|\n| 3|james| 19| USA|\n+---+-----+---+-------+\n\n+---+-----+---+-------+\n| id| name|age|country|\n+---+-----+---+-------+\n| 1| A [...]
+ }
+ ]
+ },
+ "apps": [],
+ "progressUpdateIntervalMs": 500,
+ "jobName": "paragraph_1578387700850_-775755394",
+ "id": "20180530-105113_693855403",
+ "dateCreated": "2020-01-07 17:01:40.850",
+ "dateStarted": "2020-01-21 15:47:00.460",
+ "dateFinished": "2020-01-21 15:47:01.062",
+ "status": "FINISHED"
+ },
+ {
+ "title": "Remove Column",
+ "text": "%spark.pyspark\n\ndf1 \u003d spark.createDataFrame([(1, \"andy\", 20, \"USA\"), (2, \"jeff\", 23, \"China\"), (3, \"james\", 18, \"USA\")]).toDF(\"id\", \"name\", \"age\", \"country\")\n# drop can be used to remove a Column\ndf2 \u003d df1.drop(\"id\")\ndf2.show()\n",
+ "user": "anonymous",
+ "dateUpdated": "2020-01-21 15:47:01.074",
+ "config": {
+ "editorSetting": {
+ "language": "python",
+ "editOnDblClick": false,
+ "completionKey": "TAB",
+ "completionSupport": true
+ },
+ "colWidth": 6.0,
+ "editorMode": "ace/mode/python",
+ "fontSize": 9.0,
+ "title": true,
+ "results": {},
+ "enabled": true,
+ "runOnSelectionChange": true,
+ "checkEmpty": true
+ },
+ "settings": {
+ "params": {},
+ "forms": {}
+ },
+ "results": {
+ "code": "SUCCESS",
+ "msg": [
+ {
+ "type": "TEXT",
+ "data": "+-----+---+-------+\n| name|age|country|\n+-----+---+-------+\n| andy| 20| USA|\n| jeff| 23| China|\n|james| 18| USA|\n+-----+---+-------+\n\n"
+ }
+ ]
+ },
+ "apps": [],
+ "progressUpdateIntervalMs": 500,
+ "jobName": "paragraph_1578387700850_-886487025",
+ "id": "20180530-112045_1274721210",
+ "dateCreated": "2020-01-07 17:01:40.850",
+ "dateStarted": "2020-01-21 15:47:01.080",
+ "dateFinished": "2020-01-21 15:47:01.415",
+ "status": "FINISHED"
+ },
+ {
+ "title": "Select Subset of Columns",
+ "text": "%spark.pyspark\n\ndf1 \u003d spark.createDataFrame([(1, \"andy\", 20, \"USA\"), (2, \"jeff\", 23, \"China\"), (3, \"james\", 18, \"USA\")]).toDF(\"id\", \"name\", \"age\", \"country\")\n# select can accept a list of string of the column names\ndf2 \u003d df1.select(\"id\", \"name\")\ndf2.show()\n\n# select can also accept a list of Columns. You can create a Column via df[...], functions in pyspark.sql.functions, or a udf\nimport pyspark.sql.functions as F\ndf3 \u003d df1.select(df1[\"id\"], F.upper(df1[\"name\"]), df1[\ [...]
+ "user": "anonymous",
+ "dateUpdated": "2020-01-21 15:47:01.495",
+ "config": {
+ "editorSetting": {
+ "language": "python",
+ "editOnDblClick": false,
+ "completionKey": "TAB",
+ "completionSupport": true
+ },
+ "colWidth": 6.0,
+ "editorMode": "ace/mode/python",
+ "fontSize": 9.0,
+ "title": true,
+ "results": {},
+ "enabled": true,
+ "runOnSelectionChange": true,
+ "checkEmpty": true
+ },
+ "settings": {
+ "params": {},
+ "forms": {}
+ },
+ "results": {
+ "code": "SUCCESS",
+ "msg": [
+ {
+ "type": "TEXT",
+ "data": "+---+-----+\n| id| name|\n+---+-----+\n| 1| andy|\n| 2| jeff|\n| 3|james|\n+---+-----+\n\n+---+-----------+---------+\n| id|upper(name)|(age + 1)|\n+---+-----------+---------+\n| 1| ANDY| 21|\n| 2| JEFF| 24|\n| 3| JAMES| 19|\n+---+-----------+---------+\n\n"
+ }
+ ]
+ },
+ "apps": [],
+ "progressUpdateIntervalMs": 500,
+ "jobName": "paragraph_1578387700850_2124268380",
+ "id": "20180530-113042_1154914545",
+ "dateCreated": "2020-01-07 17:01:40.850",
+ "dateStarted": "2020-01-21 15:47:01.501",
+ "dateFinished": "2020-01-21 15:47:01.947",
+ "status": "FINISHED"
+ },
+ {
+ "title": "Filter Rows",
+ "text": "%spark.pyspark\n\ndf1 \u003d spark.createDataFrame([(1, \"andy\", 20, \"USA\"), (2, \"jeff\", 23, \"China\"), (3, \"james\", 18, \"USA\")]).toDF(\"id\", \"name\", \"age\", \"country\")\n\n# filter accepts a Column\ndf2 \u003d df1.filter(df1[\"age\"] \u003e\u003d 20)\ndf2.show()\n\n# Note that you need to use \"\u0026\" instead of \"\u0026\u0026\" or \"AND\"\ndf3 \u003d df1.filter((df1[\"age\"] \u003e\u003d 20) \u0026 (df1[\"country\"] \u003d\u003d \"China\"))\ndf3.sh [...]
+ "user": "anonymous",
+ "dateUpdated": "2020-01-21 15:47:02.009",
+ "config": {
+ "editorSetting": {
+ "language": "python",
+ "editOnDblClick": false,
+ "completionKey": "TAB",
+ "completionSupport": true
+ },
+ "colWidth": 6.0,
+ "editorMode": "ace/mode/python",
+ "fontSize": 9.0,
+ "title": true,
+ "results": {},
+ "enabled": true,
+ "runOnSelectionChange": true,
+ "checkEmpty": true
+ },
+ "settings": {
+ "params": {},
+ "forms": {}
+ },
+ "results": {
+ "code": "SUCCESS",
+ "msg": [
+ {
+ "type": "TEXT",
+ "data": "+---+----+---+-------+\n| id|name|age|country|\n+---+----+---+-------+\n| 1|andy| 20| USA|\n| 2|jeff| 23| China|\n+---+----+---+-------+\n\n+---+----+---+-------+\n| id|name|age|country|\n+---+----+---+-------+\n| 2|jeff| 23| China|\n+---+----+---+-------+\n\n"
+ }
+ ]
+ },
+ "apps": [],
+ "progressUpdateIntervalMs": 500,
+ "jobName": "paragraph_1578387700850_1501705200",
+ "id": "20180530-113407_58454283",
+ "dateCreated": "2020-01-07 17:01:40.850",
+ "dateStarted": "2020-01-21 15:47:02.015",
+ "dateFinished": "2020-01-21 15:47:02.458",
+ "status": "FINISHED"
+ },
+ {
+ "title": "Create UDF",
+ "text": "%spark.pyspark\n\ndf1 \u003d spark.createDataFrame([(1, \"andy\", 20, \"USA\"), (2, \"jeff\", 23, \"China\"), (3, \"james\", 18, \"USA\")]) \\\n    .toDF(\"id\", \"name\", \"age\", \"country\")\n\n# Create a udf from a python lambda\nfrom pyspark.sql.functions import udf\nudf1 \u003d udf(lambda e: e.upper())\ndf2 \u003d df1.select(udf1(df1[\"name\"]))\ndf2.show()\n\n# UDF can also be used in filter; in this case the return type must be Boolean\n# We can also use an [...]
+ "user": "anonymous",
+ "dateUpdated": "2020-01-21 15:47:02.525",
+ "config": {
+ "editorSetting": {
+ "language": "python",
+ "editOnDblClick": false,
+ "completionKey": "TAB",
+ "completionSupport": true
+ },
+ "colWidth": 6.0,
+ "editorMode": "ace/mode/python",
+ "fontSize": 9.0,
+ "title": true,
+ "results": {},
+ "enabled": true,
+ "runOnSelectionChange": true,
+ "checkEmpty": true
+ },
+ "settings": {
+ "params": {},
+ "forms": {}
+ },
+ "results": {
+ "code": "SUCCESS",
+ "msg": [
+ {
+ "type": "TEXT",
+ "data": "+--------------+\n|\u003clambda\u003e(name)|\n+--------------+\n| ANDY|\n| JEFF|\n| JAMES|\n+--------------+\n\n+---+----+---+-------+\n| id|name|age|country|\n+---+----+---+-------+\n| 1|andy| 20| USA|\n| 2|jeff| 23| China|\n+---+----+---+-------+\n\n+------------+\n|name_country|\n+------------+\n| andy_USA|\n| jeff_China|\n| james_USA|\n+------------+\n\n"
+ }
+ ]
+ },
+ "apps": [],
+ "progressUpdateIntervalMs": 500,
+ "jobName": "paragraph_1578387700850_574730063",
+ "id": "20180530-113720_1986531680",
+ "dateCreated": "2020-01-07 17:01:40.850",
+ "dateStarted": "2020-01-21 15:47:02.532",
+ "dateFinished": "2020-01-21 15:47:03.455",
+ "status": "FINISHED"
+ },
+ {
+ "title": "GroupBy",
+ "text": "%spark.pyspark\n\ndf1 \u003d spark.createDataFrame([(1, \"andy\", 20, \"USA\"), (2, \"jeff\", 23, \"China\"), (3, \"james\", 18, \"USA\")]) \\\n    .toDF(\"id\", \"name\", \"age\", \"country\")\n\n# You can call an agg function after groupBy directly, such as count/min/max/avg/sum\ndf2 \u003d df1.groupBy(\"country\").count()\ndf2.show()\n\n# Pass a Map if you want to do multiple aggregations\ndf3 \u003d df1.groupBy(\"country\").agg({\"age\": \"avg\", \"id\": \"count\"}) [...]
+ "user": "anonymous",
+ "dateUpdated": "2020-01-21 15:47:03.553",
+ "config": {
+ "editorSetting": {
+ "language": "python",
+ "editOnDblClick": false,
+ "completionKey": "TAB",
+ "completionSupport": true
+ },
+ "colWidth": 6.0,
+ "editorMode": "ace/mode/python",
+ "fontSize": 9.0,
+ "title": true,
+ "results": {},
+ "enabled": true,
+ "runOnSelectionChange": true,
+ "checkEmpty": true
+ },
+ "settings": {
+ "params": {},
+ "forms": {}
+ },
+ "results": {
+ "code": "SUCCESS",
+ "msg": [
+ {
+ "type": "TEXT",
+ "data": "+-------+-----+\n|country|count|\n+-------+-----+\n| China| 1|\n| USA| 2|\n+-------+-----+\n\n+-------+---------+--------+\n|country|count(id)|avg(age)|\n+-------+---------+--------+\n| China| 1| 23.0|\n| USA| 2| 19.0|\n+-------+---------+--------+\n\n+-------+-------+-----+\n|country|avg_age|count|\n+-------+-------+-----+\n| China| 23.0| 1|\n| USA| 19.0| 2|\n+-------+-------+-----+\n\n+-------+-------+-------+\n|count [...]
+ }
+ ]
+ },
+ "apps": [],
+ "progressUpdateIntervalMs": 500,
+ "jobName": "paragraph_1578387700850_1233271138",
+ "id": "20180530-114404_2076888937",
+ "dateCreated": "2020-01-07 17:01:40.850",
+ "dateStarted": "2020-01-21 15:47:03.560",
+ "dateFinished": "2020-01-21 15:47:08.101",
+ "status": "FINISHED"
+ },
+ {
+ "title": "Join on Single Field",
+ "text": "%spark.pyspark\n\ndf1 \u003d spark.createDataFrame([(1, \"andy\", 20, 1), (2, \"jeff\", 23, 2), (3, \"james\", 18, 3)]).toDF(\"id\", \"name\", \"age\", \"c_id\")\ndf1.show()\n\ndf2 \u003d spark.createDataFrame([(1, \"USA\"), (2, \"China\")]).toDF(\"c_id\", \"c_name\")\ndf2.show()\n\n# You can just specify the key name if joining on the same key\ndf3 \u003d df1.join(df2, \"c_id\")\ndf3.show()\n\n# Or you can specify the join condition explicitly in case the key is different bet [...]
+ "user": "anonymous",
+ "dateUpdated": "2020-01-21 15:47:08.177",
+ "config": {
+ "editorSetting": {
+ "language": "python",
+ "editOnDblClick": false,
+ "completionKey": "TAB",
+ "completionSupport": true
+ },
+ "colWidth": 6.0,
+ "editorMode": "ace/mode/python",
+ "fontSize": 9.0,
+ "title": true,
+ "results": {},
+ "enabled": true,
+ "runOnSelectionChange": true,
+ "checkEmpty": true
+ },
+ "settings": {
+ "params": {},
+ "forms": {}
+ },
+ "results": {
+ "code": "SUCCESS",
+ "msg": [
+ {
+ "type": "TEXT",
+ "data": "+---+-----+---+----+\n| id| name|age|c_id|\n+---+-----+---+----+\n| 1| andy| 20| 1|\n| 2| jeff| 23| 2|\n| 3|james| 18| 3|\n+---+-----+---+----+\n\n+----+------+\n|c_id|c_name|\n+----+------+\n| 1| USA|\n| 2| China|\n+----+------+\n\n+----+---+----+---+------+\n|c_id| id|name|age|c_name|\n+----+---+----+---+------+\n| 1| 1|andy| 20| USA|\n| 2| 2|jeff| 23| China|\n+----+---+----+---+------+\n\n+---+----+---+----+----+------+\n| id|name|age|c_id|c [...]
+ }
+ ]
+ },
+ "apps": [],
+ "progressUpdateIntervalMs": 500,
+ "jobName": "paragraph_1578387700851_-770209064",
+ "id": "20180530-130126_1642948432",
+ "dateCreated": "2020-01-07 17:01:40.851",
+ "dateStarted": "2020-01-21 15:47:08.182",
+ "dateFinished": "2020-01-21 15:47:11.909",
+ "status": "FINISHED"
+ },
+ {
+ "title": "Join on Multiple Fields",
+ "text": "%spark.pyspark\n\ndf1 \u003d spark.createDataFrame([(\"andy\", 20, 1, 1), (\"jeff\", 23, 1, 2), (\"james\", 12, 2, 2)]).toDF(\"name\", \"age\", \"key_1\", \"key_2\")\ndf1.show()\n\ndf2 \u003d spark.createDataFrame([(1, 1, \"USA\"), (2, 2, \"China\")]).toDF(\"key_1\", \"key_2\", \"country\")\ndf2.show()\n\n# Join on 2 fields: key_1, key_2\n\n# You can pass a list of field names if the join field names are the same in both tables\ndf3 \u003d df1.join(df2, [\"key_1\", \"key_2\ [...]
+ "user": "anonymous",
+ "dateUpdated": "2020-01-21 15:47:11.948",
+ "config": {
+ "editorSetting": {
+ "language": "python",
+ "editOnDblClick": false,
+ "completionKey": "TAB",
+ "completionSupport": true
+ },
+ "colWidth": 6.0,
+ "editorMode": "ace/mode/python",
+ "fontSize": 9.0,
+ "title": true,
+ "results": {},
+ "enabled": true,
+ "runOnSelectionChange": true,
+ "checkEmpty": true
+ },
+ "settings": {
+ "params": {},
+ "forms": {}
+ },
+ "results": {
+ "code": "SUCCESS",
+ "msg": [
+ {
+ "type": "TEXT",
+ "data": "+-----+---+-----+-----+\n| name|age|key_1|key_2|\n+-----+---+-----+-----+\n| andy| 20| 1| 1|\n| jeff| 23| 1| 2|\n|james| 12| 2| 2|\n+-----+---+-----+-----+\n\n+-----+-----+-------+\n|key_1|key_2|country|\n+-----+-----+-------+\n| 1| 1| USA|\n| 2| 2| China|\n+-----+-----+-------+\n\n+-----+-----+-----+---+-------+\n|key_1|key_2| name|age|country|\n+-----+-----+-----+---+-------+\n| 1| 1| andy| 20| USA|\n| 2| 2|james| 12 [...]
+ }
+ ]
+ },
+ "apps": [],
+ "progressUpdateIntervalMs": 500,
+ "jobName": "paragraph_1578387700851_-177297320",
+ "id": "20180530-135600_354945835",
+ "dateCreated": "2020-01-07 17:01:40.851",
+ "dateStarted": "2020-01-21 15:47:11.957",
+ "dateFinished": "2020-01-21 15:47:14.394",
+ "status": "FINISHED"
+ },
+ {
+ "title": "Use SQL directly",
+ "text": "%spark.pyspark\n\ndf1 \u003d spark.createDataFrame([(1, \"andy\", 20, \"USA\"), (2, \"jeff\", 23, \"China\"), (3, \"james\", 18, \"USA\")]) \\\n .toDF(\"id\", \"name\", \"age\", \"country\")\n# call createOrReplaceTempView first if you want to query this DataFrame via sql\ndf1.createOrReplaceTempView(\"people\")\n# SparkSession.sql return DataFrame\ndf2 \u003d spark.sql(\"select name, age from people\")\ndf2.show()\n\n# You need to register udf if you want to use [...]
+ "user": "anonymous",
+ "dateUpdated": "2020-01-21 15:47:14.406",
+ "config": {
+ "editorSetting": {
+ "language": "python",
+ "editOnDblClick": false,
+ "completionKey": "TAB",
+ "completionSupport": true
+ },
+ "colWidth": 12.0,
+ "editorMode": "ace/mode/python",
+ "fontSize": 9.0,
+ "title": true,
+ "results": {},
+ "enabled": true,
+ "runOnSelectionChange": true,
+ "checkEmpty": true
+ },
+ "settings": {
+ "params": {},
+ "forms": {}
+ },
+ "results": {
+ "code": "SUCCESS",
+ "msg": [
+ {
+ "type": "TEXT",
+ "data": "+-----+---+\n| name|age|\n+-----+---+\n| andy| 20|\n| jeff| 23|\n|james| 18|\n+-----+---+\n\n+----------+---+\n|udf1(name)|age|\n+----------+---+\n| ANDY| 20|\n| JEFF| 23|\n| JAMES| 18|\n+----------+---+\n\n"
+ }
+ ]
+ },
+ "apps": [],
+ "progressUpdateIntervalMs": 500,
+ "jobName": "paragraph_1578387700851_1756979054",
+ "id": "20180530-132023_995737505",
+ "dateCreated": "2020-01-07 17:01:40.851",
+ "dateStarted": "2020-01-21 15:47:14.411",
+ "dateFinished": "2020-01-21 15:47:16.881",
+ "status": "FINISHED"
+ },
+ {
+ "text": "%spark.sql\n\nshow tables",
+ "user": "anonymous",
+ "dateUpdated": "2020-01-21 15:48:23.385",
+ "config": {
+ "runOnSelectionChange": true,
+ "title": true,
+ "checkEmpty": true,
+ "colWidth": 12.0,
+ "fontSize": 9.0,
+ "enabled": true,
+ "results": {
+ "0": {
+ "graph": {
+ "mode": "table",
+ "height": 300.0,
+ "optionOpen": false,
+ "setting": {
+ "table": {
+ "tableGridState": {},
+ "tableColumnTypeState": {
+ "names": {
+ "database": "string",
+ "tableName": "string",
+ "isTemporary": "string"
+ },
+ "updated": false
+ },
+ "tableOptionSpecHash": "[{\"name\":\"useFilter\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable filter for columns\"},{\"name\":\"showPagination\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable pagination for better navigation\"},{\"name\":\"showAggregationFooter\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable a footer [...]
+ "tableOptionValue": {
+ "useFilter": false,
+ "showPagination": false,
+ "showAggregationFooter": false
+ },
+ "updated": false,
+ "initialized": false
+ }
+ },
+ "commonSetting": {}
+ }
+ }
+ },
+ "editorSetting": {
+ "language": "sql",
+ "editOnDblClick": false,
+ "completionKey": "TAB",
+ "completionSupport": true
+ },
+ "editorMode": "ace/mode/sql"
+ },
+ "settings": {
+ "params": {},
+ "forms": {}
+ },
+ "results": {
+ "code": "SUCCESS",
+ "msg": [
+ {
+ "type": "TABLE",
+ "data": "database\ttableName\tisTemporary\ndefault\tbank\tfalse\ndefault\tbank_raw\tfalse\ndefault\tdepartments\tfalse\ndefault\tdest_csv\tfalse\ndefault\tdest_kafka\tfalse\ndefault\tdest_orc\tfalse\ndefault\temployees\tfalse\ndefault\titems\tfalse\ndefault\tsource_csv\tfalse\ndefault\tsource_kafka\tfalse\ndefault\tweb_log_small\tfalse\n\tpeople\ttrue\n"
+ }
+ ]
+ },
+ "apps": [],
+ "progressUpdateIntervalMs": 500,
+ "jobName": "paragraph_1578388432752_877455036",
+ "id": "paragraph_1578388432752_877455036",
+ "dateCreated": "2020-01-07 17:13:52.752",
+ "dateStarted": "2020-01-21 15:47:16.980",
+ "dateFinished": "2020-01-21 15:47:17.086",
+ "status": "FINISHED"
+ },
+ {
+ "title": "Query Spark Catalog",
+ "text": "%spark.pyspark\n\nprint(spark.catalog.listTables())\n\n# make sure you have table `bank` in your database\nprint(spark.catalog.listColumns(\"bank\"))",
+ "user": "anonymous",
+ "dateUpdated": "2020-01-21 15:48:18.292",
+ "config": {
+ "editorSetting": {
+ "language": "python",
+ "editOnDblClick": false,
+ "completionKey": "TAB",
+ "completionSupport": true
+ },
+ "colWidth": 12.0,
+ "editorMode": "ace/mode/python",
+ "fontSize": 9.0,
+ "title": true,
+ "results": {},
+ "enabled": true,
+ "runOnSelectionChange": true,
+ "checkEmpty": true
+ },
+ "settings": {
+ "params": {},
+ "forms": {}
+ },
+ "results": {
+ "code": "SUCCESS",
+ "msg": [
+ {
+ "type": "TEXT",
+ "data": "[Table(name\u003d\u0027bank\u0027, database\u003d\u0027default\u0027, description\u003dNone, tableType\u003d\u0027MANAGED\u0027, isTemporary\u003dFalse), Table(name\u003d\u0027bank_raw\u0027, database\u003d\u0027default\u0027, description\u003dNone, tableType\u003d\u0027MANAGED\u0027, isTemporary\u003dFalse), Table(name\u003d\u0027departments\u0027, database\u003d\u0027default\u0027, description\u003dNone, tableType\u003d\u0027MANAGED\u0027, isTemporary\u003dFalse), [...]
+ }
+ ]
+ },
+ "apps": [],
+ "progressUpdateIntervalMs": 500,
+ "jobName": "paragraph_1578387700851_1504531246",
+ "id": "20180530-141842_1851074658",
+ "dateCreated": "2020-01-07 17:01:40.851",
+ "dateStarted": "2020-01-21 15:47:17.187",
+ "dateFinished": "2020-01-21 15:47:18.281",
+ "status": "FINISHED"
+ },
+ {
+ "title": "Visualize DataFrame/Dataset",
+ "text": "%md\n\nThere are 2 approaches to visualize a DataFrame/Dataset in Zeppelin\n\n* Use SparkSQLInterpreter via `%spark.sql`\n* Use ZeppelinContext via `z.show`\n\n",
+ "user": "anonymous",
+ "dateUpdated": "2020-01-21 15:47:18.301",
+ "config": {
+ "tableHide": false,
+ "editorSetting": {
+ "language": "markdown",
+ "editOnDblClick": true,
+ "completionKey": "TAB",
+ "completionSupport": false
+ },
+ "colWidth": 12.0,
+ "editorMode": "ace/mode/markdown",
+ "fontSize": 9.0,
+ "editorHide": true,
+ "title": false,
+ "results": {},
+ "enabled": true,
+ "runOnSelectionChange": true,
+ "checkEmpty": true
+ },
+ "settings": {
+ "params": {},
+ "forms": {}
+ },
+ "results": {
+ "code": "SUCCESS",
+ "msg": [
+ {
+ "type": "HTML",
+ "data": "\u003cdiv class\u003d\"markdown-body\"\u003e\n\u003cp\u003eThere are 2 approaches to visualize a DataFrame/Dataset in Zeppelin\u003c/p\u003e\n\u003cul\u003e\n\u003cli\u003eUse SparkSQLInterpreter via \u003ccode\u003e%spark.sql\u003c/code\u003e\u003c/li\u003e\n\u003cli\u003eUse ZeppelinContext via \u003ccode\u003ez.show\u003c/code\u003e\u003c/li\u003e\n\u003c/ul\u003e\n\n\u003c/div\u003e"
+ }
+ ]
+ },
+ "apps": [],
+ "progressUpdateIntervalMs": 500,
+ "jobName": "paragraph_1578387700851_1924561483",
+ "id": "20180530-132128_2114955642",
+ "dateCreated": "2020-01-07 17:01:40.851",
+ "dateStarted": "2020-01-21 15:47:18.307",
+ "dateFinished": "2020-01-21 15:47:18.328",
+ "status": "FINISHED"
+ },
+ {
+ "title": "Visualize DataFrame/Dataset via z.show",
+ "text": "%spark.pyspark\n\ndf1 \u003d spark.createDataFrame([(1, \"andy\", 20, \"USA\"), (2, \"jeff\", 23, \"China\"), (3, \"james\", 18, \"USA\")]).toDF(\"id\", \"name\", \"age\", \"country\")\ndf2 \u003d df1.groupBy(\"country\").count()\nz.show(df2)\n",
+ "user": "anonymous",
+ "dateUpdated": "2020-01-21 15:48:10.658",
+ "config": {
+ "editorSetting": {
+ "language": "python",
+ "editOnDblClick": false,
+ "completionKey": "TAB",
+ "completionSupport": true
+ },
+ "colWidth": 6.0,
+ "editorMode": "ace/mode/python",
+ "fontSize": 9.0,
+ "title": true,
+ "results": {
+ "0": {
+ "graph": {
+ "mode": "multiBarChart",
+ "height": 300.0,
+ "optionOpen": false,
+ "setting": {
+ "table": {
+ "tableGridState": {},
+ "tableColumnTypeState": {
+ "names": {
+ "country": "string",
+ "count": "string"
+ },
+ "updated": false
+ },
+ "tableOptionSpecHash": "[{\"name\":\"useFilter\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable filter for columns\"},{\"name\":\"showPagination\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable pagination for better navigation\"},{\"name\":\"showAggregationFooter\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable a footer [...]
+ "tableOptionValue": {
+ "useFilter": false,
+ "showPagination": false,
+ "showAggregationFooter": false
+ },
+ "updated": false,
+ "initialized": false
+ },
+ "multiBarChart": {
+ "rotate": {
+ "degree": "-45"
+ },
+ "xLabelStatus": "default"
+ }
+ },
+ "commonSetting": {},
+ "keys": [
+ {
+ "name": "country",
+ "index": 0.0,
+ "aggr": "sum"
+ }
+ ],
+ "groups": [],
+ "values": [
+ {
+ "name": "count",
+ "index": 1.0,
+ "aggr": "sum"
+ }
+ ]
+ },
+ "helium": {}
+ },
+ "1": {
+ "graph": {
+ "mode": "multiBarChart",
+ "height": 300.0,
+ "optionOpen": false,
+ "setting": {
+ "table": {
+ "tableGridState": {},
+ "tableColumnTypeState": {
+ "names": {
+ "country": "string",
+ "count": "string"
+ },
+ "updated": false
+ },
+ "tableOptionSpecHash": "[{\"name\":\"useFilter\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable filter for columns\"},{\"name\":\"showPagination\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable pagination for better navigation\"},{\"name\":\"showAggregationFooter\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable a footer [...]
+ "tableOptionValue": {
+ "useFilter": false,
+ "showPagination": false,
+ "showAggregationFooter": false
+ },
+ "updated": false,
+ "initialized": false
+ },
+ "multiBarChart": {
+ "rotate": {
+ "degree": "-45"
+ },
+ "xLabelStatus": "default"
+ }
+ },
+ "commonSetting": {},
+ "keys": [
+ {
+ "name": "country",
+ "index": 0.0,
+ "aggr": "sum"
+ }
+ ],
+ "groups": [],
+ "values": [
+ {
+ "name": "count",
+ "index": 1.0,
+ "aggr": "sum"
+ }
+ ]
+ },
+ "helium": {}
+ }
+ },
+ "enabled": true,
+ "runOnSelectionChange": true,
+ "checkEmpty": true
+ },
+ "settings": {
+ "params": {},
+ "forms": {}
+ },
+ "results": {
+ "code": "SUCCESS",
+ "msg": [
+ {
+ "type": "TABLE",
+ "data": "country\tcount\nChina\t1\nUSA\t2\n"
+ }
+ ]
+ },
+ "apps": [],
+ "progressUpdateIntervalMs": 500,
+ "jobName": "paragraph_1578387700851_-809695439",
+ "id": "20180530-132634_1285621466",
+ "dateCreated": "2020-01-07 17:01:40.851",
+ "dateStarted": "2020-01-21 15:47:18.414",
+ "dateFinished": "2020-01-21 15:47:19.426",
+ "status": "FINISHED"
+ },
+ {
+ "title": "Visualize DataFrame/Dataset via %spark.sql",
+ "text": "%spark.pyspark\n\ndf1 \u003d spark.createDataFrame([(1, \"andy\", 20, \"USA\"), (2, \"jeff\", 23, \"China\"), (3, \"james\", 18, \"USA\")]) \\\n .toDF(\"id\", \"name\", \"age\", \"country\")\n \n# register this DataFrame first before querying it via %spark.sql\ndf1.createOrReplaceTempView(\"people\")",
+ "user": "anonymous",
+ "dateUpdated": "2020-01-21 15:48:13.928",
+ "config": {
+ "editorSetting": {
+ "language": "python",
+ "editOnDblClick": false,
+ "completionKey": "TAB",
+ "completionSupport": true
+ },
+ "colWidth": 6.0,
+ "editorMode": "ace/mode/python",
+ "fontSize": 9.0,
+ "title": true,
+ "results": {},
+ "enabled": true,
+ "runOnSelectionChange": true,
+ "checkEmpty": true
+ },
+ "settings": {
+ "params": {},
+ "forms": {}
+ },
+ "results": {
+ "code": "SUCCESS",
+ "msg": []
+ },
+ "apps": [],
+ "progressUpdateIntervalMs": 500,
+ "jobName": "paragraph_1578387700851_381269276",
+ "id": "20180530-132657_668624333",
+ "dateCreated": "2020-01-07 17:01:40.851",
+ "dateStarted": "2020-01-21 15:47:19.436",
+ "dateFinished": "2020-01-21 15:47:19.667",
+ "status": "FINISHED"
+ },
+ {
+ "title": "",
+ "text": "%spark.sql\n\nselect country, count(1) as count from people group by country",
+ "user": "anonymous",
+ "dateUpdated": "2020-01-21 15:47:19.742",
+ "config": {
+ "editorSetting": {
+ "language": "sql",
+ "editOnDblClick": false,
+ "completionKey": "TAB",
+ "completionSupport": true
+ },
+ "colWidth": 6.0,
+ "editorMode": "ace/mode/sql",
+ "fontSize": 9.0,
+ "results": {
+ "0": {
+ "graph": {
+ "mode": "multiBarChart",
+ "height": 300.0,
+ "optionOpen": false,
+ "setting": {
+ "table": {
+ "tableGridState": {
+ "columns": [
+ {
+ "name": "country",
+ "visible": true,
+ "width": "*",
+ "sort": {},
+ "filters": [
+ {}
+ ],
+ "pinned": ""
+ },
+ {
+ "name": "count",
+ "visible": true,
+ "width": "*",
+ "sort": {},
+ "filters": [
+ {}
+ ],
+ "pinned": ""
+ }
+ ],
+ "scrollFocus": {},
+ "selection": [],
+ "grouping": {
+ "grouping": [],
+ "aggregations": [],
+ "rowExpandedStates": {}
+ },
+ "treeView": {},
+ "pagination": {
+ "paginationCurrentPage": 1.0,
+ "paginationPageSize": 250.0
+ }
+ },
+ "tableColumnTypeState": {
+ "names": {
+ "country": "string",
+ "count": "number"
+ },
+ "updated": false
+ },
+ "tableOptionSpecHash": "[{\"name\":\"useFilter\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable filter for columns\"},{\"name\":\"showPagination\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable pagination for better navigation\"},{\"name\":\"showAggregationFooter\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable a footer [...]
+ "tableOptionValue": {
+ "useFilter": false,
+ "showPagination": false,
+ "showAggregationFooter": false
+ },
+ "updated": false,
+ "initialized": false
+ },
+ "multiBarChart": {
+ "rotate": {
+ "degree": "-45"
+ },
+ "xLabelStatus": "default",
+ "stacked": false
+ },
+ "stackedAreaChart": {
+ "rotate": {
+ "degree": "-45"
+ },
+ "xLabelStatus": "default"
+ },
+ "lineChart": {
+ "rotate": {
+ "degree": "-45"
+ },
+ "xLabelStatus": "default"
+ }
+ },
+ "commonSetting": {},
+ "keys": [
+ {
+ "name": "country",
+ "index": 0.0,
+ "aggr": "sum"
+ }
+ ],
+ "groups": [],
+ "values": [
+ {
+ "name": "count",
+ "index": 1.0,
+ "aggr": "sum"
+ }
+ ]
+ },
+ "helium": {}
+ }
+ },
+ "enabled": true,
+ "runOnSelectionChange": true,
+ "title": false,
+ "checkEmpty": true
+ },
+ "settings": {
+ "params": {},
+ "forms": {}
+ },
+ "results": {
+ "code": "SUCCESS",
+ "msg": [
+ {
+ "type": "TABLE",
+ "data": "country\tcount\nChina\t1\nUSA\t2\n"
+ }
+ ]
+ },
+ "apps": [],
+ "progressUpdateIntervalMs": 500,
+ "jobName": "paragraph_1578387700851_601200360",
+ "id": "20180530-132823_944494152",
+ "dateCreated": "2020-01-07 17:01:40.851",
+ "dateStarted": "2020-01-21 15:47:19.750",
+ "dateFinished": "2020-01-21 15:47:20.480",
+ "status": "FINISHED"
+ },
+ {
+ "title": "",
+ "text": "%spark.sql\n",
+ "user": "anonymous",
+ "dateUpdated": "2020-01-21 15:47:20.561",
+ "config": {
+ "editorSetting": {
+ "language": "sql",
+ "editOnDblClick": false,
+ "completionKey": "TAB",
+ "completionSupport": true
+ },
+ "colWidth": 12.0,
+ "editorMode": "ace/mode/sql",
+ "fontSize": 9.0,
+ "results": {},
+ "enabled": true
+ },
+ "settings": {
+ "params": {},
+ "forms": {}
+ },
+ "apps": [],
+ "progressUpdateIntervalMs": 500,
+ "jobName": "paragraph_1578387700851_-2136199261",
+ "id": "20180530-132849_1305166760",
+ "dateCreated": "2020-01-07 17:01:40.851",
+ "status": "FINISHED"
+ }
+ ],
+ "name": "Spark SQL Tutorial (PySpark)",
+ "id": "2EWM84JXA",
+ "defaultInterpreterGroup": "spark",
+ "version": "0.9.0-SNAPSHOT",
+ "permissions": {
+ "owners": [],
+ "runners": [],
+ "readers": [],
+ "writers": []
+ },
+ "noteParams": {},
+ "noteForms": {},
+ "angularObjects": {},
+ "config": {
+ "isZeppelinNotebookCronEnable": true
+ },
+ "info": {
+ "isRunning": false
+ },
+ "path": "/Spark Tutorial/Spark SQL Tutorial (PySpark)"
+}
\ No newline at end of file
diff --git a/notebook/Spark Tutorial/Spark SQL (Scala)_2EYUV26VR.zpln b/notebook/Spark Tutorial/Spark SQL (Scala)_2EYUV26VR.zpln
new file mode 100644
index 0000000..040f089
--- /dev/null
+++ b/notebook/Spark Tutorial/Spark SQL (Scala)_2EYUV26VR.zpln
@@ -0,0 +1,1034 @@
+{
+ "paragraphs": [
+ {
+ "title": "Introduction",
+ "text": "%md\n\nThis is a tutorial for Spark SQL in Scala (based on Spark 2.x). First we need to clarify several basic concepts of Spark SQL:\n\n* **SparkSession** - This is the entry point of Spark SQL. You use `SparkSession` to create DataFrames/Datasets, register UDFs, query tables, etc.\n* **Dataset** - Dataset is the core abstraction of Spark SQL. Underneath a Dataset is an RDD, but a Dataset knows more about your data, specifically its structure, so that Dataset could do mo [...]
+ "user": "anonymous",
+ "dateUpdated": "2020-02-01 21:18:56.158",
+ "config": {
+ "tableHide": false,
+ "editorSetting": {
+ "language": "markdown",
+ "editOnDblClick": true,
+ "completionKey": "TAB",
+ "completionSupport": false
+ },
+ "colWidth": 12.0,
+ "editorMode": "ace/mode/markdown",
+ "fontSize": 9.0,
+ "editorHide": true,
+ "results": {},
+ "enabled": true,
+ "title": false,
+ "runOnSelectionChange": true,
+ "checkEmpty": true
+ },
+ "settings": {
+ "params": {},
+ "forms": {}
+ },
+ "results": {
+ "code": "SUCCESS",
+ "msg": [
+ {
+ "type": "HTML",
+ "data": "\u003cdiv class\u003d\"markdown-body\"\u003e\n\u003cp\u003eThis is a tutorial for Spark SQL in Scala (based on Spark 2.x). First we need to clarify several basic concepts of Spark SQL\u003c/p\u003e\n\u003cul\u003e\n\u003cli\u003e\u003cstrong\u003eSparkSession\u003c/strong\u003e - This is the entry point of Spark SQL. You use \u003ccode\u003eSparkSession\u003c/code\u003e to create DataFrames/Datasets, register UDFs, query tables, etc.\u003c/li\u003e\n\u003cli\u003 [...]
+ }
+ ]
+ },
+ "apps": [],
+ "progressUpdateIntervalMs": 500,
+ "jobName": "paragraph_1579593308067_-799293654",
+ "id": "20180530-101118_380906698",
+ "dateCreated": "2020-01-21 15:55:08.068",
+ "dateStarted": "2020-02-01 21:18:56.165",
+ "dateFinished": "2020-02-01 21:18:56.177",
+ "status": "FINISHED"
+ },
+ {
+ "title": "Create Dataset/DataFrame",
+ "text": "%md\n\nThere are two ways to create a Dataset/DataFrame\n\n* Use SparkSession to create a Dataset/DataFrame directly. You can create a Dataset/DataFrame from an RDD, a Seq, etc.\n* Use DataFrameReader to create a Dataset/DataFrame from the many kinds of storage supported by Spark, such as HDFS, JDBC, etc.",
+ "user": "anonymous",
+ "dateUpdated": "2020-02-01 21:18:56.268",
+ "config": {
+ "tableHide": false,
+ "editorSetting": {
+ "language": "markdown",
+ "editOnDblClick": true,
+ "completionKey": "TAB",
+ "completionSupport": false
+ },
+ "colWidth": 12.0,
+ "editorMode": "ace/mode/markdown",
+ "fontSize": 9.0,
+ "editorHide": true,
+ "title": false,
+ "results": {},
+ "enabled": true,
+ "runOnSelectionChange": true,
+ "checkEmpty": true
+ },
+ "settings": {
+ "params": {},
+ "forms": {}
+ },
+ "results": {
+ "code": "SUCCESS",
+ "msg": [
+ {
+ "type": "HTML",
+ "data": "\u003cdiv class\u003d\"markdown-body\"\u003e\n\u003cp\u003eThere are two ways to create a Dataset/DataFrame\u003c/p\u003e\n\u003cul\u003e\n\u003cli\u003eUse SparkSession to create a Dataset/DataFrame directly. You can create a Dataset/DataFrame from an RDD, a Seq, etc.\u003c/li\u003e\n\u003cli\u003eUse DataFrameReader to create a Dataset/DataFrame from the many kinds of storage supported by Spark, such as HDFS, JDBC, etc.\u003c/li\u003e\n\u003c/ul\u00 [...]
+ }
+ ]
+ },
+ "apps": [],
+ "progressUpdateIntervalMs": 500,
+ "jobName": "paragraph_1579593308068_1022597750",
+ "id": "20180530-101515_948520659",
+ "dateCreated": "2020-01-21 15:55:08.068",
+ "dateStarted": "2020-02-01 21:18:56.276",
+ "dateFinished": "2020-02-01 21:18:56.289",
+ "status": "FINISHED"
+ },
+ {
+ "title": "Prerequisites",
+ "text": "%md\n\n\n\n**It is strongly recommended to run the following %spark.conf paragraph first to make sure the correct configuration is used.**",
+ "user": "anonymous",
+ "dateUpdated": "2020-02-01 21:18:56.377",
+ "config": {
+ "tableHide": false,
+ "editorSetting": {
+ "language": "markdown",
+ "editOnDblClick": true,
+ "completionKey": "TAB",
+ "completionSupport": false
+ },
+ "colWidth": 12.0,
+ "editorMode": "ace/mode/markdown",
+ "fontSize": 9.0,
+ "editorHide": true,
+ "title": false,
+ "results": {},
+ "enabled": true,
+ "runOnSelectionChange": true,
+ "checkEmpty": true
+ },
+ "settings": {
+ "params": {},
+ "forms": {}
+ },
+ "results": {
+ "code": "SUCCESS",
+ "msg": [
+ {
+ "type": "HTML",
+ "data": "\u003cdiv class\u003d\"markdown-body\"\u003e\n\u003cp\u003e\u003cstrong\u003eIt is strongly recommended to run the following %spark.conf paragraph first to make sure the correct configuration is used.\u003c/strong\u003e\u003c/p\u003e\n\n\u003c/div\u003e"
+ }
+ ]
+ },
+ "apps": [],
+ "progressUpdateIntervalMs": 500,
+ "jobName": "paragraph_1579593308068_-974291943",
+ "id": "20180530-110023_1756702033",
+ "dateCreated": "2020-01-21 15:55:08.069",
+ "dateStarted": "2020-02-01 21:18:56.382",
+ "dateFinished": "2020-02-01 21:18:56.390",
+ "status": "FINISHED"
+ },
+ {
+ "title": "",
+ "text": "%spark.conf\n\n# It is strongly recommended to set SPARK_HOME explicitly instead of using the embedded Spark of Zeppelin, as the embedded Spark is limited and can only run in local mode.\n# SPARK_HOME /Users/jzhang/Java/lib/spark-2.3.0-bin-hadoop2.7\n\n# Uncomment the following line if you want to use yarn-cluster mode (It is recommended to use yarn-cluster mode from Zeppelin 0.8, as the driver will run on the remote host of yarn cluster which can mi [...]
+ "user": "anonymous",
+ "dateUpdated": "2020-02-01 21:18:56.483",
+ "config": {
+ "editorSetting": {
+ "language": "text",
+ "editOnDblClick": false,
+ "completionKey": "TAB",
+ "completionSupport": true
+ },
+ "colWidth": 12.0,
+ "editorMode": "ace/mode/text",
+ "fontSize": 9.0,
+ "results": {},
+ "enabled": true,
+ "runOnSelectionChange": true,
+ "title": false,
+ "checkEmpty": true
+ },
+ "settings": {
+ "params": {},
+ "forms": {}
+ },
+ "results": {
+ "code": "SUCCESS",
+ "msg": []
+ },
+ "apps": [],
+ "progressUpdateIntervalMs": 500,
+ "jobName": "paragraph_1579593308069_-1239490192",
+ "id": "20180530-110007_162886838",
+ "dateCreated": "2020-01-21 15:55:08.069",
+ "dateStarted": "2020-02-01 21:18:56.488",
+ "dateFinished": "2020-02-01 21:18:56.495",
+ "status": "FINISHED"
+ },
+ {
+ "title": "Create Dataset/DataFrame via SparkSession",
+ "text": "%spark\n\n// create a DataFrame from a Scala Seq. Spark can infer the schema for you.\nval df1 \u003d spark.createDataFrame(Seq((1, \"andy\", 20, \"USA\"), (2, \"jeff\", 23, \"China\"), (3, \"james\", 18, \"USA\"))).toDF(\"id\", \"name\", \"age\", \"country\")\ndf1.printSchema\ndf1.show()\n\n// create a DataFrame from a Scala case class\ncase class Person(id:Int, name:String, age:Int, country:String)\nval df2 \u003d spark.createDataFrame(Seq(Person(1, \"andy\", 20, \"USA\"), Person(2, \" [...]
+ "user": "anonymous",
+ "dateUpdated": "2020-02-01 21:20:04.926",
+ "config": {
+ "editorSetting": {
+ "language": "scala",
+ "editOnDblClick": false,
+ "completionKey": "TAB",
+ "completionSupport": true
+ },
+ "colWidth": 6.0,
+ "editorMode": "ace/mode/scala",
+ "fontSize": 9.0,
+ "title": true,
+ "results": {},
+ "enabled": true,
+ "runOnSelectionChange": true,
+ "checkEmpty": true
+ },
+ "settings": {
+ "params": {},
+ "forms": {}
+ },
+ "results": {
+ "code": "SUCCESS",
+ "msg": [
+ {
+ "type": "TEXT",
+ "data": "root\n |-- id: integer (nullable \u003d false)\n |-- name: string (nullable \u003d true)\n |-- age: integer (nullable \u003d false)\n |-- country: string (nullable \u003d true)\n\n+---+-----+---+-------+\n| id| name|age|country|\n+---+-----+---+-------+\n| 1| andy| 20| USA|\n| 2| jeff| 23| China|\n| 3|james| 18| USA|\n+---+-----+---+-------+\n\nroot\n |-- id: integer (nullable \u003d false)\n |-- name: string (nullable \u003d true)\n |-- age: integer (nulla [...]
+ }
+ ]
+ },
+ "apps": [],
+ "progressUpdateIntervalMs": 500,
+ "jobName": "paragraph_1579593308069_-1317689018",
+ "id": "20180530-101750_1491737301",
+ "dateCreated": "2020-01-21 15:55:08.069",
+ "dateStarted": "2020-02-01 21:18:56.594",
+ "dateFinished": "2020-02-01 21:18:57.788",
+ "status": "FINISHED"
+ },
+ {
+ "title": "Create Dataset/DataFrame via DataFrameReader",
+ "text": "%spark\n\n// Read data from a JSON file\n// link for this people.json (https://github.com/apache/spark/blob/master/examples/src/main/resources/people.json)\n// Make sure you have this file on the local file system or HDFS\nval df1 \u003d spark.read.json(\"people.json\")\ndf1.printSchema\ndf1.show()\n\n// Read data from a CSV file. You can customize it via spark.read.options. E.g. in the following example, we customize the sep and header\n// Add . at the end of this line to indidat [...]
+ "user": "anonymous",
+ "dateUpdated": "2020-02-01 21:20:08.376",
+ "config": {
+ "editorSetting": {
+ "language": "scala",
+ "editOnDblClick": false,
+ "completionKey": "TAB",
+ "completionSupport": true
+ },
+ "colWidth": 6.0,
+ "editorMode": "ace/mode/scala",
+ "fontSize": 9.0,
+ "title": true,
+ "results": {},
+ "enabled": true,
+ "runOnSelectionChange": true,
+ "checkEmpty": true
+ },
+ "settings": {
+ "params": {},
+ "forms": {}
+ },
+ "results": {
+ "code": "SUCCESS",
+ "msg": [
+ {
+ "type": "TEXT",
+ "data": "root\n |-- age: long (nullable \u003d true)\n |-- name: string (nullable \u003d true)\n\n+----+-------+\n| age| name|\n+----+-------+\n|null|Michael|\n| 30| Andy|\n| 19| Justin|\n+----+-------+\n\nroot\n |-- name: string (nullable \u003d true)\n |-- age: string (nullable \u003d true)\n |-- job: string (nullable \u003d true)\n\n+-----+---+---------+\n| name|age| job|\n+-----+---+---------+\n|Jorge| 30|Developer|\n| Bob| 32|Developer|\n+-----+---+---------+ [...]
+ }
+ ]
+ },
+ "apps": [],
+ "progressUpdateIntervalMs": 500,
+ "jobName": "paragraph_1579593308069_-1400272236",
+ "id": "20180530-101930_1495479697",
+ "dateCreated": "2020-01-21 15:55:08.069",
+ "dateStarted": "2020-02-01 21:18:57.832",
+ "dateFinished": "2020-02-01 21:18:59.855",
+ "status": "FINISHED"
+ },
+ {
+ "title": "Add New Column",
+ "text": "%spark\n\n// withColumn can be used to add a new column\nval df1 \u003d spark.createDataFrame(Seq((1, \"andy\", 20, \"USA\"), (2, \"jeff\", 23, \"China\"), (3, \"james\", 18, \"USA\"))).toDF(\"id\", \"name\", \"age\", \"country\")\nval df2 \u003d df1.withColumn(\"age2\", $\"age\" + 1)\ndf2.show()\n\n// the new column replaces the existing column if the new column name is the same as the old one\nval df3 \u003d df1.withColumn(\"age\", $\"age\" + 1)\ndf3.show()\n [...]
+ "user": "anonymous",
+ "dateUpdated": "2020-02-01 21:20:12.779",
+ "config": {
+ "editorSetting": {
+ "language": "scala",
+ "editOnDblClick": false,
+ "completionKey": "TAB",
+ "completionSupport": true
+ },
+ "colWidth": 6.0,
+ "editorMode": "ace/mode/scala",
+ "fontSize": 9.0,
+ "title": true,
+ "results": {},
+ "enabled": true,
+ "runOnSelectionChange": true,
+ "checkEmpty": true
+ },
+ "settings": {
+ "params": {},
+ "forms": {}
+ },
+ "results": {
+ "code": "SUCCESS",
+ "msg": [
+ {
+ "type": "TEXT",
+ "data": "+---+-----+---+-------+----+\n| id| name|age|country|age2|\n+---+-----+---+-------+----+\n| 1| andy| 20| USA| 21|\n| 2| jeff| 23| China| 24|\n| 3|james| 18| USA| 19|\n+---+-----+---+-------+----+\n\n+---+-----+---+-------+\n| id| name|age|country|\n+---+-----+---+-------+\n| 1| andy| 21| USA|\n| 2| jeff| 24| China|\n| 3|james| 19| USA|\n+---+-----+---+-------+\n\n+---+-----+---+-------+\n| id| name|age|country|\n+---+-----+---+-------+\n| 1| A [...]
+ }
+ ]
+ },
+ "apps": [],
+ "progressUpdateIntervalMs": 500,
+ "jobName": "paragraph_1579593308069_-1399664813",
+ "id": "20180530-105113_693855403",
+ "dateCreated": "2020-01-21 15:55:08.070",
+ "dateStarted": "2020-02-01 21:18:59.889",
+ "dateFinished": "2020-02-01 21:19:00.509",
+ "status": "FINISHED"
+ },
+ {
+ "title": "Remove Column",
+ "text": "%spark\n\n\nval df1 \u003d spark.createDataFrame(Seq((1, \"andy\", 20, \"USA\"), (2, \"jeff\", 23, \"China\"), (3, \"james\", 18, \"USA\"))).toDF(\"id\", \"name\", \"age\", \"country\")\n// drop can be used to remove a column\nval df2 \u003d df1.drop(\"id\")\ndf2.show()\n",
+ "user": "anonymous",
+ "dateUpdated": "2020-02-01 21:20:17.756",
+ "config": {
+ "editorSetting": {
+ "language": "scala",
+ "editOnDblClick": false,
+ "completionKey": "TAB",
+ "completionSupport": true
+ },
+ "colWidth": 6.0,
+ "editorMode": "ace/mode/scala",
+ "fontSize": 9.0,
+ "title": true,
+ "results": {},
+ "enabled": true,
+ "runOnSelectionChange": true,
+ "checkEmpty": true
+ },
+ "settings": {
+ "params": {},
+ "forms": {}
+ },
+ "results": {
+ "code": "SUCCESS",
+ "msg": [
+ {
+ "type": "TEXT",
+ "data": "+-----+---+-------+\n| name|age|country|\n+-----+---+-------+\n| andy| 20| USA|\n| jeff| 23| China|\n|james| 18| USA|\n+-----+---+-------+\n\n\u001b[1m\u001b[34mdf1\u001b[0m: \u001b[1m\u001b[32morg.apache.spark.sql.DataFrame\u001b[0m \u003d [id: int, name: string ... 2 more fields]\n\u001b[1m\u001b[34mdf2\u001b[0m: \u001b[1m\u001b[32morg.apache.spark.sql.DataFrame\u001b[0m \u003d [name: string, age: int ... 1 more field]\n"
+ }
+ ]
+ },
+ "apps": [],
+ "progressUpdateIntervalMs": 500,
+ "jobName": "paragraph_1579593308070_-1089278693",
+ "id": "20180530-112045_1274721210",
+ "dateCreated": "2020-01-21 15:55:08.070",
+ "dateStarted": "2020-02-01 21:19:00.607",
+ "dateFinished": "2020-02-01 21:19:00.968",
+ "status": "FINISHED"
+ },
+ {
+ "title": "Select Subset of Columns",
+ "text": "%spark\n\nval df1 \u003d spark.createDataFrame(Seq((1, \"andy\", 20, \"USA\"), (2, \"jeff\", 23, \"China\"), (3, \"james\", 18, \"USA\"))).toDF(\"id\", \"name\", \"age\", \"country\")\n// select can accept a list of column name strings\nval df2 \u003d df1.select(\"id\", \"name\")\ndf2.show()\n\n// select can also accept a list of Columns. You can create a Column via $ or a udf\nval df3 \u003d df1.select($\"id\", upper($\"name\"), $\"age\" + 1)\ndf3.show()\n",
+ "user": "anonymous",
+ "dateUpdated": "2020-02-01 21:20:20.441",
+ "config": {
+ "editorSetting": {
+ "language": "scala",
+ "editOnDblClick": false,
+ "completionKey": "TAB",
+ "completionSupport": true
+ },
+ "colWidth": 6.0,
+ "editorMode": "ace/mode/scala",
+ "fontSize": 9.0,
+ "title": true,
+ "results": {},
+ "enabled": true,
+ "runOnSelectionChange": true,
+ "checkEmpty": true
+ },
+ "settings": {
+ "params": {},
+ "forms": {}
+ },
+ "results": {
+ "code": "SUCCESS",
+ "msg": [
+ {
+ "type": "TEXT",
+ "data": "+---+-----+\n| id| name|\n+---+-----+\n| 1| andy|\n| 2| jeff|\n| 3|james|\n+---+-----+\n\n+---+-----------+---------+\n| id|upper(name)|(age + 1)|\n+---+-----------+---------+\n| 1| ANDY| 21|\n| 2| JEFF| 24|\n| 3| JAMES| 19|\n+---+-----------+---------+\n\n\u001b[1m\u001b[34mdf1\u001b[0m: \u001b[1m\u001b[32morg.apache.spark.sql.DataFrame\u001b[0m \u003d [id: int, name: string ... 2 more fields]\n\u001b[1m\u001b[34mdf2\u001b[0m [...]
+ }
+ ]
+ },
+ "apps": [],
+ "progressUpdateIntervalMs": 500,
+ "jobName": "paragraph_1579593308070_462807812",
+ "id": "20180530-113042_1154914545",
+ "dateCreated": "2020-01-21 15:55:08.070",
+ "dateStarted": "2020-02-01 21:19:01.017",
+ "dateFinished": "2020-02-01 21:19:01.551",
+ "status": "FINISHED"
+ },
+ {
+ "title": "Filter Rows",
+ "text": "%spark\n\nval df1 \u003d spark.createDataFrame(Seq((1, \"andy\", 20, \"USA\"), (2, \"jeff\", 23, \"China\"), (3, \"james\", 18, \"USA\"))).toDF(\"id\", \"name\", \"age\", \"country\")\n\n// filter accepts a Column\nval df2 \u003d df1.filter($\"age\" \u003e\u003d 20)\ndf2.show()\n\n// Note that you need to use \"\u003d\u003d\u003d\" for equality instead of \"\u003d\u003d\"\nval df3 \u003d df1.filter($\"age\" \u003e\u003d 20 \u0026\u0026 $\"country\" \u003d\u003d\u003d \"C [...]
+ "user": "anonymous",
+ "dateUpdated": "2020-02-01 21:20:24.709",
+ "config": {
+ "editorSetting": {
+ "language": "scala",
+ "editOnDblClick": false,
+ "completionKey": "TAB",
+ "completionSupport": true
+ },
+ "colWidth": 6.0,
+ "editorMode": "ace/mode/scala",
+ "fontSize": 9.0,
+ "title": true,
+ "results": {},
+ "enabled": true,
+ "runOnSelectionChange": true,
+ "checkEmpty": true
+ },
+ "settings": {
+ "params": {},
+ "forms": {}
+ },
+ "results": {
+ "code": "SUCCESS",
+ "msg": [
+ {
+ "type": "TEXT",
+ "data": "+---+----+---+-------+\n| id|name|age|country|\n+---+----+---+-------+\n| 1|andy| 20| USA|\n| 2|jeff| 23| China|\n+---+----+---+-------+\n\n+---+----+---+-------+\n| id|name|age|country|\n+---+----+---+-------+\n| 2|jeff| 23| China|\n+---+----+---+-------+\n\n\u001b[1m\u001b[34mdf1\u001b[0m: \u001b[1m\u001b[32morg.apache.spark.sql.DataFrame\u001b[0m \u003d [id: int, name: string ... 2 more fields]\n\u001b[1m\u001b[34mdf2\u001b[0m: \u001b[1m\u001b[32morg.apach [...]
+ }
+ ]
+ },
+ "apps": [],
+ "progressUpdateIntervalMs": 500,
+ "jobName": "paragraph_1579593308070_-1931299693",
+ "id": "20180530-113407_58454283",
+ "dateCreated": "2020-01-21 15:55:08.070",
+ "dateStarted": "2020-02-01 21:19:01.628",
+ "dateFinished": "2020-02-01 21:19:02.122",
+ "status": "FINISHED"
+ },
+ {
+ "title": "Create UDF",
+ "text": "%spark\n\nval df1 \u003d spark.createDataFrame(Seq((1, \"andy\", 20, \"USA\"), (2, \"jeff\", 23, \"China\"), (3, \"james\", 18, \"USA\"))).toDF(\"id\", \"name\", \"age\", \"country\")\n\n// [String, String] The first String is the return type of this UDF, and the second String is the UDF argument type\nval udf1 \u003d udf[String, String]((e:String) \u003d\u003e e.toUpperCase)\nval df2 \u003d df1.select(udf1($\"name\"))\ndf2.show()\n\n// UDF can also be used in filter, in thi [...]
+ "user": "anonymous",
+ "dateUpdated": "2020-02-01 21:19:02.138",
+ "config": {
+ "editorSetting": {
+ "language": "scala",
+ "editOnDblClick": false,
+ "completionKey": "TAB",
+ "completionSupport": true
+ },
+ "colWidth": 6.0,
+ "editorMode": "ace/mode/scala",
+ "fontSize": 9.0,
+ "title": false,
+ "results": {},
+ "enabled": true,
+ "runOnSelectionChange": true,
+ "checkEmpty": true
+ },
+ "settings": {
+ "params": {},
+ "forms": {}
+ },
+ "results": {
+ "code": "SUCCESS",
+ "msg": [
+ {
+ "type": "TEXT",
+ "data": "+---------+\n|UDF(name)|\n+---------+\n| ANDY|\n| JEFF|\n| JAMES|\n+---------+\n\n+---+----+---+-------+\n| id|name|age|country|\n+---+----+---+-------+\n| 1|andy| 20| USA|\n| 2|jeff| 23| China|\n+---+----+---+-------+\n\n+------------+\n|name_country|\n+------------+\n| andy_USA|\n| jeff_China|\n| james_USA|\n+------------+\n\n\u001b[1m\u001b[34mdf1\u001b[0m: \u001b[1m\u001b[32morg.apache.spark.sql.DataFrame\u001b[0m \u003d [id: int, name: str [...]
+ }
+ ]
+ },
+ "apps": [],
+ "progressUpdateIntervalMs": 500,
+ "jobName": "paragraph_1579593308070_2030518177",
+ "id": "20180530-113720_1986531680",
+ "dateCreated": "2020-01-21 15:55:08.070",
+ "dateStarted": "2020-02-01 21:19:02.389",
+ "dateFinished": "2020-02-01 21:19:03.122",
+ "status": "FINISHED"
+ },
+ {
+ "title": "GroupBy",
+ "text": "%spark\n\nval df1 \u003d spark.createDataFrame(Seq((1, \"andy\", 20, \"USA\"), (2, \"jeff\", 23, \"China\"), (3, \"james\", 18, \"USA\"))).toDF(\"id\", \"name\", \"age\", \"country\")\n\n// You can call an aggregation function after groupBy directly, such as count/min/max/avg/sum\nval df2 \u003d df1.groupBy(\"country\").count()\ndf2.show()\n\n// Pass a Map if you want to do multiple aggregations\nval df3 \u003d df1.groupBy(\"country\").agg(Map(\"age\"-\u003e\"avg\", \"id\" -\u003e \"c [...]
+ "user": "anonymous",
+ "dateUpdated": "2020-02-01 21:19:03.207",
+ "config": {
+ "editorSetting": {
+ "language": "scala",
+ "editOnDblClick": false,
+ "completionKey": "TAB",
+ "completionSupport": true
+ },
+ "colWidth": 6.0,
+ "editorMode": "ace/mode/scala",
+ "fontSize": 9.0,
+ "title": false,
+ "results": {},
+ "enabled": true,
+ "runOnSelectionChange": true,
+ "checkEmpty": true
+ },
+ "settings": {
+ "params": {},
+ "forms": {}
+ },
+ "results": {
+ "code": "SUCCESS",
+ "msg": [
+ {
+ "type": "TEXT",
+ "data": "+-------+-----+\n|country|count|\n+-------+-----+\n| China| 1|\n| USA| 2|\n+-------+-----+\n\n+-------+--------+---------+\n|country|avg(age)|count(id)|\n+-------+--------+---------+\n| China| 23.0| 1|\n| USA| 19.0| 2|\n+-------+--------+---------+\n\n+-------+-------+-----+\n|country|avg_age|count|\n+-------+-------+-----+\n| China| 23.0| 1|\n| USA| 19.0| 2|\n+-------+-------+-----+\n\n+-------+-------+-------+\n|count [...]
+ }
+ ]
+ },
+ "apps": [],
+ "progressUpdateIntervalMs": 500,
+ "jobName": "paragraph_1579593308070_839300878",
+ "id": "20180530-114404_2076888937",
+ "dateCreated": "2020-01-21 15:55:08.071",
+ "dateStarted": "2020-02-01 21:19:03.211",
+ "dateFinished": "2020-02-01 21:19:08.565",
+ "status": "FINISHED"
+ },
+ {
+ "title": "Join on Single Field",
+ "text": "%spark\n\nval df1 \u003d spark.createDataFrame(Seq((1, \"andy\", 20, 1), (2, \"jeff\", 23, 2), (3, \"james\", 18, 3))).toDF(\"id\", \"name\", \"age\", \"c_id\")\ndf1.show()\n\nval df2 \u003d spark.createDataFrame(Seq((1, \"USA\"), (2, \"China\"))).toDF(\"c_id\", \"c_name\")\ndf2.show()\n\n// You can just specify the key name if joining on the same key\nval df3 \u003d df1.join(df2, \"c_id\")\ndf3.show()\n\n// Or you can specify the join condition explicitly in case the key is d [...]
+ "user": "anonymous",
+ "dateUpdated": "2020-02-01 21:19:08.586",
+ "config": {
+ "editorSetting": {
+ "language": "scala",
+ "editOnDblClick": false,
+ "completionKey": "TAB",
+ "completionSupport": true
+ },
+ "colWidth": 6.0,
+ "editorMode": "ace/mode/scala",
+ "fontSize": 9.0,
+ "title": false,
+ "results": {},
+ "enabled": true,
+ "runOnSelectionChange": true,
+ "checkEmpty": true
+ },
+ "settings": {
+ "params": {},
+ "forms": {}
+ },
+ "results": {
+ "code": "SUCCESS",
+ "msg": [
+ {
+ "type": "TEXT",
+ "data": "+---+-----+---+----+\n| id| name|age|c_id|\n+---+-----+---+----+\n| 1| andy| 20| 1|\n| 2| jeff| 23| 2|\n| 3|james| 18| 3|\n+---+-----+---+----+\n\n+----+------+\n|c_id|c_name|\n+----+------+\n| 1| USA|\n| 2| China|\n+----+------+\n\n+----+---+----+---+------+\n|c_id| id|name|age|c_name|\n+----+---+----+---+------+\n| 1| 1|andy| 20| USA|\n| 2| 2|jeff| 23| China|\n+----+---+----+---+------+\n\n+---+----+---+----+----+------+\n| id|name|age|c_id|c [...]
+ }
+ ]
+ },
+ "apps": [],
+ "progressUpdateIntervalMs": 500,
+ "jobName": "paragraph_1579593308071_2037234671",
+ "id": "20180530-130126_1642948432",
+ "dateCreated": "2020-01-21 15:55:08.071",
+ "dateStarted": "2020-02-01 21:19:08.590",
+ "dateFinished": "2020-02-01 21:19:09.891",
+ "status": "FINISHED"
+ },
+ {
+ "title": "Join on Multiple Fields",
+ "text": "%spark\n\nval df1 \u003d spark.createDataFrame(Seq((\"andy\", 20, 1, 1), (\"jeff\", 23, 1, 2), (\"james\", 12, 2, 2))).toDF(\"name\", \"age\", \"key_1\", \"key_2\")\ndf1.show()\n\nval df2 \u003d spark.createDataFrame(Seq((1, 1, \"USA\"), (2, 2, \"China\"))).toDF(\"key_1\", \"key_2\", \"country\")\ndf2.show()\n\n// Join on 2 fields: key_1, key_2\n\n// You can pass a list of field names if the join field names are the same in both tables\nval df3 \u003d df1.join(df2, Seq(\"ke [...]
+ "user": "anonymous",
+ "dateUpdated": "2020-02-01 21:19:09.910",
+ "config": {
+ "editorSetting": {
+ "language": "scala",
+ "editOnDblClick": false,
+ "completionKey": "TAB",
+ "completionSupport": true
+ },
+ "colWidth": 6.0,
+ "editorMode": "ace/mode/scala",
+ "fontSize": 9.0,
+ "title": false,
+ "results": {},
+ "enabled": true,
+ "runOnSelectionChange": true,
+ "checkEmpty": true
+ },
+ "settings": {
+ "params": {},
+ "forms": {}
+ },
+ "results": {
+ "code": "SUCCESS",
+ "msg": [
+ {
+ "type": "TEXT",
+ "data": "+-----+---+-----+-----+\n| name|age|key_1|key_2|\n+-----+---+-----+-----+\n| andy| 20| 1| 1|\n| jeff| 23| 1| 2|\n|james| 12| 2| 2|\n+-----+---+-----+-----+\n\n+-----+-----+-------+\n|key_1|key_2|country|\n+-----+-----+-------+\n| 1| 1| USA|\n| 2| 2| China|\n+-----+-----+-------+\n\n+-----+-----+-----+---+-------+\n|key_1|key_2| name|age|country|\n+-----+-----+-----+---+-------+\n| 1| 1| andy| 20| USA|\n| 2| 2|james| 12 [...]
+ }
+ ]
+ },
+ "apps": [],
+ "progressUpdateIntervalMs": 500,
+ "jobName": "paragraph_1579593308071_-1986687979",
+ "id": "20180530-135600_354945835",
+ "dateCreated": "2020-01-21 15:55:08.071",
+ "dateStarted": "2020-02-01 21:19:09.919",
+ "dateFinished": "2020-02-01 21:19:10.830",
+ "status": "FINISHED"
+ },
+ {
+ "title": "Use SQL directly",
+ "text": "%spark\n\nval df1 \u003d spark.createDataFrame(Seq((1, \"andy\", 20, \"USA\"), (2, \"jeff\", 23, \"China\"), (3, \"james\", 18, \"USA\"))).toDF(\"id\", \"name\", \"age\", \"country\")\n// call createOrReplaceTempView first if you want to query this DataFrame via sql\ndf1.createOrReplaceTempView(\"people\")\n// SparkSession.sql returns a DataFrame\nval df2 \u003d spark.sql(\"select name, age from people\")\ndf2.show()\n\n// You need to register a udf if you want to use it in sql [...]
+ "user": "anonymous",
+ "dateUpdated": "2020-02-01 21:19:10.832",
+ "config": {
+ "editorSetting": {
+ "language": "scala",
+ "editOnDblClick": false,
+ "completionKey": "TAB",
+ "completionSupport": true
+ },
+ "colWidth": 12.0,
+ "editorMode": "ace/mode/scala",
+ "fontSize": 9.0,
+ "title": false,
+ "results": {},
+ "enabled": true,
+ "runOnSelectionChange": true,
+ "checkEmpty": true
+ },
+ "settings": {
+ "params": {},
+ "forms": {}
+ },
+ "results": {
+ "code": "SUCCESS",
+ "msg": [
+ {
+ "type": "TEXT",
+ "data": "+-----+---+\n| name|age|\n+-----+---+\n| andy| 20|\n| jeff| 23|\n|james| 18|\n+-----+---+\n\n+--------------+---+\n|UDF:udf1(name)|age|\n+--------------+---+\n| ANDY| 20|\n| JEFF| 23|\n| JAMES| 18|\n+--------------+---+\n\n\u001b[1m\u001b[34mdf1\u001b[0m: \u001b[1m\u001b[32morg.apache.spark.sql.DataFrame\u001b[0m \u003d [id: int, name: string ... 2 more fields]\n\u001b[1m\u001b[34mdf2\u001b[0m: \u001b[1m\u001b[32morg.apache.spark.sql.DataFra [...]
+ }
+ ]
+ },
+ "apps": [],
+ "progressUpdateIntervalMs": 500,
+ "jobName": "paragraph_1579593308071_-1489550728",
+ "id": "20180530-132023_995737505",
+ "dateCreated": "2020-01-21 15:55:08.071",
+ "dateStarted": "2020-02-01 21:19:10.839",
+ "dateFinished": "2020-02-01 21:19:21.166",
+ "status": "FINISHED"
+ },
+ {
+ "title": "Visualize DataFrame/Dataset",
+ "text": "%md\n\nThere are two approaches to visualize a DataFrame/Dataset in Zeppelin\n\n* Use SparkSQLInterpreter via `%spark.sql`\n* Use ZeppelinContext via `z.show`\n\n",
+ "user": "anonymous",
+ "dateUpdated": "2020-01-21 15:55:08.071",
+ "config": {
+ "tableHide": false,
+ "editorSetting": {
+ "language": "markdown",
+ "editOnDblClick": true,
+ "completionKey": "TAB",
+ "completionSupport": false
+ },
+ "colWidth": 12.0,
+ "editorMode": "ace/mode/markdown",
+ "fontSize": 9.0,
+ "editorHide": true,
+ "title": true,
+ "results": {},
+ "enabled": true
+ },
+ "settings": {
+ "params": {},
+ "forms": {}
+ },
+ "results": {
+ "code": "SUCCESS",
+ "msg": [
+ {
+ "type": "HTML",
+ "data": "\u003cdiv class\u003d\"markdown-body\"\u003e\n\u003cp\u003eThere\u0026rsquo;s 2 approaches to visuliaze DataFrame/Dataset in Zeppelin\u003c/p\u003e\n\u003cul\u003e\n \u003cli\u003eUse SparkSQLInterpreter via \u003ccode\u003e%spark.sql\u003c/code\u003e\u003c/li\u003e\n \u003cli\u003eUse ZeppelinContext via \u003ccode\u003ez.show\u003c/code\u003e\u003c/li\u003e\n\u003c/ul\u003e\n\u003c/div\u003e"
+ }
+ ]
+ },
+ "apps": [],
+ "progressUpdateIntervalMs": 500,
+ "jobName": "paragraph_1579593308071_11859357",
+ "id": "20180530-132128_2114955642",
+ "dateCreated": "2020-01-21 15:55:08.071",
+ "status": "READY"
+ },
+ {
+ "title": "Visualize DataFrame/Dataset via z.show",
+ "text": "%spark\n\nval df1 \u003d spark.createDataFrame(Seq((1, \"andy\", 20, \"USA\"), (2, \"jeff\", 23, \"China\"), (3, \"james\", 18, \"USA\"))).toDF(\"id\", \"name\", \"age\", \"country\")\nval df2 \u003d df1.groupBy(\"country\").count()\nz.show(df2)\n\n\n\n\n",
+ "user": "anonymous",
+ "dateUpdated": "2020-02-01 21:19:52.949",
+ "config": {
+ "editorSetting": {
+ "language": "scala",
+ "editOnDblClick": false,
+ "completionKey": "TAB",
+ "completionSupport": true
+ },
+ "colWidth": 6.0,
+ "editorMode": "ace/mode/scala",
+ "fontSize": 9.0,
+ "title": false,
+ "results": {
+ "0": {
+ "graph": {
+ "mode": "table",
+ "height": 300.0,
+ "optionOpen": false,
+ "setting": {
+ "table": {
+ "tableGridState": {},
+ "tableColumnTypeState": {
+ "names": {
+ "country": "string",
+ "count": "string"
+ },
+ "updated": false
+ },
+ "tableOptionSpecHash": "[{\"name\":\"useFilter\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable filter for columns\"},{\"name\":\"showPagination\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable pagination for better navigation\"},{\"name\":\"showAggregationFooter\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable a footer [...]
+ "tableOptionValue": {
+ "useFilter": false,
+ "showPagination": false,
+ "showAggregationFooter": false
+ },
+ "updated": false,
+ "initialized": false
+ }
+ },
+ "commonSetting": {}
+ }
+ },
+ "1": {
+ "graph": {
+ "mode": "multiBarChart",
+ "height": 300.0,
+ "optionOpen": false,
+ "setting": {
+ "table": {
+ "tableGridState": {},
+ "tableColumnTypeState": {
+ "names": {
+ "country": "string",
+ "count": "string"
+ },
+ "updated": false
+ },
+ "tableOptionSpecHash": "[{\"name\":\"useFilter\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable filter for columns\"},{\"name\":\"showPagination\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable pagination for better navigation\"},{\"name\":\"showAggregationFooter\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable a footer [...]
+ "tableOptionValue": {
+ "useFilter": false,
+ "showPagination": false,
+ "showAggregationFooter": false
+ },
+ "updated": false,
+ "initialized": false
+ },
+ "multiBarChart": {
+ "rotate": {
+ "degree": "-45"
+ },
+ "xLabelStatus": "default"
+ }
+ },
+ "commonSetting": {},
+ "keys": [
+ {
+ "name": "country",
+ "index": 0.0,
+ "aggr": "sum"
+ }
+ ],
+ "groups": [],
+ "values": [
+ {
+ "name": "count",
+ "index": 1.0,
+ "aggr": "sum"
+ }
+ ]
+ },
+ "helium": {}
+ }
+ },
+ "enabled": true,
+ "runOnSelectionChange": true,
+ "checkEmpty": true
+ },
+ "settings": {
+ "params": {},
+ "forms": {}
+ },
+ "results": {
+ "code": "SUCCESS",
+ "msg": [
+ {
+ "type": "TABLE",
+ "data": "country\tcount\nChina\t1\nUSA\t2\n"
+ },
+ {
+ "type": "TEXT",
+ "data": "\u001b[1m\u001b[34mdf1\u001b[0m: \u001b[1m\u001b[32morg.apache.spark.sql.DataFrame\u001b[0m \u003d [id: int, name: string ... 2 more fields]\n\u001b[1m\u001b[34mdf2\u001b[0m: \u001b[1m\u001b[32morg.apache.spark.sql.DataFrame\u001b[0m \u003d [country: string, count: bigint]\n"
+ }
+ ]
+ },
+ "apps": [],
+ "progressUpdateIntervalMs": 500,
+ "jobName": "paragraph_1579593308072_-324341501",
+ "id": "20180530-132634_1285621466",
+ "dateCreated": "2020-01-21 15:55:08.072",
+ "dateStarted": "2020-02-01 21:19:50.607",
+ "dateFinished": "2020-02-01 21:19:51.673",
+ "status": "FINISHED"
+ },
+ {
+ "title": "Visualize DataFrame/Dataset via %spark.sql",
+ "text": "%spark\n\nval df1 \u003d spark.createDataFrame(Seq((1, \"andy\", 20, \"USA\"), (2, \"jeff\", 23, \"China\"), (3, \"james\", 18, \"USA\"))).toDF(\"id\", \"name\", \"age\", \"country\")\n// register this DataFrame first before querying it via %spark.sql\ndf1.createOrReplaceTempView(\"people\")",
+ "user": "anonymous",
+ "dateUpdated": "2020-02-01 21:19:52.264",
+ "config": {
+ "editorSetting": {
+ "language": "scala",
+ "editOnDblClick": false,
+ "completionKey": "TAB",
+ "completionSupport": true
+ },
+ "colWidth": 6.0,
+ "editorMode": "ace/mode/scala",
+ "fontSize": 9.0,
+ "title": false,
+ "results": {},
+ "enabled": true,
+ "runOnSelectionChange": true,
+ "checkEmpty": true
+ },
+ "settings": {
+ "params": {},
+ "forms": {}
+ },
+ "results": {
+ "code": "SUCCESS",
+ "msg": [
+ {
+ "type": "TEXT",
+ "data": "\u001b[1m\u001b[34mdf1\u001b[0m: \u001b[1m\u001b[32morg.apache.spark.sql.DataFrame\u001b[0m \u003d [id: int, name: string ... 2 more fields]\n"
+ }
+ ]
+ },
+ "apps": [],
+ "progressUpdateIntervalMs": 500,
+ "jobName": "paragraph_1579593308072_-1114338291",
+ "id": "20180530-132657_668624333",
+ "dateCreated": "2020-01-21 15:55:08.072",
+ "dateStarted": "2020-02-01 21:19:52.271",
+ "dateFinished": "2020-02-01 21:19:52.604",
+ "status": "FINISHED"
+ },
+ {
+ "title": "",
+ "text": "%spark.sql\n\nselect country, count(1) as count from people group by country",
+ "user": "anonymous",
+ "dateUpdated": "2020-02-01 21:19:54.634",
+ "config": {
+ "editorSetting": {
+ "language": "sql",
+ "editOnDblClick": false,
+ "completionKey": "TAB",
+ "completionSupport": true
+ },
+ "colWidth": 6.0,
+ "editorMode": "ace/mode/sql",
+ "fontSize": 9.0,
+ "results": {
+ "0": {
+ "graph": {
+ "mode": "multiBarChart",
+ "height": 300.0,
+ "optionOpen": false,
+ "setting": {
+ "table": {
+ "tableGridState": {
+ "columns": [
+ {
+ "name": "country",
+ "visible": true,
+ "width": "*",
+ "sort": {},
+ "filters": [
+ {}
+ ],
+ "pinned": ""
+ },
+ {
+ "name": "count",
+ "visible": true,
+ "width": "*",
+ "sort": {},
+ "filters": [
+ {}
+ ],
+ "pinned": ""
+ }
+ ],
+ "scrollFocus": {},
+ "selection": [],
+ "grouping": {
+ "grouping": [],
+ "aggregations": [],
+ "rowExpandedStates": {}
+ },
+ "treeView": {},
+ "pagination": {
+ "paginationCurrentPage": 1.0,
+ "paginationPageSize": 250.0
+ }
+ },
+ "tableColumnTypeState": {
+ "names": {
+ "country": "string",
+ "count": "number"
+ },
+ "updated": false
+ },
+ "tableOptionSpecHash": "[{\"name\":\"useFilter\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable filter for columns\"},{\"name\":\"showPagination\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable pagination for better navigation\"},{\"name\":\"showAggregationFooter\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable a footer [...]
+ "tableOptionValue": {
+ "useFilter": false,
+ "showPagination": false,
+ "showAggregationFooter": false
+ },
+ "updated": false,
+ "initialized": false
+ },
+ "multiBarChart": {
+ "rotate": {
+ "degree": "-45"
+ },
+ "xLabelStatus": "default",
+ "stacked": false
+ },
+ "stackedAreaChart": {
+ "rotate": {
+ "degree": "-45"
+ },
+ "xLabelStatus": "default"
+ },
+ "lineChart": {
+ "rotate": {
+ "degree": "-45"
+ },
+ "xLabelStatus": "default"
+ }
+ },
+ "commonSetting": {},
+ "keys": [
+ {
+ "name": "country",
+ "index": 0.0,
+ "aggr": "sum"
+ }
+ ],
+ "groups": [],
+ "values": [
+ {
+ "name": "count",
+ "index": 1.0,
+ "aggr": "sum"
+ }
+ ]
+ },
+ "helium": {}
+ }
+ },
+ "enabled": true,
+ "runOnSelectionChange": true,
+ "title": false,
+ "checkEmpty": true
+ },
+ "settings": {
+ "params": {},
+ "forms": {}
+ },
+ "results": {
+ "code": "SUCCESS",
+ "msg": [
+ {
+ "type": "TABLE",
+ "data": "country\tcount\nChina\t1\nUSA\t2\n"
+ }
+ ]
+ },
+ "apps": [],
+ "progressUpdateIntervalMs": 500,
+ "jobName": "paragraph_1579593308072_-1957405672",
+ "id": "20180530-132823_944494152",
+ "dateCreated": "2020-01-21 15:55:08.072",
+ "dateStarted": "2020-02-01 21:19:54.655",
+ "dateFinished": "2020-02-01 21:19:55.376",
+ "status": "FINISHED"
+ },
+ {
+ "title": "",
+ "text": "%spark.sql\n",
+ "user": "anonymous",
+ "dateUpdated": "2020-01-21 15:55:08.072",
+ "config": {},
+ "settings": {
+ "params": {},
+ "forms": {}
+ },
+ "apps": [],
+ "progressUpdateIntervalMs": 500,
+ "jobName": "paragraph_1579593308072_972780641",
+ "id": "20180530-132849_1305166760",
+ "dateCreated": "2020-01-21 15:55:08.072",
+ "status": "READY"
+ }
+ ],
+ "name": "Spark SQL Tutorial (Scala)",
+ "id": "2EYUV26VR",
+ "defaultInterpreterGroup": "spark",
+ "version": "0.9.0-SNAPSHOT",
+ "permissions": {},
+ "noteParams": {},
+ "noteForms": {},
+ "angularObjects": {},
+ "config": {
+ "isZeppelinNotebookCronEnable": true
+ },
+ "info": {
+ "isRunning": false
+ },
+ "path": "/Spark Tutorial/Spark SQL Tutorial (Scala)"
+}
\ No newline at end of file
diff --git a/notebook/Spark Tutorial/SparkR Basics_2BWJFTXKM.zpln b/notebook/Spark Tutorial/SparkR Basics_2BWJFTXKM.zpln
new file mode 100644
index 0000000..3f83280
--- /dev/null
+++ b/notebook/Spark Tutorial/SparkR Basics_2BWJFTXKM.zpln
@@ -0,0 +1,1119 @@
+{
+ "paragraphs": [
+ {
+ "title": "Overview",
+ "text": "%md\n\nRegarding using R in Zeppelin, you can refer the R tutorial. This tutorial is for using SparkR in Zeppelin, where you not only be able to use all the R features, but also can use Spark.\n",
+ "user": "anonymous",
+ "dateUpdated": "2020-02-07 11:15:03.835",
+ "config": {
+ "colWidth": 12.0,
+ "fontSize": 9.0,
+ "enabled": true,
+ "results": {},
+ "editorSetting": {
+ "language": "text",
+ "editOnDblClick": false,
+ "completionKey": "TAB",
+ "completionSupport": true
+ },
+ "editorMode": "ace/mode/text",
+ "title": true,
+ "editorHide": true
+ },
+ "settings": {
+ "params": {},
+ "forms": {}
+ },
+ "results": {
+ "code": "SUCCESS",
+ "msg": [
+ {
+ "type": "HTML",
+ "data": "\u003cdiv class\u003d\"markdown-body\"\u003e\n\u003cp\u003eRegarding using R in Zeppelin, you can refer the R tutorial. This tutorial is for using SparkR in Zeppelin, where you not only be able to use all the R features, but also can use Spark.\u003c/p\u003e\n\n\u003c/div\u003e"
+ }
+ ]
+ },
+ "apps": [],
+ "progressUpdateIntervalMs": 500,
+ "jobName": "paragraph_1581045239881_1714679133",
+ "id": "paragraph_1581045239881_1714679133",
+ "dateCreated": "2020-02-07 11:13:59.881",
+ "dateStarted": "2020-02-07 11:15:00.785",
+ "dateFinished": "2020-02-07 11:15:02.121",
+ "status": "FINISHED"
+ },
+ {
+ "title": "Hello R",
+ "text": "%spark.r\n\nfoo \u003c- TRUE\nprint(foo)\nbare \u003c- c(1, 2.5, 4)\nprint(bare)\ndouble \u003c- 15.0\nprint(double)",
+ "user": "anonymous",
+ "dateUpdated": "2020-02-07 11:19:11.968",
+ "config": {
+ "colWidth": 12.0,
+ "editorMode": "ace/mode/r",
+ "enabled": true,
+ "title": true,
+ "results": [
+ {
+ "graph": {
+ "mode": "table",
+ "height": 84.64583587646484,
+ "optionOpen": false,
+ "keys": [],
+ "values": [],
+ "groups": [],
+ "scatter": {}
+ }
+ }
+ ],
+ "editorSetting": {
+ "language": "r",
+ "editOnDblClick": false,
+ "completionSupport": false,
+ "completionKey": "TAB"
+ },
+ "fontSize": 9.0,
+ "runOnSelectionChange": true,
+ "checkEmpty": true
+ },
+ "settings": {
+ "params": {},
+ "forms": {}
+ },
+ "results": {
+ "code": "SUCCESS",
+ "msg": [
+ {
+ "type": "TEXT",
+ "data": "\n[1] TRUE\n[1] 1.0 2.5 4.0\n[1] 15\n\n\n\n"
+ }
+ ]
+ },
+ "apps": [],
+ "progressUpdateIntervalMs": 500,
+ "jobName": "paragraph_1429882946244_-381648689",
+ "id": "20150424-154226_261270952",
+ "dateCreated": "2015-04-24 03:42:26.000",
+ "dateStarted": "2020-02-07 11:19:03.923",
+ "dateFinished": "2020-02-07 11:19:17.639",
+ "status": "FINISHED"
+ },
+ {
+ "title": "Load R Librairies",
+ "text": "%spark.r\n\nlibrary(data.table)\ndt \u003c- data.table(1:3)\nprint(dt)\nfor (i in 1:5) {\n print(i*2)\n}\nprint(1:50)",
+ "user": "anonymous",
+ "dateUpdated": "2020-02-07 11:19:19.474",
+ "config": {
+ "colWidth": 12.0,
+ "editorMode": "ace/mode/r",
+ "enabled": true,
+ "title": true,
+ "results": [
+ {
+ "graph": {
+ "mode": "table",
+ "height": 193.33334350585938,
+ "optionOpen": false,
+ "keys": [],
+ "values": [],
+ "groups": [],
+ "scatter": {}
+ }
+ }
+ ],
+ "editorSetting": {
+ "language": "r",
+ "editOnDblClick": false,
+ "completionSupport": false,
+ "completionKey": "TAB"
+ },
+ "fontSize": 9.0
+ },
+ "settings": {
+ "params": {},
+ "forms": {}
+ },
+ "results": {
+ "code": "SUCCESS",
+ "msg": [
+ {
+ "type": "TEXT",
+ "data": "\nV1\n1: 1\n2: 2\n3: 3\n[1] 2\n[1] 4\n[1] 6\n[1] 8\n[1] 10\n [1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23\n[24] 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46\n[47] 47 48 49 50\n\n\n\n"
+ }
+ ]
+ },
+ "apps": [],
+ "progressUpdateIntervalMs": 500,
+ "jobName": "paragraph_1429882976611_1352445253",
+ "id": "20150424-154256_645296307",
+ "dateCreated": "2015-04-24 03:42:56.000",
+ "dateStarted": "2020-02-07 11:19:19.481",
+ "dateFinished": "2020-02-07 11:19:19.650",
+ "status": "FINISHED"
+ },
+ {
+ "title": "Load Iris Dataset",
+ "text": "%r\ncolnames(iris)\niris$Petal.Length\niris$Sepal.Length",
+ "user": "anonymous",
+ "dateUpdated": "2020-02-07 11:19:24.707",
+ "config": {
+ "colWidth": 12.0,
+ "enabled": true,
+ "editorMode": "ace/mode/r",
+ "title": true,
+ "results": [
+ {
+ "graph": {
+ "mode": "table",
+ "height": 169.33334350585938,
+ "optionOpen": false,
+ "keys": [],
+ "values": [],
+ "groups": [],
+ "scatter": {}
+ }
+ }
+ ],
+ "editorSetting": {
+ "language": "r",
+ "editOnDblClick": false,
+ "completionSupport": false,
+ "completionKey": "TAB"
+ },
+ "fontSize": 9.0
+ },
+ "settings": {
+ "params": {},
+ "forms": {}
+ },
+ "results": {
+ "code": "SUCCESS",
+ "msg": [
+ {
+ "type": "TEXT",
+ "data": "\n[1] “Sepal.Length” “Sepal.Width” “Petal.Length” “Petal.Width” \n[5] “Species”\n [1] 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 1.5 1.6 1.4 1.1 1.2 1.5 1.3\n [18] 1.4 1.7 1.5 1.7 1.5 1.0 1.7 1.9 1.6 1.6 1.5 1.4 1.6 1.6 1.5 1.5 1.4\n [35] 1.5 1.2 1.3 1.4 1.3 1.5 1.3 1.3 1.3 1.6 1.9 1.4 1.6 1.4 1.5 1.4 4.7\n [52] 4.5 4.9 4.0 4.6 4.5 4.7 3.3 4.6 3.9 3.5 4.2 4.0 4.7 3.6 4.4 4.5 4.1\n [69] 4.5 3.9 4.8 4.0 4.9 4.7 4.3 4.4 4.8 5.0 4.5 3.5 3.8 3.7 3.9 5.1 4.5\n [86] 4.5 4.7 [...]
+ }
+ ]
+ },
+ "apps": [],
+ "progressUpdateIntervalMs": 500,
+ "jobName": "paragraph_1455138077044_161383897",
+ "id": "20160210-220117_115873183",
+ "dateCreated": "2016-02-10 10:01:17.000",
+ "dateStarted": "2020-02-07 11:19:24.714",
+ "dateFinished": "2020-02-07 11:19:24.788",
+ "status": "FINISHED"
+ },
+ {
+ "title": "TABLE Display",
+ "text": "%spark.r\n\nprint(\"%table name\\tsize\\nsmall\\t100\\nlarge\\t1000\")",
+ "user": "anonymous",
+ "dateUpdated": "2020-02-07 11:19:29.844",
+ "config": {
+ "colWidth": 6.0,
+ "enabled": true,
+ "title": true,
+ "editorMode": "ace/mode/r",
+ "results": [
+ {
+ "graph": {
+ "mode": "table",
+ "height": 408.6458435058594,
+ "optionOpen": false,
+ "keys": [
+ {
+ "name": "name",
+ "index": 0.0,
+ "aggr": "sum"
+ }
+ ],
+ "values": [
+ {
+ "name": "size",
+ "index": 1.0,
+ "aggr": "sum"
+ }
+ ],
+ "groups": [],
+ "scatter": {
+ "xAxis": {
+ "name": "name",
+ "index": 0.0,
+ "aggr": "sum"
+ },
+ "yAxis": {
+ "name": "size",
+ "index": 1.0,
+ "aggr": "sum"
+ }
+ },
+ "setting": {
+ "table": {
+ "tableGridState": {},
+ "tableColumnTypeState": {
+ "names": {
+ "[1] name": "string",
+ "size": "string"
+ },
+ "updated": false
+ },
+ "tableOptionSpecHash": "[{\"name\":\"useFilter\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable filter for columns\"},{\"name\":\"showPagination\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable pagination for better navigation\"},{\"name\":\"showAggregationFooter\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable a footer [...]
+ "tableOptionValue": {
+ "useFilter": false,
+ "showPagination": false,
+ "showAggregationFooter": false
+ },
+ "updated": false,
+ "initialized": false
+ }
+ },
+ "commonSetting": {}
+ },
+ "helium": {}
+ }
+ ],
+ "editorSetting": {
+ "language": "r",
+ "editOnDblClick": false,
+ "completionSupport": false,
+ "completionKey": "TAB"
+ },
+ "fontSize": 9.0
+ },
+ "settings": {
+ "params": {},
+ "forms": {}
+ },
+ "results": {
+ "code": "SUCCESS",
+ "msg": [
+ {
+ "type": "TABLE",
+ "data": "[1] name\tsize\nsmall\t100\nlarge\t1000"
+ }
+ ]
+ },
+ "apps": [],
+ "progressUpdateIntervalMs": 500,
+ "jobName": "paragraph_1456216582752_6855525",
+ "id": "20160223-093622_330111284",
+ "dateCreated": "2016-02-23 09:36:22.000",
+ "dateStarted": "2020-02-07 11:19:29.382",
+ "dateFinished": "2020-02-07 11:19:29.584",
+ "status": "FINISHED"
+ },
+ {
+ "title": "HTML Display",
+ "text": "%spark.r \n\nprint(\"%html \u003ch3\u003eHello HTML\u003c/h3\u003e\")\nprint(\"\u003cfont color\u003d\u0027blue\u0027\u003e\u003cspan class\u003d\u0027fa fa-bars\u0027\u003e Easy...\u003c/font\u003e\u003c/span\u003e\")\nfor (i in 1:10) {\n print(paste0(\"\u003ch4\u003e\", i, \" * 2 \u003d \", i*2, \"\u003c/h4\u003e\"))\n}\n",
+ "user": "anonymous",
+ "dateUpdated": "2020-02-07 11:19:31.399",
+ "config": {
+ "colWidth": 6.0,
+ "enabled": true,
+ "editorMode": "ace/mode/r",
+ "title": true,
+ "results": [
+ {
+ "graph": {
+ "mode": "table",
+ "height": 361.66668701171875,
+ "optionOpen": false,
+ "keys": [],
+ "values": [],
+ "groups": [],
+ "scatter": {}
+ }
+ }
+ ],
+ "editorSetting": {
+ "language": "r",
+ "editOnDblClick": false,
+ "completionSupport": false,
+ "completionKey": "TAB"
+ },
+ "fontSize": 9.0
+ },
+ "settings": {
+ "params": {},
+ "forms": {}
+ },
+ "results": {
+ "code": "SUCCESS",
+ "msg": [
+ {
+ "type": "HTML",
+ "data": "\u003cp\u003e[1] \u003c/p\u003e\u003ch3\u003eHello HTML\u003c/h3\u003e\u003cfont color\u003d\"blue\"\u003e\u003cspan class\u003d\"fa fa-bars\"\u003e Easy…\u003c/span\u003e\u003c/font\u003e\u003ch4\u003e1 * 2 \u003d 2\u003c/h4\u003e\u003ch4\u003e2 * 2 \u003d 4\u003c/h4\u003e\u003ch4\u003e3 * 2 \u003d 6\u003c/h4\u003e\u003ch4\u003e4 * 2 \u003d 8\u003c/h4\u003e\u003ch4\u003e5 * 2 \u003d 10\u003c/h4\u003e\u003ch4\u003e6 * 2 \u003d 12\u003c/h4\u003e\u003ch4\u003e7 * 2 \u0 [...]
+ }
+ ]
+ },
+ "apps": [],
+ "progressUpdateIntervalMs": 500,
+ "jobName": "paragraph_1456140102445_51059930",
+ "id": "20160222-122142_1323614681",
+ "dateCreated": "2016-02-22 12:21:42.000",
+ "dateStarted": "2020-02-07 11:19:31.407",
+ "dateFinished": "2020-02-07 11:19:31.502",
+ "status": "FINISHED"
+ },
+ {
+ "title": "Create a Spark Dataframe",
+ "text": "%spark\nimport org.apache.commons.io.IOUtils\nimport java.net.URL\nimport java.nio.charset.Charset\n\nval bankText \u003d sc.parallelize(\n IOUtils.toString(\n new URL(\"https://s3.amazonaws.com/apache-zeppelin/tutorial/bank/bank.csv\"),\n Charset.forName(\"utf8\")).split(\"\\n\"))\n\ncase class Bank(age: Integer, job: String, marital: String, education: String, balance: Integer)\n\nval bank \u003d bankText.map(s \u003d\u003e s.split(\";\")).filter(s \u003 [...]
+ "user": "anonymous",
+ "dateUpdated": "2020-02-07 11:19:34.336",
+ "config": {
+ "colWidth": 6.0,
+ "enabled": true,
+ "lineNumbers": false,
+ "title": true,
+ "results": [
+ {
+ "graph": {
+ "mode": "table",
+ "height": 91.27083587646484,
+ "optionOpen": false,
+ "keys": [],
+ "values": [],
+ "groups": [],
+ "scatter": {}
+ }
+ }
+ ],
+ "editorSetting": {
+ "language": "scala",
+ "editOnDblClick": false,
+ "completionKey": "TAB",
+ "completionSupport": true
+ },
+ "editorMode": "ace/mode/scala",
+ "editorHide": false,
+ "tableHide": false,
+ "fontSize": 9.0
+ },
+ "settings": {
+ "params": {},
+ "forms": {}
+ },
+ "results": {
+ "code": "SUCCESS",
+ "msg": [
+ {
+ "type": "TEXT",
+ "data": "\u001b[33mwarning: \u001b[0mthere was one deprecation warning; re-run with -deprecation for details\nimport sqlContext.implicits._\nimport org.apache.commons.io.IOUtils\nimport java.net.URL\nimport java.nio.charset.Charset\n\u001b[1m\u001b[34mbankText\u001b[0m: \u001b[1m\u001b[32morg.apache.spark.rdd.RDD[String]\u001b[0m \u003d ParallelCollectionRDD[0] at parallelize at \u003cconsole\u003e:20\ndefined class Bank\n\u001b[1m\u001b[34mbank\u001b[0m: \u001b[1m\u001b[32mo [...]
+ }
+ ]
+ },
+ "apps": [],
+ "progressUpdateIntervalMs": 500,
+ "jobName": "paragraph_1455142039343_-233762796",
+ "id": "20160210-230719_2111095838",
+ "dateCreated": "2016-02-10 11:07:19.000",
+ "dateStarted": "2020-02-07 11:19:34.345",
+ "dateFinished": "2020-02-07 11:19:44.256",
+ "status": "FINISHED"
+ },
+ {
+ "title": "Read the Spark Dataframe from R",
+ "text": "%r\n\ndf \u003c- sql(sqlContext, \"select count(*) from bank\")\nprintSchema(df)\nSparkR::head(df)",
+ "user": "anonymous",
+ "dateUpdated": "2020-02-07 11:19:45.723",
+ "config": {
+ "colWidth": 6.0,
+ "enabled": true,
+ "editorMode": "ace/mode/r",
+ "tableHide": false,
+ "title": true,
+ "results": [
+ {
+ "graph": {
+ "mode": "table",
+ "height": 110.64583587646484,
+ "optionOpen": false,
+ "keys": [],
+ "values": [],
+ "groups": [],
+ "scatter": {}
+ }
+ }
+ ],
+ "editorSetting": {
+ "language": "r",
+ "editOnDblClick": false,
+ "completionSupport": false,
+ "completionKey": "TAB"
+ },
+ "fontSize": 9.0
+ },
+ "settings": {
+ "params": {},
+ "forms": {}
+ },
+ "results": {
+ "code": "SUCCESS",
+ "msg": [
+ {
+ "type": "TEXT",
+ "data": "\nroot\n |– count(1): long (nullable \u003d false)\n count(1)\n1 4521\n\n\n\n"
+ }
+ ]
+ },
+ "apps": [],
+ "progressUpdateIntervalMs": 500,
+ "jobName": "paragraph_1455142043062_1598026718",
+ "id": "20160210-230723_1811469598",
+ "dateCreated": "2016-02-10 11:07:23.000",
+ "dateStarted": "2020-02-07 11:19:45.729",
+ "dateFinished": "2020-02-07 11:19:51.131",
+ "status": "FINISHED"
+ },
+ {
+ "title": "Create a R Dataframe",
+ "text": "%spark.r \n\nlocalNames \u003c- data.frame(name\u003dc(\"John\", \"Smith\", \"Sarah\"), budget\u003dc(19, 53, 18))\nnames \u003c- createDataFrame(sqlContext, localNames)\nprintSchema(names)\nregisterTempTable(names, \"names\")\n\n# SparkR::head(names)",
+ "user": "anonymous",
+ "dateUpdated": "2020-02-07 11:19:54.525",
+ "config": {
+ "colWidth": 12.0,
+ "enabled": true,
+ "title": true,
+ "editorMode": "ace/mode/r",
+ "results": [
+ {
+ "graph": {
+ "mode": "table",
+ "height": 84.64583587646484,
+ "optionOpen": false,
+ "keys": [],
+ "values": [],
+ "groups": [],
+ "scatter": {}
+ }
+ }
+ ],
+ "editorSetting": {
+ "language": "r",
+ "editOnDblClick": false,
+ "completionSupport": false,
+ "completionKey": "TAB"
+ },
+ "editorHide": false,
+ "fontSize": 9.0
+ },
+ "settings": {
+ "params": {},
+ "forms": {}
+ },
+ "results": {
+ "code": "SUCCESS",
+ "msg": [
+ {
+ "type": "TEXT",
+ "data": "\nroot\n |– name: string (nullable \u003d true)\n |– budget: double (nullable \u003d true)\n\n\n\n"
+ }
+ ]
+ },
+ "apps": [],
+ "progressUpdateIntervalMs": 500,
+ "jobName": "paragraph_1455142112413_519883679",
+ "id": "20160210-230832_1847721959",
+ "dateCreated": "2016-02-10 11:08:32.000",
+ "dateStarted": "2020-02-07 11:19:54.531",
+ "dateFinished": "2020-02-07 11:19:54.834",
+ "status": "FINISHED"
+ },
+ {
+ "title": "Read the R Dataframe from Spark",
+ "text": "sqlContext.sql(\"select * from names\").head",
+ "user": "anonymous",
+ "dateUpdated": "2020-02-07 11:20:07.435",
+ "config": {
+ "colWidth": 12.0,
+ "enabled": true,
+ "editorMode": "ace/mode/scala",
+ "title": true,
+ "results": [
+ {
+ "graph": {
+ "mode": "table",
+ "height": 92.64583587646484,
+ "optionOpen": false,
+ "keys": [],
+ "values": [],
+ "groups": [],
+ "scatter": {}
+ }
+ }
+ ],
+ "editorSetting": {
+ "language": "scala",
+ "editOnDblClick": false,
+ "completionKey": "TAB",
+ "completionSupport": true
+ },
+ "editorHide": false,
+ "tableHide": false,
+ "fontSize": 9.0
+ },
+ "settings": {
+ "params": {},
+ "forms": {}
+ },
+ "results": {
+ "code": "SUCCESS",
+ "msg": [
+ {
+ "type": "TEXT",
+ "data": "\u001b[1m\u001b[34mres4\u001b[0m: \u001b[1m\u001b[32morg.apache.spark.sql.Row\u001b[0m \u003d [John,19.0]\n"
+ }
+ ]
+ },
+ "apps": [],
+ "progressUpdateIntervalMs": 500,
+ "jobName": "paragraph_1455188357108_95477841",
+ "id": "20160211-115917_445850505",
+ "dateCreated": "2016-02-11 11:59:17.000",
+ "dateStarted": "2020-02-07 11:20:07.442",
+ "dateFinished": "2020-02-07 11:20:08.663",
+ "status": "FINISHED"
+ },
+ {
+ "title": "Query the R Datafame with SQL",
+ "text": "%spark.sql\n\nselect * from names\n",
+ "user": "anonymous",
+ "dateUpdated": "2020-02-07 11:20:11.636",
+ "config": {
+ "colWidth": 12.0,
+ "enabled": true,
+ "editorMode": "ace/mode/sql",
+ "title": true,
+ "results": [
+ {
+ "graph": {
+ "mode": "pieChart",
+ "height": 263.3125,
+ "optionOpen": false,
+ "keys": [
+ {
+ "name": "name",
+ "index": 0.0,
+ "aggr": "sum"
+ }
+ ],
+ "values": [
+ {
+ "name": "budget",
+ "index": 1.0,
+ "aggr": "sum"
+ }
+ ],
+ "groups": [],
+ "scatter": {
+ "xAxis": {
+ "name": "name",
+ "index": 0.0,
+ "aggr": "sum"
+ }
+ },
+ "setting": {
+ "multiBarChart": {}
+ },
+ "commonSetting": {}
+ },
+ "helium": {}
+ }
+ ],
+ "editorSetting": {
+ "language": "sql",
+ "editOnDblClick": false,
+ "completionKey": "TAB",
+ "completionSupport": true
+ },
+ "fontSize": 9.0
+ },
+ "settings": {
+ "params": {},
+ "forms": {}
+ },
+ "results": {
+ "code": "SUCCESS",
+ "msg": [
+ {
+ "type": "TABLE",
+ "data": "name\tbudget\nJohn\t19.0\nSmith\t53.0\nSarah\t18.0\n"
+ }
+ ]
+ },
+ "apps": [],
+ "progressUpdateIntervalMs": 500,
+ "jobName": "paragraph_1455142115582_-1840950897",
+ "id": "20160210-230835_19876971",
+ "dateCreated": "2016-02-10 11:08:35.000",
+ "dateStarted": "2020-02-07 11:20:11.643",
+ "dateFinished": "2020-02-07 11:20:11.907",
+ "status": "FINISHED"
+ },
+ {
+ "title": "GoogleVis: Bar Chart",
+ "text": "%spark.r\n\nlibrary(googleVis)\ndf\u003ddata.frame(country\u003dc(\"US\", \"GB\", \"BR\"), \n val1\u003dc(10,13,14), \n val2\u003dc(23,12,32))\nBar \u003c- gvisBarChart(df)\nprint(Bar, tag \u003d \u0027chart\u0027)\n",
+ "user": "anonymous",
+ "dateUpdated": "2020-02-07 11:20:18.341",
+ "config": {
+ "colWidth": 4.0,
+ "enabled": true,
+ "results": {
+ "0": {
+ "graph": {
+ "mode": "table",
+ "height": 300.0,
+ "optionOpen": false
+ }
+ }
+ },
+ "editorSetting": {
+ "language": "r",
+ "editOnDblClick": false,
+ "completionSupport": false,
+ "completionKey": "TAB"
+ },
+ "editorMode": "ace/mode/r",
+ "editorHide": false,
+ "tableHide": false,
+ "title": true,
+ "fontSize": 9.0
+ },
+ "settings": {
+ "params": {},
+ "forms": {}
+ },
+ "results": {
+ "code": "SUCCESS",
+ "msg": [
+ {
+ "type": "HTML",
+ "data": "\n\u003c!-- BarChart generated in R 3.5.2 by googleVis 0.6.4 package --\u003e\n\n\u003c!-- Fri Feb 7 11:20:18 2020 --\u003e\n\n\u003c!-- jsHeader --\u003e\n\n\u003cscript type\u003d\"text/javascript\"\u003e\n \n// jsData \nfunction gvisDataBarChartID85be6a990d8d () {\nvar data \u003d new google.visualization.DataTable();\nvar datajson \u003d\n[\n [\n\"US\",\n10,\n23\n],\n[\n\"GB\",\n13,\n12\n],\n[\n\"BR\",\n14,\n32\n] \n];\ndata.addColumn(\u0027string\u0027,\u0027co [...]
+ }
+ ]
+ },
+ "apps": [],
+ "progressUpdateIntervalMs": 500,
+ "jobName": "paragraph_1485626417184_-1153542135",
+ "id": "20170129-030017_426747323",
+ "dateCreated": "2017-01-29 03:00:17.000",
+ "dateStarted": "2020-02-07 11:20:18.347",
+ "dateFinished": "2020-02-07 11:20:18.457",
+ "status": "FINISHED"
+ },
+ {
+ "title": "GoogleVis: Candlestick Chart",
+ "text": "%spark.r\n\nlibrary(googleVis)\n\nCandle \u003c- gvisCandlestickChart(OpenClose, \n options\u003dlist(legend\u003d\u0027none\u0027))\n\nprint(Candle, tag \u003d \u0027chart\u0027)",
+ "user": "anonymous",
+ "dateUpdated": "2020-02-07 11:20:24.120",
+ "config": {
+ "colWidth": 4.0,
+ "enabled": true,
+ "results": {
+ "0": {
+ "graph": {
+ "mode": "table",
+ "height": 84.64583587646484,
+ "optionOpen": false
+ }
+ }
+ },
+ "editorSetting": {
+ "language": "r",
+ "editOnDblClick": false,
+ "completionSupport": false,
+ "completionKey": "TAB"
+ },
+ "editorMode": "ace/mode/r",
+ "editorHide": false,
+ "tableHide": false,
+ "title": true,
+ "fontSize": 9.0
+ },
+ "settings": {
+ "params": {},
+ "forms": {}
+ },
+ "results": {
+ "code": "SUCCESS",
+ "msg": [
+ {
+ "type": "HTML",
+ "data": "\n\u003c!-- CandlestickChart generated in R 3.5.2 by googleVis 0.6.4 package --\u003e\n\n\u003c!-- Fri Feb 7 11:20:24 2020 --\u003e\n\n\u003c!-- jsHeader --\u003e\n\n\u003cscript type\u003d\"text/javascript\"\u003e\n \n// jsData \nfunction gvisDataCandlestickChartID85be71c1d87e () {\nvar data \u003d new google.visualization.DataTable();\nvar datajson \u003d\n[\n [\n\"Mon\",\n20,\n28,\n38,\n45\n],\n[\n\"Tues\",\n31,\n38,\n55,\n66\n],\n[\n\"Wed\",\n50,\n55,\n77,\n80\n [...]
+ }
+ ]
+ },
+ "apps": [],
+ "progressUpdateIntervalMs": 500,
+ "jobName": "paragraph_1485627113560_-130863711",
+ "id": "20170129-031153_758721410",
+ "dateCreated": "2017-01-29 03:11:53.000",
+ "dateStarted": "2020-02-07 11:20:24.126",
+ "dateFinished": "2020-02-07 11:20:24.183",
+ "status": "FINISHED"
+ },
+ {
+ "title": "GoogleVis: Line chart",
+ "text": "%spark.r\n\nlibrary(googleVis)\ndf\u003ddata.frame(country\u003dc(\"US\", \"GB\", \"BR\"), \n val1\u003dc(10,13,14), \n val2\u003dc(23,12,32))\n\nLine \u003c- gvisLineChart(df)\n\nprint(Line, tag \u003d \u0027chart\u0027)\n",
+ "user": "anonymous",
+ "dateUpdated": "2020-02-07 11:20:26.830",
+ "config": {
+ "colWidth": 4.0,
+ "enabled": true,
+ "editorMode": "ace/mode/r",
+ "results": [
+ {
+ "graph": {
+ "mode": "table",
+ "height": 61.458335876464844,
+ "optionOpen": false
+ }
+ }
+ ],
+ "editorSetting": {
+ "language": "r",
+ "editOnDblClick": false,
+ "completionSupport": false,
+ "completionKey": "TAB"
+ },
+ "editorHide": false,
+ "tableHide": false,
+ "title": true,
+ "fontSize": 9.0
+ },
+ "settings": {
+ "params": {},
+ "forms": {}
+ },
+ "results": {
+ "code": "SUCCESS",
+ "msg": [
+ {
+ "type": "HTML",
+ "data": "\n\u003c!-- LineChart generated in R 3.5.2 by googleVis 0.6.4 package --\u003e\n\n\u003c!-- Fri Feb 7 11:20:26 2020 --\u003e\n\n\u003c!-- jsHeader --\u003e\n\n\u003cscript type\u003d\"text/javascript\"\u003e\n \n// jsData \nfunction gvisDataLineChartID85be2cbc9ee () {\nvar data \u003d new google.visualization.DataTable();\nvar datajson \u003d\n[\n [\n\"US\",\n10,\n23\n],\n[\n\"GB\",\n13,\n12\n],\n[\n\"BR\",\n14,\n32\n] \n];\ndata.addColumn(\u0027string\u0027,\u0027c [...]
+ }
+ ]
+ },
+ "apps": [],
+ "progressUpdateIntervalMs": 500,
+ "jobName": "paragraph_1455138857313_92355963",
+ "id": "20160210-221417_1400405266",
+ "dateCreated": "2016-02-10 10:14:17.000",
+ "dateStarted": "2020-02-07 11:20:26.837",
+ "dateFinished": "2020-02-07 11:20:26.897",
+ "status": "FINISHED"
+ },
+ {
+ "text": "%spark.r\n\npairs(iris)",
+ "user": "anonymous",
+ "dateUpdated": "2020-02-07 11:20:30.769",
+ "config": {
+ "colWidth": 4.0,
+ "enabled": true,
+ "editorMode": "ace/mode/r",
+ "results": [
+ {
+ "graph": {
+ "mode": "table",
+ "height": 1857.0,
+ "optionOpen": false,
+ "keys": [],
+ "values": [],
+ "groups": [],
+ "scatter": {}
+ }
+ }
+ ],
+ "editorSetting": {
+ "language": "r",
+ "editOnDblClick": false,
+ "completionSupport": false,
+ "completionKey": "TAB"
+ },
+ "fontSize": 9.0
+ },
+ "settings": {
+ "params": {},
+ "forms": {}
+ },
+ "results": {
+ "code": "SUCCESS",
+ "msg": [
+ {
+ "type": "HTML",
+ "data": "\u003cp\u003e\u003cimg src\u003d\"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAA/AAAAPwCAYAAACWX5UOAAAEGWlDQ1BrQ0dDb2xvclNwYWNlR2VuZXJpY1JHQgAAOI2NVV1oHFUUPrtzZyMkzlNsNIV0qD8NJQ2TVjShtLp/3d02bpZJNtoi6GT27s6Yyc44M7v9oU9FUHwx6psUxL+3gCAo9Q/bPrQvlQol2tQgKD60+INQ6Ium65k7M5lpurHeZe58853vnnvuuWfvBei5qliWkRQBFpquLRcy4nOHj4g9K5CEh6AXBqFXUR0rXalMAjZPC3e1W99Dwntf2dXd/p+tt0YdFSBxH2Kz5qgLiI8B8KdVy3YBevqRHz/qWh72Yui3MUDEL3q44WPXw3M+fo1pZuQs4tOIBVVTaoiXEI/MxfhGDPsxsNZfoE1q66ro5aJ [...]
+ }
+ ]
+ },
+ "apps": [],
+ "progressUpdateIntervalMs": 500,
+ "jobName": "paragraph_1455137735427_-1023869289",
+ "id": "20160210-215535_1815168219",
+ "dateCreated": "2016-02-10 09:55:35.000",
+ "dateStarted": "2020-02-07 11:20:30.787",
+ "dateFinished": "2020-02-07 11:20:31.622",
+ "status": "FINISHED"
+ },
+ {
+ "text": "%spark.r\n\nplot(iris, col \u003d heat.colors(3))",
+ "user": "anonymous",
+ "dateUpdated": "2020-02-07 11:20:32.675",
+ "config": {
+ "colWidth": 4.0,
+ "enabled": true,
+ "editorMode": "ace/mode/r",
+ "results": [
+ {
+ "graph": {
+ "mode": "table",
+ "height": 399.66668701171875,
+ "optionOpen": false,
+ "keys": [],
+ "values": [],
+ "groups": [],
+ "scatter": {}
+ }
+ }
+ ],
+ "editorSetting": {
+ "language": "r",
+ "editOnDblClick": false,
+ "completionSupport": false,
+ "completionKey": "TAB"
+ },
+ "fontSize": 9.0
+ },
+ "settings": {
+ "params": {},
+ "forms": {}
+ },
+ "results": {
+ "code": "SUCCESS",
+ "msg": [
+ {
+ "type": "HTML",
+ "data": "\u003cp\u003e\u003cimg src\u003d\"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAA/AAAAPwCAYAAACWX5UOAAAEGWlDQ1BrQ0dDb2xvclNwYWNlR2VuZXJpY1JHQgAAOI2NVV1oHFUUPrtzZyMkzlNsNIV0qD8NJQ2TVjShtLp/3d02bpZJNtoi6GT27s6Yyc44M7v9oU9FUHwx6psUxL+3gCAo9Q/bPrQvlQol2tQgKD60+INQ6Ium65k7M5lpurHeZe58853vnnvuuWfvBei5qliWkRQBFpquLRcy4nOHj4g9K5CEh6AXBqFXUR0rXalMAjZPC3e1W99Dwntf2dXd/p+tt0YdFSBxH2Kz5qgLiI8B8KdVy3YBevqRHz/qWh72Yui3MUDEL3q44WPXw3M+fo1pZuQs4tOIBVVTaoiXEI/MxfhGDPsxsNZfoE1q66ro5aJ [...]
+ }
+ ]
+ },
+ "apps": [],
+ "progressUpdateIntervalMs": 500,
+ "jobName": "paragraph_1455137737773_-549089146",
+ "id": "20160210-215537_582262164",
+ "dateCreated": "2016-02-10 09:55:37.000",
+ "dateStarted": "2020-02-07 11:20:32.706",
+ "dateFinished": "2020-02-07 11:20:33.272",
+ "status": "FINISHED"
+ },
+ {
+ "text": "%spark.r\n\nlibrary(ggplot2)\npres_rating \u003c- data.frame(\n rating \u003d as.numeric(presidents),\n year \u003d as.numeric(floor(time(presidents))),\n quarter \u003d as.numeric(cycle(presidents))\n)\np \u003c- ggplot(pres_rating, aes(x\u003dyear, y\u003dquarter, fill\u003drating))\np + geom_raster()",
+ "user": "anonymous",
+ "dateUpdated": "2020-02-07 11:20:34.523",
+ "config": {
+ "colWidth": 4.0,
+ "enabled": true,
+ "editorMode": "ace/mode/r",
+ "results": [
+ {
+ "graph": {
+ "mode": "table",
+ "height": 449.66668701171875,
+ "optionOpen": false,
+ "keys": [],
+ "values": [],
+ "groups": [],
+ "scatter": {}
+ }
+ }
+ ],
+ "editorSetting": {
+ "language": "r",
+ "editOnDblClick": false,
+ "completionSupport": false,
+ "completionKey": "TAB"
+ },
+ "editorHide": false,
+ "fontSize": 9.0
+ },
+ "settings": {
+ "params": {},
+ "forms": {}
+ },
+ "results": {
+ "code": "SUCCESS",
+ "msg": [
+ {
+ "type": "HTML",
+ "data": "\u003cp\u003e\u003cimg src\u003d\"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAA/AAAAPwCAYAAACWX5UOAAAEGWlDQ1BrQ0dDb2xvclNwYWNlR2VuZXJpY1JHQgAAOI2NVV1oHFUUPrtzZyMkzlNsNIV0qD8NJQ2TVjShtLp/3d02bpZJNtoi6GT27s6Yyc44M7v9oU9FUHwx6psUxL+3gCAo9Q/bPrQvlQol2tQgKD60+INQ6Ium65k7M5lpurHeZe58853vnnvuuWfvBei5qliWkRQBFpquLRcy4nOHj4g9K5CEh6AXBqFXUR0rXalMAjZPC3e1W99Dwntf2dXd/p+tt0YdFSBxH2Kz5qgLiI8B8KdVy3YBevqRHz/qWh72Yui3MUDEL3q44WPXw3M+fo1pZuQs4tOIBVVTaoiXEI/MxfhGDPsxsNZfoE1q66ro5aJ [...]
+ }
+ ]
+ },
+ "apps": [],
+ "progressUpdateIntervalMs": 500,
+ "jobName": "paragraph_1438930880648_-1572054429",
+ "id": "20150807-090120_1060568667",
+ "dateCreated": "2015-08-07 09:01:20.000",
+ "dateStarted": "2020-02-07 11:20:34.534",
+ "dateFinished": "2020-02-07 11:20:35.510",
+ "status": "FINISHED"
+ },
+ {
+ "title": "GoogleViz: Bubble Chart",
+ "text": "%spark.r\n\nlibrary(googleVis)\nbubble \u003c- gvisBubbleChart(Fruits, idvar\u003d\"Fruit\", \n xvar\u003d\"Sales\", yvar\u003d\"Expenses\",\n colorvar\u003d\"Year\", sizevar\u003d\"Profit\",\n options\u003dlist(\n hAxis\u003d\u0027{minValue:75, maxValue:125}\u0027))\nprint(bubble, tag \u003d \u0027chart\u0027)",
+ "user": "anonymous",
+ "dateUpdated": "2020-02-07 11:20:37.653",
+ "config": {
+ "colWidth": 6.0,
+ "enabled": true,
+ "editorMode": "ace/mode/r",
+ "title": true,
+ "results": [
+ {
+ "graph": {
+ "mode": "table",
+ "height": 189.6666717529297,
+ "optionOpen": false,
+ "keys": [],
+ "values": [],
+ "groups": [],
+ "scatter": {}
+ }
+ }
+ ],
+ "editorSetting": {
+ "language": "r",
+ "editOnDblClick": false,
+ "completionSupport": false,
+ "completionKey": "TAB"
+ },
+ "editorHide": false,
+ "fontSize": 9.0
+ },
+ "settings": {
+ "params": {},
+ "forms": {}
+ },
+ "results": {
+ "code": "SUCCESS",
+ "msg": [
+ {
+ "type": "HTML",
+ "data": "\n\u003c!-- BubbleChart generated in R 3.5.2 by googleVis 0.6.4 package --\u003e\n\n\u003c!-- Fri Feb 7 11:20:37 2020 --\u003e\n\n\u003c!-- jsHeader --\u003e\n\n\u003cscript type\u003d\"text/javascript\"\u003e\n \n// jsData \nfunction gvisDataBubbleChartID85be4838c6df () {\nvar data \u003d new google.visualization.DataTable();\nvar datajson \u003d\n[\n [\n\"Apples\",\n98,\n78,\n2008,\n20\n],\n[\n\"Apples\",\n111,\n79,\n2009,\n32\n],\n[\n\"Apples\",\n89,\n76,\n2010,\ [...]
+ }
+ ]
+ },
+ "apps": [],
+ "progressUpdateIntervalMs": 500,
+ "jobName": "paragraph_1455141578555_-1713165000",
+ "id": "20160210-225938_1538591791",
+ "dateCreated": "2016-02-10 10:59:38.000",
+ "dateStarted": "2020-02-07 11:20:37.663",
+ "dateFinished": "2020-02-07 11:20:37.726",
+ "status": "FINISHED"
+ },
+ {
+ "title": "GoogleViz: Geo Chart",
+ "text": "%spark.r\n\nlibrary(googleVis)\ngeo \u003d gvisGeoChart(Exports, locationvar \u003d \"Country\", colorvar\u003d\"Profit\", options\u003dlist(Projection \u003d \"kavrayskiy-vii\"))\nprint(geo, tag \u003d \u0027chart\u0027)",
+ "user": "anonymous",
+ "dateUpdated": "2020-02-07 11:20:39.797",
+ "config": {
+ "colWidth": 6.0,
+ "enabled": true,
+ "editorMode": "ace/mode/r",
+ "results": [
+ {
+ "graph": {
+ "mode": "table",
+ "height": 336.66668701171875,
+ "optionOpen": false,
+ "keys": [],
+ "values": [],
+ "groups": [],
+ "scatter": {}
+ }
+ }
+ ],
+ "editorSetting": {
+ "language": "r",
+ "editOnDblClick": false,
+ "completionSupport": false,
+ "completionKey": "TAB"
+ },
+ "editorHide": false,
+ "title": true,
+ "fontSize": 9.0
+ },
+ "settings": {
+ "params": {},
+ "forms": {}
+ },
+ "results": {
+ "code": "SUCCESS",
+ "msg": [
+ {
+ "type": "HTML",
+ "data": "\n\u003c!-- GeoChart generated in R 3.5.2 by googleVis 0.6.4 package --\u003e\n\n\u003c!-- Fri Feb 7 11:20:39 2020 --\u003e\n\n\u003c!-- jsHeader --\u003e\n\n\u003cscript type\u003d\"text/javascript\"\u003e\n \n// jsData \nfunction gvisDataGeoChartID85be5db99ee1 () {\nvar data \u003d new google.visualization.DataTable();\nvar datajson \u003d\n[\n [\n\"Germany\",\n3\n],\n[\n\"Brazil\",\n4\n],\n[\n\"United States\",\n5\n],\n[\n\"France\",\n4\n],\n[\n\"Hungary\",\n3\n] [...]
+ }
+ ]
+ },
+ "apps": [],
+ "progressUpdateIntervalMs": 500,
+ "jobName": "paragraph_1455140544963_1486338978",
+ "id": "20160210-224224_735421242",
+ "dateCreated": "2016-02-10 10:42:24.000",
+ "dateStarted": "2020-02-07 11:20:39.806",
+ "dateFinished": "2020-02-07 11:20:39.871",
+ "status": "FINISHED"
+ },
+ {
+ "text": "%md\n\n## Congratulations, it\u0027s done.\n### You can create your own notebook in the \u0027Notebook\u0027 menu. Good luck!",
+ "user": "anonymous",
+ "dateUpdated": "2017-01-29 03:12:06.000",
+ "config": {
+ "colWidth": 12.0,
+ "enabled": true,
+ "results": {},
+ "editorSetting": {
+ "language": "markdown",
+ "editOnDblClick": true
+ },
+ "editorMode": "ace/mode/markdown",
+ "editorHide": true,
+ "tableHide": false
+ },
+ "settings": {
+ "params": {},
+ "forms": {}
+ },
+ "results": {
+ "code": "SUCCESS",
+ "msg": [
+ {
+ "type": "HTML",
+ "data": "\u003cdiv class\u003d\"markdown-body\"\u003e\n\u003ch2\u003eCongratulations, it\u0026rsquo;s done.\u003c/h2\u003e\n\u003ch3\u003eYou can create your own notebook in the \u0026lsquo;Notebook\u0026rsquo; menu. Good luck!\u003c/h3\u003e\n\u003c/div\u003e"
+ }
+ ]
+ },
+ "apps": [],
+ "progressUpdateIntervalMs": 500,
+ "jobName": "paragraph_1485626988585_-946362813",
+ "id": "20170129-030948_1379298104",
+ "dateCreated": "2017-01-29 03:09:48.000",
+ "dateStarted": "2017-01-29 03:12:06.000",
+ "dateFinished": "2017-01-29 03:12:06.000",
+ "status": "FINISHED"
+ }
+ ],
+ "name": "SparkR Basics",
+ "id": "2BWJFTXKM",
+ "defaultInterpreterGroup": "spark",
+ "permissions": {},
+ "noteParams": {},
+ "noteForms": {},
+ "angularObjects": {},
+ "config": {
+ "looknfeel": "default",
+ "isZeppelinNotebookCronEnable": true
+ },
+ "info": {},
+ "path": "/Spark Tutorial/SparkR Basics"
+}
\ No newline at end of file
diff --git a/notebook/Spark Tutorial/SparkR Shiny App_2F1CHQ4TT.zpln b/notebook/Spark Tutorial/SparkR Shiny App_2F1CHQ4TT.zpln
new file mode 100644
index 0000000..acbeff6
--- /dev/null
+++ b/notebook/Spark Tutorial/SparkR Shiny App_2F1CHQ4TT.zpln
@@ -0,0 +1,217 @@
+{
+ "paragraphs": [
+ {
+ "title": "Introduction",
+ "text": "%md\n\n[Shiny](https://shiny.rstudio.com/tutorial/) is an R package that makes it easy to build interactive web applications (apps) straight from R. To develop a Shiny app in Zeppelin, you need at least 3 paragraphs (a server paragraph, a ui paragraph and a run type paragraph). Users are able to build Shiny apps not only in the R interpreter, but also in the SparkR interpreter, where you can use Spark.",
+ "user": "anonymous",
+ "dateUpdated": "2020-02-06 17:26:32.533",
+ "config": {
+ "colWidth": 12.0,
+ "fontSize": 9.0,
+ "enabled": true,
+ "results": {},
+ "editorSetting": {
+ "language": "text",
+ "editOnDblClick": false,
+ "completionKey": "TAB",
+ "completionSupport": true
+ },
+ "editorMode": "ace/mode/text",
+ "editorHide": true,
+ "title": true
+ },
+ "settings": {
+ "params": {},
+ "forms": {}
+ },
+ "results": {
+ "code": "SUCCESS",
+ "msg": [
+ {
+ "type": "HTML",
+ "data": "\u003cdiv class\u003d\"markdown-body\"\u003e\n\u003cp\u003e\u003ca href\u003d\"https://shiny.rstudio.com/tutorial/\"\u003eShiny\u003c/a\u003e is an R package that makes it easy to build interactive web applications (apps) straight from R. To develop a Shiny app in Zeppelin, you need at least 3 paragraphs (a server paragraph, a ui paragraph and a run type paragraph). Users are able to build Shiny apps not only in the R interpreter, but also in the SparkR interpreter, where yo [...]
+ }
+ ]
+ },
+ "apps": [],
+ "progressUpdateIntervalMs": 500,
+ "jobName": "paragraph_1580981119260_-2080233417",
+ "id": "paragraph_1580981119260_-2080233417",
+ "dateCreated": "2020-02-06 17:25:19.260",
+ "dateStarted": "2020-02-06 17:26:18.906",
+ "dateFinished": "2020-02-06 17:26:20.705",
+ "status": "FINISHED"
+ },
+ {
+ "title": "Shiny Server",
+ "text": "%spark.shiny(type\u003dserver)\n\n# Define server logic to summarize and view selected dataset ----\nserver \u003c- function(input, output) {\n\n # Return the requested dataset ----\n datasetInput \u003c- reactive({\n switch(input$dataset,\n \"rock\" \u003d as.DataFrame(rock),\n \"pressure\" \u003d as.DataFrame(pressure),\n \"cars\" \u003d as.DataFrame(cars))\n })\n\n # Generate a summary of the dataset ----\n output$summary \u003 [...]
+ "user": "anonymous",
+ "dateUpdated": "2020-02-06 17:28:14.258",
+ "config": {
+ "colWidth": 6.0,
+ "fontSize": 9.0,
+ "enabled": true,
+ "results": {},
+ "editorSetting": {
+ "language": "r",
+ "editOnDblClick": false,
+ "completionKey": "TAB",
+ "completionSupport": true
+ },
+ "editorMode": "ace/mode/r",
+ "type": "server",
+ "title": true
+ },
+ "settings": {
+ "params": {},
+ "forms": {}
+ },
+ "results": {
+ "code": "SUCCESS",
+ "msg": [
+ {
+ "type": "TEXT",
+ "data": "Write server.R to /var/folders/99/mfxjjdtj7x70rvwgkxpq4jm40000gn/T/zeppelin-shiny1023768872015716789 successfully."
+ }
+ ]
+ },
+ "apps": [],
+ "progressUpdateIntervalMs": 500,
+ "jobName": "paragraph_1580981178904_-1694112525",
+ "id": "paragraph_1580981178904_-1694112525",
+ "dateCreated": "2020-02-06 17:26:18.904",
+ "dateStarted": "2020-02-06 17:28:14.264",
+ "dateFinished": "2020-02-06 17:28:33.284",
+ "status": "FINISHED"
+ },
+ {
+ "title": "Shiny UI",
+ "text": "%spark.shiny(type\u003dui)\n\n# Define UI for dataset viewer app ----\nui \u003c- fluidPage(\n\n # App title ----\n titlePanel(paste(\"Spark Version\", sparkR.version(), sep\u003d\":\")),\n\n # Sidebar layout with a input and output definitions ----\n sidebarLayout(\n\n # Sidebar panel for inputs ----\n sidebarPanel(\n\n # Input: Selector for choosing dataset ----\n selectInput(inputId \u003d \"dataset\",\n label \ [...]
+ "user": "anonymous",
+ "dateUpdated": "2020-02-06 17:31:47.873",
+ "config": {
+ "colWidth": 6.0,
+ "fontSize": 9.0,
+ "enabled": true,
+ "results": {},
+ "editorSetting": {
+ "language": "r",
+ "editOnDblClick": false,
+ "completionKey": "TAB",
+ "completionSupport": true
+ },
+ "editorMode": "ace/mode/r",
+ "type": "ui",
+ "title": true
+ },
+ "settings": {
+ "params": {},
+ "forms": {}
+ },
+ "results": {
+ "code": "SUCCESS",
+ "msg": [
+ {
+ "type": "TEXT",
+ "data": "Write ui.R to /var/folders/99/mfxjjdtj7x70rvwgkxpq4jm40000gn/T/zeppelin-shiny1023768872015716789 successfully."
+ }
+ ]
+ },
+ "apps": [],
+ "progressUpdateIntervalMs": 500,
+ "jobName": "paragraph_1580981253412_-1490669900",
+ "id": "paragraph_1580981253412_-1490669900",
+ "dateCreated": "2020-02-06 17:27:33.412",
+ "dateStarted": "2020-02-06 17:30:26.620",
+ "dateFinished": "2020-02-06 17:30:26.630",
+ "status": "FINISHED"
+ },
+ {
+ "title": "Shiny App",
+ "text": "%spark.shiny(type\u003drun)\n",
+ "user": "anonymous",
+ "dateUpdated": "2020-02-06 17:31:58.385",
+ "config": {
+ "colWidth": 12.0,
+ "fontSize": 9.0,
+ "enabled": true,
+ "results": {},
+ "editorSetting": {
+ "language": "r",
+ "editOnDblClick": false,
+ "completionKey": "TAB",
+ "completionSupport": true
+ },
+ "editorMode": "ace/mode/r",
+ "title": true,
+ "type": "run"
+ },
+ "settings": {
+ "params": {},
+ "forms": {}
+ },
+ "results": {
+ "code": "ERROR",
+ "msg": [
+ {
+ "type": "HTML",
+ "data": "\u003ciframe src\u003d\"http://0.0.0.0:56065\" height \u003d\"500px\" width\u003d\"100%\" frameBorder\u003d\"0\"\u003e\u003c/iframe\u003e\n"
+ },
+ {
+ "type": "TEXT",
+ "data": "\nio.grpc.StatusRuntimeException: UNKNOWN: Exception iterating responses: \u0027payload\u0027\n\tat io.grpc.Status.asRuntimeException(Status.java:526)\n\tat io.grpc.stub.ClientCalls$StreamObserverToCallListenerAdapter.onClose(ClientCalls.java:434)\n\tat io.grpc.PartialForwardingClientCallListener.onClose(PartialForwardingClientCallListener.java:39)\n\tat io.grpc.ForwardingClientCallListener.onClose(ForwardingClientCallListener.java:23)\n\tat io.grpc.ForwardingClientC [...]
+ }
+ ]
+ },
+ "apps": [],
+ "progressUpdateIntervalMs": 500,
+ "jobName": "paragraph_1580981260311_-1138471389",
+ "id": "paragraph_1580981260311_-1138471389",
+ "dateCreated": "2020-02-06 17:27:40.311",
+ "dateStarted": "2020-02-06 17:30:37.041",
+ "dateFinished": "2020-02-06 17:30:52.885",
+ "status": "ABORT"
+ },
+ {
+ "text": "%spark.shiny\n",
+ "user": "anonymous",
+ "dateUpdated": "2020-02-06 17:27:40.307",
+ "config": {
+ "colWidth": 6.0,
+ "fontSize": 9.0,
+ "enabled": true,
+ "results": {},
+ "editorSetting": {
+ "language": "r",
+ "editOnDblClick": false,
+ "completionKey": "TAB",
+ "completionSupport": true
+ },
+ "editorMode": "ace/mode/r"
+ },
+ "settings": {
+ "params": {},
+ "forms": {}
+ },
+ "apps": [],
+ "progressUpdateIntervalMs": 500,
+ "jobName": "paragraph_1580981251266_390385714",
+ "id": "paragraph_1580981251266_390385714",
+ "dateCreated": "2020-02-06 17:27:31.266",
+ "status": "READY"
+ }
+ ],
+ "name": "SparkR Shiny App",
+ "id": "2F1CHQ4TT",
+ "defaultInterpreterGroup": "spark",
+ "version": "0.9.0-SNAPSHOT",
+ "permissions": {},
+ "noteParams": {},
+ "noteForms": {},
+ "angularObjects": {},
+ "config": {
+ "isZeppelinNotebookCronEnable": true
+ },
+ "info": {},
+ "path": "/Spark Tutorial/SparkR Shiny App"
+}
\ No newline at end of file