You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@zeppelin.apache.org by pd...@apache.org on 2020/11/23 15:50:49 UTC

[zeppelin] branch branch-0.9 updated: [ZEPPELIN-4977] Metrics and healthcheck

This is an automated email from the ASF dual-hosted git repository.

pdallig pushed a commit to branch branch-0.9
in repository https://gitbox.apache.org/repos/asf/zeppelin.git


The following commit(s) were added to refs/heads/branch-0.9 by this push:
     new 438dad2  [ZEPPELIN-4977] Metrics and healthcheck
438dad2 is described below

commit 438dad20bd6b1e898ffed806a6fd4f25a320f320
Author: Philipp Dallig <ph...@gmail.com>
AuthorDate: Mon Nov 16 16:42:43 2020 +0100

    [ZEPPELIN-4977] Metrics and healthcheck
    
    ### What is this PR for?
    This PR includes:
     - Configurable Prometheus monitoring with endpoint (`/metrics`)
     - Rewrite the JMX metric endpoint
     - two new Healthcheck endpoints (`/health/readiness`, `/health/liveness`) and a ping endpoint (`/ping`)
     - some default metrics (jetty, jvm, interpreter)
    
    ### What type of PR is it?
     - Improvement
    
    ### What is the Jira issue?
    * https://issues.apache.org/jira/browse/ZEPPELIN-4977
    * https://issues.apache.org/jira/browse/ZEPPELIN-4976
    
    ### How should this be tested?
    * Travis-CI: https://travis-ci.com/github/Reamer/zeppelin/builds/201787227
    
    ### Questions:
    * Do the license files need an update? Yes, included in PR
    * Are there breaking changes for older versions? Yes, the JMX output for metrics changes
    * Does this need documentation? Yes, included in PR
    
    Author: Philipp Dallig <ph...@gmail.com>
    
    Closes #3971 from Reamer/metrics_and_healthcheck_micro and squashes the following commits:
    
    6dd4113c5 [Philipp Dallig] Add cron properties in configuration.md
    eed7ff57c [Philipp Dallig] Add CronJobs metrics
    103241a0d [Philipp Dallig] Rewrite JMX metric endpoint
    282865d8f [Philipp Dallig] Add jetty metrics
    6d8106052 [Philipp Dallig] Add Interpreter metrics
    e0757517c [Philipp Dallig] Add HDFS Healthcheck
    91d735b12 [Philipp Dallig] Add Healthchecks with Dropwizard
    14038c375 [Philipp Dallig] Add micrometer for metrics
    
    (cherry picked from commit 1e177dba411ae42bb8f06b6692030a5ba2f981b2)
    Signed-off-by: Philipp Dallig <ph...@gmail.com>
---
 docs/index.md                                      |   1 +
 docs/setup/operation/configuration.md              |  24 +++-
 docs/setup/operation/monitoring.md                 |  37 ++++++
 pom.xml                                            |   2 +
 zeppelin-distribution/src/bin_license/LICENSE      |  17 ++-
 zeppelin-interpreter-integration/pom.xml           |  12 --
 .../zeppelin/conf/ZeppelinConfiguration.java       |  18 ++-
 zeppelin-server/pom.xml                            |  63 ++++++---
 .../org/apache/zeppelin/metric/JVMInfoBinder.java  |  36 ++++++
 .../apache/zeppelin/metric/PrometheusServlet.java  |  61 +++++++++
 .../org/apache/zeppelin/server/ZeppelinServer.java | 143 +++++++++++++--------
 .../apache/zeppelin/metric/MetricEndpointTest.java |  68 ++++++++++
 zeppelin-zengine/pom.xml                           |  13 ++
 .../zeppelin/healthcheck/DummyHealthCheck.java     |  27 ++++
 .../zeppelin/healthcheck/HdfsHealthCheck.java      |  50 +++++++
 .../apache/zeppelin/healthcheck/HealthChecks.java  |  48 +++++++
 .../interpreter/InterpreterSettingManager.java     |  53 ++++++--
 .../zeppelin/notebook/scheduler/CronJob.java       |   8 +-
 .../notebook/scheduler/CronJobListener.java        |  63 +++++++++
 .../notebook/scheduler/QuartzSchedulerService.java |   1 +
 .../org/apache/zeppelin/storage/ConfigStorage.java |   4 +
 .../zeppelin/storage/FileSystemConfigStorage.java  |   3 +
 22 files changed, 653 insertions(+), 99 deletions(-)

diff --git a/docs/index.md b/docs/index.md
index fcfb2e5..36301d0 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -112,6 +112,7 @@ limitations under the License.
   * [MongoDB Storage](./setup/storage/storage.html#notebook-storage-in-mongodb)
 * Operation 
   * [Configuration](./setup/operation/configuration.html): lists for Apache Zeppelin
+  * [Monitoring](./setup/operation/monitoring.html): monitoring instructions for Apache Zeppelin
   * [Proxy Setting](./setup/operation/proxy_setting.html)
   * [Upgrading](./setup/operation/upgrading.html): a manual procedure of upgrading Apache Zeppelin version
   * [Trouble Shooting](./setup/operation/trouble_shooting.html)
diff --git a/docs/setup/operation/configuration.md b/docs/setup/operation/configuration.md
index 4f7c734..c745531 100644
--- a/docs/setup/operation/configuration.md
+++ b/docs/setup/operation/configuration.md
@@ -61,13 +61,13 @@ If both are defined, then the **environment variables** will take priority.
   </tr>
   <tr>
     <td><h6 class="properties">ZEPPELIN_JMX_ENABLE</h6></td>
-    <td><h6 class="properties">N/A</h6></td>
-    <td></td>
+    <td><h6 class="properties">zeppelin.jmx.enable</h6></td>
+    <td>false</td>
     <td>Enable JMX by defining "true"</td>
   </tr>
   <tr>
     <td><h6 class="properties">ZEPPELIN_JMX_PORT</h6></td>
-    <td><h6 class="properties">N/A</h6></td>
+    <td><h6 class="properties">zeppelin.jmx.port</h6></td>
     <td>9996</td>
     <td>Port number which JMX uses</td>
   </tr>
@@ -443,6 +443,24 @@ If both are defined, then the **environment variables** will take priority.
     <td>true</td>
     <td>Value to enable/disable timeout handling when starting Interpreter Pods. Caution: This can lead to an infinity loop</td>
   </tr>
+  <tr>
+    <td><h6 class="properties">ZEPPELIN_METRIC_ENABLE_PROMETHEUS</h6></td>
+    <td><h6 class="properties">zeppelin.metric.enable.prometheus</h6></td>
+    <td>false</td>
+    <td>Value to enable/disable the Prometheus metric endpoint on /metrics</td>
+  </tr>
+  <tr>
+    <td><h6 class="properties">ZEPPELIN_NOTEBOOK_CRON_ENABLE</h6></td>
+    <td><h6 class="properties">zeppelin.notebook.cron.enable</h6></td>
+    <td>false</td>
+    <td>Value to enable/disable Cron support in Notes</td>
+  </tr>
+  <tr>
+    <td><h6 class="properties">ZEPPELIN_NOTEBOOK_CRON_FOLDERS</h6></td>
+    <td><h6 class="properties">zeppelin.notebook.cron.folders</h6></td>
+    <td></td>
+    <td>Comma-separated list of folders where cron is allowed</td>
+  </tr>
 </table>
 
 
diff --git a/docs/setup/operation/monitoring.md b/docs/setup/operation/monitoring.md
new file mode 100644
index 0000000..538b115
--- /dev/null
+++ b/docs/setup/operation/monitoring.md
@@ -0,0 +1,37 @@
+---
+layout: page
+title: "Apache Zeppelin Monitoring"
+description: "This page shows you the monitoring options you have in Apache Zeppelin"
+---
+<!--
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+-->
+# Apache Zeppelin Monitoring
+
+<div id="toc"></div>
+
+## Monitoring Options
+
+Apache Zeppelin uses [Micrometer](https://micrometer.io/) - a vendor-neutral application metrics facade.
+
+### Prometheus Monitoring
+
+[Prometheus](https://prometheus.io/) is the leading monitoring solution for [Kubernetes](https://kubernetes.io/). The Prometheus endpoint can be activated with the configuration property `zeppelin.metric.enable.prometheus`. The metrics are accessible via the unauthenticated endpoint `/metrics`.
+
+### JMX Monitoring
+
+[JMX](https://en.wikipedia.org/wiki/Java_Management_Extensions) is a general solution for monitoring Java applications. JMX can be activated with the configuration property `zeppelin.jmx.enable`. The default port 9996 can be changed with the configuration property `zeppelin.jmx.port`.
+
+## Healthcheck Probe
+
+Apache Zeppelin has two unauthenticated health check endpoints (`/health/readiness`, `/health/liveness`) that can be used for proxy and/or cloud setups.
diff --git a/pom.xml b/pom.xml
index bdafb27..262b5b2 100644
--- a/pom.xml
+++ b/pom.xml
@@ -140,6 +140,8 @@
     <joda.version>2.9.9</joda.version>
     <bouncycastle.version>1.60</bouncycastle.version>
     <maven.version>3.6.3</maven.version>
+    <dropwizard.version>4.1.14</dropwizard.version>
+    <micrometer.version>1.6.0</micrometer.version>
 
     <hadoop2.7.version>2.7.7</hadoop2.7.version>
     <hadoop2.6.version>2.6.5</hadoop2.6.version>
diff --git a/zeppelin-distribution/src/bin_license/LICENSE b/zeppelin-distribution/src/bin_license/LICENSE
index 8d88b7c..532c765 100644
--- a/zeppelin-distribution/src/bin_license/LICENSE
+++ b/zeppelin-distribution/src/bin_license/LICENSE
@@ -52,9 +52,9 @@ The following components are provided under Apache License.
     (Apache 2.0) Codehaus Plexus Utils (org.codehaus.plexus:plexus-utils:3.2.1 - http://github.com/codehaus-plexus/plexus-utils)
     (Apache 2.0) findbugs jsr305 (com.google.code.findbugs:jsr305:jar:1.3.9 - http://findbugs.sourceforge.net/)
     (Apache 2.0) Google Guava (com.google.guava:guava:15.0 - https://code.google.com/p/guava-libraries/)
-    (Apache 2.0) Jackson (com.fasterxml.jackson.core:jackson-core:2.7.0 - https://github.com/FasterXML/jackson-core)
-    (Apache 2.0) Jackson (com.fasterxml.jackson.core:jackson-annotations:2.9.9 - https://github.com/FasterXML/jackson-core)
-    (Apache 2.0) Jackson (com.fasterxml.jackson.core:jackson-databind:2.9.9.1 - https://github.com/FasterXML/jackson-core)
+    (Apache 2.0) Jackson (com.fasterxml.jackson.core:jackson-core:2.9.10 - https://github.com/FasterXML/jackson-core)
+    (Apache 2.0) Jackson (com.fasterxml.jackson.core:jackson-annotations:2.9.10 - https://github.com/FasterXML/jackson-core)
+    (Apache 2.0) Jackson (com.fasterxml.jackson.core:jackson-databind:2.9.10.6 - https://github.com/FasterXML/jackson-core)
     (Apache 2.0) Jackson Mapper ASL (org.codehaus.jackson:jackson-mapper-asl:1.9.13 - https://mvnrepository.com/artifact/org.codehaus.jackson/jackson-mapper-asl/1.9.13)
     (Apache 2.0) javax.servlet (org.eclipse.jetty.orbit:javax.servlet:jar:3.1.0.v201112011016 - http://www.eclipse.org/jetty)
     (Apache 2.0) Joda-Time (joda-time:joda-time:2.8.1 - http://www.joda.org/joda-time/)
@@ -221,6 +221,16 @@ The following components are provided under Apache License.
     (Apache 2.0) Neo4j Java Driver (https://github.com/neo4j/neo4j-java-driver) - https://github.com/neo4j/neo4j-java-driver/blob/1.4.3/LICENSE.txt
     (Apache 2.0) Hazelcast Jet (http://jet.hazelcast.org) - https://github.com/hazelcast/hazelcast-jet/blob/master/LICENSE
     (Apache 2.0) RxJava (io.reactivex.rxjava2:rxjava:2.2.17) - https://github.com/ReactiveX/RxJava/blob/2.x/LICENSE
+    (Apache 2.0) Application monitoring instrumentation facade (io.micrometer:micrometer-core:1.6.0) - https://github.com/micrometer-metrics/micrometer/blob/master/LICENSE
+    (Apache 2.0) Application monitoring instrumentation facade (io.micrometer:micrometer-registry-prometheus:1.6.0) - https://github.com/micrometer-metrics/micrometer/blob/master/LICENSE
+    (Apache 2.0) Application monitoring instrumentation facade (io.micrometer:micrometer-registry-jmx:1.6.0) - https://github.com/micrometer-metrics/micrometer/blob/master/LICENSE
+    (Apache 2.0) Prometheus Java Simpleclient Common (io.prometheus:simpleclient_common:0.9.0) - https://github.com/prometheus/client_java/blob/master/LICENSE
+    (Apache 2.0) Prometheus Java Simpleclient (io.prometheus:simpleclient:0.9.0) - https://github.com/prometheus/client_java/blob/master/LICENSE
+    (Apache 2.0) Dropwizard Metrics Core (io.dropwizard.metrics:metrics-core:4.1.14) - https://github.com/dropwizard/metrics/blob/release/4.1.x/LICENSE
+    (Apache 2.0) Dropwizard Metrics Utility Servlets (io.dropwizard.metrics:metrics-servlets:4.1.14) - https://github.com/dropwizard/metrics/blob/release/4.1.x/LICENSE
+    (Apache 2.0) Dropwizard Jackson Integration for Metrics (io.dropwizard.metrics:metrics-json:4.1.14) - https://github.com/dropwizard/metrics/blob/release/4.1.x/LICENSE
+    (Apache 2.0) Dropwizard Metrics Health Checks (io.dropwizard.metrics:metrics-healthchecks:4.1.14) - https://github.com/dropwizard/metrics/blob/release/4.1.x/LICENSE
+    (Apache 2.0) Dropwizard Metrics Integration with JMX (io.dropwizard.metrics:metrics-jmx:4.1.14) - https://github.com/dropwizard/metrics/blob/release/4.1.x/LICENSE
 
 ========================================================================
 MIT licenses
@@ -420,3 +430,4 @@ Creative Commons CC0 (http://creativecommons.org/publicdomain/zero/1.0/)
 Multiple licenses
 ========================================================================
   (LGPLv2) (GPLv2) (MPL 1.1) Jtransforms (com.github.rwl:jtransforms:2.4.0 - https://sourceforge.net/projects/jtransforms/)
+  (CC0 1.0) (BSD-2) HdrHistogram (org.hdrhistogram:HdrHistogram:2.1.12 - http://hdrhistogram.github.io/HdrHistogram/)
diff --git a/zeppelin-interpreter-integration/pom.xml b/zeppelin-interpreter-integration/pom.xml
index 80eee4a..4d03dcf 100644
--- a/zeppelin-interpreter-integration/pom.xml
+++ b/zeppelin-interpreter-integration/pom.xml
@@ -77,12 +77,6 @@
       <groupId>org.apache.zeppelin</groupId>
       <artifactId>zeppelin-server</artifactId>
       <version>${project.version}</version>
-      <exclusions>
-        <exclusion>
-          <groupId>com.fasterxml.jackson.core</groupId>
-          <artifactId>jackson-annotations</artifactId>
-        </exclusion>
-      </exclusions>
     </dependency>
 
     <dependency>
@@ -128,12 +122,6 @@
       <version>${project.version}</version>
       <classifier>tests</classifier>
       <scope>test</scope>
-      <exclusions>
-        <exclusion>
-          <groupId>com.fasterxml.jackson.core</groupId>
-          <artifactId>jackson-annotations</artifactId>
-        </exclusion>
-      </exclusions>
     </dependency>
 
     <dependency>
diff --git a/zeppelin-interpreter/src/main/java/org/apache/zeppelin/conf/ZeppelinConfiguration.java b/zeppelin-interpreter/src/main/java/org/apache/zeppelin/conf/ZeppelinConfiguration.java
index 826e252..5300ce5 100644
--- a/zeppelin-interpreter/src/main/java/org/apache/zeppelin/conf/ZeppelinConfiguration.java
+++ b/zeppelin-interpreter/src/main/java/org/apache/zeppelin/conf/ZeppelinConfiguration.java
@@ -428,6 +428,14 @@ public class ZeppelinConfiguration extends XMLConfiguration {
       return getString(ConfVars.ZEPPELIN_SSL_PEM_CA);
   }
 
+  public boolean isJMXEnabled() {
+    return getBoolean(ConfVars.ZEPPELIN_JMX_ENABLE);
+  }
+
+  public int getJMXPort() {
+    return getInt(ConfVars.ZEPPELIN_JMX_PORT);
+  }
+
   public String getNotebookDir() {
     return getAbsoluteDir(ConfVars.ZEPPELIN_NOTEBOOK_DIR);
   }
@@ -508,7 +516,7 @@ public class ZeppelinConfiguration extends XMLConfiguration {
   public String getS3SignerOverride() {
     return getString(ConfVars.ZEPPELIN_NOTEBOOK_S3_SIGNEROVERRIDE);
   }
-  
+
   public boolean isS3PathStyleAccess() {
     return getBoolean(ConfVars.ZEPPELIN_NOTEBOOK_S3_PATH_STYLE_ACCESS);
   }
@@ -884,6 +892,10 @@ public class ZeppelinConfiguration extends XMLConfiguration {
     return getString(ConfVars.ZEPPELIN_DOCKER_CONTAINER_IMAGE);
   }
 
+  public boolean isPrometheusMetricEnabled() {
+    return getBoolean(ConfVars.ZEPPELIN_METRIC_ENABLE_PROMETHEUS);
+  }
+
   public Map<String, String> dumpConfigurations(Predicate<String> predicate) {
     Map<String, String> properties = new HashMap<>();
 
@@ -958,6 +970,8 @@ public class ZeppelinConfiguration extends XMLConfiguration {
     ZEPPELIN_WAR("zeppelin.war", "zeppelin-web/dist"),
     ZEPPELIN_ANGULAR_WAR("zeppelin.angular.war", "zeppelin-web-angular/dist"),
     ZEPPELIN_WAR_TEMPDIR("zeppelin.war.tempdir", "webapps"),
+    ZEPPELIN_JMX_ENABLE("zeppelin.jmx.enable", false),
+    ZEPPELIN_JMX_PORT("zeppelin.jmx.port", 9996),
 
     ZEPPELIN_INTERPRETER_JSON("zeppelin.interpreter.setting", "interpreter-setting.json"),
     ZEPPELIN_INTERPRETER_DIR("zeppelin.interpreter.dir", "interpreter"),
@@ -1095,6 +1109,8 @@ public class ZeppelinConfiguration extends XMLConfiguration {
 
     ZEPPELIN_DOCKER_CONTAINER_IMAGE("zeppelin.docker.container.image", "apache/zeppelin:" + Util.getVersion()),
 
+    ZEPPELIN_METRIC_ENABLE_PROMETHEUS("zeppelin.metric.enable.prometheus", false),
+
     ZEPPELIN_IMPERSONATE_SPARK_PROXY_USER("zeppelin.impersonate.spark.proxy.user", true),
     ZEPPELIN_NOTEBOOK_GIT_REMOTE_URL("zeppelin.notebook.git.remote.url", ""),
     ZEPPELIN_NOTEBOOK_GIT_REMOTE_USERNAME("zeppelin.notebook.git.remote.username", "token"),
diff --git a/zeppelin-server/pom.xml b/zeppelin-server/pom.xml
index 78e3719..4eaf6ed 100644
--- a/zeppelin-server/pom.xml
+++ b/zeppelin-server/pom.xml
@@ -36,11 +36,12 @@
   <properties>
 
     <!--library versions-->
-    <jersey.version>2.27</jersey.version>
+    <jersey.version>2.30</jersey.version>
     <jersey.servlet.version>1.13</jersey.servlet.version>
     <javax.ws.rsapi.version>2.1</javax.ws.rsapi.version>
     <libpam4j.version>1.11</libpam4j.version>
     <jna.version>4.1.0</jna.version>
+    <jackson.version>2.9.10.6</jackson.version>
 
     <!--test library versions-->
     <selenium.java.version>2.48.2</selenium.java.version>
@@ -105,6 +106,42 @@
     </dependency>
 
     <dependency>
+      <groupId>io.dropwizard.metrics</groupId>
+      <artifactId>metrics-servlets</artifactId>
+      <version>${dropwizard.version}</version>
+      <exclusions>
+        <exclusion>
+          <groupId>com.fasterxml.jackson.core</groupId>
+          <artifactId>jackson-databind</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+
+    <dependency>
+      <groupId>io.micrometer</groupId>
+      <artifactId>micrometer-registry-prometheus</artifactId>
+      <version>${micrometer.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>io.micrometer</groupId>
+      <artifactId>micrometer-registry-jmx</artifactId>
+      <version>${micrometer.version}</version>
+      <exclusions>
+        <!-- manual loading to get the right version that fits to other Dropwizard libraries -->
+        <exclusion>
+          <groupId>io.dropwizard.metrics</groupId>
+          <artifactId>metrics-jmx</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+    <!-- Used by io.micrometer:micrometer-registry-jmx -->
+    <dependency>
+      <groupId>io.dropwizard.metrics</groupId>
+      <artifactId>metrics-jmx</artifactId>
+      <version>${dropwizard.version}</version>
+    </dependency>
+
+    <dependency>
       <groupId>org.glassfish.jersey.core</groupId>
       <artifactId>jersey-client</artifactId>
       <version>${jersey.version}</version>
@@ -133,17 +170,6 @@
           <groupId>com.fasterxml.jackson.core</groupId>
           <artifactId>jackson-databind</artifactId>
         </exclusion>
-      </exclusions>
-    </dependency>
-    <dependency>
-      <groupId>com.fasterxml.jackson.core</groupId>
-      <artifactId>jackson-databind</artifactId>
-      <version>2.9.10.1</version>
-      <exclusions>
-        <exclusion>
-          <groupId>com.fasterxml.jackson.core</groupId>
-          <artifactId>jackson-annotations</artifactId>
-        </exclusion>
         <exclusion>
           <groupId>com.fasterxml.jackson.core</groupId>
           <artifactId>jackson-core</artifactId>
@@ -162,6 +188,12 @@
     </dependency>
 
     <dependency>
+      <groupId>com.fasterxml.jackson.core</groupId>
+      <artifactId>jackson-databind</artifactId>
+      <version>${jackson.version}</version>
+    </dependency>
+
+    <dependency>
       <groupId>javax.ws.rs</groupId>
       <artifactId>javax.ws.rs-api</artifactId>
       <version>${javax.ws.rsapi.version}</version>
@@ -223,13 +255,6 @@
       <version>${jna.version}</version>
     </dependency>
 
-    <!-- Needed for dependency conergence -->
-    <dependency>
-      <groupId>com.fasterxml.jackson.core</groupId>
-      <artifactId>jackson-annotations</artifactId>
-      <version>2.9.9</version>
-    </dependency>
-
     <dependency>
       <groupId>org.eclipse.jetty</groupId>
       <artifactId>jetty-webapp</artifactId>
diff --git a/zeppelin-server/src/main/java/org/apache/zeppelin/metric/JVMInfoBinder.java b/zeppelin-server/src/main/java/org/apache/zeppelin/metric/JVMInfoBinder.java
new file mode 100644
index 0000000..9408d1c
--- /dev/null
+++ b/zeppelin-server/src/main/java/org/apache/zeppelin/metric/JVMInfoBinder.java
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.zeppelin.metric;
+
+import io.micrometer.core.instrument.Counter;
+import io.micrometer.core.instrument.MeterRegistry;
+import io.micrometer.core.instrument.binder.MeterBinder;
+
+public class JVMInfoBinder implements MeterBinder {
+  private static final String UNKNOWN = "unknown";
+
+  @Override
+  public void bindTo(MeterRegistry registry) {
+      Counter.builder("jvm.info")
+              .description("JVM version info")
+              .tags("version", System.getProperty("java.runtime.version", UNKNOWN),
+                      "vendor", System.getProperty("java.vm.vendor", UNKNOWN),
+                      "runtime", System.getProperty("java.runtime.name", UNKNOWN))
+              .register(registry)
+              .increment();
+  }
+}
diff --git a/zeppelin-server/src/main/java/org/apache/zeppelin/metric/PrometheusServlet.java b/zeppelin-server/src/main/java/org/apache/zeppelin/metric/PrometheusServlet.java
new file mode 100644
index 0000000..43bdf1f
--- /dev/null
+++ b/zeppelin-server/src/main/java/org/apache/zeppelin/metric/PrometheusServlet.java
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.zeppelin.metric;
+
+import java.io.IOException;
+import java.io.PrintWriter;
+
+import javax.servlet.ServletException;
+import javax.servlet.http.HttpServlet;
+import javax.servlet.http.HttpServletRequest;
+import javax.servlet.http.HttpServletResponse;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import io.micrometer.prometheus.PrometheusMeterRegistry;
+import io.prometheus.client.exporter.common.TextFormat;
+
+public class PrometheusServlet extends HttpServlet{
+
+  private static final Logger LOGGER = LoggerFactory.getLogger(PrometheusServlet.class);
+  /**
+   *
+   */
+  private static final long serialVersionUID = 3954804532706721368L;
+
+  private final PrometheusMeterRegistry promMetricRegistry;
+
+  public PrometheusServlet(PrometheusMeterRegistry promMetricRegistry) {
+    this.promMetricRegistry = promMetricRegistry;
+  }
+
+  private static final String CACHE_CONTROL = "Cache-Control";
+  private static final String NO_CACHE = "must-revalidate,no-cache,no-store";
+  @Override
+  protected void doGet(HttpServletRequest req,
+                       HttpServletResponse resp) throws ServletException, IOException {
+    resp.setStatus(HttpServletResponse.SC_OK);
+    resp.setHeader(CACHE_CONTROL, NO_CACHE);
+    resp.setContentType(TextFormat.CONTENT_TYPE_004);
+    try (PrintWriter writer = resp.getWriter()) {
+      promMetricRegistry.scrape(writer);
+    } catch (IOException e){
+      LOGGER.error("IOException in PrometheusServlet", e);
+    }
+  }
+}
diff --git a/zeppelin-server/src/main/java/org/apache/zeppelin/server/ZeppelinServer.java b/zeppelin-server/src/main/java/org/apache/zeppelin/server/ZeppelinServer.java
index ce05735..bf55006 100644
--- a/zeppelin-server/src/main/java/org/apache/zeppelin/server/ZeppelinServer.java
+++ b/zeppelin-server/src/main/java/org/apache/zeppelin/server/ZeppelinServer.java
@@ -16,10 +16,30 @@
  */
 package org.apache.zeppelin.server;
 
+import com.codahale.metrics.servlets.HealthCheckServlet;
+import com.codahale.metrics.servlets.PingServlet;
 import com.google.gson.Gson;
 
 import static org.apache.zeppelin.server.HtmlAddonResource.HTML_ADDON_IDENTIFIER;
 
+import io.micrometer.core.instrument.Clock;
+import io.micrometer.core.instrument.Metrics;
+import io.micrometer.core.instrument.Tags;
+import io.micrometer.core.instrument.binder.jetty.InstrumentedQueuedThreadPool;
+import io.micrometer.core.instrument.binder.jetty.JettyConnectionMetrics;
+import io.micrometer.core.instrument.binder.jetty.JettySslHandshakeMetrics;
+import io.micrometer.core.instrument.binder.jetty.TimedHandler;
+import io.micrometer.core.instrument.binder.jvm.ClassLoaderMetrics;
+import io.micrometer.core.instrument.binder.jvm.JvmMemoryMetrics;
+import io.micrometer.core.instrument.binder.jvm.JvmThreadMetrics;
+import io.micrometer.core.instrument.binder.system.FileDescriptorMetrics;
+import io.micrometer.core.instrument.binder.system.ProcessorMetrics;
+import io.micrometer.core.instrument.binder.system.UptimeMetrics;
+import io.micrometer.jmx.JmxConfig;
+import io.micrometer.jmx.JmxMeterRegistry;
+import io.micrometer.prometheus.PrometheusConfig;
+import io.micrometer.prometheus.PrometheusMeterRegistry;
+
 import java.io.File;
 import java.io.IOException;
 import java.lang.management.ManagementFactory;
@@ -44,6 +64,7 @@ import org.apache.zeppelin.cluster.ClusterManagerServer;
 import org.apache.zeppelin.conf.ZeppelinConfiguration;
 import org.apache.zeppelin.conf.ZeppelinConfiguration.ConfVars;
 import org.apache.zeppelin.display.AngularObjectRegistryListener;
+import org.apache.zeppelin.healthcheck.HealthChecks;
 import org.apache.zeppelin.helium.ApplicationEventListener;
 import org.apache.zeppelin.helium.Helium;
 import org.apache.zeppelin.helium.HeliumApplicationFactory;
@@ -54,6 +75,8 @@ import org.apache.zeppelin.interpreter.InterpreterSetting;
 import org.apache.zeppelin.interpreter.InterpreterSettingManager;
 import org.apache.zeppelin.interpreter.recovery.RecoveryStorage;
 import org.apache.zeppelin.interpreter.remote.RemoteInterpreterProcessListener;
+import org.apache.zeppelin.metric.JVMInfoBinder;
+import org.apache.zeppelin.metric.PrometheusServlet;
 import org.apache.zeppelin.notebook.NoteEventListener;
 import org.apache.zeppelin.notebook.NoteManager;
 import org.apache.zeppelin.notebook.Notebook;
@@ -76,6 +99,7 @@ import org.apache.zeppelin.user.AuthenticationInfo;
 import org.apache.zeppelin.user.Credentials;
 import org.apache.zeppelin.util.ReflectionUtils;
 import org.apache.zeppelin.utils.PEMImporter;
+import org.eclipse.jetty.http.HttpScheme;
 import org.eclipse.jetty.http.HttpVersion;
 import org.eclipse.jetty.jmx.ConnectorServer;
 import org.eclipse.jetty.jmx.MBeanContainer;
@@ -93,8 +117,6 @@ import org.eclipse.jetty.servlet.FilterHolder;
 import org.eclipse.jetty.servlet.ServletHolder;
 import org.eclipse.jetty.util.resource.Resource;
 import org.eclipse.jetty.util.ssl.SslContextFactory;
-import org.eclipse.jetty.util.thread.QueuedThreadPool;
-import org.eclipse.jetty.util.thread.ThreadPool;
 import org.eclipse.jetty.webapp.WebAppContext;
 import org.eclipse.jetty.websocket.servlet.WebSocketServlet;
 import org.glassfish.hk2.api.Immediate;
@@ -116,6 +138,7 @@ public class ZeppelinServer extends ResourceConfig {
   public static ServiceLocator sharedServiceLocator;
 
   private static ZeppelinConfiguration conf;
+  private static PrometheusMeterRegistry promMetricRegistry;
 
   public static void reset() {
     conf = null;
@@ -135,9 +158,13 @@ public class ZeppelinServer extends ResourceConfig {
     conf.setProperty("args", args);
 
     jettyWebServer = setupJettyServer(conf);
+    initMetrics(conf);
+
+    TimedHandler timedHandler = new TimedHandler(Metrics.globalRegistry, Tags.empty());
+    jettyWebServer.setHandler(timedHandler);
 
     ContextHandlerCollection contexts = new ContextHandlerCollection();
-    jettyWebServer.setHandler(contexts);
+    timedHandler.setHandler(contexts);
 
     sharedServiceLocator = ServiceLocatorFactory.getInstance().create("shared-locator");
     ServiceLocatorUtilities.enableImmediateScope(sharedServiceLocator);
@@ -204,49 +231,20 @@ public class ZeppelinServer extends ResourceConfig {
     setupClusterManagerServer(sharedServiceLocator);
 
     // JMX Enable
-    Stream.of("ZEPPELIN_JMX_ENABLE")
-        .map(System::getenv)
-        .map(Boolean::parseBoolean)
-        .filter(Boolean::booleanValue)
-        .map(jmxEnabled -> "ZEPPELIN_JMX_PORT")
-        .map(System::getenv)
-        .map(
-            portString -> {
-              try {
-                return Integer.parseInt(portString);
-              } catch (Exception e) {
-                return null;
-              }
-            })
-        .filter(Objects::nonNull)
-        .forEach(
-            port -> {
-              try {
-                MBeanContainer mbeanContainer =
-                    new MBeanContainer(ManagementFactory.getPlatformMBeanServer());
-                jettyWebServer.addEventListener(mbeanContainer);
-                jettyWebServer.addBean(mbeanContainer);
-
-                JMXServiceURL jmxURL =
-                    new JMXServiceURL(
-                        String.format(
-                            "service:jmx:rmi://0.0.0.0:%d/jndi/rmi://0.0.0.0:%d/jmxrmi",
-                            port, port));
-                ConnectorServer jmxServer =
-                    new ConnectorServer(jmxURL, "org.eclipse.jetty.jmx:name=rmiconnectorserver");
-                jettyWebServer.addBean(jmxServer);
-
-                // Add JMX Beans
-                // TODO(jl): Need to investigate more about injection and jmx
-                jettyWebServer.addBean(
-                    sharedServiceLocator.getService(InterpreterSettingManager.class));
-                jettyWebServer.addBean(sharedServiceLocator.getService(NotebookServer.class));
-
-                LOG.info("JMX Enabled with port: {}", port);
-              } catch (Exception e) {
-                LOG.warn("Error while setting JMX", e);
-              }
-            });
+    if (conf.isJMXEnabled()) {
+      int port = conf.getJMXPort();
+      // Setup JMX
+      MBeanContainer mbeanContainer = new MBeanContainer(ManagementFactory.getPlatformMBeanServer());
+      jettyWebServer.addBean(mbeanContainer);
+      JMXServiceURL jmxURL =
+        new JMXServiceURL(
+            String.format(
+                "service:jmx:rmi://0.0.0.0:%d/jndi/rmi://0.0.0.0:%d/jmxrmi",
+                port, port));
+      ConnectorServer jmxServer = new ConnectorServer(jmxURL, "org.eclipse.jetty.jmx:name=rmiconnectorserver");
+      jettyWebServer.addBean(jmxServer);
+      LOG.info("JMX Enabled with port: {}", port);
+    }
 
     LOG.info("Starting zeppelin server");
     try {
@@ -293,6 +291,23 @@ public class ZeppelinServer extends ResourceConfig {
     }
   }
 
+  /**
+   * Adds the meter registries selected in the configuration to the global Micrometer registry
+   * and binds the default class loader, JVM, file descriptor, processor and uptime binders.
+   *
+   * <p>{@code promMetricRegistry} is only assigned when Prometheus metrics are enabled.
+   *
+   * @param conf configuration consulted for the Prometheus and JMX switches
+   */
+  private static void initMetrics(ZeppelinConfiguration conf) {
+    boolean prometheusEnabled = conf.isPrometheusMetricEnabled();
+    if (prometheusEnabled) {
+      PrometheusMeterRegistry prometheusRegistry =
+          new PrometheusMeterRegistry(PrometheusConfig.DEFAULT);
+      promMetricRegistry = prometheusRegistry;
+      Metrics.addRegistry(prometheusRegistry);
+    }
+
+    boolean jmxEnabled = conf.isJMXEnabled();
+    if (jmxEnabled) {
+      JmxMeterRegistry jmxRegistry = new JmxMeterRegistry(JmxConfig.DEFAULT, Clock.SYSTEM);
+      Metrics.addRegistry(jmxRegistry);
+    }
+
+    // Default binders, attached regardless of which registries were enabled above.
+    new ClassLoaderMetrics().bindTo(Metrics.globalRegistry);
+    new JvmMemoryMetrics().bindTo(Metrics.globalRegistry);
+    new JvmThreadMetrics().bindTo(Metrics.globalRegistry);
+    new FileDescriptorMetrics().bindTo(Metrics.globalRegistry);
+    new ProcessorMetrics().bindTo(Metrics.globalRegistry);
+    new UptimeMetrics().bindTo(Metrics.globalRegistry);
+    new JVMInfoBinder().bindTo(Metrics.globalRegistry);
+  }
+
   private static Thread shutdown(ZeppelinConfiguration conf) {
     return new Thread(
             () -> {
@@ -316,8 +331,9 @@ public class ZeppelinServer extends ResourceConfig {
   }
 
   private static Server setupJettyServer(ZeppelinConfiguration conf) {
-    ThreadPool threadPool =
-      new QueuedThreadPool(conf.getInt(ConfVars.ZEPPELIN_SERVER_JETTY_THREAD_POOL_MAX),
+    InstrumentedQueuedThreadPool threadPool =
+      new InstrumentedQueuedThreadPool(Metrics.globalRegistry, Tags.empty(),
+                           conf.getInt(ConfVars.ZEPPELIN_SERVER_JETTY_THREAD_POOL_MAX),
                            conf.getInt(ConfVars.ZEPPELIN_SERVER_JETTY_THREAD_POOL_MIN),
                            conf.getInt(ConfVars.ZEPPELIN_SERVER_JETTY_THREAD_POOL_TIMEOUT));
     final Server server = new Server(threadPool);
@@ -333,26 +349,34 @@ public class ZeppelinServer extends ResourceConfig {
     httpConfig.setRequestHeaderSize(conf.getJettyRequestHeaderSize());
     if (conf.useSsl()) {
       LOG.debug("Enabling SSL for Zeppelin Server on port {}", sslPort);
-      httpConfig.setSecureScheme("https");
+      httpConfig.setSecureScheme(HttpScheme.HTTPS.asString());
       httpConfig.setSecurePort(sslPort);
 
       HttpConfiguration httpsConfig = new HttpConfiguration(httpConfig);
       httpsConfig.addCustomizer(new SecureRequestCustomizer());
 
+      SslConnectionFactory sslConnectionFactory = new SslConnectionFactory(getSslContextFactory(conf), HttpVersion.HTTP_1_1.asString());
+      HttpConnectionFactory httpsConnectionFactory = new HttpConnectionFactory(httpsConfig);
       connector =
               new ServerConnector(
                       server,
-                      new SslConnectionFactory(getSslContextFactory(conf), HttpVersion.HTTP_1_1.asString()),
-                      new HttpConnectionFactory(httpsConfig));
+                      sslConnectionFactory,
+                      httpsConnectionFactory);
       connector.setPort(sslPort);
+      connector.addBean(new JettySslHandshakeMetrics(Metrics.globalRegistry, Tags.empty()));
     } else {
-      connector = new ServerConnector(server, new HttpConnectionFactory(httpConfig));
+      HttpConnectionFactory httpConnectionFactory = new HttpConnectionFactory(httpConfig);
+      connector =
+              new ServerConnector(
+                      server,
+                      httpConnectionFactory);
       connector.setPort(port);
     }
     // Set some timeout options to make debugging easier.
     int timeout = 1000 * 30;
     connector.setIdleTimeout(timeout);
     connector.setHost(conf.getServerAddress());
+    connector.addBean(new JettyConnectionMetrics(Metrics.globalRegistry, Tags.empty()));
     server.addConnector(connector);
   }
 
@@ -534,6 +558,16 @@ public class ZeppelinServer extends ResourceConfig {
     }
   }
 
+  /**
+   * Adds the Prometheus servlet to the web application under {@code /metrics}.
+   *
+   * <p>NOTE(review): {@code promMetricRegistry} is only assigned in {@code initMetrics} when
+   * Prometheus metrics are enabled, so it may still be {@code null} here - confirm that
+   * {@code PrometheusServlet} copes with that.
+   *
+   * @param webapp web application context the servlet is added to
+   */
+  private static void setupPrometheusContextHandler(WebAppContext webapp) {
+    ServletHolder prometheusHolder = new ServletHolder(new PrometheusServlet(promMetricRegistry));
+    webapp.addServlet(prometheusHolder, "/metrics");
+  }
+
+  /**
+   * Adds the readiness and liveness health check servlets and the ping servlet to the web
+   * application.
+   *
+   * @param webapp web application context the servlets are added to
+   */
+  private static void setupHealthCheckContextHandler(WebAppContext webapp) {
+    ServletHolder readinessHolder =
+        new ServletHolder(new HealthCheckServlet(HealthChecks.getHealthCheckReadinessRegistry()));
+    webapp.addServlet(readinessHolder, "/health/readiness");
+
+    ServletHolder livenessHolder =
+        new ServletHolder(new HealthCheckServlet(HealthChecks.getHealthCheckLivenessRegistry()));
+    webapp.addServlet(livenessHolder, "/health/liveness");
+
+    ServletHolder pingHolder = new ServletHolder(new PingServlet());
+    webapp.addServlet(pingHolder, "/ping");
+  }
+
   private static WebAppContext setupWebAppContext(
       ContextHandlerCollection contexts, ZeppelinConfiguration conf, String warPath, String contextPath) {
     WebAppContext webApp = new WebAppContext();
@@ -621,6 +655,11 @@ public class ZeppelinServer extends ResourceConfig {
     // Create `ZeppelinServer` using reflection and setup REST Api
     setupRestApiContextHandler(webApp, conf);
 
+    // prometheus endpoint
+    setupPrometheusContextHandler(webApp);
+    // health endpoints
+    setupHealthCheckContextHandler(webApp);
+
     // Notebook server
     setupNotebookServer(webApp, conf, sharedServiceLocator);
   }
diff --git a/zeppelin-server/src/test/java/org/apache/zeppelin/metric/MetricEndpointTest.java b/zeppelin-server/src/test/java/org/apache/zeppelin/metric/MetricEndpointTest.java
new file mode 100644
index 0000000..2d4fb9d
--- /dev/null
+++ b/zeppelin-server/src/test/java/org/apache/zeppelin/metric/MetricEndpointTest.java
@@ -0,0 +1,68 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.zeppelin.metric;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+
+import org.apache.http.client.methods.CloseableHttpResponse;
+import org.apache.http.client.methods.HttpGet;
+import org.apache.http.util.EntityUtils;
+import org.apache.zeppelin.conf.ZeppelinConfiguration;
+import org.apache.zeppelin.rest.AbstractTestRestApi;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+/**
+ * Checks that the {@code /metrics} endpoint is served once the Prometheus metric switch is
+ * turned on.
+ */
+public class MetricEndpointTest extends AbstractTestRestApi {
+
+  @BeforeClass
+  public static void setUp() throws Exception {
+    // Turn the Prometheus switch on before the server below is started.
+    System.setProperty(ZeppelinConfiguration.ConfVars.ZEPPELIN_METRIC_ENABLE_PROMETHEUS.getVarName(),
+      "true");
+    AbstractTestRestApi.startUp(MetricEndpointTest.class.getSimpleName());
+  }
+
+  @AfterClass
+  public static void destroy() throws Exception {
+    AbstractTestRestApi.shutDown();
+  }
+
+  /**
+   * Simple endpoint test: the endpoint answers with 200 and the body mentions a JVM memory
+   * metric.
+   *
+   * @throws IOException if the request fails or the response body cannot be read
+   */
+  @Test
+  public void testPrometheusMetricJVM() throws IOException {
+    HttpGet request = new HttpGet(getUrlToTest() + "/metrics");
+    // try-with-resources releases the response even when one of the assertions fails
+    try (CloseableHttpResponse response = getHttpClient().execute(request)) {
+      assertEquals(200, response.getStatusLine().getStatusCode());
+      String body = EntityUtils.toString(response.getEntity());
+      assertTrue("Contains JVM metric", body.contains("jvm_memory"));
+    }
+  }
+
+  /**
+   * @return the value of the {@code url} system property, or {@code http://localhost:8080}
+   *         when the property is not set
+   */
+  protected static String getUrlToTest() {
+    return System.getProperty("url", "http://localhost:8080");
+  }
+}
diff --git a/zeppelin-zengine/pom.xml b/zeppelin-zengine/pom.xml
index 34acc0a..c5ce79f 100644
--- a/zeppelin-zengine/pom.xml
+++ b/zeppelin-zengine/pom.xml
@@ -145,6 +145,19 @@
       <version>${quartz.scheduler.version}</version>
     </dependency>
 
+    <!-- Metrics -->
+    <dependency>
+        <groupId>io.micrometer</groupId>
+        <artifactId>micrometer-core</artifactId>
+        <version>${micrometer.version}</version>
+    </dependency>
+    <!-- Healthcheck -->
+    <dependency>
+        <groupId>io.dropwizard.metrics</groupId>
+        <artifactId>metrics-healthchecks</artifactId>
+        <version>${dropwizard.version}</version>
+    </dependency>
+
     <dependency>
       <groupId>com.google.code.gson</groupId>
       <artifactId>gson</artifactId>
diff --git a/zeppelin-zengine/src/main/java/org/apache/zeppelin/healthcheck/DummyHealthCheck.java b/zeppelin-zengine/src/main/java/org/apache/zeppelin/healthcheck/DummyHealthCheck.java
new file mode 100644
index 0000000..bc07d4e
--- /dev/null
+++ b/zeppelin-zengine/src/main/java/org/apache/zeppelin/healthcheck/DummyHealthCheck.java
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.zeppelin.healthcheck;
+
+import com.codahale.metrics.health.HealthCheck;
+
+/**
+ * Health check that unconditionally reports a healthy result.
+ */
+public class DummyHealthCheck extends HealthCheck {
+
+  private static final String HEALTHY_MESSAGE = "DummyHealthCheck okay";
+
+  /**
+   * @return a healthy result carrying a fixed message
+   */
+  @Override
+  protected Result check() throws Exception {
+    return Result.healthy(HEALTHY_MESSAGE);
+  }
+}
diff --git a/zeppelin-zengine/src/main/java/org/apache/zeppelin/healthcheck/HdfsHealthCheck.java b/zeppelin-zengine/src/main/java/org/apache/zeppelin/healthcheck/HdfsHealthCheck.java
new file mode 100644
index 0000000..5f7f48a
--- /dev/null
+++ b/zeppelin-zengine/src/main/java/org/apache/zeppelin/healthcheck/HdfsHealthCheck.java
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.zeppelin.healthcheck;
+
+import java.io.IOException;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.zeppelin.notebook.FileSystemStorage;
+
+import com.codahale.metrics.health.HealthCheck;
+
+/**
+ * Health check that reports whether a path, which is expected to be present, exists on the
+ * given {@link FileSystemStorage}.
+ */
+public class HdfsHealthCheck extends HealthCheck {
+  private final FileSystemStorage fs;
+  private final Path path;
+
+  /**
+   *
+   * @param fs used file system
+   * @param path checked path, which should always be present
+   */
+  public HdfsHealthCheck(FileSystemStorage fs, Path path) {
+    this.fs = fs;
+    this.path = path;
+  }
+
+  /**
+   * @return a healthy result if the path exists; an unhealthy result if it is missing or the
+   *         existence check fails with an {@link IOException}
+   */
+  @Override
+  protected Result check() throws Exception {
+    boolean pathExists;
+    try {
+      pathExists = fs.exists(path);
+    } catch (IOException e) {
+      // Result.unhealthy(String, Object...) uses the extra arguments as format arguments only,
+      // so the exception must be referenced by the format string or it is silently dropped.
+      return Result.unhealthy("Filesystem unhealthy: %s", e);
+    }
+    if (pathExists) {
+      return Result.healthy("Filesystem okay");
+    }
+    return Result.unhealthy("Filesystem unhealthy");
+  }
+}
diff --git a/zeppelin-zengine/src/main/java/org/apache/zeppelin/healthcheck/HealthChecks.java b/zeppelin-zengine/src/main/java/org/apache/zeppelin/healthcheck/HealthChecks.java
new file mode 100644
index 0000000..089bcd8
--- /dev/null
+++ b/zeppelin-zengine/src/main/java/org/apache/zeppelin/healthcheck/HealthChecks.java
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.zeppelin.healthcheck;
+
+import com.codahale.metrics.health.HealthCheckRegistry;
+
+/**
+ * Static holder of the readiness and the liveness health check registry. Both registries are
+ * created with a single {@link DummyHealthCheck} registered under the name {@code dummy}.
+ */
+public class HealthChecks {
+
+  private static final String DUMMY_CHECK_NAME = "dummy";
+
+  // Maybe add the ThreadDeadlockHealthCheck (name "deadlock") to the readiness registry
+  private static final HealthCheckRegistry healthcheckReadinessRegistry = newRegistryWithDummyCheck();
+  private static final HealthCheckRegistry healthcheckLivenessRegistry = newRegistryWithDummyCheck();
+
+  private HealthChecks() {
+    // not used
+  }
+
+  /**
+   * @return a fresh registry that only contains the dummy health check
+   */
+  private static HealthCheckRegistry newRegistryWithDummyCheck() {
+    HealthCheckRegistry registry = new HealthCheckRegistry();
+    registry.register(DUMMY_CHECK_NAME, new DummyHealthCheck());
+    return registry;
+  }
+
+  /**
+   * @return the registry holding the readiness checks
+   */
+  public static HealthCheckRegistry getHealthCheckReadinessRegistry() {
+    return healthcheckReadinessRegistry;
+  }
+
+  /**
+   * @return the registry holding the liveness checks
+   */
+  public static HealthCheckRegistry getHealthCheckLivenessRegistry() {
+    return healthcheckLivenessRegistry;
+  }
+}
diff --git a/zeppelin-zengine/src/main/java/org/apache/zeppelin/interpreter/InterpreterSettingManager.java b/zeppelin-zengine/src/main/java/org/apache/zeppelin/interpreter/InterpreterSettingManager.java
index 985cb47..e578dfc 100644
--- a/zeppelin-zengine/src/main/java/org/apache/zeppelin/interpreter/InterpreterSettingManager.java
+++ b/zeppelin-zengine/src/main/java/org/apache/zeppelin/interpreter/InterpreterSettingManager.java
@@ -26,8 +26,13 @@ import com.google.gson.Gson;
 import com.google.gson.GsonBuilder;
 import com.google.gson.reflect.TypeToken;
 
+import io.micrometer.core.instrument.Gauge;
+import io.micrometer.core.instrument.Meter;
+import io.micrometer.core.instrument.Metrics;
+import io.micrometer.core.instrument.Tags;
 import java.util.Arrays;
 import java.util.LinkedHashMap;
+import java.util.LinkedList;
 import java.util.Set;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
@@ -124,8 +129,9 @@ public class InterpreterSettingManager implements NoteEventListener, ClusterEven
    * id --> InterpreterSetting
    * TODO(zjffdu) change it to name --> InterpreterSetting
    */
-  private final Map<String, InterpreterSetting> interpreterSettings =
-      Maps.newConcurrentMap();
+  private final Map<String, InterpreterSetting> interpreterSettings = Metrics.gaugeMapSize("interpreter.amount", Tags.empty(),
+      Maps.newConcurrentMap());
+  private final Map<String, List<Meter>> interpreterSettingsMeters = Maps.newConcurrentMap();
 
   private final List<RemoteRepository> interpreterRepositories;
   private InterpreterOption defaultOption;
@@ -240,7 +246,7 @@ public class InterpreterSettingManager implements NoteEventListener, ClusterEven
         InterpreterSetting interpreterSetting = new InterpreterSetting(interpreterSettingTemplate);
         initInterpreterSetting(interpreterSetting);
         if (shouldRegister(interpreterSetting.getGroup())) {
-          interpreterSettings.put(interpreterSetting.getId(), interpreterSetting);
+          addInterpreterSetting(interpreterSetting);
         }
       }
       return;
@@ -281,13 +287,13 @@ public class InterpreterSettingManager implements NoteEventListener, ClusterEven
       // remove it first
       for (InterpreterSetting setting : interpreterSettings.values()) {
         if (setting.getName().equals(savedInterpreterSetting.getName())) {
-          interpreterSettings.remove(setting.getId());
+          removeInterpreterSetting(setting.getId());
         }
       }
       savedInterpreterSetting.postProcessing();
       LOGGER.info("Create interpreter setting {} from interpreter.json",
           savedInterpreterSetting.getName());
-      interpreterSettings.put(savedInterpreterSetting.getId(), savedInterpreterSetting);
+      addInterpreterSetting(savedInterpreterSetting);
     }
 
     for (InterpreterSetting interpreterSettingTemplate : interpreterSettingTemplates.values()) {
@@ -296,7 +302,7 @@ public class InterpreterSettingManager implements NoteEventListener, ClusterEven
       // add newly detected interpreter if it doesn't exist in interpreter.json
       if (!interpreterSettings.containsKey(interpreterSetting.getId())) {
         LOGGER.info("Create interpreter setting: {} from interpreter setting template", interpreterSetting.getId());
-        interpreterSettings.put(interpreterSetting.getId(), interpreterSetting);
+        addInterpreterSetting(interpreterSetting);
       }
     }
 
@@ -315,6 +321,28 @@ public class InterpreterSettingManager implements NoteEventListener, ClusterEven
     }
   }
 
+  /**
+   * Stores the setting under its id and registers the {@code interpreter.group.size} gauge for
+   * it. The registered meters are remembered per setting id so they can be removed again.
+   *
+   * @param interpreterSetting setting to store and to expose metrics for
+   */
+  private void addInterpreterSetting(InterpreterSetting interpreterSetting) {
+    String id = interpreterSetting.getId();
+    interpreterSettings.put(id, interpreterSetting);
+
+    // When a setting with the same id is replaced, its old gauge is still registered and bound
+    // to the replaced object; registering an identical name/tag combination again would hand
+    // back that old gauge. Drop the stale meters before registering new ones.
+    List<Meter> staleMeters = interpreterSettingsMeters.remove(id);
+    if (staleMeters != null) {
+      for (Meter staleMeter : staleMeters) {
+        Metrics.globalRegistry.remove(staleMeter);
+      }
+    }
+
+    List<Meter> meters = new LinkedList<>();
+    Gauge size = Gauge
+      .builder("interpreter.group.size", () -> interpreterSetting.getAllInterpreterGroups().size())
+      .description("Size of all interpreter groups")
+      .tags(Tags.of("name", id))
+      .tags(Tags.of("group", interpreterSetting.getGroup()))
+      .register(Metrics.globalRegistry);
+    meters.add(size);
+    interpreterSettingsMeters.put(id, meters);
+  }
+
+
+  /**
+   * Removes the setting with the given id together with the meters that were registered for it.
+   *
+   * @param id id of the interpreter setting to remove
+   */
+  private void removeInterpreterSetting(String id) {
+    interpreterSettings.remove(id);
+    List<Meter> meters = interpreterSettingsMeters.remove(id);
+    if (meters == null) {
+      // No meters were recorded for this id, so there is nothing to deregister.
+      return;
+    }
+    for (Meter meter : meters) {
+      Metrics.globalRegistry.remove(meter);
+    }
+  }
+
   public void saveToFile() throws IOException {
     InterpreterInfoSaving info = new InterpreterInfoSaving();
     info.interpreterSettings = Maps.newHashMap(interpreterSettings);
@@ -322,6 +350,14 @@ public class InterpreterSettingManager implements NoteEventListener, ClusterEven
     configStorage.save(info);
   }
 
+  /**
+   * Registers the untagged {@code interpreter.group.size.total} gauge, which reports the size
+   * of {@code getAllInterpreterGroup()} each time it is read.
+   */
+  private void initMetrics() {
+    Gauge.builder("interpreter.group.size.total", () -> getAllInterpreterGroup().size())
+      .description("Size of all interpreter groups")
+      .register(Metrics.globalRegistry);
+  }
+
   private void init() throws IOException {
     this.includesInterpreters =
             Arrays.asList(conf.getString(ConfVars.ZEPPELIN_INTERPRETER_INCLUDES).split(","))
@@ -342,6 +378,7 @@ public class InterpreterSettingManager implements NoteEventListener, ClusterEven
     loadInterpreterSettingFromDefaultDir(true);
     loadFromFile();
     saveToFile();
+    initMetrics();
 
     // must init Recovery after init of InterpreterSettingManager
     recoveryStorage.init();
@@ -827,7 +864,7 @@ public class InterpreterSettingManager implements NoteEventListener, ClusterEven
     setting.setInterpreterOption(option);
     setting.setProperties(properties);
     initInterpreterSetting(setting);
-    interpreterSettings.put(setting.getId(), setting);
+    addInterpreterSetting(setting);
     saveToFile();
 
     return setting;
@@ -977,7 +1014,7 @@ public class InterpreterSettingManager implements NoteEventListener, ClusterEven
     if (interpreterSettings.containsKey(id)) {
       InterpreterSetting intp = interpreterSettings.get(id);
       intp.close();
-      interpreterSettings.remove(id);
+      removeInterpreterSetting(id);
       if (initiator) {
         // Event initiator saves the file
         // Cluster event accepting nodes do not need to save files repeatedly
diff --git a/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/scheduler/CronJob.java b/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/scheduler/CronJob.java
index c410ddc..2aed172 100644
--- a/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/scheduler/CronJob.java
+++ b/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/scheduler/CronJob.java
@@ -31,16 +31,20 @@ import java.util.HashMap;
 public class CronJob implements org.quartz.Job {
   private static final Logger LOGGER = LoggerFactory.getLogger(CronJob.class);
 
+  private static final String RESULT_SUCCEEDED = "succeeded";
+  private static final String RESULT_FAILED = "failed";
+  private static final String RESULT_SKIPPED = "skipped";
+
   @Override
   public void execute(JobExecutionContext context) {
     JobDataMap jobDataMap = context.getJobDetail().getJobDataMap();
     Note note = (Note) jobDataMap.get("note");
-    LOGGER.info("Start cron job of note: {}", note.getId());
     if (note.haveRunningOrPendingParagraphs()) {
       LOGGER.warn(
           "execution of the cron job is skipped because there is a running or pending "
               + "paragraph (note id: {})",
           note.getId());
+      context.setResult(RESULT_SKIPPED);
       return;
     }
 
@@ -56,7 +60,9 @@ public class CronJob implements org.quartz.Job {
                     null);
     try {
       note.runAll(authenticationInfo, true, true, new HashMap<>());
+      context.setResult(RESULT_SUCCEEDED);
     } catch (Exception e) {
+      context.setResult(RESULT_FAILED);
       LOGGER.warn("Fail to run note: {}", note.getName(), e);
     }
   }
diff --git a/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/scheduler/CronJobListener.java b/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/scheduler/CronJobListener.java
new file mode 100644
index 0000000..90a712c
--- /dev/null
+++ b/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/scheduler/CronJobListener.java
@@ -0,0 +1,63 @@
+package org.apache.zeppelin.notebook.scheduler;
+
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Objects;
+import java.util.concurrent.ConcurrentHashMap;
+
+import org.apache.commons.lang3.StringUtils;
+import org.apache.zeppelin.notebook.Note;
+import org.quartz.JobDataMap;
+import org.quartz.JobExecutionContext;
+import org.quartz.JobExecutionException;
+import org.quartz.JobListener;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import io.micrometer.core.instrument.Metrics;
+import io.micrometer.core.instrument.Tag;
+import io.micrometer.core.instrument.Tags;
+import io.micrometer.core.instrument.Timer;
+
+/**
+ * Quartz {@link JobListener} that logs the start, veto and end of note cron jobs and records
+ * the duration of every executed job in the {@code cronjob} timer, tagged with the note id,
+ * the note name and the job result.
+ */
+public class CronJobListener implements JobListener {
+
+  private static final Logger LOGGER = LoggerFactory.getLogger(CronJobListener.class);
+
+  private static final String UNKNOWN = "unknown";
+
+  // JobExecutionContext -> Timer.Sample
+  // A single listener instance is registered on the scheduler and its callbacks are invoked
+  // from the threads that run the jobs, so the map has to tolerate concurrent access.
+  private final Map<JobExecutionContext, Timer.Sample> cronJobTimerSamples =
+      new ConcurrentHashMap<>();
+
+  @Override
+  public String getName() {
+    return getClass().getSimpleName();
+  }
+
+  /**
+   * Logs the start of the job and starts the timer sample for it.
+   */
+  @Override
+  public void jobToBeExecuted(JobExecutionContext context) {
+    Note note = noteOf(context);
+    LOGGER.info("Start cron job of note: {}", note.getId());
+    cronJobTimerSamples.put(context, Timer.start(Metrics.globalRegistry));
+  }
+
+  /**
+   * Logs that the job was vetoed.
+   */
+  @Override
+  public void jobExecutionVetoed(JobExecutionContext context) {
+    Note note = noteOf(context);
+    LOGGER.info("vetoed cron job of note: {}", note.getId());
+  }
+
+  /**
+   * Logs the job result and stops the timer sample that was started for this execution.
+   */
+  @Override
+  public void jobWasExecuted(JobExecutionContext context, JobExecutionException jobException) {
+    Note note = noteOf(context);
+    // The result is null when the job finished without setting one; calling toString() on it
+    // directly would throw instead of falling back to "unknown".
+    String result = Objects.toString(context.getResult(), UNKNOWN);
+    LOGGER.info("cron job of noteId {} executed with result {}", note.getId(), result);
+    Timer.Sample sample = cronJobTimerSamples.remove(context);
+    if (sample != null) {
+      // NOTE(review): the tag key "nodeid" looks like a typo for "noteid"; it is left unchanged
+      // because renaming it would change the published metric.
+      Tag noteId = Tag.of("nodeid", note.getId());
+      Tag name = Tag.of("name", StringUtils.defaultString(note.getName(), UNKNOWN));
+      Tag statusTag = Tag.of("result", result);
+      sample.stop(Metrics.timer("cronjob", Tags.of(noteId, name, statusTag)));
+    } else {
+      LOGGER.warn("No Timer.Sample for NoteId {} found", note.getId());
+    }
+  }
+
+  /**
+   * Reads the note stored under the key {@code note} in the job data map of the given context.
+   */
+  private static Note noteOf(JobExecutionContext context) {
+    JobDataMap jobDataMap = context.getJobDetail().getJobDataMap();
+    return (Note) jobDataMap.get("note");
+  }
+}
diff --git a/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/scheduler/QuartzSchedulerService.java b/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/scheduler/QuartzSchedulerService.java
index 0211e0e..e9894a2 100644
--- a/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/scheduler/QuartzSchedulerService.java
+++ b/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/scheduler/QuartzSchedulerService.java
@@ -56,6 +56,7 @@ public class QuartzSchedulerService implements SchedulerService {
     this.zeppelinConfiguration = zeppelinConfiguration;
     this.notebook = notebook;
     this.scheduler = getScheduler();
+    this.scheduler.getListenerManager().addJobListener(new CronJobListener());
     this.scheduler.start();
 
     // Do in a separated thread because there may be many notes,
diff --git a/zeppelin-zengine/src/main/java/org/apache/zeppelin/storage/ConfigStorage.java b/zeppelin-zengine/src/main/java/org/apache/zeppelin/storage/ConfigStorage.java
index da6f20e..c380e5a 100644
--- a/zeppelin-zengine/src/main/java/org/apache/zeppelin/storage/ConfigStorage.java
+++ b/zeppelin-zengine/src/main/java/org/apache/zeppelin/storage/ConfigStorage.java
@@ -22,6 +22,7 @@ import com.google.common.annotations.VisibleForTesting;
 import com.google.gson.JsonObject;
 import com.google.gson.JsonParser;
 import org.apache.zeppelin.conf.ZeppelinConfiguration;
+import org.apache.zeppelin.healthcheck.HealthChecks;
 import org.apache.zeppelin.interpreter.InterpreterInfoSaving;
 import org.apache.zeppelin.interpreter.InterpreterSetting;
 import org.apache.zeppelin.notebook.NotebookAuthorizationInfoSaving;
@@ -40,6 +41,8 @@ import java.io.IOException;
  */
 public abstract class ConfigStorage {
 
+  protected static final String STORAGE_HEALTHCHECK_NAME = "ConfigStorage";
+
   private static ConfigStorage instance;
 
   protected ZeppelinConfiguration zConf;
@@ -96,6 +99,7 @@ public abstract class ConfigStorage {
 
+  /**
+   * Unregisters the health check named {@code STORAGE_HEALTHCHECK_NAME} from the liveness
+   * registry and sets the static {@code instance} reference back to {@code null}.
+   */
   @VisibleForTesting
   public static void reset() {
+    HealthChecks.getHealthCheckLivenessRegistry().unregister(STORAGE_HEALTHCHECK_NAME);
     instance = null;
   }
 }
diff --git a/zeppelin-zengine/src/main/java/org/apache/zeppelin/storage/FileSystemConfigStorage.java b/zeppelin-zengine/src/main/java/org/apache/zeppelin/storage/FileSystemConfigStorage.java
index 005923e..ac7d108 100644
--- a/zeppelin-zengine/src/main/java/org/apache/zeppelin/storage/FileSystemConfigStorage.java
+++ b/zeppelin-zengine/src/main/java/org/apache/zeppelin/storage/FileSystemConfigStorage.java
@@ -20,6 +20,8 @@ package org.apache.zeppelin.storage;
 
 import org.apache.hadoop.fs.Path;
 import org.apache.zeppelin.conf.ZeppelinConfiguration;
+import org.apache.zeppelin.healthcheck.HdfsHealthCheck;
+import org.apache.zeppelin.healthcheck.HealthChecks;
 import org.apache.zeppelin.interpreter.InterpreterInfoSaving;
 import org.apache.zeppelin.notebook.FileSystemStorage;
 import org.apache.zeppelin.notebook.NotebookAuthorizationInfoSaving;
@@ -56,6 +58,7 @@ public class FileSystemConfigStorage extends ConfigStorage {
     this.interpreterSettingPath = fs.makeQualified(new Path(zConf.getInterpreterSettingPath(false)));
     this.authorizationPath = fs.makeQualified(new Path(zConf.getNotebookAuthorizationPath(false)));
     this.credentialPath = fs.makeQualified(new Path(zConf.getCredentialsPath(false)));
+    HealthChecks.getHealthCheckLivenessRegistry().register(STORAGE_HEALTHCHECK_NAME, new HdfsHealthCheck(this.fs, configPath));
   }
 
   @Override