You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@zeppelin.apache.org by pd...@apache.org on 2020/11/23 15:50:49 UTC
[zeppelin] branch branch-0.9 updated: [ZEPPELIN-4977] Metrics and
healthcheck
This is an automated email from the ASF dual-hosted git repository.
pdallig pushed a commit to branch branch-0.9
in repository https://gitbox.apache.org/repos/asf/zeppelin.git
The following commit(s) were added to refs/heads/branch-0.9 by this push:
new 438dad2 [ZEPPELIN-4977] Metrics and healthcheck
438dad2 is described below
commit 438dad20bd6b1e898ffed806a6fd4f25a320f320
Author: Philipp Dallig <ph...@gmail.com>
AuthorDate: Mon Nov 16 16:42:43 2020 +0100
[ZEPPELIN-4977] Metrics and healthcheck
### What is this PR for?
This PR includes:
- Configurable Prometheus monitoring with endpoint (`/metrics`)
- Rewrite the JMX metric endpoint
- two new Healthcheck endpoints (`/health/readiness`, `/health/liveness`) and a ping endpoint (`/ping`)
- some default metrics (jetty, jvm, interpreter)
### What type of PR is it?
- Improvement
### What is the Jira issue?
* https://issues.apache.org/jira/browse/ZEPPELIN-4977
* https://issues.apache.org/jira/browse/ZEPPELIN-4976
### How should this be tested?
* Travis-CI: https://travis-ci.com/github/Reamer/zeppelin/builds/201787227
### Questions:
* Do the license files need an update? Yes, included in PR
* Are there breaking changes for older versions? Yes, the JMX output for metrics changes
* Does this need documentation? Yes, included in PR
Author: Philipp Dallig <ph...@gmail.com>
Closes #3971 from Reamer/metrics_and_healthcheck_micro and squashes the following commits:
6dd4113c5 [Philipp Dallig] Add cron properties in configuration.md
eed7ff57c [Philipp Dallig] Add CronJobs metrics
103241a0d [Philipp Dallig] Rewrite JMX metric endpoint
282865d8f [Philipp Dallig] Add jetty metrics
6d8106052 [Philipp Dallig] Add Interpreter metrics
e0757517c [Philipp Dallig] Add HDFS Healthcheck
91d735b12 [Philipp Dallig] Add Healthchecks with Dropwizard
14038c375 [Philipp Dallig] Add micrometer for metrics
(cherry picked from commit 1e177dba411ae42bb8f06b6692030a5ba2f981b2)
Signed-off-by: Philipp Dallig <ph...@gmail.com>
---
docs/index.md | 1 +
docs/setup/operation/configuration.md | 24 +++-
docs/setup/operation/monitoring.md | 37 ++++++
pom.xml | 2 +
zeppelin-distribution/src/bin_license/LICENSE | 17 ++-
zeppelin-interpreter-integration/pom.xml | 12 --
.../zeppelin/conf/ZeppelinConfiguration.java | 18 ++-
zeppelin-server/pom.xml | 63 ++++++---
.../org/apache/zeppelin/metric/JVMInfoBinder.java | 36 ++++++
.../apache/zeppelin/metric/PrometheusServlet.java | 61 +++++++++
.../org/apache/zeppelin/server/ZeppelinServer.java | 143 +++++++++++++--------
.../apache/zeppelin/metric/MetricEndpointTest.java | 68 ++++++++++
zeppelin-zengine/pom.xml | 13 ++
.../zeppelin/healthcheck/DummyHealthCheck.java | 27 ++++
.../zeppelin/healthcheck/HdfsHealthCheck.java | 50 +++++++
.../apache/zeppelin/healthcheck/HealthChecks.java | 48 +++++++
.../interpreter/InterpreterSettingManager.java | 53 ++++++--
.../zeppelin/notebook/scheduler/CronJob.java | 8 +-
.../notebook/scheduler/CronJobListener.java | 63 +++++++++
.../notebook/scheduler/QuartzSchedulerService.java | 1 +
.../org/apache/zeppelin/storage/ConfigStorage.java | 4 +
.../zeppelin/storage/FileSystemConfigStorage.java | 3 +
22 files changed, 653 insertions(+), 99 deletions(-)
diff --git a/docs/index.md b/docs/index.md
index fcfb2e5..36301d0 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -112,6 +112,7 @@ limitations under the License.
* [MongoDB Storage](./setup/storage/storage.html#notebook-storage-in-mongodb)
* Operation
* [Configuration](./setup/operation/configuration.html): lists for Apache Zeppelin
+ * [Monitoring](./setup/operation/monitoring.html): monitoring instructions for Apache Zeppelin
* [Proxy Setting](./setup/operation/proxy_setting.html)
* [Upgrading](./setup/operation/upgrading.html): a manual procedure of upgrading Apache Zeppelin version
* [Trouble Shooting](./setup/operation/trouble_shooting.html)
diff --git a/docs/setup/operation/configuration.md b/docs/setup/operation/configuration.md
index 4f7c734..c745531 100644
--- a/docs/setup/operation/configuration.md
+++ b/docs/setup/operation/configuration.md
@@ -61,13 +61,13 @@ If both are defined, then the **environment variables** will take priority.
</tr>
<tr>
<td><h6 class="properties">ZEPPELIN_JMX_ENABLE</h6></td>
- <td><h6 class="properties">N/A</h6></td>
- <td></td>
+ <td><h6 class="properties">zeppelin.jmx.enable</h6></td>
+ <td>false</td>
<td>Enable JMX by defining "true"</td>
</tr>
<tr>
<td><h6 class="properties">ZEPPELIN_JMX_PORT</h6></td>
- <td><h6 class="properties">N/A</h6></td>
+ <td><h6 class="properties">zeppelin.jmx.port</h6></td>
<td>9996</td>
<td>Port number which JMX uses</td>
</tr>
@@ -443,6 +443,24 @@ If both are defined, then the **environment variables** will take priority.
<td>true</td>
<td>Value to enable/disable timeout handling when starting Interpreter Pods. Caution: This can lead to an infinity loop</td>
</tr>
+ <tr>
+ <td><h6 class="properties">ZEPPELIN_METRIC_ENABLE_PROMETHEUS</h6></td>
+ <td><h6 class="properties">zeppelin.metric.enable.prometheus</h6></td>
+ <td>false</td>
+ <td>Value to enable/disable Prometheus metric endpoint on /metrics</td>
+ </tr>
+ <tr>
+ <td><h6 class="properties">ZEPPELIN_NOTEBOOK_CRON_ENABLE</h6></td>
+ <td><h6 class="properties">zeppelin.notebook.cron.enable</h6></td>
+ <td>false</td>
+ <td>Value to enable/disable Cron support in Notes</td>
+ </tr>
+ <tr>
+ <td><h6 class="properties">ZEPPELIN_NOTEBOOK_CRON_FOLDERS</h6></td>
+ <td><h6 class="properties">zeppelin.notebook.cron.folders</h6></td>
+ <td></td>
+ <td>Comma-separated list of folders where cron is allowed</td>
+ </tr>
</table>
diff --git a/docs/setup/operation/monitoring.md b/docs/setup/operation/monitoring.md
new file mode 100644
index 0000000..538b115
--- /dev/null
+++ b/docs/setup/operation/monitoring.md
@@ -0,0 +1,37 @@
+---
+layout: page
+title: "Apache Zeppelin Monitoring"
+description: "This page shows you the monitoring options you have in Apache Zeppelin"
+---
+<!--
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+-->
+# Apache Zeppelin Monitoring
+
+<div id="toc"></div>
+
+## Monitoring Options
+
+Apache Zeppelin uses [Micrometer](https://micrometer.io/) - a vendor-neutral application metrics facade.
+
+### Prometheus Monitoring
+
+[Prometheus](https://prometheus.io/) is the leading monitoring solution for [Kubernetes](https://kubernetes.io/). The Prometheus endpoint can be activated with the configuration property `zeppelin.metric.enable.prometheus`. The metrics are accessible via the unauthenticated endpoint `/metrics`.
+
+### JMX Monitoring
+
+[JMX](https://en.wikipedia.org/wiki/Java_Management_Extensions) is a general solution for monitoring Java applications. JMX can be activated with the configuration property `zeppelin.jmx.enable`. The default port 9996 can be changed with the configuration property `zeppelin.jmx.port`.
+
+## Healthcheck Probe
+
+Apache Zeppelin has two unauthenticated healthcheck-related endpoints (`/health/readiness`, `/health/liveness`) that can be used for proxy and/or cloud setups.
diff --git a/pom.xml b/pom.xml
index bdafb27..262b5b2 100644
--- a/pom.xml
+++ b/pom.xml
@@ -140,6 +140,8 @@
<joda.version>2.9.9</joda.version>
<bouncycastle.version>1.60</bouncycastle.version>
<maven.version>3.6.3</maven.version>
+ <dropwizard.version>4.1.14</dropwizard.version>
+ <micrometer.version>1.6.0</micrometer.version>
<hadoop2.7.version>2.7.7</hadoop2.7.version>
<hadoop2.6.version>2.6.5</hadoop2.6.version>
diff --git a/zeppelin-distribution/src/bin_license/LICENSE b/zeppelin-distribution/src/bin_license/LICENSE
index 8d88b7c..532c765 100644
--- a/zeppelin-distribution/src/bin_license/LICENSE
+++ b/zeppelin-distribution/src/bin_license/LICENSE
@@ -52,9 +52,9 @@ The following components are provided under Apache License.
(Apache 2.0) Codehaus Plexus Utils (org.codehaus.plexus:plexus-utils:3.2.1 - http://github.com/codehaus-plexus/plexus-utils)
(Apache 2.0) findbugs jsr305 (com.google.code.findbugs:jsr305:jar:1.3.9 - http://findbugs.sourceforge.net/)
(Apache 2.0) Google Guava (com.google.guava:guava:15.0 - https://code.google.com/p/guava-libraries/)
- (Apache 2.0) Jackson (com.fasterxml.jackson.core:jackson-core:2.7.0 - https://github.com/FasterXML/jackson-core)
- (Apache 2.0) Jackson (com.fasterxml.jackson.core:jackson-annotations:2.9.9 - https://github.com/FasterXML/jackson-core)
- (Apache 2.0) Jackson (com.fasterxml.jackson.core:jackson-databind:2.9.9.1 - https://github.com/FasterXML/jackson-core)
+ (Apache 2.0) Jackson (com.fasterxml.jackson.core:jackson-core:2.9.10 - https://github.com/FasterXML/jackson-core)
+ (Apache 2.0) Jackson (com.fasterxml.jackson.core:jackson-annotations:2.9.10 - https://github.com/FasterXML/jackson-core)
+ (Apache 2.0) Jackson (com.fasterxml.jackson.core:jackson-databind:2.9.10.6 - https://github.com/FasterXML/jackson-core)
(Apache 2.0) Jackson Mapper ASL (org.codehaus.jackson:jackson-mapper-asl:1.9.13 - https://mvnrepository.com/artifact/org.codehaus.jackson/jackson-mapper-asl/1.9.13)
(Apache 2.0) javax.servlet (org.eclipse.jetty.orbit:javax.servlet:jar:3.1.0.v201112011016 - http://www.eclipse.org/jetty)
(Apache 2.0) Joda-Time (joda-time:joda-time:2.8.1 - http://www.joda.org/joda-time/)
@@ -221,6 +221,16 @@ The following components are provided under Apache License.
(Apache 2.0) Neo4j Java Driver (https://github.com/neo4j/neo4j-java-driver) - https://github.com/neo4j/neo4j-java-driver/blob/1.4.3/LICENSE.txt
(Apache 2.0) Hazelcast Jet (http://jet.hazelcast.org) - https://github.com/hazelcast/hazelcast-jet/blob/master/LICENSE
(Apache 2.0) RxJava (io.reactivex.rxjava2:rxjava:2.2.17) - https://github.com/ReactiveX/RxJava/blob/2.x/LICENSE
+ (Apache 2.0) Application monitoring instrumentation facade (io.micrometer:micrometer-core:1.6.0) - https://github.com/micrometer-metrics/micrometer/blob/master/LICENSE
+ (Apache 2.0) Application monitoring instrumentation facade (io.micrometer:micrometer-registry-prometheus:1.6.0) - https://github.com/micrometer-metrics/micrometer/blob/master/LICENSE
+ (Apache 2.0) Application monitoring instrumentation facade (io.micrometer:micrometer-registry-jmx:1.6.0) - https://github.com/micrometer-metrics/micrometer/blob/master/LICENSE
+ (Apache 2.0) Prometheus Java Simpleclient Common (io.prometheus:simpleclient_common:0.9.0) - https://github.com/prometheus/client_java/blob/master/LICENSE
+ (Apache 2.0) Prometheus Java Simpleclient (io.prometheus:simpleclient:0.9.0) - https://github.com/prometheus/client_java/blob/master/LICENSE
+ (Apache 2.0) Dropwizard Metrics Core (io.dropwizard.metrics:metrics-core:4.1.14) - https://github.com/dropwizard/metrics/blob/release/4.1.x/LICENSE
+ (Apache 2.0) Dropwizard Metrics Utility Servlets (io.dropwizard.metrics:metrics-servlets:4.1.14) - https://github.com/dropwizard/metrics/blob/release/4.1.x/LICENSE
+ (Apache 2.0) Dropwizard Jackson Integration for Metrics (io.dropwizard.metrics:metrics-json:4.1.14) - https://github.com/dropwizard/metrics/blob/release/4.1.x/LICENSE
+ (Apache 2.0) Dropwizard Metrics Health Checks (io.dropwizard.metrics:metrics-healthchecks:4.1.14) - https://github.com/dropwizard/metrics/blob/release/4.1.x/LICENSE
+ (Apache 2.0) Dropwizard Metrics Integration with JMX (io.dropwizard.metrics:metrics-jmx:4.1.14) - https://github.com/dropwizard/metrics/blob/release/4.1.x/LICENSE
========================================================================
MIT licenses
@@ -420,3 +430,4 @@ Creative Commons CC0 (http://creativecommons.org/publicdomain/zero/1.0/)
Multiple licenses
========================================================================
(LGPLv2) (GPLv2) (MPL 1.1) Jtransforms (com.github.rwl:jtransforms:2.4.0 - https://sourceforge.net/projects/jtransforms/)
+ (CC0 1.0) (BSD-2) HdrHistogram (org.hdrhistogram:HdrHistogram:2.1.12 - http://hdrhistogram.github.io/HdrHistogram/)
diff --git a/zeppelin-interpreter-integration/pom.xml b/zeppelin-interpreter-integration/pom.xml
index 80eee4a..4d03dcf 100644
--- a/zeppelin-interpreter-integration/pom.xml
+++ b/zeppelin-interpreter-integration/pom.xml
@@ -77,12 +77,6 @@
<groupId>org.apache.zeppelin</groupId>
<artifactId>zeppelin-server</artifactId>
<version>${project.version}</version>
- <exclusions>
- <exclusion>
- <groupId>com.fasterxml.jackson.core</groupId>
- <artifactId>jackson-annotations</artifactId>
- </exclusion>
- </exclusions>
</dependency>
<dependency>
@@ -128,12 +122,6 @@
<version>${project.version}</version>
<classifier>tests</classifier>
<scope>test</scope>
- <exclusions>
- <exclusion>
- <groupId>com.fasterxml.jackson.core</groupId>
- <artifactId>jackson-annotations</artifactId>
- </exclusion>
- </exclusions>
</dependency>
<dependency>
diff --git a/zeppelin-interpreter/src/main/java/org/apache/zeppelin/conf/ZeppelinConfiguration.java b/zeppelin-interpreter/src/main/java/org/apache/zeppelin/conf/ZeppelinConfiguration.java
index 826e252..5300ce5 100644
--- a/zeppelin-interpreter/src/main/java/org/apache/zeppelin/conf/ZeppelinConfiguration.java
+++ b/zeppelin-interpreter/src/main/java/org/apache/zeppelin/conf/ZeppelinConfiguration.java
@@ -428,6 +428,14 @@ public class ZeppelinConfiguration extends XMLConfiguration {
return getString(ConfVars.ZEPPELIN_SSL_PEM_CA);
}
+ public boolean isJMXEnabled() {
+ return getBoolean(ConfVars.ZEPPELIN_JMX_ENABLE);
+ }
+
+ public int getJMXPort() {
+ return getInt(ConfVars.ZEPPELIN_JMX_PORT);
+ }
+
public String getNotebookDir() {
return getAbsoluteDir(ConfVars.ZEPPELIN_NOTEBOOK_DIR);
}
@@ -508,7 +516,7 @@ public class ZeppelinConfiguration extends XMLConfiguration {
public String getS3SignerOverride() {
return getString(ConfVars.ZEPPELIN_NOTEBOOK_S3_SIGNEROVERRIDE);
}
-
+
public boolean isS3PathStyleAccess() {
return getBoolean(ConfVars.ZEPPELIN_NOTEBOOK_S3_PATH_STYLE_ACCESS);
}
@@ -884,6 +892,10 @@ public class ZeppelinConfiguration extends XMLConfiguration {
return getString(ConfVars.ZEPPELIN_DOCKER_CONTAINER_IMAGE);
}
+ public boolean isPrometheusMetricEnabled() {
+ return getBoolean(ConfVars.ZEPPELIN_METRIC_ENABLE_PROMETHEUS);
+ }
+
public Map<String, String> dumpConfigurations(Predicate<String> predicate) {
Map<String, String> properties = new HashMap<>();
@@ -958,6 +970,8 @@ public class ZeppelinConfiguration extends XMLConfiguration {
ZEPPELIN_WAR("zeppelin.war", "zeppelin-web/dist"),
ZEPPELIN_ANGULAR_WAR("zeppelin.angular.war", "zeppelin-web-angular/dist"),
ZEPPELIN_WAR_TEMPDIR("zeppelin.war.tempdir", "webapps"),
+ ZEPPELIN_JMX_ENABLE("zeppelin.jmx.enable", false),
+ ZEPPELIN_JMX_PORT("zeppelin.jmx.port", 9996),
ZEPPELIN_INTERPRETER_JSON("zeppelin.interpreter.setting", "interpreter-setting.json"),
ZEPPELIN_INTERPRETER_DIR("zeppelin.interpreter.dir", "interpreter"),
@@ -1095,6 +1109,8 @@ public class ZeppelinConfiguration extends XMLConfiguration {
ZEPPELIN_DOCKER_CONTAINER_IMAGE("zeppelin.docker.container.image", "apache/zeppelin:" + Util.getVersion()),
+ ZEPPELIN_METRIC_ENABLE_PROMETHEUS("zeppelin.metric.enable.prometheus", false),
+
ZEPPELIN_IMPERSONATE_SPARK_PROXY_USER("zeppelin.impersonate.spark.proxy.user", true),
ZEPPELIN_NOTEBOOK_GIT_REMOTE_URL("zeppelin.notebook.git.remote.url", ""),
ZEPPELIN_NOTEBOOK_GIT_REMOTE_USERNAME("zeppelin.notebook.git.remote.username", "token"),
diff --git a/zeppelin-server/pom.xml b/zeppelin-server/pom.xml
index 78e3719..4eaf6ed 100644
--- a/zeppelin-server/pom.xml
+++ b/zeppelin-server/pom.xml
@@ -36,11 +36,12 @@
<properties>
<!--library versions-->
- <jersey.version>2.27</jersey.version>
+ <jersey.version>2.30</jersey.version>
<jersey.servlet.version>1.13</jersey.servlet.version>
<javax.ws.rsapi.version>2.1</javax.ws.rsapi.version>
<libpam4j.version>1.11</libpam4j.version>
<jna.version>4.1.0</jna.version>
+ <jackson.version>2.9.10.6</jackson.version>
<!--test library versions-->
<selenium.java.version>2.48.2</selenium.java.version>
@@ -105,6 +106,42 @@
</dependency>
<dependency>
+ <groupId>io.dropwizard.metrics</groupId>
+ <artifactId>metrics-servlets</artifactId>
+ <version>${dropwizard.version}</version>
+ <exclusions>
+ <exclusion>
+ <groupId>com.fasterxml.jackson.core</groupId>
+ <artifactId>jackson-databind</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+
+ <dependency>
+ <groupId>io.micrometer</groupId>
+ <artifactId>micrometer-registry-prometheus</artifactId>
+ <version>${micrometer.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>io.micrometer</groupId>
+ <artifactId>micrometer-registry-jmx</artifactId>
+ <version>${micrometer.version}</version>
+ <exclusions>
+ <!-- manual loading to get the right version that fits to other Dropwizard libraries -->
+ <exclusion>
+ <groupId>io.dropwizard.metrics</groupId>
+ <artifactId>metrics-jmx</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <!-- Used by io.micrometer:micrometer-registry-jmx -->
+ <dependency>
+ <groupId>io.dropwizard.metrics</groupId>
+ <artifactId>metrics-jmx</artifactId>
+ <version>${dropwizard.version}</version>
+ </dependency>
+
+ <dependency>
<groupId>org.glassfish.jersey.core</groupId>
<artifactId>jersey-client</artifactId>
<version>${jersey.version}</version>
@@ -133,17 +170,6 @@
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
</exclusion>
- </exclusions>
- </dependency>
- <dependency>
- <groupId>com.fasterxml.jackson.core</groupId>
- <artifactId>jackson-databind</artifactId>
- <version>2.9.10.1</version>
- <exclusions>
- <exclusion>
- <groupId>com.fasterxml.jackson.core</groupId>
- <artifactId>jackson-annotations</artifactId>
- </exclusion>
<exclusion>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-core</artifactId>
@@ -162,6 +188,12 @@
</dependency>
<dependency>
+ <groupId>com.fasterxml.jackson.core</groupId>
+ <artifactId>jackson-databind</artifactId>
+ <version>${jackson.version}</version>
+ </dependency>
+
+ <dependency>
<groupId>javax.ws.rs</groupId>
<artifactId>javax.ws.rs-api</artifactId>
<version>${javax.ws.rsapi.version}</version>
@@ -223,13 +255,6 @@
<version>${jna.version}</version>
</dependency>
- <!-- Needed for dependency conergence -->
- <dependency>
- <groupId>com.fasterxml.jackson.core</groupId>
- <artifactId>jackson-annotations</artifactId>
- <version>2.9.9</version>
- </dependency>
-
<dependency>
<groupId>org.eclipse.jetty</groupId>
<artifactId>jetty-webapp</artifactId>
diff --git a/zeppelin-server/src/main/java/org/apache/zeppelin/metric/JVMInfoBinder.java b/zeppelin-server/src/main/java/org/apache/zeppelin/metric/JVMInfoBinder.java
new file mode 100644
index 0000000..9408d1c
--- /dev/null
+++ b/zeppelin-server/src/main/java/org/apache/zeppelin/metric/JVMInfoBinder.java
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.zeppelin.metric;
+
+import io.micrometer.core.instrument.Counter;
+import io.micrometer.core.instrument.MeterRegistry;
+import io.micrometer.core.instrument.binder.MeterBinder;
+
+public class JVMInfoBinder implements MeterBinder {
+ private static final String UNKNOWN = "unknown";
+
+ @Override
+ public void bindTo(MeterRegistry registry) {
+ Counter.builder("jvm.info")
+ .description("JVM version info")
+ .tags("version", System.getProperty("java.runtime.version", UNKNOWN),
+ "vendor", System.getProperty("java.vm.vendor", UNKNOWN),
+ "runtime", System.getProperty("java.runtime.name", UNKNOWN))
+ .register(registry)
+ .increment();
+ }
+}
diff --git a/zeppelin-server/src/main/java/org/apache/zeppelin/metric/PrometheusServlet.java b/zeppelin-server/src/main/java/org/apache/zeppelin/metric/PrometheusServlet.java
new file mode 100644
index 0000000..43bdf1f
--- /dev/null
+++ b/zeppelin-server/src/main/java/org/apache/zeppelin/metric/PrometheusServlet.java
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.zeppelin.metric;
+
+import java.io.IOException;
+import java.io.PrintWriter;
+
+import javax.servlet.ServletException;
+import javax.servlet.http.HttpServlet;
+import javax.servlet.http.HttpServletRequest;
+import javax.servlet.http.HttpServletResponse;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import io.micrometer.prometheus.PrometheusMeterRegistry;
+import io.prometheus.client.exporter.common.TextFormat;
+
+public class PrometheusServlet extends HttpServlet{
+
+ private static final Logger LOGGER = LoggerFactory.getLogger(PrometheusServlet.class);
+ /**
+ *
+ */
+ private static final long serialVersionUID = 3954804532706721368L;
+
+ private final PrometheusMeterRegistry promMetricRegistry;
+
+ public PrometheusServlet(PrometheusMeterRegistry promMetricRegistry) {
+ this.promMetricRegistry = promMetricRegistry;
+ }
+
+ private static final String CACHE_CONTROL = "Cache-Control";
+ private static final String NO_CACHE = "must-revalidate,no-cache,no-store";
+ @Override
+ protected void doGet(HttpServletRequest req,
+ HttpServletResponse resp) throws ServletException, IOException {
+ resp.setStatus(HttpServletResponse.SC_OK);
+ resp.setHeader(CACHE_CONTROL, NO_CACHE);
+ resp.setContentType(TextFormat.CONTENT_TYPE_004);
+ try (PrintWriter writer = resp.getWriter()) {
+ promMetricRegistry.scrape(writer);
+ } catch (IOException e){
+ LOGGER.error("IOException in PrometheusServlet", e);
+ }
+ }
+}
diff --git a/zeppelin-server/src/main/java/org/apache/zeppelin/server/ZeppelinServer.java b/zeppelin-server/src/main/java/org/apache/zeppelin/server/ZeppelinServer.java
index ce05735..bf55006 100644
--- a/zeppelin-server/src/main/java/org/apache/zeppelin/server/ZeppelinServer.java
+++ b/zeppelin-server/src/main/java/org/apache/zeppelin/server/ZeppelinServer.java
@@ -16,10 +16,30 @@
*/
package org.apache.zeppelin.server;
+import com.codahale.metrics.servlets.HealthCheckServlet;
+import com.codahale.metrics.servlets.PingServlet;
import com.google.gson.Gson;
import static org.apache.zeppelin.server.HtmlAddonResource.HTML_ADDON_IDENTIFIER;
+import io.micrometer.core.instrument.Clock;
+import io.micrometer.core.instrument.Metrics;
+import io.micrometer.core.instrument.Tags;
+import io.micrometer.core.instrument.binder.jetty.InstrumentedQueuedThreadPool;
+import io.micrometer.core.instrument.binder.jetty.JettyConnectionMetrics;
+import io.micrometer.core.instrument.binder.jetty.JettySslHandshakeMetrics;
+import io.micrometer.core.instrument.binder.jetty.TimedHandler;
+import io.micrometer.core.instrument.binder.jvm.ClassLoaderMetrics;
+import io.micrometer.core.instrument.binder.jvm.JvmMemoryMetrics;
+import io.micrometer.core.instrument.binder.jvm.JvmThreadMetrics;
+import io.micrometer.core.instrument.binder.system.FileDescriptorMetrics;
+import io.micrometer.core.instrument.binder.system.ProcessorMetrics;
+import io.micrometer.core.instrument.binder.system.UptimeMetrics;
+import io.micrometer.jmx.JmxConfig;
+import io.micrometer.jmx.JmxMeterRegistry;
+import io.micrometer.prometheus.PrometheusConfig;
+import io.micrometer.prometheus.PrometheusMeterRegistry;
+
import java.io.File;
import java.io.IOException;
import java.lang.management.ManagementFactory;
@@ -44,6 +64,7 @@ import org.apache.zeppelin.cluster.ClusterManagerServer;
import org.apache.zeppelin.conf.ZeppelinConfiguration;
import org.apache.zeppelin.conf.ZeppelinConfiguration.ConfVars;
import org.apache.zeppelin.display.AngularObjectRegistryListener;
+import org.apache.zeppelin.healthcheck.HealthChecks;
import org.apache.zeppelin.helium.ApplicationEventListener;
import org.apache.zeppelin.helium.Helium;
import org.apache.zeppelin.helium.HeliumApplicationFactory;
@@ -54,6 +75,8 @@ import org.apache.zeppelin.interpreter.InterpreterSetting;
import org.apache.zeppelin.interpreter.InterpreterSettingManager;
import org.apache.zeppelin.interpreter.recovery.RecoveryStorage;
import org.apache.zeppelin.interpreter.remote.RemoteInterpreterProcessListener;
+import org.apache.zeppelin.metric.JVMInfoBinder;
+import org.apache.zeppelin.metric.PrometheusServlet;
import org.apache.zeppelin.notebook.NoteEventListener;
import org.apache.zeppelin.notebook.NoteManager;
import org.apache.zeppelin.notebook.Notebook;
@@ -76,6 +99,7 @@ import org.apache.zeppelin.user.AuthenticationInfo;
import org.apache.zeppelin.user.Credentials;
import org.apache.zeppelin.util.ReflectionUtils;
import org.apache.zeppelin.utils.PEMImporter;
+import org.eclipse.jetty.http.HttpScheme;
import org.eclipse.jetty.http.HttpVersion;
import org.eclipse.jetty.jmx.ConnectorServer;
import org.eclipse.jetty.jmx.MBeanContainer;
@@ -93,8 +117,6 @@ import org.eclipse.jetty.servlet.FilterHolder;
import org.eclipse.jetty.servlet.ServletHolder;
import org.eclipse.jetty.util.resource.Resource;
import org.eclipse.jetty.util.ssl.SslContextFactory;
-import org.eclipse.jetty.util.thread.QueuedThreadPool;
-import org.eclipse.jetty.util.thread.ThreadPool;
import org.eclipse.jetty.webapp.WebAppContext;
import org.eclipse.jetty.websocket.servlet.WebSocketServlet;
import org.glassfish.hk2.api.Immediate;
@@ -116,6 +138,7 @@ public class ZeppelinServer extends ResourceConfig {
public static ServiceLocator sharedServiceLocator;
private static ZeppelinConfiguration conf;
+ private static PrometheusMeterRegistry promMetricRegistry;
public static void reset() {
conf = null;
@@ -135,9 +158,13 @@ public class ZeppelinServer extends ResourceConfig {
conf.setProperty("args", args);
jettyWebServer = setupJettyServer(conf);
+ initMetrics(conf);
+
+ TimedHandler timedHandler = new TimedHandler(Metrics.globalRegistry, Tags.empty());
+ jettyWebServer.setHandler(timedHandler);
ContextHandlerCollection contexts = new ContextHandlerCollection();
- jettyWebServer.setHandler(contexts);
+ timedHandler.setHandler(contexts);
sharedServiceLocator = ServiceLocatorFactory.getInstance().create("shared-locator");
ServiceLocatorUtilities.enableImmediateScope(sharedServiceLocator);
@@ -204,49 +231,20 @@ public class ZeppelinServer extends ResourceConfig {
setupClusterManagerServer(sharedServiceLocator);
// JMX Enable
- Stream.of("ZEPPELIN_JMX_ENABLE")
- .map(System::getenv)
- .map(Boolean::parseBoolean)
- .filter(Boolean::booleanValue)
- .map(jmxEnabled -> "ZEPPELIN_JMX_PORT")
- .map(System::getenv)
- .map(
- portString -> {
- try {
- return Integer.parseInt(portString);
- } catch (Exception e) {
- return null;
- }
- })
- .filter(Objects::nonNull)
- .forEach(
- port -> {
- try {
- MBeanContainer mbeanContainer =
- new MBeanContainer(ManagementFactory.getPlatformMBeanServer());
- jettyWebServer.addEventListener(mbeanContainer);
- jettyWebServer.addBean(mbeanContainer);
-
- JMXServiceURL jmxURL =
- new JMXServiceURL(
- String.format(
- "service:jmx:rmi://0.0.0.0:%d/jndi/rmi://0.0.0.0:%d/jmxrmi",
- port, port));
- ConnectorServer jmxServer =
- new ConnectorServer(jmxURL, "org.eclipse.jetty.jmx:name=rmiconnectorserver");
- jettyWebServer.addBean(jmxServer);
-
- // Add JMX Beans
- // TODO(jl): Need to investigate more about injection and jmx
- jettyWebServer.addBean(
- sharedServiceLocator.getService(InterpreterSettingManager.class));
- jettyWebServer.addBean(sharedServiceLocator.getService(NotebookServer.class));
-
- LOG.info("JMX Enabled with port: {}", port);
- } catch (Exception e) {
- LOG.warn("Error while setting JMX", e);
- }
- });
+ if (conf.isJMXEnabled()) {
+ int port = conf.getJMXPort();
+ // Setup JMX
+ MBeanContainer mbeanContainer = new MBeanContainer(ManagementFactory.getPlatformMBeanServer());
+ jettyWebServer.addBean(mbeanContainer);
+ JMXServiceURL jmxURL =
+ new JMXServiceURL(
+ String.format(
+ "service:jmx:rmi://0.0.0.0:%d/jndi/rmi://0.0.0.0:%d/jmxrmi",
+ port, port));
+ ConnectorServer jmxServer = new ConnectorServer(jmxURL, "org.eclipse.jetty.jmx:name=rmiconnectorserver");
+ jettyWebServer.addBean(jmxServer);
+ LOG.info("JMX Enabled with port: {}", port);
+ }
LOG.info("Starting zeppelin server");
try {
@@ -293,6 +291,23 @@ public class ZeppelinServer extends ResourceConfig {
}
}
+ private static void initMetrics(ZeppelinConfiguration conf) {
+ if (conf.isPrometheusMetricEnabled()) {
+ promMetricRegistry = new PrometheusMeterRegistry(PrometheusConfig.DEFAULT);
+ Metrics.addRegistry(promMetricRegistry);
+ }
+ if (conf.isJMXEnabled()) {
+ Metrics.addRegistry(new JmxMeterRegistry(JmxConfig.DEFAULT, Clock.SYSTEM));
+ }
+ new ClassLoaderMetrics().bindTo(Metrics.globalRegistry);
+ new JvmMemoryMetrics().bindTo(Metrics.globalRegistry);
+ new JvmThreadMetrics().bindTo(Metrics.globalRegistry);
+ new FileDescriptorMetrics().bindTo(Metrics.globalRegistry);
+ new ProcessorMetrics().bindTo(Metrics.globalRegistry);
+ new UptimeMetrics().bindTo(Metrics.globalRegistry);
+ new JVMInfoBinder().bindTo(Metrics.globalRegistry);
+ }
+
private static Thread shutdown(ZeppelinConfiguration conf) {
return new Thread(
() -> {
@@ -316,8 +331,9 @@ public class ZeppelinServer extends ResourceConfig {
}
private static Server setupJettyServer(ZeppelinConfiguration conf) {
- ThreadPool threadPool =
- new QueuedThreadPool(conf.getInt(ConfVars.ZEPPELIN_SERVER_JETTY_THREAD_POOL_MAX),
+ InstrumentedQueuedThreadPool threadPool =
+ new InstrumentedQueuedThreadPool(Metrics.globalRegistry, Tags.empty(),
+ conf.getInt(ConfVars.ZEPPELIN_SERVER_JETTY_THREAD_POOL_MAX),
conf.getInt(ConfVars.ZEPPELIN_SERVER_JETTY_THREAD_POOL_MIN),
conf.getInt(ConfVars.ZEPPELIN_SERVER_JETTY_THREAD_POOL_TIMEOUT));
final Server server = new Server(threadPool);
@@ -333,26 +349,34 @@ public class ZeppelinServer extends ResourceConfig {
httpConfig.setRequestHeaderSize(conf.getJettyRequestHeaderSize());
if (conf.useSsl()) {
LOG.debug("Enabling SSL for Zeppelin Server on port {}", sslPort);
- httpConfig.setSecureScheme("https");
+ httpConfig.setSecureScheme(HttpScheme.HTTPS.asString());
httpConfig.setSecurePort(sslPort);
HttpConfiguration httpsConfig = new HttpConfiguration(httpConfig);
httpsConfig.addCustomizer(new SecureRequestCustomizer());
+ SslConnectionFactory sslConnectionFactory = new SslConnectionFactory(getSslContextFactory(conf), HttpVersion.HTTP_1_1.asString());
+ HttpConnectionFactory httpsConnectionFactory = new HttpConnectionFactory(httpsConfig);
connector =
new ServerConnector(
server,
- new SslConnectionFactory(getSslContextFactory(conf), HttpVersion.HTTP_1_1.asString()),
- new HttpConnectionFactory(httpsConfig));
+ sslConnectionFactory,
+ httpsConnectionFactory);
connector.setPort(sslPort);
+ connector.addBean(new JettySslHandshakeMetrics(Metrics.globalRegistry, Tags.empty()));
} else {
- connector = new ServerConnector(server, new HttpConnectionFactory(httpConfig));
+ HttpConnectionFactory httpConnectionFactory = new HttpConnectionFactory(httpConfig);
+ connector =
+ new ServerConnector(
+ server,
+ httpConnectionFactory);
connector.setPort(port);
}
// Set some timeout options to make debugging easier.
int timeout = 1000 * 30;
connector.setIdleTimeout(timeout);
connector.setHost(conf.getServerAddress());
+ connector.addBean(new JettyConnectionMetrics(Metrics.globalRegistry, Tags.empty()));
server.addConnector(connector);
}
@@ -534,6 +558,16 @@ public class ZeppelinServer extends ResourceConfig {
}
}
+ /**
+  * Exposes the Prometheus scrape endpoint under {@code /metrics}.
+  * <p>
+  * {@code promMetricRegistry} is only assigned in {@code initMetrics} when Prometheus metrics
+  * are enabled, whereas this method is invoked unconditionally while the web app context is
+  * set up. Without a registry there is nothing to scrape, so no servlet is registered.
+  *
+  * @param webapp web application context receiving the servlet
+  */
+ private static void setupPrometheusContextHandler(WebAppContext webapp) {
+   if (promMetricRegistry == null) {
+     LOG.debug("Prometheus metrics are disabled, /metrics endpoint is not registered");
+     return;
+   }
+   webapp.addServlet(new ServletHolder(new PrometheusServlet(promMetricRegistry)), "/metrics");
+ }
+
+ /**
+  * Registers the readiness, liveness and ping servlets on the given web application context.
+  *
+  * @param webapp web application context receiving the servlets
+  */
+ private static void setupHealthCheckContextHandler(WebAppContext webapp) {
+   ServletHolder readinessHolder =
+       new ServletHolder(new HealthCheckServlet(HealthChecks.getHealthCheckReadinessRegistry()));
+   webapp.addServlet(readinessHolder, "/health/readiness");
+
+   ServletHolder livenessHolder =
+       new ServletHolder(new HealthCheckServlet(HealthChecks.getHealthCheckLivenessRegistry()));
+   webapp.addServlet(livenessHolder, "/health/liveness");
+
+   ServletHolder pingHolder = new ServletHolder(new PingServlet());
+   webapp.addServlet(pingHolder, "/ping");
+ }
+
private static WebAppContext setupWebAppContext(
ContextHandlerCollection contexts, ZeppelinConfiguration conf, String warPath, String contextPath) {
WebAppContext webApp = new WebAppContext();
@@ -621,6 +655,11 @@ public class ZeppelinServer extends ResourceConfig {
// Create `ZeppelinServer` using reflection and setup REST Api
setupRestApiContextHandler(webApp, conf);
+ // prometheus endpoint
+ setupPrometheusContextHandler(webApp);
+ // health endpoints
+ setupHealthCheckContextHandler(webApp);
+
// Notebook server
setupNotebookServer(webApp, conf, sharedServiceLocator);
}
diff --git a/zeppelin-server/src/test/java/org/apache/zeppelin/metric/MetricEndpointTest.java b/zeppelin-server/src/test/java/org/apache/zeppelin/metric/MetricEndpointTest.java
new file mode 100644
index 0000000..2d4fb9d
--- /dev/null
+++ b/zeppelin-server/src/test/java/org/apache/zeppelin/metric/MetricEndpointTest.java
@@ -0,0 +1,68 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.zeppelin.metric;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+
+import org.apache.http.client.methods.CloseableHttpResponse;
+import org.apache.http.client.methods.HttpGet;
+import org.apache.http.util.EntityUtils;
+import org.apache.zeppelin.conf.ZeppelinConfiguration;
+import org.apache.zeppelin.rest.AbstractTestRestApi;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+/**
+ * Starts a Zeppelin server with Prometheus metrics enabled and checks the {@code /metrics}
+ * endpoint.
+ */
+public class MetricEndpointTest extends AbstractTestRestApi {
+
+  private static final String PROMETHEUS_ENABLED_PROPERTY =
+      ZeppelinConfiguration.ConfVars.ZEPPELIN_METRIC_ENABLE_PROMETHEUS.getVarName();
+
+  // Value of the system property before this test class changed it (null if it was unset).
+  private static String previousPrometheusSetting;
+
+  @BeforeClass
+  public static void setUp() throws Exception {
+    previousPrometheusSetting = System.setProperty(PROMETHEUS_ENABLED_PROPERTY, "true");
+    AbstractTestRestApi.startUp(MetricEndpointTest.class.getSimpleName());
+  }
+
+  @AfterClass
+  public static void destroy() throws Exception {
+    try {
+      AbstractTestRestApi.shutDown();
+    } finally {
+      // Restore the property so the setting does not leak into tests that run later in this JVM.
+      if (previousPrometheusSetting == null) {
+        System.clearProperty(PROMETHEUS_ENABLED_PROPERTY);
+      } else {
+        System.setProperty(PROMETHEUS_ENABLED_PROPERTY, previousPrometheusSetting);
+      }
+    }
+  }
+
+  /**
+   * Simple endpoint test: the scrape output must be served with status 200 and contain JVM
+   * memory metrics.
+   *
+   * @throws IOException if the HTTP request fails
+   */
+  @Test
+  public void testPrometheusMetricJVM() throws IOException {
+    HttpGet request = new HttpGet(getUrlToTest() + "/metrics");
+    // try-with-resources releases the connection even when an assertion fails
+    try (CloseableHttpResponse get = getHttpClient().execute(request)) {
+      assertEquals(200, get.getStatusLine().getStatusCode());
+      String response = EntityUtils.toString(get.getEntity());
+      assertTrue("Contains JVM metric", response.contains("jvm_memory"));
+    }
+  }
+
+  /**
+   * @return base URL of the server under test, taken from the {@code url} system property or
+   *         {@code http://localhost:8080} when the property is not set
+   */
+  protected static String getUrlToTest() {
+    return System.getProperty("url", "http://localhost:8080");
+  }
+}
diff --git a/zeppelin-zengine/pom.xml b/zeppelin-zengine/pom.xml
index 34acc0a..c5ce79f 100644
--- a/zeppelin-zengine/pom.xml
+++ b/zeppelin-zengine/pom.xml
@@ -145,6 +145,19 @@
<version>${quartz.scheduler.version}</version>
</dependency>
+ <!-- Metrics -->
+ <dependency>
+ <groupId>io.micrometer</groupId>
+ <artifactId>micrometer-core</artifactId>
+ <version>${micrometer.version}</version>
+ </dependency>
+ <!-- Healthcheck -->
+ <dependency>
+ <groupId>io.dropwizard.metrics</groupId>
+ <artifactId>metrics-healthchecks</artifactId>
+ <version>${dropwizard.version}</version>
+ </dependency>
+
<dependency>
<groupId>com.google.code.gson</groupId>
<artifactId>gson</artifactId>
diff --git a/zeppelin-zengine/src/main/java/org/apache/zeppelin/healthcheck/DummyHealthCheck.java b/zeppelin-zengine/src/main/java/org/apache/zeppelin/healthcheck/DummyHealthCheck.java
new file mode 100644
index 0000000..bc07d4e
--- /dev/null
+++ b/zeppelin-zengine/src/main/java/org/apache/zeppelin/healthcheck/DummyHealthCheck.java
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.zeppelin.healthcheck;
+
+import com.codahale.metrics.health.HealthCheck;
+
+/**
+ * Health check without any real probe; it reports a healthy result on every invocation.
+ */
+public class DummyHealthCheck extends HealthCheck {
+
+  private static final String HEALTHY_MESSAGE = "DummyHealthCheck okay";
+
+  /**
+   * @return a healthy result carrying a fixed message
+   */
+  @Override
+  protected Result check() throws Exception {
+    return Result.healthy(HEALTHY_MESSAGE);
+  }
+}
diff --git a/zeppelin-zengine/src/main/java/org/apache/zeppelin/healthcheck/HdfsHealthCheck.java b/zeppelin-zengine/src/main/java/org/apache/zeppelin/healthcheck/HdfsHealthCheck.java
new file mode 100644
index 0000000..5f7f48a
--- /dev/null
+++ b/zeppelin-zengine/src/main/java/org/apache/zeppelin/healthcheck/HdfsHealthCheck.java
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.zeppelin.healthcheck;
+
+import java.io.IOException;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.zeppelin.notebook.FileSystemStorage;
+
+import com.codahale.metrics.health.HealthCheck;
+
+/**
+ * Health check that reports healthy as long as a given path exists on the file system.
+ */
+public class HdfsHealthCheck extends HealthCheck {
+  private final FileSystemStorage fs;
+  private final Path path;
+
+  /**
+   *
+   * @param fs used file system
+   * @param path checked path, which should always be present
+   */
+  public HdfsHealthCheck(FileSystemStorage fs, Path path) {
+    this.fs = fs;
+    this.path = path;
+  }
+
+  /**
+   * Probes the configured path.
+   *
+   * @return healthy if the path exists; unhealthy if it is missing or the lookup fails, in
+   *         which case the {@link IOException} is attached to the result
+   */
+  @Override
+  protected Result check() throws Exception {
+    final boolean pathPresent;
+    try {
+      pathPresent = fs.exists(path);
+    } catch (IOException e) {
+      // Result.unhealthy(String, Object...) would treat the exception as an unused format
+      // argument and drop it; the builder keeps both the message and the error.
+      return Result.builder()
+          .unhealthy(e)
+          .withMessage("Filesystem unhealthy")
+          .build();
+    }
+    if (pathPresent) {
+      return Result.healthy("Filesystem okay");
+    }
+    return Result.unhealthy("Filesystem unhealthy");
+  }
+}
diff --git a/zeppelin-zengine/src/main/java/org/apache/zeppelin/healthcheck/HealthChecks.java b/zeppelin-zengine/src/main/java/org/apache/zeppelin/healthcheck/HealthChecks.java
new file mode 100644
index 0000000..089bcd8
--- /dev/null
+++ b/zeppelin-zengine/src/main/java/org/apache/zeppelin/healthcheck/HealthChecks.java
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.zeppelin.healthcheck;
+
+import com.codahale.metrics.health.HealthCheckRegistry;
+
+/**
+ * Holder of the two process-wide health check registries (readiness and liveness).
+ * Both registries start out with a single {@link DummyHealthCheck} registered as "dummy".
+ */
+public class HealthChecks {
+
+  // Maybe add the ThreadDeadlockHealthcheck
+  // healthcheckReadinessRegistry.register("deadlock", new ThreadDeadlockHealthCheck());
+  private static final HealthCheckRegistry healthcheckReadinessRegistry =
+      newRegistryWithDummyCheck();
+
+  private static final HealthCheckRegistry healthcheckLivenessRegistry =
+      newRegistryWithDummyCheck();
+
+  private HealthChecks() {
+    // not used
+  }
+
+  /**
+   * @return a fresh registry that contains only the "dummy" health check
+   */
+  private static HealthCheckRegistry newRegistryWithDummyCheck() {
+    HealthCheckRegistry registry = new HealthCheckRegistry();
+    registry.register("dummy", new DummyHealthCheck());
+    return registry;
+  }
+
+  /**
+   * @return the registry of readiness checks
+   */
+  public static HealthCheckRegistry getHealthCheckReadinessRegistry() {
+    return healthcheckReadinessRegistry;
+  }
+
+  /**
+   * @return the registry of liveness checks
+   */
+  public static HealthCheckRegistry getHealthCheckLivenessRegistry() {
+    return healthcheckLivenessRegistry;
+  }
+}
diff --git a/zeppelin-zengine/src/main/java/org/apache/zeppelin/interpreter/InterpreterSettingManager.java b/zeppelin-zengine/src/main/java/org/apache/zeppelin/interpreter/InterpreterSettingManager.java
index 985cb47..e578dfc 100644
--- a/zeppelin-zengine/src/main/java/org/apache/zeppelin/interpreter/InterpreterSettingManager.java
+++ b/zeppelin-zengine/src/main/java/org/apache/zeppelin/interpreter/InterpreterSettingManager.java
@@ -26,8 +26,13 @@ import com.google.gson.Gson;
import com.google.gson.GsonBuilder;
import com.google.gson.reflect.TypeToken;
+import io.micrometer.core.instrument.Gauge;
+import io.micrometer.core.instrument.Meter;
+import io.micrometer.core.instrument.Metrics;
+import io.micrometer.core.instrument.Tags;
import java.util.Arrays;
import java.util.LinkedHashMap;
+import java.util.LinkedList;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@@ -124,8 +129,9 @@ public class InterpreterSettingManager implements NoteEventListener, ClusterEven
* id --> InterpreterSetting
* TODO(zjffdu) change it to name --> InterpreterSetting
*/
- private final Map<String, InterpreterSetting> interpreterSettings =
- Maps.newConcurrentMap();
+ private final Map<String, InterpreterSetting> interpreterSettings = Metrics.gaugeMapSize("interpreter.amount", Tags.empty(),
+ Maps.newConcurrentMap());
+ private final Map<String, List<Meter>> interpreterSettingsMeters = Maps.newConcurrentMap();
private final List<RemoteRepository> interpreterRepositories;
private InterpreterOption defaultOption;
@@ -240,7 +246,7 @@ public class InterpreterSettingManager implements NoteEventListener, ClusterEven
InterpreterSetting interpreterSetting = new InterpreterSetting(interpreterSettingTemplate);
initInterpreterSetting(interpreterSetting);
if (shouldRegister(interpreterSetting.getGroup())) {
- interpreterSettings.put(interpreterSetting.getId(), interpreterSetting);
+ addInterpreterSetting(interpreterSetting);
}
}
return;
@@ -281,13 +287,13 @@ public class InterpreterSettingManager implements NoteEventListener, ClusterEven
// remove it first
for (InterpreterSetting setting : interpreterSettings.values()) {
if (setting.getName().equals(savedInterpreterSetting.getName())) {
- interpreterSettings.remove(setting.getId());
+ removeInterpreterSetting(setting.getId());
}
}
savedInterpreterSetting.postProcessing();
LOGGER.info("Create interpreter setting {} from interpreter.json",
savedInterpreterSetting.getName());
- interpreterSettings.put(savedInterpreterSetting.getId(), savedInterpreterSetting);
+ addInterpreterSetting(savedInterpreterSetting);
}
for (InterpreterSetting interpreterSettingTemplate : interpreterSettingTemplates.values()) {
@@ -296,7 +302,7 @@ public class InterpreterSettingManager implements NoteEventListener, ClusterEven
// add newly detected interpreter if it doesn't exist in interpreter.json
if (!interpreterSettings.containsKey(interpreterSetting.getId())) {
LOGGER.info("Create interpreter setting: {} from interpreter setting template", interpreterSetting.getId());
- interpreterSettings.put(interpreterSetting.getId(), interpreterSetting);
+ addInterpreterSetting(interpreterSetting);
}
}
@@ -315,6 +321,28 @@ public class InterpreterSettingManager implements NoteEventListener, ClusterEven
}
}
+ /**
+  * Stores the setting under its id and registers a gauge that reports how many interpreter
+  * groups the setting currently holds. The registered meters are remembered per setting id so
+  * they can be removed again together with the setting.
+  *
+  * @param interpreterSetting setting to store and to instrument
+  */
+ private void addInterpreterSetting(InterpreterSetting interpreterSetting) {
+   String settingId = interpreterSetting.getId();
+   interpreterSettings.put(settingId, interpreterSetting);
+
+   // Registering a gauge whose name and tags match an existing meter does not replace it, so a
+   // gauge left over from a previous setting with this id would keep reading from the replaced
+   // object. Drop such meters before registering the new ones.
+   List<Meter> staleMeters = interpreterSettingsMeters.remove(settingId);
+   if (staleMeters != null) {
+     for (Meter staleMeter : staleMeters) {
+       Metrics.globalRegistry.remove(staleMeter);
+     }
+   }
+
+   List<Meter> meters = new LinkedList<>();
+   Gauge size = Gauge
+       .builder("interpreter.group.size", () -> interpreterSetting.getAllInterpreterGroups().size())
+       .description("Size of all interpreter groups")
+       .tags(Tags.of("name", settingId))
+       .tags(Tags.of("group", interpreterSetting.getGroup()))
+       .register(Metrics.globalRegistry);
+   meters.add(size);
+   interpreterSettingsMeters.put(settingId, meters);
+ }
+
+
+ /**
+  * Removes the setting with the given id together with the meters that were registered for it.
+  *
+  * @param id id of the interpreter setting to remove
+  */
+ private void removeInterpreterSetting(String id) {
+   interpreterSettings.remove(id);
+   List<Meter> meters = interpreterSettingsMeters.remove(id);
+   if (meters == null) {
+     // No meters are tracked for this id, so there is nothing to deregister.
+     return;
+   }
+   for (Meter meter : meters) {
+     Metrics.globalRegistry.remove(meter);
+   }
+ }
+
public void saveToFile() throws IOException {
InterpreterInfoSaving info = new InterpreterInfoSaving();
info.interpreterSettings = Maps.newHashMap(interpreterSettings);
@@ -322,6 +350,14 @@ public class InterpreterSettingManager implements NoteEventListener, ClusterEven
configStorage.save(info);
}
+ /**
+  * Registers a gauge on the global registry that reports the number of interpreter groups
+  * returned by {@code getAllInterpreterGroup()}.
+  */
+ private void initMetrics() {
+   // No tags are attached to this gauge.
+   Gauge.builder("interpreter.group.size.total", () -> getAllInterpreterGroup().size())
+       .description("Size of all interpreter groups")
+       .register(Metrics.globalRegistry);
+ }
+
private void init() throws IOException {
this.includesInterpreters =
Arrays.asList(conf.getString(ConfVars.ZEPPELIN_INTERPRETER_INCLUDES).split(","))
@@ -342,6 +378,7 @@ public class InterpreterSettingManager implements NoteEventListener, ClusterEven
loadInterpreterSettingFromDefaultDir(true);
loadFromFile();
saveToFile();
+ initMetrics();
// must init Recovery after init of InterpreterSettingManager
recoveryStorage.init();
@@ -827,7 +864,7 @@ public class InterpreterSettingManager implements NoteEventListener, ClusterEven
setting.setInterpreterOption(option);
setting.setProperties(properties);
initInterpreterSetting(setting);
- interpreterSettings.put(setting.getId(), setting);
+ addInterpreterSetting(setting);
saveToFile();
return setting;
@@ -977,7 +1014,7 @@ public class InterpreterSettingManager implements NoteEventListener, ClusterEven
if (interpreterSettings.containsKey(id)) {
InterpreterSetting intp = interpreterSettings.get(id);
intp.close();
- interpreterSettings.remove(id);
+ removeInterpreterSetting(id);
if (initiator) {
// Event initiator saves the file
// Cluster event accepting nodes do not need to save files repeatedly
diff --git a/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/scheduler/CronJob.java b/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/scheduler/CronJob.java
index c410ddc..2aed172 100644
--- a/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/scheduler/CronJob.java
+++ b/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/scheduler/CronJob.java
@@ -31,16 +31,20 @@ import java.util.HashMap;
public class CronJob implements org.quartz.Job {
private static final Logger LOGGER = LoggerFactory.getLogger(CronJob.class);
+ private static final String RESULT_SUCCEEDED = "succeeded";
+ private static final String RESULT_FAILED = "failed";
+ private static final String RESULT_SKIPPED = "skipped";
+
@Override
public void execute(JobExecutionContext context) {
JobDataMap jobDataMap = context.getJobDetail().getJobDataMap();
Note note = (Note) jobDataMap.get("note");
- LOGGER.info("Start cron job of note: {}", note.getId());
if (note.haveRunningOrPendingParagraphs()) {
LOGGER.warn(
"execution of the cron job is skipped because there is a running or pending "
+ "paragraph (note id: {})",
note.getId());
+ context.setResult(RESULT_SKIPPED);
return;
}
@@ -56,7 +60,9 @@ public class CronJob implements org.quartz.Job {
null);
try {
note.runAll(authenticationInfo, true, true, new HashMap<>());
+ context.setResult(RESULT_SUCCEEDED);
} catch (Exception e) {
+ context.setResult(RESULT_FAILED);
LOGGER.warn("Fail to run note: {}", note.getName(), e);
}
}
diff --git a/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/scheduler/CronJobListener.java b/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/scheduler/CronJobListener.java
new file mode 100644
index 0000000..90a712c
--- /dev/null
+++ b/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/scheduler/CronJobListener.java
@@ -0,0 +1,63 @@
+package org.apache.zeppelin.notebook.scheduler;
+
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Objects;
+import java.util.concurrent.ConcurrentHashMap;
+
+import org.apache.commons.lang3.StringUtils;
+import org.apache.zeppelin.notebook.Note;
+import org.quartz.JobDataMap;
+import org.quartz.JobExecutionContext;
+import org.quartz.JobExecutionException;
+import org.quartz.JobListener;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import io.micrometer.core.instrument.Metrics;
+import io.micrometer.core.instrument.Tag;
+import io.micrometer.core.instrument.Tags;
+import io.micrometer.core.instrument.Timer;
+
+/**
+ * Quartz job listener that logs the life cycle of note cron jobs and records their run time in
+ * the {@code cronjob} timer, tagged with the note and the result set by the job.
+ */
+public class CronJobListener implements JobListener {
+
+  private static final Logger LOGGER = LoggerFactory.getLogger(CronJobListener.class);
+
+  // JobExecutionContext -> Timer.Sample
+  // One listener instance receives the callbacks of all jobs, and Quartz can execute jobs in
+  // parallel on its worker threads, hence a concurrent map.
+  private final Map<JobExecutionContext, Timer.Sample> cronJobTimerSamples =
+      new ConcurrentHashMap<>();
+
+  @Override
+  public String getName() {
+    return getClass().getSimpleName();
+  }
+
+  /**
+   * Starts the timer sample for the job that is about to run.
+   */
+  @Override
+  public void jobToBeExecuted(JobExecutionContext context) {
+    Note note = getNote(context);
+    LOGGER.info("Start cron job of note: {}", note.getId());
+    cronJobTimerSamples.put(context, Timer.start(Metrics.globalRegistry));
+  }
+
+  @Override
+  public void jobExecutionVetoed(JobExecutionContext context) {
+    Note note = getNote(context);
+    LOGGER.info("vetoed cron job of note: {}", note.getId());
+  }
+
+  /**
+   * Stops the timer sample of the finished job and records it together with the job result.
+   */
+  @Override
+  public void jobWasExecuted(JobExecutionContext context, JobExecutionException jobException) {
+    Note note = getNote(context);
+    // The job may end without having set a result; fall back instead of failing with an NPE.
+    String result = Objects.toString(context.getResult(), "unknown");
+    LOGGER.info("cron job of noteId {} executed with result {}", note.getId(), result);
+    Timer.Sample sample = cronJobTimerSamples.remove(context);
+    if (sample != null) {
+      // NOTE(review): the tag key is spelled "nodeid"; presumably "noteid" was intended. It is
+      // kept unchanged because renaming it would change the exported metric.
+      Tag noteId = Tag.of("nodeid", note.getId());
+      Tag name = Tag.of("name", StringUtils.defaultString(note.getName(), "unknown"));
+      Tag statusTag = Tag.of("result", result);
+      sample.stop(Metrics.timer("cronjob", Tags.of(noteId, name, statusTag)));
+    } else {
+      LOGGER.warn("No Timer.Sample for NoteId {} found", note.getId());
+    }
+  }
+
+  /**
+   * Reads the note stored under the key "note" in the job data map of the given context.
+   */
+  private static Note getNote(JobExecutionContext context) {
+    JobDataMap jobDataMap = context.getJobDetail().getJobDataMap();
+    return (Note) jobDataMap.get("note");
+  }
+}
diff --git a/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/scheduler/QuartzSchedulerService.java b/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/scheduler/QuartzSchedulerService.java
index 0211e0e..e9894a2 100644
--- a/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/scheduler/QuartzSchedulerService.java
+++ b/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/scheduler/QuartzSchedulerService.java
@@ -56,6 +56,7 @@ public class QuartzSchedulerService implements SchedulerService {
this.zeppelinConfiguration = zeppelinConfiguration;
this.notebook = notebook;
this.scheduler = getScheduler();
+ this.scheduler.getListenerManager().addJobListener(new CronJobListener());
this.scheduler.start();
// Do in a separated thread because there may be many notes,
diff --git a/zeppelin-zengine/src/main/java/org/apache/zeppelin/storage/ConfigStorage.java b/zeppelin-zengine/src/main/java/org/apache/zeppelin/storage/ConfigStorage.java
index da6f20e..c380e5a 100644
--- a/zeppelin-zengine/src/main/java/org/apache/zeppelin/storage/ConfigStorage.java
+++ b/zeppelin-zengine/src/main/java/org/apache/zeppelin/storage/ConfigStorage.java
@@ -22,6 +22,7 @@ import com.google.common.annotations.VisibleForTesting;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import org.apache.zeppelin.conf.ZeppelinConfiguration;
+import org.apache.zeppelin.healthcheck.HealthChecks;
import org.apache.zeppelin.interpreter.InterpreterInfoSaving;
import org.apache.zeppelin.interpreter.InterpreterSetting;
import org.apache.zeppelin.notebook.NotebookAuthorizationInfoSaving;
@@ -40,6 +41,8 @@ import java.io.IOException;
*/
public abstract class ConfigStorage {
+ protected static final String STORAGE_HEALTHCHECK_NAME = "ConfigStorage";
+
private static ConfigStorage instance;
protected ZeppelinConfiguration zConf;
@@ -96,6 +99,7 @@ public abstract class ConfigStorage {
/**
 * Test hook: unregisters the health check named by STORAGE_HEALTHCHECK_NAME from the liveness
 * registry and clears the cached instance.
 */
// NOTE(review): presumably the unregister lets a storage created afterwards register its
// health check under the same name again - confirm against HealthCheckRegistry#register.
@VisibleForTesting
public static void reset() {
+ HealthChecks.getHealthCheckLivenessRegistry().unregister(STORAGE_HEALTHCHECK_NAME);
instance = null;
}
}
diff --git a/zeppelin-zengine/src/main/java/org/apache/zeppelin/storage/FileSystemConfigStorage.java b/zeppelin-zengine/src/main/java/org/apache/zeppelin/storage/FileSystemConfigStorage.java
index 005923e..ac7d108 100644
--- a/zeppelin-zengine/src/main/java/org/apache/zeppelin/storage/FileSystemConfigStorage.java
+++ b/zeppelin-zengine/src/main/java/org/apache/zeppelin/storage/FileSystemConfigStorage.java
@@ -20,6 +20,8 @@ package org.apache.zeppelin.storage;
import org.apache.hadoop.fs.Path;
import org.apache.zeppelin.conf.ZeppelinConfiguration;
+import org.apache.zeppelin.healthcheck.HdfsHealthCheck;
+import org.apache.zeppelin.healthcheck.HealthChecks;
import org.apache.zeppelin.interpreter.InterpreterInfoSaving;
import org.apache.zeppelin.notebook.FileSystemStorage;
import org.apache.zeppelin.notebook.NotebookAuthorizationInfoSaving;
@@ -56,6 +58,7 @@ public class FileSystemConfigStorage extends ConfigStorage {
this.interpreterSettingPath = fs.makeQualified(new Path(zConf.getInterpreterSettingPath(false)));
this.authorizationPath = fs.makeQualified(new Path(zConf.getNotebookAuthorizationPath(false)));
this.credentialPath = fs.makeQualified(new Path(zConf.getCredentialsPath(false)));
+ HealthChecks.getHealthCheckLivenessRegistry().register(STORAGE_HEALTHCHECK_NAME, new HdfsHealthCheck(this.fs, configPath));
}
@Override