You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by pw...@apache.org on 2013/09/10 21:40:30 UTC

[16/50] git commit: Adding more docs and some code cleanup

Adding more docs and some code cleanup


Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/c190b48b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/c190b48b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/c190b48b

Branch: refs/heads/branch-0.8
Commit: c190b48bf5073b3349b5060c324c890d95bc4260
Parents: 8de8ee5
Author: Patrick Wendell <pw...@gmail.com>
Authored: Sun Sep 8 10:47:45 2013 -0700
Committer: Patrick Wendell <pw...@gmail.com>
Committed: Sun Sep 8 13:46:28 2013 -0700

----------------------------------------------------------------------
 conf/metrics.properties.template                | 36 +++++++++++++++++++-
 .../apache/spark/metrics/MetricsConfig.scala    |  7 ++--
 .../spark/metrics/sink/MetricsServlet.scala     | 11 +++---
 .../spark/metrics/MetricsConfigSuite.scala      | 19 +++++------
 docs/monitoring.md                              |  9 +++++
 5 files changed, 62 insertions(+), 20 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/c190b48b/conf/metrics.properties.template
----------------------------------------------------------------------
diff --git a/conf/metrics.properties.template b/conf/metrics.properties.template
index 6c36f3c..f0e033e 100644
--- a/conf/metrics.properties.template
+++ b/conf/metrics.properties.template
@@ -31,7 +31,7 @@
 #    1. To add a new sink, set the "class" option to a fully qualified class
 #    name (see examples below).
 #    2. Some sinks involve a polling period. The minimum allowed polling period
-#    is  1 second.
+#    is 1 second.
 #    3. Wild card properties can be overridden by more specific properties.
 #    For example, master.sink.console.period takes precedence over
 #    *.sink.console.period.
@@ -47,6 +47,40 @@
 #    instance master and applications. MetricsServlet may not be configured by self.
 #
 
+## List of available sinks and their properties.
+
+# org.apache.spark.metrics.sink.ConsoleSink
+#   Name:   Default:   Description:
+#   period  10         Poll period
+#   unit    seconds    Units of poll period
+
+# org.apache.spark.metrics.sink.CSVSink
+#   Name:     Default:   Description:
+#   period    10         Poll period
+#   unit      seconds    Units of poll period
+#   directory /tmp       Where to store CSV files
+
+# org.apache.spark.metrics.sink.GangliaSink
+#   Name:     Default:   Description:
+#   host      NONE       Hostname or multicast group of Ganglia server
+#   port      NONE       Port of Ganglia server(s)
+#   period    10         Poll period
+#   unit      seconds    Units of poll period
+#   ttl       1          TTL of messages sent by Ganglia
+#   mode      multicast  Ganglia network mode ('unicast' or 'multicast')
+
+# org.apache.spark.metrics.sink.JmxSink
+
+# org.apache.spark.metrics.sink.MetricsServlet
+#   Name:     Default:   Description:
+#   path      VARIES*    Path prefix from the web server root
+#   sample    false      Whether to show entire set of samples for histograms ('false' or 'true')
+#
+# * Default path is /metrics/json for all instances except the master. The master has two paths:
+#     /metrics/applications/json # App information
+#     /metrics/master/json       # Master information
+
+## Examples
 # Enable JmxSink for all instances by class name
 #*.sink.jmx.class=org.apache.spark.metrics.sink.JmxSink
 

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/c190b48b/core/src/main/scala/org/apache/spark/metrics/MetricsConfig.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/metrics/MetricsConfig.scala b/core/src/main/scala/org/apache/spark/metrics/MetricsConfig.scala
index 0f9c4e0..caab748 100644
--- a/core/src/main/scala/org/apache/spark/metrics/MetricsConfig.scala
+++ b/core/src/main/scala/org/apache/spark/metrics/MetricsConfig.scala
@@ -37,10 +37,9 @@ private[spark] class MetricsConfig(val configFile: Option[String]) extends Loggi
 
   private def setDefaultProperties(prop: Properties) {
     prop.setProperty("*.sink.servlet.class", "org.apache.spark.metrics.sink.MetricsServlet")
-    prop.setProperty("*.sink.servlet.uri", "/metrics/json")
-    prop.setProperty("*.sink.servlet.sample", "false")
-    prop.setProperty("master.sink.servlet.uri", "/metrics/master/json")
-    prop.setProperty("applications.sink.servlet.uri", "/metrics/applications/json")
+    prop.setProperty("*.sink.servlet.path", "/metrics/json")
+    prop.setProperty("master.sink.servlet.path", "/metrics/master/json")
+    prop.setProperty("applications.sink.servlet.path", "/metrics/applications/json")
   }
 
   def initialize() {

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/c190b48b/core/src/main/scala/org/apache/spark/metrics/sink/MetricsServlet.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/metrics/sink/MetricsServlet.scala b/core/src/main/scala/org/apache/spark/metrics/sink/MetricsServlet.scala
index 4e90dd4..99357fe 100644
--- a/core/src/main/scala/org/apache/spark/metrics/sink/MetricsServlet.scala
+++ b/core/src/main/scala/org/apache/spark/metrics/sink/MetricsServlet.scala
@@ -31,18 +31,21 @@ import org.eclipse.jetty.server.Handler
 import org.apache.spark.ui.JettyUtils
 
 class MetricsServlet(val property: Properties, val registry: MetricRegistry) extends Sink {
-  val SERVLET_KEY_URI = "uri"
+  val SERVLET_KEY_PATH = "path"
   val SERVLET_KEY_SAMPLE = "sample"
 
-  val servletURI = property.getProperty(SERVLET_KEY_URI)
+  val SERVLET_DEFAULT_SAMPLE = false
 
-  val servletShowSample = property.getProperty(SERVLET_KEY_SAMPLE).toBoolean
+  val servletPath = property.getProperty(SERVLET_KEY_PATH)
+
+  val servletShowSample = Option(property.getProperty(SERVLET_KEY_SAMPLE)).map(_.toBoolean)
+    .getOrElse(SERVLET_DEFAULT_SAMPLE)
 
   val mapper = new ObjectMapper().registerModule(
     new MetricsModule(TimeUnit.SECONDS, TimeUnit.MILLISECONDS, servletShowSample))
 
   def getHandlers = Array[(String, Handler)](
-    (servletURI, JettyUtils.createHandler(request => getMetricsSnapshot(request), "text/json"))
+    (servletPath, JettyUtils.createHandler(request => getMetricsSnapshot(request), "text/json"))
   )
 
   def getMetricsSnapshot(request: HttpServletRequest): String = {

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/c190b48b/core/src/test/scala/org/apache/spark/metrics/MetricsConfigSuite.scala
----------------------------------------------------------------------
diff --git a/core/src/test/scala/org/apache/spark/metrics/MetricsConfigSuite.scala b/core/src/test/scala/org/apache/spark/metrics/MetricsConfigSuite.scala
index 58c94a1..1a9ce8c 100644
--- a/core/src/test/scala/org/apache/spark/metrics/MetricsConfigSuite.scala
+++ b/core/src/test/scala/org/apache/spark/metrics/MetricsConfigSuite.scala
@@ -30,14 +30,13 @@ class MetricsConfigSuite extends FunSuite with BeforeAndAfter {
     val conf = new MetricsConfig(Option("dummy-file"))
     conf.initialize()
 
-    assert(conf.properties.size() === 5)
+    assert(conf.properties.size() === 4)
     assert(conf.properties.getProperty("test-for-dummy") === null)
 
     val property = conf.getInstance("random")
-    assert(property.size() === 3)
+    assert(property.size() === 2)
     assert(property.getProperty("sink.servlet.class") === "org.apache.spark.metrics.sink.MetricsServlet")
-    assert(property.getProperty("sink.servlet.uri") === "/metrics/json")
-    assert(property.getProperty("sink.servlet.sample") === "false")
+    assert(property.getProperty("sink.servlet.path") === "/metrics/json")
   }
 
   test("MetricsConfig with properties set") {
@@ -45,22 +44,20 @@ class MetricsConfigSuite extends FunSuite with BeforeAndAfter {
     conf.initialize()
 
     val masterProp = conf.getInstance("master")
-    assert(masterProp.size() === 6)
+    assert(masterProp.size() === 5)
     assert(masterProp.getProperty("sink.console.period") === "20")
     assert(masterProp.getProperty("sink.console.unit") === "minutes")
     assert(masterProp.getProperty("source.jvm.class") === "org.apache.spark.metrics.source.JvmSource")
     assert(masterProp.getProperty("sink.servlet.class") === "org.apache.spark.metrics.sink.MetricsServlet")
-    assert(masterProp.getProperty("sink.servlet.uri") === "/metrics/master/json")
-    assert(masterProp.getProperty("sink.servlet.sample") === "false")
+    assert(masterProp.getProperty("sink.servlet.path") === "/metrics/master/json")
 
     val workerProp = conf.getInstance("worker")
-    assert(workerProp.size() === 6)
+    assert(workerProp.size() === 5)
     assert(workerProp.getProperty("sink.console.period") === "10")
     assert(workerProp.getProperty("sink.console.unit") === "seconds")
     assert(workerProp.getProperty("source.jvm.class") === "org.apache.spark.metrics.source.JvmSource")
     assert(workerProp.getProperty("sink.servlet.class") === "org.apache.spark.metrics.sink.MetricsServlet")
-    assert(workerProp.getProperty("sink.servlet.uri") === "/metrics/json")
-    assert(workerProp.getProperty("sink.servlet.sample") === "false")
+    assert(workerProp.getProperty("sink.servlet.path") === "/metrics/json")
   }
 
   test("MetricsConfig with subProperties") {
@@ -84,6 +81,6 @@ class MetricsConfigSuite extends FunSuite with BeforeAndAfter {
     assert(consoleProps.size() === 2)
 
     val servletProps = sinkProps("servlet")
-    assert(servletProps.size() === 3)
+    assert(servletProps.size() === 2)
   }
 }

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/c190b48b/docs/monitoring.md
----------------------------------------------------------------------
diff --git a/docs/monitoring.md b/docs/monitoring.md
index 0ec9871..4c4f174 100644
--- a/docs/monitoring.md
+++ b/docs/monitoring.md
@@ -31,6 +31,15 @@ set of sinks to which metrics are reported. The following instances are currentl
 * `executor`: A Spark executor.
 * `driver`: The Spark driver process (the process in which your SparkContext is created).
 
+Each instance can report to zero or more _sinks_. Sinks are contained in the
+`org.apache.spark.metrics.sink` package:
+
+* `ConsoleSink`: Logs metrics information to the console.
+* `CSVSink`: Exports metrics data to CSV files at regular intervals.
+* `GangliaSink`: Sends metrics to a Ganglia node or multicast group.
+* `JmxSink`: Registers metrics for viewing in a JMX console.
+* `MetricsServlet`: Adds a servlet within the existing Spark UI to serve metrics data as JSON data.
+
 The syntax of the metrics configuration file is defined in an example configuration file, 
 `$SPARK_HOME/conf/metrics.properties.template`.