You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by zj...@apache.org on 2015/05/09 02:42:20 UTC

[26/50] hadoop git commit: HDFS-8284. Update documentation about how to use HTrace with HDFS (Masatake Iwasaki via Colin P. McCabe)

HDFS-8284. Update documentation about how to use HTrace with HDFS (Masatake Iwasaki via Colin P. McCabe)


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/cd8fdbf0
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/cd8fdbf0
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/cd8fdbf0

Branch: refs/heads/YARN-2928
Commit: cd8fdbf091f2cae1779bd1e0dcb197a1db27ef3d
Parents: d51c820
Author: Colin Patrick Mccabe <cm...@cloudera.com>
Authored: Fri May 8 12:30:03 2015 -0700
Committer: Zhijie Shen <zj...@apache.org>
Committed: Fri May 8 17:32:50 2015 -0700

----------------------------------------------------------------------
 .../src/main/resources/core-default.xml         |  12 --
 .../hadoop-common/src/site/markdown/Tracing.md  | 128 ++++++++++---------
 hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt     |   3 +
 .../src/main/resources/hdfs-default.xml         |  18 +++
 4 files changed, 87 insertions(+), 74 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/cd8fdbf0/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml
index 34284d1..97e01a8 100644
--- a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml
+++ b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml
@@ -1756,18 +1756,6 @@ for ldap providers in the same way as above does.
   </description>
 </property>
 
-<property>
-  <name>hadoop.htrace.spanreceiver.classes</name>
-  <value></value>
-  <description>
-    A comma separated list of the fully-qualified class name of classes 
-    implementing SpanReceiver. The tracing system works by collecting 
-    information in structs called 'Spans'. It is up to you to choose 
-    how you want to receive this information by implementing the 
-    SpanReceiver interface.
-  </description>
-</property>
-
  <property>
   <name>ipc.server.max.connections</name>
   <value>0</value>

http://git-wip-us.apache.org/repos/asf/hadoop/blob/cd8fdbf0/hadoop-common-project/hadoop-common/src/site/markdown/Tracing.md
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/Tracing.md b/hadoop-common-project/hadoop-common/src/site/markdown/Tracing.md
index 3ef35b2..76b5ee0 100644
--- a/hadoop-common-project/hadoop-common/src/site/markdown/Tracing.md
+++ b/hadoop-common-project/hadoop-common/src/site/markdown/Tracing.md
@@ -18,13 +18,13 @@ Enabling Dapper-like Tracing in Hadoop
 * [Enabling Dapper-like Tracing in Hadoop](#Enabling_Dapper-like_Tracing_in_Hadoop)
     * [Dapper-like Tracing in Hadoop](#Dapper-like_Tracing_in_Hadoop)
         * [HTrace](#HTrace)
-        * [Samplers](#Samplers)
         * [SpanReceivers](#SpanReceivers)
-        * [Setting up ZipkinSpanReceiver](#Setting_up_ZipkinSpanReceiver)
         * [Dynamic update of tracing configuration](#Dynamic_update_of_tracing_configuration)
         * [Starting tracing spans by HTrace API](#Starting_tracing_spans_by_HTrace_API)
         * [Sample code for tracing](#Sample_code_for_tracing)
-  
+        * [Starting tracing spans by configuration for HDFS client](#Starting_tracing_spans_by_configuration_for_HDFS_client)
+
+
 Dapper-like Tracing in Hadoop
 -----------------------------
 
@@ -32,83 +32,51 @@ Dapper-like Tracing in Hadoop
 
 [HDFS-5274](https://issues.apache.org/jira/browse/HDFS-5274) added support for tracing requests through HDFS,
 using the open source tracing library,
-[Apache HTrace](https://git-wip-us.apache.org/repos/asf/incubator-htrace.git). 
+[Apache HTrace](http://htrace.incubator.apache.org/).
 Setting up tracing is quite simple, however it requires some very minor changes to your client code.
 
-### Samplers
-
-Configure the samplers in `core-site.xml` property: `hadoop.htrace.sampler`.
-The value can be NeverSampler, AlwaysSampler or ProbabilitySampler.
-NeverSampler: HTrace is OFF for all spans;
-AlwaysSampler: HTrace is ON for all spans;
-ProbabilitySampler: HTrace is ON for some percentage% of top-level spans.
-
-      <property>
-        <name>hadoop.htrace.sampler</name>
-        <value>NeverSampler</value>
-      </property>
-
 ### SpanReceivers
 
 The tracing system works by collecting information in structs called 'Spans'.
 It is up to you to choose how you want to receive this information
-by implementing the SpanReceiver interface, which defines one method:
+by using an implementation of the [SpanReceiver](http://htrace.incubator.apache.org/#Span_Receivers)
+interface bundled with HTrace or by implementing it yourself.
 
-    public void receiveSpan(Span span);
+[HTrace](http://htrace.incubator.apache.org/) provides options such as
 
-Configure what SpanReceivers you'd like to use
+* FlumeSpanReceiver
+* HBaseSpanReceiver
+* HTracedRESTReceiver
+* ZipkinSpanReceiver
+
+In order to set up SpanReceivers for HDFS servers,
+configure what SpanReceivers you'd like to use
 by putting a comma separated list of the fully-qualified class name of classes implementing SpanReceiver
-in `core-site.xml` property: `hadoop.htrace.spanreceiver.classes`.
+in `hdfs-site.xml` property: `dfs.htrace.spanreceiver.classes`.
 
+```xml
       <property>
-        <name>hadoop.htrace.spanreceiver.classes</name>
+        <name>dfs.htrace.spanreceiver.classes</name>
         <value>org.apache.htrace.impl.LocalFileSpanReceiver</value>
       </property>
       <property>
-        <name>hadoop.htrace.local-file-span-receiver.path</name>
+        <name>dfs.htrace.local-file-span-receiver.path</name>
         <value>/var/log/hadoop/htrace.out</value>
       </property>
+```
 
 You can omit package name prefix if you use span receiver bundled with HTrace.
 
+```xml
       <property>
-        <name>hadoop.htrace.spanreceiver.classes</name>
+        <name>dfs.htrace.spanreceiver.classes</name>
         <value>LocalFileSpanReceiver</value>
       </property>
+```
 
-### Setting up ZipkinSpanReceiver
-
-Instead of implementing SpanReceiver by yourself,
-you can use `ZipkinSpanReceiver` which uses
-[Zipkin](https://github.com/twitter/zipkin) for collecting and displaying tracing data.
-
-In order to use `ZipkinSpanReceiver`,
-you need to download and setup [Zipkin](https://github.com/twitter/zipkin) first.
-
-you also need to add the jar of `htrace-zipkin` to the classpath of Hadoop on each node.
-Here is example setup procedure.
-
-      $ git clone https://github.com/cloudera/htrace
-      $ cd htrace/htrace-zipkin
-      $ mvn compile assembly:single
-      $ cp target/htrace-zipkin-*-jar-with-dependencies.jar $HADOOP_HOME/share/hadoop/common/lib/
-
-The sample configuration for `ZipkinSpanReceiver` is shown below.
-By adding these to `core-site.xml` of NameNode and DataNodes, `ZipkinSpanReceiver` is initialized on the startup.
-You also need this configuration on the client node in addition to the servers.
-
-      <property>
-        <name>hadoop.htrace.spanreceiver.classes</name>
-        <value>ZipkinSpanReceiver</value>
-      </property>
-      <property>
-        <name>hadoop.htrace.zipkin.collector-hostname</name>
-        <value>192.168.1.2</value>
-      </property>
-      <property>
-        <name>hadoop.htrace.zipkin.collector-port</name>
-        <value>9410</value>
-      </property>
+You also need to add the jar bundling SpanReceiver to the classpath of Hadoop
+on each node. (LocalFileSpanReceiver in the example above is included in the
+jar of htrace-core which is bundled with Hadoop.)
 
 ### Dynamic update of tracing configuration
 
@@ -136,8 +104,8 @@ You need to run the command against all servers if you want to update the config
 You need to specify the class name of span receiver as argument of `-class` option.
 You can specify the configuration associated with span receiver by `-Ckey=value` options.
 
-      $ hadoop trace -add -class LocalFileSpanReceiver -Chadoop.htrace.local-file-span-receiver.path=/tmp/htrace.out -host 192.168.56.2:9000
-      Added trace span receiver 2 with configuration hadoop.htrace.local-file-span-receiver.path = /tmp/htrace.out
+      $ hadoop trace -add -class LocalFileSpanReceiver -Cdfs.htrace.local-file-span-receiver.path=/tmp/htrace.out -host 192.168.56.2:9000
+      Added trace span receiver 2 with configuration dfs.htrace.local-file-span-receiver.path = /tmp/htrace.out
 
       $ hadoop trace -list -host 192.168.56.2:9000
       ID  CLASS
@@ -149,8 +117,9 @@ In order to trace, you will need to wrap the traced logic with **tracing span**
 When there is running tracing spans,
 the tracing information is propagated to servers along with RPC requests.
 
-In addition, you need to initialize `SpanReceiver` once per process.
+In addition, you need to initialize `SpanReceiverHost` once per process.
 
+```java
     import org.apache.hadoop.hdfs.HdfsConfiguration;
     import org.apache.hadoop.tracing.SpanReceiverHost;
     import org.apache.htrace.Sampler;
@@ -169,14 +138,17 @@ In addition, you need to initialize `SpanReceiver` once per process.
         } finally {
           if (ts != null) ts.close();
         }
+```
 
-### Sample code for tracing
+### Sample code for tracing by HTrace API
 
 The `TracingFsShell.java` shown below is the wrapper of FsShell
 which start tracing span before invoking HDFS shell command.
 
+```java
     import org.apache.hadoop.conf.Configuration;
     import org.apache.hadoop.fs.FsShell;
+    import org.apache.hadoop.hdfs.DFSConfigKeys;
     import org.apache.hadoop.tracing.SpanReceiverHost;
     import org.apache.hadoop.util.ToolRunner;
     import org.apache.htrace.Sampler;
@@ -189,7 +161,7 @@ which start tracing span before invoking HDFS shell command.
         FsShell shell = new FsShell();
         conf.setQuietMode(false);
         shell.setConf(conf);
-        SpanReceiverHost.getInstance(conf);
+        SpanReceiverHost.get(conf, DFSConfigKeys.DFS_SERVER_HTRACE_PREFIX);
         int res = 0;
         TraceScope ts = null;
         try {
@@ -202,8 +174,40 @@ which start tracing span before invoking HDFS shell command.
         System.exit(res);
       }
     }
+```
 
 You can compile and execute this code as shown below.
 
     $ javac -cp `hadoop classpath` TracingFsShell.java
     $ java -cp .:`hadoop classpath` TracingFsShell -ls /
+
+### Starting tracing spans by configuration for HDFS client
+
+The DFSClient can enable tracing internally. This allows you to use HTrace with
+your client without modifying the client source code.
+
+Configure the span receivers and samplers in `hdfs-site.xml`
+by properties `dfs.client.htrace.spanreceiver.classes` and `dfs.client.htrace.sampler`.
+The value of `dfs.client.htrace.sampler` can be NeverSampler, AlwaysSampler or ProbabilitySampler.
+
+* NeverSampler: HTrace is OFF for all requests to namenodes and datanodes;
+* AlwaysSampler: HTrace is ON for all requests to namenodes and datanodes;
+* ProbabilitySampler: HTrace is ON for some percentage of requests to namenodes and datanodes.
+
+You do not need to enable this if your client program has been modified
+to use HTrace.
+
+```xml
+      <property>
+        <name>dfs.client.htrace.spanreceiver.classes</name>
+        <value>LocalFileSpanReceiver</value>
+      </property>
+      <property>
+        <name>dfs.client.htrace.sampler</name>
+        <value>ProbabilitySampler</value>
+      </property>
+      <property>
+        <name>dfs.client.htrace.sampler.fraction</name>
+        <value>0.5</value>
+      </property>
+```

http://git-wip-us.apache.org/repos/asf/hadoop/blob/cd8fdbf0/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index 236a356..7e958b6 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -527,6 +527,9 @@ Release 2.8.0 - UNRELEASED
 
     HDFS-5640. Add snapshot methods to FileContext. (Rakesh R via cnauroth)
 
+    HDFS-8284. Update documentation about how to use HTrace with HDFS (Masatake
+    Iwasaki via Colin P. McCabe)
+
   OPTIMIZATIONS
 
     HDFS-8026. Trace FSOutputSummer#writeChecksumChunks rather than

http://git-wip-us.apache.org/repos/asf/hadoop/blob/cd8fdbf0/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
index 9e7061a..fe1d1de 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
@@ -2316,4 +2316,22 @@
     the delay time will increase exponentially(double) for each retry.
   </description>
 </property>
+
+<property>
+  <name>dfs.htrace.spanreceiver.classes</name>
+  <value></value>
+  <description>
+    A comma separated list of the class names of the HTrace SpanReceivers for the NameNode and DataNode.
+  </description>
+</property>
+
+<property>
+  <name>dfs.client.htrace.spanreceiver.classes</name>
+  <value></value>
+  <description>
+    The class name of the HTrace SpanReceiver for the HDFS client. You do not
+    need to enable this if your client program has been modified to use HTrace.
+  </description>
+</property>
+
 </configuration>