You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@giraph.apache.org by ni...@apache.org on 2013/05/06 20:29:01 UTC

git commit: updated refs/heads/trunk to d36dd50

Updated Branches:
  refs/heads/trunk 4f88fc8fc -> d36dd5081


GIRAPH-592: yourkit profiling


Project: http://git-wip-us.apache.org/repos/asf/giraph/repo
Commit: http://git-wip-us.apache.org/repos/asf/giraph/commit/d36dd508
Tree: http://git-wip-us.apache.org/repos/asf/giraph/tree/d36dd508
Diff: http://git-wip-us.apache.org/repos/asf/giraph/diff/d36dd508

Branch: refs/heads/trunk
Commit: d36dd50810929b5dad2d3bab2007e28802087ad6
Parents: 4f88fc8
Author: Nitay Joffe <ni...@apache.org>
Authored: Mon May 6 14:24:46 2013 -0400
Committer: Nitay Joffe <ni...@apache.org>
Committed: Mon May 6 14:24:46 2013 -0400

----------------------------------------------------------------------
 CHANGELOG                                          |    2 +
 giraph-core/pom.xml                                |    4 +
 .../apache/giraph/conf/GiraphConfiguration.java    |   60 +++++++
 .../org/apache/giraph/conf/GiraphConstants.java    |    5 +
 .../org/apache/giraph/utils/YourKitContext.java    |  123 +++++++++++++++
 .../org/apache/giraph/utils/YourKitProfiler.java   |   72 +++++++++
 .../org/apache/giraph/worker/BspServiceWorker.java |   31 ++--
 pom.xml                                            |    6 +
 8 files changed, 287 insertions(+), 16 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/giraph/blob/d36dd508/CHANGELOG
----------------------------------------------------------------------
diff --git a/CHANGELOG b/CHANGELOG
index 018c2a4..6439581 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,6 +1,8 @@
 Giraph Change Log
 
 Release 1.0.1 - unreleased
+  GIRAPH-592: YourKit profiler (nitay)
+
   GIRAPH-618: Website Documentation: Aggregators (and sharded aggregators)
   (majakabiljo)
 

http://git-wip-us.apache.org/repos/asf/giraph/blob/d36dd508/giraph-core/pom.xml
----------------------------------------------------------------------
diff --git a/giraph-core/pom.xml b/giraph-core/pom.xml
index 56ff468..bfd894c 100644
--- a/giraph-core/pom.xml
+++ b/giraph-core/pom.xml
@@ -409,6 +409,10 @@ under the License.
   <dependencies>
     <!-- compile dependencies. sorted lexicographically. -->
     <dependency>
+      <groupId>com.facebook.thirdparty.yourkit-api</groupId>
+      <artifactId>yjp-controller-api-redist</artifactId>
+    </dependency>
+    <dependency>
       <groupId>com.google.guava</groupId>
       <artifactId>guava</artifactId>
     </dependency>

http://git-wip-us.apache.org/repos/asf/giraph/blob/d36dd508/giraph-core/src/main/java/org/apache/giraph/conf/GiraphConfiguration.java
----------------------------------------------------------------------
diff --git a/giraph-core/src/main/java/org/apache/giraph/conf/GiraphConfiguration.java b/giraph-core/src/main/java/org/apache/giraph/conf/GiraphConfiguration.java
index 45a29ff..48f3d4b 100644
--- a/giraph-core/src/main/java/org/apache/giraph/conf/GiraphConfiguration.java
+++ b/giraph-core/src/main/java/org/apache/giraph/conf/GiraphConfiguration.java
@@ -38,6 +38,7 @@ import org.apache.giraph.partition.ReusesObjectsPartition;
 import org.apache.giraph.worker.WorkerContext;
 import org.apache.giraph.worker.WorkerObserver;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.mapreduce.Mapper;
 import org.apache.hadoop.net.DNS;
 
 import java.net.UnknownHostException;
@@ -1009,4 +1010,63 @@ public class GiraphConfiguration extends Configuration
     super.setClass(name, theClass, xface);
     giraphSetParameters.setClass(name, theClass, xface);
   }
+
+  /**
+   * Get the output directory to write YourKit snapshots to
+   * @param context Map context
+   * @return output directory
+   */
+  public String getYourKitOutputDir(Mapper.Context context) {
+    final String cacheKey = "giraph.yourkit.outputDirCached";
+    String outputDir = get(cacheKey);
+    if (outputDir == null) {
+      outputDir = getStringVars(YOURKIT_OUTPUT_DIR, YOURKIT_OUTPUT_DIR_DEFAULT,
+          context);
+      set(cacheKey, outputDir);
+    }
+    return outputDir;
+  }
+
+  /**
+   * Get string, replacing variables in the output.
+   *
+   * %JOB_ID% => job id
+   * %TASK_ID% => task id
+   * %USER% => owning user name
+   *
+   * @param key name of key to lookup
+   * @param context mapper context
+   * @return value for key, with variables expanded
+   */
+  public String getStringVars(String key, Mapper.Context context) {
+    return getStringVars(key, null, context);
+  }
+
+  /**
+   * Get string, replacing variables in the output.
+   *
+   * %JOB_ID% => job id
+   * %TASK_ID% => task id
+   * %USER% => owning user name
+   *
+   * @param key name of key to lookup
+   * @param defaultValue value to return if no mapping exists. This can also
+   *                     have variables, which will be substituted.
+   * @param context mapper context
+   * @return value for key, with variables expanded
+   */
+  public String getStringVars(String key, String defaultValue,
+                              Mapper.Context context) {
+    String value = get(key);
+    if (value == null) {
+      if (defaultValue == null) {
+        return null;
+      }
+      value = defaultValue;
+    }
+    value = value.replace("%JOB_ID%", context.getJobID().toString());
+    value = value.replace("%TASK_ID%", context.getTaskAttemptID().toString());
+    value = value.replace("%USER%", get("user.name", "unknown_user"));
+    return value;
+  }
 }

http://git-wip-us.apache.org/repos/asf/giraph/blob/d36dd508/giraph-core/src/main/java/org/apache/giraph/conf/GiraphConstants.java
----------------------------------------------------------------------
diff --git a/giraph-core/src/main/java/org/apache/giraph/conf/GiraphConstants.java b/giraph-core/src/main/java/org/apache/giraph/conf/GiraphConstants.java
index 6a5949e..0067c25 100644
--- a/giraph-core/src/main/java/org/apache/giraph/conf/GiraphConstants.java
+++ b/giraph-core/src/main/java/org/apache/giraph/conf/GiraphConstants.java
@@ -586,6 +586,11 @@ public interface GiraphConstants {
   BooleanConfOption USE_OUT_OF_CORE_GRAPH =
       new BooleanConfOption("giraph.useOutOfCoreGraph", false);
 
+  /** Directory to write YourKit snapshots to */
+  String YOURKIT_OUTPUT_DIR = "giraph.yourkit.outputDir";
+  /** Default directory to write YourKit snapshots to */
+  String YOURKIT_OUTPUT_DIR_DEFAULT = "/tmp/giraph/%JOB_ID%/%TASK_ID%";
+
   /** Maximum number of partitions to hold in memory for each worker. */
   IntConfOption MAX_PARTITIONS_IN_MEMORY =
       new IntConfOption("giraph.maxPartitionsInMemory", 10);

http://git-wip-us.apache.org/repos/asf/giraph/blob/d36dd508/giraph-core/src/main/java/org/apache/giraph/utils/YourKitContext.java
----------------------------------------------------------------------
diff --git a/giraph-core/src/main/java/org/apache/giraph/utils/YourKitContext.java b/giraph-core/src/main/java/org/apache/giraph/utils/YourKitContext.java
new file mode 100644
index 0000000..5a05113
--- /dev/null
+++ b/giraph-core/src/main/java/org/apache/giraph/utils/YourKitContext.java
@@ -0,0 +1,123 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.giraph.utils;
+
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.log4j.Logger;
+
+import com.google.common.base.Joiner;
+import com.google.common.io.Files;
+import com.yourkit.api.Controller;
+import com.yourkit.api.ProfilingModes;
+
+import java.io.File;
+import java.io.IOException;
+
+/**
+ * Convenience context for profiling. Hides away all of the exception handling.
+ * Do not instantiate directly, use only through {@link YourKitProfiler}.
+ */
+public class YourKitContext {
+  /** Logger */
+  private static final Logger LOG = Logger.getLogger(YourKitContext.class);
+
+  /** Joiner on path separator */
+  private static Joiner SLASH_JOINER = Joiner.on("/");
+
+  /** The YourKit profiling object, or null if no profiling going on. */
+  private final Controller yourKitController;
+
+  /**
+   * Constructor
+   * @param yourKitController profiling object
+   */
+  YourKitContext(Controller yourKitController) {
+    this.yourKitController = yourKitController;
+  }
+
+  /**
+   * Capture a snapshot
+   * @param flags See {@link com.yourkit.api.ProfilingModes}
+   * @param context map context
+   * @param name unique name for this snapshot
+   */
+  private void snapshot(long flags, Mapper.Context context, String name) {
+    if (yourKitController != null) {
+      String path;
+      try {
+        path = yourKitController.captureSnapshot(flags);
+        // CHECKSTYLE: stop IllegalCatch
+      } catch (Exception e) {
+        // CHECKSTYLE: resume IllegalCatch
+        return;
+      }
+      File destFile = new File(SLASH_JOINER.join(
+          "/tmp", context.getJobID(), context.getTaskAttemptID(),
+          name + ".snapshot"));
+      try {
+        Files.createParentDirs(destFile);
+        Files.move(new File(path), destFile);
+      } catch (IOException e) {
+        LOG.error("Failed to move YourKit snapshot file from " + path +
+            " to " + destFile.getPath(), e);
+      }
+    }
+  }
+
+  /**
+   * This method is just a convenient replacement of
+   * {@link #snapshot(long, Mapper.Context, String)} with
+   * {@link com.yourkit.api.ProfilingModes#SNAPSHOT_WITH_HEAP} for the flags.
+   *
+   * WARNING: This is likely to be VERY slow for large jobs.
+   *
+   * @param context map context
+   * @param name unique name for this snapshot
+   */
+  public void snapshotWithMemory(Mapper.Context context, String name) {
+    snapshot(ProfilingModes.SNAPSHOT_WITH_HEAP, context, name);
+  }
+
+  /**
+   * This method is just a convenient replacement of
+   * {@link #snapshot(long, Mapper.Context, String)} with
+   * {@link com.yourkit.api.ProfilingModes#SNAPSHOT_WITHOUT_HEAP} for the flags.
+   *
+   * @param context map context
+   * @param name unique name for this snapshot
+   */
+  public void snapshotCPUOnly(Mapper.Context context, String name) {
+    snapshot(ProfilingModes.SNAPSHOT_WITHOUT_HEAP, context, name);
+  }
+
+  /**
+   * Stop profiling CPU
+   */
+  public void stop() {
+    if (yourKitController != null) {
+      try {
+        yourKitController.stopCPUProfiling();
+        // CHECKSTYLE: stop IllegalCatch
+      } catch (Exception e) {
+        // CHECKSTYLE: resume IllegalCatch
+        LOG.error("Failed to stop YourKit CPU profiling", e);
+      }
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/giraph/blob/d36dd508/giraph-core/src/main/java/org/apache/giraph/utils/YourKitProfiler.java
----------------------------------------------------------------------
diff --git a/giraph-core/src/main/java/org/apache/giraph/utils/YourKitProfiler.java b/giraph-core/src/main/java/org/apache/giraph/utils/YourKitProfiler.java
new file mode 100644
index 0000000..c9688bd
--- /dev/null
+++ b/giraph-core/src/main/java/org/apache/giraph/utils/YourKitProfiler.java
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.giraph.utils;
+
+import org.apache.giraph.conf.GiraphConfiguration;
+import org.apache.log4j.Logger;
+
+import com.yourkit.api.Controller;
+import com.yourkit.api.ProfilingModes;
+
+/**
+ * Helper for YourKit profiling from within the code.
+ *
+ * See the following for information about usage:
+ *  - http://www.yourkit.com/docs/95/help/api.jsp
+ *  - http://www.yourkit.com/docs/95/api/index.html
+ *
+ * This class is a simple helper around the API mentioned above that allows you
+ * to easily wrap code with {@link YourKitProfiler#startProfile(GiraphConfiguration)}
+ * followed by any number of snapshotX calls and finally {@link YourKitContext#stop()}.
+ * See also {@link YourKitContext}.
+ *
+ * As of 05/2013 YourKit is not publishing their API jars to Maven, but their
+ * license allows us to do it, so we have set up a repository to do this.
+ * See https://github.com/facebook/sonatype-yourkit for more info.
+ */
+public class YourKitProfiler {
+  /** Logger */
+  private static final Logger LOG = Logger.getLogger(YourKitProfiler.class);
+
+  /** Don't construct, allow inheritance */
+  protected YourKitProfiler() { }
+
+  /**
+   * Start YourKit CPU profiling in {@link ProfilingModes#CPU_SAMPLING} mode
+   * with stack telemetry enabled and the default filters.
+   *
+   * @param conf GiraphConfiguration
+   * @return profiler context
+   */
+  public static YourKitContext startProfile(GiraphConfiguration conf) {
+    Controller controller = null;
+    try {
+      controller = new Controller();
+      controller.enableStackTelemetry();
+      controller.startCPUProfiling(ProfilingModes.CPU_SAMPLING,
+          Controller.DEFAULT_FILTERS);
+      LOG.debug("Started YourKit profiling CPU");
+      // CHECKSTYLE: stop IllegalCatch
+    } catch (Exception e) {
+      // CHECKSTYLE: resume IllegalCatch
+      LOG.debug("Failed to start YourKit CPU profiling", e);
+    }
+    return new YourKitContext(controller);
+  }
+}

http://git-wip-us.apache.org/repos/asf/giraph/blob/d36dd508/giraph-core/src/main/java/org/apache/giraph/worker/BspServiceWorker.java
----------------------------------------------------------------------
diff --git a/giraph-core/src/main/java/org/apache/giraph/worker/BspServiceWorker.java b/giraph-core/src/main/java/org/apache/giraph/worker/BspServiceWorker.java
index 51edbac..03a4876 100644
--- a/giraph-core/src/main/java/org/apache/giraph/worker/BspServiceWorker.java
+++ b/giraph-core/src/main/java/org/apache/giraph/worker/BspServiceWorker.java
@@ -18,8 +18,8 @@
 
 package org.apache.giraph.worker;
 
-
 import org.apache.giraph.bsp.ApplicationState;
+import org.apache.giraph.bsp.BspService;
 import org.apache.giraph.bsp.CentralizedServiceWorker;
 import org.apache.giraph.comm.ServerData;
 import org.apache.giraph.comm.WorkerClient;
@@ -32,27 +32,18 @@ import org.apache.giraph.comm.netty.NettyWorkerClientRequestProcessor;
 import org.apache.giraph.comm.netty.NettyWorkerServer;
 import org.apache.giraph.conf.GiraphConstants;
 import org.apache.giraph.conf.ImmutableClassesGiraphConfiguration;
+import org.apache.giraph.graph.AddressesAndPartitionsWritable;
+import org.apache.giraph.graph.FinishedSuperstepStats;
+import org.apache.giraph.graph.GlobalStats;
 import org.apache.giraph.graph.GraphState;
-import org.apache.giraph.bsp.BspService;
 import org.apache.giraph.graph.GraphTaskManager;
-import org.apache.giraph.graph.VertexEdgeCount;
-import org.apache.giraph.graph.InputSplitPaths;
 import org.apache.giraph.graph.InputSplitEvents;
-import org.apache.giraph.graph.FinishedSuperstepStats;
-import org.apache.giraph.graph.AddressesAndPartitionsWritable;
-import org.apache.giraph.graph.GlobalStats;
-import org.apache.giraph.io.superstep_output.SuperstepOutput;
-import org.apache.giraph.utils.CallableFactory;
-import org.apache.giraph.utils.JMapHistoDumper;
+import org.apache.giraph.graph.InputSplitPaths;
 import org.apache.giraph.graph.Vertex;
+import org.apache.giraph.graph.VertexEdgeCount;
 import org.apache.giraph.io.VertexOutputFormat;
 import org.apache.giraph.io.VertexWriter;
-import org.apache.giraph.partition.Partition;
-import org.apache.giraph.partition.PartitionExchange;
-import org.apache.giraph.partition.PartitionOwner;
-import org.apache.giraph.partition.PartitionStats;
-import org.apache.giraph.partition.PartitionStore;
-import org.apache.giraph.partition.WorkerGraphPartitioner;
+import org.apache.giraph.io.superstep_output.SuperstepOutput;
 import org.apache.giraph.master.MasterInfo;
 import org.apache.giraph.metrics.GiraphMetrics;
 import org.apache.giraph.metrics.GiraphTimer;
@@ -60,6 +51,14 @@ import org.apache.giraph.metrics.GiraphTimerContext;
 import org.apache.giraph.metrics.ResetSuperstepMetricsObserver;
 import org.apache.giraph.metrics.SuperstepMetricsRegistry;
 import org.apache.giraph.metrics.WorkerSuperstepMetrics;
+import org.apache.giraph.partition.Partition;
+import org.apache.giraph.partition.PartitionExchange;
+import org.apache.giraph.partition.PartitionOwner;
+import org.apache.giraph.partition.PartitionStats;
+import org.apache.giraph.partition.PartitionStore;
+import org.apache.giraph.partition.WorkerGraphPartitioner;
+import org.apache.giraph.utils.CallableFactory;
+import org.apache.giraph.utils.JMapHistoDumper;
 import org.apache.giraph.utils.LoggerUtils;
 import org.apache.giraph.utils.MemoryUtils;
 import org.apache.giraph.utils.ProgressableUtils;

http://git-wip-us.apache.org/repos/asf/giraph/blob/d36dd508/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 7d7fec9..0afd675 100644
--- a/pom.xml
+++ b/pom.xml
@@ -262,6 +262,7 @@ under the License.
     <hive.version>0.10.0</hive.version>
 
     <facebook-hadoop.version>0.20.0</facebook-hadoop.version>
+    <yourkit-api.version>9.5.6</yourkit-api.version>
     <forHadoop>for-hadoop-${hadoop.version}</forHadoop>
   </properties>
 
@@ -1018,6 +1019,11 @@ under the License.
         <version>1.2</version>
       </dependency>
       <dependency>
+        <groupId>com.facebook.thirdparty.yourkit-api</groupId>
+        <artifactId>yjp-controller-api-redist</artifactId>
+        <version>${yourkit-api.version}</version>
+      </dependency>
+      <dependency>
         <groupId>io.netty</groupId>
         <artifactId>netty</artifactId>
         <version>3.5.3.Final</version>