You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@giraph.apache.org by ni...@apache.org on 2013/05/06 20:29:01 UTC
git commit: updated refs/heads/trunk to d36dd50
Updated Branches:
refs/heads/trunk 4f88fc8fc -> d36dd5081
GIRAPH-592: yourkit profiling
Project: http://git-wip-us.apache.org/repos/asf/giraph/repo
Commit: http://git-wip-us.apache.org/repos/asf/giraph/commit/d36dd508
Tree: http://git-wip-us.apache.org/repos/asf/giraph/tree/d36dd508
Diff: http://git-wip-us.apache.org/repos/asf/giraph/diff/d36dd508
Branch: refs/heads/trunk
Commit: d36dd50810929b5dad2d3bab2007e28802087ad6
Parents: 4f88fc8
Author: Nitay Joffe <ni...@apache.org>
Authored: Mon May 6 14:24:46 2013 -0400
Committer: Nitay Joffe <ni...@apache.org>
Committed: Mon May 6 14:24:46 2013 -0400
----------------------------------------------------------------------
CHANGELOG | 2 +
giraph-core/pom.xml | 4 +
.../apache/giraph/conf/GiraphConfiguration.java | 60 +++++++
.../org/apache/giraph/conf/GiraphConstants.java | 5 +
.../org/apache/giraph/utils/YourKitContext.java | 123 +++++++++++++++
.../org/apache/giraph/utils/YourKitProfiler.java | 72 +++++++++
.../org/apache/giraph/worker/BspServiceWorker.java | 31 ++--
pom.xml | 6 +
8 files changed, 287 insertions(+), 16 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/giraph/blob/d36dd508/CHANGELOG
----------------------------------------------------------------------
diff --git a/CHANGELOG b/CHANGELOG
index 018c2a4..6439581 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,6 +1,8 @@
Giraph Change Log
Release 1.0.1 - unreleased
+ GIRAPH-592: YourKit profiler (nitay)
+
GIRAPH-618: Website Documentation: Aggregators (and sharded aggregators)
(majakabiljo)
http://git-wip-us.apache.org/repos/asf/giraph/blob/d36dd508/giraph-core/pom.xml
----------------------------------------------------------------------
diff --git a/giraph-core/pom.xml b/giraph-core/pom.xml
index 56ff468..bfd894c 100644
--- a/giraph-core/pom.xml
+++ b/giraph-core/pom.xml
@@ -409,6 +409,10 @@ under the License.
<dependencies>
<!-- compile dependencies. sorted lexicographically. -->
<dependency>
+ <groupId>com.facebook.thirdparty.yourkit-api</groupId>
+ <artifactId>yjp-controller-api-redist</artifactId>
+ </dependency>
+ <dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
</dependency>
http://git-wip-us.apache.org/repos/asf/giraph/blob/d36dd508/giraph-core/src/main/java/org/apache/giraph/conf/GiraphConfiguration.java
----------------------------------------------------------------------
diff --git a/giraph-core/src/main/java/org/apache/giraph/conf/GiraphConfiguration.java b/giraph-core/src/main/java/org/apache/giraph/conf/GiraphConfiguration.java
index 45a29ff..48f3d4b 100644
--- a/giraph-core/src/main/java/org/apache/giraph/conf/GiraphConfiguration.java
+++ b/giraph-core/src/main/java/org/apache/giraph/conf/GiraphConfiguration.java
@@ -38,6 +38,7 @@ import org.apache.giraph.partition.ReusesObjectsPartition;
import org.apache.giraph.worker.WorkerContext;
import org.apache.giraph.worker.WorkerObserver;
import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.net.DNS;
import java.net.UnknownHostException;
@@ -1009,4 +1010,63 @@ public class GiraphConfiguration extends Configuration
super.setClass(name, theClass, xface);
giraphSetParameters.setClass(name, theClass, xface);
}
+
+  /**
+   * Resolve the directory that YourKit snapshots should be written to.
+   *
+   * On the first call the value of {@code YOURKIT_OUTPUT_DIR} (or
+   * {@code YOURKIT_OUTPUT_DIR_DEFAULT} when it is not set) is expanded through
+   * {@link #getStringVars(String, String, Mapper.Context)} and stored back
+   * into this configuration under an internal key. Later calls return the
+   * stored value without expanding it again.
+   *
+   * @param context Map context
+   * @return output directory
+   */
+  public String getYourKitOutputDir(Mapper.Context context) {
+    final String cacheKey = "giraph.yourkit.outputDirCached";
+    String cachedDir = get(cacheKey);
+    if (cachedDir != null) {
+      return cachedDir;
+    }
+    String expandedDir = getStringVars(YOURKIT_OUTPUT_DIR,
+        YOURKIT_OUTPUT_DIR_DEFAULT, context);
+    set(cacheKey, expandedDir);
+    return expandedDir;
+  }
+
+  /**
+   * Look up a string without a fallback value, expanding variables in it.
+   *
+   * %JOB_ID% => job id
+   * %TASK_ID% => task attempt id
+   * %USER% => owning user name
+   *
+   * @param key name of key to lookup
+   * @param context mapper context
+   * @return value for key, with variables expanded, or null if the key has
+   *         no mapping
+   */
+  public String getStringVars(String key, Mapper.Context context) {
+    // No default: an unmapped key yields null from the three-argument overload.
+    final String noDefault = null;
+    return getStringVars(key, noDefault, context);
+  }
+
+  /**
+   * Look up a string, falling back to a default, and expand variables in it.
+   *
+   * %JOB_ID% => job id
+   * %TASK_ID% => task attempt id
+   * %USER% => value of "user.name", or "unknown_user" when that is not set
+   *
+   * @param key name of key to lookup
+   * @param defaultValue value to use if no mapping exists. This can also
+   *                     have variables, which will be substituted.
+   * @param context mapper context
+   * @return value for key, with variables expanded, or null when the key has
+   *         no mapping and defaultValue is null
+   */
+  public String getStringVars(String key, String defaultValue,
+      Mapper.Context context) {
+    String configured = get(key);
+    String template = configured != null ? configured : defaultValue;
+    if (template == null) {
+      return null;
+    }
+    String jobId = context.getJobID().toString();
+    String taskAttemptId = context.getTaskAttemptID().toString();
+    String userName = get("user.name", "unknown_user");
+    // Substitutions are applied one after another, in this order.
+    return template
+        .replace("%JOB_ID%", jobId)
+        .replace("%TASK_ID%", taskAttemptId)
+        .replace("%USER%", userName);
+  }
}
http://git-wip-us.apache.org/repos/asf/giraph/blob/d36dd508/giraph-core/src/main/java/org/apache/giraph/conf/GiraphConstants.java
----------------------------------------------------------------------
diff --git a/giraph-core/src/main/java/org/apache/giraph/conf/GiraphConstants.java b/giraph-core/src/main/java/org/apache/giraph/conf/GiraphConstants.java
index 6a5949e..0067c25 100644
--- a/giraph-core/src/main/java/org/apache/giraph/conf/GiraphConstants.java
+++ b/giraph-core/src/main/java/org/apache/giraph/conf/GiraphConstants.java
@@ -586,6 +586,11 @@ public interface GiraphConstants {
BooleanConfOption USE_OUT_OF_CORE_GRAPH =
new BooleanConfOption("giraph.useOutOfCoreGraph", false);
+ /**
+ * Configuration key for the directory to write YourKit snapshots to.
+ * The value may contain the %JOB_ID%, %TASK_ID% and %USER% placeholders,
+ * which GiraphConfiguration#getYourKitOutputDir expands.
+ */
+ String YOURKIT_OUTPUT_DIR = "giraph.yourkit.outputDir";
+ /**
+ * Default directory to write YourKit snapshots to, used when
+ * {@link #YOURKIT_OUTPUT_DIR} is not set. Placeholders are expanded the
+ * same way as for a configured value.
+ */
+ String YOURKIT_OUTPUT_DIR_DEFAULT = "/tmp/giraph/%JOB_ID%/%TASK_ID%";
+
/** Maximum number of partitions to hold in memory for each worker. */
IntConfOption MAX_PARTITIONS_IN_MEMORY =
new IntConfOption("giraph.maxPartitionsInMemory", 10);
http://git-wip-us.apache.org/repos/asf/giraph/blob/d36dd508/giraph-core/src/main/java/org/apache/giraph/utils/YourKitContext.java
----------------------------------------------------------------------
diff --git a/giraph-core/src/main/java/org/apache/giraph/utils/YourKitContext.java b/giraph-core/src/main/java/org/apache/giraph/utils/YourKitContext.java
new file mode 100644
index 0000000..5a05113
--- /dev/null
+++ b/giraph-core/src/main/java/org/apache/giraph/utils/YourKitContext.java
@@ -0,0 +1,123 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.giraph.utils;
+
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.log4j.Logger;
+
+import com.google.common.base.Joiner;
+import com.google.common.io.Files;
+import com.yourkit.api.Controller;
+import com.yourkit.api.ProfilingModes;
+
+import java.io.File;
+import java.io.IOException;
+
+/**
+ * Convenience context for profiling. Hides away all of the exception handling.
+ * Do not instantiate directly, use only through {@link YourKitProfiler}.
+ *
+ * Every operation is a no-op when the context holds no controller, and any
+ * failure reported by YourKit or the file system is logged rather than
+ * propagated to the caller.
+ */
+public class YourKitContext {
+  /** Logger */
+  private static final Logger LOG = Logger.getLogger(YourKitContext.class);
+
+  /** Joiner on path separator */
+  private static final Joiner SLASH_JOINER = Joiner.on("/");
+
+  /** The YourKit profiling object, or null if no profiling going on. */
+  private final Controller yourKitController;
+
+  /**
+   * Constructor
+   * @param yourKitController profiling object, or null to make every
+   *                          operation on this context a no-op
+   */
+  YourKitContext(Controller yourKitController) {
+    this.yourKitController = yourKitController;
+  }
+
+  /**
+   * Capture a snapshot and move it to
+   * /tmp/&lt;job id&gt;/&lt;task attempt id&gt;/&lt;name&gt;.snapshot.
+   *
+   * NOTE: the destination is fixed under /tmp; the configured YourKit output
+   * directory is not consulted here.
+   *
+   * @param flags See {@link com.yourkit.api.ProfilingModes}
+   * @param context map context
+   * @param name unique name for this snapshot
+   */
+  private void snapshot(long flags, Mapper.Context context, String name) {
+    if (yourKitController == null) {
+      return;
+    }
+    String path;
+    try {
+      path = yourKitController.captureSnapshot(flags);
+      // CHECKSTYLE: stop IllegalCatch
+    } catch (Exception e) {
+      // CHECKSTYLE: resume IllegalCatch
+      // Best effort: report the failure instead of dropping it silently.
+      LOG.error("Failed to capture YourKit snapshot " + name, e);
+      return;
+    }
+    File destFile = new File(SLASH_JOINER.join(
+        "/tmp", context.getJobID(), context.getTaskAttemptID(),
+        name + ".snapshot"));
+    try {
+      Files.createParentDirs(destFile);
+      Files.move(new File(path), destFile);
+    } catch (IOException e) {
+      LOG.error("Failed to move YourKit snapshot file from " + path +
+          " to " + destFile.getPath(), e);
+    }
+  }
+
+  /**
+   * Capture a snapshot using
+   * {@link com.yourkit.api.ProfilingModes#SNAPSHOT_WITH_HEAP} for the flags.
+   *
+   * WARNING: This is likely to be VERY slow for large jobs.
+   *
+   * @param context map context
+   * @param name unique name for this snapshot
+   */
+  public void snapshotWithMemory(Mapper.Context context, String name) {
+    snapshot(ProfilingModes.SNAPSHOT_WITH_HEAP, context, name);
+  }
+
+  /**
+   * Capture a snapshot using
+   * {@link com.yourkit.api.ProfilingModes#SNAPSHOT_WITHOUT_HEAP} for the
+   * flags.
+   *
+   * @param context map context
+   * @param name unique name for this snapshot
+   */
+  public void snapshotCPUOnly(Mapper.Context context, String name) {
+    snapshot(ProfilingModes.SNAPSHOT_WITHOUT_HEAP, context, name);
+  }
+
+  /**
+   * Stop profiling CPU. A failure to stop is logged, not thrown.
+   */
+  public void stop() {
+    if (yourKitController != null) {
+      try {
+        yourKitController.stopCPUProfiling();
+        // CHECKSTYLE: stop IllegalCatch
+      } catch (Exception e) {
+        // CHECKSTYLE: resume IllegalCatch
+        LOG.error("Failed to stop YourKit CPU profiling", e);
+      }
+    }
+  }
+}
http://git-wip-us.apache.org/repos/asf/giraph/blob/d36dd508/giraph-core/src/main/java/org/apache/giraph/utils/YourKitProfiler.java
----------------------------------------------------------------------
diff --git a/giraph-core/src/main/java/org/apache/giraph/utils/YourKitProfiler.java b/giraph-core/src/main/java/org/apache/giraph/utils/YourKitProfiler.java
new file mode 100644
index 0000000..c9688bd
--- /dev/null
+++ b/giraph-core/src/main/java/org/apache/giraph/utils/YourKitProfiler.java
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.giraph.utils;
+
+import org.apache.giraph.conf.GiraphConfiguration;
+import org.apache.log4j.Logger;
+
+import com.yourkit.api.Controller;
+import com.yourkit.api.ProfilingModes;
+
+/**
+ * Helper for YourKit profiling from within the code.
+ *
+ * See the following for information about usage:
+ * - http://www.yourkit.com/docs/95/help/api.jsp
+ * - http://www.yourkit.com/docs/95/api/index.html
+ *
+ * This class is a simple helper around the API mentioned above that allows you
+ * to easily wrap code with
+ * {@link YourKitProfiler#startProfile(GiraphConfiguration)} followed by any
+ * amount of snapshotX calls and finally {@link YourKitContext#stop()}.
+ * See also {@link YourKitContext}.
+ *
+ * As of 05/2013 YourKit is not publishing their API jars to Maven, but their
+ * license allows us to do it, so we have setup a repository to do this.
+ * See https://github.com/facebook/sonatype-yourkit for more info.
+ */
+public class YourKitProfiler {
+  /** Logger */
+  private static final Logger LOG = Logger.getLogger(YourKitProfiler.class);
+
+  /** Don't construct, allow inheritance */
+  protected YourKitProfiler() { }
+
+  /**
+   * Start CPU profiling in {@link ProfilingModes#CPU_SAMPLING} mode, with
+   * stack telemetry enabled and {@link Controller#DEFAULT_FILTERS}.
+   *
+   * Failures are logged at debug level and never thrown. When profiling
+   * could not be started the returned context holds no controller, so its
+   * snapshot and stop calls do nothing.
+   *
+   * @param conf GiraphConfiguration (currently not read by this method)
+   * @return profiler context, never null
+   */
+  public static YourKitContext startProfile(GiraphConfiguration conf) {
+    Controller controller = null;
+    try {
+      Controller candidate = new Controller();
+      candidate.enableStackTelemetry();
+      candidate.startCPUProfiling(ProfilingModes.CPU_SAMPLING,
+          Controller.DEFAULT_FILTERS);
+      // Publish the controller only once profiling is really running, so a
+      // half-initialized controller never reaches the context.
+      controller = candidate;
+      LOG.debug("Started YourKit profiling CPU");
+      // CHECKSTYLE: stop IllegalCatch
+    } catch (Exception e) {
+      // CHECKSTYLE: resume IllegalCatch
+      LOG.debug("Failed to start YourKit CPU profiling", e);
+    }
+    return new YourKitContext(controller);
+  }
+}
http://git-wip-us.apache.org/repos/asf/giraph/blob/d36dd508/giraph-core/src/main/java/org/apache/giraph/worker/BspServiceWorker.java
----------------------------------------------------------------------
diff --git a/giraph-core/src/main/java/org/apache/giraph/worker/BspServiceWorker.java b/giraph-core/src/main/java/org/apache/giraph/worker/BspServiceWorker.java
index 51edbac..03a4876 100644
--- a/giraph-core/src/main/java/org/apache/giraph/worker/BspServiceWorker.java
+++ b/giraph-core/src/main/java/org/apache/giraph/worker/BspServiceWorker.java
@@ -18,8 +18,8 @@
package org.apache.giraph.worker;
-
import org.apache.giraph.bsp.ApplicationState;
+import org.apache.giraph.bsp.BspService;
import org.apache.giraph.bsp.CentralizedServiceWorker;
import org.apache.giraph.comm.ServerData;
import org.apache.giraph.comm.WorkerClient;
@@ -32,27 +32,18 @@ import org.apache.giraph.comm.netty.NettyWorkerClientRequestProcessor;
import org.apache.giraph.comm.netty.NettyWorkerServer;
import org.apache.giraph.conf.GiraphConstants;
import org.apache.giraph.conf.ImmutableClassesGiraphConfiguration;
+import org.apache.giraph.graph.AddressesAndPartitionsWritable;
+import org.apache.giraph.graph.FinishedSuperstepStats;
+import org.apache.giraph.graph.GlobalStats;
import org.apache.giraph.graph.GraphState;
-import org.apache.giraph.bsp.BspService;
import org.apache.giraph.graph.GraphTaskManager;
-import org.apache.giraph.graph.VertexEdgeCount;
-import org.apache.giraph.graph.InputSplitPaths;
import org.apache.giraph.graph.InputSplitEvents;
-import org.apache.giraph.graph.FinishedSuperstepStats;
-import org.apache.giraph.graph.AddressesAndPartitionsWritable;
-import org.apache.giraph.graph.GlobalStats;
-import org.apache.giraph.io.superstep_output.SuperstepOutput;
-import org.apache.giraph.utils.CallableFactory;
-import org.apache.giraph.utils.JMapHistoDumper;
+import org.apache.giraph.graph.InputSplitPaths;
import org.apache.giraph.graph.Vertex;
+import org.apache.giraph.graph.VertexEdgeCount;
import org.apache.giraph.io.VertexOutputFormat;
import org.apache.giraph.io.VertexWriter;
-import org.apache.giraph.partition.Partition;
-import org.apache.giraph.partition.PartitionExchange;
-import org.apache.giraph.partition.PartitionOwner;
-import org.apache.giraph.partition.PartitionStats;
-import org.apache.giraph.partition.PartitionStore;
-import org.apache.giraph.partition.WorkerGraphPartitioner;
+import org.apache.giraph.io.superstep_output.SuperstepOutput;
import org.apache.giraph.master.MasterInfo;
import org.apache.giraph.metrics.GiraphMetrics;
import org.apache.giraph.metrics.GiraphTimer;
@@ -60,6 +51,14 @@ import org.apache.giraph.metrics.GiraphTimerContext;
import org.apache.giraph.metrics.ResetSuperstepMetricsObserver;
import org.apache.giraph.metrics.SuperstepMetricsRegistry;
import org.apache.giraph.metrics.WorkerSuperstepMetrics;
+import org.apache.giraph.partition.Partition;
+import org.apache.giraph.partition.PartitionExchange;
+import org.apache.giraph.partition.PartitionOwner;
+import org.apache.giraph.partition.PartitionStats;
+import org.apache.giraph.partition.PartitionStore;
+import org.apache.giraph.partition.WorkerGraphPartitioner;
+import org.apache.giraph.utils.CallableFactory;
+import org.apache.giraph.utils.JMapHistoDumper;
import org.apache.giraph.utils.LoggerUtils;
import org.apache.giraph.utils.MemoryUtils;
import org.apache.giraph.utils.ProgressableUtils;
http://git-wip-us.apache.org/repos/asf/giraph/blob/d36dd508/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 7d7fec9..0afd675 100644
--- a/pom.xml
+++ b/pom.xml
@@ -262,6 +262,7 @@ under the License.
<hive.version>0.10.0</hive.version>
<facebook-hadoop.version>0.20.0</facebook-hadoop.version>
+ <yourkit-api.version>9.5.6</yourkit-api.version>
<forHadoop>for-hadoop-${hadoop.version}</forHadoop>
</properties>
@@ -1018,6 +1019,11 @@ under the License.
<version>1.2</version>
</dependency>
<dependency>
+ <groupId>com.facebook.thirdparty.yourkit-api</groupId>
+ <artifactId>yjp-controller-api-redist</artifactId>
+ <version>${yourkit-api.version}</version>
+ </dependency>
+ <dependency>
<groupId>io.netty</groupId>
<artifactId>netty</artifactId>
<version>3.5.3.Final</version>