You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tez.apache.org by ss...@apache.org on 2014/02/05 00:43:27 UTC
git commit: TEZ-797. Add documentation for some of the Tez config
parameters. (sseth)
Updated Branches:
refs/heads/master 4e51cb851 -> 2e93f1763
TEZ-797. Add documentation for some of the Tez config parameters.
(sseth)
Project: http://git-wip-us.apache.org/repos/asf/incubator-tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-tez/commit/2e93f176
Tree: http://git-wip-us.apache.org/repos/asf/incubator-tez/tree/2e93f176
Diff: http://git-wip-us.apache.org/repos/asf/incubator-tez/diff/2e93f176
Branch: refs/heads/master
Commit: 2e93f176394bcc400bdda8f43cacd9c8a1a01d4d
Parents: 4e51cb8
Author: Siddharth Seth <ss...@apache.org>
Authored: Tue Feb 4 15:43:06 2014 -0800
Committer: Siddharth Seth <ss...@apache.org>
Committed: Tue Feb 4 15:43:06 2014 -0800
----------------------------------------------------------------------
.../org/apache/tez/common/TezJobConfig.java | 15 +++++++--
.../apache/tez/dag/api/TezConfiguration.java | 32 ++++++++++++++++++--
.../vertexmanager/ShuffleVertexManager.java | 10 ++++++
3 files changed, 52 insertions(+), 5 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-tez/blob/2e93f176/tez-api/src/main/java/org/apache/tez/common/TezJobConfig.java
----------------------------------------------------------------------
diff --git a/tez-api/src/main/java/org/apache/tez/common/TezJobConfig.java b/tez-api/src/main/java/org/apache/tez/common/TezJobConfig.java
index 711a13a..8d347d7 100644
--- a/tez-api/src/main/java/org/apache/tez/common/TezJobConfig.java
+++ b/tez-api/src/main/java/org/apache/tez/common/TezJobConfig.java
@@ -291,17 +291,28 @@ public class TezJobConfig {
"tez.runtime.intermediate-output.value.class";
public static final String TEZ_RUNTIME_INTERMEDIATE_INPUT_VALUE_CLASS =
"tez.runtime.intermediate-input.value.class";
-
+
+
+ /** Whether intermediate output should be compressed or not */
public static final String TEZ_RUNTIME_INTERMEDIATE_OUTPUT_SHOULD_COMPRESS =
"tez.runtime.intermediate-output.should-compress";
+ /** Whether intermediate input is compressed */
public static final String TEZ_RUNTIME_INTERMEDIATE_INPUT_IS_COMPRESSED =
"tez.runtime.intermediate-input.is-compressed";
-
+ /**
+ * The codec to be used if compressing intermediate output. Only applicable if
+ * tez.runtime.intermediate-output.should-compress is enabled.
+ */
public static final String TEZ_RUNTIME_INTERMEDIATE_OUTPUT_COMPRESS_CODEC =
"tez.runtime.intermediate-output.compress.codec";
+ /**
+ * The codec to be used when reading intermediate compressed input. Only
+ * applicable if tez.runtime.intermediate-input.is-compressed is enabled.
+ */
public static final String TEZ_RUNTIME_INTERMEDIATE_INPUT_COMPRESS_CODEC =
"tez.runtime.intermediate-input.compress.codec";
+
public static final String TEZ_RUNTIME_INTERMEDIATE_INPUT_KEY_SECONDARY_COMPARATOR_CLASS =
"tez.runtime.intermediate-input.key.secondary.comparator.class";
http://git-wip-us.apache.org/repos/asf/incubator-tez/blob/2e93f176/tez-api/src/main/java/org/apache/tez/dag/api/TezConfiguration.java
----------------------------------------------------------------------
diff --git a/tez-api/src/main/java/org/apache/tez/dag/api/TezConfiguration.java b/tez-api/src/main/java/org/apache/tez/dag/api/TezConfiguration.java
index 2a3cde9..ba80854 100644
--- a/tez-api/src/main/java/org/apache/tez/dag/api/TezConfiguration.java
+++ b/tez-api/src/main/java/org/apache/tez/dag/api/TezConfiguration.java
@@ -41,6 +41,7 @@ public class TezConfiguration extends Configuration {
public static final String TEZ_AM_PREFIX = TEZ_PREFIX + "am.";
public static final String TEZ_TASK_PREFIX = TEZ_PREFIX + "task.";
+ /** The staging dir used while submitting DAGs */
public static final String TEZ_AM_STAGING_DIR = TEZ_PREFIX + "staging-dir";
public static final String TEZ_AM_STAGING_DIR_DEFAULT = "/tmp/tez/staging";
@@ -54,7 +55,8 @@ public class TezConfiguration extends Configuration {
public static final String TEZ_AM_ABORT_ALL_OUTPUTS_ON_DAG_FAILURE =
TEZ_AM_PREFIX + "abort-all-outputs-on-dag-failure";
public static final boolean TEZ_AM_ABORT_ALL_OUTPUTS_ON_DAG_FAILURE_DEFAULT = true;
-
+
+ /** Java options for the Tez AppMaster process. */
public static final String TEZ_AM_JAVA_OPTS = TEZ_AM_PREFIX
+ "java.opts";
public static final String TEZ_AM_JAVA_OPTS_DEFAULT = " -Xmx1024m ";
@@ -123,6 +125,7 @@ public class TezConfiguration extends Configuration {
TEZ_AM_PREFIX + "client.am.port-range";
+ /** The amount of memory, in MB, to be used by the AppMaster */
public static final String TEZ_AM_RESOURCE_MEMORY_MB = TEZ_AM_PREFIX
+ "resource.memory.mb";
public static final int TEZ_AM_RESOURCE_MEMORY_MB_DEFAULT = 1536;
@@ -146,10 +149,12 @@ public class TezConfiguration extends Configuration {
public static final String TEZ_AM_PLAN_REMOTE_PATH = TEZ_AM_PREFIX
+ "dag-am-plan.remote.path";
+ /** The maximum heartbeat interval between the AM and RM in milliseconds */
public static final String TEZ_AM_RM_HEARTBEAT_INTERVAL_MS_MAX = TEZ_AM_PREFIX
+ "am-rm.heartbeat.interval-ms.max";
public static final int TEZ_AM_RM_HEARTBEAT_INTERVAL_MS_MAX_DEFAULT = 1000;
+ /** The maximum amount of time, in milliseconds, to wait before a task asks an AM for another task. */
public static final String TEZ_TASK_GET_TASK_SLEEP_INTERVAL_MS_MAX = TEZ_TASK_PREFIX
+ "get-task.sleep.interval-ms.max";
public static final int TEZ_TASK_GET_TASK_SLEEP_INTERVAL_MS_MAX_DEFAULT = 200;
@@ -190,12 +195,21 @@ public class TezConfiguration extends Configuration {
public static final boolean
TEZ_AM_CONTAINER_REUSE_NON_LOCAL_FALLBACK_ENABLED_DEFAULT = false;
+ /**
+ * The amount of time, in milliseconds, to wait before assigning a container
+ * to the next level of locality. The order is NODE -> RACK -> NON_LOCAL.
+ */
public static final String
TEZ_AM_CONTAINER_REUSE_LOCALITY_DELAY_ALLOCATION_MILLIS =
TEZ_AM_PREFIX + "container.reuse.locality.delay-allocation-millis";
public static final long
TEZ_AM_CONTAINER_REUSE_LOCALITY_DELAY_ALLOCATION_MILLIS_DEFAULT = 1000l;
+ /**
+ * The amount of time, in milliseconds, to hold on to a container if no task
+ * can be assigned to it immediately. Only active when reuse is enabled. Set
+ * to -1 to never release a container in a session.
+ */
public static final String TEZ_AM_CONTAINER_SESSION_DELAY_ALLOCATION_MILLIS =
TEZ_AM_PREFIX + "container.session.delay-allocation-millis";
public static final long
@@ -216,8 +230,8 @@ public class TezConfiguration extends Configuration {
public static final String TEZ_CONTAINER_OUT_FILE_NAME = "stdout";
- public static final String TEZ_LIB_URIS =
- TEZ_PREFIX + "lib.uris";
+ /** The location of the Tez libraries which will be localized for DAGs */
+ public static final String TEZ_LIB_URIS = TEZ_PREFIX + "lib.uris";
public static final String TEZ_APPLICATION_TYPE = "TEZ";
@@ -230,15 +244,27 @@ public class TezConfiguration extends Configuration {
"grouping.by-count";
public static final boolean TEZ_AM_GROUPING_SPLIT_BY_COUNT_DEFAULT = false;
+ /**
+ * The multiplier for available queue capacity when determining number of
+ * tasks for a Vertex. For example, 1.7 with 100% of the queue available
+ * implies generating a number of tasks roughly equal to 170% of the
+ * available containers on the queue.
+ */
public static final String TEZ_AM_GROUPING_SPLIT_WAVES = TEZ_AM_PREFIX +
"grouping.split-waves";
public static float TEZ_AM_GROUPING_SPLIT_WAVES_DEFAULT = 1.5f;
+ /**
+ * Upper bound on the size (in bytes) of a grouped split, to avoid generating excessively large splits.
+ */
public static final String TEZ_AM_GROUPING_SPLIT_MAX_SIZE = TEZ_AM_PREFIX +
"grouping.max-size";
public static long TEZ_AM_GROUPING_SPLIT_MAX_SIZE_DEFAULT =
1024*1024*1024L;
+ /**
+ * Lower bound on the size (in bytes) of a grouped split, to avoid generating too many splits.
+ */
public static final String TEZ_AM_GROUPING_SPLIT_MIN_SIZE = TEZ_AM_PREFIX +
"grouping.min-size";
public static long TEZ_AM_GROUPING_SPLIT_MIN_SIZE_DEFAULT =
http://git-wip-us.apache.org/repos/asf/incubator-tez/blob/2e93f176/tez-runtime-library/src/main/java/org/apache/tez/dag/library/vertexmanager/ShuffleVertexManager.java
----------------------------------------------------------------------
diff --git a/tez-runtime-library/src/main/java/org/apache/tez/dag/library/vertexmanager/ShuffleVertexManager.java b/tez-runtime-library/src/main/java/org/apache/tez/dag/library/vertexmanager/ShuffleVertexManager.java
index 835f9f0..1ba5f32 100644
--- a/tez-runtime-library/src/main/java/org/apache/tez/dag/library/vertexmanager/ShuffleVertexManager.java
+++ b/tez-runtime-library/src/main/java/org/apache/tez/dag/library/vertexmanager/ShuffleVertexManager.java
@@ -57,12 +57,22 @@ public class ShuffleVertexManager implements VertexManagerPlugin {
private static final String TEZ_AM_PREFIX = "tez.am.";
+ /**
+ * In case of a ScatterGather connection, the fraction of source tasks which
+ * should complete before tasks for the current vertex are scheduled
+ */
public static final String
TEZ_AM_SHUFFLE_VERTEX_MANAGER_MIN_SRC_FRACTION = TEZ_AM_PREFIX
+ "shuffle-vertex-manager.min-src-fraction";
public static final float
TEZ_AM_SHUFFLE_VERTEX_MANAGER_MIN_SRC_FRACTION_DEFAULT = 0.25f;
+ /**
+ * In case of a ScatterGather connection, once this fraction of source tasks
+ * have completed, all tasks on the current vertex can be scheduled. Number of
+ * tasks ready for scheduling on the current vertex scales linearly between
+ * min-src-fraction and max-src-fraction.
+ */
public static final String
TEZ_AM_SHUFFLE_VERTEX_MANAGER_MAX_SRC_FRACTION = TEZ_AM_PREFIX
+ "shuffle-vertex-manager.max-src-fraction";