You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tez.apache.org by je...@apache.org on 2016/11/08 18:59:41 UTC
tez git commit: TEZ-3477. MRInputHelpers generateInputSplitsToMem
public API modified (jeagles)
Repository: tez
Updated Branches:
refs/heads/master b4c949c9c -> eb6fb67c4
TEZ-3477. MRInputHelpers generateInputSplitsToMem public API modified (jeagles)
Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/eb6fb67c
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/eb6fb67c
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/eb6fb67c
Branch: refs/heads/master
Commit: eb6fb67c4220ea868fee6d73cf9e9b76af3696af
Parents: b4c949c
Author: Jonathan Eagles <je...@yahoo-inc.com>
Authored: Tue Nov 8 12:59:28 2016 -0600
Committer: Jonathan Eagles <je...@yahoo-inc.com>
Committed: Tue Nov 8 12:59:28 2016 -0600
----------------------------------------------------------------------
CHANGES.txt | 1 +
.../tez/mapreduce/hadoop/MRInputHelpers.java | 61 ++++++++++++++++++++
2 files changed, 62 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/tez/blob/eb6fb67c/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index ecfe935..88767ec 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -7,6 +7,7 @@ INCOMPATIBLE CHANGES
ALL CHANGES:
+ TEZ-3477. MRInputHelpers generateInputSplitsToMem public API modified
TEZ-3465. Support broadcast edge into cartesian product vertex and forbid other edges.
TEZ-3493. DAG submit timeout cannot be set to a month
TEZ-3505. Move license to the file header for TezBytesWritableSerialization
http://git-wip-us.apache.org/repos/asf/tez/blob/eb6fb67c/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/hadoop/MRInputHelpers.java
----------------------------------------------------------------------
diff --git a/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/hadoop/MRInputHelpers.java b/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/hadoop/MRInputHelpers.java
index 97e1677..b0a76fa 100644
--- a/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/hadoop/MRInputHelpers.java
+++ b/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/hadoop/MRInputHelpers.java
@@ -102,6 +102,7 @@ public class MRInputHelpers {
* as a data source to a {@link org.apache.tez.dag.api.Vertex}
*/
@InterfaceStability.Unstable
+ @InterfaceAudience.LimitedPrivate({"hive, pig"})
public static DataSourceDescriptor configureMRInputWithLegacySplitGeneration(Configuration conf,
Path splitsDir,
boolean useLegacyInput) {
@@ -140,6 +141,7 @@ public class MRInputHelpers {
* @throws IOException
*/
@InterfaceStability.Evolving
+ @InterfaceAudience.LimitedPrivate({"hive, pig"})
public static MRRuntimeProtos.MRInputUserPayloadProto parseMRInputPayload(UserPayload payload)
throws IOException {
return MRRuntimeProtos.MRInputUserPayloadProto.parseFrom(ByteString.copyFrom(payload.getPayload()));
@@ -157,6 +159,7 @@ public class MRInputHelpers {
*/
@SuppressWarnings("unchecked")
@InterfaceStability.Evolving
+ @InterfaceAudience.LimitedPrivate({"hive, pig"})
public static InputSplit createOldFormatSplitFromUserPayload(
MRRuntimeProtos.MRSplitProto splitProto, SerializationFactory serializationFactory)
throws IOException {
@@ -239,6 +242,7 @@ public class MRInputHelpers {
}
@InterfaceStability.Evolving
+ @InterfaceAudience.LimitedPrivate({"hive, pig"})
public static MRRuntimeProtos.MRSplitProto createSplitProto(
org.apache.hadoop.mapred.InputSplit oldSplit) throws IOException {
MRRuntimeProtos.MRSplitProto.Builder builder = MRRuntimeProtos.MRSplitProto.newBuilder();
@@ -278,6 +282,38 @@ public class MRInputHelpers {
* @throws InterruptedException
*/
@InterfaceStability.Unstable
+ @InterfaceAudience.LimitedPrivate({"hive, pig"})
+ public static InputSplitInfoMem generateInputSplitsToMem(Configuration conf,
+ boolean groupSplits, int targetTasks)
+ throws IOException, ClassNotFoundException, InterruptedException {
+ return generateInputSplitsToMem(conf, groupSplits, true, targetTasks);
+ }
+
+ /**
+ * Generates Input splits and stores them in a {@link org.apache.tez.mapreduce.protos.MRRuntimeProtos.MRSplitsProto} instance.
+ *
+ * Returns an instance of {@link InputSplitInfoMem}
+ *
+ * With grouping enabled, the eventual configuration used by the tasks, will have
+ * the user-specified InputFormat replaced by either {@link org.apache.hadoop.mapred.split.TezGroupedSplitsInputFormat}
+ * or {@link org.apache.hadoop.mapreduce.split.TezGroupedSplitsInputFormat}
+ *
+ * @param conf
+ * an instance of Configuration which is used to determine whether
+ * the mapred or mapreduce API is being used. This Configuration
+ * instance should also contain adequate information to be able to
+ * generate splits - like the InputFormat being used and related
+ * configuration.
+ * @param groupSplits whether to group the splits or not
+ * @param sortSplits whether to sort the splits or not
+ * @param targetTasks the number of target tasks if grouping is enabled. Specify as 0 otherwise.
+ * @return an instance of {@link InputSplitInfoMem} which supports a subset of
+ * the APIs defined on {@link InputSplitInfo}
+ * @throws IOException
+ * @throws ClassNotFoundException
+ * @throws InterruptedException
+ */
+ @InterfaceStability.Unstable
public static InputSplitInfoMem generateInputSplitsToMem(Configuration conf,
boolean groupSplits, boolean sortSplits, int targetTasks)
throws IOException, ClassNotFoundException, InterruptedException {
@@ -669,6 +705,31 @@ public class MRInputHelpers {
}
/**
+ * Called to specify that grouping of input splits be performed by Tez.
+ * The conf should have the input format class configuration
+ * set to the TezGroupedSplitsInputFormat. The real input format class name
+ * should be passed as an argument to this method.
+ * <p>
+ * With grouping enabled, the eventual configuration used by the tasks, will have
+ * the user-specified InputFormat replaced by either {@link org.apache.hadoop.mapred.split.TezGroupedSplitsInputFormat}
+ * or {@link org.apache.hadoop.mapreduce.split.TezGroupedSplitsInputFormat}
+ */
+ @InterfaceAudience.Private
+ protected static UserPayload createMRInputPayloadWithGrouping(Configuration conf) throws IOException {
+ Preconditions
+ .checkArgument(conf != null, "Configuration must be specified");
+ return createMRInputPayload(TezUtils.createByteStringFromConf(conf),
+ null, true, true);
+ }
+
+ @InterfaceAudience.Private
+ protected static UserPayload createMRInputPayload(Configuration conf,
+ MRRuntimeProtos.MRSplitsProto mrSplitsProto) throws
+ IOException {
+ return createMRInputPayload(conf, mrSplitsProto, false, true);
+ }
+
+ /**
* When isGrouped is true, it specifies that grouping of input splits be
* performed by Tez The conf should have the input format class configuration
* set to the TezGroupedSplitsInputFormat. The real input format class name