You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tez.apache.org by ss...@apache.org on 2014/12/17 21:54:20 UTC
tez git commit: TEZ-1852. Get examples to work in Local Mode.
Contributed by Prakash Ramachandran. (cherry picked from commit
14cc02db29bdce0c6cac5bba301875b472b7e8f5)
Repository: tez
Updated Branches:
refs/heads/branch-0.5 880df51b0 -> 410ddf1f6
TEZ-1852. Get examples to work in Local Mode. Contributed by Prakash
Ramachandran.
(cherry picked from commit 14cc02db29bdce0c6cac5bba301875b472b7e8f5)
Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/410ddf1f
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/410ddf1f
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/410ddf1f
Branch: refs/heads/branch-0.5
Commit: 410ddf1f6430d04a0fdc3e0ef82b5a02726a5a62
Parents: 880df51
Author: Siddharth Seth <ss...@apache.org>
Authored: Wed Dec 17 12:52:44 2014 -0800
Committer: Siddharth Seth <ss...@apache.org>
Committed: Wed Dec 17 12:53:26 2014 -0800
----------------------------------------------------------------------
CHANGES.txt | 1 +
.../java/org/apache/tez/examples/HashJoinExample.java | 10 ++++++++--
.../java/org/apache/tez/examples/JoinValidate.java | 7 +++++--
.../org/apache/tez/examples/OrderedWordCount.java | 14 +++++++++++---
.../main/java/org/apache/tez/examples/WordCount.java | 7 +++++--
5 files changed, 30 insertions(+), 9 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/tez/blob/410ddf1f/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index 760fb60..90058b7 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -4,6 +4,7 @@ Apache Tez Change Log
Release 0.5.4: Unreleased
ALL CHANGES:
+ TEZ-1852. Get examples to work in LocalMode.
TEZ-1861. Fix failing test: TestOnFileSortedOutput.
TEZ-1836. Provide better error messages when tez.runtime.io.sort.mb, spill percentage is incorrectly configured.
TEZ-1800. Integer overflow in ExternalSorter.getInitialMemoryRequirement()
http://git-wip-us.apache.org/repos/asf/tez/blob/410ddf1f/tez-examples/src/main/java/org/apache/tez/examples/HashJoinExample.java
----------------------------------------------------------------------
diff --git a/tez-examples/src/main/java/org/apache/tez/examples/HashJoinExample.java b/tez-examples/src/main/java/org/apache/tez/examples/HashJoinExample.java
index 3f9085f..e723b1f 100644
--- a/tez-examples/src/main/java/org/apache/tez/examples/HashJoinExample.java
+++ b/tez-examples/src/main/java/org/apache/tez/examples/HashJoinExample.java
@@ -261,12 +261,15 @@ public class HashJoinExample extends Configured implements Tool {
* going to the same fragments using hash partitioning. The data to be
* joined is the key itself and so the value is null. The number of
* fragments is initially inferred from the number of tasks running in the
- * join vertex because each task will be handling one fragment.
+ * join vertex because each task will be handling one fragment. The
+ * setFromConfiguration call is optional and allows overriding the config
+ * options with command line parameters.
*/
UnorderedPartitionedKVEdgeConfig streamConf =
UnorderedPartitionedKVEdgeConfig
.newBuilder(Text.class.getName(), NullWritable.class.getName(),
HashPartitioner.class.getName())
+ .setFromConfiguration(tezConf)
.build();
/**
@@ -288,11 +291,14 @@ public class HashJoinExample extends Configured implements Tool {
* of its fragment of keys with all the keys of the hash side. Using an
* unpartitioned edge to transfer the complete output of the hash side to
* be broadcast to all fragments of the streamed side. Again, since the
- * data is the key, the value is null.
+ * data is the key, the value is null. The setFromConfiguration call is
+ * optional and allows overriding the config options with command line
+ * parameters.
*/
UnorderedKVEdgeConfig broadcastConf =
UnorderedKVEdgeConfig
.newBuilder(Text.class.getName(), NullWritable.class.getName())
+ .setFromConfiguration(tezConf)
.build();
hashSideEdgeProperty = broadcastConf.createDefaultBroadcastEdgeProperty();
} else {
http://git-wip-us.apache.org/repos/asf/tez/blob/410ddf1f/tez-examples/src/main/java/org/apache/tez/examples/JoinValidate.java
----------------------------------------------------------------------
diff --git a/tez-examples/src/main/java/org/apache/tez/examples/JoinValidate.java b/tez-examples/src/main/java/org/apache/tez/examples/JoinValidate.java
index 0b8671d..9770f6f 100644
--- a/tez-examples/src/main/java/org/apache/tez/examples/JoinValidate.java
+++ b/tez-examples/src/main/java/org/apache/tez/examples/JoinValidate.java
@@ -183,10 +183,13 @@ public class JoinValidate extends Configured implements Tool {
// Configuration for intermediate output - shared by Vertex1 and Vertex2
// This should only be setting selective keys from the underlying conf. Fix after there's a
- // better mechanism to configure the IOs.
+ // better mechanism to configure the IOs. The setFromConfiguration call is optional and allows
+ // overriding the config options with command line parameters.
OrderedPartitionedKVEdgeConfig edgeConf = OrderedPartitionedKVEdgeConfig
.newBuilder(Text.class.getName(), NullWritable.class.getName(),
- HashPartitioner.class.getName()).build();
+ HashPartitioner.class.getName())
+ .setFromConfiguration(tezConf)
+ .build();
Vertex lhsVertex = Vertex.create(LHS_INPUT_NAME, ProcessorDescriptor.create(
ForwardingProcessor.class.getName())).addDataSource("lhs",
http://git-wip-us.apache.org/repos/asf/tez/blob/410ddf1f/tez-examples/src/main/java/org/apache/tez/examples/OrderedWordCount.java
----------------------------------------------------------------------
diff --git a/tez-examples/src/main/java/org/apache/tez/examples/OrderedWordCount.java b/tez-examples/src/main/java/org/apache/tez/examples/OrderedWordCount.java
index fd18592..fbe9a88 100644
--- a/tez-examples/src/main/java/org/apache/tez/examples/OrderedWordCount.java
+++ b/tez-examples/src/main/java/org/apache/tez/examples/OrderedWordCount.java
@@ -138,9 +138,13 @@ public class OrderedWordCount extends Configured implements Tool {
tokenizerVertex.addDataSource(INPUT, dataSource);
// Use Text key and IntWritable value to bring counts for each word in the same partition
+ // The setFromConfiguration call is optional and allows overriding the config options with
+ // command line parameters.
OrderedPartitionedKVEdgeConfig summationEdgeConf = OrderedPartitionedKVEdgeConfig
.newBuilder(Text.class.getName(), IntWritable.class.getName(),
- HashPartitioner.class.getName()).build();
+ HashPartitioner.class.getName())
+ .setFromConfiguration(tezConf)
+ .build();
// This vertex will be reading intermediate data via an input edge and writing intermediate data
// via an output edge.
@@ -148,10 +152,14 @@ public class OrderedWordCount extends Configured implements Tool {
SumProcessor.class.getName()), numPartitions);
// Use IntWritable key and Text value to bring all words with the same count in the same
- // partition. The data will be ordered by count and words grouped by count.
+ // partition. The data will be ordered by count and words grouped by count. The
+ // setFromConfiguration call is optional and allows overriding the config options with
+ // command line parameters.
OrderedPartitionedKVEdgeConfig sorterEdgeConf = OrderedPartitionedKVEdgeConfig
.newBuilder(IntWritable.class.getName(), Text.class.getName(),
- HashPartitioner.class.getName()).build();
+ HashPartitioner.class.getName())
+ .setFromConfiguration(tezConf)
+ .build();
// Use 1 task to bring all the data in one place for global sorted order. Essentially the number
// of partitions is 1. So the NoOpSorter can be used to produce the globally ordered output
http://git-wip-us.apache.org/repos/asf/tez/blob/410ddf1f/tez-examples/src/main/java/org/apache/tez/examples/WordCount.java
----------------------------------------------------------------------
diff --git a/tez-examples/src/main/java/org/apache/tez/examples/WordCount.java b/tez-examples/src/main/java/org/apache/tez/examples/WordCount.java
index e0e94cb..aadd0e7 100644
--- a/tez-examples/src/main/java/org/apache/tez/examples/WordCount.java
+++ b/tez-examples/src/main/java/org/apache/tez/examples/WordCount.java
@@ -170,10 +170,13 @@ public class WordCount extends Configured implements Tool {
// We specify the key, value and partitioner type. Here the key type is Text (for word), the
// value type is IntWritable (for count) and we are using a hash based partitioner. This is a helper
// object. The edge can be configured by configuring the input, output etc individually without
- // using this helper.
+ // using this helper. The setFromConfiguration call is optional and allows overriding the config
+ // options with command line parameters.
OrderedPartitionedKVEdgeConfig edgeConf = OrderedPartitionedKVEdgeConfig
.newBuilder(Text.class.getName(), IntWritable.class.getName(),
- HashPartitioner.class.getName()).build();
+ HashPartitioner.class.getName())
+ .setFromConfiguration(tezConf)
+ .build();
// Create a vertex that reads the tokenized data and calculates the sum using the SumProcessor.
// The number of tasks that do the work of this vertex depends on the number of partitions used