You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by en...@apache.org on 2007/12/04 14:40:16 UTC
svn commit: r600952 - in /lucene/hadoop/trunk: ./ src/contrib/abacus/
src/examples/org/apache/hadoop/examples/ src/examples/python/pyAbacus/
src/java/org/apache/hadoop/mapred/lib/aggregate/
Author: enis
Date: Tue Dec 4 05:40:15 2007
New Revision: 600952
URL: http://svn.apache.org/viewvc?rev=600952&view=rev
Log:
HADOOP-1302. Remove deprecated abacus code from the contrib directory. This also fixes a configuration bug in AggregateWordCount, so that the job now works. Contributed by Enis Soztutar.
Added:
lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/AggregateWordHistogram.java
lucene/hadoop/trunk/src/examples/python/pyAbacus/
- copied from r600859, lucene/hadoop/trunk/src/contrib/abacus/examples/pyAbacus/
Removed:
lucene/hadoop/trunk/src/contrib/abacus/
Modified:
lucene/hadoop/trunk/CHANGES.txt
lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/AggregateWordCount.java
lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/ExampleDriver.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate/ValueAggregatorJob.java
Modified: lucene/hadoop/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/CHANGES.txt?rev=600952&r1=600951&r2=600952&view=diff
==============================================================================
--- lucene/hadoop/trunk/CHANGES.txt (original)
+++ lucene/hadoop/trunk/CHANGES.txt Tue Dec 4 05:40:15 2007
@@ -16,6 +16,10 @@
Configuration no longer supports the notion of default/final resources.
(acmurthy)
+ HADOOP-1302. Remove deprecated abacus code from the contrib directory.
+ This also fixes a configuration bug in AggregateWordCount, so that the
+ job now works. (enis)
+
NEW FEATURES
HADOOP-1857. Ability to run a script when a task fails to capture stack
Modified: lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/AggregateWordCount.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/AggregateWordCount.java?rev=600952&r1=600951&r2=600952&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/AggregateWordCount.java (original)
+++ lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/AggregateWordCount.java Tue Dec 4 05:40:15 2007
@@ -18,16 +18,16 @@
package org.apache.hadoop.examples;
+import java.io.IOException;
import java.util.ArrayList;
-import java.util.Map.Entry;
import java.util.StringTokenizer;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapred.JobConf;
-
-import java.io.IOException;
+import java.util.Map.Entry;
+import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobClient;
-import org.apache.hadoop.mapred.lib.aggregate.*;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorBaseDescriptor;
+import org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorJob;
/**
* This is an example Aggregated Hadoop Map/Reduce application. It reads the
@@ -42,6 +42,7 @@
public static class WordCountPlugInClass extends
ValueAggregatorBaseDescriptor {
+ @Override
public ArrayList<Entry<Text, Text>> generateKeyValPairs(Object key,
Object val) {
String countType = LONG_VALUE_SUM;
@@ -65,15 +66,11 @@
* @throws IOException
* When there is communication problems with the job tracker.
*/
+ @SuppressWarnings("unchecked")
public static void main(String[] args) throws IOException {
- JobConf conf = ValueAggregatorJob.createValueAggregatorJob(args);
- //specify the number of aggregators to be used
- conf.setInt("aggregator.descriptor.num", 1);
- //specify the aggregator descriptor
- conf
- .set(
- "aggregator.descriptor.num.0",
- "UserDefined,org.apache.hadoop.examples.AggregateWordCount.WordCountPlugInClass");
+ JobConf conf = ValueAggregatorJob.createValueAggregatorJob(args
+ , new Class[] {WordCountPlugInClass.class});
+
JobClient.runJob(conf);
}
Added: lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/AggregateWordHistogram.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/AggregateWordHistogram.java?rev=600952&view=auto
==============================================================================
--- lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/AggregateWordHistogram.java (added)
+++ lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/AggregateWordHistogram.java Tue Dec 4 05:40:15 2007
@@ -0,0 +1,81 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.examples;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Map.Entry;
+
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorBaseDescriptor;
+import org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorJob;
+
+/**
+ * This is an example Aggregated Hadoop Map/Reduce application. Computes the
+ * histogram of the words in the input texts.
+ *
+ * To run: bin/hadoop jar hadoop-*-examples.jar aggregatewordhist <i>in-dir</i>
+ * <i>out-dir</i> <i>numOfReducers</i> textinputformat
+ *
+ */
+public class AggregateWordHistogram {
+
+ public static class AggregateWordHistogramPlugin
+ extends ValueAggregatorBaseDescriptor {
+
+ /**
+ * Parses the given value and generates an aggregation-id/value pair per word.
+ * The ID is of type VALUE_HISTOGRAM, with WORD_HISTOGRAM as the real id.
+ * The value is WORD\t1.
+ *
+ * @return a list of the generated pairs.
+ */
+ @Override
+ public ArrayList<Entry<Text, Text>> generateKeyValPairs(Object key, Object val) {
+ String words[] = val.toString().split(" |\t");
+ ArrayList<Entry<Text, Text>> retv = new ArrayList<Entry<Text, Text>>();
+ for (int i = 0; i < words.length; i++) {
+ Text valCount = new Text(words[i] + "\t" + "1");
+ Entry<Text, Text> en = generateEntry(VALUE_HISTOGRAM, "WORD_HISTOGRAM",
+ valCount);
+ retv.add(en);
+ }
+ return retv;
+ }
+
+ }
+
+ /**
+ * The main driver for the word histogram map/reduce program. Invoke this
+ * method to submit the map/reduce job.
+ *
+ * @throws IOException
+ * When there are communication problems with the job tracker.
+ */
+ @SuppressWarnings("unchecked")
+ public static void main(String[] args) throws IOException {
+ JobConf conf = ValueAggregatorJob.createValueAggregatorJob(args
+ , new Class[] {AggregateWordHistogramPlugin.class});
+
+ JobClient.runJob(conf);
+ }
+
+}
Modified: lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/ExampleDriver.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/ExampleDriver.java?rev=600952&r1=600951&r2=600952&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/ExampleDriver.java (original)
+++ lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/ExampleDriver.java Tue Dec 4 05:40:15 2007
@@ -34,6 +34,8 @@
"A map/reduce program that counts the words in the input files.");
pgd.addClass("aggregatewordcount", AggregateWordCount.class,
"An Aggregate based map/reduce program that counts the words in the input files.");
+ pgd.addClass("aggregatewordhist", AggregateWordHistogram.class,
+ "An Aggregate based map/reduce program that computes the histogram of the words in the input files.");
pgd.addClass("grep", Grep.class,
"A map/reduce program that counts the matches of a regex in the input.");
pgd.addClass("randomwriter", RandomWriter.class,
Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate/ValueAggregatorJob.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate/ValueAggregatorJob.java?rev=600952&r1=600951&r2=600952&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate/ValueAggregatorJob.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate/ValueAggregatorJob.java Tue Dec 4 05:40:15 2007
@@ -78,16 +78,23 @@
*/
public class ValueAggregatorJob {
- public static JobControl createValueAggregatorJobs(String args[])
- throws IOException {
+ public static JobControl createValueAggregatorJobs(String args[]
+ , Class<? extends ValueAggregatorDescriptor>[] descriptors) throws IOException {
+
JobControl theControl = new JobControl("ValueAggregatorJobs");
ArrayList dependingJobs = new ArrayList();
JobConf aJobConf = createValueAggregatorJob(args);
+ if(descriptors != null)
+ setAggregatorDescriptors(aJobConf, descriptors);
Job aJob = new Job(aJobConf, dependingJobs);
theControl.addJob(aJob);
return theControl;
}
+ public static JobControl createValueAggregatorJobs(String args[]) throws IOException {
+ return createValueAggregatorJobs(args, null);
+ }
+
/**
* Create an Aggregate based map/reduce job.
*
@@ -174,6 +181,23 @@
return theJob;
}
+ public static JobConf createValueAggregatorJob(String args[]
+ , Class<? extends ValueAggregatorDescriptor>[] descriptors)
+ throws IOException {
+ JobConf job = createValueAggregatorJob(args);
+ setAggregatorDescriptors(job, descriptors);
+ return job;
+ }
+
+ public static void setAggregatorDescriptors(JobConf job
+ , Class<? extends ValueAggregatorDescriptor>[] descriptors) {
+ job.setInt("aggregator.descriptor.num", descriptors.length);
+ //specify the aggregator descriptors
+ for(int i=0; i< descriptors.length; i++) {
+ job.set("aggregator.descriptor." + i, "UserDefined," + descriptors[i].getName());
+ }
+ }
+
/**
* create and run an Aggregate based map/reduce job.
*