You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by en...@apache.org on 2007/12/04 14:40:16 UTC

svn commit: r600952 - in /lucene/hadoop/trunk: ./ src/contrib/abacus/ src/examples/org/apache/hadoop/examples/ src/examples/python/pyAbacus/ src/java/org/apache/hadoop/mapred/lib/aggregate/

Author: enis
Date: Tue Dec  4 05:40:15 2007
New Revision: 600952

URL: http://svn.apache.org/viewvc?rev=600952&view=rev
Log:
HADOOP-1302.  Remove deprecated abacus code from the contrib directory. This also fixes a configuration bug in AggregateWordCount, so that the job now works.  Contributed by Enis Soztutar. 

Added:
    lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/AggregateWordHistogram.java
    lucene/hadoop/trunk/src/examples/python/pyAbacus/
      - copied from r600859, lucene/hadoop/trunk/src/contrib/abacus/examples/pyAbacus/
Removed:
    lucene/hadoop/trunk/src/contrib/abacus/
Modified:
    lucene/hadoop/trunk/CHANGES.txt
    lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/AggregateWordCount.java
    lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/ExampleDriver.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate/ValueAggregatorJob.java

Modified: lucene/hadoop/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/CHANGES.txt?rev=600952&r1=600951&r2=600952&view=diff
==============================================================================
--- lucene/hadoop/trunk/CHANGES.txt (original)
+++ lucene/hadoop/trunk/CHANGES.txt Tue Dec  4 05:40:15 2007
@@ -16,6 +16,10 @@
     Configuration no longer supports the notion of default/final resources.
     (acmurthy) 
 
+    HADOOP-1302.  Remove deprecated abacus code from the contrib directory.
+    This also fixes a configuration bug in AggregateWordCount, so that the
+    job now works.  (enis)
+
   NEW FEATURES
 
     HADOOP-1857.  Ability to run a script when a task fails to capture stack

Modified: lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/AggregateWordCount.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/AggregateWordCount.java?rev=600952&r1=600951&r2=600952&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/AggregateWordCount.java (original)
+++ lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/AggregateWordCount.java Tue Dec  4 05:40:15 2007
@@ -18,16 +18,16 @@
 
 package org.apache.hadoop.examples;
 
+import java.io.IOException;
 import java.util.ArrayList;
-import java.util.Map.Entry;
 import java.util.StringTokenizer;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapred.JobConf;
-
-import java.io.IOException;
+import java.util.Map.Entry;
 
+import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapred.JobClient;
-import org.apache.hadoop.mapred.lib.aggregate.*;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorBaseDescriptor;
+import org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorJob;
 
 /**
  * This is an example Aggregated Hadoop Map/Reduce application. It reads the
@@ -42,6 +42,7 @@
 
   public static class WordCountPlugInClass extends
       ValueAggregatorBaseDescriptor {
+    @Override
     public ArrayList<Entry<Text, Text>> generateKeyValPairs(Object key,
                                                             Object val) {
       String countType = LONG_VALUE_SUM;
@@ -65,15 +66,11 @@
    * @throws IOException
    *           When there is communication problems with the job tracker.
    */
+  @SuppressWarnings("unchecked")
   public static void main(String[] args) throws IOException {
-    JobConf conf = ValueAggregatorJob.createValueAggregatorJob(args);
-    //specify the number of aggregators to be used
-    conf.setInt("aggregator.descriptor.num", 1);
-    //specify the aggregator descriptor
-    conf
-        .set(
-            "aggregator.descriptor.num.0",
-            "UserDefined,org.apache.hadoop.examples.AggregateWordCount.WordCountPlugInClass");
+    JobConf conf = ValueAggregatorJob.createValueAggregatorJob(args
+        , new Class[] {WordCountPlugInClass.class});
+   
     JobClient.runJob(conf);
   }
 

Added: lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/AggregateWordHistogram.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/AggregateWordHistogram.java?rev=600952&view=auto
==============================================================================
--- lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/AggregateWordHistogram.java (added)
+++ lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/AggregateWordHistogram.java Tue Dec  4 05:40:15 2007
@@ -0,0 +1,81 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.examples;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Map.Entry;
+
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorBaseDescriptor;
+import org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorJob;
+
+/**
+ * This is an example Aggregated Hadoop Map/Reduce application. Computes the
+ * histogram of the words in the input texts.
+ * 
+ * To run: bin/hadoop jar hadoop-*-examples.jar aggregatewordhist <i>in-dir</i>
+ * <i>out-dir</i> <i>numOfReducers</i> textinputformat
+ * 
+ */
+public class AggregateWordHistogram {
+
+  public static class AggregateWordHistogramPlugin 
+    extends ValueAggregatorBaseDescriptor {
+    
+    /**
+     * Parse the given value, generate an aggregation-id/value pair per word.
+     * The ID is of type VALUE_HISTOGRAM, with WORD_HISTOGRAM as the real id.
+     * The value is WORD\t1.
+     *
+     * @return a list of the generated pairs.
+     */
+    @Override
+    public ArrayList<Entry<Text, Text>> generateKeyValPairs(Object key, Object val) {
+      String words[] = val.toString().split(" |\t");
+      ArrayList<Entry<Text, Text>> retv = new ArrayList<Entry<Text, Text>>();
+      for (int i = 0; i < words.length; i++) {
+        Text valCount = new Text(words[i] + "\t" + "1");
+        Entry<Text, Text> en = generateEntry(VALUE_HISTOGRAM, "WORD_HISTOGRAM",
+                                 valCount);
+        retv.add(en);
+      }
+      return retv;
+    }
+    
+  }
+  
+  /**
+   * The main driver for the word histogram map/reduce program. Invoke this
+   * method to submit the map/reduce job.
+   * 
+   * @throws IOException
+   *           When there are communication problems with the job tracker.
+   */
+  @SuppressWarnings("unchecked")
+  public static void main(String[] args) throws IOException {
+    JobConf conf = ValueAggregatorJob.createValueAggregatorJob(args
+        , new Class[] {AggregateWordHistogramPlugin.class});
+    
+    JobClient.runJob(conf);
+  }
+  
+}

Modified: lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/ExampleDriver.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/ExampleDriver.java?rev=600952&r1=600951&r2=600952&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/ExampleDriver.java (original)
+++ lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/ExampleDriver.java Tue Dec  4 05:40:15 2007
@@ -34,6 +34,8 @@
                    "A map/reduce program that counts the words in the input files.");
       pgd.addClass("aggregatewordcount", AggregateWordCount.class, 
                    "An Aggregate based map/reduce program that counts the words in the input files.");
+      pgd.addClass("aggregatewordhist", AggregateWordHistogram.class, 
+                   "An Aggregate based map/reduce program that computes the histogram of the words in the input files.");
       pgd.addClass("grep", Grep.class, 
                    "A map/reduce program that counts the matches of a regex in the input.");
       pgd.addClass("randomwriter", RandomWriter.class, 

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate/ValueAggregatorJob.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate/ValueAggregatorJob.java?rev=600952&r1=600951&r2=600952&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate/ValueAggregatorJob.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate/ValueAggregatorJob.java Tue Dec  4 05:40:15 2007
@@ -78,16 +78,23 @@
  */
 public class ValueAggregatorJob {
 
-  public static JobControl createValueAggregatorJobs(String args[])
-    throws IOException {
+  public static JobControl createValueAggregatorJobs(String args[]
+    , Class<? extends ValueAggregatorDescriptor>[] descriptors) throws IOException {
+    
     JobControl theControl = new JobControl("ValueAggregatorJobs");
     ArrayList dependingJobs = new ArrayList();
     JobConf aJobConf = createValueAggregatorJob(args);
+    if(descriptors != null)
+      setAggregatorDescriptors(aJobConf, descriptors);
     Job aJob = new Job(aJobConf, dependingJobs);
     theControl.addJob(aJob);
     return theControl;
   }
 
+  public static JobControl createValueAggregatorJobs(String args[]) throws IOException {
+    return createValueAggregatorJobs(args, null);
+  }
+  
   /**
    * Create an Aggregate based map/reduce job.
    * 
@@ -174,6 +181,23 @@
     return theJob;
   }
 
+  public static JobConf createValueAggregatorJob(String args[]
+    , Class<? extends ValueAggregatorDescriptor>[] descriptors)
+  throws IOException {
+    JobConf job = createValueAggregatorJob(args);
+    setAggregatorDescriptors(job, descriptors);
+    return job;
+  }
+  
+  public static void setAggregatorDescriptors(JobConf job
+      , Class<? extends ValueAggregatorDescriptor>[] descriptors) {
+    job.setInt("aggregator.descriptor.num", descriptors.length);
+    //specify the aggregator descriptors
+    for(int i=0; i< descriptors.length; i++) {
+      job.set("aggregator.descriptor." + i, "UserDefined," + descriptors[i].getName());
+    }    
+  }
+  
   /**
    * create and run an Aggregate based map/reduce job.
    *