You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by cu...@apache.org on 2006/02/08 20:20:26 UTC

svn commit: r376028 - in /lucene/hadoop/trunk: ./ bin/ src/examples/ src/examples/org/ src/examples/org/apache/ src/examples/org/apache/hadoop/ src/examples/org/apache/hadoop/examples/ src/java/org/apache/hadoop/mapred/demo/ src/java/org/apache/hadoop/...

Author: cutting
Date: Wed Feb  8 11:20:24 2006
New Revision: 376028

URL: http://svn.apache.org/viewcvs?rev=376028&view=rev
Log:
Fix HADOOP-25: improve example code & package separately.  Contributed by Owen O'Malley.

Added:
    lucene/hadoop/trunk/src/examples/
    lucene/hadoop/trunk/src/examples/org/
    lucene/hadoop/trunk/src/examples/org/apache/
    lucene/hadoop/trunk/src/examples/org/apache/hadoop/
    lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/
    lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/ExampleDriver.java
    lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/Grep.java
      - copied, changed from r376018, lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/demo/Grep.java
    lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/WordCount.java
    lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/package.html
      - copied, changed from r376018, lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/demo/package.html
    lucene/hadoop/trunk/src/java/org/apache/hadoop/util/PrintJarMainClass.java
Removed:
    lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/demo/
Modified:
    lucene/hadoop/trunk/bin/hadoop
    lucene/hadoop/trunk/build.xml

Modified: lucene/hadoop/trunk/bin/hadoop
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/bin/hadoop?rev=376028&r1=376027&r2=376028&view=diff
==============================================================================
--- lucene/hadoop/trunk/bin/hadoop (original)
+++ lucene/hadoop/trunk/bin/hadoop Wed Feb  8 11:20:24 2006
@@ -34,6 +34,7 @@
   echo "  jobtracker        run the MapReduce job Tracker node" 
   echo "  tasktracker       run a MapReduce task Tracker node" 
   echo "  job               manipulate MapReduce jobs" 
+  echo "  jar <jar>         run a jar file"
   echo " or"
   echo "  CLASSNAME         run the class named CLASSNAME"
   echo "Most commands print help when invoked w/o parameters."
@@ -117,6 +118,15 @@
   CLASS=org.apache.hadoop.mapred.TaskTracker
 elif [ "$COMMAND" = "job" ] ; then
   CLASS=org.apache.hadoop.mapred.JobClient
+elif [ "$COMMAND" = "jar" ] ; then
+  JAR="$1"
+  shift
+  CLASS=`"$0" org.apache.hadoop.util.PrintJarMainClass "$JAR"`
+  if [ $? != 0 ]; then
+    echo "Error: Could not find main class in jar file $JAR"
+    exit 1
+  fi
+  CLASSPATH=${CLASSPATH}:${JAR}
 else
   CLASS=$COMMAND
 fi

Modified: lucene/hadoop/trunk/build.xml
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/build.xml?rev=376028&r1=376027&r2=376028&view=diff
==============================================================================
--- lucene/hadoop/trunk/build.xml (original)
+++ lucene/hadoop/trunk/build.xml Wed Feb  8 11:20:24 2006
@@ -15,6 +15,7 @@
 
   <property name="basedir" value="./"/>
   <property name="src.dir" value="src/java"/>
+  <property name="examples.dir" value="src/examples"/>
   <property name="lib.dir" value="lib"/>
   <property name="conf.dir" value="conf"/>
   <property name="docs.dir" value="docs"/>
@@ -23,6 +24,7 @@
   <property name="build.dir" value="build"/>
   <property name="build.classes" value="${build.dir}/classes"/>
   <property name="build.webapps" value="${build.dir}/webapps"/>
+  <property name="build.examples" value="${build.dir}/examples"/>
   <property name="build.docs" value="${build.dir}/docs"/>
   <property name="build.javadoc" value="${build.docs}/api"/>
   <property name="build.encoding" value="ISO-8859-1"/>
@@ -70,6 +72,7 @@
     <mkdir dir="${build.dir}"/>
     <mkdir dir="${build.classes}"/>
     <mkdir dir="${build.webapps}"/>
+    <mkdir dir="${build.examples}"/>
 
     <mkdir dir="${test.build.dir}"/>
     <mkdir dir="${test.build.classes}"/>
@@ -106,6 +109,21 @@
     </javac>    
   </target>
 
+  <target name="compile-examples" depends="compile">
+    <!-- Compiles the example sources found under ${examples.dir} (only   -->
+    <!-- files matching org/apache/hadoop/**/*.java) into                 -->
+    <!-- ${build.examples}.  Runs after "compile"; the examples are built -->
+    <!-- against the shared "classpath" reference (presumably this        -->
+    <!-- includes the core classes; its definition is not in this hunk).  -->
+    <javac 
+     encoding="${build.encoding}" 
+     srcdir="${examples.dir}"
+     includes="org/apache/hadoop/**/*.java"
+     destdir="${build.examples}"
+     debug="${javac.debug}"
+     optimize="${javac.optimize}"
+     target="${javac.version}"
+     source="${javac.version}"
+     deprecation="${javac.deprecation}">
+      <classpath refid="classpath"/>
+    </javac>    
+  </target>
+
   <!-- ================================================================== -->
   <!-- Make hadoop.jar                                                     -->
   <!-- ================================================================== -->
@@ -127,6 +145,21 @@
   </target>
 
   <!-- ================================================================== -->
+  <!-- Make the Hadoop examples jar.                                      -->
+  <!-- ================================================================== -->
+  <!--                                                                    -->
+  <!-- ================================================================== -->
+  <target name="examples" depends="jar, compile-examples">
+    <!-- Packages the compiled example classes into a runnable jar.       -->
+    <!-- The jar is named after ${name} (not ${final.name}) because the   -->
+    <!-- "package" target copies ${name}-examples.jar and WordCount looks -->
+    <!-- up "hadoop-examples.jar" on the classpath.                       -->
+    <jar jarfile="${build.dir}/${name}-examples.jar"
+         basedir="${build.examples}">
+      <manifest>
+        <!-- Main-Class takes a dotted class name, not a path. -->
+        <attribute name="Main-Class" 
+                   value="org.apache.hadoop.examples.ExampleDriver"/>
+      </manifest>
+    </jar>
+  </target>
+
+  <!-- ================================================================== -->
   <!-- Compile test code                                                  --> 
   <!-- ================================================================== -->
   <target name="compile-test" depends="compile">
@@ -147,7 +180,7 @@
   <!-- ================================================================== -->
   <!-- Run unit tests                                                     --> 
   <!-- ================================================================== -->
-  <target name="test" depends="compile, compile-test">
+  <target name="test" depends="compile, examples, compile-test">
 
     <delete dir="${test.build.data}"/>
     <mkdir dir="${test.build.data}"/>
@@ -190,8 +223,13 @@
       bottom="Copyright &amp;copy; ${year} The Apache Software Foundation"
       >
     	<packageset dir="${src.dir}"/>
+    	<packageset dir="${examples.dir}"/>
         <link href="${javadoc.link.java}"/>
         <classpath refid="classpath"/>
+
+    	<group title="Core" packages="org.apache.hadoop.*"/>
+    	<group title="Examples" packages="org.apache.hadoop.examples"/>
+
     </javadoc>
   </target>	
 	
@@ -205,7 +243,7 @@
   <!-- ================================================================== -->
   <!--                                                                    -->
   <!-- ================================================================== -->
-  <target name="package" depends="jar, javadoc">
+  <target name="package" depends="jar, javadoc, examples">
     <mkdir dir="${dist.dir}"/>
     <mkdir dir="${dist.dir}/lib"/>
     <mkdir dir="${dist.dir}/bin"/>
@@ -221,6 +259,8 @@
     </copy>
 
     <copy file="${build.dir}/${final.name}.jar" todir="${dist.dir}"/>
+
+    <copy file="${build.dir}/${name}-examples.jar" todir="${dist.dir}"/>
 
     <copy todir="${dist.dir}/bin">
       <fileset dir="bin"/>

Added: lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/ExampleDriver.java
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/ExampleDriver.java?rev=376028&view=auto
==============================================================================
--- lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/ExampleDriver.java (added)
+++ lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/ExampleDriver.java Wed Feb  8 11:20:24 2006
@@ -0,0 +1,131 @@
+/**
+ * Copyright 2006 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.examples;
+
+import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Method;
+import java.util.TreeMap;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.Map.Entry;
+
+/**
+ * Command-line entry point for the examples jar: looks up an example program
+ * by name and runs its <code>main</code> method with the remaining arguments.
+ */
+public class ExampleDriver {
+
+  /**
+   * A description of an example program based on its class and a
+   * human-readable description.
+   * @author Owen O'Malley
+   */
+  static private class ProgramDescription {
+
+    /** Parameter signature of <code>main(String[])</code>. */
+    static final Class[] paramTypes = new Class[] {String[].class};
+
+    private final Method main;
+    private final String description;
+
+    /**
+     * Create a description of an example program.
+     * @param mainClass the class with the main for the example program
+     * @param description a string to display to the user in help messages
+     * @throws SecurityException if we can't use reflection
+     * @throws NoSuchMethodException if the class doesn't have a main method
+     */
+    public ProgramDescription(Class mainClass,
+                              String description)
+    throws SecurityException, NoSuchMethodException {
+      this.main = mainClass.getMethod("main", paramTypes);
+      this.description = description;
+    }
+
+    /**
+     * Invoke the example application with the given arguments.
+     * @param args the arguments for the application
+     * @throws Throwable The exception thrown by the invoked method
+     */
+    public void invoke(String[] args)
+    throws Throwable {
+      try {
+        // No receiver is passed (main is expected to be static); the String[]
+        // is wrapped so that it arrives as the single argument.
+        main.invoke(null, new Object[]{args});
+      } catch (InvocationTargetException except) {
+        // Unwrap so the caller sees the example's own exception; fall back
+        // to the wrapper rather than throwing null if there is no cause.
+        Throwable cause = except.getCause();
+        throw (cause != null) ? cause : except;
+      }
+    }
+
+    /**
+     * @return the human-readable description shown in help messages
+     */
+    public String getDescription() {
+      return description;
+    }
+  }
+
+  /**
+   * Print the known program names together with their descriptions.
+   * @param programs map from program name (String) to ProgramDescription
+   */
+  private static void printUsage(Map programs) {
+    System.out.println("Valid program names are:");
+    for(Iterator itr=programs.entrySet().iterator(); itr.hasNext();) {
+      Map.Entry item = (Entry) itr.next();
+      System.out.println("  " + (String) item.getKey() + ": " +
+          ((ProgramDescription) item.getValue()).getDescription());
+    }
+  }
+
+  /**
+   * This is a driver for the example programs.
+   * It looks at the first command line argument and tries to find an
+   * example program with that name.
+   * If it is found, it calls the main method in that class with the rest
+   * of the command line arguments.
+   * If no name is given, or the name is unknown, the list of valid names is
+   * printed and the JVM exits with status 1 so that calling scripts can
+   * detect the failure.
+   * @param args The argument from the user. args[0] is the command to run.
+   * @throws Throwable Anything thrown by the example program's main, or by
+   *                   the reflective lookup of an example's main method
+   */
+  public static void main(String[] args)
+  throws Throwable
+  {
+    // A TreeMap keeps the usage listing sorted by program name.
+    Map programs = new TreeMap();
+
+    // Add new programs to this list
+    programs.put("wordcount", new ProgramDescription(WordCount.class,
+    "A map/reduce program that counts the words in the input files."));
+    programs.put("grep", new ProgramDescription(Grep.class,
+    "A map/reduce program that counts the matches of a regex in the input."));
+
+    // Make sure they gave us a program name.
+    if (args.length == 0) {
+      System.out.println("An example program must be given as the" +
+          " first argument.");
+      printUsage(programs);
+      System.exit(1);
+    }
+
+    // And that it is good.
+    ProgramDescription pgm = (ProgramDescription) programs.get(args[0]);
+    if (pgm == null) {
+      System.out.println("Unknown program '" + args[0] + "' chosen.");
+      printUsage(programs);
+      System.exit(1);
+    }
+
+    // Remove the leading argument and call main
+    String[] new_args = new String[args.length - 1];
+    System.arraycopy(args, 1, new_args, 0, new_args.length);
+    pgm.invoke(new_args);
+  }
+
+}

Copied: lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/Grep.java (from r376018, lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/demo/Grep.java)
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/Grep.java?p2=lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/Grep.java&p1=lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/demo/Grep.java&r1=376018&r2=376028&rev=376028&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/demo/Grep.java (original)
+++ lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/Grep.java Wed Feb  8 11:20:24 2006
@@ -13,19 +13,22 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.hadoop.mapred.demo;
+package org.apache.hadoop.examples;
 
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.RunningJob;
 import org.apache.hadoop.mapred.SequenceFileOutputFormat;
 import org.apache.hadoop.mapred.SequenceFileInputFormat;
 
 import org.apache.hadoop.mapred.lib.RegexMapper;
 import org.apache.hadoop.mapred.lib.InverseMapper;
 import org.apache.hadoop.mapred.lib.LongSumReducer;
+import org.apache.hadoop.mapred.lib.IdentityReducer;
 
 import org.apache.hadoop.io.UTF8;
 import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.WritableComparator;
 
 import org.apache.hadoop.conf.Configuration;
 

Added: lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/WordCount.java
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/WordCount.java?rev=376028&view=auto
==============================================================================
--- lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/WordCount.java (added)
+++ lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/WordCount.java Wed Feb  8 11:20:24 2006
@@ -0,0 +1,156 @@
+/**
+ * Copyright 2006 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.examples;
+
+import java.io.*;
+import java.net.URL;
+import java.util.*;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.UTF8;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.Mapper;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reducer;
+import org.apache.hadoop.mapred.Reporter;
+
+/**
+ * This is an example Hadoop Map/Reduce application.
+ * It reads the text input files, breaks each line into words
+ * and counts them. The output is a locally sorted list of words and the 
+ * count of how often they occurred.
+ *
+ * To run: bin/hadoop jar build/hadoop-examples.jar wordcount
+ *            [-m <i>maps</i>] [-r <i>reduces</i>] <i>in-dir</i> <i>out-dir</i> 
+ *
+ * @author Owen O'Malley
+ */
+public class WordCount {
+
+  /** Name of the jar resource that is handed to the job as its jar file. */
+  private static final String JOB_JAR = "hadoop-examples.jar";
+
+  /**
+   * Counts the words in each line.
+   * For each line of input, break the line into words and emit them as
+   * (<b>word</b>, <b>1</b>).
+   */
+  public static class MapClass implements Mapper {
+
+    /** Shared count of one emitted for every word occurrence. */
+    private final static IntWritable one = new IntWritable(1);
+
+    /**
+     * Split one input line on whitespace and emit (word, 1) per token.
+     * @param key the input key (not used)
+     * @param value the line of text, as a UTF8
+     * @param output receives the (word, 1) pairs
+     * @param reporter not used
+     * @throws IOException if the collector fails
+     */
+    public void map(WritableComparable key, Writable value,
+        OutputCollector output,
+        Reporter reporter) throws IOException {
+      String line = ((UTF8)value).toString();
+      StringTokenizer itr = new StringTokenizer(line);
+      while (itr.hasMoreTokens()) {
+        String word = itr.nextToken();
+        output.collect(new UTF8(word), one);
+      }
+    }
+
+    /** No per-job configuration is needed. */
+    public void configure(JobConf job) {
+    }
+
+  }
+
+  /**
+   * A reducer class that just emits the sum of the input values.
+   */
+  public static class Reduce implements Reducer {
+
+    /**
+     * Add up the IntWritable values for a key and emit (key, sum).
+     * @param key the word
+     * @param values iterator over the IntWritable counts for the word
+     * @param output receives the (word, sum) pair
+     * @param reporter not used
+     * @throws IOException if the collector fails
+     */
+    public void reduce(WritableComparable key, Iterator values,
+        OutputCollector output,
+        Reporter reporter) throws IOException {
+      int sum = 0;
+      while (values.hasNext()) {
+        sum += ((IntWritable) values.next()).get();
+      }
+      output.collect(key, new IntWritable(sum));
+    }
+
+    /** No per-job configuration is needed. */
+    public void configure(JobConf job) {
+    }
+
+  }
+
+  /** Print the command line synopsis and exit with status 1. */
+  static void printUsage() {
+    System.out.println("wordcount [-m <maps>] [-r <reduces>] <input> <output>");
+    System.exit(1);
+  }
+
+  /**
+   * The main driver for word count map/reduce program.
+   * Invoke this method to submit the map/reduce job.
+   * @param args [-m maps] [-r reduces] input-dir output-dir
+   * @throws IOException When there are communication problems with the
+   *                     job tracker, or when the examples jar cannot be
+   *                     found on the classpath.
+   */
+  public static void main(String[] args) throws IOException {
+    Configuration defaults = new Configuration();
+
+    JobConf countJob = new JobConf(defaults);
+
+    // getResource returns null when the jar is not visible on the classpath;
+    // report that clearly instead of failing with a NullPointerException.
+    URL jar_url = WordCount.class.getClassLoader().getResource(JOB_JAR);
+    if (jar_url == null) {
+      throw new FileNotFoundException("Could not find " + JOB_JAR +
+                                      " on the classpath.");
+    }
+    countJob.setJar(jar_url.getPath());
+
+    // the keys are words (strings)
+    countJob.setOutputKeyClass(UTF8.class);
+    // the values are counts (ints)
+    countJob.setOutputValueClass(IntWritable.class);
+
+    countJob.setMapperClass(MapClass.class);
+    // summing is associative, so the reducer doubles as the combiner
+    countJob.setCombinerClass(Reduce.class);
+    countJob.setReducerClass(Reduce.class);
+
+    List other_args = new ArrayList();
+    for(int i=0; i < args.length; ++i) {
+      String arg = args[i];
+      boolean isMaps = "-m".equals(arg);
+      if (isMaps || "-r".equals(arg)) {
+        // Both options take one integer value in the following argument.
+        if (i + 1 >= args.length) {
+          System.out.println("ERROR: Required parameter missing from " +
+                             arg);
+          printUsage(); // exits
+        }
+        String value = args[++i];
+        try {
+          int count = Integer.parseInt(value);
+          if (isMaps) {
+            countJob.setNumMapTasks(count);
+          } else {
+            countJob.setNumReduceTasks(count);
+          }
+        } catch (NumberFormatException except) {
+          System.out.println("ERROR: Integer expected instead of " + value);
+          printUsage(); // exits
+        }
+      } else {
+        other_args.add(arg);
+      }
+    }
+    // Make sure there are exactly 2 parameters left.
+    if (other_args.size() != 2) {
+      System.out.println("ERROR: Wrong number of parameters: " +
+          other_args.size() + " instead of 2.");
+      printUsage(); // exits
+    }
+    countJob.setInputDir(new File((String) other_args.get(0)));
+    countJob.setOutputDir(new File((String) other_args.get(1)));
+
+    // Uncomment to run locally in a single process
+    // countJob.set("mapred.job.tracker", "local");
+
+    JobClient.runJob(countJob);
+  }
+
+}

Copied: lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/package.html (from r376018, lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/demo/package.html)
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/package.html?p2=lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/package.html&p1=lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/demo/package.html&r1=376018&r2=376028&rev=376028&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/demo/package.html (original)
+++ lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/package.html Wed Feb  8 11:20:24 2006
@@ -1,5 +1,5 @@
 <html>
 <body>
-MapReduce examples.
+Hadoop example code.
 </body>
 </html>

Added: lucene/hadoop/trunk/src/java/org/apache/hadoop/util/PrintJarMainClass.java
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/src/java/org/apache/hadoop/util/PrintJarMainClass.java?rev=376028&view=auto
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/util/PrintJarMainClass.java (added)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/util/PrintJarMainClass.java Wed Feb  8 11:20:24 2006
@@ -0,0 +1,50 @@
+/**
+ * Copyright 2006 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.util;
+
+import java.util.jar.*;
+
+/**
+ * A micro-application that prints the main class name out of a jar file.
+ * @author Owen O'Malley
+ */
+public class PrintJarMainClass {
+
+  /**
+   * Look up the <code>Main-Class</code> attribute in a jar file's manifest.
+   * @param jarPath path of the jar file to inspect
+   * @return the main class name with any '/' separators replaced by '.', or
+   *         <code>null</code> if the jar has no manifest or the manifest has
+   *         no <code>Main-Class</code> attribute
+   * @throws java.io.IOException if the jar file cannot be opened or read
+   */
+  private static String findMainClass(String jarPath)
+  throws java.io.IOException {
+    JarFile jar_file = new JarFile(jarPath);
+    try {
+      Manifest manifest = jar_file.getManifest();
+      if (manifest == null) {
+        return null;
+      }
+      String value = manifest.getMainAttributes().getValue("Main-Class");
+      return (value == null) ? null : value.replace('/', '.');
+    } finally {
+      // Always release the underlying file handle.
+      jar_file.close();
+    }
+  }
+
+  /**
+   * Print the main class of the given jar file on standard output.
+   * If the class cannot be determined, the reason is reported on standard
+   * error, "UNKNOWN" is printed on standard output and the JVM exits with
+   * status 1.
+   * @param args args[0] is the path of the jar file to inspect
+   */
+  public static void main(String[] args) {
+    if (args.length < 1) {
+      System.err.println("Usage: PrintJarMainClass <jar>");
+    } else {
+      try {
+        String mainClass = findMainClass(args[0]);
+        if (mainClass != null) {
+          System.out.println(mainClass);
+          return;
+        }
+        System.err.println("No Main-Class attribute in manifest of " +
+                           args[0]);
+      } catch (Exception e) {
+        // Standard output carries only the class name (or UNKNOWN) because
+        // callers may capture it, so the failure reason goes to stderr.
+        System.err.println("Could not read jar file " + args[0] + ": " + e);
+      }
+    }
+    System.out.println("UNKNOWN");
+    System.exit(1);
+  }
+
+}