You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by cu...@apache.org on 2006/02/08 20:20:26 UTC
svn commit: r376028 - in /lucene/hadoop/trunk: ./ bin/ src/examples/
src/examples/org/ src/examples/org/apache/ src/examples/org/apache/hadoop/
src/examples/org/apache/hadoop/examples/
src/java/org/apache/hadoop/mapred/demo/ src/java/org/apache/hadoop/...
Author: cutting
Date: Wed Feb 8 11:20:24 2006
New Revision: 376028
URL: http://svn.apache.org/viewcvs?rev=376028&view=rev
Log:
Fix HADOOP-25: improve example code & package separately. Contributed by Owen O'Malley.
Added:
lucene/hadoop/trunk/src/examples/
lucene/hadoop/trunk/src/examples/org/
lucene/hadoop/trunk/src/examples/org/apache/
lucene/hadoop/trunk/src/examples/org/apache/hadoop/
lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/
lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/ExampleDriver.java
lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/Grep.java
- copied, changed from r376018, lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/demo/Grep.java
lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/WordCount.java
lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/package.html
- copied, changed from r376018, lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/demo/package.html
lucene/hadoop/trunk/src/java/org/apache/hadoop/util/PrintJarMainClass.java
Removed:
lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/demo/
Modified:
lucene/hadoop/trunk/bin/hadoop
lucene/hadoop/trunk/build.xml
Modified: lucene/hadoop/trunk/bin/hadoop
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/bin/hadoop?rev=376028&r1=376027&r2=376028&view=diff
==============================================================================
--- lucene/hadoop/trunk/bin/hadoop (original)
+++ lucene/hadoop/trunk/bin/hadoop Wed Feb 8 11:20:24 2006
@@ -34,6 +34,7 @@
echo " jobtracker run the MapReduce job Tracker node"
echo " tasktracker run a MapReduce task Tracker node"
echo " job manipulate MapReduce jobs"
+ echo " jar <jar> run a jar file"
echo " or"
echo " CLASSNAME run the class named CLASSNAME"
echo "Most commands print help when invoked w/o parameters."
@@ -117,6 +118,15 @@
CLASS=org.apache.hadoop.mapred.TaskTracker
elif [ "$COMMAND" = "job" ] ; then
CLASS=org.apache.hadoop.mapred.JobClient
+elif [ "$COMMAND" = "jar" ] ; then
+ JAR="$1"
+ shift
+ CLASS=`"$0" org.apache.hadoop.util.PrintJarMainClass "$JAR"`
+ if [ $? != 0 ]; then
+ echo "Error: Could not find main class in jar file $JAR"
+ exit 1
+ fi
+ CLASSPATH=${CLASSPATH}:${JAR}
else
CLASS=$COMMAND
fi
Modified: lucene/hadoop/trunk/build.xml
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/build.xml?rev=376028&r1=376027&r2=376028&view=diff
==============================================================================
--- lucene/hadoop/trunk/build.xml (original)
+++ lucene/hadoop/trunk/build.xml Wed Feb 8 11:20:24 2006
@@ -15,6 +15,7 @@
<property name="basedir" value="./"/>
<property name="src.dir" value="src/java"/>
+ <property name="examples.dir" value="src/examples"/>
<property name="lib.dir" value="lib"/>
<property name="conf.dir" value="conf"/>
<property name="docs.dir" value="docs"/>
@@ -23,6 +24,7 @@
<property name="build.dir" value="build"/>
<property name="build.classes" value="${build.dir}/classes"/>
<property name="build.webapps" value="${build.dir}/webapps"/>
+ <property name="build.examples" value="${build.dir}/examples"/>
<property name="build.docs" value="${build.dir}/docs"/>
<property name="build.javadoc" value="${build.docs}/api"/>
<property name="build.encoding" value="ISO-8859-1"/>
@@ -70,6 +72,7 @@
<mkdir dir="${build.dir}"/>
<mkdir dir="${build.classes}"/>
<mkdir dir="${build.webapps}"/>
+ <mkdir dir="${build.examples}"/>
<mkdir dir="${test.build.dir}"/>
<mkdir dir="${test.build.classes}"/>
@@ -106,6 +109,21 @@
</javac>
</target>
+ <target name="compile-examples" depends="compile"> <!-- compile the sources under ${examples.dir} into ${build.examples} -->
+ <javac
+ encoding="${build.encoding}"
+ srcdir="${examples.dir}"
+ includes="org/apache/hadoop/**/*.java"
+ destdir="${build.examples}"
+ debug="${javac.debug}"
+ optimize="${javac.optimize}"
+ target="${javac.version}"
+ source="${javac.version}"
+ deprecation="${javac.deprecation}">
+ <classpath refid="classpath"/>
+ </javac>
+ </target>
+
<!-- ================================================================== -->
<!-- Make hadoop.jar -->
<!-- ================================================================== -->
@@ -127,6 +145,21 @@
</target>
<!-- ================================================================== -->
+ <!-- Make the Hadoop examples jar. -->
+ <!-- ================================================================== -->
+ <!-- -->
+ <!-- ================================================================== -->
+ <target name="examples" depends="jar, compile-examples">
+   <!-- Jar the example classes under the file name the "package" target copies. -->
+   <jar jarfile="${build.dir}/${name}-examples.jar"
+        basedir="${build.examples}">
+     <manifest>
+       <attribute name="Main-Class" value="org/apache/hadoop/examples/ExampleDriver"/>
+     </manifest>
+   </jar>
+ </target>
+
+ <!-- ================================================================== -->
<!-- Compile test code -->
<!-- ================================================================== -->
<target name="compile-test" depends="compile">
@@ -147,7 +180,7 @@
<!-- ================================================================== -->
<!-- Run unit tests -->
<!-- ================================================================== -->
- <target name="test" depends="compile, compile-test">
+ <target name="test" depends="compile, examples, compile-test">
<delete dir="${test.build.data}"/>
<mkdir dir="${test.build.data}"/>
@@ -190,8 +223,13 @@
bottom="Copyright &copy; ${year} The Apache Software Foundation"
>
<packageset dir="${src.dir}"/>
+ <packageset dir="${examples.dir}"/>
<link href="${javadoc.link.java}"/>
<classpath refid="classpath"/>
+
+ <group title="Core" packages="org.apache.hadoop.*"/>
+ <group title="Examples" packages="org.apache.hadoop.examples"/>
+
</javadoc>
</target>
@@ -205,7 +243,7 @@
<!-- ================================================================== -->
<!-- -->
<!-- ================================================================== -->
- <target name="package" depends="jar, javadoc">
+ <target name="package" depends="jar, javadoc, examples">
<mkdir dir="${dist.dir}"/>
<mkdir dir="${dist.dir}/lib"/>
<mkdir dir="${dist.dir}/bin"/>
@@ -221,6 +259,8 @@
</copy>
<copy file="${build.dir}/${final.name}.jar" todir="${dist.dir}"/>
+
+ <copy file="${build.dir}/${name}-examples.jar" todir="${dist.dir}"/>
<copy todir="${dist.dir}/bin">
<fileset dir="bin"/>
Added: lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/ExampleDriver.java
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/ExampleDriver.java?rev=376028&view=auto
==============================================================================
--- lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/ExampleDriver.java (added)
+++ lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/ExampleDriver.java Wed Feb 8 11:20:24 2006
@@ -0,0 +1,131 @@
+/**
+ * Copyright 2006 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.examples;
+
+import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Method;
+import java.util.TreeMap;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.Map.Entry;
+
+public class ExampleDriver {
+
+  /**
+   * Pairs the <code>main</code> method of an example program with the
+   * human-readable text shown for it in the usage listing.
+   * @author Owen O'Malley
+   */
+  static private class ProgramDescription {
+
+    /** Parameter list of <code>main(String[])</code>, for the reflective lookup. */
+    static final Class[] paramTypes = new Class[] {String[].class};
+
+    // The example's reflected main method and the help text displayed for it.
+    private Method main;
+    private String description;
+
+    /**
+     * Create a description of an example program.
+     * @param mainClass the class with the main for the example program
+     * @param description a string to display to the user in help messages
+     * @throws SecurityException if we can't use reflection
+     * @throws NoSuchMethodException if the class doesn't have a main method
+     */
+    public ProgramDescription(Class mainClass, String description)
+      throws SecurityException, NoSuchMethodException {
+      this.description = description;
+      this.main = mainClass.getMethod("main", paramTypes);
+    }
+
+    /**
+     * Run the example's main method with the given arguments.
+     * @param args the arguments for the application
+     * @throws Throwable the cause carried by the InvocationTargetException,
+     *         i.e. whatever the invoked main threw
+     */
+    public void invoke(String[] args) throws Throwable {
+      Object[] wrapped = new Object[] {args};
+      try {
+        main.invoke(null, wrapped);
+      } catch (InvocationTargetException except) {
+        throw except.getCause();
+      }
+    }
+
+    /** @return the help text that was given to the constructor */
+    public String getDescription() {
+      return description;
+    }
+  }
+
+  /**
+   * Print every registered program name with its description to stdout.
+   * @param programs map from program name to its ProgramDescription
+   */
+  private static void printUsage(Map programs) {
+    System.out.println("Valid program names are:");
+    Iterator entries = programs.entrySet().iterator();
+    while (entries.hasNext()) {
+      Map.Entry entry = (Entry) entries.next();
+      String name = (String) entry.getKey();
+      ProgramDescription program = (ProgramDescription) entry.getValue();
+      System.out.println(" " + name + ": " + program.getDescription());
+    }
+  }
+
+  /**
+   * Entry point that dispatches to one of the example programs.
+   * The first command line argument selects the example by name; when it
+   * is known, that example's main method is called with the remaining
+   * arguments. When the name is missing or unknown, a message and the
+   * list of valid names are printed and the method simply returns.
+   * @param args The argument from the user. args[0] is the command to run.
+   * @throws Throwable Anything thrown by the example program's main, or by
+   *         the reflective lookup of the examples' main methods
+   */
+  public static void main(String[] args) throws Throwable {
+    // Add new programs to this list
+    Map programs = new TreeMap();
+    programs.put("wordcount", new ProgramDescription(WordCount.class,
+      "A map/reduce program that counts the words in the input files."));
+    programs.put("grep", new ProgramDescription(Grep.class,
+      "A map/reduce program that counts the matches of a regex in the input."));
+
+    // Resolve the requested program; report a missing or unknown name.
+    ProgramDescription chosen = null;
+    if (args.length == 0) {
+      System.out.println("An example program must be given as the" +
+                         " first argument.");
+    } else {
+      chosen = (ProgramDescription) programs.get(args[0]);
+      if (chosen == null) {
+        System.out.println("Unknown program '" + args[0] + "' chosen.");
+      }
+    }
+    if (chosen == null) {
+      printUsage(programs);
+      return;
+    }
+
+    // Strip the program name and pass the remaining arguments along.
+    String[] rest = new String[args.length - 1];
+    System.arraycopy(args, 1, rest, 0, rest.length);
+    chosen.invoke(rest);
+  }
+
+}
Copied: lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/Grep.java (from r376018, lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/demo/Grep.java)
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/Grep.java?p2=lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/Grep.java&p1=lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/demo/Grep.java&r1=376018&r2=376028&rev=376028&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/demo/Grep.java (original)
+++ lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/Grep.java Wed Feb 8 11:20:24 2006
@@ -13,19 +13,22 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.hadoop.mapred.demo;
+package org.apache.hadoop.examples;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.RunningJob;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.apache.hadoop.mapred.lib.RegexMapper;
import org.apache.hadoop.mapred.lib.InverseMapper;
import org.apache.hadoop.mapred.lib.LongSumReducer;
+import org.apache.hadoop.mapred.lib.IdentityReducer;
import org.apache.hadoop.io.UTF8;
import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.WritableComparator;
import org.apache.hadoop.conf.Configuration;
Added: lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/WordCount.java
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/WordCount.java?rev=376028&view=auto
==============================================================================
--- lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/WordCount.java (added)
+++ lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/WordCount.java Wed Feb 8 11:20:24 2006
@@ -0,0 +1,156 @@
+/**
+ * Copyright 2006 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.examples;
+
+import java.io.*;
+import java.net.URL;
+import java.util.*;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.UTF8;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.Mapper;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reducer;
+import org.apache.hadoop.mapred.Reporter;
+
+/**
+ * This is an example Hadoop Map/Reduce application.
+ * It reads the text input files, breaks each line into words
+ * and counts them. The output is a locally sorted list of words and the
+ * count of how often they occurred.
+ *
+ * To run: bin/hadoop jar build/hadoop-examples.jar wordcount
+ * [-m <i>maps</i>] [-r <i>reduces</i>] <i>in-dir</i> <i>out-dir</i>
+ *
+ * @author Owen O'Malley
+ */
+public class WordCount {
+
+ /**
+ * Counts the words in each line.
+ * For each line of input, break the line into words and emit them as
+ * (<b>word</b>, <b>1</b>).
+ */
+ public static class MapClass implements Mapper {
+
+ private final static IntWritable one = new IntWritable(1);
+
+ public void map(WritableComparable key, Writable value,
+ OutputCollector output,
+ Reporter reporter) throws IOException {
+ String line = ((UTF8)value).toString();
+ StringTokenizer itr = new StringTokenizer(line);
+ while (itr.hasMoreTokens()) {
+ String word = itr.nextToken();
+ output.collect(new UTF8(word), one);
+ }
+ }
+
+ public void configure(JobConf job) {
+ }
+
+ }
+
+ /**
+ * A reducer class that just emits the sum of the input values.
+ */
+ public static class Reduce implements Reducer {
+
+ public void reduce(WritableComparable key, Iterator values,
+ OutputCollector output,
+ Reporter reporter) throws IOException {
+ int sum = 0;
+ while (values.hasNext()) {
+ sum += ((IntWritable) values.next()).get();
+ }
+ output.collect(key, new IntWritable(sum));
+ }
+
+ public void configure(JobConf job) {
+ }
+
+ }
+
+ static void printUsage() {
+ System.out.println("wordcount [-m <maps>] [-r <reduces>] <input> <output>");
+ System.exit(1);
+ }
+
+ /**
+ * The main driver for word count map/reduce program.
+ * Invoke this method to submit the map/reduce job.
+ * @throws IOException When there is communication problems with the
+ * job tracker.
+ */
+ public static void main(String[] args) throws IOException {
+ Configuration defaults = new Configuration();
+
+ JobConf countJob = new JobConf(defaults);
+
+ URL jar_url = WordCount.class.getClassLoader().
+ getResource("hadoop-examples.jar");
+ countJob.setJar(jar_url.getPath());
+
+ // the keys are words (strings)
+ countJob.setOutputKeyClass(UTF8.class);
+ // the values are counts (ints)
+ countJob.setOutputValueClass(IntWritable.class);
+
+ countJob.setMapperClass(MapClass.class);
+ countJob.setCombinerClass(Reduce.class);
+ countJob.setReducerClass(Reduce.class);
+
+ List other_args = new ArrayList();
+ for(int i=0; i < args.length; ++i) {
+ try {
+ if ("-m".equals(args[i])) {
+ countJob.setNumMapTasks(Integer.parseInt(args[++i]));
+ } else if ("-r".equals(args[i])) {
+ countJob.setNumReduceTasks(Integer.parseInt(args[++i]));
+ } else {
+ other_args.add(args[i]);
+ }
+ } catch (NumberFormatException except) {
+ System.out.println("ERROR: Integer expected instead of " + args[i]);
+ printUsage();
+ } catch (ArrayIndexOutOfBoundsException except) {
+ System.out.println("ERROR: Required parameter missing from " +
+ args[i-1]);
+ printUsage(); // exits
+ }
+ }
+ // Make sure there are exactly 2 parameters left.
+ if (other_args.size() != 2) {
+ System.out.println("ERROR: Wrong number of parameters: " +
+ other_args.size() + " instead of 2.");
+ printUsage();
+ }
+ countJob.setInputDir(new File((String) other_args.get(0)));
+ countJob.setOutputDir(new File((String) other_args.get(1)));
+
+ // Uncomment to run locally in a single process
+ // countJob.set("mapred.job.tracker", "local");
+
+ JobClient.runJob(countJob);
+ }
+
+}
Copied: lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/package.html (from r376018, lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/demo/package.html)
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/package.html?p2=lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/package.html&p1=lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/demo/package.html&r1=376018&r2=376028&rev=376028&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/demo/package.html (original)
+++ lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/package.html Wed Feb 8 11:20:24 2006
@@ -1,5 +1,5 @@
<html>
<body>
-MapReduce examples.
+Hadoop example code.
</body>
</html>
Added: lucene/hadoop/trunk/src/java/org/apache/hadoop/util/PrintJarMainClass.java
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/src/java/org/apache/hadoop/util/PrintJarMainClass.java?rev=376028&view=auto
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/util/PrintJarMainClass.java (added)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/util/PrintJarMainClass.java Wed Feb 8 11:20:24 2006
@@ -0,0 +1,50 @@
+/**
+ * Copyright 2006 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.util;
+
+import java.util.jar.*;
+
+/**
+ * A micro-application that prints the main class name out of a jar file.
+ * @author Owen O'Malley
+ */
+public class PrintJarMainClass {
+  /**
+   * Prints the jar's Main-Class ('/' becomes '.'); else prints UNKNOWN and exits with 1.
+   * @param args args[0] is the path of the jar file to inspect
+   */
+  public static void main(String[] args) {
+    String main_class = null;
+    try {
+      JarFile jar_file = new JarFile(args[0]);
+      try {
+        Manifest manifest = jar_file.getManifest();
+        if (manifest != null) {
+          main_class = manifest.getMainAttributes().getValue("Main-Class");
+        }
+      } finally {
+        jar_file.close();  // release the file handle on every path
+      }
+    } catch (Throwable e) {
+      // deliberately ignored: a missing argument or unreadable jar means UNKNOWN
+    }
+    if (main_class == null) {
+      System.out.println("UNKNOWN");
+      System.exit(1);
+    }
+    System.out.println(main_class.replace('/', '.'));
+  }
+}