Posted to common-commits@hadoop.apache.org by cu...@apache.org on 2006/08/15 22:19:27 UTC

svn commit: r431692 - in /lucene/hadoop/trunk: ./ src/contrib/streaming/src/java/org/apache/hadoop/streaming/ src/examples/org/apache/hadoop/examples/ src/java/org/apache/hadoop/mapred/ src/java/org/apache/hadoop/mapred/lib/ src/java/org/apache/hadoop/...

Author: cutting
Date: Tue Aug 15 13:19:25 2006
New Revision: 431692

URL: http://svn.apache.org/viewvc?rev=431692&view=rev
Log:
HADOOP-450.  Change so that input types are determined by the RecordReader rather than specified directly in the JobConf.  Also replace the use of UTF8 in TextInputFormat with Text.
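
For illustration, a minimal user-defined RecordReader under the revised
interface might look like the sketch below.  The class itself and its
in-memory backing array are hypothetical, and the sketch assumes the
interface consists of exactly the five methods visible in this commit's
diffs (next, getPos, close, plus the two new factory methods):

    import java.io.IOException;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.io.Writable;
    import org.apache.hadoop.io.WritableComparable;
    import org.apache.hadoop.mapred.RecordReader;

    /** A toy reader over an in-memory array of lines. */
    public class ArrayRecordReader implements RecordReader {
      private final String[] lines;
      private int index = 0;

      public ArrayRecordReader(String[] lines) { this.lines = lines; }

      // New in this commit: the reader, not the JobConf, declares the
      // key/value types by handing out fresh, reusable instances.
      public WritableComparable createKey() { return new LongWritable(); }
      public Writable createValue() { return new Text(); }

      public boolean next(Writable key, Writable value) throws IOException {
        if (index >= lines.length) return false;
        ((LongWritable) key).set(index);     // key is the record number
        ((Text) value).set(lines[index]);    // value is the line itself
        index++;
        return true;
      }

      public long getPos() throws IOException { return index; }

      public void close() throws IOException { }
    }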

Modified:
    lucene/hadoop/trunk/CHANGES.txt
    lucene/hadoop/trunk/src/contrib/streaming/src/java/org/apache/hadoop/streaming/StreamBaseRecordReader.java
    lucene/hadoop/trunk/src/contrib/streaming/src/java/org/apache/hadoop/streaming/StreamSequenceRecordReader.java
    lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/Grep.java
    lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/NNBench.java
    lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/PiBenchmark.java
    lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/RandomWriter.java
    lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/Sort.java
    lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/WordCount.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobConf.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/MapRunner.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/MapTask.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/RecordReader.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/ReduceTask.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/SequenceFileRecordReader.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TextInputFormat.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/lib/RegexMapper.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/lib/TokenCountMapper.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/tools/Logalyzer.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/util/CopyFiles.java
    lucene/hadoop/trunk/src/test/org/apache/hadoop/mapred/TestMapRed.java
    lucene/hadoop/trunk/src/test/org/apache/hadoop/mapred/TestMiniMRWithDFS.java
    lucene/hadoop/trunk/src/test/org/apache/hadoop/mapred/TestTextInputFormat.java

Modified: lucene/hadoop/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/CHANGES.txt?rev=431692&r1=431691&r2=431692&view=diff
==============================================================================
--- lucene/hadoop/trunk/CHANGES.txt (original)
+++ lucene/hadoop/trunk/CHANGES.txt Tue Aug 15 13:19:25 2006
@@ -25,6 +25,16 @@
  6. HADOOP-453.  Fix a bug in Text.setCapacity().  (siren via cutting)
 
 
+ 7. HADOOP-450.  Change so that input types are determined by the
+    RecordReader rather than specified directly in the JobConf.  This
+    facilitates jobs with a variety of input types.
+
+    WARNING: This contains incompatible API changes!  The RecordReader
+    interface has two new methods that all user-defined InputFormats
+    must now define.  Also, the values returned by TextInputFormat are
+    no longer of class UTF8, but now of class Text.
+
+
 Release 0.5.0 - 2006-08-04
 
  1. HADOOP-352.  Fix shell scripts to use /bin/sh instead of
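
The practical migration for existing job drivers shows up throughout the
example diffs below: the two setInput*Class calls are deleted, and casts
of TextInputFormat values change from UTF8 to Text.  Roughly (a hedged
sketch, not code from this tree):

    // Before: input types were declared by hand in the JobConf.
    job.setInputFormat(TextInputFormat.class);
    job.setInputKeyClass(LongWritable.class);   // deprecated by this commit
    job.setInputValueClass(UTF8.class);         // deprecated by this commit

    // After: the RecordReader supplies the types, so the two calls above
    // are simply removed, and inside the mapper any cast of the value
    // changes from UTF8 to Text.
    job.setInputFormat(TextInputFormat.class);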

Modified: lucene/hadoop/trunk/src/contrib/streaming/src/java/org/apache/hadoop/streaming/StreamBaseRecordReader.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/streaming/src/java/org/apache/hadoop/streaming/StreamBaseRecordReader.java?rev=431692&r1=431691&r2=431692&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/contrib/streaming/src/java/org/apache/hadoop/streaming/StreamBaseRecordReader.java (original)
+++ lucene/hadoop/trunk/src/contrib/streaming/src/java/org/apache/hadoop/streaming/StreamBaseRecordReader.java Tue Aug 15 13:19:25 2006
@@ -18,6 +18,7 @@
 
 import java.io.*;
 
+import org.apache.hadoop.io.UTF8;
 import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.io.WritableComparable;
 import org.apache.hadoop.fs.Path;
@@ -91,6 +92,14 @@
   public synchronized void close() throws IOException 
   { 
     in_.close(); 
+  }
+
+  public WritableComparable createKey() {
+    return new UTF8();
+  }
+  
+  public Writable createValue() {
+    return new UTF8();
   }
   
   /// StreamBaseRecordReader API

Modified: lucene/hadoop/trunk/src/contrib/streaming/src/java/org/apache/hadoop/streaming/StreamSequenceRecordReader.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/streaming/src/java/org/apache/hadoop/streaming/StreamSequenceRecordReader.java?rev=431692&r1=431691&r2=431692&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/contrib/streaming/src/java/org/apache/hadoop/streaming/StreamSequenceRecordReader.java (original)
+++ lucene/hadoop/trunk/src/contrib/streaming/src/java/org/apache/hadoop/streaming/StreamSequenceRecordReader.java Tue Aug 15 13:19:25 2006
@@ -19,6 +19,7 @@
 import java.io.*;
 
 import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.WritableComparable;
 import org.apache.hadoop.io.SequenceFile;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.FSDataInputStream;
@@ -26,6 +27,7 @@
 import org.apache.hadoop.mapred.Reporter;
 import org.apache.hadoop.mapred.JobConf;
 
+import org.apache.hadoop.util.ReflectionUtils;
 
 public class StreamSequenceRecordReader extends StreamBaseRecordReader
 {
@@ -86,6 +88,15 @@
     //return new SequenceFileRecordReader(job_, split_);
   }
 
+  public WritableComparable createKey() {
+    return (WritableComparable) 
+           ReflectionUtils.newInstance(rin_.getKeyClass(), null);
+  }
+  
+  public Writable createValue() {
+    return (Writable) ReflectionUtils.newInstance(rin_.getValueClass(), null);
+  }
+  
   boolean more_;
   SequenceFile.Reader rin_;
   int numFailed_;

Modified: lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/Grep.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/Grep.java?rev=431692&r1=431691&r2=431692&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/Grep.java (original)
+++ lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/Grep.java Tue Aug 15 13:19:25 2006
@@ -24,8 +24,8 @@
 import org.apache.hadoop.mapred.lib.InverseMapper;
 import org.apache.hadoop.mapred.lib.LongSumReducer;
 
-import org.apache.hadoop.io.UTF8;
 import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
 
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
@@ -61,7 +61,7 @@
 
     grepJob.setOutputPath(tempDir);
     grepJob.setOutputFormat(SequenceFileOutputFormat.class);
-    grepJob.setOutputKeyClass(UTF8.class);
+    grepJob.setOutputKeyClass(Text.class);
     grepJob.setOutputValueClass(LongWritable.class);
 
     JobClient.runJob(grepJob);
@@ -71,8 +71,6 @@
 
     sortJob.setInputPath(tempDir);
     sortJob.setInputFormat(SequenceFileInputFormat.class);
-    sortJob.setInputKeyClass(UTF8.class);
-    sortJob.setInputValueClass(LongWritable.class);
 
     sortJob.setMapperClass(InverseMapper.class);
 

Modified: lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/NNBench.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/NNBench.java?rev=431692&r1=431691&r2=431692&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/NNBench.java (original)
+++ lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/NNBench.java Tue Aug 15 13:19:25 2006
@@ -155,8 +155,6 @@
     // turn off speculative execution, because DFS doesn't handle
     // multiple writers to the same file.
     jobConf.setSpeculativeExecution(false);
-    jobConf.setInputKeyClass(IntWritable.class);
-    jobConf.setInputValueClass(IntWritable.class);
     jobConf.setInputFormat(SequenceFileInputFormat.class);
     jobConf.setOutputKeyClass(BytesWritable.class);
     jobConf.setOutputValueClass(BytesWritable.class);

Modified: lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/PiBenchmark.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/PiBenchmark.java?rev=431692&r1=431691&r2=431692&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/PiBenchmark.java (original)
+++ lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/PiBenchmark.java Tue Aug 15 13:19:25 2006
@@ -148,8 +148,6 @@
     // turn off speculative execution, because DFS doesn't handle
     // multiple writers to the same file.
     jobConf.setSpeculativeExecution(false);
-    jobConf.setInputKeyClass(LongWritable.class);
-    jobConf.setInputValueClass(LongWritable.class);
     jobConf.setInputFormat(SequenceFileInputFormat.class);
         
     jobConf.setOutputKeyClass(LongWritable.class);

Modified: lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/RandomWriter.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/RandomWriter.java?rev=431692&r1=431691&r2=431692&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/RandomWriter.java (original)
+++ lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/RandomWriter.java Tue Aug 15 13:19:25 2006
@@ -28,7 +28,7 @@
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.BytesWritable;
 import org.apache.hadoop.io.SequenceFile;
-import org.apache.hadoop.io.UTF8;
+import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.io.WritableComparable;
 import org.apache.hadoop.mapred.ClusterStatus;
@@ -73,7 +73,7 @@
                     Writable value,
                     OutputCollector output, 
                     Reporter reporter) throws IOException {
-      String filename = ((UTF8) value).toString();
+      String filename = ((Text) value).toString();
       SequenceFile.Writer writer = 
         new SequenceFile.Writer(fileSys, new Path(filename), 
                                 BytesWritable.class, BytesWritable.class,

Modified: lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/Sort.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/Sort.java?rev=431692&r1=431691&r2=431692&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/Sort.java (original)
+++ lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/Sort.java Tue Aug 15 13:19:25 2006
@@ -56,8 +56,6 @@
     jobConf.setInputFormat(SequenceFileInputFormat.class);
     jobConf.setOutputFormat(SequenceFileOutputFormat.class);
    
-    jobConf.setInputKeyClass(BytesWritable.class);
-    jobConf.setInputValueClass(BytesWritable.class);
     jobConf.setOutputKeyClass(BytesWritable.class);
     jobConf.setOutputValueClass(BytesWritable.class);
     

Modified: lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/WordCount.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/WordCount.java?rev=431692&r1=431691&r2=431692&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/WordCount.java (original)
+++ lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/WordCount.java Tue Aug 15 13:19:25 2006
@@ -21,7 +21,7 @@
 
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.IntWritable;
-import org.apache.hadoop.io.UTF8;
+import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.io.WritableComparable;
 import org.apache.hadoop.mapred.JobClient;
@@ -53,12 +53,12 @@
   public static class MapClass extends MapReduceBase implements Mapper {
     
     private final static IntWritable one = new IntWritable(1);
-    private UTF8 word = new UTF8();
+    private Text word = new Text();
     
     public void map(WritableComparable key, Writable value, 
         OutputCollector output, 
         Reporter reporter) throws IOException {
-      String line = ((UTF8)value).toString();
+      String line = ((Text)value).toString();
       StringTokenizer itr = new StringTokenizer(line);
       while (itr.hasMoreTokens()) {
         word.set(itr.nextToken());
@@ -99,7 +99,7 @@
     conf.setJobName("wordcount");
  
     // the keys are words (strings)
-    conf.setOutputKeyClass(UTF8.class);
+    conf.setOutputKeyClass(Text.class);
     // the values are counts (ints)
     conf.setOutputValueClass(IntWritable.class);
     

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobConf.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobConf.java?rev=431692&r1=431691&r2=431692&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobConf.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobConf.java Tue Aug 15 13:19:25 2006
@@ -296,19 +296,24 @@
   public void setOutputFormat(Class theClass) {
     setClass("mapred.output.format.class", theClass, OutputFormat.class);
   }
-  
+
+  /** @deprecated Call {@link RecordReader#createKey()}. */
   public Class getInputKeyClass() {
     return getClass("mapred.input.key.class",
                     LongWritable.class, WritableComparable.class);
   }
+
+  /** @deprecated Not used */
   public void setInputKeyClass(Class theClass) {
     setClass("mapred.input.key.class", theClass, WritableComparable.class);
   }
 
+  /** @deprecated Call {@link RecordReader#createValue()}. */
   public Class getInputValueClass() {
     return getClass("mapred.input.value.class", UTF8.class, Writable.class);
   }
 
+  /** @deprecated Not used */
   public void setInputValueClass(Class theClass) {
     setClass("mapred.input.value.class", theClass, Writable.class);
   }

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/MapRunner.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/MapRunner.java?rev=431692&r1=431691&r2=431692&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/MapRunner.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/MapRunner.java Tue Aug 15 13:19:25 2006
@@ -26,15 +26,11 @@
 public class MapRunner implements MapRunnable {
   private JobConf job;
   private Mapper mapper;
-  private Class inputKeyClass;
-  private Class inputValueClass;
 
   public void configure(JobConf job) {
     this.job = job;
     this.mapper = (Mapper)ReflectionUtils.newInstance(job.getMapperClass(),
                                                       job);
-    this.inputKeyClass = job.getInputKeyClass();
-    this.inputValueClass = job.getInputValueClass();
   }
 
   public void run(RecordReader input, OutputCollector output,
@@ -42,10 +38,9 @@
     throws IOException {
     try {
       // allocate key & value instances that are re-used for all entries
-      WritableComparable key =
-        (WritableComparable)ReflectionUtils.newInstance(inputKeyClass, job);
-      Writable value = (Writable)ReflectionUtils.newInstance(inputValueClass,
-                                                             job);
+      WritableComparable key = input.createKey();
+      Writable value = input.createValue();
+      
       while (input.next(key, value)) {
         // map pair to output
         mapper.map(key, value, output, reporter);

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/MapTask.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/MapTask.java?rev=431692&r1=431691&r2=431692&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/MapTask.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/MapTask.java Tue Aug 15 13:19:25 2006
@@ -54,13 +54,6 @@
       metricsRecord = Metrics.createRecord("mapred", "map", "taskid", taskId);
     }
     
-    private void reportMetric(String name, long value) {
-      if (metricsRecord != null) {
-        metricsRecord.setMetric(name, value);
-        metricsRecord.update();
-      }
-    }
-    
     synchronized void mapInput(long numBytes) {
       Metrics.report(metricsRecord, "input-records", ++numInputRecords);
       numInputBytes += numBytes;
@@ -172,6 +165,14 @@
       RecordReader in = new RecordReader() {      // wrap in progress reporter
           private float perByte = 1.0f /(float)split.getLength();
 
+          public WritableComparable createKey() {
+            return rawIn.createKey();
+          }
+          
+          public Writable createValue() {
+            return rawIn.createValue();
+          }
+          
           public synchronized boolean next(Writable key, Writable value)
             throws IOException {
 

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/RecordReader.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/RecordReader.java?rev=431692&r1=431691&r2=431692&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/RecordReader.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/RecordReader.java Tue Aug 15 13:19:25 2006
@@ -20,6 +20,7 @@
 import java.io.DataInput;
 
 import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.WritableComparable;
 
 /** Reads key/value pairs from an input file {@link FileSplit}.
  * Implemented by {@link InputFormat} implementations. */
@@ -33,6 +34,18 @@
    * @see Writable#readFields(DataInput)
    */      
   boolean next(Writable key, Writable value) throws IOException;
+  
+  /**
+   * Create an object of the appropriate type to be used as a key.
+   * @return a new key object
+   */
+  WritableComparable createKey();
+  
+  /**
+   * Create an object of the appropriate type to be used as the value.
+   * @return a new value object
+   */
+  Writable createValue();
 
   /** Returns the current position in the input. */
   long getPos() throws IOException;
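
The calling convention these two methods establish is the one MapRunner
adopts above: given the RecordReader input, Mapper mapper, OutputCollector
output, and Reporter reporter that MapRunner.run receives, the framework
asks the reader for one reusable key and one reusable value, then loops:

    WritableComparable key = input.createKey();   // allocated once per task
    Writable value = input.createValue();
    while (input.next(key, value)) {              // instances refilled per record
      mapper.map(key, value, output, reporter);
    }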

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/ReduceTask.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/ReduceTask.java?rev=431692&r1=431691&r2=431692&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/ReduceTask.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/ReduceTask.java Tue Aug 15 13:19:25 2006
@@ -62,8 +62,6 @@
   
   private ReduceTaskMetrics myMetrics = null;
   
-  private UTF8 jobId = new UTF8();
-
   private int numMaps;
   private boolean sortComplete;
 

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/SequenceFileRecordReader.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/SequenceFileRecordReader.java?rev=431692&r1=431691&r2=431692&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/SequenceFileRecordReader.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/SequenceFileRecordReader.java Tue Aug 15 13:19:25 2006
@@ -20,21 +20,23 @@
 
 import org.apache.hadoop.fs.FileSystem;
 
-import org.apache.hadoop.io.SequenceFile;
-import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.*;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.util.ReflectionUtils;
 
 /** An {@link RecordReader} for {@link SequenceFile}s. */
 public class SequenceFileRecordReader implements RecordReader {
   private SequenceFile.Reader in;
   private long end;
   private boolean more = true;
+  private Configuration conf;
 
   public SequenceFileRecordReader(Configuration conf, FileSplit split)
     throws IOException {
     FileSystem fs = FileSystem.get(conf);
     this.in = new SequenceFile.Reader(fs, split.getPath(), conf);
     this.end = split.getStart() + split.getLength();
+    this.conf = conf;
 
     if (split.getStart() > in.getPosition())
       in.sync(split.getStart());                  // sync to start
@@ -51,6 +53,15 @@
    * #next(Writable,Writable)}.. */
   public Class getValueClass() { return in.getValueClass(); }
   
+  public WritableComparable createKey() {
+    return (WritableComparable) ReflectionUtils.newInstance(getKeyClass(), 
+                                                            conf);
+  }
+  
+  public Writable createValue() {
+    return (Writable) ReflectionUtils.newInstance(getValueClass(), conf);
+  }
+    
   public synchronized boolean next(Writable key, Writable value)
     throws IOException {
     if (!more) return false;
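
Because a SequenceFile's header records its own key and value classes, a
caller can now iterate a file whose types are unknown at compile time.  A
hedged sketch, assuming a Configuration conf and a FileSplit split are in
scope (the constructor signature matches the one in the hunk above):

    SequenceFileRecordReader reader = new SequenceFileRecordReader(conf, split);
    WritableComparable key = reader.createKey();   // runtime class comes from
    Writable value = reader.createValue();         // the file's own header
    while (reader.next(key, value)) {
      // process key/value without naming their classes in the JobConf
    }
    reader.close();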

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TextInputFormat.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TextInputFormat.java?rev=431692&r1=431691&r2=431692&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TextInputFormat.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TextInputFormat.java Tue Aug 15 13:19:25 2006
@@ -21,9 +21,7 @@
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.FSDataInputStream;
 
-import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.UTF8;
+import org.apache.hadoop.io.*;
 
 /** An {@link InputFormat} for plain text files.  Files are broken into lines.
  * Either linefeed or carriage-return are used to signal end of line.  Keys are
@@ -53,6 +51,15 @@
     }
 
     return new RecordReader() {
+      
+        public WritableComparable createKey() {
+          return new LongWritable();
+        }
+        
+        public Writable createValue() {
+          return new Text();
+        }
+        
         /** Read a line. */
         public synchronized boolean next(Writable key, Writable value)
           throws IOException {
@@ -61,7 +68,7 @@
             return false;
 
           ((LongWritable)key).set(pos);           // key is position
-          ((UTF8)value).set(readLine(in));        // value is line
+          ((Text)value).set(readLine(in));        // value is line
           return true;
         }
         

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/lib/RegexMapper.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/lib/RegexMapper.java?rev=431692&r1=431691&r2=431692&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/lib/RegexMapper.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/lib/RegexMapper.java Tue Aug 15 13:19:25 2006
@@ -27,7 +27,7 @@
 import org.apache.hadoop.io.WritableComparable;
 import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.UTF8;
+import org.apache.hadoop.io.Text;
 
 
 import java.util.regex.Pattern;
@@ -48,10 +48,10 @@
   public void map(WritableComparable key, Writable value,
                   OutputCollector output, Reporter reporter)
     throws IOException {
-    String text = ((UTF8)value).toString();
+    String text = ((Text)value).toString();
     Matcher matcher = pattern.matcher(text);
     while (matcher.find()) {
-      output.collect(new UTF8(matcher.group(group)), new LongWritable(1));
+      output.collect(new Text(matcher.group(group)), new LongWritable(1));
     }
   }
 

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/lib/TokenCountMapper.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/lib/TokenCountMapper.java?rev=431692&r1=431691&r2=431692&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/lib/TokenCountMapper.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/lib/TokenCountMapper.java Tue Aug 15 13:19:25 2006
@@ -27,7 +27,7 @@
 import org.apache.hadoop.io.WritableComparable;
 import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.UTF8;
+import org.apache.hadoop.io.Text;
 
 
 /** A {@link Mapper} that maps text values into <token,freq> pairs.  Uses
@@ -38,13 +38,13 @@
                   OutputCollector output, Reporter reporter)
     throws IOException {
     // get input text
-    String text = ((UTF8)value).toString();       // value is line of text
+    String text = ((Text)value).toString();       // value is line of text
 
     // tokenize the value
     StringTokenizer st = new StringTokenizer(text);
     while (st.hasMoreTokens()) {
       // output <token,1> pairs
-      output.collect(new UTF8(st.nextToken()), new LongWritable(1));
+      output.collect(new Text(st.nextToken()), new LongWritable(1));
     }  
   }
   

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/tools/Logalyzer.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/tools/Logalyzer.java?rev=431692&r1=431691&r2=431692&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/tools/Logalyzer.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/tools/Logalyzer.java Tue Aug 15 13:19:25 2006
@@ -25,7 +25,7 @@
 import org.apache.hadoop.mapred.*;
 import org.apache.hadoop.fs.*;
 import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.UTF8;
+import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.WritableComparator;
 import org.apache.hadoop.conf.*;
 import org.apache.hadoop.util.CopyFiles;
@@ -73,17 +73,17 @@
     public void map(WritableComparable key, Writable value,
         OutputCollector output, Reporter reporter)
     throws IOException {
-      String text = ((UTF8)value).toString();
+      String text = ((Text)value).toString();
       Matcher matcher = pattern.matcher(text);
       while (matcher.find()) {
-        output.collect((UTF8)value, new LongWritable(1));
+        output.collect((Text)value, new LongWritable(1));
       }
     }
     
   }
   
   /** A WritableComparator optimized for UTF8 keys of the logs. */
-  public static class LogComparator extends UTF8.Comparator implements Configurable {
+  public static class LogComparator extends Text.Comparator implements Configurable {
     
     private static Log LOG = LogFactory.getLog("org.apache.hadoop.tools.Logalyzer");
     private JobConf conf = null;
@@ -119,12 +119,12 @@
       }
       
       try {
-        UTF8 logline1 = new UTF8(); 
+        Text logline1 = new Text(); 
         logline1.readFields(new DataInputStream(new ByteArrayInputStream(b1, s1, l1)));
         String line1 = logline1.toString();
         String[] logColumns1 = line1.split(columnSeparator);
         
-        UTF8 logline2 = new UTF8(); 
+        Text logline2 = new Text(); 
         logline2.readFields(new DataInputStream(new ByteArrayInputStream(b2, s2, l2)));
         String line2 = logline2.toString();
         String[] logColumns2 = line2.split(columnSeparator);
@@ -161,7 +161,7 @@
     
     static {                                        
       // register this comparator
-      WritableComparator.define(UTF8.class, new LogComparator());
+      WritableComparator.define(Text.class, new LogComparator());
     }
   }
   
@@ -209,8 +209,6 @@
     
     grepJob.setInputPath(grepInput);
     grepJob.setInputFormat(TextInputFormat.class);
-    grepJob.setInputKeyClass(LongWritable.class);
-    grepJob.setInputValueClass(UTF8.class);
     
     grepJob.setMapperClass(LogRegexMapper.class);
     grepJob.set("mapred.mapper.regex", grepPattern);
@@ -222,7 +220,7 @@
     
     grepJob.setOutputPath(analysisOutput);
     grepJob.setOutputFormat(TextOutputFormat.class);
-    grepJob.setOutputKeyClass(UTF8.class);
+    grepJob.setOutputKeyClass(Text.class);
     grepJob.setOutputValueClass(LongWritable.class);
     grepJob.setOutputKeyComparatorClass(LogComparator.class);
     

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/util/CopyFiles.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/util/CopyFiles.java?rev=431692&r1=431691&r2=431692&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/util/CopyFiles.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/util/CopyFiles.java Tue Aug 15 13:19:25 2006
@@ -40,7 +40,7 @@
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.SequenceFile;
-import org.apache.hadoop.io.UTF8;
+import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.io.WritableComparable;
 import org.apache.hadoop.mapred.ClusterStatus;
@@ -265,12 +265,10 @@
       // turn off speculative execution, because DFS doesn't handle
       // multiple writers to the same file.
       jobConf.setSpeculativeExecution(false);
-      jobConf.setInputKeyClass(UTF8.class);
-      jobConf.setInputValueClass(UTF8.class);
       jobConf.setInputFormat(SequenceFileInputFormat.class);
       
-      jobConf.setOutputKeyClass(UTF8.class);
-      jobConf.setOutputValueClass(UTF8.class);
+      jobConf.setOutputKeyClass(Text.class);
+      jobConf.setOutputValueClass(Text.class);
       jobConf.setOutputFormat(SequenceFileOutputFormat.class);
       
       jobConf.setMapperClass(DFSCopyFilesMapper.class);
@@ -332,10 +330,10 @@
       for(int idx=0; idx < numMaps; ++idx) {
         Path file = new Path(inDir, "part"+idx);
         SequenceFile.Writer writer = 
-          new SequenceFile.Writer(fileSys, file, UTF8.class, UTF8.class);
+          new SequenceFile.Writer(fileSys, file, Text.class, Text.class);
         for (int ipath = idx; ipath < nFiles; ipath += numMaps) {
           String path = (String) finalPathList.get(ipath);
-          writer.append(new UTF8(path), new UTF8(""));
+          writer.append(new Text(path), new Text(""));
         }
         writer.close();
       }
@@ -388,7 +386,7 @@
         Writable value,
         OutputCollector out,
         Reporter reporter) throws IOException {
-      String src = ((UTF8) key).toString();
+      String src = ((Text) key).toString();
       try {
         copy(src, reporter);
       } catch (IOException except) {
@@ -449,12 +447,10 @@
       //Setup the MR-job configuration
       jobConf.setSpeculativeExecution(false);
       
-      jobConf.setInputKeyClass(UTF8.class);
-      jobConf.setInputValueClass(UTF8.class);
       jobConf.setInputFormat(SequenceFileInputFormat.class);
       
-      jobConf.setOutputKeyClass(UTF8.class);
-      jobConf.setOutputValueClass(UTF8.class);
+      jobConf.setOutputKeyClass(Text.class);
+      jobConf.setOutputValueClass(Text.class);
       jobConf.setOutputFormat(SequenceFileOutputFormat.class);
       
       jobConf.setMapperClass(HTTPCopyFilesMapper.class);
@@ -492,8 +488,8 @@
       for(int i=0; i < srcPaths.length; ++i) {
         Path ipFile = new Path(jobInputDir, "part" + i);
         SequenceFile.Writer writer = 
-          new SequenceFile.Writer(fileSystem, ipFile, UTF8.class, UTF8.class);
-        writer.append(new UTF8(srcPaths[i]), new UTF8(""));
+          new SequenceFile.Writer(fileSystem, ipFile, Text.class, Text.class);
+        writer.append(new Text(srcPaths[i]), new Text(""));
         writer.close();
       }
     }	
@@ -538,7 +534,7 @@
         {
       //The url of the file
       try {
-        srcURI = new URI(((UTF8)key).toString());
+        srcURI = new URI(((Text)key).toString());
         
         //Construct the complete destination path
         File urlPath = new File(srcURI.getPath());
@@ -574,11 +570,11 @@
             " to: " + destinationPath.toString());
         
       } catch(Exception e) {
-        reporter.setStatus("Failed to copy from: " + (UTF8)key);
+        reporter.setStatus("Failed to copy from: " + (Text)key);
         if(ignoreReadFailures) {
           return;
         } else {
-          throw new IOException("Failed to copy from: " + (UTF8)key);
+          throw new IOException("Failed to copy from: " + (Text)key);
         }
       }
     }

Modified: lucene/hadoop/trunk/src/test/org/apache/hadoop/mapred/TestMapRed.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/test/org/apache/hadoop/mapred/TestMapRed.java?rev=431692&r1=431691&r2=431692&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/test/org/apache/hadoop/mapred/TestMapRed.java (original)
+++ lucene/hadoop/trunk/src/test/org/apache/hadoop/mapred/TestMapRed.java Tue Aug 15 13:19:25 2006
@@ -108,7 +108,7 @@
             int keyint = ((IntWritable) key).get();
             while (it.hasNext()) {
                 int val = ((IntWritable) it.next()).get();
-                out.collect(new UTF8("" + val), new UTF8(""));
+                out.collect(new Text("" + val), new Text(""));
             }
         }
         public void close() {
@@ -137,7 +137,7 @@
 
         public void map(WritableComparable key, Writable val, OutputCollector out, Reporter reporter) throws IOException {
             long pos = ((LongWritable) key).get();
-            UTF8 str = (UTF8) val;
+            Text str = (Text) val;
 
             out.collect(new IntWritable(Integer.parseInt(str.toString().trim())), new IntWritable(1));
         }
@@ -231,8 +231,8 @@
       public void map(WritableComparable key, Writable value,
                       OutputCollector output, Reporter reporter
                       ) throws IOException {
-        String str = ((UTF8) value).toString().toLowerCase();
-        output.collect(new UTF8(str), value);
+        String str = ((Text) value).toString().toLowerCase();
+        output.collect(new Text(str), value);
       }
 
       public void close() throws IOException {
@@ -299,8 +299,8 @@
       conf.setOutputPath(outDir);
       conf.setMapperClass(MyMap.class);
       conf.setReducerClass(MyReduce.class);
-      conf.setOutputKeyClass(UTF8.class);
-      conf.setOutputValueClass(UTF8.class);
+      conf.setOutputKeyClass(Text.class);
+      conf.setOutputValueClass(Text.class);
       conf.setOutputFormat(SequenceFileOutputFormat.class);
       if (includeCombine) {
         conf.setCombinerClass(IdentityReducer.class);
@@ -456,8 +456,6 @@
         fs.delete(intermediateOuts);
         JobConf checkJob = new JobConf(conf);
         checkJob.setInputPath(randomOuts);
-        checkJob.setInputKeyClass(LongWritable.class);
-        checkJob.setInputValueClass(UTF8.class);
         checkJob.setInputFormat(TextInputFormat.class);
         checkJob.setMapperClass(RandomCheckMapper.class);
 

Modified: lucene/hadoop/trunk/src/test/org/apache/hadoop/mapred/TestMiniMRWithDFS.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/test/org/apache/hadoop/mapred/TestMiniMRWithDFS.java?rev=431692&r1=431691&r2=431692&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/test/org/apache/hadoop/mapred/TestMiniMRWithDFS.java (original)
+++ lucene/hadoop/trunk/src/test/org/apache/hadoop/mapred/TestMiniMRWithDFS.java Tue Aug 15 13:19:25 2006
@@ -24,7 +24,7 @@
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.IntWritable;
-import org.apache.hadoop.io.UTF8;
+import org.apache.hadoop.io.Text;
 import org.apache.hadoop.examples.WordCount;
 
 /**
@@ -58,7 +58,7 @@
     conf.setJobName("wordcount");
     
     // the keys are words (strings)
-    conf.setOutputKeyClass(UTF8.class);
+    conf.setOutputKeyClass(Text.class);
     // the values are counts (ints)
     conf.setOutputValueClass(IntWritable.class);
     

Modified: lucene/hadoop/trunk/src/test/org/apache/hadoop/mapred/TestTextInputFormat.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/test/org/apache/hadoop/mapred/TestTextInputFormat.java?rev=431692&r1=431691&r2=431692&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/test/org/apache/hadoop/mapred/TestTextInputFormat.java (original)
+++ lucene/hadoop/trunk/src/test/org/apache/hadoop/mapred/TestTextInputFormat.java Tue Aug 15 13:19:25 2006
@@ -20,14 +20,11 @@
 import java.util.*;
 import junit.framework.TestCase;
 
-import org.apache.commons.logging.*;
-
 import org.apache.hadoop.fs.*;
 import org.apache.hadoop.io.*;
 import org.apache.hadoop.conf.*;
 
 public class TestTextInputFormat extends TestCase {
-  private static final Log LOG = InputFormatBase.LOG;
 
   private static int MAX_LENGTH = 10000;
   private static Configuration conf = new Configuration();
@@ -70,7 +67,7 @@
       // try splitting the file in a variety of sizes
       InputFormat format = new TextInputFormat();
       LongWritable key = new LongWritable();
-      UTF8 value = new UTF8();
+      Text value = new Text();
       for (int i = 0; i < 3; i++) {
         int numSplits = random.nextInt(MAX_LENGTH/20)+1;
         //LOG.info("splitting: requesting = " + numSplits);