You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by cu...@apache.org on 2006/08/15 22:19:27 UTC
svn commit: r431692 - in /lucene/hadoop/trunk: ./
src/contrib/streaming/src/java/org/apache/hadoop/streaming/
src/examples/org/apache/hadoop/examples/ src/java/org/apache/hadoop/mapred/
src/java/org/apache/hadoop/mapred/lib/ src/java/org/apache/hadoop/...
Author: cutting
Date: Tue Aug 15 13:19:25 2006
New Revision: 431692
URL: http://svn.apache.org/viewvc?rev=431692&view=rev
Log:
HADOOP-450. Change so that input types are determined by the RecordReader rather than specified directly in the JobConf. Also replace use of UTF8 in TextInputFormat with Text.
Modified:
lucene/hadoop/trunk/CHANGES.txt
lucene/hadoop/trunk/src/contrib/streaming/src/java/org/apache/hadoop/streaming/StreamBaseRecordReader.java
lucene/hadoop/trunk/src/contrib/streaming/src/java/org/apache/hadoop/streaming/StreamSequenceRecordReader.java
lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/Grep.java
lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/NNBench.java
lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/PiBenchmark.java
lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/RandomWriter.java
lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/Sort.java
lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/WordCount.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobConf.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/MapRunner.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/MapTask.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/RecordReader.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/ReduceTask.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/SequenceFileRecordReader.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TextInputFormat.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/lib/RegexMapper.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/lib/TokenCountMapper.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/tools/Logalyzer.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/util/CopyFiles.java
lucene/hadoop/trunk/src/test/org/apache/hadoop/mapred/TestMapRed.java
lucene/hadoop/trunk/src/test/org/apache/hadoop/mapred/TestMiniMRWithDFS.java
lucene/hadoop/trunk/src/test/org/apache/hadoop/mapred/TestTextInputFormat.java
Modified: lucene/hadoop/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/CHANGES.txt?rev=431692&r1=431691&r2=431692&view=diff
==============================================================================
--- lucene/hadoop/trunk/CHANGES.txt (original)
+++ lucene/hadoop/trunk/CHANGES.txt Tue Aug 15 13:19:25 2006
@@ -25,6 +25,16 @@
6. HADOOP-453. Fix a bug in Text.setCapacity(). (siren via cutting)
+ 7. HADOOP-450. Change so that input types are determined by the
+ RecordReader rather than specified directly in the JobConf. This
+ facilitates jobs with a variety of input types.
+
+ WARNING: This contains incompatible API changes! The RecordReader
+ interface has two new methods that all user-defined InputFormats
+ must now define. Also, the values returned by TextInputFormat are
+ no longer of class UTF8, but now of class Text.
+
+
Release 0.5.0 - 2006-08-04
1. HADOOP-352. Fix shell scripts to use /bin/sh instead of
Modified: lucene/hadoop/trunk/src/contrib/streaming/src/java/org/apache/hadoop/streaming/StreamBaseRecordReader.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/streaming/src/java/org/apache/hadoop/streaming/StreamBaseRecordReader.java?rev=431692&r1=431691&r2=431692&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/contrib/streaming/src/java/org/apache/hadoop/streaming/StreamBaseRecordReader.java (original)
+++ lucene/hadoop/trunk/src/contrib/streaming/src/java/org/apache/hadoop/streaming/StreamBaseRecordReader.java Tue Aug 15 13:19:25 2006
@@ -18,6 +18,7 @@
import java.io.*;
+import org.apache.hadoop.io.UTF8;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.fs.Path;
@@ -91,6 +92,14 @@
public synchronized void close() throws IOException
{
in_.close();
+ }
+
+ public WritableComparable createKey() {
+ return new UTF8();
+ }
+
+ public Writable createValue() {
+ return new UTF8();
}
/// StreamBaseRecordReader API
Modified: lucene/hadoop/trunk/src/contrib/streaming/src/java/org/apache/hadoop/streaming/StreamSequenceRecordReader.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/streaming/src/java/org/apache/hadoop/streaming/StreamSequenceRecordReader.java?rev=431692&r1=431691&r2=431692&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/contrib/streaming/src/java/org/apache/hadoop/streaming/StreamSequenceRecordReader.java (original)
+++ lucene/hadoop/trunk/src/contrib/streaming/src/java/org/apache/hadoop/streaming/StreamSequenceRecordReader.java Tue Aug 15 13:19:25 2006
@@ -19,6 +19,7 @@
import java.io.*;
import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FSDataInputStream;
@@ -26,6 +27,7 @@
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.util.ReflectionUtils;
public class StreamSequenceRecordReader extends StreamBaseRecordReader
{
@@ -86,6 +88,15 @@
//return new SequenceFileRecordReader(job_, split_);
}
+ public WritableComparable createKey() {
+ return (WritableComparable)
+ ReflectionUtils.newInstance(rin_.getKeyClass(), null);
+ }
+
+ public Writable createValue() {
+ return (Writable) ReflectionUtils.newInstance(rin_.getValueClass(), null);
+ }
+
boolean more_;
SequenceFile.Reader rin_;
int numFailed_;
Modified: lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/Grep.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/Grep.java?rev=431692&r1=431691&r2=431692&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/Grep.java (original)
+++ lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/Grep.java Tue Aug 15 13:19:25 2006
@@ -24,8 +24,8 @@
import org.apache.hadoop.mapred.lib.InverseMapper;
import org.apache.hadoop.mapred.lib.LongSumReducer;
-import org.apache.hadoop.io.UTF8;
import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
@@ -61,7 +61,7 @@
grepJob.setOutputPath(tempDir);
grepJob.setOutputFormat(SequenceFileOutputFormat.class);
- grepJob.setOutputKeyClass(UTF8.class);
+ grepJob.setOutputKeyClass(Text.class);
grepJob.setOutputValueClass(LongWritable.class);
JobClient.runJob(grepJob);
@@ -71,8 +71,6 @@
sortJob.setInputPath(tempDir);
sortJob.setInputFormat(SequenceFileInputFormat.class);
- sortJob.setInputKeyClass(UTF8.class);
- sortJob.setInputValueClass(LongWritable.class);
sortJob.setMapperClass(InverseMapper.class);
Modified: lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/NNBench.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/NNBench.java?rev=431692&r1=431691&r2=431692&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/NNBench.java (original)
+++ lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/NNBench.java Tue Aug 15 13:19:25 2006
@@ -155,8 +155,6 @@
// turn off speculative execution, because DFS doesn't handle
// multiple writers to the same file.
jobConf.setSpeculativeExecution(false);
- jobConf.setInputKeyClass(IntWritable.class);
- jobConf.setInputValueClass(IntWritable.class);
jobConf.setInputFormat(SequenceFileInputFormat.class);
jobConf.setOutputKeyClass(BytesWritable.class);
jobConf.setOutputValueClass(BytesWritable.class);
Modified: lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/PiBenchmark.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/PiBenchmark.java?rev=431692&r1=431691&r2=431692&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/PiBenchmark.java (original)
+++ lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/PiBenchmark.java Tue Aug 15 13:19:25 2006
@@ -148,8 +148,6 @@
// turn off speculative execution, because DFS doesn't handle
// multiple writers to the same file.
jobConf.setSpeculativeExecution(false);
- jobConf.setInputKeyClass(LongWritable.class);
- jobConf.setInputValueClass(LongWritable.class);
jobConf.setInputFormat(SequenceFileInputFormat.class);
jobConf.setOutputKeyClass(LongWritable.class);
Modified: lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/RandomWriter.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/RandomWriter.java?rev=431692&r1=431691&r2=431692&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/RandomWriter.java (original)
+++ lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/RandomWriter.java Tue Aug 15 13:19:25 2006
@@ -28,7 +28,7 @@
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.SequenceFile;
-import org.apache.hadoop.io.UTF8;
+import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapred.ClusterStatus;
@@ -73,7 +73,7 @@
Writable value,
OutputCollector output,
Reporter reporter) throws IOException {
- String filename = ((UTF8) value).toString();
+ String filename = ((Text) value).toString();
SequenceFile.Writer writer =
new SequenceFile.Writer(fileSys, new Path(filename),
BytesWritable.class, BytesWritable.class,
Modified: lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/Sort.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/Sort.java?rev=431692&r1=431691&r2=431692&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/Sort.java (original)
+++ lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/Sort.java Tue Aug 15 13:19:25 2006
@@ -56,8 +56,6 @@
jobConf.setInputFormat(SequenceFileInputFormat.class);
jobConf.setOutputFormat(SequenceFileOutputFormat.class);
- jobConf.setInputKeyClass(BytesWritable.class);
- jobConf.setInputValueClass(BytesWritable.class);
jobConf.setOutputKeyClass(BytesWritable.class);
jobConf.setOutputValueClass(BytesWritable.class);
Modified: lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/WordCount.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/WordCount.java?rev=431692&r1=431691&r2=431692&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/WordCount.java (original)
+++ lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/WordCount.java Tue Aug 15 13:19:25 2006
@@ -21,7 +21,7 @@
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
-import org.apache.hadoop.io.UTF8;
+import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapred.JobClient;
@@ -53,12 +53,12 @@
public static class MapClass extends MapReduceBase implements Mapper {
private final static IntWritable one = new IntWritable(1);
- private UTF8 word = new UTF8();
+ private Text word = new Text();
public void map(WritableComparable key, Writable value,
OutputCollector output,
Reporter reporter) throws IOException {
- String line = ((UTF8)value).toString();
+ String line = ((Text)value).toString();
StringTokenizer itr = new StringTokenizer(line);
while (itr.hasMoreTokens()) {
word.set(itr.nextToken());
@@ -99,7 +99,7 @@
conf.setJobName("wordcount");
// the keys are words (strings)
- conf.setOutputKeyClass(UTF8.class);
+ conf.setOutputKeyClass(Text.class);
// the values are counts (ints)
conf.setOutputValueClass(IntWritable.class);
Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobConf.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobConf.java?rev=431692&r1=431691&r2=431692&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobConf.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobConf.java Tue Aug 15 13:19:25 2006
@@ -296,19 +296,24 @@
public void setOutputFormat(Class theClass) {
setClass("mapred.output.format.class", theClass, OutputFormat.class);
}
-
+
+ /** @deprecated Call {@link RecordReader#createKey()}. */
public Class getInputKeyClass() {
return getClass("mapred.input.key.class",
LongWritable.class, WritableComparable.class);
}
+
+ /** @deprecated Not used */
public void setInputKeyClass(Class theClass) {
setClass("mapred.input.key.class", theClass, WritableComparable.class);
}
+ /** @deprecated Call {@link RecordReader#createValue()}. */
public Class getInputValueClass() {
return getClass("mapred.input.value.class", UTF8.class, Writable.class);
}
+ /** @deprecated Not used */
public void setInputValueClass(Class theClass) {
setClass("mapred.input.value.class", theClass, Writable.class);
}
Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/MapRunner.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/MapRunner.java?rev=431692&r1=431691&r2=431692&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/MapRunner.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/MapRunner.java Tue Aug 15 13:19:25 2006
@@ -26,15 +26,11 @@
public class MapRunner implements MapRunnable {
private JobConf job;
private Mapper mapper;
- private Class inputKeyClass;
- private Class inputValueClass;
public void configure(JobConf job) {
this.job = job;
this.mapper = (Mapper)ReflectionUtils.newInstance(job.getMapperClass(),
job);
- this.inputKeyClass = job.getInputKeyClass();
- this.inputValueClass = job.getInputValueClass();
}
public void run(RecordReader input, OutputCollector output,
@@ -42,10 +38,9 @@
throws IOException {
try {
// allocate key & value instances that are re-used for all entries
- WritableComparable key =
- (WritableComparable)ReflectionUtils.newInstance(inputKeyClass, job);
- Writable value = (Writable)ReflectionUtils.newInstance(inputValueClass,
- job);
+ WritableComparable key = input.createKey();
+ Writable value = input.createValue();
+
while (input.next(key, value)) {
// map pair to output
mapper.map(key, value, output, reporter);
Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/MapTask.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/MapTask.java?rev=431692&r1=431691&r2=431692&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/MapTask.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/MapTask.java Tue Aug 15 13:19:25 2006
@@ -54,13 +54,6 @@
metricsRecord = Metrics.createRecord("mapred", "map", "taskid", taskId);
}
- private void reportMetric(String name, long value) {
- if (metricsRecord != null) {
- metricsRecord.setMetric(name, value);
- metricsRecord.update();
- }
- }
-
synchronized void mapInput(long numBytes) {
Metrics.report(metricsRecord, "input-records", ++numInputRecords);
numInputBytes += numBytes;
@@ -172,6 +165,14 @@
RecordReader in = new RecordReader() { // wrap in progress reporter
private float perByte = 1.0f /(float)split.getLength();
+ public WritableComparable createKey() {
+ return rawIn.createKey();
+ }
+
+ public Writable createValue() {
+ return rawIn.createValue();
+ }
+
public synchronized boolean next(Writable key, Writable value)
throws IOException {
Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/RecordReader.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/RecordReader.java?rev=431692&r1=431691&r2=431692&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/RecordReader.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/RecordReader.java Tue Aug 15 13:19:25 2006
@@ -20,6 +20,7 @@
import java.io.DataInput;
import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.WritableComparable;
/** Reads key/value pairs from an input file {@link FileSplit}.
* Implemented by {@link InputFormat} implementations. */
@@ -33,6 +34,18 @@
* @see Writable#readFields(DataInput)
*/
boolean next(Writable key, Writable value) throws IOException;
+
+ /**
+ * Create an object of the appropriate type to be used as a key.
+ * @return a new key object
+ */
+ WritableComparable createKey();
+
+ /**
+ * Create an object of the appropriate type to be used as the value.
+ * @return a new value object
+ */
+ Writable createValue();
/** Returns the current position in the input. */
long getPos() throws IOException;
Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/ReduceTask.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/ReduceTask.java?rev=431692&r1=431691&r2=431692&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/ReduceTask.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/ReduceTask.java Tue Aug 15 13:19:25 2006
@@ -62,8 +62,6 @@
private ReduceTaskMetrics myMetrics = null;
- private UTF8 jobId = new UTF8();
-
private int numMaps;
private boolean sortComplete;
Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/SequenceFileRecordReader.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/SequenceFileRecordReader.java?rev=431692&r1=431691&r2=431692&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/SequenceFileRecordReader.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/SequenceFileRecordReader.java Tue Aug 15 13:19:25 2006
@@ -20,21 +20,23 @@
import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.io.SequenceFile;
-import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.*;
import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.util.ReflectionUtils;
/** An {@link RecordReader} for {@link SequenceFile}s. */
public class SequenceFileRecordReader implements RecordReader {
private SequenceFile.Reader in;
private long end;
private boolean more = true;
+ private Configuration conf;
public SequenceFileRecordReader(Configuration conf, FileSplit split)
throws IOException {
FileSystem fs = FileSystem.get(conf);
this.in = new SequenceFile.Reader(fs, split.getPath(), conf);
this.end = split.getStart() + split.getLength();
+ this.conf = conf;
if (split.getStart() > in.getPosition())
in.sync(split.getStart()); // sync to start
@@ -51,6 +53,15 @@
* #next(Writable,Writable)}.. */
public Class getValueClass() { return in.getValueClass(); }
+ public WritableComparable createKey() {
+ return (WritableComparable) ReflectionUtils.newInstance(getKeyClass(),
+ conf);
+ }
+
+ public Writable createValue() {
+ return (Writable) ReflectionUtils.newInstance(getValueClass(), conf);
+ }
+
public synchronized boolean next(Writable key, Writable value)
throws IOException {
if (!more) return false;
Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TextInputFormat.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TextInputFormat.java?rev=431692&r1=431691&r2=431692&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TextInputFormat.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TextInputFormat.java Tue Aug 15 13:19:25 2006
@@ -21,9 +21,7 @@
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FSDataInputStream;
-import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.UTF8;
+import org.apache.hadoop.io.*;
/** An {@link InputFormat} for plain text files. Files are broken into lines.
* Either linefeed or carriage-return are used to signal end of line. Keys are
@@ -53,6 +51,15 @@
}
return new RecordReader() {
+
+ public WritableComparable createKey() {
+ return new LongWritable();
+ }
+
+ public Writable createValue() {
+ return new Text();
+ }
+
/** Read a line. */
public synchronized boolean next(Writable key, Writable value)
throws IOException {
@@ -61,7 +68,7 @@
return false;
((LongWritable)key).set(pos); // key is position
- ((UTF8)value).set(readLine(in)); // value is line
+ ((Text)value).set(readLine(in)); // value is line
return true;
}
Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/lib/RegexMapper.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/lib/RegexMapper.java?rev=431692&r1=431691&r2=431692&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/lib/RegexMapper.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/lib/RegexMapper.java Tue Aug 15 13:19:25 2006
@@ -27,7 +27,7 @@
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.UTF8;
+import org.apache.hadoop.io.Text;
import java.util.regex.Pattern;
@@ -48,10 +48,10 @@
public void map(WritableComparable key, Writable value,
OutputCollector output, Reporter reporter)
throws IOException {
- String text = ((UTF8)value).toString();
+ String text = ((Text)value).toString();
Matcher matcher = pattern.matcher(text);
while (matcher.find()) {
- output.collect(new UTF8(matcher.group(group)), new LongWritable(1));
+ output.collect(new Text(matcher.group(group)), new LongWritable(1));
}
}
Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/lib/TokenCountMapper.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/lib/TokenCountMapper.java?rev=431692&r1=431691&r2=431692&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/lib/TokenCountMapper.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/lib/TokenCountMapper.java Tue Aug 15 13:19:25 2006
@@ -27,7 +27,7 @@
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.UTF8;
+import org.apache.hadoop.io.Text;
/** A {@link Mapper} that maps text values into <token,freq> pairs. Uses
@@ -38,13 +38,13 @@
OutputCollector output, Reporter reporter)
throws IOException {
// get input text
- String text = ((UTF8)value).toString(); // value is line of text
+ String text = ((Text)value).toString(); // value is line of text
// tokenize the value
StringTokenizer st = new StringTokenizer(text);
while (st.hasMoreTokens()) {
// output <token,1> pairs
- output.collect(new UTF8(st.nextToken()), new LongWritable(1));
+ output.collect(new Text(st.nextToken()), new LongWritable(1));
}
}
Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/tools/Logalyzer.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/tools/Logalyzer.java?rev=431692&r1=431691&r2=431692&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/tools/Logalyzer.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/tools/Logalyzer.java Tue Aug 15 13:19:25 2006
@@ -25,7 +25,7 @@
import org.apache.hadoop.mapred.*;
import org.apache.hadoop.fs.*;
import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.UTF8;
+import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparator;
import org.apache.hadoop.conf.*;
import org.apache.hadoop.util.CopyFiles;
@@ -73,17 +73,17 @@
public void map(WritableComparable key, Writable value,
OutputCollector output, Reporter reporter)
throws IOException {
- String text = ((UTF8)value).toString();
+ String text = ((Text)value).toString();
Matcher matcher = pattern.matcher(text);
while (matcher.find()) {
- output.collect((UTF8)value, new LongWritable(1));
+ output.collect((Text)value, new LongWritable(1));
}
}
}
/** A WritableComparator optimized for UTF8 keys of the logs. */
- public static class LogComparator extends UTF8.Comparator implements Configurable {
+ public static class LogComparator extends Text.Comparator implements Configurable {
private static Log LOG = LogFactory.getLog("org.apache.hadoop.tools.Logalyzer");
private JobConf conf = null;
@@ -119,12 +119,12 @@
}
try {
- UTF8 logline1 = new UTF8();
+ Text logline1 = new Text();
logline1.readFields(new DataInputStream(new ByteArrayInputStream(b1, s1, l1)));
String line1 = logline1.toString();
String[] logColumns1 = line1.split(columnSeparator);
- UTF8 logline2 = new UTF8();
+ Text logline2 = new Text();
logline2.readFields(new DataInputStream(new ByteArrayInputStream(b2, s2, l2)));
String line2 = logline2.toString();
String[] logColumns2 = line2.split(columnSeparator);
@@ -161,7 +161,7 @@
static {
// register this comparator
- WritableComparator.define(UTF8.class, new LogComparator());
+ WritableComparator.define(Text.class, new LogComparator());
}
}
@@ -209,8 +209,6 @@
grepJob.setInputPath(grepInput);
grepJob.setInputFormat(TextInputFormat.class);
- grepJob.setInputKeyClass(LongWritable.class);
- grepJob.setInputValueClass(UTF8.class);
grepJob.setMapperClass(LogRegexMapper.class);
grepJob.set("mapred.mapper.regex", grepPattern);
@@ -222,7 +220,7 @@
grepJob.setOutputPath(analysisOutput);
grepJob.setOutputFormat(TextOutputFormat.class);
- grepJob.setOutputKeyClass(UTF8.class);
+ grepJob.setOutputKeyClass(Text.class);
grepJob.setOutputValueClass(LongWritable.class);
grepJob.setOutputKeyComparatorClass(LogComparator.class);
Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/util/CopyFiles.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/util/CopyFiles.java?rev=431692&r1=431691&r2=431692&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/util/CopyFiles.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/util/CopyFiles.java Tue Aug 15 13:19:25 2006
@@ -40,7 +40,7 @@
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
-import org.apache.hadoop.io.UTF8;
+import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapred.ClusterStatus;
@@ -265,12 +265,10 @@
// turn off speculative execution, because DFS doesn't handle
// multiple writers to the same file.
jobConf.setSpeculativeExecution(false);
- jobConf.setInputKeyClass(UTF8.class);
- jobConf.setInputValueClass(UTF8.class);
jobConf.setInputFormat(SequenceFileInputFormat.class);
- jobConf.setOutputKeyClass(UTF8.class);
- jobConf.setOutputValueClass(UTF8.class);
+ jobConf.setOutputKeyClass(Text.class);
+ jobConf.setOutputValueClass(Text.class);
jobConf.setOutputFormat(SequenceFileOutputFormat.class);
jobConf.setMapperClass(DFSCopyFilesMapper.class);
@@ -332,10 +330,10 @@
for(int idx=0; idx < numMaps; ++idx) {
Path file = new Path(inDir, "part"+idx);
SequenceFile.Writer writer =
- new SequenceFile.Writer(fileSys, file, UTF8.class, UTF8.class);
+ new SequenceFile.Writer(fileSys, file, Text.class, Text.class);
for (int ipath = idx; ipath < nFiles; ipath += numMaps) {
String path = (String) finalPathList.get(ipath);
- writer.append(new UTF8(path), new UTF8(""));
+ writer.append(new Text(path), new Text(""));
}
writer.close();
}
@@ -388,7 +386,7 @@
Writable value,
OutputCollector out,
Reporter reporter) throws IOException {
- String src = ((UTF8) key).toString();
+ String src = ((Text) key).toString();
try {
copy(src, reporter);
} catch (IOException except) {
@@ -449,12 +447,10 @@
//Setup the MR-job configuration
jobConf.setSpeculativeExecution(false);
- jobConf.setInputKeyClass(UTF8.class);
- jobConf.setInputValueClass(UTF8.class);
jobConf.setInputFormat(SequenceFileInputFormat.class);
- jobConf.setOutputKeyClass(UTF8.class);
- jobConf.setOutputValueClass(UTF8.class);
+ jobConf.setOutputKeyClass(Text.class);
+ jobConf.setOutputValueClass(Text.class);
jobConf.setOutputFormat(SequenceFileOutputFormat.class);
jobConf.setMapperClass(HTTPCopyFilesMapper.class);
@@ -492,8 +488,8 @@
for(int i=0; i < srcPaths.length; ++i) {
Path ipFile = new Path(jobInputDir, "part" + i);
SequenceFile.Writer writer =
- new SequenceFile.Writer(fileSystem, ipFile, UTF8.class, UTF8.class);
- writer.append(new UTF8(srcPaths[i]), new UTF8(""));
+ new SequenceFile.Writer(fileSystem, ipFile, Text.class, Text.class);
+ writer.append(new Text(srcPaths[i]), new Text(""));
writer.close();
}
}
@@ -538,7 +534,7 @@
{
//The url of the file
try {
- srcURI = new URI(((UTF8)key).toString());
+ srcURI = new URI(((Text)key).toString());
//Construct the complete destination path
File urlPath = new File(srcURI.getPath());
@@ -574,11 +570,11 @@
" to: " + destinationPath.toString());
} catch(Exception e) {
- reporter.setStatus("Failed to copy from: " + (UTF8)key);
+ reporter.setStatus("Failed to copy from: " + (Text)key);
if(ignoreReadFailures) {
return;
} else {
- throw new IOException("Failed to copy from: " + (UTF8)key);
+ throw new IOException("Failed to copy from: " + (Text)key);
}
}
}
Modified: lucene/hadoop/trunk/src/test/org/apache/hadoop/mapred/TestMapRed.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/test/org/apache/hadoop/mapred/TestMapRed.java?rev=431692&r1=431691&r2=431692&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/test/org/apache/hadoop/mapred/TestMapRed.java (original)
+++ lucene/hadoop/trunk/src/test/org/apache/hadoop/mapred/TestMapRed.java Tue Aug 15 13:19:25 2006
@@ -108,7 +108,7 @@
int keyint = ((IntWritable) key).get();
while (it.hasNext()) {
int val = ((IntWritable) it.next()).get();
- out.collect(new UTF8("" + val), new UTF8(""));
+ out.collect(new Text("" + val), new Text(""));
}
}
public void close() {
@@ -137,7 +137,7 @@
public void map(WritableComparable key, Writable val, OutputCollector out, Reporter reporter) throws IOException {
long pos = ((LongWritable) key).get();
- UTF8 str = (UTF8) val;
+ Text str = (Text) val;
out.collect(new IntWritable(Integer.parseInt(str.toString().trim())), new IntWritable(1));
}
@@ -231,8 +231,8 @@
public void map(WritableComparable key, Writable value,
OutputCollector output, Reporter reporter
) throws IOException {
- String str = ((UTF8) value).toString().toLowerCase();
- output.collect(new UTF8(str), value);
+ String str = ((Text) value).toString().toLowerCase();
+ output.collect(new Text(str), value);
}
public void close() throws IOException {
@@ -299,8 +299,8 @@
conf.setOutputPath(outDir);
conf.setMapperClass(MyMap.class);
conf.setReducerClass(MyReduce.class);
- conf.setOutputKeyClass(UTF8.class);
- conf.setOutputValueClass(UTF8.class);
+ conf.setOutputKeyClass(Text.class);
+ conf.setOutputValueClass(Text.class);
conf.setOutputFormat(SequenceFileOutputFormat.class);
if (includeCombine) {
conf.setCombinerClass(IdentityReducer.class);
@@ -456,8 +456,6 @@
fs.delete(intermediateOuts);
JobConf checkJob = new JobConf(conf);
checkJob.setInputPath(randomOuts);
- checkJob.setInputKeyClass(LongWritable.class);
- checkJob.setInputValueClass(UTF8.class);
checkJob.setInputFormat(TextInputFormat.class);
checkJob.setMapperClass(RandomCheckMapper.class);
Modified: lucene/hadoop/trunk/src/test/org/apache/hadoop/mapred/TestMiniMRWithDFS.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/test/org/apache/hadoop/mapred/TestMiniMRWithDFS.java?rev=431692&r1=431691&r2=431692&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/test/org/apache/hadoop/mapred/TestMiniMRWithDFS.java (original)
+++ lucene/hadoop/trunk/src/test/org/apache/hadoop/mapred/TestMiniMRWithDFS.java Tue Aug 15 13:19:25 2006
@@ -24,7 +24,7 @@
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
-import org.apache.hadoop.io.UTF8;
+import org.apache.hadoop.io.Text;
import org.apache.hadoop.examples.WordCount;
/**
@@ -58,7 +58,7 @@
conf.setJobName("wordcount");
// the keys are words (strings)
- conf.setOutputKeyClass(UTF8.class);
+ conf.setOutputKeyClass(Text.class);
// the values are counts (ints)
conf.setOutputValueClass(IntWritable.class);
Modified: lucene/hadoop/trunk/src/test/org/apache/hadoop/mapred/TestTextInputFormat.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/test/org/apache/hadoop/mapred/TestTextInputFormat.java?rev=431692&r1=431691&r2=431692&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/test/org/apache/hadoop/mapred/TestTextInputFormat.java (original)
+++ lucene/hadoop/trunk/src/test/org/apache/hadoop/mapred/TestTextInputFormat.java Tue Aug 15 13:19:25 2006
@@ -20,14 +20,11 @@
import java.util.*;
import junit.framework.TestCase;
-import org.apache.commons.logging.*;
-
import org.apache.hadoop.fs.*;
import org.apache.hadoop.io.*;
import org.apache.hadoop.conf.*;
public class TestTextInputFormat extends TestCase {
- private static final Log LOG = InputFormatBase.LOG;
private static int MAX_LENGTH = 10000;
private static Configuration conf = new Configuration();
@@ -70,7 +67,7 @@
// try splitting the file in a variety of sizes
InputFormat format = new TextInputFormat();
LongWritable key = new LongWritable();
- UTF8 value = new UTF8();
+ Text value = new Text();
for (int i = 0; i < 3; i++) {
int numSplits = random.nextInt(MAX_LENGTH/20)+1;
//LOG.info("splitting: requesting = " + numSplits);