You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by cu...@apache.org on 2005/09/14 21:06:34 UTC
svn commit: r280915 - in /lucene/nutch/branches/mapred/src:
java/org/apache/nutch/crawl/ java/org/apache/nutch/mapred/
test/org/apache/nutch/mapred/
Author: cutting
Date: Wed Sep 14 12:06:30 2005
New Revision: 280915
URL: http://svn.apache.org/viewcvs?rev=280915&view=rev
Log:
Pass Reporter to getRecordReader().
Modified:
lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/Indexer.java
lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/FileSplit.java
lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/InputFormat.java
lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/InputFormatBase.java
lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/MapTask.java
lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/SequenceFileInputFormat.java
lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/TextInputFormat.java
lucene/nutch/branches/mapred/src/test/org/apache/nutch/mapred/TestSequenceFileInputFormat.java
lucene/nutch/branches/mapred/src/test/org/apache/nutch/mapred/TestTextInputFormat.java
Modified: lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/Indexer.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/Indexer.java?rev=280915&r1=280914&r2=280915&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/Indexer.java (original)
+++ lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/Indexer.java Wed Sep 14 12:06:30 2005
@@ -46,7 +46,11 @@
* types in reduce. */
public static class InputFormat extends SequenceFileInputFormat {
public RecordReader getRecordReader(NutchFileSystem fs, FileSplit split,
- JobConf job) throws IOException {
+ JobConf job, Reporter reporter)
+ throws IOException {
+
+ reporter.setStatus(split.toString());
+
return new SequenceFileRecordReader(fs, split) {
public synchronized boolean next(Writable key, Writable value)
throws IOException {
Modified: lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/FileSplit.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/FileSplit.java?rev=280915&r1=280914&r2=280915&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/FileSplit.java (original)
+++ lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/FileSplit.java Wed Sep 14 12:06:30 2005
@@ -27,7 +27,7 @@
/** A section of an input file. Returned by {@link
* InputFormat#getSplits(NutchFileSystem, JobConf, int)} and passed to
- * InputFormat#getRecordReader(NutchFileSystem, FileSplit, JobConf). */
+ * InputFormat#getRecordReader(NutchFileSystem,FileSplit,JobConf,Reporter). */
public class FileSplit implements Writable {
private File file;
private long start;
Modified: lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/InputFormat.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/InputFormat.java?rev=280915&r1=280914&r2=280915&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/InputFormat.java (original)
+++ lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/InputFormat.java Wed Sep 14 12:06:30 2005
@@ -45,6 +45,7 @@
* @return a {@link RecordReader}
*/
RecordReader getRecordReader(NutchFileSystem fs, FileSplit split,
- JobConf job) throws IOException;
+ JobConf job, Reporter reporter)
+ throws IOException;
}
Modified: lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/InputFormatBase.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/InputFormatBase.java?rev=280915&r1=280914&r2=280915&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/InputFormatBase.java (original)
+++ lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/InputFormatBase.java Wed Sep 14 12:06:30 2005
@@ -42,7 +42,9 @@
public abstract RecordReader getRecordReader(NutchFileSystem fs,
FileSplit split,
- JobConf job) throws IOException;
+ JobConf job,
+ Reporter reporter)
+ throws IOException;
/** Subclasses may override to, e.g., select only files matching a regular
* expression.*/
Modified: lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/MapTask.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/MapTask.java?rev=280915&r1=280914&r2=280915&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/MapTask.java (original)
+++ lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/MapTask.java Wed Sep 14 12:06:30 2005
@@ -90,7 +90,8 @@
}
final RecordReader rawIn = // open input
- job.getInputFormat().getRecordReader(NutchFileSystem.get(),split,job);
+ job.getInputFormat().getRecordReader
+ (NutchFileSystem.get(), split, job, reporter);
RecordReader in = new RecordReader() { // wrap in progress reporter
private float perByte = 1.0f /(float)split.getLength();
Modified: lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/SequenceFileInputFormat.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/SequenceFileInputFormat.java?rev=280915&r1=280914&r2=280915&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/SequenceFileInputFormat.java (original)
+++ lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/SequenceFileInputFormat.java Wed Sep 14 12:06:30 2005
@@ -49,7 +49,10 @@
}
public RecordReader getRecordReader(NutchFileSystem fs, FileSplit split,
- JobConf job) throws IOException {
+ JobConf job, Reporter reporter)
+ throws IOException {
+
+ reporter.setStatus(split.toString());
return new SequenceFileRecordReader(fs, split);
}
Modified: lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/TextInputFormat.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/TextInputFormat.java?rev=280915&r1=280914&r2=280915&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/TextInputFormat.java (original)
+++ lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/TextInputFormat.java Wed Sep 14 12:06:30 2005
@@ -33,7 +33,10 @@
public class TextInputFormat extends InputFormatBase {
public RecordReader getRecordReader(NutchFileSystem fs, FileSplit split,
- JobConf job) throws IOException {
+ JobConf job, Reporter reporter)
+ throws IOException {
+
+ reporter.setStatus(split.toString());
final long start = split.getStart();
final long end = start + split.getLength();
Modified: lucene/nutch/branches/mapred/src/test/org/apache/nutch/mapred/TestSequenceFileInputFormat.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/test/org/apache/nutch/mapred/TestSequenceFileInputFormat.java?rev=280915&r1=280914&r2=280915&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/test/org/apache/nutch/mapred/TestSequenceFileInputFormat.java (original)
+++ lucene/nutch/branches/mapred/src/test/org/apache/nutch/mapred/TestSequenceFileInputFormat.java Wed Sep 14 12:06:30 2005
@@ -36,6 +36,10 @@
File dir = new File(System.getProperty("test.build.data",".") + "/mapred");
File file = new File(dir, "test.seq");
+ Reporter reporter = new Reporter() {
+ public void setStatus(String status) throws IOException {}
+ };
+
int seed = new Random().nextInt();
//LOG.info("seed = "+seed);
Random random = new Random(seed);
@@ -87,7 +91,8 @@
// check each split
BitSet bits = new BitSet(length);
for (int j = 0; j < splits.length; j++) {
- RecordReader reader = format.getRecordReader(fs, splits[j], job);
+ RecordReader reader =
+ format.getRecordReader(fs, splits[j], job, reporter);
try {
int count = 0;
while (reader.next(key, value)) {
Modified: lucene/nutch/branches/mapred/src/test/org/apache/nutch/mapred/TestTextInputFormat.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/test/org/apache/nutch/mapred/TestTextInputFormat.java?rev=280915&r1=280914&r2=280915&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/test/org/apache/nutch/mapred/TestTextInputFormat.java (original)
+++ lucene/nutch/branches/mapred/src/test/org/apache/nutch/mapred/TestTextInputFormat.java Wed Sep 14 12:06:30 2005
@@ -35,6 +35,10 @@
NutchFileSystem fs = NutchFileSystem.getNamed("local");
File dir = new File(System.getProperty("test.build.data",".") + "/mapred");
File file = new File(dir, "test.txt");
+
+ Reporter reporter = new Reporter() {
+ public void setStatus(String status) throws IOException {}
+ };
int seed = new Random().nextInt();
//LOG.info("seed = "+seed);
@@ -81,7 +85,8 @@
// check each split
BitSet bits = new BitSet(length);
for (int j = 0; j < splits.length; j++) {
- RecordReader reader = format.getRecordReader(fs, splits[j], job);
+ RecordReader reader =
+ format.getRecordReader(fs, splits[j], job, reporter);
try {
int count = 0;
while (reader.next(key, value)) {