You are viewing a plain text version of this content. The canonical link for it was not preserved in this plain-text copy.
Posted to commits@nutch.apache.org by cu...@apache.org on 2005/09/14 21:06:34 UTC

svn commit: r280915 - in /lucene/nutch/branches/mapred/src: java/org/apache/nutch/crawl/ java/org/apache/nutch/mapred/ test/org/apache/nutch/mapred/

Author: cutting
Date: Wed Sep 14 12:06:30 2005
New Revision: 280915

URL: http://svn.apache.org/viewcvs?rev=280915&view=rev
Log:
Pass Reporter to getRecordReader().

Modified:
    lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/Indexer.java
    lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/FileSplit.java
    lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/InputFormat.java
    lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/InputFormatBase.java
    lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/MapTask.java
    lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/SequenceFileInputFormat.java
    lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/TextInputFormat.java
    lucene/nutch/branches/mapred/src/test/org/apache/nutch/mapred/TestSequenceFileInputFormat.java
    lucene/nutch/branches/mapred/src/test/org/apache/nutch/mapred/TestTextInputFormat.java

Modified: lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/Indexer.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/Indexer.java?rev=280915&r1=280914&r2=280915&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/Indexer.java (original)
+++ lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/Indexer.java Wed Sep 14 12:06:30 2005
@@ -46,7 +46,11 @@
    * types in reduce. */
   public static class InputFormat extends SequenceFileInputFormat {
     public RecordReader getRecordReader(NutchFileSystem fs, FileSplit split,
-                                        JobConf job) throws IOException {
+                                        JobConf job, Reporter reporter)
+      throws IOException {
+
+      reporter.setStatus(split.toString());
+      
       return new SequenceFileRecordReader(fs, split) {
           public synchronized boolean next(Writable key, Writable value)
             throws IOException {

Modified: lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/FileSplit.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/FileSplit.java?rev=280915&r1=280914&r2=280915&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/FileSplit.java (original)
+++ lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/FileSplit.java Wed Sep 14 12:06:30 2005
@@ -27,7 +27,7 @@
 
 /** A section of an input file.  Returned by {@link
  * InputFormat#getSplits(NutchFileSystem, JobConf, int)} and passed to
- * InputFormat#getRecordReader(NutchFileSystem, FileSplit, JobConf). */
+ * InputFormat#getRecordReader(NutchFileSystem,FileSplit,JobConf,Reporter). */
 public class FileSplit implements Writable {
   private File file;
   private long start;

Modified: lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/InputFormat.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/InputFormat.java?rev=280915&r1=280914&r2=280915&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/InputFormat.java (original)
+++ lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/InputFormat.java Wed Sep 14 12:06:30 2005
@@ -45,6 +45,7 @@
    * @return a {@link RecordReader}
    */
   RecordReader getRecordReader(NutchFileSystem fs, FileSplit split,
-                               JobConf job) throws IOException;
+                               JobConf job, Reporter reporter)
+    throws IOException;
 }
 

Modified: lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/InputFormatBase.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/InputFormatBase.java?rev=280915&r1=280914&r2=280915&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/InputFormatBase.java (original)
+++ lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/InputFormatBase.java Wed Sep 14 12:06:30 2005
@@ -42,7 +42,9 @@
 
   public abstract RecordReader getRecordReader(NutchFileSystem fs,
                                                FileSplit split,
-                                               JobConf job) throws IOException;
+                                               JobConf job,
+                                               Reporter reporter)
+    throws IOException;
 
   /** Subclasses may override to, e.g., select only files matching a regular
    * expression.*/ 

Modified: lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/MapTask.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/MapTask.java?rev=280915&r1=280914&r2=280915&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/MapTask.java (original)
+++ lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/MapTask.java Wed Sep 14 12:06:30 2005
@@ -90,7 +90,8 @@
       }
 
       final RecordReader rawIn =                  // open input
-        job.getInputFormat().getRecordReader(NutchFileSystem.get(),split,job);
+        job.getInputFormat().getRecordReader
+        (NutchFileSystem.get(), split, job, reporter);
 
       RecordReader in = new RecordReader() {      // wrap in progress reporter
           private float perByte = 1.0f /(float)split.getLength();

Modified: lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/SequenceFileInputFormat.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/SequenceFileInputFormat.java?rev=280915&r1=280914&r2=280915&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/SequenceFileInputFormat.java (original)
+++ lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/SequenceFileInputFormat.java Wed Sep 14 12:06:30 2005
@@ -49,7 +49,10 @@
   }
 
   public RecordReader getRecordReader(NutchFileSystem fs, FileSplit split,
-                                      JobConf job) throws IOException {
+                                      JobConf job, Reporter reporter)
+    throws IOException {
+
+    reporter.setStatus(split.toString());
 
     return new SequenceFileRecordReader(fs, split);
   }

Modified: lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/TextInputFormat.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/TextInputFormat.java?rev=280915&r1=280914&r2=280915&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/TextInputFormat.java (original)
+++ lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/TextInputFormat.java Wed Sep 14 12:06:30 2005
@@ -33,7 +33,10 @@
 public class TextInputFormat extends InputFormatBase {
 
   public RecordReader getRecordReader(NutchFileSystem fs, FileSplit split,
-                                      JobConf job) throws IOException {
+                                      JobConf job, Reporter reporter)
+    throws IOException {
+
+    reporter.setStatus(split.toString());
 
     final long start = split.getStart();
     final long end = start + split.getLength();

Modified: lucene/nutch/branches/mapred/src/test/org/apache/nutch/mapred/TestSequenceFileInputFormat.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/test/org/apache/nutch/mapred/TestSequenceFileInputFormat.java?rev=280915&r1=280914&r2=280915&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/test/org/apache/nutch/mapred/TestSequenceFileInputFormat.java (original)
+++ lucene/nutch/branches/mapred/src/test/org/apache/nutch/mapred/TestSequenceFileInputFormat.java Wed Sep 14 12:06:30 2005
@@ -36,6 +36,10 @@
     File dir = new File(System.getProperty("test.build.data",".") + "/mapred");
     File file = new File(dir, "test.seq");
     
+    Reporter reporter = new Reporter() {
+        public void setStatus(String status) throws IOException {}
+      };
+    
     int seed = new Random().nextInt();
     //LOG.info("seed = "+seed);
     Random random = new Random(seed);
@@ -87,7 +91,8 @@
         // check each split
         BitSet bits = new BitSet(length);
         for (int j = 0; j < splits.length; j++) {
-          RecordReader reader = format.getRecordReader(fs, splits[j], job);
+          RecordReader reader =
+            format.getRecordReader(fs, splits[j], job, reporter);
           try {
             int count = 0;
             while (reader.next(key, value)) {

Modified: lucene/nutch/branches/mapred/src/test/org/apache/nutch/mapred/TestTextInputFormat.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/test/org/apache/nutch/mapred/TestTextInputFormat.java?rev=280915&r1=280914&r2=280915&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/test/org/apache/nutch/mapred/TestTextInputFormat.java (original)
+++ lucene/nutch/branches/mapred/src/test/org/apache/nutch/mapred/TestTextInputFormat.java Wed Sep 14 12:06:30 2005
@@ -35,6 +35,10 @@
     NutchFileSystem fs = NutchFileSystem.getNamed("local");
     File dir = new File(System.getProperty("test.build.data",".") + "/mapred");
     File file = new File(dir, "test.txt");
+
+    Reporter reporter = new Reporter() {
+        public void setStatus(String status) throws IOException {}
+      };
     
     int seed = new Random().nextInt();
     //LOG.info("seed = "+seed);
@@ -81,7 +85,8 @@
         // check each split
         BitSet bits = new BitSet(length);
         for (int j = 0; j < splits.length; j++) {
-          RecordReader reader = format.getRecordReader(fs, splits[j], job);
+          RecordReader reader =
+            format.getRecordReader(fs, splits[j], job, reporter);
           try {
             int count = 0;
             while (reader.next(key, value)) {