You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by cu...@apache.org on 2006/02/09 21:57:47 UTC

svn commit: r376435 - in /lucene/nutch/trunk: lib/ src/java/org/apache/nutch/crawl/ src/java/org/apache/nutch/fetcher/ src/java/org/apache/nutch/indexer/ src/java/org/apache/nutch/parse/ src/java/org/apache/nutch/segment/

Author: cutting
Date: Thu Feb  9 12:57:44 2006
New Revision: 376435

URL: http://svn.apache.org/viewcvs?rev=376435&view=rev
Log:
Updating to the latest Hadoop jar and adding the now-required close() methods to mapper, reducer, and partitioner implementations.

Modified:
    lucene/nutch/trunk/lib/hadoop-0.1-dev.jar
    lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReader.java
    lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java
    lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Generator.java
    lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Injector.java
    lucene/nutch/trunk/src/java/org/apache/nutch/crawl/LinkDb.java
    lucene/nutch/trunk/src/java/org/apache/nutch/crawl/PartitionUrlByHost.java
    lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/Fetcher.java
    lucene/nutch/trunk/src/java/org/apache/nutch/indexer/DeleteDuplicates.java
    lucene/nutch/trunk/src/java/org/apache/nutch/indexer/Indexer.java
    lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseSegment.java
    lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentReader.java

Modified: lucene/nutch/trunk/lib/hadoop-0.1-dev.jar
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/lib/hadoop-0.1-dev.jar?rev=376435&r1=376434&r2=376435&view=diff
==============================================================================
Binary files - no diff available.

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReader.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReader.java?rev=376435&r1=376434&r2=376435&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReader.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReader.java Thu Feb  9 12:57:44 2006
@@ -56,6 +56,7 @@
 
   public static class CrawlDbStatMapper implements Mapper {
     public void configure(JobConf job) {}
+    public void close() {}
     public void map(WritableComparable key, Writable value, OutputCollector output, Reporter reporter)
             throws IOException {
       CrawlDatum cd = (CrawlDatum) value;
@@ -68,6 +69,7 @@
 
   public static class CrawlDbStatReducer implements Reducer {
     public void configure(JobConf job) {}
+    public void close() {}
     public void reduce(WritableComparable key, Iterator values, OutputCollector output, Reporter reporter)
             throws IOException {
 
@@ -127,8 +129,8 @@
       }
     }
 
-    public void configure(JobConf job) {
-    }
+    public void configure(JobConf job) {}
+    public void close() {}
   }
   
   public void processStatJob(String crawlDb, Configuration config) throws IOException {

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java?rev=376435&r1=376434&r2=376435&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java Thu Feb  9 12:57:44 2006
@@ -30,6 +30,8 @@
     retryMax = job.getInt("db.fetch.retry.max", 3);
   }
 
+  public void close() {}
+
   public void reduce(WritableComparable key, Iterator values,
                      OutputCollector output, Reporter reporter)
     throws IOException {

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Generator.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Generator.java?rev=376435&r1=376434&r2=376435&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Generator.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Generator.java Thu Feb  9 12:57:44 2006
@@ -51,6 +51,8 @@
       maxPerHost = job.getInt("generate.max.per.host", -1);
     }
 
+    public void close() {}
+
     /** Select & invert subset due for fetch. */
     public void map(WritableComparable key, Writable value,
                     OutputCollector output, Reporter reporter)

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Injector.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Injector.java?rev=376435&r1=376434&r2=376435&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Injector.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Injector.java Thu Feb  9 12:57:44 2006
@@ -48,6 +48,8 @@
       this.jobConf = job;
     }
 
+    public void close() {}
+
     public void map(WritableComparable key, Writable val,
                     OutputCollector output, Reporter reporter)
       throws IOException {
@@ -73,6 +75,7 @@
   /** Combine multiple new entries for a url. */
   public static class InjectReducer implements Reducer {
     public void configure(JobConf job) {}
+    public void close() {}
 
     public void reduce(WritableComparable key, Iterator values,
                        OutputCollector output, Reporter reporter)

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/LinkDb.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/crawl/LinkDb.java?rev=376435&r1=376434&r2=376435&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/crawl/LinkDb.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/crawl/LinkDb.java Thu Feb  9 12:57:44 2006
@@ -57,6 +57,8 @@
     ignoreInternalLinks = job.getBoolean("db.ignore.internal.links", true);
   }
 
+  public void close() {}
+
   public void map(WritableComparable key, Writable value,
                   OutputCollector output, Reporter reporter)
     throws IOException {

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/PartitionUrlByHost.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/crawl/PartitionUrlByHost.java?rev=376435&r1=376434&r2=376435&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/crawl/PartitionUrlByHost.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/crawl/PartitionUrlByHost.java Thu Feb  9 12:57:44 2006
@@ -30,6 +30,8 @@
     seed = job.getInt("partition.url.by.host.seed", 0);
   }
   
+  public void close() {}
+
   /** Hash by hostname. */
   public int getPartition(WritableComparable key, Writable value,
                           int numReduceTasks) {

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/Fetcher.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/Fetcher.java?rev=376435&r1=376434&r2=376435&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/Fetcher.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/Fetcher.java Thu Feb  9 12:57:44 2006
@@ -284,6 +284,8 @@
     }
   }
 
+  public void close() {}
+
   public static boolean isParsing(Configuration conf) {
     return conf.getBoolean("fetcher.parse", true);
   }

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/indexer/DeleteDuplicates.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/indexer/DeleteDuplicates.java?rev=376435&r1=376434&r2=376435&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/indexer/DeleteDuplicates.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/indexer/DeleteDuplicates.java Thu Feb  9 12:57:44 2006
@@ -205,6 +205,7 @@
 
   public static class HashPartitioner implements Partitioner {
     public void configure(JobConf job) {}
+    public void close() {}
     public int getPartition(WritableComparable key, Writable value,
                             int numReduceTasks) {
       int hashCode = ((HashScore)key).hash.hashCode();
@@ -215,6 +216,7 @@
   public static class HashReducer implements Reducer {
     private MD5Hash prevHash = new MD5Hash();
     public void configure(JobConf job) {}
+    public void close() {}
     public void reduce(WritableComparable key, Iterator values,
                        OutputCollector output, Reporter reporter)
       throws IOException {
@@ -246,6 +248,8 @@
       throw new RuntimeException(e);
     }
   }
+
+  public void close() {}
 
   /** Map [*,IndexDoc] pairs to [index,doc] pairs. */
   public void map(WritableComparable key, Writable value,

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/indexer/Indexer.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/indexer/Indexer.java?rev=376435&r1=376434&r2=376435&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/indexer/Indexer.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/indexer/Indexer.java Thu Feb  9 12:57:44 2006
@@ -154,6 +154,8 @@
     this.filters = new IndexingFilters(getConf());
   }
 
+  public void close() {}
+
   public void reduce(WritableComparable key, Iterator values,
                      OutputCollector output, Reporter reporter)
     throws IOException {

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseSegment.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseSegment.java?rev=376435&r1=376434&r2=376435&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseSegment.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseSegment.java Thu Feb  9 12:57:44 2006
@@ -48,6 +48,8 @@
     setConf(job);
   }
 
+  public void close() {}
+
   public void map(WritableComparable key, Writable value,
                   OutputCollector output, Reporter reporter)
     throws IOException {

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentReader.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentReader.java?rev=376435&r1=376434&r2=376435&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentReader.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentReader.java Thu Feb  9 12:57:44 2006
@@ -118,8 +118,9 @@
     super(conf);
   }
 
-  public void configure(JobConf job) {
-  }
+  public void configure(JobConf job) {}
+
+  public void close() {}
 
   public void reduce(WritableComparable key, Iterator values,
                      OutputCollector output, Reporter reporter)