You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by cu...@apache.org on 2006/02/09 21:57:47 UTC
svn commit: r376435 - in /lucene/nutch/trunk: lib/
src/java/org/apache/nutch/crawl/ src/java/org/apache/nutch/fetcher/
src/java/org/apache/nutch/indexer/ src/java/org/apache/nutch/parse/
src/java/org/apache/nutch/segment/
Author: cutting
Date: Thu Feb 9 12:57:44 2006
New Revision: 376435
URL: http://svn.apache.org/viewcvs?rev=376435&view=rev
Log:
Updating to latest Hadoop jar, adding now-required close() methods to mapper and reducer implementations (the partitioner implementations in this change receive a close() method as well).
Modified:
lucene/nutch/trunk/lib/hadoop-0.1-dev.jar
lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReader.java
lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java
lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Generator.java
lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Injector.java
lucene/nutch/trunk/src/java/org/apache/nutch/crawl/LinkDb.java
lucene/nutch/trunk/src/java/org/apache/nutch/crawl/PartitionUrlByHost.java
lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/Fetcher.java
lucene/nutch/trunk/src/java/org/apache/nutch/indexer/DeleteDuplicates.java
lucene/nutch/trunk/src/java/org/apache/nutch/indexer/Indexer.java
lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseSegment.java
lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentReader.java
Modified: lucene/nutch/trunk/lib/hadoop-0.1-dev.jar
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/lib/hadoop-0.1-dev.jar?rev=376435&r1=376434&r2=376435&view=diff
==============================================================================
Binary files - no diff available.
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReader.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReader.java?rev=376435&r1=376434&r2=376435&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReader.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReader.java Thu Feb 9 12:57:44 2006
@@ -56,6 +56,7 @@
public static class CrawlDbStatMapper implements Mapper {
public void configure(JobConf job) {}
+ public void close() {}
public void map(WritableComparable key, Writable value, OutputCollector output, Reporter reporter)
throws IOException {
CrawlDatum cd = (CrawlDatum) value;
@@ -68,6 +69,7 @@
public static class CrawlDbStatReducer implements Reducer {
public void configure(JobConf job) {}
+ public void close() {}
public void reduce(WritableComparable key, Iterator values, OutputCollector output, Reporter reporter)
throws IOException {
@@ -127,8 +129,8 @@
}
}
- public void configure(JobConf job) {
- }
+ public void configure(JobConf job) {}
+ public void close() {}
}
public void processStatJob(String crawlDb, Configuration config) throws IOException {
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java?rev=376435&r1=376434&r2=376435&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java Thu Feb 9 12:57:44 2006
@@ -30,6 +30,8 @@
retryMax = job.getInt("db.fetch.retry.max", 3);
}
+ public void close() {}
+
public void reduce(WritableComparable key, Iterator values,
OutputCollector output, Reporter reporter)
throws IOException {
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Generator.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Generator.java?rev=376435&r1=376434&r2=376435&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Generator.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Generator.java Thu Feb 9 12:57:44 2006
@@ -51,6 +51,8 @@
maxPerHost = job.getInt("generate.max.per.host", -1);
}
+ public void close() {}
+
/** Select & invert subset due for fetch. */
public void map(WritableComparable key, Writable value,
OutputCollector output, Reporter reporter)
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Injector.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Injector.java?rev=376435&r1=376434&r2=376435&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Injector.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Injector.java Thu Feb 9 12:57:44 2006
@@ -48,6 +48,8 @@
this.jobConf = job;
}
+ public void close() {}
+
public void map(WritableComparable key, Writable val,
OutputCollector output, Reporter reporter)
throws IOException {
@@ -73,6 +75,7 @@
/** Combine multiple new entries for a url. */
public static class InjectReducer implements Reducer {
public void configure(JobConf job) {}
+ public void close() {}
public void reduce(WritableComparable key, Iterator values,
OutputCollector output, Reporter reporter)
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/LinkDb.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/crawl/LinkDb.java?rev=376435&r1=376434&r2=376435&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/crawl/LinkDb.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/crawl/LinkDb.java Thu Feb 9 12:57:44 2006
@@ -57,6 +57,8 @@
ignoreInternalLinks = job.getBoolean("db.ignore.internal.links", true);
}
+ public void close() {}
+
public void map(WritableComparable key, Writable value,
OutputCollector output, Reporter reporter)
throws IOException {
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/PartitionUrlByHost.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/crawl/PartitionUrlByHost.java?rev=376435&r1=376434&r2=376435&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/crawl/PartitionUrlByHost.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/crawl/PartitionUrlByHost.java Thu Feb 9 12:57:44 2006
@@ -30,6 +30,8 @@
seed = job.getInt("partition.url.by.host.seed", 0);
}
+ public void close() {}
+
/** Hash by hostname. */
public int getPartition(WritableComparable key, Writable value,
int numReduceTasks) {
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/Fetcher.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/Fetcher.java?rev=376435&r1=376434&r2=376435&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/Fetcher.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/Fetcher.java Thu Feb 9 12:57:44 2006
@@ -284,6 +284,8 @@
}
}
+ public void close() {}
+
public static boolean isParsing(Configuration conf) {
return conf.getBoolean("fetcher.parse", true);
}
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/indexer/DeleteDuplicates.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/indexer/DeleteDuplicates.java?rev=376435&r1=376434&r2=376435&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/indexer/DeleteDuplicates.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/indexer/DeleteDuplicates.java Thu Feb 9 12:57:44 2006
@@ -205,6 +205,7 @@
public static class HashPartitioner implements Partitioner {
public void configure(JobConf job) {}
+ public void close() {}
public int getPartition(WritableComparable key, Writable value,
int numReduceTasks) {
int hashCode = ((HashScore)key).hash.hashCode();
@@ -215,6 +216,7 @@
public static class HashReducer implements Reducer {
private MD5Hash prevHash = new MD5Hash();
public void configure(JobConf job) {}
+ public void close() {}
public void reduce(WritableComparable key, Iterator values,
OutputCollector output, Reporter reporter)
throws IOException {
@@ -246,6 +248,8 @@
throw new RuntimeException(e);
}
}
+
+ public void close() {}
/** Map [*,IndexDoc] pairs to [index,doc] pairs. */
public void map(WritableComparable key, Writable value,
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/indexer/Indexer.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/indexer/Indexer.java?rev=376435&r1=376434&r2=376435&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/indexer/Indexer.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/indexer/Indexer.java Thu Feb 9 12:57:44 2006
@@ -154,6 +154,8 @@
this.filters = new IndexingFilters(getConf());
}
+ public void close() {}
+
public void reduce(WritableComparable key, Iterator values,
OutputCollector output, Reporter reporter)
throws IOException {
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseSegment.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseSegment.java?rev=376435&r1=376434&r2=376435&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseSegment.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseSegment.java Thu Feb 9 12:57:44 2006
@@ -48,6 +48,8 @@
setConf(job);
}
+ public void close() {}
+
public void map(WritableComparable key, Writable value,
OutputCollector output, Reporter reporter)
throws IOException {
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentReader.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentReader.java?rev=376435&r1=376434&r2=376435&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentReader.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentReader.java Thu Feb 9 12:57:44 2006
@@ -118,8 +118,9 @@
super(conf);
}
- public void configure(JobConf job) {
- }
+ public void configure(JobConf job) {}
+
+ public void close() {}
public void reduce(WritableComparable key, Iterator values,
OutputCollector output, Reporter reporter)