You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by sn...@apache.org on 2015/06/25 21:04:52 UTC
svn commit: r1687612 - in /nutch/trunk: CHANGES.txt
src/java/org/apache/nutch/indexer/IndexerMapReduce.java
Author: snagel
Date: Thu Jun 25 19:04:52 2015
New Revision: 1687612
URL: http://svn.apache.org/r1687612
Log:
NUTCH-2041 indexer fails if linkdb is missing
Modified:
nutch/trunk/CHANGES.txt
nutch/trunk/src/java/org/apache/nutch/indexer/IndexerMapReduce.java
Modified: nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1687612&r1=1687611&r2=1687612&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Thu Jun 25 19:04:52 2015
@@ -2,6 +2,8 @@ Nutch Change Log
Nutch Current Development 1.11-SNAPSHOT
+* NUTCH-2041 indexer fails if linkdb is missing (snagel)
+
* NUTCH-2016 Remove unused class OldFetcher (snagel)
* NUTCH-2000 Link inversion fails with .locked already exists (jnioche, snagel)
Modified: nutch/trunk/src/java/org/apache/nutch/indexer/IndexerMapReduce.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/indexer/IndexerMapReduce.java?rev=1687612&r1=1687611&r2=1687612&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/indexer/IndexerMapReduce.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/indexer/IndexerMapReduce.java Thu Jun 25 19:04:52 2015
@@ -23,6 +23,7 @@ import java.util.Iterator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
@@ -33,6 +34,7 @@ import org.apache.hadoop.mapred.OutputCo
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
+import org.apache.hadoop.util.StringUtils;
import org.apache.nutch.crawl.CrawlDatum;
import org.apache.nutch.crawl.CrawlDb;
import org.apache.nutch.crawl.Inlinks;
@@ -354,8 +356,20 @@ public class IndexerMapReduce extends Co
FileInputFormat.addInputPath(job, new Path(crawlDb, CrawlDb.CURRENT_NAME));
- if (linkDb != null)
- FileInputFormat.addInputPath(job, new Path(linkDb, LinkDb.CURRENT_NAME));
+ if (linkDb != null) {
+ Path currentLinkDb = new Path(linkDb, LinkDb.CURRENT_NAME);
+ try {
+ if (FileSystem.get(job).exists(currentLinkDb)) {
+ FileInputFormat.addInputPath(job, currentLinkDb);
+ } else {
+ LOG.warn("Ignoring linkDb for indexing, no linkDb found in path: {}",
+ linkDb);
+ }
+ } catch (IOException e) {
+ LOG.warn("Failed to use linkDb ({}) for indexing: {}", linkDb,
+ StringUtils.stringifyException(e));
+ }
+ }
job.setInputFormat(SequenceFileInputFormat.class);