You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by ab...@apache.org on 2007/04/25 00:13:54 UTC
svn commit: r532105 - /lucene/nutch/trunk/src/java/org/apache/nutch/indexer/DeleteDuplicates.java
Author: ab
Date: Tue Apr 24 15:13:53 2007
New Revision: 532105
URL: http://svn.apache.org/viewvc?view=rev&rev=532105
Log:
Prevent NPE when working with small, possibly empty indexes.
Modified:
lucene/nutch/trunk/src/java/org/apache/nutch/indexer/DeleteDuplicates.java
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/indexer/DeleteDuplicates.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/indexer/DeleteDuplicates.java?view=diff&rev=532105&r1=532104&r2=532105
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/indexer/DeleteDuplicates.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/indexer/DeleteDuplicates.java Tue Apr 24 15:13:53 2007
@@ -158,19 +158,28 @@
public class DDRecordReader implements RecordReader {
private IndexReader indexReader;
- private int maxDoc;
- private int doc;
+ private int maxDoc = 0;
+ private int doc = 0;
private Text index;
public DDRecordReader(FileSplit split, JobConf job,
Text index) throws IOException {
- indexReader = IndexReader.open(new FsDirectory(FileSystem.get(job), split.getPath(), false, job));
- maxDoc = indexReader.maxDoc();
+ try {
+ indexReader = IndexReader.open(new FsDirectory(FileSystem.get(job), split.getPath(), false, job));
+ maxDoc = indexReader.maxDoc();
+ } catch (IOException ioe) {
+ LOG.warn("Can't open index at " + split + ", skipping. (" + ioe.getMessage() + ")");
+ indexReader = null;
+ }
this.index = index;
}
public boolean next(Writable key, Writable value)
throws IOException {
+
+ // skip empty indexes
+ if (indexReader == null || maxDoc <= 0)
+ return false;
// skip deleted documents
while (indexReader.isDeleted(doc) && doc < maxDoc) doc++;