You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by sn...@apache.org on 2017/12/05 09:39:43 UTC
[nutch] 03/05: Allow index removals even if dbDatum is null.
This is an automated email from the ASF dual-hosted git repository.
snagel pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/nutch.git
commit 9854f7af644a68db884f1b03eaf69359019e212e
Author: YossiTamari <33...@users.noreply.github.com>
AuthorDate: Wed Nov 8 17:13:05 2017 +0200
Allow index removals even if dbDatum is null.
---
src/java/org/apache/nutch/indexer/IndexerMapReduce.java | 14 +++++++-------
1 file changed, 7 insertions(+), 7 deletions(-)
diff --git a/src/java/org/apache/nutch/indexer/IndexerMapReduce.java b/src/java/org/apache/nutch/indexer/IndexerMapReduce.java
index 12d379e..7e3438c 100644
--- a/src/java/org/apache/nutch/indexer/IndexerMapReduce.java
+++ b/src/java/org/apache/nutch/indexer/IndexerMapReduce.java
@@ -238,18 +238,18 @@ public class IndexerMapReduce extends Configured implements
}
// Whether to delete GONE or REDIRECTS
- if (delete && fetchDatum != null && dbDatum != null) {
- if (fetchDatum.getStatus() == CrawlDatum.STATUS_FETCH_GONE
- || dbDatum.getStatus() == CrawlDatum.STATUS_DB_GONE) {
+ if (delete) {
+ if (fetchDatum != null && fetchDatum.getStatus() == CrawlDatum.STATUS_FETCH_GONE
+ || dbDatum != null && dbDatum.getStatus() == CrawlDatum.STATUS_DB_GONE) {
reporter.incrCounter("IndexerStatus", "deleted (gone)", 1);
output.collect(key, DELETE_ACTION);
return;
}
- if (fetchDatum.getStatus() == CrawlDatum.STATUS_FETCH_REDIR_PERM
- || fetchDatum.getStatus() == CrawlDatum.STATUS_FETCH_REDIR_TEMP
- || dbDatum.getStatus() == CrawlDatum.STATUS_DB_REDIR_PERM
- || dbDatum.getStatus() == CrawlDatum.STATUS_DB_REDIR_TEMP) {
+ if (fetchDatum != null && fetchDatum.getStatus() == CrawlDatum.STATUS_FETCH_REDIR_PERM
+ || fetchDatum != null && fetchDatum.getStatus() == CrawlDatum.STATUS_FETCH_REDIR_TEMP
+ || dbDatum != null && dbDatum.getStatus() == CrawlDatum.STATUS_DB_REDIR_PERM
+ || dbDatum != null && dbDatum.getStatus() == CrawlDatum.STATUS_DB_REDIR_TEMP) {
reporter.incrCounter("IndexerStatus", "deleted (redirects)", 1);
output.collect(key, DELETE_ACTION);
return;
--
To stop receiving notification emails like this one, please contact
"commits@nutch.apache.org" <co...@nutch.apache.org>.