You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by sn...@apache.org on 2017/12/05 09:39:43 UTC

[nutch] 03/05: Allow index removals even if dbDatum is null.

This is an automated email from the ASF dual-hosted git repository.

snagel pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/nutch.git

commit 9854f7af644a68db884f1b03eaf69359019e212e
Author: YossiTamari <33...@users.noreply.github.com>
AuthorDate: Wed Nov 8 17:13:05 2017 +0200

    Allow index removals even if dbDatum is null.
---
 src/java/org/apache/nutch/indexer/IndexerMapReduce.java | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/java/org/apache/nutch/indexer/IndexerMapReduce.java b/src/java/org/apache/nutch/indexer/IndexerMapReduce.java
index 12d379e..7e3438c 100644
--- a/src/java/org/apache/nutch/indexer/IndexerMapReduce.java
+++ b/src/java/org/apache/nutch/indexer/IndexerMapReduce.java
@@ -238,18 +238,18 @@ public class IndexerMapReduce extends Configured implements
     }
 
     // Whether to delete GONE or REDIRECTS
-    if (delete && fetchDatum != null && dbDatum != null) {
-      if (fetchDatum.getStatus() == CrawlDatum.STATUS_FETCH_GONE
-          || dbDatum.getStatus() == CrawlDatum.STATUS_DB_GONE) {
+    if (delete) {
+      if (fetchDatum != null && fetchDatum.getStatus() == CrawlDatum.STATUS_FETCH_GONE
+          || dbDatum != null && dbDatum.getStatus() == CrawlDatum.STATUS_DB_GONE) {
         reporter.incrCounter("IndexerStatus", "deleted (gone)", 1);
         output.collect(key, DELETE_ACTION);
         return;
       }
 
-      if (fetchDatum.getStatus() == CrawlDatum.STATUS_FETCH_REDIR_PERM
-          || fetchDatum.getStatus() == CrawlDatum.STATUS_FETCH_REDIR_TEMP
-          || dbDatum.getStatus() == CrawlDatum.STATUS_DB_REDIR_PERM
-          || dbDatum.getStatus() == CrawlDatum.STATUS_DB_REDIR_TEMP) {
+      if (fetchDatum != null && fetchDatum.getStatus() == CrawlDatum.STATUS_FETCH_REDIR_PERM
+          || fetchDatum != null && fetchDatum.getStatus() == CrawlDatum.STATUS_FETCH_REDIR_TEMP
+          || dbDatum != null && dbDatum.getStatus() == CrawlDatum.STATUS_DB_REDIR_PERM
+          || dbDatum != null && dbDatum.getStatus() == CrawlDatum.STATUS_DB_REDIR_TEMP) {
         reporter.incrCounter("IndexerStatus", "deleted (redirects)", 1);
         output.collect(key, DELETE_ACTION);
         return;

-- 
To stop receiving notification emails like this one, please contact
"commits@nutch.apache.org" <co...@nutch.apache.org>.