You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2021/11/30 20:40:48 UTC

[lucene] branch branch_9x updated: LUCENE-10272: cross-check norms with postings in checkindex (#493)

This is an automated email from the ASF dual-hosted git repository.

rmuir pushed a commit to branch branch_9x
in repository https://gitbox.apache.org/repos/asf/lucene.git


The following commit(s) were added to refs/heads/branch_9x by this push:
     new c89c78c  LUCENE-10272: cross-check norms with postings in checkindex (#493)
c89c78c is described below

commit c89c78cee0fbcd88b23d5772ea02f785865add7a
Author: Robert Muir <rm...@apache.org>
AuthorDate: Tue Nov 30 14:21:40 2021 -0500

    LUCENE-10272: cross-check norms with postings in checkindex (#493)
    
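    Previously, CheckIndex would iterate norms and validate each one. But if norms that should be there were missing, nothing would fail. Now it computes the expected number of norms from the postings (live documents that have terms for the field) and ensures that exactly that many non-zero norm values were seen.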
---
 .../java/org/apache/lucene/index/CheckIndex.java   | 36 ++++++++++++++++++----
 1 file changed, 30 insertions(+), 6 deletions(-)

diff --git a/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java b/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
index 285bed2..64bc28c 100644
--- a/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
+++ b/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
@@ -2115,6 +2115,8 @@ public final class CheckIndex implements Closeable {
 
         if (fieldInfo.hasNorms() && isVectors == false) {
           final NumericDocValues norms = normsProducer.getNorms(fieldInfo);
+          // count of non-zero norm values found for the field
+          int actualCount = 0;
           // Cross-check terms with norms
           for (int doc = norms.nextDoc();
               doc != DocIdSetIterator.NO_MORE_DOCS;
@@ -2126,12 +2128,15 @@ public final class CheckIndex implements Closeable {
               continue;
             }
             final long norm = norms.longValue();
-            if (norm != 0 && visitedDocs.get(doc) == false) {
-              throw new CheckIndexException(
-                  "Document "
-                      + doc
-                      + " doesn't have terms according to postings but has a norm value that is not zero: "
-                      + Long.toUnsignedString(norm));
+            if (norm != 0) {
+              actualCount++;
+              if (visitedDocs.get(doc) == false) {
+                throw new CheckIndexException(
+                    "Document "
+                        + doc
+                        + " doesn't have terms according to postings but has a norm value that is not zero: "
+                        + Long.toUnsignedString(norm));
+              }
             } else if (norm == 0 && visitedDocs.get(doc)) {
               throw new CheckIndexException(
                   "Document "
@@ -2139,6 +2144,25 @@ public final class CheckIndex implements Closeable {
                       + " has terms according to postings but its norm value is 0, which may only be used on documents that have no terms");
             }
           }
+          int expectedCount = 0;
+          for (int doc = visitedDocs.nextSetBit(0);
+              doc != DocIdSetIterator.NO_MORE_DOCS;
+              doc =
+                  doc + 1 >= visitedDocs.length()
+                      ? DocIdSetIterator.NO_MORE_DOCS
+                      : visitedDocs.nextSetBit(doc + 1)) {
+            if (liveDocs != null && liveDocs.get(doc) == false) {
+              // Norms may only be out of sync with terms on deleted documents.
+              // This happens when a document fails indexing and in that case it
+              // should be immediately marked as deleted by the IndexWriter.
+              continue;
+            }
+            expectedCount++;
+          }
+          if (expectedCount != actualCount) {
+            throw new CheckIndexException(
+                "actual norm count: " + actualCount + " but expected: " + expectedCount);
+          }
         }
 
         // Test seek to last term: