You are viewing a plain text version of this content. The canonical link for it was not preserved in this plain text copy.
Posted to commits@lucene.apache.org by rm...@apache.org on 2011/11/13 02:37:46 UTC

svn commit: r1201385 - /lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/CheckIndex.java

Author: rmuir
Date: Sun Nov 13 01:37:46 2011
New Revision: 1201385

URL: http://svn.apache.org/viewvc?rev=1201385&view=rev
Log:
LUCENE-2621: term vector freq/pos checks

Modified:
    lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/CheckIndex.java

Modified: lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/CheckIndex.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/CheckIndex.java?rev=1201385&r1=1201384&r2=1201385&view=diff
==============================================================================
--- lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/CheckIndex.java (original)
+++ lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/CheckIndex.java Sun Nov 13 01:37:46 2011
@@ -1143,6 +1143,8 @@ public class CheckIndex {
       }
 
       // TODO: maybe we can factor out testTermIndex and reuse here?
+      DocsEnum docs = null;
+      DocsAndPositionsEnum postings = null;
       final Bits liveDocs = reader.getLiveDocs();
       for (int j = 0; j < info.docCount; ++j) {
         if (liveDocs == null || liveDocs.get(j)) {
@@ -1173,13 +1175,55 @@ public class CheckIndex {
                 }
                 
                 long totalTermFreq = termsEnum.totalTermFreq();
-                if (totalTermFreq == -1) {
-                  tfvComputedSumTotalTermFreq = -1;
+                
+                if (totalTermFreq != -1 && totalTermFreq <= 0) {
+                  throw new RuntimeException("totalTermFreq: " + totalTermFreq + " is out of bounds");
+                }
+                
+                DocsEnum docsEnum;
+                DocsAndPositionsEnum dp = termsEnum.docsAndPositions(null, postings);
+                if (dp == null) {
+                  DocsEnum d = termsEnum.docs(null, docs);
+                  docsEnum = docs = d;
                 } else {
-                  if (totalTermFreq <= 0) {
-                    throw new RuntimeException("totalTermFreq: " + totalTermFreq + " is out of bounds");
+                  docsEnum = postings = dp;
+                }
+                  
+                final int doc = docsEnum.nextDoc();
+                  
+                if (doc != j) {
+                  throw new RuntimeException("vector for doc " + j + " references another document: " + doc);
+                }
+                  
+                final int tf = docsEnum.freq();
+                tfvComputedSumTotalTermFreq += tf;
+                
+                if (tf <= 0) {
+                  throw new RuntimeException("vector freq " + tf + " is out of bounds");
+                }
+                
+                if (totalTermFreq != -1 && totalTermFreq != tf) {
+                  throw new RuntimeException("vector totalTermFreq " + totalTermFreq + " != tf " + tf);
+                }
+                
+                if (dp != null) {
+                  int lastPosition = -1;
+                  for (int i = 0; i < tf; i++) {
+                    int pos = dp.nextPosition();
+                    if (pos != -1 && pos < 0) {
+                      throw new RuntimeException("vector position " + pos + " is out of bounds");
+                    }
+                    
+                    if (pos < lastPosition) {
+                      throw new RuntimeException("vector position " + pos + " < lastPos " + lastPosition);
+                    }
+                    
+                    lastPosition = pos;
                   }
-                  tfvComputedSumTotalTermFreq += totalTermFreq;
+                }
+                  
+                if (docsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
+                  throw new RuntimeException("vector for doc " + j + " references multiple documents!");
                 }
               }