You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2011/11/13 01:32:32 UTC

svn commit: r1201378 - /lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/CheckIndex.java

Author: rmuir
Date: Sun Nov 13 00:32:31 2011
New Revision: 1201378

URL: http://svn.apache.org/viewvc?rev=1201378&view=rev
Log:
LUCENE-2621: more checkindex for term vectors

Modified:
    lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/CheckIndex.java

Modified: lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/CheckIndex.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/CheckIndex.java?rev=1201378&r1=1201377&r2=1201378&view=diff
==============================================================================
--- lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/CheckIndex.java (original)
+++ lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/CheckIndex.java Sun Nov 13 00:32:31 2011
@@ -1150,7 +1150,7 @@ public class CheckIndex {
           Fields tfv = reader.getTermVectors(j);
           if (tfv != null) {
             int tfvComputedFieldCount = 0;
-            int tfvComputedTermCount = 0;
+            long tfvComputedTermCount = 0;
 
             FieldsEnum fieldsEnum = tfv.iterator();
             String field = null;
@@ -1160,9 +1160,50 @@ public class CheckIndex {
               
               Terms terms = tfv.terms(field);
               TermsEnum termsEnum = terms.iterator();
-              while (termsEnum.next() != null) {
-                tfvComputedTermCount++;
+              
+              long tfvComputedTermCountForField = 0;
+              long tfvComputedSumTotalTermFreq = 0;
+              
+              BytesRef term = null;
+              while ((term = termsEnum.next()) != null) {
+                tfvComputedTermCountForField++;
+                
+                if (termsEnum.docFreq() != 1) {
+                  throw new RuntimeException("vector docFreq for doc " + j + ", field " + field + ", term" + term + " != 1");
+                }
+                
+                long totalTermFreq = termsEnum.totalTermFreq();
+                if (totalTermFreq == -1) {
+                  tfvComputedSumTotalTermFreq = -1;
+                } else {
+                  if (totalTermFreq <= 0) {
+                    throw new RuntimeException("totalTermFreq: " + totalTermFreq + " is out of bounds");
+                  }
+                  tfvComputedSumTotalTermFreq += totalTermFreq;
+                }
+              }
+              
+              long uniqueTermCount = terms.getUniqueTermCount();
+              if (uniqueTermCount != -1 && uniqueTermCount != tfvComputedTermCountForField) {
+                throw new RuntimeException("vector term count for doc " + j + ", field " + field + " = " + uniqueTermCount + " != recomputed term count=" + tfvComputedTermCountForField);
+              }
+              
+              int docCount = terms.getDocCount();
+              if (docCount != -1 && docCount != 1) {
+                throw new RuntimeException("vector doc count for doc " + j + ", field " + field + " = " + docCount + " != 1");
               }
+              
+              long sumDocFreq = terms.getSumDocFreq();
+              if (sumDocFreq != -1 && sumDocFreq != tfvComputedTermCountForField) {
+                throw new RuntimeException("vector postings count for doc " + j + ", field " + field + " = " + sumDocFreq + " != recomputed postings count=" + tfvComputedTermCountForField);
+              }
+              
+              long sumTotalTermFreq = terms.getSumTotalTermFreq();
+              if (sumTotalTermFreq != -1 && sumTotalTermFreq != tfvComputedSumTotalTermFreq) {
+                throw new RuntimeException("vector sumTotalTermFreq for doc " + j + ", field " + field + " = " + sumTotalTermFreq + " != recomputed sumTotalTermFreq=" + tfvComputedSumTotalTermFreq);
+              }
+              
+              tfvComputedTermCount += tfvComputedTermCountForField;
             }
             
             // TODO: testTermIndex should check this stat too!