You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by mi...@apache.org on 2009/06/26 20:10:03 UTC

svn commit: r788800 - in /lucene/java/trunk: CHANGES.txt src/java/org/apache/lucene/index/CheckIndex.java src/test/org/apache/lucene/index/TestCheckIndex.java

Author: mikemccand
Date: Fri Jun 26 18:10:02 2009
New Revision: 788800

URL: http://svn.apache.org/viewvc?rev=788800&view=rev
Log:
LUCENE-1625: return more status details in CheckIndex, broken out by component into separate status classes

Modified:
    lucene/java/trunk/CHANGES.txt
    lucene/java/trunk/src/java/org/apache/lucene/index/CheckIndex.java
    lucene/java/trunk/src/test/org/apache/lucene/index/TestCheckIndex.java

Modified: lucene/java/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/CHANGES.txt?rev=788800&r1=788799&r2=788800&view=diff
==============================================================================
--- lucene/java/trunk/CHANGES.txt (original)
+++ lucene/java/trunk/CHANGES.txt Fri Jun 26 18:10:02 2009
@@ -276,6 +276,11 @@
 26. LUCENE-1703: Add IndexWriter.waitForMerges.  (Tim Smith via Mike
     McCandless)
 
+27. LUCENE-1625: CheckIndex's programmatic API now returns separate
+    classes detailing the status of each component in the index, and
+    includes more detailed status than previously.  (Tim Smith via
+    Mike McCandless)
+
 Bug fixes
 
 1. LUCENE-1415: MultiPhraseQuery has incorrect hashCode() and equals()

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/CheckIndex.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/CheckIndex.java?rev=788800&r1=788799&r2=788800&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/CheckIndex.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/CheckIndex.java Fri Jun 26 18:10:02 2009
@@ -179,6 +179,76 @@
        *  debugging details that IndexWriter records into
        *  each segment it creates */
       public Map diagnostics;
+
+      /** Status for testing of field norms (null if field norms could not be tested). */
+      public FieldNormStatus fieldNormStatus;
+
+      /** Status for testing of indexed terms (null if indexed terms could not be tested). */
+      public TermIndexStatus termIndexStatus;
+
+      /** Status for testing of stored fields (null if stored fields could not be tested). */
+      public StoredFieldStatus storedFieldStatus;
+
+      /** Status for testing of term vectors (null if term vectors could not be tested). */
+      public TermVectorStatus termVectorStatus;
+    }
+
+    /**
+     * Status from testing field norms.
+     */
+    public static final class FieldNormStatus {
+      /** Number of fields successfully tested */
+      public long totFields = 0L;
+
+      /** Exception thrown during term index test (null on success) */
+      public Throwable error = null;
+    }
+
+    /**
+     * Status from testing term index.
+     */
+    public static final class TermIndexStatus {
+      /** Total term count */
+      public long termCount = 0L;
+
+      /** Total frequency across all terms. */
+      public long totFreq = 0L;
+      
+      /** Total number of positions. */
+      public long totPos = 0L;
+
+      /** Exception thrown during term index test (null on success) */
+      public Throwable error = null;
+    }
+
+    /**
+     * Status from testing stored fields.
+     */
+    public static final class StoredFieldStatus {
+      
+      /** Number of documents tested. */
+      public int docCount = 0;
+      
+      /** Total number of stored fields tested. */
+      public long totFields = 0;
+      
+      /** Exception thrown during stored fields test (null on success) */
+      public Throwable error = null;
+    }
+
+    /**
+     * Status from testing stored fields.
+     */
+    public static final class TermVectorStatus {
+      
+      /** Number of documents tested. */
+      public int docCount = 0;
+      
+      /** Total number of term vectors tested. */
+      public long totVectors = 0;
+      
+      /** Exception thrown during term vector test (null on success) */
+      public Throwable error = null;
     }
   }
 
@@ -443,107 +513,38 @@
         if (reader.maxDoc() != info.docCount)
           throw new RuntimeException("SegmentReader.maxDoc() " + reader.maxDoc() + " != SegmentInfos.docCount " + info.docCount);
 
-        if (infoStream != null)
-          infoStream.print("    test: fields, norms.......");
+        // Test getFieldNames()
+        if (infoStream != null) {
+          infoStream.print("    test: fields..............");
+        }         
         Collection fieldNames = reader.getFieldNames(IndexReader.FieldOption.ALL);
-        Iterator it = fieldNames.iterator();
-        final byte[] b = new byte[reader.maxDoc()];
-        while(it.hasNext()) {
-          final String fieldName = (String) it.next();
-          reader.norms(fieldName, b, 0);
-        }
         msg("OK [" + fieldNames.size() + " fields]");
         segInfoStat.numFields = fieldNames.size();
-        if (infoStream != null)
-          infoStream.print("    test: terms, freq, prox...");
-        final TermEnum termEnum = reader.terms();
-        final TermPositions termPositions = reader.termPositions();
-
-        // Used only to count up # deleted docs for this
-        // term
-        final MySegmentTermDocs myTermDocs = new MySegmentTermDocs(reader);
-
-        long termCount = 0;
-        long totFreq = 0;
-        long totPos = 0;
-        final int maxDoc = reader.maxDoc();
-
-        while(termEnum.next()) {
-          termCount++;
-          final Term term = termEnum.term();
-          final int docFreq = termEnum.docFreq();
-          termPositions.seek(term);
-          int lastDoc = -1;
-          int freq0 = 0;
-          totFreq += docFreq;
-          while(termPositions.next()) {
-            freq0++;
-            final int doc = termPositions.doc();
-            final int freq = termPositions.freq();
-            if (doc <= lastDoc)
-              throw new RuntimeException("term " + term + ": doc " + doc + " <= lastDoc " + lastDoc);
-            if (doc >= maxDoc)
-              throw new RuntimeException("term " + term + ": doc " + doc + " >= maxDoc " + maxDoc);
-
-            lastDoc = doc;
-            if (freq <= 0)
-              throw new RuntimeException("term " + term + ": doc " + doc + ": freq " + freq + " is out of bounds");
-            
-            int lastPos = -1;
-            totPos += freq;
-            for(int j=0;j<freq;j++) {
-              final int pos = termPositions.nextPosition();
-              if (pos < -1)
-                throw new RuntimeException("term " + term + ": doc " + doc + ": pos " + pos + " is out of bounds");
-              if (pos < lastPos)
-                throw new RuntimeException("term " + term + ": doc " + doc + ": pos " + pos + " < lastPos " + lastPos);
-            }
-          }
-
-          // Now count how many deleted docs occurred in
-          // this term:
-          final int delCount;
-          if (reader.hasDeletions()) {
-            myTermDocs.seek(term);
-            while(myTermDocs.next()) {
-            }
-            delCount = myTermDocs.delCount;
-          } else
-            delCount = 0;
-
-          if (freq0 + delCount != docFreq)
-            throw new RuntimeException("term " + term + " docFreq=" + docFreq + " != num docs seen " + freq0 + " + num docs deleted " + delCount);
+        
+        // Test Field Norms
+        segInfoStat.fieldNormStatus = testFieldNorms(fieldNames, reader);
+
+        // Test the Term Index
+        segInfoStat.termIndexStatus = testTermIndex(info, reader);
+
+        // Test Stored Fields
+        segInfoStat.storedFieldStatus = testStoredFields(info, reader, nf);
+
+        // Test Term Vectors
+        segInfoStat.termVectorStatus = testTermVectors(info, reader, nf);
+
+        // Rethrow the first exception we encountered
+        //  This will cause stats for failed segments to be incremented properly
+        if (segInfoStat.fieldNormStatus.error != null) {
+          throw new RuntimeException("Field Norm test failed");
+        } else if (segInfoStat.termIndexStatus.error != null) {
+          throw new RuntimeException("Term Index test failed");
+        } else if (segInfoStat.storedFieldStatus.error != null) {
+          throw new RuntimeException("Stored Field test failed");
+        } else if (segInfoStat.termVectorStatus.error != null) {
+          throw new RuntimeException("Term Vector test failed");
         }
 
-        msg("OK [" + termCount + " terms; " + totFreq + " terms/docs pairs; " + totPos + " tokens]");
-
-        if (infoStream != null)
-          infoStream.print("    test: stored fields.......");
-        int docCount = 0;
-        long totFields = 0;
-        for(int j=0;j<info.docCount;j++)
-          if (!reader.isDeleted(j)) {
-            docCount++;
-            Document doc = reader.document(j);
-            totFields += doc.getFields().size();
-          }
-
-        if (docCount != reader.numDocs())
-          throw new RuntimeException("docCount=" + docCount + " but saw " + docCount + " undeleted docs");
-
-        msg("OK [" + totFields + " total field count; avg " + nf.format((((float) totFields)/docCount)) + " fields per doc]");
-
-        if (infoStream != null)
-          infoStream.print("    test: term vectors........");
-        int totVectors = 0;
-        for(int j=0;j<info.docCount;j++)
-          if (!reader.isDeleted(j)) {
-            TermFreqVector[] tfv = reader.getTermFreqVectors(j);
-            if (tfv != null)
-              totVectors += tfv.length;
-          }
-
-        msg("OK [" + totVectors + " total vector count; avg " + nf.format((((float) totVectors)/docCount)) + " term/freq vector fields per doc]");
         msg("");
 
       } catch (Throwable t) {
@@ -574,7 +575,191 @@
 
     return result;
   }
+
+  /**
+   * Test field norms.
+   */
+  private Status.FieldNormStatus testFieldNorms(Collection fieldNames, SegmentReader reader) {
+    final Status.FieldNormStatus status = new Status.FieldNormStatus();
+
+    try {
+      // Test Field Norms
+      if (infoStream != null) {
+        infoStream.print("    test: field norms.........");
+      }
+      Iterator it = fieldNames.iterator();
+      final byte[] b = new byte[reader.maxDoc()];
+      while (it.hasNext()) {
+        final String fieldName = (String) it.next();
+        reader.norms(fieldName, b, 0);
+        ++status.totFields;
+      }
+
+      msg("OK [" + status.totFields + " fields]");
+    } catch (Throwable e) {
+      msg("ERROR [" + String.valueOf(e.getMessage()) + "]");
+      status.error = e;
+      if (infoStream != null) {
+        e.printStackTrace(infoStream);
+      }
+    }
+
+    return status;
+  }
+
+  /**
+   * Test the term index.
+   */
+  private Status.TermIndexStatus testTermIndex(SegmentInfo info, SegmentReader reader) {
+    final Status.TermIndexStatus status = new Status.TermIndexStatus();
+
+    try {
+      if (infoStream != null) {
+        infoStream.print("    test: terms, freq, prox...");
+      }
+
+      final TermEnum termEnum = reader.terms();
+      final TermPositions termPositions = reader.termPositions();
+
+      // Used only to count up # deleted docs for this term
+      final MySegmentTermDocs myTermDocs = new MySegmentTermDocs(reader);
+
+      final int maxDoc = reader.maxDoc();
+
+      while (termEnum.next()) {
+        status.termCount++;
+        final Term term = termEnum.term();
+        final int docFreq = termEnum.docFreq();
+        termPositions.seek(term);
+        int lastDoc = -1;
+        int freq0 = 0;
+        status.totFreq += docFreq;
+        while (termPositions.next()) {
+          freq0++;
+          final int doc = termPositions.doc();
+          final int freq = termPositions.freq();
+          if (doc <= lastDoc)
+            throw new RuntimeException("term " + term + ": doc " + doc + " <= lastDoc " + lastDoc);
+          if (doc >= maxDoc)
+            throw new RuntimeException("term " + term + ": doc " + doc + " >= maxDoc " + maxDoc);
+
+          lastDoc = doc;
+          if (freq <= 0)
+            throw new RuntimeException("term " + term + ": doc " + doc + ": freq " + freq + " is out of bounds");
+            
+          int lastPos = -1;
+          status.totPos += freq;
+          for(int j=0;j<freq;j++) {
+            final int pos = termPositions.nextPosition();
+            if (pos < -1)
+              throw new RuntimeException("term " + term + ": doc " + doc + ": pos " + pos + " is out of bounds");
+            if (pos < lastPos)
+              throw new RuntimeException("term " + term + ": doc " + doc + ": pos " + pos + " < lastPos " + lastPos);
+          }
+        }
+
+        // Now count how many deleted docs occurred in
+        // this term:
+        final int delCount;
+        if (reader.hasDeletions()) {
+          myTermDocs.seek(term);
+          while(myTermDocs.next()) { }
+          delCount = myTermDocs.delCount;
+        } else {
+          delCount = 0; 
+        }
+
+        if (freq0 + delCount != docFreq) {
+          throw new RuntimeException("term " + term + " docFreq=" + 
+                                     docFreq + " != num docs seen " + freq0 + " + num docs deleted " + delCount);
+        }
+      }
+
+      msg("OK [" + status.termCount + " terms; " + status.totFreq + " terms/docs pairs; " + status.totPos + " tokens]");
+
+    } catch (Throwable e) {
+      msg("ERROR [" + String.valueOf(e.getMessage()) + "]");
+      status.error = e;
+      if (infoStream != null) {
+        e.printStackTrace(infoStream);
+      }
+    }
+
+    return status;
+  }
   
+  /**
+   * Test stored fields for a segment.
+   */
+  private Status.StoredFieldStatus testStoredFields(SegmentInfo info, SegmentReader reader, NumberFormat format) {
+    final Status.StoredFieldStatus status = new Status.StoredFieldStatus();
+
+    try {
+      if (infoStream != null) {
+        infoStream.print("    test: stored fields.......");
+      }
+
+      // Scan stored fields for all documents
+      for (int j = 0; j < info.docCount; ++j) {
+        if (!reader.isDeleted(j)) {
+          status.docCount++;
+          Document doc = reader.document(j);
+          status.totFields += doc.getFields().size();
+        }
+      }      
+
+      // Validate docCount
+      if (status.docCount != reader.numDocs()) {
+        throw new RuntimeException("docCount=" + status.docCount + " but saw " + status.docCount + " undeleted docs");
+      }
+
+      msg("OK [" + status.totFields + " total field count; avg " + 
+          format.format((((float) status.totFields)/status.docCount)) + " fields per doc]");      
+    } catch (Throwable e) {
+      msg("ERROR [" + String.valueOf(e.getMessage()) + "]");
+      status.error = e;
+      if (infoStream != null) {
+        e.printStackTrace(infoStream);
+      }
+    }
+
+    return status;
+  }
+
+  /**
+   * Test term vectors for a segment.
+   */
+  private Status.TermVectorStatus testTermVectors(SegmentInfo info, SegmentReader reader, NumberFormat format) {
+    final Status.TermVectorStatus status = new Status.TermVectorStatus();
+    
+    try {
+      if (infoStream != null) {
+        infoStream.print("    test: term vectors........");
+      }
+
+      for (int j = 0; j < info.docCount; ++j) {
+        if (!reader.isDeleted(j)) {
+          status.docCount++;
+          TermFreqVector[] tfv = reader.getTermFreqVectors(j);
+          if (tfv != null) {
+            status.totVectors += tfv.length;
+          }
+        }
+      }
+      
+      msg("OK [" + status.totVectors + " total vector count; avg " + 
+          format.format((((float) status.totVectors) / status.docCount)) + " term/freq vector fields per doc]");
+    } catch (Throwable e) {
+      msg("ERROR [" + String.valueOf(e.getMessage()) + "]");
+      status.error = e;
+      if (infoStream != null) {
+        e.printStackTrace(infoStream);
+      }
+    }
+    
+    return status;
+  }
+
   /** Repairs the index using previously returned result
    *  from {@link #checkIndex}.  Note that this does not
    *  remove any of the unreferenced files after it's done;

Modified: lucene/java/trunk/src/test/org/apache/lucene/index/TestCheckIndex.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/index/TestCheckIndex.java?rev=788800&r1=788799&r2=788800&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/index/TestCheckIndex.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/index/TestCheckIndex.java Fri Jun 26 18:10:02 2009
@@ -50,6 +50,7 @@
     ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
     CheckIndex checker = new CheckIndex(dir);
     checker.setInfoStream(new PrintStream(bos));
+    //checker.setInfoStream(System.out);
     CheckIndex.Status indexStatus = checker.checkIndex();
     if (indexStatus.clean == false) {
       System.out.println("CheckIndex failed");
@@ -61,6 +62,27 @@
     assertTrue(seg.openReaderPassed);
 
     assertNotNull(seg.diagnostics);
+    
+    assertNotNull(seg.fieldNormStatus);
+    assertNull(seg.fieldNormStatus.error);
+    assertEquals(1, seg.fieldNormStatus.totFields);
+
+    assertNotNull(seg.termIndexStatus);
+    assertNull(seg.termIndexStatus.error);
+    assertEquals(1, seg.termIndexStatus.termCount);
+    assertEquals(19, seg.termIndexStatus.totFreq);
+    assertEquals(18, seg.termIndexStatus.totPos);
+
+    assertNotNull(seg.storedFieldStatus);
+    assertNull(seg.storedFieldStatus.error);
+    assertEquals(18, seg.storedFieldStatus.docCount);
+    assertEquals(18, seg.storedFieldStatus.totFields);
+
+    assertNotNull(seg.termVectorStatus);
+    assertNull(seg.termVectorStatus.error);
+    assertEquals(18, seg.termVectorStatus.docCount);
+    assertEquals(18, seg.termVectorStatus.totVectors);
+
     assertTrue(seg.diagnostics.size() > 0);
     final List onlySegments = new ArrayList();
     onlySegments.add("_0");