Posted to java-commits@lucene.apache.org by mi...@apache.org on 2009/06/26 20:10:03 UTC
svn commit: r788800 - in /lucene/java/trunk: CHANGES.txt src/java/org/apache/lucene/index/CheckIndex.java src/test/org/apache/lucene/index/TestCheckIndex.java
Author: mikemccand
Date: Fri Jun 26 18:10:02 2009
New Revision: 788800
URL: http://svn.apache.org/viewvc?rev=788800&view=rev
Log:
LUCENE-1625: return more status details in CheckIndex, broken out by component into separate status classes
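
For context, here is a minimal sketch of how a caller might consume the new per-component status objects once this change is in place. The field names (fieldNormStatus, termIndexStatus, storedFieldStatus, termVectorStatus, and their error members) are taken from the diff below; the per-segment list name (segmentInfos) and its SegmentInfoStatus element type are assumptions based on the surrounding CheckIndex API rather than part of this diff, and the index path is purely illustrative:

    import java.io.IOException;
    import java.util.Iterator;
    import org.apache.lucene.index.CheckIndex;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.FSDirectory;

    public class CheckIndexStatusExample {
      public static void main(String[] args) throws IOException {
        // Open the index and run the programmatic check.
        Directory dir = FSDirectory.getDirectory("/path/to/index");   // illustrative path
        CheckIndex checker = new CheckIndex(dir);
        CheckIndex.Status indexStatus = checker.checkIndex();

        if (!indexStatus.clean) {
          // Walk each segment's status and report which component failed.
          Iterator it = indexStatus.segmentInfos.iterator();           // field name assumed
          while (it.hasNext()) {
            CheckIndex.Status.SegmentInfoStatus seg =                  // class name assumed
                (CheckIndex.Status.SegmentInfoStatus) it.next();
            if (seg.fieldNormStatus != null && seg.fieldNormStatus.error != null)
              System.out.println("field norms failed: " + seg.fieldNormStatus.error);
            if (seg.termIndexStatus != null && seg.termIndexStatus.error != null)
              System.out.println("term index failed: " + seg.termIndexStatus.error);
            if (seg.storedFieldStatus != null && seg.storedFieldStatus.error != null)
              System.out.println("stored fields failed: " + seg.storedFieldStatus.error);
            if (seg.termVectorStatus != null && seg.termVectorStatus.error != null)
              System.out.println("term vectors failed: " + seg.termVectorStatus.error);
          }
        }
        dir.close();
      }
    }

Because checkIndex catches the per-segment rethrow internally (see the catch (Throwable t) block in the diff), a caller inspects the per-component error fields after the call returns rather than handling exceptions directly.
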
Modified:
lucene/java/trunk/CHANGES.txt
lucene/java/trunk/src/java/org/apache/lucene/index/CheckIndex.java
lucene/java/trunk/src/test/org/apache/lucene/index/TestCheckIndex.java
Modified: lucene/java/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/CHANGES.txt?rev=788800&r1=788799&r2=788800&view=diff
==============================================================================
--- lucene/java/trunk/CHANGES.txt (original)
+++ lucene/java/trunk/CHANGES.txt Fri Jun 26 18:10:02 2009
@@ -276,6 +276,11 @@
26. LUCENE-1703: Add IndexWriter.waitForMerges. (Tim Smith via Mike
McCandless)
+27. LUCENE-1625: CheckIndex's programmatic API now returns separate
+ classes detailing the status of each component in the index, and
+ includes more detailed status than previously. (Tim Smith via
+ Mike McCandless)
+
Bug fixes
1. LUCENE-1415: MultiPhraseQuery has incorrect hashCode() and equals()
Modified: lucene/java/trunk/src/java/org/apache/lucene/index/CheckIndex.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/CheckIndex.java?rev=788800&r1=788799&r2=788800&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/CheckIndex.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/CheckIndex.java Fri Jun 26 18:10:02 2009
@@ -179,6 +179,76 @@
* debugging details that IndexWriter records into
* each segment it creates */
public Map diagnostics;
+
+ /** Status for testing of field norms (null if field norms could not be tested). */
+ public FieldNormStatus fieldNormStatus;
+
+ /** Status for testing of indexed terms (null if indexed terms could not be tested). */
+ public TermIndexStatus termIndexStatus;
+
+ /** Status for testing of stored fields (null if stored fields could not be tested). */
+ public StoredFieldStatus storedFieldStatus;
+
+ /** Status for testing of term vectors (null if term vectors could not be tested). */
+ public TermVectorStatus termVectorStatus;
+ }
+
+ /**
+ * Status from testing field norms.
+ */
+ public static final class FieldNormStatus {
+ /** Number of fields successfully tested */
+ public long totFields = 0L;
+
+ /** Exception thrown during field norm test (null on success) */
+ public Throwable error = null;
+ }
+
+ /**
+ * Status from testing term index.
+ */
+ public static final class TermIndexStatus {
+ /** Total term count */
+ public long termCount = 0L;
+
+ /** Total frequency across all terms. */
+ public long totFreq = 0L;
+
+ /** Total number of positions. */
+ public long totPos = 0L;
+
+ /** Exception thrown during term index test (null on success) */
+ public Throwable error = null;
+ }
+
+ /**
+ * Status from testing stored fields.
+ */
+ public static final class StoredFieldStatus {
+
+ /** Number of documents tested. */
+ public int docCount = 0;
+
+ /** Total number of stored fields tested. */
+ public long totFields = 0;
+
+ /** Exception thrown during stored fields test (null on success) */
+ public Throwable error = null;
+ }
+
+ /**
+ * Status from testing term vectors.
+ */
+ public static final class TermVectorStatus {
+
+ /** Number of documents tested. */
+ public int docCount = 0;
+
+ /** Total number of term vectors tested. */
+ public long totVectors = 0;
+
+ /** Exception thrown during term vector test (null on success) */
+ public Throwable error = null;
}
}
@@ -443,107 +513,38 @@
if (reader.maxDoc() != info.docCount)
throw new RuntimeException("SegmentReader.maxDoc() " + reader.maxDoc() + " != SegmentInfos.docCount " + info.docCount);
- if (infoStream != null)
- infoStream.print(" test: fields, norms.......");
+ // Test getFieldNames()
+ if (infoStream != null) {
+ infoStream.print(" test: fields..............");
+ }
Collection fieldNames = reader.getFieldNames(IndexReader.FieldOption.ALL);
- Iterator it = fieldNames.iterator();
- final byte[] b = new byte[reader.maxDoc()];
- while(it.hasNext()) {
- final String fieldName = (String) it.next();
- reader.norms(fieldName, b, 0);
- }
msg("OK [" + fieldNames.size() + " fields]");
segInfoStat.numFields = fieldNames.size();
- if (infoStream != null)
- infoStream.print(" test: terms, freq, prox...");
- final TermEnum termEnum = reader.terms();
- final TermPositions termPositions = reader.termPositions();
-
- // Used only to count up # deleted docs for this
- // term
- final MySegmentTermDocs myTermDocs = new MySegmentTermDocs(reader);
-
- long termCount = 0;
- long totFreq = 0;
- long totPos = 0;
- final int maxDoc = reader.maxDoc();
-
- while(termEnum.next()) {
- termCount++;
- final Term term = termEnum.term();
- final int docFreq = termEnum.docFreq();
- termPositions.seek(term);
- int lastDoc = -1;
- int freq0 = 0;
- totFreq += docFreq;
- while(termPositions.next()) {
- freq0++;
- final int doc = termPositions.doc();
- final int freq = termPositions.freq();
- if (doc <= lastDoc)
- throw new RuntimeException("term " + term + ": doc " + doc + " <= lastDoc " + lastDoc);
- if (doc >= maxDoc)
- throw new RuntimeException("term " + term + ": doc " + doc + " >= maxDoc " + maxDoc);
-
- lastDoc = doc;
- if (freq <= 0)
- throw new RuntimeException("term " + term + ": doc " + doc + ": freq " + freq + " is out of bounds");
-
- int lastPos = -1;
- totPos += freq;
- for(int j=0;j<freq;j++) {
- final int pos = termPositions.nextPosition();
- if (pos < -1)
- throw new RuntimeException("term " + term + ": doc " + doc + ": pos " + pos + " is out of bounds");
- if (pos < lastPos)
- throw new RuntimeException("term " + term + ": doc " + doc + ": pos " + pos + " < lastPos " + lastPos);
- }
- }
-
- // Now count how many deleted docs occurred in
- // this term:
- final int delCount;
- if (reader.hasDeletions()) {
- myTermDocs.seek(term);
- while(myTermDocs.next()) {
- }
- delCount = myTermDocs.delCount;
- } else
- delCount = 0;
-
- if (freq0 + delCount != docFreq)
- throw new RuntimeException("term " + term + " docFreq=" + docFreq + " != num docs seen " + freq0 + " + num docs deleted " + delCount);
+
+ // Test Field Norms
+ segInfoStat.fieldNormStatus = testFieldNorms(fieldNames, reader);
+
+ // Test the Term Index
+ segInfoStat.termIndexStatus = testTermIndex(info, reader);
+
+ // Test Stored Fields
+ segInfoStat.storedFieldStatus = testStoredFields(info, reader, nf);
+
+ // Test Term Vectors
+ segInfoStat.termVectorStatus = testTermVectors(info, reader, nf);
+
+ // Rethrow the first exception we encountered
+ // This will cause stats for failed segments to be incremented properly
+ if (segInfoStat.fieldNormStatus.error != null) {
+ throw new RuntimeException("Field Norm test failed");
+ } else if (segInfoStat.termIndexStatus.error != null) {
+ throw new RuntimeException("Term Index test failed");
+ } else if (segInfoStat.storedFieldStatus.error != null) {
+ throw new RuntimeException("Stored Field test failed");
+ } else if (segInfoStat.termVectorStatus.error != null) {
+ throw new RuntimeException("Term Vector test failed");
}
- msg("OK [" + termCount + " terms; " + totFreq + " terms/docs pairs; " + totPos + " tokens]");
-
- if (infoStream != null)
- infoStream.print(" test: stored fields.......");
- int docCount = 0;
- long totFields = 0;
- for(int j=0;j<info.docCount;j++)
- if (!reader.isDeleted(j)) {
- docCount++;
- Document doc = reader.document(j);
- totFields += doc.getFields().size();
- }
-
- if (docCount != reader.numDocs())
- throw new RuntimeException("docCount=" + docCount + " but saw " + docCount + " undeleted docs");
-
- msg("OK [" + totFields + " total field count; avg " + nf.format((((float) totFields)/docCount)) + " fields per doc]");
-
- if (infoStream != null)
- infoStream.print(" test: term vectors........");
- int totVectors = 0;
- for(int j=0;j<info.docCount;j++)
- if (!reader.isDeleted(j)) {
- TermFreqVector[] tfv = reader.getTermFreqVectors(j);
- if (tfv != null)
- totVectors += tfv.length;
- }
-
- msg("OK [" + totVectors + " total vector count; avg " + nf.format((((float) totVectors)/docCount)) + " term/freq vector fields per doc]");
msg("");
} catch (Throwable t) {
@@ -574,7 +575,191 @@
return result;
}
+
+ /**
+ * Test field norms.
+ */
+ private Status.FieldNormStatus testFieldNorms(Collection fieldNames, SegmentReader reader) {
+ final Status.FieldNormStatus status = new Status.FieldNormStatus();
+
+ try {
+ // Test Field Norms
+ if (infoStream != null) {
+ infoStream.print(" test: field norms.........");
+ }
+ Iterator it = fieldNames.iterator();
+ final byte[] b = new byte[reader.maxDoc()];
+ while (it.hasNext()) {
+ final String fieldName = (String) it.next();
+ reader.norms(fieldName, b, 0);
+ ++status.totFields;
+ }
+
+ msg("OK [" + status.totFields + " fields]");
+ } catch (Throwable e) {
+ msg("ERROR [" + String.valueOf(e.getMessage()) + "]");
+ status.error = e;
+ if (infoStream != null) {
+ e.printStackTrace(infoStream);
+ }
+ }
+
+ return status;
+ }
+
+ /**
+ * Test the term index.
+ */
+ private Status.TermIndexStatus testTermIndex(SegmentInfo info, SegmentReader reader) {
+ final Status.TermIndexStatus status = new Status.TermIndexStatus();
+
+ try {
+ if (infoStream != null) {
+ infoStream.print(" test: terms, freq, prox...");
+ }
+
+ final TermEnum termEnum = reader.terms();
+ final TermPositions termPositions = reader.termPositions();
+
+ // Used only to count up # deleted docs for this term
+ final MySegmentTermDocs myTermDocs = new MySegmentTermDocs(reader);
+
+ final int maxDoc = reader.maxDoc();
+
+ while (termEnum.next()) {
+ status.termCount++;
+ final Term term = termEnum.term();
+ final int docFreq = termEnum.docFreq();
+ termPositions.seek(term);
+ int lastDoc = -1;
+ int freq0 = 0;
+ status.totFreq += docFreq;
+ while (termPositions.next()) {
+ freq0++;
+ final int doc = termPositions.doc();
+ final int freq = termPositions.freq();
+ if (doc <= lastDoc)
+ throw new RuntimeException("term " + term + ": doc " + doc + " <= lastDoc " + lastDoc);
+ if (doc >= maxDoc)
+ throw new RuntimeException("term " + term + ": doc " + doc + " >= maxDoc " + maxDoc);
+
+ lastDoc = doc;
+ if (freq <= 0)
+ throw new RuntimeException("term " + term + ": doc " + doc + ": freq " + freq + " is out of bounds");
+
+ int lastPos = -1;
+ status.totPos += freq;
+ for(int j=0;j<freq;j++) {
+ final int pos = termPositions.nextPosition();
+ if (pos < -1)
+ throw new RuntimeException("term " + term + ": doc " + doc + ": pos " + pos + " is out of bounds");
+ if (pos < lastPos)
+ throw new RuntimeException("term " + term + ": doc " + doc + ": pos " + pos + " < lastPos " + lastPos);
+ }
+ }
+
+ // Now count how many deleted docs occurred in
+ // this term:
+ final int delCount;
+ if (reader.hasDeletions()) {
+ myTermDocs.seek(term);
+ while(myTermDocs.next()) { }
+ delCount = myTermDocs.delCount;
+ } else {
+ delCount = 0;
+ }
+
+ if (freq0 + delCount != docFreq) {
+ throw new RuntimeException("term " + term + " docFreq=" +
+ docFreq + " != num docs seen " + freq0 + " + num docs deleted " + delCount);
+ }
+ }
+
+ msg("OK [" + status.termCount + " terms; " + status.totFreq + " terms/docs pairs; " + status.totPos + " tokens]");
+
+ } catch (Throwable e) {
+ msg("ERROR [" + String.valueOf(e.getMessage()) + "]");
+ status.error = e;
+ if (infoStream != null) {
+ e.printStackTrace(infoStream);
+ }
+ }
+
+ return status;
+ }
+ /**
+ * Test stored fields for a segment.
+ */
+ private Status.StoredFieldStatus testStoredFields(SegmentInfo info, SegmentReader reader, NumberFormat format) {
+ final Status.StoredFieldStatus status = new Status.StoredFieldStatus();
+
+ try {
+ if (infoStream != null) {
+ infoStream.print(" test: stored fields.......");
+ }
+
+ // Scan stored fields for all documents
+ for (int j = 0; j < info.docCount; ++j) {
+ if (!reader.isDeleted(j)) {
+ status.docCount++;
+ Document doc = reader.document(j);
+ status.totFields += doc.getFields().size();
+ }
+ }
+
+ // Validate docCount
+ if (status.docCount != reader.numDocs()) {
+ throw new RuntimeException("docCount=" + status.docCount + " but saw " + status.docCount + " undeleted docs");
+ }
+
+ msg("OK [" + status.totFields + " total field count; avg " +
+ format.format((((float) status.totFields)/status.docCount)) + " fields per doc]");
+ } catch (Throwable e) {
+ msg("ERROR [" + String.valueOf(e.getMessage()) + "]");
+ status.error = e;
+ if (infoStream != null) {
+ e.printStackTrace(infoStream);
+ }
+ }
+
+ return status;
+ }
+
+ /**
+ * Test term vectors for a segment.
+ */
+ private Status.TermVectorStatus testTermVectors(SegmentInfo info, SegmentReader reader, NumberFormat format) {
+ final Status.TermVectorStatus status = new Status.TermVectorStatus();
+
+ try {
+ if (infoStream != null) {
+ infoStream.print(" test: term vectors........");
+ }
+
+ for (int j = 0; j < info.docCount; ++j) {
+ if (!reader.isDeleted(j)) {
+ status.docCount++;
+ TermFreqVector[] tfv = reader.getTermFreqVectors(j);
+ if (tfv != null) {
+ status.totVectors += tfv.length;
+ }
+ }
+ }
+
+ msg("OK [" + status.totVectors + " total vector count; avg " +
+ format.format((((float) status.totVectors) / status.docCount)) + " term/freq vector fields per doc]");
+ } catch (Throwable e) {
+ msg("ERROR [" + String.valueOf(e.getMessage()) + "]");
+ status.error = e;
+ if (infoStream != null) {
+ e.printStackTrace(infoStream);
+ }
+ }
+
+ return status;
+ }
+
/** Repairs the index using previously returned result
* from {@link #checkIndex}. Note that this does not
* remove any of the unreferenced files after it's done;
Modified: lucene/java/trunk/src/test/org/apache/lucene/index/TestCheckIndex.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/index/TestCheckIndex.java?rev=788800&r1=788799&r2=788800&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/index/TestCheckIndex.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/index/TestCheckIndex.java Fri Jun 26 18:10:02 2009
@@ -50,6 +50,7 @@
ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
CheckIndex checker = new CheckIndex(dir);
checker.setInfoStream(new PrintStream(bos));
+ //checker.setInfoStream(System.out);
CheckIndex.Status indexStatus = checker.checkIndex();
if (indexStatus.clean == false) {
System.out.println("CheckIndex failed");
@@ -61,6 +62,27 @@
assertTrue(seg.openReaderPassed);
assertNotNull(seg.diagnostics);
+
+ assertNotNull(seg.fieldNormStatus);
+ assertNull(seg.fieldNormStatus.error);
+ assertEquals(1, seg.fieldNormStatus.totFields);
+
+ assertNotNull(seg.termIndexStatus);
+ assertNull(seg.termIndexStatus.error);
+ assertEquals(1, seg.termIndexStatus.termCount);
+ assertEquals(19, seg.termIndexStatus.totFreq);
+ assertEquals(18, seg.termIndexStatus.totPos);
+
+ assertNotNull(seg.storedFieldStatus);
+ assertNull(seg.storedFieldStatus.error);
+ assertEquals(18, seg.storedFieldStatus.docCount);
+ assertEquals(18, seg.storedFieldStatus.totFields);
+
+ assertNotNull(seg.termVectorStatus);
+ assertNull(seg.termVectorStatus.error);
+ assertEquals(18, seg.termVectorStatus.docCount);
+ assertEquals(18, seg.termVectorStatus.totVectors);
+
assertTrue(seg.diagnostics.size() > 0);
final List onlySegments = new ArrayList();
onlySegments.add("_0");
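
The test then builds onlySegments with "_0" to exercise checking just a subset of segments; the call that consumes the list falls outside this hunk. As a hedged sketch, continuing inside the test method above (the List-taking checkIndex overload is assumed here, not shown in this diff):

    // Hypothetical continuation: re-check only segment "_0" and assert it is clean.
    indexStatus = checker.checkIndex(onlySegments);   // overload assumed
    assertTrue(indexStatus.clean);
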