You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2015/03/08 19:43:45 UTC

svn commit: r1665064 - in /lucene/dev/branches/branch_5x: ./ lucene/ lucene/CHANGES.txt lucene/core/ lucene/core/src/java/org/apache/lucene/index/CheckIndex.java lucene/core/src/test/org/apache/lucene/index/TestCheckIndex.java

Author: rmuir
Date: Sun Mar  8 18:43:44 2015
New Revision: 1665064

URL: http://svn.apache.org/r1665064
Log:
LUCENE-6341: Add a -fast option to CheckIndex

Modified:
    lucene/dev/branches/branch_5x/   (props changed)
    lucene/dev/branches/branch_5x/lucene/   (props changed)
    lucene/dev/branches/branch_5x/lucene/CHANGES.txt
    lucene/dev/branches/branch_5x/lucene/core/   (props changed)
    lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
    lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/index/TestCheckIndex.java

Modified: lucene/dev/branches/branch_5x/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/CHANGES.txt?rev=1665064&r1=1665063&r2=1665064&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/branch_5x/lucene/CHANGES.txt Sun Mar  8 18:43:44 2015
@@ -25,6 +25,8 @@ New Features
 * LUCENE-6304: Added a new MatchNoDocsQuery that matches no documents.
   (Lee Hinman via Adrien Grand)
 
+* LUCENE-6341: Add a -fast option to CheckIndex. (Robert Muir)
+
 Bug Fixes
 
 * LUCENE-6249: StandardQueryParser doesn't support pure negative clauses. 

Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java?rev=1665064&r1=1665063&r2=1665064&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java Sun Mar  8 18:43:44 2015
@@ -209,6 +209,9 @@ public class CheckIndex implements Close
       
       /** Status for testing of DocValues (null if DocValues could not be tested). */
       public DocValuesStatus docValuesStatus;
+      
+      /** Version the segment was written with */
+      public Version version;
     }
     
     /**
@@ -412,6 +415,20 @@ public class CheckIndex implements Close
   }
 
   private boolean verbose;
+  
+  /** Returns the current checksums-only setting. See {@link #setChecksumsOnly}. */
+  public boolean getChecksumsOnly() {
+    return checksumsOnly;
+  }
+  
+  /** 
+   * If true, only validate physical integrity for all files; the per-segment logical tests are skipped. 
+   * Note that the returned nested status objects (e.g. storedFieldStatus) will be null.  */
+  public void setChecksumsOnly(boolean v) {
+    checksumsOnly = v;
+  }
+  
+  private boolean checksumsOnly;
 
   /** Set infoStream where messages should go.  If null, no
    *  messages are printed.  If verbose is true then more
@@ -594,10 +611,10 @@ public class CheckIndex implements Close
       result.segmentInfos.add(segInfoStat);
       msg(infoStream, "  " + (1+i) + " of " + numSegments + ": name=" + info.info.name + " maxDoc=" + info.info.maxDoc());
       segInfoStat.name = info.info.name;
-      segInfoStat.maxDoc = info.info.maxDoc();
+      segInfoStat.maxDoc = info.info.maxDoc();      
+      segInfoStat.version = info.info.getVersion();
       
-      final Version version = info.info.getVersion();
-      if (info.info.maxDoc() <= 0 && version != null && version.onOrAfter(Version.LUCENE_4_5_0)) {
+      if (info.info.maxDoc() <= 0 && segInfoStat.version.onOrAfter(Version.LUCENE_4_5_0)) {
         throw new RuntimeException("illegal number of documents: maxDoc=" + info.info.maxDoc());
       }
 
@@ -606,7 +623,7 @@ public class CheckIndex implements Close
       SegmentReader reader = null;
 
       try {
-        msg(infoStream, "    version=" + (version == null ? "3.0" : version));
+        msg(infoStream, "    version=" + segInfoStat.version);
         msg(infoStream, "    id=" + StringHelper.idToString(info.info.getId()));
         final Codec codec = info.info.getCodec();
         msg(infoStream, "    codec=" + codec);
@@ -669,42 +686,45 @@ public class CheckIndex implements Close
           }
         }
         
-        // Test Livedocs
-        segInfoStat.liveDocStatus = testLiveDocs(reader, infoStream, failFast);
-
-        // Test Fieldinfos
-        segInfoStat.fieldInfoStatus = testFieldInfos(reader, infoStream, failFast);
-        
-        // Test Field Norms
-        segInfoStat.fieldNormStatus = testFieldNorms(reader, infoStream, failFast);
-
-        // Test the Term Index
-        segInfoStat.termIndexStatus = testPostings(reader, infoStream, verbose, failFast);
-
-        // Test Stored Fields
-        segInfoStat.storedFieldStatus = testStoredFields(reader, infoStream, failFast);
-
-        // Test Term Vectors
-        segInfoStat.termVectorStatus = testTermVectors(reader, infoStream, verbose, crossCheckTermVectors, failFast);
-
-        segInfoStat.docValuesStatus = testDocValues(reader, infoStream, failFast);
-
-        // Rethrow the first exception we encountered
-        //  This will cause stats for failed segments to be incremented properly
-        if (segInfoStat.liveDocStatus.error != null) {
-          throw new RuntimeException("Live docs test failed");
-        } else if (segInfoStat.fieldInfoStatus.error != null) {
-          throw new RuntimeException("Field Info test failed");
-        } else if (segInfoStat.fieldNormStatus.error != null) {
-          throw new RuntimeException("Field Norm test failed");
-        } else if (segInfoStat.termIndexStatus.error != null) {
-          throw new RuntimeException("Term Index test failed");
-        } else if (segInfoStat.storedFieldStatus.error != null) {
-          throw new RuntimeException("Stored Field test failed");
-        } else if (segInfoStat.termVectorStatus.error != null) {
-          throw new RuntimeException("Term Vector test failed");
-        }  else if (segInfoStat.docValuesStatus.error != null) {
-          throw new RuntimeException("DocValues test failed");
+        if (checksumsOnly == false) {
+          // Test Livedocs
+          segInfoStat.liveDocStatus = testLiveDocs(reader, infoStream, failFast);
+
+          // Test Fieldinfos
+          segInfoStat.fieldInfoStatus = testFieldInfos(reader, infoStream, failFast);
+        
+          // Test Field Norms
+          segInfoStat.fieldNormStatus = testFieldNorms(reader, infoStream, failFast);
+
+          // Test the Term Index
+          segInfoStat.termIndexStatus = testPostings(reader, infoStream, verbose, failFast);
+
+          // Test Stored Fields
+          segInfoStat.storedFieldStatus = testStoredFields(reader, infoStream, failFast);
+
+          // Test Term Vectors
+          segInfoStat.termVectorStatus = testTermVectors(reader, infoStream, verbose, crossCheckTermVectors, failFast);
+
+          // Test Docvalues
+          segInfoStat.docValuesStatus = testDocValues(reader, infoStream, failFast);
+
+          // Rethrow the first exception we encountered
+          //  This will cause stats for failed segments to be incremented properly
+          if (segInfoStat.liveDocStatus.error != null) {
+            throw new RuntimeException("Live docs test failed");
+          } else if (segInfoStat.fieldInfoStatus.error != null) {
+            throw new RuntimeException("Field Info test failed");
+          } else if (segInfoStat.fieldNormStatus.error != null) {
+            throw new RuntimeException("Field Norm test failed");
+          } else if (segInfoStat.termIndexStatus.error != null) {
+            throw new RuntimeException("Term Index test failed");
+          } else if (segInfoStat.storedFieldStatus.error != null) {
+            throw new RuntimeException("Stored Field test failed");
+          } else if (segInfoStat.termVectorStatus.error != null) {
+            throw new RuntimeException("Term Vector test failed");
+          }  else if (segInfoStat.docValuesStatus.error != null) {
+            throw new RuntimeException("DocValues test failed");
+          }
         }
 
         msg(infoStream, "");
@@ -748,6 +768,21 @@ public class CheckIndex implements Close
       msg(infoStream, "ERROR: Next segment name counter " + sis.counter + " is not greater than max segment name " + result.maxSegmentName);
     }
     
+    // if someone uses the -fast option, check that it wasn't a no-op or weak check.
+    if (getChecksumsOnly()) {
+      boolean old = false; // no ids 
+      boolean ancient = false; // no checksums
+      for (Status.SegmentInfoStatus segment : result.segmentInfos) {
+        old |= !segment.version.onOrAfter(Version.LUCENE_5_0_0);
+        ancient |= !segment.version.onOrAfter(Version.LUCENE_4_8_0);
+      }
+      if (ancient) {
+        msg(infoStream, "WARNING: Some segments are older than 4.8 and have no checksums. Run checkindex without -fast for full verification.");
+      } else if (old) {
+        msg(infoStream, "WARNING: Some segments are older than 5.0 and have no identifiers. Run checkindex without -fast for full verification.");
+      }
+    }
+    
     if (result.clean) {
       msg(infoStream, "No problems were detected with this index.\n");
     }
@@ -2111,13 +2146,16 @@ public class CheckIndex implements Close
     boolean doExorcise = false;
     boolean doCrossCheckTermVectors = false;
     boolean verbose = false;
+    boolean doChecksumsOnly = false;
     List<String> onlySegments = new ArrayList<>();
     String indexPath = null;
     String dirImpl = null;
     int i = 0;
     while(i < args.length) {
       String arg = args[i];
-      if ("-exorcise".equals(arg)) {
+      if ("-fast".equals(arg)) {
+        doChecksumsOnly = true;
+      } else if ("-exorcise".equals(arg)) {
         doExorcise = true;
       } else if ("-crossCheckTermVectors".equals(arg)) {
         doCrossCheckTermVectors = true;
@@ -2152,6 +2190,7 @@ public class CheckIndex implements Close
       System.out.println("\nUsage: java org.apache.lucene.index.CheckIndex pathToIndex [-exorcise] [-crossCheckTermVectors] [-segment X] [-segment Y] [-dir-impl X]\n" +
                          "\n" +
                          "  -exorcise: actually write a new segments_N file, removing any problematic segments\n" +
+                         "  -fast: just verify file checksums, omitting logical integrity checks\n" + 
                          "  -crossCheckTermVectors: verifies that term vectors match postings; THIS IS VERY SLOW!\n" +
                          "  -codec X: when exorcising, codec to write the new segments_N file with\n" +
                          "  -verbose: print additional details\n" +
@@ -2186,6 +2225,11 @@ public class CheckIndex implements Close
       System.out.println("ERROR: cannot specify both -exorcise and -segment");
       return 1;
     }
+    
+    if (doChecksumsOnly && doCrossCheckTermVectors) {
+      System.out.println("ERROR: cannot specify both -fast and -crossCheckTermVectors");
+      return 1;
+    }
 
     System.out.println("\nOpening index @ " + indexPath + "\n");
     Directory directory = null;
@@ -2205,6 +2249,7 @@ public class CheckIndex implements Close
     try (Directory dir = directory;
          CheckIndex checker = new CheckIndex(dir)) {
       checker.setCrossCheckTermVectors(doCrossCheckTermVectors);
+      checker.setChecksumsOnly(doChecksumsOnly);
       checker.setInfoStream(System.out, verbose);
       
       Status result = checker.checkIndex(onlySegments);

Modified: lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/index/TestCheckIndex.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/index/TestCheckIndex.java?rev=1665064&r1=1665063&r2=1665064&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/index/TestCheckIndex.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/index/TestCheckIndex.java Sun Mar  8 18:43:44 2015
@@ -24,9 +24,11 @@ import java.util.List;
 import java.util.ArrayList;
 
 import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.LineFileDocs;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.LockObtainFailedException;
+import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.CannedTokenStream;
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.analysis.Token;
@@ -118,6 +120,54 @@ public class TestCheckIndex extends Luce
     dir.close(); // checkindex
   }
   
+  public void testChecksumsOnly() throws IOException {
+    LineFileDocs lf = new LineFileDocs(random());
+    Directory dir = newDirectory();
+    Analyzer analyzer = new MockAnalyzer(random());
+    IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(analyzer));
+    for (int i = 0; i < 100; i++) {
+      iw.addDocument(lf.nextDoc());
+    }
+    iw.addDocument(new Document());
+    iw.commit();
+    iw.close();
+    lf.close();
+    
+    ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
+    CheckIndex checker = new CheckIndex(dir);
+    checker.setChecksumsOnly(true); // enable the mode under test; without this the test only runs a full check
+    checker.setInfoStream(VERBOSE ? System.out : new PrintStream(bos, false, IOUtils.UTF_8));
+    CheckIndex.Status indexStatus = checker.checkIndex();
+    assertTrue(indexStatus.clean);
+    checker.close();
+    dir.close();
+    analyzer.close();
+  }
+  
+  public void testChecksumsOnlyVerbose() throws IOException {
+    LineFileDocs lf = new LineFileDocs(random());
+    Directory dir = newDirectory();
+    Analyzer analyzer = new MockAnalyzer(random());
+    IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(analyzer));
+    for (int i = 0; i < 100; i++) {
+      iw.addDocument(lf.nextDoc());
+    }
+    iw.addDocument(new Document());
+    iw.commit();
+    iw.close();
+    lf.close();
+    
+    ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
+    CheckIndex checker = new CheckIndex(dir);
+    checker.setChecksumsOnly(true); // enable the mode under test; without this the test only runs a full check
+    checker.setInfoStream(VERBOSE ? System.out : new PrintStream(bos, false, IOUtils.UTF_8), true); // 2nd arg = verbose
+    CheckIndex.Status indexStatus = checker.checkIndex();
+    assertTrue(indexStatus.clean);
+    checker.close();
+    dir.close();
+    analyzer.close();
+  }
+  
   public void testObtainsLock() throws IOException {
     Directory dir = newDirectory();
     IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));