Posted to commits@lucene.apache.org by gc...@apache.org on 2015/11/30 23:46:49 UTC

svn commit: r1717340 - in /lucene/dev/trunk: lucene/core/src/java/org/apache/lucene/index/ lucene/core/src/test/org/apache/lucene/index/ lucene/test-framework/src/java/org/apache/lucene/index/ solr/ solr/core/src/java/org/apache/solr/index/ solr/core/s...

Author: gchanan
Date: Mon Nov 30 22:46:48 2015
New Revision: 1717340

URL: http://svn.apache.org/viewvc?rev=1717340&view=rev
Log:
SOLR-7928: Improve CheckIndex to work against HdfsDirectory

Added:
    lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/index/BaseTestCheckIndex.java
    lucene/dev/trunk/solr/core/src/java/org/apache/solr/index/
    lucene/dev/trunk/solr/core/src/java/org/apache/solr/index/hdfs/
    lucene/dev/trunk/solr/core/src/java/org/apache/solr/index/hdfs/CheckHdfsIndex.java
    lucene/dev/trunk/solr/core/src/java/org/apache/solr/index/hdfs/package-info.java
    lucene/dev/trunk/solr/core/src/test/org/apache/solr/index/
    lucene/dev/trunk/solr/core/src/test/org/apache/solr/index/hdfs/
    lucene/dev/trunk/solr/core/src/test/org/apache/solr/index/hdfs/CheckHdfsIndexTest.java
Modified:
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
    lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestCheckIndex.java
    lucene/dev/trunk/solr/CHANGES.txt

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java?rev=1717340&r1=1717339&r2=1717340&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java Mon Nov 30 22:46:48 2015
@@ -73,7 +73,7 @@ import org.apache.lucene.util.automaton.
  * @lucene.experimental Please make a complete backup of your
  * index before using this to exorcise corrupted documents from your index!
  */
-public class CheckIndex implements Closeable {
+public final class CheckIndex implements Closeable {
 
   private PrintStream infoStream;
   private Directory dir;
@@ -2297,7 +2297,11 @@ public class CheckIndex implements Close
     return true;
   }
 
-  private static boolean assertsOn() {
+  /**
+   * Check whether asserts are enabled or not.
+   * @return true iff asserts are enabled
+   */
+  public static boolean assertsOn() {
     assert testAsserts();
     return assertsOn;
   }
@@ -2338,11 +2342,11 @@ public class CheckIndex implements Close
     int exitCode = doMain(args);
     System.exit(exitCode);
   }
-  
-  // actual main: returns exit code instead of terminating JVM (for easy testing)
-  @SuppressForbidden(reason = "System.out required: command line tool")
-  private static int doMain(String args[]) throws IOException, InterruptedException {
 
+  /**
+   * Run-time configuration options for CheckIndex commands.
+   */
+  public static class Options {
     boolean doExorcise = false;
     boolean doCrossCheckTermVectors = false;
     boolean verbose = false;
@@ -2350,44 +2354,113 @@ public class CheckIndex implements Close
     List<String> onlySegments = new ArrayList<>();
     String indexPath = null;
     String dirImpl = null;
+    PrintStream out = null;
+
+    /** Sole constructor. */
+    public Options() {}
+
+    /**
+     * Get the name of the FSDirectory implementation class to use.
+     */
+    public String getDirImpl() {
+      return dirImpl;
+    }
+
+    /**
+     * Get the directory containing the index.
+     */
+    public String getIndexPath() {
+      return indexPath;
+    }
+
+    /**
+     * Set the PrintStream to use for reporting results.
+     */
+    public void setOut(PrintStream out) {
+      this.out = out;
+    }
+  }
+
+  // actual main: returns exit code instead of terminating JVM (for easy testing)
+  @SuppressForbidden(reason = "System.out required: command line tool")
+  private static int doMain(String args[]) throws IOException, InterruptedException {
+    Options opts;
+    try {
+      opts = parseOptions(args);
+    } catch (IllegalArgumentException e) {
+      System.out.println(e.getMessage());
+      return 1;
+    }
+
+    if (!assertsOn())
+      System.out.println("\nNOTE: testing will be more thorough if you run java with '-ea:org.apache.lucene...', so assertions are enabled");
+
+    System.out.println("\nOpening index @ " + opts.indexPath + "\n");
+    Directory directory = null;
+    Path path = Paths.get(opts.indexPath);
+    try {
+      if (opts.dirImpl == null) {
+        directory = FSDirectory.open(path);
+      } else {
+        directory = CommandLineUtil.newFSDirectory(opts.dirImpl, path);
+      }
+    } catch (Throwable t) {
+      System.out.println("ERROR: could not open directory \"" + opts.indexPath + "\"; exiting");
+      t.printStackTrace(System.out);
+      return 1;
+    }
+
+    try (Directory dir = directory;
+         CheckIndex checker = new CheckIndex(dir)) {
+      opts.out = System.out;
+      return checker.doCheck(opts);
+    }
+  }
+
+  /**
+   * Parse command line args into fields
+   * @param args The command line arguments
+   * @return An Options struct
+   * @throws IllegalArgumentException if any of the CLI args are invalid
+   */
+  public static Options parseOptions(String[] args) {
+    Options opts = new Options();
+
     int i = 0;
     while(i < args.length) {
       String arg = args[i];
       if ("-fast".equals(arg)) {
-        doChecksumsOnly = true;
+        opts.doChecksumsOnly = true;
       } else if ("-exorcise".equals(arg)) {
-        doExorcise = true;
+        opts.doExorcise = true;
       } else if ("-crossCheckTermVectors".equals(arg)) {
-        doCrossCheckTermVectors = true;
+        opts.doCrossCheckTermVectors = true;
       } else if (arg.equals("-verbose")) {
-        verbose = true;
+        opts.verbose = true;
       } else if (arg.equals("-segment")) {
         if (i == args.length-1) {
-          System.out.println("ERROR: missing name for -segment option");
-          return 1;
+          throw new IllegalArgumentException("ERROR: missing name for -segment option");
         }
         i++;
-        onlySegments.add(args[i]);
+        opts.onlySegments.add(args[i]);
       } else if ("-dir-impl".equals(arg)) {
         if (i == args.length - 1) {
-          System.out.println("ERROR: missing value for -dir-impl option");
-          return 1;
+          throw new IllegalArgumentException("ERROR: missing value for -dir-impl option");
         }
         i++;
-        dirImpl = args[i];
+        opts.dirImpl = args[i];
       } else {
-        if (indexPath != null) {
-          System.out.println("ERROR: unexpected extra argument '" + args[i] + "'");
-          return 1;
+        if (opts.indexPath != null) {
+          throw new IllegalArgumentException("ERROR: unexpected extra argument '" + args[i] + "'");
         }
-        indexPath = args[i];
+        opts.indexPath = args[i];
       }
       i++;
     }
 
-    if (indexPath == null) {
-      System.out.println("\nERROR: index path not specified");
-      System.out.println("\nUsage: java org.apache.lucene.index.CheckIndex pathToIndex [-exorcise] [-crossCheckTermVectors] [-segment X] [-segment Y] [-dir-impl X]\n" +
+    if (opts.indexPath == null) {
+      throw new IllegalArgumentException("\nERROR: index path not specified" +
+                         "\nUsage: java org.apache.lucene.index.CheckIndex pathToIndex [-exorcise] [-crossCheckTermVectors] [-segment X] [-segment Y] [-dir-impl X]\n" +
                          "\n" +
                          "  -exorcise: actually write a new segments_N file, removing any problematic segments\n" +
                          "  -fast: just verify file checksums, omitting logical integrity checks\n" + 
@@ -2413,74 +2486,59 @@ public class CheckIndex implements Close
                          "\n" +
                          "This tool exits with exit code 1 if the index cannot be opened or has any\n" +
                          "corruption, else 0.\n");
-      return 1;
     }
 
-    if (!assertsOn())
-      System.out.println("\nNOTE: testing will be more thorough if you run java with '-ea:org.apache.lucene...', so assertions are enabled");
-
-    if (onlySegments.size() == 0)
-      onlySegments = null;
-    else if (doExorcise) {
-      System.out.println("ERROR: cannot specify both -exorcise and -segment");
-      return 1;
+    if (opts.onlySegments.size() == 0) {
+      opts.onlySegments = null;
+    } else if (opts.doExorcise) {
+      throw new IllegalArgumentException("ERROR: cannot specify both -exorcise and -segment");
     }
     
-    if (doChecksumsOnly && doCrossCheckTermVectors) {
-      System.out.println("ERROR: cannot specify both -fast and -crossCheckTermVectors");
-      return 1;
+    if (opts.doChecksumsOnly && opts.doCrossCheckTermVectors) {
+      throw new IllegalArgumentException("ERROR: cannot specify both -fast and -crossCheckTermVectors");
     }
 
-    System.out.println("\nOpening index @ " + indexPath + "\n");
-    Directory directory = null;
-    Path path = Paths.get(indexPath);
-    try {
-      if (dirImpl == null) {
-        directory = FSDirectory.open(path);
-      } else {
-        directory = CommandLineUtil.newFSDirectory(dirImpl, path);
-      }
-    } catch (Throwable t) {
-      System.out.println("ERROR: could not open directory \"" + indexPath + "\"; exiting");
-      t.printStackTrace(System.out);
+    return opts;
+  }
+
+  /**
+   * Actually perform the index check
+   * @param opts The options to use for this check
+   * @return 0 iff the index is clean, 1 otherwise
+   */
+  public int doCheck(Options opts) throws IOException, InterruptedException {
+    setCrossCheckTermVectors(opts.doCrossCheckTermVectors);
+    setChecksumsOnly(opts.doChecksumsOnly);
+    setInfoStream(opts.out, opts.verbose);
+
+    Status result = checkIndex(opts.onlySegments);
+    if (result.missingSegments) {
       return 1;
     }
 
-    try (Directory dir = directory;
-         CheckIndex checker = new CheckIndex(dir)) {
-      checker.setCrossCheckTermVectors(doCrossCheckTermVectors);
-      checker.setChecksumsOnly(doChecksumsOnly);
-      checker.setInfoStream(System.out, verbose);
-      
-      Status result = checker.checkIndex(onlySegments);
-      if (result.missingSegments) {
-        return 1;
-      }
-      
-      if (!result.clean) {
-        if (!doExorcise) {
-          System.out.println("WARNING: would write new segments file, and " + result.totLoseDocCount + " documents would be lost, if -exorcise were specified\n");
-        } else {
-          System.out.println("WARNING: " + result.totLoseDocCount + " documents will be lost\n");
-          System.out.println("NOTE: will write new segments file in 5 seconds; this will remove " + result.totLoseDocCount + " docs from the index. YOU WILL LOSE DATA. THIS IS YOUR LAST CHANCE TO CTRL+C!");
-          for(int s=0;s<5;s++) {
-            Thread.sleep(1000);
-            System.out.println("  " + (5-s) + "...");
-          }
-          System.out.println("Writing...");
-          checker.exorciseIndex(result);
-          System.out.println("OK");
-          System.out.println("Wrote new segments file \"" + result.newSegments.getSegmentsFileName() + "\"");
-        }
-      }
-      System.out.println("");
-      
-      if (result.clean == true) {
-        return 0;
+    if (!result.clean) {
+      if (!opts.doExorcise) {
+        opts.out.println("WARNING: would write new segments file, and " + result.totLoseDocCount + " documents would be lost, if -exorcise were specified\n");
       } else {
-        return 1;
+        opts.out.println("WARNING: " + result.totLoseDocCount + " documents will be lost\n");
+        opts.out.println("NOTE: will write new segments file in 5 seconds; this will remove " + result.totLoseDocCount + " docs from the index. YOU WILL LOSE DATA. THIS IS YOUR LAST CHANCE TO CTRL+C!");
+        for(int s=0;s<5;s++) {
+          Thread.sleep(1000);
+          opts.out.println("  " + (5-s) + "...");
+        }
+        opts.out.println("Writing...");
+        exorciseIndex(result);
+        opts.out.println("OK");
+        opts.out.println("Wrote new segments file \"" + result.newSegments.getSegmentsFileName() + "\"");
       }
     }
+    opts.out.println("");
+
+    if (result.clean == true) {
+      return 0;
+    } else {
+      return 1;
+    }
   }
 
   private static double nsToSec(long ns) {
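
[Editor's note] The net effect of the CheckIndex.java changes above: argument parsing now lives in the public parseOptions(String[]) (which throws IllegalArgumentException instead of printing and returning 1), the check itself lives in the public doCheck(Options), and doMain is just the glue between them. Below is a minimal sketch of driving that new API directly, outside the command-line tool; the index path and the surrounding class name are placeholders, not part of this commit.

    // Illustrative only: exercises the public API introduced in the diff above.
    // "/path/to/index" is a placeholder for a real index directory.
    import java.nio.file.Paths;

    import org.apache.lucene.index.CheckIndex;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.FSDirectory;

    public class CheckIndexApiSketch {
      public static void main(String[] args) throws Exception {
        // Bad arguments now surface as IllegalArgumentException rather than an exit code.
        CheckIndex.Options opts =
            CheckIndex.parseOptions(new String[] {"-verbose", "/path/to/index"});

        try (Directory dir = FSDirectory.open(Paths.get(opts.getIndexPath()));
             CheckIndex checker = new CheckIndex(dir)) {
          opts.setOut(System.out);          // where doCheck reports its findings
          int rc = checker.doCheck(opts);   // 0 iff the index is clean
          System.out.println("CheckIndex exit code: " + rc);
        }
      }
    }

This is essentially what the new CheckHdfsIndex further down does, with HdfsDirectory substituted for FSDirectory.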

Modified: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestCheckIndex.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestCheckIndex.java?rev=1717340&r1=1717339&r2=1717340&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestCheckIndex.java (original)
+++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestCheckIndex.java Mon Nov 30 22:46:48 2015
@@ -17,177 +17,48 @@ package org.apache.lucene.index;
  * limitations under the License.
  */
 
-import java.io.ByteArrayOutputStream;
 import java.io.IOException;
-import java.io.PrintStream;
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.lucene.analysis.CannedTokenStream;
-import org.apache.lucene.analysis.MockAnalyzer;
-import org.apache.lucene.analysis.Token;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
-import org.apache.lucene.document.FieldType;
-import org.apache.lucene.document.TextField;
+
 import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.LockObtainFailedException;
-import org.apache.lucene.util.IOUtils;
-import org.apache.lucene.util.LineFileDocs;
-import org.apache.lucene.util.LuceneTestCase;
-import org.apache.lucene.util.TestUtil;
+import org.junit.Test;
+
+public class TestCheckIndex extends BaseTestCheckIndex {
+  private Directory directory;
 
-public class TestCheckIndex extends LuceneTestCase {
+  @Override
+  public void setUp() throws Exception {
+    super.setUp();
+    directory = newDirectory();
+  }
 
+  @Override
+  public void tearDown() throws Exception {
+    directory.close();
+    super.tearDown();
+  }
+  
+  @Test
   public void testDeletedDocs() throws IOException {
-    Directory dir = newDirectory();
-    IndexWriter writer  = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))
-                                                 .setMaxBufferedDocs(2));
-    for(int i=0;i<19;i++) {
-      Document doc = new Document();
-      FieldType customType = new FieldType(TextField.TYPE_STORED);
-      customType.setStoreTermVectors(true);
-      customType.setStoreTermVectorPositions(true);
-      customType.setStoreTermVectorOffsets(true);
-      doc.add(newField("field", "aaa"+i, customType));
-      writer.addDocument(doc);
-    }
-    writer.forceMerge(1);
-    writer.commit();
-    writer.deleteDocuments(new Term("field","aaa5"));
-    writer.close();
-
-    ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
-    CheckIndex checker = new CheckIndex(dir);
-    checker.setInfoStream(new PrintStream(bos, false, IOUtils.UTF_8));
-    if (VERBOSE) checker.setInfoStream(System.out);
-    CheckIndex.Status indexStatus = checker.checkIndex();
-    if (indexStatus.clean == false) {
-      System.out.println("CheckIndex failed");
-      System.out.println(bos.toString(IOUtils.UTF_8));
-      fail();
-    }
-    
-    final CheckIndex.Status.SegmentInfoStatus seg = indexStatus.segmentInfos.get(0);
-    assertTrue(seg.openReaderPassed);
-
-    assertNotNull(seg.diagnostics);
-    
-    assertNotNull(seg.fieldNormStatus);
-    assertNull(seg.fieldNormStatus.error);
-    assertEquals(1, seg.fieldNormStatus.totFields);
-
-    assertNotNull(seg.termIndexStatus);
-    assertNull(seg.termIndexStatus.error);
-    assertEquals(18, seg.termIndexStatus.termCount);
-    assertEquals(18, seg.termIndexStatus.totFreq);
-    assertEquals(18, seg.termIndexStatus.totPos);
-
-    assertNotNull(seg.storedFieldStatus);
-    assertNull(seg.storedFieldStatus.error);
-    assertEquals(18, seg.storedFieldStatus.docCount);
-    assertEquals(18, seg.storedFieldStatus.totFields);
-
-    assertNotNull(seg.termVectorStatus);
-    assertNull(seg.termVectorStatus.error);
-    assertEquals(18, seg.termVectorStatus.docCount);
-    assertEquals(18, seg.termVectorStatus.totVectors);
-
-    assertNotNull(seg.diagnostics.get("java.vm.version"));
-    assertNotNull(seg.diagnostics.get("java.runtime.version"));
-
-    assertTrue(seg.diagnostics.size() > 0);
-    final List<String> onlySegments = new ArrayList<>();
-    onlySegments.add("_0");
-    
-    assertTrue(checker.checkIndex(onlySegments).clean == true);
-    checker.close();
-    dir.close();
+    testDeletedDocs(directory);
   }
   
-  // LUCENE-4221: we have to let these thru, for now
+  @Test
   public void testBogusTermVectors() throws IOException {
-    Directory dir = newDirectory();
-    IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));
-    Document doc = new Document();
-    FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
-    ft.setStoreTermVectors(true);
-    ft.setStoreTermVectorOffsets(true);
-    Field field = new Field("foo", "", ft);
-    field.setTokenStream(new CannedTokenStream(
-        new Token("bar", 5, 10), new Token("bar", 1, 4)
-    ));
-    doc.add(field);
-    iw.addDocument(doc);
-    iw.close();
-    dir.close(); // checkindex
+    testBogusTermVectors(directory);
   }
   
+  @Test
   public void testChecksumsOnly() throws IOException {
-    LineFileDocs lf = new LineFileDocs(random());
-    Directory dir = newDirectory();
-    MockAnalyzer analyzer = new MockAnalyzer(random());
-    analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
-    IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(analyzer));
-    for (int i = 0; i < 100; i++) {
-      iw.addDocument(lf.nextDoc());
-    }
-    iw.addDocument(new Document());
-    iw.commit();
-    iw.close();
-    lf.close();
-    
-    ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
-    CheckIndex checker = new CheckIndex(dir);
-    checker.setInfoStream(new PrintStream(bos, false, IOUtils.UTF_8));
-    if (VERBOSE) checker.setInfoStream(System.out);
-    CheckIndex.Status indexStatus = checker.checkIndex();
-    assertTrue(indexStatus.clean);
-    checker.close();
-    dir.close();
-    analyzer.close();
+    testChecksumsOnly(directory);
   }
   
+  @Test
   public void testChecksumsOnlyVerbose() throws IOException {
-    LineFileDocs lf = new LineFileDocs(random());
-    Directory dir = newDirectory();
-    MockAnalyzer analyzer = new MockAnalyzer(random());
-    analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
-    IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(analyzer));
-    for (int i = 0; i < 100; i++) {
-      iw.addDocument(lf.nextDoc());
-    }
-    iw.addDocument(new Document());
-    iw.commit();
-    iw.close();
-    lf.close();
-    
-    ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
-    CheckIndex checker = new CheckIndex(dir);
-    checker.setInfoStream(new PrintStream(bos, true, IOUtils.UTF_8));
-    if (VERBOSE) checker.setInfoStream(System.out);
-    CheckIndex.Status indexStatus = checker.checkIndex();
-    assertTrue(indexStatus.clean);
-    checker.close();
-    dir.close();
-    analyzer.close();
+    testChecksumsOnlyVerbose(directory);
   }
-  
+
+  @Test
   public void testObtainsLock() throws IOException {
-    Directory dir = newDirectory();
-    IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));
-    iw.addDocument(new Document());
-    iw.commit();
-    
-    // keep IW open...
-    try {
-      new CheckIndex(dir);
-      fail("should not have obtained write lock");
-    } catch (LockObtainFailedException expected) {
-      // ok
-    }
-    
-    iw.close();
-    dir.close();
+    testObtainsLock(directory);
   }
 }

Added: lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/index/BaseTestCheckIndex.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/index/BaseTestCheckIndex.java?rev=1717340&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/index/BaseTestCheckIndex.java (added)
+++ lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/index/BaseTestCheckIndex.java Mon Nov 30 22:46:48 2015
@@ -0,0 +1,187 @@
+package org.apache.lucene.index;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.PrintStream;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.lucene.analysis.CannedTokenStream;
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.FieldType;
+import org.apache.lucene.document.TextField;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.LockObtainFailedException;
+import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.LineFileDocs;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.TestUtil;
+
+/**
+ * Base class for CheckIndex tests.
+ */
+public class BaseTestCheckIndex extends LuceneTestCase {
+
+  public void testDeletedDocs(Directory dir) throws IOException {
+    IndexWriter writer  = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))
+                                                 .setMaxBufferedDocs(2));
+    for(int i=0;i<19;i++) {
+      Document doc = new Document();
+      FieldType customType = new FieldType(TextField.TYPE_STORED);
+      customType.setStoreTermVectors(true);
+      customType.setStoreTermVectorPositions(true);
+      customType.setStoreTermVectorOffsets(true);
+      doc.add(newField("field", "aaa"+i, customType));
+      writer.addDocument(doc);
+    }
+    writer.forceMerge(1);
+    writer.commit();
+    writer.deleteDocuments(new Term("field","aaa5"));
+    writer.close();
+
+    ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
+    CheckIndex checker = new CheckIndex(dir);
+    checker.setInfoStream(new PrintStream(bos, false, IOUtils.UTF_8));
+    if (VERBOSE) checker.setInfoStream(System.out);
+    CheckIndex.Status indexStatus = checker.checkIndex();
+    if (indexStatus.clean == false) {
+      System.out.println("CheckIndex failed");
+      System.out.println(bos.toString(IOUtils.UTF_8));
+      fail();
+    }
+    
+    final CheckIndex.Status.SegmentInfoStatus seg = indexStatus.segmentInfos.get(0);
+    assertTrue(seg.openReaderPassed);
+
+    assertNotNull(seg.diagnostics);
+    
+    assertNotNull(seg.fieldNormStatus);
+    assertNull(seg.fieldNormStatus.error);
+    assertEquals(1, seg.fieldNormStatus.totFields);
+
+    assertNotNull(seg.termIndexStatus);
+    assertNull(seg.termIndexStatus.error);
+    assertEquals(18, seg.termIndexStatus.termCount);
+    assertEquals(18, seg.termIndexStatus.totFreq);
+    assertEquals(18, seg.termIndexStatus.totPos);
+
+    assertNotNull(seg.storedFieldStatus);
+    assertNull(seg.storedFieldStatus.error);
+    assertEquals(18, seg.storedFieldStatus.docCount);
+    assertEquals(18, seg.storedFieldStatus.totFields);
+
+    assertNotNull(seg.termVectorStatus);
+    assertNull(seg.termVectorStatus.error);
+    assertEquals(18, seg.termVectorStatus.docCount);
+    assertEquals(18, seg.termVectorStatus.totVectors);
+
+    assertNotNull(seg.diagnostics.get("java.vm.version"));
+    assertNotNull(seg.diagnostics.get("java.runtime.version"));
+
+    assertTrue(seg.diagnostics.size() > 0);
+    final List<String> onlySegments = new ArrayList<>();
+    onlySegments.add("_0");
+    
+    assertTrue(checker.checkIndex(onlySegments).clean == true);
+    checker.close();
+  }
+  
+  // LUCENE-4221: we have to let these thru, for now
+  public void testBogusTermVectors(Directory dir) throws IOException {
+    IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));
+    Document doc = new Document();
+    FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
+    ft.setStoreTermVectors(true);
+    ft.setStoreTermVectorOffsets(true);
+    Field field = new Field("foo", "", ft);
+    field.setTokenStream(new CannedTokenStream(
+        new Token("bar", 5, 10), new Token("bar", 1, 4)
+    ));
+    doc.add(field);
+    iw.addDocument(doc);
+    iw.close();
+  }
+  
+  public void testChecksumsOnly(Directory dir) throws IOException {
+    LineFileDocs lf = new LineFileDocs(random());
+    MockAnalyzer analyzer = new MockAnalyzer(random());
+    analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
+    IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(analyzer));
+    for (int i = 0; i < 100; i++) {
+      iw.addDocument(lf.nextDoc());
+    }
+    iw.addDocument(new Document());
+    iw.commit();
+    iw.close();
+    lf.close();
+    
+    ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
+    CheckIndex checker = new CheckIndex(dir);
+    checker.setInfoStream(new PrintStream(bos, false, IOUtils.UTF_8));
+    if (VERBOSE) checker.setInfoStream(System.out);
+    CheckIndex.Status indexStatus = checker.checkIndex();
+    assertTrue(indexStatus.clean);
+    checker.close();
+    analyzer.close();
+  }
+  
+  public void testChecksumsOnlyVerbose(Directory dir) throws IOException {
+    LineFileDocs lf = new LineFileDocs(random());
+    MockAnalyzer analyzer = new MockAnalyzer(random());
+    analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
+    IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(analyzer));
+    for (int i = 0; i < 100; i++) {
+      iw.addDocument(lf.nextDoc());
+    }
+    iw.addDocument(new Document());
+    iw.commit();
+    iw.close();
+    lf.close();
+    
+    ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
+    CheckIndex checker = new CheckIndex(dir);
+    checker.setInfoStream(new PrintStream(bos, true, IOUtils.UTF_8));
+    if (VERBOSE) checker.setInfoStream(System.out);
+    CheckIndex.Status indexStatus = checker.checkIndex();
+    assertTrue(indexStatus.clean);
+    checker.close();
+    analyzer.close();
+  }
+  
+  public void testObtainsLock(Directory dir) throws IOException {
+    IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));
+    iw.addDocument(new Document());
+    iw.commit();
+    
+    // keep IW open...
+    try {
+      new CheckIndex(dir);
+      fail("should not have obtained write lock");
+    } catch (LockObtainFailedException expected) {
+      // ok
+    }
+    
+    iw.close();
+  }
+}
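
[Editor's note] BaseTestCheckIndex takes the Directory as a method parameter instead of creating it, so the same assertions can be reused against any Directory implementation; TestCheckIndex above and CheckHdfsIndexTest below are the two reuses in this commit. The following is a hedged sketch of wiring in a third, mirroring TestCheckIndex; the subclass name is hypothetical, and newDirectory() comes from LuceneTestCase, which BaseTestCheckIndex extends.

    // Hypothetical subclass: reuses the shared checks against another Directory.
    import java.io.IOException;

    import org.apache.lucene.index.BaseTestCheckIndex;
    import org.apache.lucene.store.Directory;
    import org.junit.Test;

    public class TestCheckIndexOnSomeDirectory extends BaseTestCheckIndex {
      private Directory directory;

      @Override
      public void setUp() throws Exception {
        super.setUp();
        directory = newDirectory();   // substitute the Directory under test here
      }

      @Override
      public void tearDown() throws Exception {
        directory.close();
        super.tearDown();
      }

      @Test
      public void testDeletedDocs() throws IOException {
        testDeletedDocs(directory);   // shared assertions from BaseTestCheckIndex
      }
    }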

Modified: lucene/dev/trunk/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/CHANGES.txt?rev=1717340&r1=1717339&r2=1717340&view=diff
==============================================================================
--- lucene/dev/trunk/solr/CHANGES.txt (original)
+++ lucene/dev/trunk/solr/CHANGES.txt Mon Nov 30 22:46:48 2015
@@ -182,6 +182,9 @@ Detailed Change List
 New Features
 ----------------------
 
+* SOLR-7928: Improve CheckIndex to work against HdfsDirectory
+  (Mike Drob, Gregory Chanan)
+
 Other Changes
 ----------------------
 

Added: lucene/dev/trunk/solr/core/src/java/org/apache/solr/index/hdfs/CheckHdfsIndex.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/index/hdfs/CheckHdfsIndex.java?rev=1717340&view=auto
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/index/hdfs/CheckHdfsIndex.java (added)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/index/hdfs/CheckHdfsIndex.java Mon Nov 30 22:46:48 2015
@@ -0,0 +1,80 @@
+package org.apache.solr.index.hdfs;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.lucene.index.CheckIndex;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.SuppressForbidden;
+import org.apache.solr.core.HdfsDirectoryFactory;
+import org.apache.solr.store.hdfs.HdfsDirectory;
+import org.apache.solr.util.HdfsUtil;
+
+public class CheckHdfsIndex {
+  public static void main(String[] args) throws IOException, InterruptedException {
+    int exitCode = doMain(args);
+    System.exit(exitCode);
+  }
+
+  // actual main: returns exit code instead of terminating JVM (for easy testing)
+  @SuppressForbidden(reason = "System.out required: command line tool")
+  protected static int doMain(String[] args) throws IOException, InterruptedException {
+    CheckIndex.Options opts;
+    try {
+      opts = CheckIndex.parseOptions(args);
+    } catch (IllegalArgumentException e) {
+      System.out.println(e.getMessage());
+      return 1;
+    }
+
+    if (!CheckIndex.assertsOn()) {
+      System.out.println("\nNOTE: testing will be more thorough if you run java with '-ea:org.apache.lucene...', so assertions are enabled");
+    }
+
+    if (opts.getDirImpl() != null) {
+      System.out.println("\nIgnoring specified -dir-impl, instead using " + HdfsDirectory.class.getSimpleName());
+    }
+
+    System.out.println("\nOpening index @ " + opts.getIndexPath() + "\n");
+
+    Directory directory;
+    try {
+      directory = new HdfsDirectory(new Path(opts.getIndexPath()), getConf());
+    } catch (IOException e) {
+      System.out.println("ERROR: could not open hdfs directory \"" + opts.getIndexPath() + "\"; exiting");
+      e.printStackTrace(System.out);
+      return 1;
+    }
+
+    try (Directory dir = directory; CheckIndex checker = new CheckIndex(dir)) {
+      opts.setOut(System.out);
+      return checker.doCheck(opts);
+    }
+  }
+
+  private static Configuration getConf() {
+    Configuration conf = new Configuration();
+    String confDir = System.getProperty(HdfsDirectoryFactory.CONFIG_DIRECTORY);
+    HdfsUtil.addHdfsResources(conf, confDir);
+    conf.setBoolean("fs.hdfs.impl.disable.cache", true);
+    return conf;
+  }
+}
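
[Editor's note] CheckHdfsIndex is deliberately thin: it reuses CheckIndex.parseOptions and CheckIndex.doCheck and only swaps the directory-opening step for an HdfsDirectory built from the configuration returned by getConf(). Below is a minimal sketch of the same flow driven from code; the hdfs:// URI is a placeholder, and this Configuration setup skips the HdfsDirectoryFactory.CONFIG_DIRECTORY lookup that getConf() performs.

    // Sketch only: checks an index stored in HDFS, mirroring CheckHdfsIndex.doMain above.
    // The hdfs:// path is a placeholder for a real index directory.
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.lucene.index.CheckIndex;
    import org.apache.lucene.store.Directory;
    import org.apache.solr.store.hdfs.HdfsDirectory;

    public class CheckHdfsIndexSketch {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Same setting as getConf() above: don't reuse a cached FileSystem instance.
        conf.setBoolean("fs.hdfs.impl.disable.cache", true);

        CheckIndex.Options opts = CheckIndex.parseOptions(
            new String[] {"hdfs://namenode:8020/solr/collection1/data/index"});

        int rc;
        try (Directory dir = new HdfsDirectory(new Path(opts.getIndexPath()), conf);
             CheckIndex checker = new CheckIndex(dir)) {
          opts.setOut(System.out);
          rc = checker.doCheck(opts);   // 0 iff the index is clean
        }
        System.exit(rc);
      }
    }

Because the flags are parsed by CheckIndex.parseOptions, the bundled tool accepts the same options as CheckIndex (except that -dir-impl is ignored, as shown above), and it needs the Solr core and Hadoop jars on the classpath.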

Added: lucene/dev/trunk/solr/core/src/java/org/apache/solr/index/hdfs/package-info.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/index/hdfs/package-info.java?rev=1717340&view=auto
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/index/hdfs/package-info.java (added)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/index/hdfs/package-info.java Mon Nov 30 22:46:48 2015
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+ 
+/**
+ * An HDFS CheckIndex implementation.
+ */
+package org.apache.solr.index.hdfs;
+

Added: lucene/dev/trunk/solr/core/src/test/org/apache/solr/index/hdfs/CheckHdfsIndexTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/test/org/apache/solr/index/hdfs/CheckHdfsIndexTest.java?rev=1717340&view=auto
==============================================================================
--- lucene/dev/trunk/solr/core/src/test/org/apache/solr/index/hdfs/CheckHdfsIndexTest.java (added)
+++ lucene/dev/trunk/solr/core/src/test/org/apache/solr/index/hdfs/CheckHdfsIndexTest.java Mon Nov 30 22:46:48 2015
@@ -0,0 +1,143 @@
+package org.apache.solr.index.hdfs;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.lucene.index.BaseTestCheckIndex;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.NoLockFactory;
+import org.apache.solr.client.solrj.SolrClient;
+import org.apache.solr.client.solrj.SolrQuery;
+import org.apache.solr.cloud.AbstractFullDistribZkTestBase;
+import org.apache.solr.cloud.hdfs.HdfsTestUtil;
+import org.apache.solr.common.util.NamedList;
+import org.apache.solr.store.hdfs.HdfsDirectory;
+import org.apache.solr.util.BadHdfsThreadsFilter;
+import org.junit.After;
+import org.junit.AfterClass;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Ignore;
+import org.junit.Test;
+
+import com.carrotsearch.randomizedtesting.annotations.ThreadLeakFilters;
+
+@ThreadLeakFilters(defaultFilters = true, filters = {
+    BadHdfsThreadsFilter.class // hdfs currently leaks thread(s)
+})
+public class CheckHdfsIndexTest extends AbstractFullDistribZkTestBase {
+  private static MiniDFSCluster dfsCluster;
+  private static Path path;
+
+  private BaseTestCheckIndex testCheckIndex;
+  private Directory directory;
+
+  public CheckHdfsIndexTest() {
+    super();
+    sliceCount = 1;
+    fixShardCount(1);
+
+    testCheckIndex = new BaseTestCheckIndex();
+  }
+
+  @BeforeClass
+  public static void setupClass() throws Exception {
+    dfsCluster = HdfsTestUtil.setupClass(createTempDir().toFile().getAbsolutePath());
+    path = new Path(HdfsTestUtil.getURI(dfsCluster) + "/solr/");
+  }
+
+  @AfterClass
+  public static void teardownClass() throws Exception {
+    HdfsTestUtil.teardownClass(dfsCluster);
+    dfsCluster = null;
+  }
+
+  @Override
+  @Before
+  public void setUp() throws Exception {
+    super.setUp();
+
+    Configuration conf = HdfsTestUtil.getClientConfiguration(dfsCluster);
+    conf.setBoolean("fs.hdfs.impl.disable.cache", true);
+
+    directory = new HdfsDirectory(path, NoLockFactory.INSTANCE, conf);
+  }
+
+  @Override
+  @After
+  public void tearDown() throws Exception {
+    directory.close();
+    dfsCluster.getFileSystem().delete(path, true);
+    super.tearDown();
+  }
+
+  @Override
+  protected String getDataDir(String dataDir) throws IOException {
+    return HdfsTestUtil.getDataDir(dfsCluster, dataDir);
+  }
+
+  @Test
+  public void doTest() throws Exception {
+    indexr(id, 1);
+    commit();
+
+    waitForRecoveriesToFinish(false);
+
+    String[] args;
+    {
+      SolrClient client = clients.get(0);
+      NamedList<Object> response = client.query(new SolrQuery().setRequestHandler("/admin/system")).getResponse();
+      NamedList<Object> coreInfo = (NamedList<Object>) response.get("core");
+      String indexDir = (String) ((NamedList<Object>) coreInfo.get("directory")).get("data") + "/index";
+
+      args = new String[] {indexDir};
+    }
+
+    assertEquals("CheckHdfsIndex return status", 0, CheckHdfsIndex.doMain(args));
+  }
+
+  @Test
+  public void testDeletedDocs() throws IOException {
+    testCheckIndex.testDeletedDocs(directory);
+  }
+
+  @Test
+  public void testBogusTermVectors() throws IOException {
+    testCheckIndex.testBogusTermVectors(directory);
+  }
+
+  @Test
+  public void testChecksumsOnly() throws IOException {
+    testCheckIndex.testChecksumsOnly(directory);
+  }
+
+  @Test
+  public void testChecksumsOnlyVerbose() throws IOException {
+    testCheckIndex.testChecksumsOnlyVerbose(directory);
+  }
+
+  @Test
+  @Ignore("We explicitly use a NoLockFactory, so this test doesn't make sense.")
+  public void testObtainsLock() throws IOException {
+    testCheckIndex.testObtainsLock(directory);
+  }
+}