You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by gs...@apache.org on 2008/09/09 18:34:18 UTC
svn commit: r693508 - in /lucene/java/trunk: ./
src/java/org/apache/lucene/index/ src/test/org/apache/lucene/index/
src/test/org/apache/lucene/util/
Author: gsingers
Date: Tue Sep 9 09:34:18 2008
New Revision: 693508
URL: http://svn.apache.org/viewvc?rev=693508&view=rev
Log:
LUCENE-1354: Provide programmatic access to CheckIndex
Added:
lucene/java/trunk/src/java/org/apache/lucene/index/CheckIndexStatus.java (with props)
Modified:
lucene/java/trunk/CHANGES.txt
lucene/java/trunk/src/java/org/apache/lucene/index/CheckIndex.java
lucene/java/trunk/src/test/org/apache/lucene/index/TestCheckIndex.java
lucene/java/trunk/src/test/org/apache/lucene/util/_TestUtil.java
Modified: lucene/java/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/CHANGES.txt?rev=693508&r1=693507&r2=693508&view=diff
==============================================================================
--- lucene/java/trunk/CHANGES.txt (original)
+++ lucene/java/trunk/CHANGES.txt Tue Sep 9 09:34:18 2008
@@ -269,6 +269,8 @@
18. LUCENE-1001: Provide access to Payloads via Spans. All existing Span Query implementations in Lucene implement. (Mark Miller, Grant Ingersoll)
+19. LUCENE-1354: Provide programmatic access to CheckIndex (Grant Ingersoll, Mike McCandless)
+
Optimizations
1. LUCENE-705: When building a compound file, use
Modified: lucene/java/trunk/src/java/org/apache/lucene/index/CheckIndex.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/CheckIndex.java?rev=693508&r1=693507&r2=693508&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/CheckIndex.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/CheckIndex.java Tue Sep 9 09:34:18 2008
@@ -42,7 +42,7 @@
public class CheckIndex {
- public static PrintStream out = System.out;
+ public static PrintStream out = null;
private static class MySegmentTermDocs extends SegmentTermDocs {
@@ -63,21 +63,23 @@
}
/** Returns true if index is clean, else false.*/
- public static boolean check(Directory dir, boolean doFix) throws IOException {
+ public static CheckIndexStatus check(Directory dir, boolean doFix) throws IOException {
return check(dir, doFix, null);
}
/** Returns true if index is clean, else false.*/
- public static boolean check(Directory dir, boolean doFix, List onlySegments) throws IOException {
+ public static CheckIndexStatus check(Directory dir, boolean doFix, List onlySegments) throws IOException {
NumberFormat nf = NumberFormat.getInstance();
SegmentInfos sis = new SegmentInfos();
-
+ CheckIndexStatus result = new CheckIndexStatus();
+ result.dir = dir;
try {
sis.read(dir);
} catch (Throwable t) {
- out.println("ERROR: could not read any segments file in directory");
+ msg("ERROR: could not read any segments file in directory");
+ result.missingSegments = true;
t.printStackTrace(out);
- return false;
+ return result;
}
final int numSegments = sis.size();
@@ -86,17 +88,19 @@
try {
input = dir.openInput(segmentsFileName);
} catch (Throwable t) {
- out.println("ERROR: could not open segments file in directory");
+ msg("ERROR: could not open segments file in directory");
t.printStackTrace(out);
- return false;
+ result.cantOpenSegments = true;
+ return result;
}
int format = 0;
try {
format = input.readInt();
} catch (Throwable t) {
- out.println("ERROR: could not read segment file version in directory");
+ msg("ERROR: could not read segment file version in directory");
t.printStackTrace(out);
- return false;
+ result.missingSegmentVersion = true;
+ return result;
} finally {
if (input != null)
input.close();
@@ -128,7 +132,10 @@
}
}
- out.println("Segments file=" + segmentsFileName + " numSegments=" + numSegments + " version=" + sFormat);
+ msg("Segments file=" + segmentsFileName + " numSegments=" + numSegments + " version=" + sFormat);
+ result.segmentsFileName = segmentsFileName;
+ result.numSegments = numSegments;
+ result.segmentFormat = sFormat;
if (onlySegments != null) {
out.print("\nChecking only these segments:");
@@ -136,56 +143,80 @@
while (it.hasNext()) {
out.print(" " + it.next());
}
- out.println(":");
+ result.segmentsChecked.addAll(onlySegments);
+ msg(":");
}
if (skip) {
- out.println("\nERROR: this index appears to be created by a newer version of Lucene than this tool was compiled on; please re-compile this tool on the matching version of Lucene; exiting");
- return false;
+ msg("\nERROR: this index appears to be created by a newer version of Lucene than this tool was compiled on; please re-compile this tool on the matching version of Lucene; exiting");
+ result.toolOutOfDate = true;
+ return result;
}
- SegmentInfos newSIS = (SegmentInfos) sis.clone();
- newSIS.clear();
- boolean changed = false;
- int totLoseDocCount = 0;
- int numBadSegments = 0;
+
+ result.newSegments = (SegmentInfos) sis.clone();
+ result.newSegments.clear();
+
for(int i=0;i<numSegments;i++) {
final SegmentInfo info = sis.info(i);
if (onlySegments != null && !onlySegments.contains(info.name))
continue;
- out.println(" " + (1+i) + " of " + numSegments + ": name=" + info.name + " docCount=" + info.docCount);
+ CheckIndexStatus.SegmentInfoStatus segInfoStat = new CheckIndexStatus.SegmentInfoStatus();
+ result.segmentInfos.add(segInfoStat);
+ msg(" " + (1+i) + " of " + numSegments + ": name=" + info.name + " docCount=" + info.docCount);
+ segInfoStat.name = info.name;
+ segInfoStat.docCount = info.docCount;
+
int toLoseDocCount = info.docCount;
SegmentReader reader = null;
try {
- out.println(" compound=" + info.getUseCompoundFile());
- out.println(" hasProx=" + info.getHasProx());
- out.println(" numFiles=" + info.files().size());
- out.println(" size (MB)=" + nf.format(info.sizeInBytes()/(1024.*1024.)));
+ msg(" compound=" + info.getUseCompoundFile());
+ segInfoStat.compound = info.getUseCompoundFile();
+ msg(" hasProx=" + info.getHasProx());
+ segInfoStat.hasProx = info.getHasProx();
+ msg(" numFiles=" + info.files().size());
+ segInfoStat.numFiles = info.files().size();
+ msg(" size (MB)=" + nf.format(info.sizeInBytes()/(1024.*1024.)));
+ segInfoStat.sizeMB = info.sizeInBytes()/(1024.*1024.);
+
+
final int docStoreOffset = info.getDocStoreOffset();
if (docStoreOffset != -1) {
- out.println(" docStoreOffset=" + docStoreOffset);
- out.println(" docStoreSegment=" + info.getDocStoreSegment());
- out.println(" docStoreIsCompoundFile=" + info.getDocStoreIsCompoundFile());
+ msg(" docStoreOffset=" + docStoreOffset);
+ segInfoStat.docStoreOffset = docStoreOffset;
+ msg(" docStoreSegment=" + info.getDocStoreSegment());
+ segInfoStat.docStoreSegment = info.getDocStoreSegment();
+ msg(" docStoreIsCompoundFile=" + info.getDocStoreIsCompoundFile());
+ segInfoStat.docStoreCompoundFile = info.getDocStoreIsCompoundFile();
}
final String delFileName = info.getDelFileName();
- if (delFileName == null)
- out.println(" no deletions");
- else
- out.println(" has deletions [delFileName=" + delFileName + "]");
+ if (delFileName == null){
+ msg(" no deletions");
+ segInfoStat.hasDeletions = false;
+ }
+ else{
+ msg(" has deletions [delFileName=" + delFileName + "]");
+ segInfoStat.hasDeletions = true;
+ segInfoStat.deletionsFileName = delFileName;
+
+ }
out.print(" test: open reader.........");
reader = SegmentReader.get(info);
final int numDocs = reader.numDocs();
toLoseDocCount = numDocs;
if (reader.hasDeletions()) {
- if (info.docCount - numDocs != info.getDelCount())
+ if (info.docCount - numDocs != info.getDelCount()){
throw new RuntimeException("delete count mismatch: info=" + info.getDelCount() + " vs reader=" + (info.docCount - numDocs));
- out.println("OK [" + (info.docCount - numDocs) + " deleted docs]");
+ }
+ segInfoStat.numDeleted = info.docCount - numDocs;
+ msg("OK [" + (segInfoStat.numDeleted) + " deleted docs]");
} else {
- if (info.getDelCount() != 0)
+ if (info.getDelCount() != 0){
throw new RuntimeException("delete count mismatch: info=" + info.getDelCount() + " vs reader=" + (info.docCount - numDocs));
- out.println("OK");
+ }
+ msg("OK");
}
out.print(" test: fields, norms.......");
@@ -198,8 +229,8 @@
throw new RuntimeException("norms for field \"" + fieldName + "\" is length " + b.length + " != maxDoc " + info.docCount);
}
- out.println("OK [" + fieldNames.size() + " fields]");
-
+ msg("OK [" + fieldNames.size() + " fields]");
+ segInfoStat.numFields = fieldNames.size();
out.print(" test: terms, freq, prox...");
final TermEnum termEnum = reader.terms();
final TermPositions termPositions = reader.termPositions();
@@ -255,7 +286,7 @@
throw new RuntimeException("term " + term + " docFreq=" + docFreq + " != num docs seen " + freq0 + " + num docs deleted " + delCount);
}
- out.println("OK [" + termCount + " terms; " + totFreq + " terms/docs pairs; " + totPos + " tokens]");
+ msg("OK [" + termCount + " terms; " + totFreq + " terms/docs pairs; " + totPos + " tokens]");
out.print(" test: stored fields.......");
int docCount = 0;
@@ -270,7 +301,7 @@
if (docCount != reader.numDocs())
throw new RuntimeException("docCount=" + docCount + " but saw " + docCount + " undeleted docs");
- out.println("OK [" + totFields + " total field count; avg " + nf.format((((float) totFields)/docCount)) + " fields per doc]");
+ msg("OK [" + totFields + " total field count; avg " + nf.format((((float) totFields)/docCount)) + " fields per doc]");
out.print(" test: term vectors........");
int totVectors = 0;
@@ -281,22 +312,21 @@
totVectors += tfv.length;
}
- out.println("OK [" + totVectors + " total vector count; avg " + nf.format((((float) totVectors)/docCount)) + " term/freq vector fields per doc]");
- out.println("");
+ msg("OK [" + totVectors + " total vector count; avg " + nf.format((((float) totVectors)/docCount)) + " term/freq vector fields per doc]");
+ msg("");
} catch (Throwable t) {
- out.println("FAILED");
+ msg("FAILED");
String comment;
if (doFix)
comment = "will remove reference to this segment (-fix is specified)";
else
comment = "would remove reference to this segment (-fix was not specified)";
- out.println(" WARNING: " + comment + "; full exception:");
+ msg(" WARNING: " + comment + "; full exception:");
t.printStackTrace(out);
- out.println("");
- totLoseDocCount += toLoseDocCount;
- numBadSegments++;
- changed = true;
+ msg("");
+ result.totLoseDocCount += toLoseDocCount;
+ result.numBadSegments++;
continue;
} finally {
if (reader != null)
@@ -304,50 +334,25 @@
}
// Keeper
- newSIS.add(info.clone());
+ result.newSegments.add(info.clone());
}
- if (!changed) {
- out.println("No problems were detected with this index.\n");
- return true;
- } else {
- out.println("WARNING: " + numBadSegments + " broken segments detected");
- if (doFix)
- out.println("WARNING: " + totLoseDocCount + " documents will be lost");
- else
- out.println("WARNING: " + totLoseDocCount + " documents would be lost if -fix were specified");
- out.println();
- }
-
- if (doFix) {
- out.println("NOTE: will write new segments file in 5 seconds; this will remove " + totLoseDocCount + " docs from the index. THIS IS YOUR LAST CHANCE TO CTRL+C!");
- for(int i=0;i<5;i++) {
- try {
- Thread.sleep(1000);
- } catch (InterruptedException ie) {
- Thread.currentThread().interrupt();
- i--;
- continue;
- }
-
- out.println(" " + (5-i) + "...");
- }
- out.print("Writing...");
- try {
- newSIS.commit(dir);
- } catch (Throwable t) {
- out.println("FAILED; exiting");
- t.printStackTrace(out);
- return false;
- }
- out.println("OK");
- out.println("Wrote new segments file \"" + newSIS.getCurrentSegmentFileName() + "\"");
- } else {
- out.println("NOTE: would write new segments file [-fix was not specified]");
- }
- out.println("");
+ if (0 == result.numBadSegments) {
+ result.clean = true;
+ msg("No problems were detected with this index.\n");
+ } else
+ msg("WARNING: " + result.numBadSegments + " broken segments (containing " + result.totLoseDocCount + " documents) detected");
- return false;
+ return result;
+ }
+
+ /** Repairs the index using previously returned result from
+ * {@link #check}. <b>WARNING</b>: this writes a new
+ * segments file into the index, effectively removing
+ * all documents in broken segments from the index. BE
+ * CAREFUL. */
+ static public void fix(CheckIndexStatus result) throws IOException {
+ result.newSegments.commit(result.dir); // newSegments holds only the segments that passed; check() assigns it only after its early-return paths, so it is still null for a result that bailed out early
 }
static boolean assertsOn;
@@ -357,6 +362,12 @@
return true;
}
+ private static void msg(String msg) { // prints one line to the static 'out' stream; does nothing when 'out' is null
+ if (out != null) {
+ out.println(msg);
+ }
+ }
+
public static void main(String[] args) throws Throwable {
boolean doFix = false;
@@ -369,14 +380,14 @@
i++;
} else if (args[i].equals("-segment")) {
if (i == args.length-1) {
- out.println("ERROR: missing name for -segment option");
+ msg("ERROR: missing name for -segment option");
System.exit(1);
}
onlySegments.add(args[i+1]);
i += 2;
} else {
if (indexPath != null) {
- out.println("ERROR: unexpected extra argument '" + args[i] + "'");
+ msg("ERROR: unexpected extra argument '" + args[i] + "'");
System.exit(1);
}
indexPath = args[i];
@@ -385,8 +396,8 @@
}
if (indexPath == null) {
- out.println("\nERROR: index path not specified");
- out.println("\nUsage: java org.apache.lucene.index.CheckIndex pathToIndex [-fix] [-segment X] [-segment Y]\n" +
+ msg("\nERROR: index path not specified");
+ msg("\nUsage: java org.apache.lucene.index.CheckIndex pathToIndex [-fix] [-segment X] [-segment Y]\n" +
"\n" +
" -fix: actually write a new segments_N file, removing any problematic segments\n" +
" -segment X: only check the specified segments. This can be specified multiple\n" +
@@ -412,31 +423,55 @@
if (onlySegments.size() == 0)
onlySegments = null;
else if (doFix) {
- out.println("ERROR: cannot specify both -fix and -segment");
+ msg("ERROR: cannot specify both -fix and -segment");
System.exit(1);
}
assert testAsserts();
if (!assertsOn)
- out.println("\nNOTE: testing will be more thorough if you run java with '-ea:org.apache.lucene', so assertions are enabled");
+ msg("\nNOTE: testing will be more thorough if you run java with '-ea:org.apache.lucene', so assertions are enabled");
- out.println("\nOpening index @ " + indexPath + "\n");
+ msg("\nOpening index @ " + indexPath + "\n");
Directory dir = null;
try {
dir = FSDirectory.getDirectory(indexPath);
} catch (Throwable t) {
- out.println("ERROR: could not open directory \"" + indexPath + "\"; exiting");
+ msg("ERROR: could not open directory \"" + indexPath + "\"; exiting");
t.printStackTrace(out);
System.exit(1);
}
- boolean isClean = check(dir, doFix, onlySegments);
+ CheckIndexStatus result = check(dir, doFix, onlySegments);
+
+ if (!result.clean) {
+ if (!doFix){
+ msg("WARNING: would write new segments file, and " + result.totLoseDocCount + " documents would be lost, if -fix were specified\n");
+ } else {
+ msg("WARNING: " + result.totLoseDocCount + " documents will be lost\n");
+ msg("NOTE: will write new segments file in 5 seconds; this will remove " + result.totLoseDocCount + " docs from the index. THIS IS YOUR LAST CHANCE TO CTRL+C!");
+ for(int s=0;s<5;s++) {
+ try {
+ Thread.sleep(1000);
+ } catch (InterruptedException ie) {
+ Thread.currentThread().interrupt();
+ s--;
+ continue;
+ }
+ msg(" " + (5-i) + "...");
+ }
+ msg("Writing...");
+ CheckIndex.fix(result);
+ }
+ msg("OK");
+ msg("Wrote new segments file \"" + result.newSegments.getCurrentSegmentFileName() + "\"");
+ }
+ msg("");
final int exitCode;
- if (isClean)
+ if (result != null && result.clean == true)
exitCode = 0;
else
exitCode = 1;
System.exit(exitCode);
- }
+ }
}
Added: lucene/java/trunk/src/java/org/apache/lucene/index/CheckIndexStatus.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/CheckIndexStatus.java?rev=693508&view=auto
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/CheckIndexStatus.java (added)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/CheckIndexStatus.java Tue Sep 9 09:34:18 2008
@@ -0,0 +1,57 @@
+package org.apache.lucene.index;
+
+import org.apache.lucene.store.Directory;
+
+import java.util.List;
+import java.util.ArrayList;
+
+
+/**
+ * Result of a {@link CheckIndex#check} run. All fields are plain public fields filled in by the check;
+ * pass the instance to {@link CheckIndex#fix} to commit {@link #newSegments} into {@link #dir}.
+ **/
+public class CheckIndexStatus {
+
+ public boolean clean; // set to true by check() only when no bad segments were counted
+
+ // Early-exit flags: check() sets one of these and returns before examining any segment.
+ public boolean missingSegments; // reading the segments file from the directory threw
+ public boolean cantOpenSegments; // opening the segments file threw
+ public boolean missingSegmentVersion; // reading the format int from the segments file threw
+
+ // Summary of the segments file that was read.
+ public String segmentsFileName;
+ public int numSegments;
+ public String segmentFormat;
+ public List/*<String>*/ segmentsChecked = new ArrayList(); // receives the onlySegments names when a restriction list was passed to check()
+
+ public boolean toolOutOfDate; // set when check() gives up because the index appears to be written by a newer Lucene than this tool
+
+ public List/*<SegmentInfoStatus>*/ segmentInfos = new ArrayList(); // one entry per segment that check() examined
+ public Directory dir; // the directory that was checked; fix() commits into it
+ public SegmentInfos newSegments; // cleared clone of the index's SegmentInfos, refilled with clones of the segments that passed
+ public int totLoseDocCount; // running total of documents in the segments that failed
+ public int numBadSegments; // number of segments that failed
+
+ public static class SegmentInfoStatus{ // per-segment details recorded while check() walks the index
+ public String name; // copied from SegmentInfo.name
+ public int docCount; // copied from SegmentInfo.docCount
+ public boolean compound; // SegmentInfo.getUseCompoundFile()
+ public int numFiles; // SegmentInfo.files().size()
+ public double sizeMB; // SegmentInfo.sizeInBytes() divided by 1024*1024
+ public int docStoreOffset = -1; // stays -1 unless the segment reports a doc store offset
+ public String docStoreSegment; // assigned only when docStoreOffset != -1
+ public boolean docStoreCompoundFile; // assigned only when docStoreOffset != -1
+
+ public boolean hasDeletions; // true when the segment has a deletions file name
+ public String deletionsFileName; // assigned only when hasDeletions is true
+ public int numDeleted; // docCount minus the reader's numDocs; recorded only when the reader reports deletions
+
+ public boolean openReaderPassed; // NOTE(review): no assignment is visible in this commit's CheckIndex changes - confirm it is ever populated
+
+ int numFields; // fieldNames.size() from the norms test; NOTE(review): package-private unlike its sibling fields - confirm intended
+
+ public boolean hasProx; // SegmentInfo.getHasProx()
+ }
+
+}
\ No newline at end of file
Propchange: lucene/java/trunk/src/java/org/apache/lucene/index/CheckIndexStatus.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: lucene/java/trunk/src/test/org/apache/lucene/index/TestCheckIndex.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/index/TestCheckIndex.java?rev=693508&r1=693507&r2=693508&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/index/TestCheckIndex.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/index/TestCheckIndex.java Tue Sep 9 09:34:18 2008
@@ -49,13 +49,14 @@
ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
CheckIndex.out = new PrintStream(bos);
- if (!CheckIndex.check(dir, false, null)) {
+ CheckIndexStatus indexStatus = CheckIndex.check(dir, false, null);
+ if (indexStatus.clean == false) {
System.out.println("CheckIndex failed");
System.out.println(bos.toString());
fail();
}
final List onlySegments = new ArrayList();
onlySegments.add("_0");
- assertTrue(CheckIndex.check(dir, false, onlySegments));
+ assertTrue(CheckIndex.check(dir, false, onlySegments).clean == true);
}
}
Modified: lucene/java/trunk/src/test/org/apache/lucene/util/_TestUtil.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/util/_TestUtil.java?rev=693508&r1=693507&r2=693508&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/util/_TestUtil.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/util/_TestUtil.java Tue Sep 9 09:34:18 2008
@@ -23,6 +23,7 @@
import org.apache.lucene.index.MergeScheduler;
import org.apache.lucene.index.ConcurrentMergeScheduler;
import org.apache.lucene.index.CheckIndex;
+import org.apache.lucene.index.CheckIndexStatus;
import org.apache.lucene.store.Directory;
import java.io.ByteArrayOutputStream;
import java.io.PrintStream;
@@ -60,7 +61,10 @@
public static boolean checkIndex(Directory dir) throws IOException {
ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
CheckIndex.out = new PrintStream(bos);
- if (!CheckIndex.check(dir, false, null)) {
+
+ //TODO: fix this
+ CheckIndexStatus indexStatus = CheckIndex.check(dir, false, null);
+ if (indexStatus == null || indexStatus.clean == false) {
System.out.println("CheckIndex failed");
System.out.println(bos.toString());
throw new RuntimeException("CheckIndex failed");