You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by mi...@apache.org on 2007/01/16 21:24:53 UTC
svn commit: r496851 - in /lucene/java/trunk/src:
java/org/apache/lucene/index/ site/src/documentation/content/xdocs/
test/org/apache/lucene/index/
Author: mikemccand
Date: Tue Jan 16 12:24:52 2007
New Revision: 496851
URL: http://svn.apache.org/viewvc?view=rev&rev=496851
Log:
LUCENE-756: small improvement to not rely on IO operation (fileExists)
to check whether a "single norm file" is in use for the segment.
Instead, save this information per segment explicitly into the segment
infos file. Also renamed to "singleNormFile".
Modified:
lucene/java/trunk/src/java/org/apache/lucene/index/SegmentInfo.java
lucene/java/trunk/src/java/org/apache/lucene/index/SegmentInfos.java
lucene/java/trunk/src/java/org/apache/lucene/index/SegmentReader.java
lucene/java/trunk/src/site/src/documentation/content/xdocs/fileformats.xml
lucene/java/trunk/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java
lucene/java/trunk/src/test/org/apache/lucene/index/TestDoc.java
lucene/java/trunk/src/test/org/apache/lucene/index/TestSegmentMerger.java
Modified: lucene/java/trunk/src/java/org/apache/lucene/index/SegmentInfo.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/SegmentInfo.java?view=diff&rev=496851&r1=496850&r2=496851
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/SegmentInfo.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/SegmentInfo.java Tue Jan 16 12:24:52 2007
@@ -44,10 +44,11 @@
// pre-2.1 (ie, must check file system to see
// if <name>.cfs and <name>.nrm exist)
- private byte withNrm; // 1 if this segment maintains norms in a single file;
- // -1 if not; 0 if check file is required to tell.
- // would be -1 for segments populated by DocumentWriter.
- // would be 1 for (newly created) merge resulted segments (both compound and non compound).
+ private boolean hasSingleNormFile; // true if this segment maintains norms in a single file;
+ // false otherwise
+ // this is currently false for segments populated by DocumentWriter
+ // and true for newly created merged segments (both
+ // compound and non compound).
public SegmentInfo(String name, int docCount, Directory dir) {
this.name = name;
@@ -56,13 +57,13 @@
delGen = -1;
isCompoundFile = 0;
preLockless = true;
- withNrm = 0;
+ hasSingleNormFile = false;
}
- public SegmentInfo(String name, int docCount, Directory dir, boolean isCompoundFile, boolean withNrm) {
+ public SegmentInfo(String name, int docCount, Directory dir, boolean isCompoundFile, boolean hasSingleNormFile) {
this(name, docCount, dir);
this.isCompoundFile = (byte) (isCompoundFile ? 1 : -1);
- this.withNrm = (byte) (withNrm ? 1 : -1);
+ this.hasSingleNormFile = hasSingleNormFile;
preLockless = false;
}
@@ -82,7 +83,7 @@
System.arraycopy(src.normGen, 0, normGen, 0, src.normGen.length);
}
isCompoundFile = src.isCompoundFile;
- withNrm = src.withNrm;
+ hasSingleNormFile = src.hasSingleNormFile;
}
/**
@@ -99,6 +100,11 @@
docCount = input.readInt();
if (format <= SegmentInfos.FORMAT_LOCKLESS) {
delGen = input.readLong();
+ if (format <= SegmentInfos.FORMAT_SINGLE_NORM_FILE) {
+ hasSingleNormFile = (1 == input.readByte());
+ } else {
+ hasSingleNormFile = false;
+ }
int numNormGen = input.readInt();
if (numNormGen == -1) {
normGen = null;
@@ -115,8 +121,8 @@
normGen = null;
isCompoundFile = 0;
preLockless = true;
+ hasSingleNormFile = false;
}
- withNrm = 0;
}
void setNumFields(int numFields) {
@@ -179,7 +185,7 @@
si.isCompoundFile = isCompoundFile;
si.delGen = delGen;
si.preLockless = preLockless;
- si.withNrm = withNrm;
+ si.hasSingleNormFile = hasSingleNormFile;
if (normGen != null) {
si.normGen = (long[]) normGen.clone();
}
@@ -297,7 +303,7 @@
return IndexFileNames.fileNameFromGeneration(name, prefix + number, gen);
}
- if (withNrm()) {
+ if (hasSingleNormFile) {
// case 2: lockless (or nrm file exists) - single file for all norms
prefix = "." + IndexFileNames.NORMS_EXTENSION;
return IndexFileNames.fileNameFromGeneration(name, prefix, 0);
@@ -337,31 +343,6 @@
}
/**
- * Returns true iff this segment stores field norms in a single .nrm file.
- */
- private boolean withNrm () throws IOException {
- if (withNrm == -1) {
- return false;
- }
- if (withNrm == 1) {
- return true;
- }
- Directory d = dir;
- try {
- if (getUseCompoundFile()) {
- d = new CompoundFileReader(dir, name + ".cfs");
- }
- boolean res = d.fileExists(name + "." + IndexFileNames.NORMS_EXTENSION);
- withNrm = (byte) (res ? 1 : -1); // avoid more file tests like this
- return res;
- } finally {
- if (d!=dir && d!=null) {
- d.close();
- }
- }
- }
-
- /**
* Save this segment's info.
*/
void write(IndexOutput output)
@@ -369,6 +350,7 @@
output.writeString(name);
output.writeInt(docCount);
output.writeLong(delGen);
+ output.writeByte((byte) (hasSingleNormFile ? 1:0));
if (normGen == null) {
output.writeInt(-1);
} else {
Modified: lucene/java/trunk/src/java/org/apache/lucene/index/SegmentInfos.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/SegmentInfos.java?view=diff&rev=496851&r1=496850&r2=496851
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/SegmentInfos.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/SegmentInfos.java Tue Jan 16 12:24:52 2007
@@ -33,7 +33,7 @@
/* Works since counter, the old 1st entry, is always >= 0 */
public static final int FORMAT = -1;
- /** This is the current file format written. It differs
+ /** This format adds details used for lockless commits. It differs
* slightly from the previous format in that file names
* are never re-used (write once). Instead, each file is
* written to the next generation. For example,
@@ -44,6 +44,13 @@
*/
public static final int FORMAT_LOCKLESS = -2;
+ /** This is the current file format written. It adds a
+ * "hasSingleNormFile" flag into each segment info.
+ * See <a href="http://issues.apache.org/jira/browse/LUCENE-756">LUCENE-756</a>
+ * for details.
+ */
+ public static final int FORMAT_SINGLE_NORM_FILE = -3;
+
public int counter = 0; // used to name new segments
/**
* counts how often the index has been changed by adding or deleting docs.
@@ -184,7 +191,7 @@
int format = input.readInt();
if(format < 0){ // file contains explicit format info
// check that it is a format we can understand
- if (format < FORMAT_LOCKLESS)
+ if (format < FORMAT_SINGLE_NORM_FILE)
throw new IOException("Unknown format version: " + format);
version = input.readLong(); // read version
counter = input.readInt(); // read counter
@@ -245,7 +252,7 @@
IndexOutput output = directory.createOutput(segmentFileName);
try {
- output.writeInt(FORMAT_LOCKLESS); // write FORMAT
+ output.writeInt(FORMAT_SINGLE_NORM_FILE); // write FORMAT
output.writeLong(++version); // every write changes
// the index
output.writeInt(counter); // write counter
@@ -311,7 +318,7 @@
try {
format = input.readInt();
if(format < 0){
- if (format < FORMAT_LOCKLESS)
+ if (format < FORMAT_SINGLE_NORM_FILE)
throw new IOException("Unknown format version: " + format);
version = input.readLong(); // read version
}
Modified: lucene/java/trunk/src/java/org/apache/lucene/index/SegmentReader.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/SegmentReader.java?view=diff&rev=496851&r1=496850&r2=496851
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/SegmentReader.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/SegmentReader.java Tue Jan 16 12:24:52 2007
@@ -321,7 +321,7 @@
if (addedNrm) continue; // add .nrm just once
addedNrm = true;
}
- files.addElement(name);
+ files.addElement(name);
}
}
return files;
Modified: lucene/java/trunk/src/site/src/documentation/content/xdocs/fileformats.xml
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/site/src/documentation/content/xdocs/fileformats.xml?view=diff&rev=496851&r1=496850&r2=496851
==============================================================================
--- lucene/java/trunk/src/site/src/documentation/content/xdocs/fileformats.xml (original)
+++ lucene/java/trunk/src/site/src/documentation/content/xdocs/fileformats.xml Tue Jan 16 12:24:52 2007
@@ -762,8 +762,8 @@
<p>
The active segments in the index are stored in the
segment info file,
- <tt>segments_N</tt>
- . There may
+ <tt>segments_N</tt>.
+ There may
be one or more
<tt>segments_N</tt>
files in the
@@ -779,13 +779,13 @@
<p>
As of 2.1, there is also a file
- <tt>segments.gen</tt>
- . This file contains the
+ <tt>segments.gen</tt>.
+ This file contains the
current generation (the
<tt>_N</tt>
in
- <tt>segments_N</tt>
- ) of the index. This is
+ <tt>segments_N</tt>)
+ of the index. This is
used only as a fallback in case the current
generation cannot be accurately determined by
directory listing alone (as is the case for some
@@ -803,11 +803,9 @@
</p>
<p>
<b>2.1 and above:</b>
- Segments --> Format, Version, NameCounter, SegCount, <SegName, SegSize, DelGen, NumField, NormGen
- <sup>NumField</sup>
- >
- <sup>SegCount</sup>
- , IsCompoundFile
+ Segments --> Format, Version, NameCounter, SegCount, <SegName, SegSize, DelGen, HasSingleNormFile, NumField,
+ NormGen<sup>NumField</sup>,
+ IsCompoundFile><sup>SegCount</sup>
</p>
<p>
@@ -823,11 +821,11 @@
</p>
<p>
- IsCompoundFile --> Int8
+ IsCompoundFile, HasSingleNormFile --> Int8
</p>
<p>
- Format is -1 as of Lucene 1.4 and -2 as of Lucene 2.1.
+ Format is -1 as of Lucene 1.4 and -3 (SegmentInfos.FORMAT_SINGLE_NORM_FILE) as of Lucene 2.1.
</p>
<p>
@@ -881,6 +879,13 @@
exists.
</p>
+ <p>
+ If HasSingleNormFile is 1, then the field norms are
+ written as a single joined file (with extension
+ <tt>.nrm</tt>); if it is 0 then each field's norms
+ are stored as separate <tt>.fN</tt> files. See
+ "Normalization Factors" below for details.
+ </p>
</section>
Modified: lucene/java/trunk/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java?view=diff&rev=496851&r1=496850&r2=496851
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java Tue Jan 16 12:24:52 2007
@@ -85,46 +85,34 @@
rmDir(dirName);
}
- public void testSearchOldIndexCFS() throws IOException {
- String dirName = "src/test/org/apache/lucene/index/index.prelockless.cfs";
- unzip(dirName);
- searchIndex(dirName);
- rmDir(dirName);
+ public void testSearchOldIndex() throws IOException {
+ String[] oldNames = {"prelockless.cfs", "prelockless.nocfs"};
+ for(int i=0;i<oldNames.length;i++) {
+ String dirName = "src/test/org/apache/lucene/index/index." + oldNames[i];
+ unzip(dirName);
+ searchIndex(dirName);
+ rmDir(dirName);
+ }
}
- public void testIndexOldIndexCFSNoAdds() throws IOException {
- String dirName = "src/test/org/apache/lucene/index/index.prelockless.cfs";
- unzip(dirName);
- changeIndexNoAdds(dirName);
- rmDir(dirName);
+ public void testIndexOldIndexNoAdds() throws IOException {
+ String[] oldNames = {"prelockless.cfs", "prelockless.nocfs"};
+ for(int i=0;i<oldNames.length;i++) {
+ String dirName = "src/test/org/apache/lucene/index/index." + oldNames[i];
+ unzip(dirName);
+ changeIndexNoAdds(dirName);
+ rmDir(dirName);
+ }
}
- public void testIndexOldIndexCFS() throws IOException {
- String dirName = "src/test/org/apache/lucene/index/index.prelockless.cfs";
- unzip(dirName);
- changeIndexWithAdds(dirName);
- rmDir(dirName);
- }
-
- public void testSearchOldIndexNoCFS() throws IOException {
- String dirName = "src/test/org/apache/lucene/index/index.prelockless.nocfs";
- unzip(dirName);
- searchIndex(dirName);
- rmDir(dirName);
- }
-
- public void testIndexOldIndexNoCFS() throws IOException {
- String dirName = "src/test/org/apache/lucene/index/index.prelockless.nocfs";
- unzip(dirName);
- changeIndexWithAdds(dirName);
- rmDir(dirName);
- }
-
- public void testIndexOldIndexNoCFSNoAdds() throws IOException {
- String dirName = "src/test/org/apache/lucene/index/index.prelockless.nocfs";
- unzip(dirName);
- changeIndexNoAdds(dirName);
- rmDir(dirName);
+ public void testIndexOldIndex() throws IOException {
+ String[] oldNames = {"prelockless.cfs", "prelockless.nocfs"};
+ for(int i=0;i<oldNames.length;i++) {
+ String dirName = "src/test/org/apache/lucene/index/index." + oldNames[i];
+ unzip(dirName);
+ changeIndexWithAdds(dirName);
+ rmDir(dirName);
+ }
}
public void searchIndex(String dirName) throws IOException {
Modified: lucene/java/trunk/src/test/org/apache/lucene/index/TestDoc.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/index/TestDoc.java?view=diff&rev=496851&r1=496850&r2=496851
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/index/TestDoc.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/index/TestDoc.java Tue Jan 16 12:24:52 2007
@@ -108,21 +108,21 @@
Directory directory = FSDirectory.getDirectory(indexDir, true);
directory.close();
- indexDoc("one", "test.txt");
- printSegment(out, "one", 1);
+ SegmentInfo si1 = indexDoc("one", "test.txt");
+ printSegment(out, si1);
- indexDoc("two", "test2.txt");
- printSegment(out, "two", 1);
+ SegmentInfo si2 = indexDoc("two", "test2.txt");
+ printSegment(out, si2);
- merge("one", 1, "two", 1, "merge", false);
- printSegment(out, "merge", 2);
+ SegmentInfo siMerge = merge(si1, si2, "merge", false);
+ printSegment(out, siMerge);
- merge("one", 1, "two", 1, "merge2", false);
- printSegment(out, "merge2", 2);
-
- merge("merge", 2, "merge2", 2, "merge3", false);
- printSegment(out, "merge3", 4);
+ SegmentInfo siMerge2 = merge(si1, si2, "merge2", false);
+ printSegment(out, siMerge2);
+ SegmentInfo siMerge3 = merge(siMerge, siMerge2, "merge3", false);
+ printSegment(out, siMerge3);
+
out.close();
sw.close();
String multiFileOutput = sw.getBuffer().toString();
@@ -134,21 +134,21 @@
directory = FSDirectory.getDirectory(indexDir, true);
directory.close();
- indexDoc("one", "test.txt");
- printSegment(out, "one", 1);
+ si1 = indexDoc("one", "test.txt");
+ printSegment(out, si1);
- indexDoc("two", "test2.txt");
- printSegment(out, "two", 1);
+ si2 = indexDoc("two", "test2.txt");
+ printSegment(out, si2);
- merge("one", 1, "two", 1, "merge", true);
- printSegment(out, "merge", 2);
+ siMerge = merge(si1, si2, "merge", true);
+ printSegment(out, siMerge);
- merge("one", 1, "two", 1, "merge2", true);
- printSegment(out, "merge2", 2);
-
- merge("merge", 2, "merge2", 2, "merge3", true);
- printSegment(out, "merge3", 4);
+ siMerge2 = merge(si1, si2, "merge2", true);
+ printSegment(out, siMerge2);
+ siMerge3 = merge(siMerge, siMerge2, "merge3", true);
+ printSegment(out, siMerge3);
+
out.close();
sw.close();
String singleFileOutput = sw.getBuffer().toString();
@@ -157,7 +157,7 @@
}
- private void indexDoc(String segment, String fileName)
+ private SegmentInfo indexDoc(String segment, String fileName)
throws Exception
{
Directory directory = FSDirectory.getDirectory(indexDir, false);
@@ -171,18 +171,18 @@
writer.addDocument(segment, doc);
directory.close();
+ return new SegmentInfo(segment, 1, directory, false, false);
}
- private void merge(String seg1, int docCount1, String seg2, int docCount2, String merged, boolean useCompoundFile)
+ private SegmentInfo merge(SegmentInfo si1, SegmentInfo si2, String merged, boolean useCompoundFile)
throws Exception {
Directory directory = FSDirectory.getDirectory(indexDir, false);
- SegmentReader r1 = SegmentReader.get(new SegmentInfo(seg1, docCount1, directory));
- SegmentReader r2 = SegmentReader.get(new SegmentInfo(seg2, docCount2, directory));
+ SegmentReader r1 = SegmentReader.get(si1);
+ SegmentReader r2 = SegmentReader.get(si2);
- SegmentMerger merger =
- new SegmentMerger(directory, merged);
+ SegmentMerger merger = new SegmentMerger(directory, merged);
merger.add(r1);
merger.add(r2);
@@ -196,14 +196,14 @@
}
directory.close();
+ return new SegmentInfo(merged, si1.docCount + si2.docCount, directory, useCompoundFile, true);
}
- private void printSegment(PrintWriter out, String segment, int docCount)
+ private void printSegment(PrintWriter out, SegmentInfo si)
throws Exception {
Directory directory = FSDirectory.getDirectory(indexDir, false);
- SegmentReader reader =
- SegmentReader.get(new SegmentInfo(segment, docCount, directory));
+ SegmentReader reader = SegmentReader.get(si);
for (int i = 0; i < reader.numDocs(); i++)
out.println(reader.document(i));
Modified: lucene/java/trunk/src/test/org/apache/lucene/index/TestSegmentMerger.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/index/TestSegmentMerger.java?view=diff&rev=496851&r1=496850&r2=496851
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/index/TestSegmentMerger.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/index/TestSegmentMerger.java Tue Jan 16 12:24:52 2007
@@ -70,7 +70,7 @@
merger.closeReaders();
assertTrue(docsMerged == 2);
//Should be able to open a new SegmentReader against the new directory
- SegmentReader mergedReader = SegmentReader.get(new SegmentInfo(mergedSegment, docsMerged, mergedDir));
+ SegmentReader mergedReader = SegmentReader.get(new SegmentInfo(mergedSegment, docsMerged, mergedDir, false, true));
assertTrue(mergedReader != null);
assertTrue(mergedReader.numDocs() == 2);
Document newDoc1 = mergedReader.document(0);