You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by mi...@apache.org on 2007/01/16 21:24:53 UTC

svn commit: r496851 - in /lucene/java/trunk/src: java/org/apache/lucene/index/ site/src/documentation/content/xdocs/ test/org/apache/lucene/index/

Author: mikemccand
Date: Tue Jan 16 12:24:52 2007
New Revision: 496851

URL: http://svn.apache.org/viewvc?view=rev&rev=496851
Log:
LUCENE-756: small improvement to not rely on IO operation (fileExists)
to check whether a "single norm file" is in use for the segment.
Instead, save this information per segment explicitly into the segment
infos file.  Also renamed to "singleNormFile".

Modified:
    lucene/java/trunk/src/java/org/apache/lucene/index/SegmentInfo.java
    lucene/java/trunk/src/java/org/apache/lucene/index/SegmentInfos.java
    lucene/java/trunk/src/java/org/apache/lucene/index/SegmentReader.java
    lucene/java/trunk/src/site/src/documentation/content/xdocs/fileformats.xml
    lucene/java/trunk/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java
    lucene/java/trunk/src/test/org/apache/lucene/index/TestDoc.java
    lucene/java/trunk/src/test/org/apache/lucene/index/TestSegmentMerger.java

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/SegmentInfo.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/SegmentInfo.java?view=diff&rev=496851&r1=496850&r2=496851
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/SegmentInfo.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/SegmentInfo.java Tue Jan 16 12:24:52 2007
@@ -44,10 +44,11 @@
                                                   // pre-2.1 (ie, must check file system to see
                                                   // if <name>.cfs and <name>.nrm exist)         
 
-  private byte withNrm;                           // 1 if this segment maintains norms in a single file; 
-                                                  // -1 if not; 0 if check file is required to tell.
-                                                  // would be -1 for segments populated by DocumentWriter.
-                                                  // would be 1 for (newly created) merge resulted segments (both compound and non compound).
+  private boolean hasSingleNormFile;              // true if this segment maintains norms in a single file; 
+                                                  // false otherwise
+                                                  // this is currently false for segments populated by DocumentWriter
+                                                  // and true for newly created merged segments (both
+                                                  // compound and non compound).
   
   public SegmentInfo(String name, int docCount, Directory dir) {
     this.name = name;
@@ -56,13 +57,13 @@
     delGen = -1;
     isCompoundFile = 0;
     preLockless = true;
-    withNrm = 0;
+    hasSingleNormFile = false;
   }
 
-  public SegmentInfo(String name, int docCount, Directory dir, boolean isCompoundFile, boolean withNrm) { 
+  public SegmentInfo(String name, int docCount, Directory dir, boolean isCompoundFile, boolean hasSingleNormFile) { 
     this(name, docCount, dir);
     this.isCompoundFile = (byte) (isCompoundFile ? 1 : -1);
-    this.withNrm = (byte) (withNrm ? 1 : -1);
+    this.hasSingleNormFile = hasSingleNormFile;
     preLockless = false;
   }
 
@@ -82,7 +83,7 @@
       System.arraycopy(src.normGen, 0, normGen, 0, src.normGen.length);
     }
     isCompoundFile = src.isCompoundFile;
-    withNrm = src.withNrm;
+    hasSingleNormFile = src.hasSingleNormFile;
   }
 
   /**
@@ -99,6 +100,11 @@
     docCount = input.readInt();
     if (format <= SegmentInfos.FORMAT_LOCKLESS) {
       delGen = input.readLong();
+      if (format <= SegmentInfos.FORMAT_SINGLE_NORM_FILE) {
+        hasSingleNormFile = (1 == input.readByte());
+      } else {
+        hasSingleNormFile = false;
+      }
       int numNormGen = input.readInt();
       if (numNormGen == -1) {
         normGen = null;
@@ -115,8 +121,8 @@
       normGen = null;
       isCompoundFile = 0;
       preLockless = true;
+      hasSingleNormFile = false;
     }
-    withNrm = 0;
   }
   
   void setNumFields(int numFields) {
@@ -179,7 +185,7 @@
     si.isCompoundFile = isCompoundFile;
     si.delGen = delGen;
     si.preLockless = preLockless;
-    si.withNrm = withNrm;
+    si.hasSingleNormFile = hasSingleNormFile;
     if (normGen != null) {
       si.normGen = (long[]) normGen.clone();
     }
@@ -297,7 +303,7 @@
       return IndexFileNames.fileNameFromGeneration(name, prefix + number, gen);
     }
 
-    if (withNrm()) {
+    if (hasSingleNormFile) {
       // case 2: lockless (or nrm file exists) - single file for all norms 
       prefix = "." + IndexFileNames.NORMS_EXTENSION;
       return IndexFileNames.fileNameFromGeneration(name, prefix, 0);
@@ -337,31 +343,6 @@
   }
   
   /**
-   * Returns true iff this segment stores field norms in a single .nrm file.
-   */
-  private boolean withNrm () throws IOException {
-    if (withNrm == -1) {
-      return false;
-    } 
-    if (withNrm == 1) {
-      return true;
-    }
-    Directory d = dir;
-    try {
-      if (getUseCompoundFile()) {
-        d = new CompoundFileReader(dir, name + ".cfs");
-      }
-      boolean res = d.fileExists(name + "." + IndexFileNames.NORMS_EXTENSION);
-      withNrm = (byte) (res ? 1 : -1); // avoid more file tests like this 
-      return res;
-    } finally {
-      if (d!=dir && d!=null) {
-        d.close();
-      }
-    }
-  }
-
-  /**
    * Save this segment's info.
    */
   void write(IndexOutput output)
@@ -369,6 +350,7 @@
     output.writeString(name);
     output.writeInt(docCount);
     output.writeLong(delGen);
+    output.writeByte((byte) (hasSingleNormFile ? 1:0));
     if (normGen == null) {
       output.writeInt(-1);
     } else {

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/SegmentInfos.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/SegmentInfos.java?view=diff&rev=496851&r1=496850&r2=496851
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/SegmentInfos.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/SegmentInfos.java Tue Jan 16 12:24:52 2007
@@ -33,7 +33,7 @@
   /* Works since counter, the old 1st entry, is always >= 0 */
   public static final int FORMAT = -1;
 
-  /** This is the current file format written.  It differs
+  /** This format adds details used for lockless commits.  It differs
    * slightly from the previous format in that file names
    * are never re-used (write once).  Instead, each file is
    * written to the next generation.  For example,
@@ -44,6 +44,13 @@
    */
   public static final int FORMAT_LOCKLESS = -2;
 
+  /** This is the current file format written.  It adds a
+   * "hasSingleNormFile" flag into each segment info.
+   * See <a href="http://issues.apache.org/jira/browse/LUCENE-756">LUCENE-756</a>
+   * for details.
+   */
+  public static final int FORMAT_SINGLE_NORM_FILE = -3;
+
   public int counter = 0;    // used to name new segments
   /**
    * counts how often the index has been changed by adding or deleting docs.
@@ -184,7 +191,7 @@
       int format = input.readInt();
       if(format < 0){     // file contains explicit format info
         // check that it is a format we can understand
-        if (format < FORMAT_LOCKLESS)
+        if (format < FORMAT_SINGLE_NORM_FILE)
           throw new IOException("Unknown format version: " + format);
         version = input.readLong(); // read version
         counter = input.readInt(); // read counter
@@ -245,7 +252,7 @@
     IndexOutput output = directory.createOutput(segmentFileName);
 
     try {
-      output.writeInt(FORMAT_LOCKLESS); // write FORMAT
+      output.writeInt(FORMAT_SINGLE_NORM_FILE); // write FORMAT
       output.writeLong(++version); // every write changes
                                    // the index
       output.writeInt(counter); // write counter
@@ -311,7 +318,7 @@
           try {
             format = input.readInt();
             if(format < 0){
-              if (format < FORMAT_LOCKLESS)
+              if (format < FORMAT_SINGLE_NORM_FILE)
                 throw new IOException("Unknown format version: " + format);
               version = input.readLong(); // read version
             }

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/SegmentReader.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/SegmentReader.java?view=diff&rev=496851&r1=496850&r2=496851
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/SegmentReader.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/SegmentReader.java Tue Jan 16 12:24:52 2007
@@ -321,7 +321,7 @@
           if (addedNrm) continue; // add .nrm just once
           addedNrm = true;
         }
-            files.addElement(name);
+        files.addElement(name);
       }
     }
     return files;

Modified: lucene/java/trunk/src/site/src/documentation/content/xdocs/fileformats.xml
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/site/src/documentation/content/xdocs/fileformats.xml?view=diff&rev=496851&r1=496850&r2=496851
==============================================================================
--- lucene/java/trunk/src/site/src/documentation/content/xdocs/fileformats.xml (original)
+++ lucene/java/trunk/src/site/src/documentation/content/xdocs/fileformats.xml Tue Jan 16 12:24:52 2007
@@ -762,8 +762,8 @@
                 <p>
                     The active segments in the index are stored in the
                     segment info file,
-                    <tt>segments_N</tt>
-                    . There may
+                    <tt>segments_N</tt>.
+                    There may
                     be one or more
                     <tt>segments_N</tt>
                     files in the
@@ -779,13 +779,13 @@
 
                 <p>
                     As of 2.1, there is also a file
-                    <tt>segments.gen</tt>
-                    . This file contains the
+                    <tt>segments.gen</tt>.
+                    This file contains the
                     current generation (the
                     <tt>_N</tt>
                     in
-                    <tt>segments_N</tt>
-                    ) of the index. This is
+                    <tt>segments_N</tt>)
+                    of the index. This is
                     used only as a fallback in case the current
                     generation cannot be accurately determined by
                     directory listing alone (as is the case for some
@@ -803,11 +803,9 @@
                 </p>
                 <p>
                     <b>2.1 and above:</b>
-                    Segments --&gt; Format, Version, NameCounter, SegCount, &lt;SegName, SegSize, DelGen, NumField, NormGen
-                    <sup>NumField</sup>
-                    &gt;
-                    <sup>SegCount</sup>
-                    , IsCompoundFile
+                    Segments --&gt; Format, Version, NameCounter, SegCount, &lt;SegName, SegSize, DelGen, HasSingleNormFile, NumField,
+                    NormGen<sup>NumField</sup>,
+                    IsCompoundFile&gt;<sup>SegCount</sup>
                 </p>
 
                 <p>
@@ -823,11 +821,11 @@
                 </p>
 
                 <p>
-                    IsCompoundFile --&gt; Int8
+                    IsCompoundFile, HasSingleNormFile --&gt; Int8
                 </p>
 
                 <p>
-                    Format is -1 as of Lucene 1.4 and -2 as of Lucene 2.1.
+                    Format is -1 as of Lucene 1.4 and -3 (SegmentInfos.FORMAT_SINGLE_NORM_FILE) as of Lucene 2.1.
                 </p>
 
                 <p>
@@ -881,6 +879,13 @@
                     exists.
                 </p>
 
+                <p>
+                    If HasSingleNormFile is 1, then the field norms are
+                    written as a single joined file (with extension
+                    <tt>.nrm</tt>); if it is 0 then each field's norms
+                    are stored as separate <tt>.fN</tt> files.  See
+                    "Normalization Factors" below for details.
+                </p>
 
             </section>
 

Modified: lucene/java/trunk/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java?view=diff&rev=496851&r1=496850&r2=496851
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java Tue Jan 16 12:24:52 2007
@@ -85,46 +85,34 @@
     rmDir(dirName);
   }
 
-  public void testSearchOldIndexCFS() throws IOException {
-    String dirName = "src/test/org/apache/lucene/index/index.prelockless.cfs";
-    unzip(dirName);
-    searchIndex(dirName);
-    rmDir(dirName);
+  public void testSearchOldIndex() throws IOException {
+    String[] oldNames = {"prelockless.cfs", "prelockless.nocfs"};
+    for(int i=0;i<oldNames.length;i++) {
+      String dirName = "src/test/org/apache/lucene/index/index." + oldNames[i];
+      unzip(dirName);
+      searchIndex(dirName);
+      rmDir(dirName);
+    }
   }
 
-  public void testIndexOldIndexCFSNoAdds() throws IOException {
-    String dirName = "src/test/org/apache/lucene/index/index.prelockless.cfs";
-    unzip(dirName);
-    changeIndexNoAdds(dirName);
-    rmDir(dirName);
+  public void testIndexOldIndexNoAdds() throws IOException {
+    String[] oldNames = {"prelockless.cfs", "prelockless.nocfs"};
+    for(int i=0;i<oldNames.length;i++) {
+      String dirName = "src/test/org/apache/lucene/index/index." + oldNames[i];
+      unzip(dirName);
+      changeIndexNoAdds(dirName);
+      rmDir(dirName);
+    }
   }
 
-  public void testIndexOldIndexCFS() throws IOException {
-    String dirName = "src/test/org/apache/lucene/index/index.prelockless.cfs";
-    unzip(dirName);
-    changeIndexWithAdds(dirName);
-    rmDir(dirName);
-  }
-
-  public void testSearchOldIndexNoCFS() throws IOException {
-    String dirName = "src/test/org/apache/lucene/index/index.prelockless.nocfs";
-    unzip(dirName);
-    searchIndex(dirName);
-    rmDir(dirName);
-  }
-
-  public void testIndexOldIndexNoCFS() throws IOException {
-    String dirName = "src/test/org/apache/lucene/index/index.prelockless.nocfs";
-    unzip(dirName);
-    changeIndexWithAdds(dirName);
-    rmDir(dirName);
-  }
-
-  public void testIndexOldIndexNoCFSNoAdds() throws IOException {
-    String dirName = "src/test/org/apache/lucene/index/index.prelockless.nocfs";
-    unzip(dirName);
-    changeIndexNoAdds(dirName);
-    rmDir(dirName);
+  public void testIndexOldIndex() throws IOException {
+    String[] oldNames = {"prelockless.cfs", "prelockless.nocfs"};
+    for(int i=0;i<oldNames.length;i++) {
+      String dirName = "src/test/org/apache/lucene/index/index." + oldNames[i];
+      unzip(dirName);
+      changeIndexWithAdds(dirName);
+      rmDir(dirName);
+    }
   }
 
   public void searchIndex(String dirName) throws IOException {

Modified: lucene/java/trunk/src/test/org/apache/lucene/index/TestDoc.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/index/TestDoc.java?view=diff&rev=496851&r1=496850&r2=496851
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/index/TestDoc.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/index/TestDoc.java Tue Jan 16 12:24:52 2007
@@ -108,21 +108,21 @@
       Directory directory = FSDirectory.getDirectory(indexDir, true);
       directory.close();
 
-      indexDoc("one", "test.txt");
-      printSegment(out, "one", 1);
+      SegmentInfo si1 = indexDoc("one", "test.txt");
+      printSegment(out, si1);
 
-      indexDoc("two", "test2.txt");
-      printSegment(out, "two", 1);
+      SegmentInfo si2 = indexDoc("two", "test2.txt");
+      printSegment(out, si2);
 
-      merge("one", 1, "two", 1, "merge", false);
-      printSegment(out, "merge", 2);
+      SegmentInfo siMerge = merge(si1, si2, "merge", false);
+      printSegment(out, siMerge);
 
-      merge("one", 1, "two", 1, "merge2", false);
-      printSegment(out, "merge2", 2);
-
-      merge("merge", 2, "merge2", 2, "merge3", false);
-      printSegment(out, "merge3", 4);
+      SegmentInfo siMerge2 = merge(si1, si2, "merge2", false);
+      printSegment(out, siMerge2);
 
+      SegmentInfo siMerge3 = merge(siMerge, siMerge2, "merge3", false);
+      printSegment(out, siMerge3);
+      
       out.close();
       sw.close();
       String multiFileOutput = sw.getBuffer().toString();
@@ -134,21 +134,21 @@
       directory = FSDirectory.getDirectory(indexDir, true);
       directory.close();
 
-      indexDoc("one", "test.txt");
-      printSegment(out, "one", 1);
+      si1 = indexDoc("one", "test.txt");
+      printSegment(out, si1);
 
-      indexDoc("two", "test2.txt");
-      printSegment(out, "two", 1);
+      si2 = indexDoc("two", "test2.txt");
+      printSegment(out, si2);
 
-      merge("one", 1, "two", 1, "merge", true);
-      printSegment(out, "merge", 2);
+      siMerge = merge(si1, si2, "merge", true);
+      printSegment(out, siMerge);
 
-      merge("one", 1, "two", 1, "merge2", true);
-      printSegment(out, "merge2", 2);
-
-      merge("merge", 2, "merge2", 2, "merge3", true);
-      printSegment(out, "merge3", 4);
+      siMerge2 = merge(si1, si2, "merge2", true);
+      printSegment(out, siMerge2);
 
+      siMerge3 = merge(siMerge, siMerge2, "merge3", true);
+      printSegment(out, siMerge3);
+      
       out.close();
       sw.close();
       String singleFileOutput = sw.getBuffer().toString();
@@ -157,7 +157,7 @@
    }
 
 
-   private void indexDoc(String segment, String fileName)
+   private SegmentInfo indexDoc(String segment, String fileName)
    throws Exception
    {
       Directory directory = FSDirectory.getDirectory(indexDir, false);
@@ -171,18 +171,18 @@
       writer.addDocument(segment, doc);
 
       directory.close();
+      return new SegmentInfo(segment, 1, directory, false, false);
    }
 
 
-   private void merge(String seg1, int docCount1, String seg2, int docCount2, String merged, boolean useCompoundFile)
+   private SegmentInfo merge(SegmentInfo si1, SegmentInfo si2, String merged, boolean useCompoundFile)
    throws Exception {
       Directory directory = FSDirectory.getDirectory(indexDir, false);
 
-      SegmentReader r1 = SegmentReader.get(new SegmentInfo(seg1, docCount1, directory));
-      SegmentReader r2 = SegmentReader.get(new SegmentInfo(seg2, docCount2, directory));
+      SegmentReader r1 = SegmentReader.get(si1);
+      SegmentReader r2 = SegmentReader.get(si2);
 
-      SegmentMerger merger =
-        new SegmentMerger(directory, merged);
+      SegmentMerger merger = new SegmentMerger(directory, merged);
 
       merger.add(r1);
       merger.add(r2);
@@ -196,14 +196,14 @@
       }
 
       directory.close();
+      return new SegmentInfo(merged, si1.docCount + si2.docCount, directory, useCompoundFile, true);
    }
 
 
-   private void printSegment(PrintWriter out, String segment, int docCount)
+   private void printSegment(PrintWriter out, SegmentInfo si)
    throws Exception {
       Directory directory = FSDirectory.getDirectory(indexDir, false);
-      SegmentReader reader =
-        SegmentReader.get(new SegmentInfo(segment, docCount, directory));
+      SegmentReader reader = SegmentReader.get(si);
 
       for (int i = 0; i < reader.numDocs(); i++)
         out.println(reader.document(i));

Modified: lucene/java/trunk/src/test/org/apache/lucene/index/TestSegmentMerger.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/index/TestSegmentMerger.java?view=diff&rev=496851&r1=496850&r2=496851
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/index/TestSegmentMerger.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/index/TestSegmentMerger.java Tue Jan 16 12:24:52 2007
@@ -70,7 +70,7 @@
     merger.closeReaders();
     assertTrue(docsMerged == 2);
     //Should be able to open a new SegmentReader against the new directory
-    SegmentReader mergedReader = SegmentReader.get(new SegmentInfo(mergedSegment, docsMerged, mergedDir));
+    SegmentReader mergedReader = SegmentReader.get(new SegmentInfo(mergedSegment, docsMerged, mergedDir, false, true));
     assertTrue(mergedReader != null);
     assertTrue(mergedReader.numDocs() == 2);
     Document newDoc1 = mergedReader.document(0);