You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2014/10/04 14:41:54 UTC

svn commit: r1629380 - in /lucene/dev/branches/lucene5969/lucene/core/src: java/org/apache/lucene/index/ test/org/apache/lucene/index/

Author: rmuir
Date: Sat Oct  4 12:41:54 2014
New Revision: 1629380

URL: http://svn.apache.org/r1629380
Log:
LUCENE-5969: don't open the cfs file twice

Modified:
    lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
    lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/index/SegmentCoreReaders.java
    lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/index/SegmentReader.java
    lucene/dev/branches/lucene5969/lucene/core/src/test/org/apache/lucene/index/TestConsistentFieldNumbers.java
    lucene/dev/branches/lucene5969/lucene/core/src/test/org/apache/lucene/index/TestTermVectorsReader.java

Modified: lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java?rev=1629380&r1=1629379&r2=1629380&view=diff
==============================================================================
--- lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java (original)
+++ lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java Sat Oct  4 12:41:54 2014
@@ -41,6 +41,7 @@ import java.util.concurrent.atomic.Atomi
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.codecs.FieldInfosReader;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.index.DocValuesUpdate.BinaryDocValuesUpdate;
 import org.apache.lucene.index.DocValuesUpdate.NumericDocValuesUpdate;
@@ -856,6 +857,28 @@ public class IndexWriter implements Clos
       }
     }
   }
+  
+  // reads latest field infos for the commit
+  // this is used on IW init and addIndexes(Dir) to create/update the global field map.
+  // TODO: fix tests abusing this method!
+  static FieldInfos readFieldInfos(SegmentCommitInfo si) throws IOException {
+    Codec codec = si.info.getCodec();
+    FieldInfosReader reader = codec.fieldInfosFormat().getFieldInfosReader();
+    
+    if (si.hasFieldUpdates()) {
+      // there are updates, we read latest (always outside of CFS)
+      final String segmentSuffix = Long.toString(si.getFieldInfosGen(), Character.MAX_RADIX);
+      return reader.read(si.info.dir, si.info, segmentSuffix, IOContext.READONCE);
+    } else if (si.info.getUseCompoundFile()) {
+      // cfs
+      try (Directory cfs = codec.compoundFormat().getCompoundReader(si.info.dir, si.info, IOContext.DEFAULT)) {
+        return reader.read(cfs, si.info, "", IOContext.READONCE);
+      }
+    } else {
+      // no cfs
+      return reader.read(si.info.dir, si.info, "", IOContext.READONCE);
+    }
+  }
 
   /**
    * Loads or returns the already loaded the global field number map for this {@link SegmentInfos}.
@@ -865,7 +888,8 @@ public class IndexWriter implements Clos
     final FieldNumbers map = new FieldNumbers();
 
     for(SegmentCommitInfo info : segmentInfos) {
-      for(FieldInfo fi : SegmentReader.readFieldInfos(info)) {
+      FieldInfos fis = readFieldInfos(info);
+      for(FieldInfo fi : fis) {
         map.addOrGet(fi.name, fi.number, fi.getDocValuesType());
       }
     }
@@ -2379,7 +2403,8 @@ public class IndexWriter implements Clos
 
             IOContext context = new IOContext(new MergeInfo(info.info.getDocCount(), info.sizeInBytes(), true, -1));
 
-            for(FieldInfo fi : SegmentReader.readFieldInfos(info)) {
+            FieldInfos fis = readFieldInfos(info);
+            for(FieldInfo fi : fis) {
               globalFieldNumberMap.addOrGet(fi.name, fi.number, fi.getDocValuesType());
             }
             infos.add(copySegmentAsIs(info, newSegName, context));

Modified: lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/index/SegmentCoreReaders.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/index/SegmentCoreReaders.java?rev=1629380&r1=1629379&r2=1629380&view=diff
==============================================================================
--- lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/index/SegmentCoreReaders.java (original)
+++ lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/index/SegmentCoreReaders.java Sat Oct  4 12:41:54 2014
@@ -60,6 +60,11 @@ final class SegmentCoreReaders implement
   final StoredFieldsReader fieldsReaderOrig;
   final TermVectorsReader termVectorsReaderOrig;
   final Directory cfsReader;
+  /** 
+   * fieldinfos for this core: means gen=-1.
+   * this is the exact fieldinfos these codec components saw at write.
+   * in the case of DV updates, SR may hold a newer version. */
+  final FieldInfos coreFieldInfos;
 
   // TODO: make a single thread local w/ a
   // Thingy class holding fieldsReader, termVectorsReader,
@@ -104,9 +109,9 @@ final class SegmentCoreReaders implement
         cfsDir = dir;
       }
 
-      final FieldInfos fieldInfos = owner.fieldInfos;
+      coreFieldInfos = codec.fieldInfosFormat().getFieldInfosReader().read(cfsDir, si.info, "", context);
       
-      final SegmentReadState segmentReadState = new SegmentReadState(cfsDir, si.info, fieldInfos, context);
+      final SegmentReadState segmentReadState = new SegmentReadState(cfsDir, si.info, coreFieldInfos, context);
       final PostingsFormat format = codec.postingsFormat();
       // Ask codec for its Fields
       fields = format.fieldsProducer(segmentReadState);
@@ -115,17 +120,17 @@ final class SegmentCoreReaders implement
       // TODO: since we don't write any norms file if there are no norms,
       // kinda jaky to assume the codec handles the case of no norms file at all gracefully?!
 
-      if (fieldInfos.hasNorms()) {
+      if (coreFieldInfos.hasNorms()) {
         normsProducer = codec.normsFormat().normsProducer(segmentReadState);
         assert normsProducer != null;
       } else {
         normsProducer = null;
       }
   
-      fieldsReaderOrig = si.info.getCodec().storedFieldsFormat().fieldsReader(cfsDir, si.info, fieldInfos, context);
+      fieldsReaderOrig = si.info.getCodec().storedFieldsFormat().fieldsReader(cfsDir, si.info, coreFieldInfos, context);
 
-      if (fieldInfos.hasVectors()) { // open term vector files only as needed
-        termVectorsReaderOrig = si.info.getCodec().termVectorsFormat().vectorsReader(cfsDir, si.info, fieldInfos, context);
+      if (coreFieldInfos.hasVectors()) { // open term vector files only as needed
+        termVectorsReaderOrig = si.info.getCodec().termVectorsFormat().vectorsReader(cfsDir, si.info, coreFieldInfos, context);
       } else {
         termVectorsReaderOrig = null;
       }

Modified: lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/index/SegmentReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/index/SegmentReader.java?rev=1629380&r1=1629379&r2=1629380&view=diff
==============================================================================
--- lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/index/SegmentReader.java (original)
+++ lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/index/SegmentReader.java Sat Oct  4 12:41:54 2014
@@ -91,13 +91,6 @@ public final class SegmentReader extends
   // TODO: why is this public?
   public SegmentReader(SegmentCommitInfo si, IOContext context) throws IOException {
     this.si = si;
-    // TODO if the segment uses CFS, we may open the CFS file twice: once for
-    // reading the FieldInfos (if they are not gen'd) and second time by
-    // SegmentCoreReaders. We can open the CFS here and pass to SCR, but then it
-    // results in less readable code (resource not closed where it was opened).
-    // Best if we could somehow read FieldInfos in SCR but not keep it there, but
-    // constructors don't allow returning two things...
-    fieldInfos = readFieldInfos(si);
     core = new SegmentCoreReaders(this, si.info.dir, si, context);
     segDocValues = new SegmentDocValues();
     
@@ -112,12 +105,9 @@ public final class SegmentReader extends
         liveDocs = null;
       }
       numDocs = si.info.getDocCount() - si.getDelCount();
-
-      if (fieldInfos.hasDocValues()) {
-        docValuesProducer = initDocValuesProducer(codec);
-      } else {
-        docValuesProducer = null;
-      }
+      
+      fieldInfos = initFieldInfos();
+      docValuesProducer = initDocValuesProducer();
 
       success = true;
     } finally {
@@ -152,24 +142,11 @@ public final class SegmentReader extends
     this.core = sr.core;
     core.incRef();
     this.segDocValues = sr.segDocValues;
-    
-//    System.out.println("[" + Thread.currentThread().getName() + "] SR.init: sharing reader: " + sr + " for gens=" + sr.genDVProducers.keySet());
-    
-    // increment refCount of DocValuesProducers that are used by this reader
+
     boolean success = false;
     try {
-      final Codec codec = si.info.getCodec();
-      if (si.getFieldInfosGen() == -1) {
-        fieldInfos = sr.fieldInfos;
-      } else {
-        fieldInfos = readFieldInfos(si);
-      }
-      
-      if (fieldInfos.hasDocValues()) {
-        docValuesProducer = initDocValuesProducer(codec);
-      } else {
-        docValuesProducer = null;
-      }
+      fieldInfos = initFieldInfos();
+      docValuesProducer = initDocValuesProducer();
       success = true;
     } finally {
       if (!success) {
@@ -178,46 +155,34 @@ public final class SegmentReader extends
     }
   }
 
-  // initialize the per-field DocValuesProducer
-  private DocValuesProducer initDocValuesProducer(Codec codec) throws IOException {
+  /**
+   * init most recent DocValues for the current commit
+   */
+  private DocValuesProducer initDocValuesProducer() throws IOException {
     final Directory dir = core.cfsReader != null ? core.cfsReader : si.info.dir;
-    final DocValuesFormat dvFormat = codec.docValuesFormat();
+    final DocValuesFormat dvFormat = si.info.getCodec().docValuesFormat();
 
-    if (!si.hasFieldUpdates()) {
+    if (!fieldInfos.hasDocValues()) {
+      return null;
+    } else if (si.hasFieldUpdates()) {
+      return new SegmentDocValuesProducer(si, dir, fieldInfos, segDocValues, dvFormat);
+    } else {
       // simple case, no DocValues updates
       return segDocValues.getDocValuesProducer(-1L, si, IOContext.READ, dir, dvFormat, fieldInfos);
-    } else {
-      return new SegmentDocValuesProducer(si, dir, fieldInfos, segDocValues, dvFormat);
     }
   }
   
   /**
-   * Reads the most recent {@link FieldInfos} of the given segment info.
-   * 
-   * @lucene.internal
+   * init most recent FieldInfos for the current commit
    */
-  static FieldInfos readFieldInfos(SegmentCommitInfo info) throws IOException {
-    final Directory dir;
-    final boolean closeDir;
-    if (info.getFieldInfosGen() == -1 && info.info.getUseCompoundFile()) {
-      // no fieldInfos gen and segment uses a compound file
-      dir = info.info.getCodec().compoundFormat().getCompoundReader(info.info.dir, info.info, IOContext.READONCE);
-      closeDir = true;
+  private FieldInfos initFieldInfos() throws IOException {
+    if (!si.hasFieldUpdates()) {
+      return core.coreFieldInfos;
     } else {
-      // gen'd FIS are read outside CFS, or the segment doesn't use a compound file
-      dir = info.info.dir;
-      closeDir = false;
-    }
-    
-    try {
-      final String segmentSuffix = info.getFieldInfosGen() == -1 ? "" : Long.toString(info.getFieldInfosGen(), Character.MAX_RADIX);
-      Codec codec = info.info.getCodec();
-      FieldInfosFormat fisFormat = codec.fieldInfosFormat();
-      return fisFormat.getFieldInfosReader().read(dir, info.info, segmentSuffix, IOContext.READONCE);
-    } finally {
-      if (closeDir) {
-        dir.close();
-      }
+      // updates always outside of CFS
+      FieldInfosFormat fisFormat = si.info.getCodec().fieldInfosFormat();
+      final String segmentSuffix = Long.toString(si.getFieldInfosGen(), Character.MAX_RADIX);
+      return fisFormat.getFieldInfosReader().read(si.info.dir, si.info, segmentSuffix, IOContext.READONCE);
     }
   }
   

Modified: lucene/dev/branches/lucene5969/lucene/core/src/test/org/apache/lucene/index/TestConsistentFieldNumbers.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5969/lucene/core/src/test/org/apache/lucene/index/TestConsistentFieldNumbers.java?rev=1629380&r1=1629379&r2=1629380&view=diff
==============================================================================
--- lucene/dev/branches/lucene5969/lucene/core/src/test/org/apache/lucene/index/TestConsistentFieldNumbers.java (original)
+++ lucene/dev/branches/lucene5969/lucene/core/src/test/org/apache/lucene/index/TestConsistentFieldNumbers.java Sat Oct  4 12:41:54 2014
@@ -68,8 +68,8 @@ public class TestConsistentFieldNumbers 
       sis.read(dir);
       assertEquals(2, sis.size());
 
-      FieldInfos fis1 = SegmentReader.readFieldInfos(sis.info(0));
-      FieldInfos fis2 = SegmentReader.readFieldInfos(sis.info(1));
+      FieldInfos fis1 = IndexWriter.readFieldInfos(sis.info(0));
+      FieldInfos fis2 = IndexWriter.readFieldInfos(sis.info(1));
 
       assertEquals("f1", fis1.fieldInfo(0).name);
       assertEquals("f2", fis1.fieldInfo(1).name);
@@ -86,7 +86,7 @@ public class TestConsistentFieldNumbers 
       sis.read(dir);
       assertEquals(1, sis.size());
 
-      FieldInfos fis3 = SegmentReader.readFieldInfos(sis.info(0));
+      FieldInfos fis3 = IndexWriter.readFieldInfos(sis.info(0));
 
       assertEquals("f1", fis3.fieldInfo(0).name);
       assertEquals("f2", fis3.fieldInfo(1).name);
@@ -134,8 +134,8 @@ public class TestConsistentFieldNumbers 
     sis.read(dir1);
     assertEquals(2, sis.size());
 
-    FieldInfos fis1 = SegmentReader.readFieldInfos(sis.info(0));
-    FieldInfos fis2 = SegmentReader.readFieldInfos(sis.info(1));
+    FieldInfos fis1 = IndexWriter.readFieldInfos(sis.info(0));
+    FieldInfos fis2 = IndexWriter.readFieldInfos(sis.info(1));
 
     assertEquals("f1", fis1.fieldInfo(0).name);
     assertEquals("f2", fis1.fieldInfo(1).name);
@@ -164,7 +164,7 @@ public class TestConsistentFieldNumbers 
         SegmentInfos sis = new SegmentInfos();
         sis.read(dir);
         assertEquals(1, sis.size());
-        FieldInfos fis1 = SegmentReader.readFieldInfos(sis.info(0));
+        FieldInfos fis1 = IndexWriter.readFieldInfos(sis.info(0));
         assertEquals("f1", fis1.fieldInfo(0).name);
         assertEquals("f2", fis1.fieldInfo(1).name);
       }
@@ -181,8 +181,8 @@ public class TestConsistentFieldNumbers 
         SegmentInfos sis = new SegmentInfos();
         sis.read(dir);
         assertEquals(2, sis.size());
-        FieldInfos fis1 = SegmentReader.readFieldInfos(sis.info(0));
-        FieldInfos fis2 = SegmentReader.readFieldInfos(sis.info(1));
+        FieldInfos fis1 = IndexWriter.readFieldInfos(sis.info(0));
+        FieldInfos fis2 = IndexWriter.readFieldInfos(sis.info(1));
         assertEquals("f1", fis1.fieldInfo(0).name);
         assertEquals("f2", fis1.fieldInfo(1).name);
         assertEquals("f1", fis2.fieldInfo(0).name);
@@ -202,9 +202,9 @@ public class TestConsistentFieldNumbers 
         SegmentInfos sis = new SegmentInfos();
         sis.read(dir);
         assertEquals(3, sis.size());
-        FieldInfos fis1 = SegmentReader.readFieldInfos(sis.info(0));
-        FieldInfos fis2 = SegmentReader.readFieldInfos(sis.info(1));
-        FieldInfos fis3 = SegmentReader.readFieldInfos(sis.info(2));
+        FieldInfos fis1 = IndexWriter.readFieldInfos(sis.info(0));
+        FieldInfos fis2 = IndexWriter.readFieldInfos(sis.info(1));
+        FieldInfos fis3 = IndexWriter.readFieldInfos(sis.info(2));
         assertEquals("f1", fis1.fieldInfo(0).name);
         assertEquals("f2", fis1.fieldInfo(1).name);
         assertEquals("f1", fis2.fieldInfo(0).name);
@@ -234,7 +234,7 @@ public class TestConsistentFieldNumbers 
       SegmentInfos sis = new SegmentInfos();
       sis.read(dir);
       assertEquals(1, sis.size());
-      FieldInfos fis1 = SegmentReader.readFieldInfos(sis.info(0));
+      FieldInfos fis1 = IndexWriter.readFieldInfos(sis.info(0));
       assertEquals("f1", fis1.fieldInfo(0).name);
       assertEquals("f2", fis1.fieldInfo(1).name);
       assertEquals("f3", fis1.fieldInfo(2).name);
@@ -272,7 +272,7 @@ public class TestConsistentFieldNumbers 
     SegmentInfos sis = new SegmentInfos();
     sis.read(dir);
     for (SegmentCommitInfo si : sis) {
-      FieldInfos fis = SegmentReader.readFieldInfos(si);
+      FieldInfos fis = IndexWriter.readFieldInfos(si);
 
       for (FieldInfo fi : fis) {
         Field expected = getField(Integer.parseInt(fi.name));

Modified: lucene/dev/branches/lucene5969/lucene/core/src/test/org/apache/lucene/index/TestTermVectorsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5969/lucene/core/src/test/org/apache/lucene/index/TestTermVectorsReader.java?rev=1629380&r1=1629379&r2=1629380&view=diff
==============================================================================
--- lucene/dev/branches/lucene5969/lucene/core/src/test/org/apache/lucene/index/TestTermVectorsReader.java (original)
+++ lucene/dev/branches/lucene5969/lucene/core/src/test/org/apache/lucene/index/TestTermVectorsReader.java Sat Oct  4 12:41:54 2014
@@ -129,7 +129,7 @@ public class TestTermVectorsReader exten
     seg = writer.newestSegment();
     writer.close();
 
-    fieldInfos = SegmentReader.readFieldInfos(seg);
+    fieldInfos = IndexWriter.readFieldInfos(seg);
   }
   
   @Override