You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2011/11/11 03:11:53 UTC

svn commit: r1200692 - in /lucene/dev/branches/lucene2621/lucene/src: java/org/apache/lucene/index/ java/org/apache/lucene/index/codecs/ test/org/apache/lucene/index/

Author: rmuir
Date: Fri Nov 11 02:11:52 2011
New Revision: 1200692

URL: http://svn.apache.org/viewvc?rev=1200692&view=rev
Log:
LUCENE-2621: move term vectors file names out of IndexFileNames and into the codec

Added:
    lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/TermVectorsConsumer.java
      - copied, changed from r1200665, lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriter.java
    lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/TermVectorsConsumerPerField.java
      - copied, changed from r1200665, lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriterPerField.java
Removed:
    lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriter.java
    lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriterPerField.java
Modified:
    lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java
    lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java
    lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/IndexFileNames.java
    lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/IndexWriter.java
    lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/SegmentInfo.java
    lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/TermsHash.java
    lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/codecs/Codec.java
    lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/codecs/DefaultTermVectorsFormat.java
    lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/codecs/DefaultTermVectorsReader.java
    lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/codecs/DefaultTermVectorsWriter.java
    lucene/dev/branches/lucene2621/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java
    lucene/dev/branches/lucene2621/lucene/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java
    lucene/dev/branches/lucene2621/lucene/src/test/org/apache/lucene/index/TestTermVectorsReader.java

Modified: lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java?rev=1200692&r1=1200691&r2=1200692&view=diff
==============================================================================
--- lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java (original)
+++ lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java Fri Nov 11 02:11:52 2011
@@ -46,7 +46,7 @@ import org.apache.lucene.util.InfoStream
  * which in turn processes the document and interacts with
  * other consumers in the indexing chain.  Certain
  * consumers, like {@link StoredFieldsConsumer} and {@link
- * TermVectorsTermsWriter}, digest a document and
+ * TermVectorsConsumer}, digest a document and
  * immediately write bytes to the "doc store" files (ie,
  * they do not consume RAM per document, except while they
  * are processing the document).

Modified: lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java?rev=1200692&r1=1200691&r2=1200692&view=diff
==============================================================================
--- lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java (original)
+++ lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java Fri Nov 11 02:11:52 2011
@@ -74,7 +74,7 @@ public class DocumentsWriterPerThread {
 
     // Build up indexing chain:
 
-      final TermsHashConsumer termVectorsWriter = new TermVectorsTermsWriter(documentsWriterPerThread);
+      final TermsHashConsumer termVectorsWriter = new TermVectorsConsumer(documentsWriterPerThread);
       final TermsHashConsumer freqProxWriter = new FreqProxTermsWriter();
 
       final InvertedDocConsumer  termsHash = new TermsHash(documentsWriterPerThread, freqProxWriter, true,

Modified: lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/IndexFileNames.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/IndexFileNames.java?rev=1200692&r1=1200691&r2=1200692&view=diff
==============================================================================
--- lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/IndexFileNames.java (original)
+++ lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/IndexFileNames.java Fri Nov 11 02:11:52 2011
@@ -19,7 +19,7 @@ package org.apache.lucene.index;
 
 import java.util.regex.Pattern;
 
-import org.apache.lucene.index.codecs.PostingsFormat;  // for javadocs
+import org.apache.lucene.index.codecs.Codec;  // for javadocs
 
 // TODO: put all files under codec and remove all the static extensions here
 
@@ -33,7 +33,7 @@ import org.apache.lucene.index.codecs.Po
  * {@link #segmentFileName(String, String, String) segmentFileName}).
  *
  * <p><b>NOTE</b>: extensions used by codecs are not
- * listed here.  You must interact with the {@link PostingsFormat}
+ * listed here.  You must interact with the {@link Codec}
  * directly.
  *
  * @lucene.internal
@@ -53,15 +53,6 @@ public final class IndexFileNames {
   /** Extension of norms file */
   public static final String NORMS_EXTENSION = "nrm";
 
-  /** Extension of vectors fields file */
-  public static final String VECTORS_FIELDS_EXTENSION = "tvf";
-
-  /** Extension of vectors documents file */
-  public static final String VECTORS_DOCUMENTS_EXTENSION = "tvd";
-
-  /** Extension of vectors index file */
-  public static final String VECTORS_INDEX_EXTENSION = "tvx";
-
   /** Extension of compound file */
   public static final String COMPOUND_FILE_EXTENSION = "cfs";
   
@@ -95,21 +86,12 @@ public final class IndexFileNames {
     COMPOUND_FILE_ENTRIES_EXTENSION,
     FIELD_INFOS_EXTENSION,
     DELETES_EXTENSION,
-    VECTORS_INDEX_EXTENSION,
-    VECTORS_DOCUMENTS_EXTENSION,
-    VECTORS_FIELDS_EXTENSION,
     GEN_EXTENSION,
     NORMS_EXTENSION,
     COMPOUND_FILE_STORE_EXTENSION,
     GLOBAL_FIELD_NUM_MAP_EXTENSION,
   };
 
-  public static final String[] STORE_INDEX_EXTENSIONS = new String[] {
-    VECTORS_INDEX_EXTENSION,
-    VECTORS_FIELDS_EXTENSION,
-    VECTORS_DOCUMENTS_EXTENSION,
-  };
-
   public static final String[] NON_STORE_INDEX_EXTENSIONS = new String[] {
     FIELD_INFOS_EXTENSION,
     NORMS_EXTENSION
@@ -118,13 +100,6 @@ public final class IndexFileNames {
   static final String COMPOUND_EXTENSIONS_NOT_CODEC[] = new String[] {
     FIELD_INFOS_EXTENSION,
   };
-  
-  /** File extensions for term vector support */
-  public static final String VECTOR_EXTENSIONS[] = new String[] {
-    VECTORS_INDEX_EXTENSION,
-    VECTORS_DOCUMENTS_EXTENSION,
-    VECTORS_FIELDS_EXTENSION
-  };
 
   /**
    * Computes the full file name from base, extension and generation. If the
@@ -157,21 +132,6 @@ public final class IndexFileNames {
   }
 
   /**
-   * Returns true if the provided filename is one of the doc store files (ends
-   * with an extension in {@link #STORE_INDEX_EXTENSIONS}).
-   */
-  // TODO: this method is stupid.
-  public static boolean isDocStoreFile(String fileName) {
-    if (fileName.endsWith(COMPOUND_FILE_STORE_EXTENSION))
-      return true;
-    for (String ext : STORE_INDEX_EXTENSIONS) {
-      if (fileName.endsWith(ext))
-        return true;
-    }
-    return false;
-  }
-
-  /**
    * Returns a file name that includes the given segment name, your own custom
    * name and extension. The format of the filename is:
    * &lt;segmentName&gt;(_&lt;name&gt;)(.&lt;ext&gt;).

Modified: lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/IndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/IndexWriter.java?rev=1200692&r1=1200691&r2=1200692&view=diff
==============================================================================
--- lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/IndexWriter.java (original)
+++ lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/IndexWriter.java Fri Nov 11 02:11:52 2011
@@ -2594,6 +2594,7 @@ public class IndexWriter implements Clos
   }
   
   /** Copies the segment files as-is into the IndexWriter's directory. */
+  // nocommit: are we testing all the cases here?!
   private void copySegmentAsIs(SegmentInfo info, String segName,
       Map<String, String> dsNames, Set<String> dsFilesCopied, IOContext context)
       throws IOException {
@@ -2620,7 +2621,8 @@ public class IndexWriter implements Clos
     // Copy the segment files
     for (String file: info.files()) {
       final String newFileName;
-      if (IndexFileNames.isDocStoreFile(file)) {
+      // nocommit: this method is slow!
+      if (info.isDocStoreFile(file)) {
         newFileName = newDsName + IndexFileNames.stripSegmentName(file);
         if (dsFilesCopied.contains(newFileName)) {
           continue;

Modified: lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/SegmentInfo.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/SegmentInfo.java?rev=1200692&r1=1200691&r2=1200692&view=diff
==============================================================================
--- lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/SegmentInfo.java (original)
+++ lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/SegmentInfo.java Fri Nov 11 02:11:52 2011
@@ -29,6 +29,7 @@ import java.util.Set;
 
 import org.apache.lucene.index.codecs.Codec;
 import org.apache.lucene.index.codecs.DefaultSegmentInfosWriter;
+import org.apache.lucene.index.codecs.DefaultTermVectorsReader;
 import org.apache.lucene.store.CompoundFileDirectory;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IOContext;
@@ -252,7 +253,8 @@ public final class SegmentInfo implement
         dirToTest = dir;
       }
       try {
-        hasVectors = dirToTest.fileExists(IndexFileNames.segmentFileName(storesSegment, "", IndexFileNames.VECTORS_INDEX_EXTENSION)) ? YES : NO;
+        // TODO: remove this manual file check or push to preflex codec
+        hasVectors = dirToTest.fileExists(IndexFileNames.segmentFileName(storesSegment, "", DefaultTermVectorsReader.VECTORS_INDEX_EXTENSION)) ? YES : NO;
       } finally {
         if (isCompoundFile) {
           dirToTest.close();
@@ -330,10 +332,11 @@ public final class SegmentInfo implement
   }
   
   // TODO: a little messy, but sizeInBytes above that uses this is the real problem.
-  private boolean isDocStoreFile(String fileName) throws IOException {
+  boolean isDocStoreFile(String fileName) throws IOException {
     Set<String> docStoreFiles = new HashSet<String>();
     codec.storedFieldsFormat().files(dir, this, docStoreFiles);
-    return IndexFileNames.isDocStoreFile(fileName) || docStoreFiles.contains(fileName);
+    codec.termVectorsFormat().files(dir, this, docStoreFiles);
+    return fileName.endsWith(IndexFileNames.COMPOUND_FILE_STORE_EXTENSION) || docStoreFiles.contains(fileName);
   }
 
   public boolean getHasVectors() throws IOException {
@@ -658,18 +661,6 @@ public final class SegmentInfo implement
       // TODO: push this out into preflex fieldsFormat?
       if (docStoreIsCompoundFile) {
         fileSet.add(IndexFileNames.segmentFileName(docStoreSegment, "", IndexFileNames.COMPOUND_FILE_STORE_EXTENSION));
-      } else {
-        if (getHasVectors()) {
-          fileSet.add(IndexFileNames.segmentFileName(docStoreSegment, "", IndexFileNames.VECTORS_INDEX_EXTENSION));
-          fileSet.add(IndexFileNames.segmentFileName(docStoreSegment, "", IndexFileNames.VECTORS_DOCUMENTS_EXTENSION));
-          fileSet.add(IndexFileNames.segmentFileName(docStoreSegment, "", IndexFileNames.VECTORS_FIELDS_EXTENSION));
-        }
-      }
-    } else if (!useCompoundFile) {
-      if (getHasVectors()) {
-        fileSet.add(IndexFileNames.segmentFileName(name, "", IndexFileNames.VECTORS_INDEX_EXTENSION));
-        fileSet.add(IndexFileNames.segmentFileName(name, "", IndexFileNames.VECTORS_DOCUMENTS_EXTENSION));
-        fileSet.add(IndexFileNames.segmentFileName(name, "", IndexFileNames.VECTORS_FIELDS_EXTENSION));
       }
     }
 

Copied: lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/TermVectorsConsumer.java (from r1200665, lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriter.java)
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/TermVectorsConsumer.java?p2=lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/TermVectorsConsumer.java&p1=lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriter.java&r1=1200665&r2=1200692&rev=1200692&view=diff
==============================================================================
--- lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriter.java (original)
+++ lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/TermVectorsConsumer.java Fri Nov 11 02:11:52 2011
@@ -28,7 +28,7 @@ import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.RamUsageEstimator;
 
-final class TermVectorsTermsWriter extends TermsHashConsumer {
+final class TermVectorsConsumer extends TermsHashConsumer {
 
   TermVectorsWriter writer;
   final DocumentsWriterPerThread docWriter;
@@ -42,7 +42,7 @@ final class TermVectorsTermsWriter exten
   final ByteSliceReader vectorSliceReader = new ByteSliceReader();
   boolean hasVectors;
 
-  public TermVectorsTermsWriter(DocumentsWriterPerThread docWriter) {
+  public TermVectorsConsumer(DocumentsWriterPerThread docWriter) {
     this.docWriter = docWriter;
     docState = docWriter.docState;
   }
@@ -63,7 +63,7 @@ final class TermVectorsTermsWriter exten
     }
 
     for (final TermsHashConsumerPerField field : fieldsToFlush.values() ) {
-      TermVectorsTermsWriterPerField perField = (TermVectorsTermsWriterPerField) field;
+      TermVectorsConsumerPerField perField = (TermVectorsConsumerPerField) field;
       perField.termsHashPerField.reset();
       perField.shrinkHash();
     }
@@ -134,22 +134,22 @@ final class TermVectorsTermsWriter exten
 
   int numVectorFields;
 
-  TermVectorsTermsWriterPerField[] perFields;
+  TermVectorsConsumerPerField[] perFields;
 
   void reset() {
     numVectorFields = 0;
-    perFields = new TermVectorsTermsWriterPerField[1];
+    perFields = new TermVectorsConsumerPerField[1];
   }
 
   @Override
   public TermsHashConsumerPerField addField(TermsHashPerField termsHashPerField, FieldInfo fieldInfo) {
-    return new TermVectorsTermsWriterPerField(termsHashPerField, this, fieldInfo);
+    return new TermVectorsConsumerPerField(termsHashPerField, this, fieldInfo);
   }
 
-  void addFieldToFlush(TermVectorsTermsWriterPerField fieldToFlush) {
+  void addFieldToFlush(TermVectorsConsumerPerField fieldToFlush) {
     if (numVectorFields == perFields.length) {
       int newSize = ArrayUtil.oversize(numVectorFields + 1, RamUsageEstimator.NUM_BYTES_OBJECT_REF);
-      TermVectorsTermsWriterPerField[] newArray = new TermVectorsTermsWriterPerField[newSize];
+      TermVectorsConsumerPerField[] newArray = new TermVectorsConsumerPerField[newSize];
       System.arraycopy(perFields, 0, newArray, 0, numVectorFields);
       perFields = newArray;
     }

Copied: lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/TermVectorsConsumerPerField.java (from r1200665, lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriterPerField.java)
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/TermVectorsConsumerPerField.java?p2=lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/TermVectorsConsumerPerField.java&p1=lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriterPerField.java&r1=1200665&r2=1200692&rev=1200692&view=diff
==============================================================================
--- lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriterPerField.java (original)
+++ lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/TermVectorsConsumerPerField.java Fri Nov 11 02:11:52 2011
@@ -25,10 +25,10 @@ import org.apache.lucene.util.ByteBlockP
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.RamUsageEstimator;
 
-final class TermVectorsTermsWriterPerField extends TermsHashConsumerPerField {
+final class TermVectorsConsumerPerField extends TermsHashConsumerPerField {
 
   final TermsHashPerField termsHashPerField;
-  final TermVectorsTermsWriter termsWriter;
+  final TermVectorsConsumer termsWriter;
   final FieldInfo fieldInfo;
   final DocumentsWriterPerThread.DocState docState;
   final FieldInvertState fieldState;
@@ -40,7 +40,7 @@ final class TermVectorsTermsWriterPerFie
   int maxNumPostings;
   OffsetAttribute offsetAttribute = null;
 
-  public TermVectorsTermsWriterPerField(TermsHashPerField termsHashPerField, TermVectorsTermsWriter termsWriter, FieldInfo fieldInfo) {
+  public TermVectorsConsumerPerField(TermsHashPerField termsHashPerField, TermVectorsConsumer termsWriter, FieldInfo fieldInfo) {
     this.termsHashPerField = termsHashPerField;
     this.termsWriter = termsWriter;
     this.fieldInfo = fieldInfo;

Modified: lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/TermsHash.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/TermsHash.java?rev=1200692&r1=1200691&r2=1200692&view=diff
==============================================================================
--- lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/TermsHash.java (original)
+++ lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/TermsHash.java Fri Nov 11 02:11:52 2011
@@ -29,7 +29,7 @@ import org.apache.lucene.util.BytesRef;
  *  field.  It stores these tokens in a hash table, and
  *  allocates separate byte streams per token.  Consumers of
  *  this class, eg {@link FreqProxTermsWriter} and {@link
- *  TermVectorsTermsWriter}, write their own byte streams
+ *  TermVectorsConsumer}, write their own byte streams
  *  under each term.
  */
 final class TermsHash extends InvertedDocConsumer {

Modified: lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/codecs/Codec.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/codecs/Codec.java?rev=1200692&r1=1200691&r2=1200692&view=diff
==============================================================================
--- lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/codecs/Codec.java (original)
+++ lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/codecs/Codec.java Fri Nov 11 02:11:52 2011
@@ -46,8 +46,7 @@ public abstract class Codec implements N
   public void files(Directory dir, SegmentInfo info, Set<String> files) throws IOException {
     postingsFormat().files(dir, info, "", files);
     storedFieldsFormat().files(dir, info, files);
-    // nocommit: not yet
-    // termVectorsFormat().files(dir, info, files);
+    termVectorsFormat().files(dir, info, files);
     // TODO: segmentInfosFormat should be allowed to declare additional files
     // if it wants, in addition to segments_N
     docValuesFormat().files(dir, info, files);

Modified: lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/codecs/DefaultTermVectorsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/codecs/DefaultTermVectorsFormat.java?rev=1200692&r1=1200691&r2=1200692&view=diff
==============================================================================
--- lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/codecs/DefaultTermVectorsFormat.java (original)
+++ lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/codecs/DefaultTermVectorsFormat.java Fri Nov 11 02:11:52 2011
@@ -39,6 +39,6 @@ public class DefaultTermVectorsFormat ex
 
   @Override
   public void files(Directory dir, SegmentInfo info, Set<String> files) throws IOException {
-    // nocommit impl
+    DefaultTermVectorsReader.files(dir, info, files);
   }
 }

Modified: lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/codecs/DefaultTermVectorsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/codecs/DefaultTermVectorsReader.java?rev=1200692&r1=1200691&r2=1200692&view=diff
==============================================================================
--- lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/codecs/DefaultTermVectorsReader.java (original)
+++ lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/codecs/DefaultTermVectorsReader.java Fri Nov 11 02:11:52 2011
@@ -22,6 +22,7 @@ import org.apache.lucene.index.FieldInfo
 import org.apache.lucene.index.IndexFileNames;
 import org.apache.lucene.index.IndexFormatTooNewException;
 import org.apache.lucene.index.IndexFormatTooOldException;
+import org.apache.lucene.index.SegmentInfo;
 import org.apache.lucene.index.TermFreqVector;
 import org.apache.lucene.index.TermVectorMapper;
 import org.apache.lucene.index.TermVectorOffsetInfo;
@@ -34,6 +35,7 @@ import org.apache.lucene.util.IOUtils;
 
 import java.io.IOException;
 import java.util.Arrays;
+import java.util.Set;
 
 public class DefaultTermVectorsReader extends TermVectorsReader {
 
@@ -45,8 +47,7 @@ public class DefaultTermVectorsReader ex
 
   // NOTE: always change this if you switch to a new format!
   // whenever you add a new format, make it 1 larger (positive version logic)!
-  // nocommit: not public
-  public static final int FORMAT_CURRENT = FORMAT_UTF8_LENGTH_IN_BYTES;
+  static final int FORMAT_CURRENT = FORMAT_UTF8_LENGTH_IN_BYTES;
   
   // when removing support for old versions, leave the last supported version here
   static final int FORMAT_MINIMUM = FORMAT_UTF8_LENGTH_IN_BYTES;
@@ -54,11 +55,22 @@ public class DefaultTermVectorsReader ex
   //The size in bytes that the FORMAT_VERSION will take up at the beginning of each file 
   static final int FORMAT_SIZE = 4;
 
-  // nocommit: not public
-  public static final byte STORE_POSITIONS_WITH_TERMVECTOR = 0x1;
-  // nocommit: not public
-  public static final byte STORE_OFFSET_WITH_TERMVECTOR = 0x2;
+  static final byte STORE_POSITIONS_WITH_TERMVECTOR = 0x1;
+
+  static final byte STORE_OFFSET_WITH_TERMVECTOR = 0x2;
   
+  /** Extension of vectors fields file */
+  // TODO: make pkg-private after we remove/refactor fileExists check in SI or drop 3.x support
+  public static final String VECTORS_FIELDS_EXTENSION = "tvf";
+
+  /** Extension of vectors documents file */
+  // TODO: make pkg-private after we remove/refactor fileExists check in SI or drop 3.x support
+  public static final String VECTORS_DOCUMENTS_EXTENSION = "tvd";
+
+  /** Extension of vectors index file */
+  // TODO: make pkg-private after we remove/refactor fileExists check in SI or drop 3.x support
+  public static final String VECTORS_INDEX_EXTENSION = "tvx";
+
   private FieldInfos fieldInfos;
 
   private IndexInput tvx;
@@ -96,13 +108,13 @@ public class DefaultTermVectorsReader ex
     boolean success = false;
 
     try {
-      String idxName = IndexFileNames.segmentFileName(segment, "", IndexFileNames.VECTORS_INDEX_EXTENSION);
+      String idxName = IndexFileNames.segmentFileName(segment, "", VECTORS_INDEX_EXTENSION);
       tvx = d.openInput(idxName, context);
       format = checkValidFormat(tvx, idxName);
-      String fn = IndexFileNames.segmentFileName(segment, "", IndexFileNames.VECTORS_DOCUMENTS_EXTENSION);
+      String fn = IndexFileNames.segmentFileName(segment, "", VECTORS_DOCUMENTS_EXTENSION);
       tvd = d.openInput(fn, context);
       final int tvdFormat = checkValidFormat(tvd, fn);
-      fn = IndexFileNames.segmentFileName(segment, "", IndexFileNames.VECTORS_FIELDS_EXTENSION);
+      fn = IndexFileNames.segmentFileName(segment, "", VECTORS_FIELDS_EXTENSION);
       tvf = d.openInput(fn, context);
       final int tvfFormat = checkValidFormat(tvf, fn);
 
@@ -501,5 +513,22 @@ public class DefaultTermVectorsReader ex
     
     return new DefaultTermVectorsReader(fieldInfos, cloneTvx, cloneTvd, cloneTvf, size, numTotalDocs, docStoreOffset, format);
   }
+  
+  public static void files(Directory dir, SegmentInfo info, Set<String> files) throws IOException {
+    if (info.getHasVectors()) {
+      if (info.getDocStoreOffset() != -1) {
+        assert info.getDocStoreSegment() != null;
+        if (!info.getDocStoreIsCompoundFile()) {
+          files.add(IndexFileNames.segmentFileName(info.getDocStoreSegment(), "", VECTORS_INDEX_EXTENSION));
+          files.add(IndexFileNames.segmentFileName(info.getDocStoreSegment(), "", VECTORS_FIELDS_EXTENSION));
+          files.add(IndexFileNames.segmentFileName(info.getDocStoreSegment(), "", VECTORS_DOCUMENTS_EXTENSION));
+        }
+      } else {
+        files.add(IndexFileNames.segmentFileName(info.name, "", VECTORS_INDEX_EXTENSION));
+        files.add(IndexFileNames.segmentFileName(info.name, "", VECTORS_FIELDS_EXTENSION));
+        files.add(IndexFileNames.segmentFileName(info.name, "", VECTORS_DOCUMENTS_EXTENSION));
+      }
+    }
+  }
 }
 

Modified: lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/codecs/DefaultTermVectorsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/codecs/DefaultTermVectorsWriter.java?rev=1200692&r1=1200691&r2=1200692&view=diff
==============================================================================
--- lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/codecs/DefaultTermVectorsWriter.java (original)
+++ lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/codecs/DefaultTermVectorsWriter.java Fri Nov 11 02:11:52 2011
@@ -48,11 +48,11 @@ public final class DefaultTermVectorsWri
     boolean success = false;
     try {
       // Open files for TermVector storage
-      tvx = directory.createOutput(IndexFileNames.segmentFileName(segment, "", IndexFileNames.VECTORS_INDEX_EXTENSION), context);
+      tvx = directory.createOutput(IndexFileNames.segmentFileName(segment, "", DefaultTermVectorsReader.VECTORS_INDEX_EXTENSION), context);
       tvx.writeInt(DefaultTermVectorsReader.FORMAT_CURRENT);
-      tvd = directory.createOutput(IndexFileNames.segmentFileName(segment, "", IndexFileNames.VECTORS_DOCUMENTS_EXTENSION), context);
+      tvd = directory.createOutput(IndexFileNames.segmentFileName(segment, "", DefaultTermVectorsReader.VECTORS_DOCUMENTS_EXTENSION), context);
       tvd.writeInt(DefaultTermVectorsReader.FORMAT_CURRENT);
-      tvf = directory.createOutput(IndexFileNames.segmentFileName(segment, "", IndexFileNames.VECTORS_FIELDS_EXTENSION), context);
+      tvf = directory.createOutput(IndexFileNames.segmentFileName(segment, "", DefaultTermVectorsReader.VECTORS_FIELDS_EXTENSION), context);
       tvf.writeInt(DefaultTermVectorsReader.FORMAT_CURRENT);
       success = true;
     } finally {
@@ -254,15 +254,15 @@ public final class DefaultTermVectorsWri
     } catch (IOException ignored) {}
     
     try {
-      directory.deleteFile(IndexFileNames.segmentFileName(segment, "", IndexFileNames.VECTORS_INDEX_EXTENSION));
+      directory.deleteFile(IndexFileNames.segmentFileName(segment, "", DefaultTermVectorsReader.VECTORS_INDEX_EXTENSION));
     } catch (IOException ignored) {}
     
     try {
-      directory.deleteFile(IndexFileNames.segmentFileName(segment, "", IndexFileNames.VECTORS_DOCUMENTS_EXTENSION));
+      directory.deleteFile(IndexFileNames.segmentFileName(segment, "", DefaultTermVectorsReader.VECTORS_DOCUMENTS_EXTENSION));
     } catch (IOException ignored) {}
     
     try {
-      directory.deleteFile(IndexFileNames.segmentFileName(segment, "", IndexFileNames.VECTORS_FIELDS_EXTENSION));
+      directory.deleteFile(IndexFileNames.segmentFileName(segment, "", DefaultTermVectorsReader.VECTORS_FIELDS_EXTENSION));
     } catch (IOException ignored) {}
   }
 

Modified: lucene/dev/branches/lucene2621/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2621/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java?rev=1200692&r1=1200691&r2=1200692&view=diff
==============================================================================
--- lucene/dev/branches/lucene2621/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java (original)
+++ lucene/dev/branches/lucene2621/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java Fri Nov 11 02:11:52 2011
@@ -42,6 +42,7 @@ import org.apache.lucene.document.String
 import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
 import org.apache.lucene.index.codecs.Codec;
+import org.apache.lucene.index.codecs.DefaultTermVectorsReader;
 import org.apache.lucene.index.codecs.simpletext.SimpleTextCodec;
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.FieldCache;
@@ -1736,10 +1737,11 @@ public class TestIndexWriter extends Luc
 
     assertNoUnreferencedFiles(dir, "no tv files");
     String[] files = dir.listAll();
+    // TODO: this check should use codec's files()
     for(String file : files) {
-      assertTrue(!file.endsWith(IndexFileNames.VECTORS_FIELDS_EXTENSION));
-      assertTrue(!file.endsWith(IndexFileNames.VECTORS_INDEX_EXTENSION));
-      assertTrue(!file.endsWith(IndexFileNames.VECTORS_DOCUMENTS_EXTENSION));
+      assertTrue(!file.endsWith(DefaultTermVectorsReader.VECTORS_FIELDS_EXTENSION));
+      assertTrue(!file.endsWith(DefaultTermVectorsReader.VECTORS_INDEX_EXTENSION));
+      assertTrue(!file.endsWith(DefaultTermVectorsReader.VECTORS_DOCUMENTS_EXTENSION));
     }
 
     dir.close();

Modified: lucene/dev/branches/lucene2621/lucene/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2621/lucene/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java?rev=1200692&r1=1200691&r2=1200692&view=diff
==============================================================================
--- lucene/dev/branches/lucene2621/lucene/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java (original)
+++ lucene/dev/branches/lucene2621/lucene/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java Fri Nov 11 02:11:52 2011
@@ -538,7 +538,7 @@ public class TestIndexWriterExceptions e
         boolean sawAppend = false;
         boolean sawFlush = false;
         for (int i = 0; i < trace.length; i++) {
-          if ("org.apache.lucene.index.FreqProxTermsWriterPerField".equals(trace[i].getClassName()) && "flush".equals(trace[i].getMethodName()))
+          if (FreqProxTermsWriterPerField.class.getName().equals(trace[i].getClassName()) && "flush".equals(trace[i].getMethodName()))
             sawAppend = true;
           if ("flush".equals(trace[i].getMethodName()))
             sawFlush = true;
@@ -797,7 +797,7 @@ public class TestIndexWriterExceptions e
       if (doFail) {
         StackTraceElement[] trace = new Exception().getStackTrace();
         for (int i = 0; i < trace.length; i++) {
-          if (doFail && "org.apache.lucene.store.MockDirectoryWrapper".equals(trace[i].getClassName()) && "sync".equals(trace[i].getMethodName())) {
+          if (doFail && MockDirectoryWrapper.class.getName().equals(trace[i].getClassName()) && "sync".equals(trace[i].getMethodName())) {
             didFail = true;
             throw new IOException("now failing on purpose during sync");
           }
@@ -871,11 +871,11 @@ public class TestIndexWriterExceptions e
       boolean isDelete = false;
       boolean isInGlobalFieldMap = false;
       for (int i = 0; i < trace.length; i++) {
-        if ("org.apache.lucene.index.SegmentInfos".equals(trace[i].getClassName()) && stage.equals(trace[i].getMethodName()))
+        if (SegmentInfos.class.getName().equals(trace[i].getClassName()) && stage.equals(trace[i].getMethodName()))
           isCommit = true;
-        if ("org.apache.lucene.store.MockDirectoryWrapper".equals(trace[i].getClassName()) && "deleteFile".equals(trace[i].getMethodName()))
+        if (MockDirectoryWrapper.class.getName().equals(trace[i].getClassName()) && "deleteFile".equals(trace[i].getMethodName()))
           isDelete = true;
-        if ("org.apache.lucene.index.SegmentInfos".equals(trace[i].getClassName()) && "writeGlobalFieldMap".equals(trace[i].getMethodName()))
+        if (SegmentInfos.class.getName().equals(trace[i].getClassName()) && "writeGlobalFieldMap".equals(trace[i].getMethodName()))
           isInGlobalFieldMap = true;
           
       }
@@ -1309,9 +1309,9 @@ public class TestIndexWriterExceptions e
       boolean failOnInit = false;
       boolean failOnfinish = false;
       for (int i = 0; i < trace.length; i++) {
-        if ("org.apache.lucene.index.TermVectorsTermsWriter".equals(trace[i].getClassName()) && stage.equals(trace[i].getMethodName()))
+        if (TermVectorsConsumer.class.getName().equals(trace[i].getClassName()) && stage.equals(trace[i].getMethodName()))
           failOnInit = true;
-        if ("org.apache.lucene.index.TermVectorsTermsWriter".equals(trace[i].getClassName()) && stage.equals(trace[i].getMethodName()))
+        if (TermVectorsConsumer.class.getName().equals(trace[i].getClassName()) && stage.equals(trace[i].getMethodName()))
           failOnfinish = true;
       }
       

Modified: lucene/dev/branches/lucene2621/lucene/src/test/org/apache/lucene/index/TestTermVectorsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2621/lucene/src/test/org/apache/lucene/index/TestTermVectorsReader.java?rev=1200692&r1=1200691&r2=1200692&view=diff
==============================================================================
--- lucene/dev/branches/lucene2621/lucene/src/test/org/apache/lucene/index/TestTermVectorsReader.java (original)
+++ lucene/dev/branches/lucene2621/lucene/src/test/org/apache/lucene/index/TestTermVectorsReader.java Fri Nov 11 02:11:52 2011
@@ -32,6 +32,7 @@ import org.apache.lucene.document.Docume
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldType;
 import org.apache.lucene.document.TextField;
+import org.apache.lucene.index.codecs.Codec;
 import org.apache.lucene.index.codecs.DefaultTermVectorsReader;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.BytesRef;
@@ -185,8 +186,10 @@ public class TestTermVectorsReader exten
 
   public void test() throws IOException {
     //Check to see the files were created properly in setup
-    assertTrue(dir.fileExists(IndexFileNames.segmentFileName(seg, "", IndexFileNames.VECTORS_DOCUMENTS_EXTENSION)));
-    assertTrue(dir.fileExists(IndexFileNames.segmentFileName(seg, "", IndexFileNames.VECTORS_INDEX_EXTENSION)));
+    // TODO: fix this or at least add 'or' simpletext's extension
+    assumeFalse("test makes assumptions about filenames", Codec.getDefault().getName().equals("SimpleText"));
+    assertTrue(dir.fileExists(IndexFileNames.segmentFileName(seg, "", DefaultTermVectorsReader.VECTORS_DOCUMENTS_EXTENSION)));
+    assertTrue(dir.fileExists(IndexFileNames.segmentFileName(seg, "", DefaultTermVectorsReader.VECTORS_INDEX_EXTENSION)));
   }
 
   public void testReader() throws IOException {