You are viewing a plain text version of this content. The canonical version is available from the original mailing-list archive (the hyperlink was not preserved in this plain text copy).
Posted to commits@lucene.apache.org by rm...@apache.org on 2011/11/11 03:11:53 UTC
svn commit: r1200692 - in /lucene/dev/branches/lucene2621/lucene/src:
java/org/apache/lucene/index/ java/org/apache/lucene/index/codecs/
test/org/apache/lucene/index/
Author: rmuir
Date: Fri Nov 11 02:11:52 2011
New Revision: 1200692
URL: http://svn.apache.org/viewvc?rev=1200692&view=rev
Log:
LUCENE-2621: move term vectors filenames out of indexfilenames into codec
Added:
lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/TermVectorsConsumer.java
- copied, changed from r1200665, lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriter.java
lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/TermVectorsConsumerPerField.java
- copied, changed from r1200665, lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriterPerField.java
Removed:
lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriter.java
lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriterPerField.java
Modified:
lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java
lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java
lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/IndexFileNames.java
lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/IndexWriter.java
lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/SegmentInfo.java
lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/TermsHash.java
lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/codecs/Codec.java
lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/codecs/DefaultTermVectorsFormat.java
lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/codecs/DefaultTermVectorsReader.java
lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/codecs/DefaultTermVectorsWriter.java
lucene/dev/branches/lucene2621/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java
lucene/dev/branches/lucene2621/lucene/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java
lucene/dev/branches/lucene2621/lucene/src/test/org/apache/lucene/index/TestTermVectorsReader.java
Modified: lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java?rev=1200692&r1=1200691&r2=1200692&view=diff
==============================================================================
--- lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java (original)
+++ lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java Fri Nov 11 02:11:52 2011
@@ -46,7 +46,7 @@ import org.apache.lucene.util.InfoStream
* which in turn processes the document and interacts with
* other consumers in the indexing chain. Certain
* consumers, like {@link StoredFieldsConsumer} and {@link
- * TermVectorsTermsWriter}, digest a document and
+ * TermVectorsConsumer}, digest a document and
* immediately write bytes to the "doc store" files (ie,
* they do not consume RAM per document, except while they
* are processing the document).
Modified: lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java?rev=1200692&r1=1200691&r2=1200692&view=diff
==============================================================================
--- lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java (original)
+++ lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java Fri Nov 11 02:11:52 2011
@@ -74,7 +74,7 @@ public class DocumentsWriterPerThread {
// Build up indexing chain:
- final TermsHashConsumer termVectorsWriter = new TermVectorsTermsWriter(documentsWriterPerThread);
+ final TermsHashConsumer termVectorsWriter = new TermVectorsConsumer(documentsWriterPerThread);
final TermsHashConsumer freqProxWriter = new FreqProxTermsWriter();
final InvertedDocConsumer termsHash = new TermsHash(documentsWriterPerThread, freqProxWriter, true,
Modified: lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/IndexFileNames.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/IndexFileNames.java?rev=1200692&r1=1200691&r2=1200692&view=diff
==============================================================================
--- lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/IndexFileNames.java (original)
+++ lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/IndexFileNames.java Fri Nov 11 02:11:52 2011
@@ -19,7 +19,7 @@ package org.apache.lucene.index;
import java.util.regex.Pattern;
-import org.apache.lucene.index.codecs.PostingsFormat; // for javadocs
+import org.apache.lucene.index.codecs.Codec; // for javadocs
// TODO: put all files under codec and remove all the static extensions here
@@ -33,7 +33,7 @@ import org.apache.lucene.index.codecs.Po
* {@link #segmentFileName(String, String, String) segmentFileName}).
*
* <p><b>NOTE</b>: extensions used by codecs are not
- * listed here. You must interact with the {@link PostingsFormat}
+ * listed here. You must interact with the {@link Codec}
* directly.
*
* @lucene.internal
@@ -53,15 +53,6 @@ public final class IndexFileNames {
/** Extension of norms file */
public static final String NORMS_EXTENSION = "nrm";
- /** Extension of vectors fields file */
- public static final String VECTORS_FIELDS_EXTENSION = "tvf";
-
- /** Extension of vectors documents file */
- public static final String VECTORS_DOCUMENTS_EXTENSION = "tvd";
-
- /** Extension of vectors index file */
- public static final String VECTORS_INDEX_EXTENSION = "tvx";
-
/** Extension of compound file */
public static final String COMPOUND_FILE_EXTENSION = "cfs";
@@ -95,21 +86,12 @@ public final class IndexFileNames {
COMPOUND_FILE_ENTRIES_EXTENSION,
FIELD_INFOS_EXTENSION,
DELETES_EXTENSION,
- VECTORS_INDEX_EXTENSION,
- VECTORS_DOCUMENTS_EXTENSION,
- VECTORS_FIELDS_EXTENSION,
GEN_EXTENSION,
NORMS_EXTENSION,
COMPOUND_FILE_STORE_EXTENSION,
GLOBAL_FIELD_NUM_MAP_EXTENSION,
};
- public static final String[] STORE_INDEX_EXTENSIONS = new String[] {
- VECTORS_INDEX_EXTENSION,
- VECTORS_FIELDS_EXTENSION,
- VECTORS_DOCUMENTS_EXTENSION,
- };
-
public static final String[] NON_STORE_INDEX_EXTENSIONS = new String[] {
FIELD_INFOS_EXTENSION,
NORMS_EXTENSION
@@ -118,13 +100,6 @@ public final class IndexFileNames {
static final String COMPOUND_EXTENSIONS_NOT_CODEC[] = new String[] {
FIELD_INFOS_EXTENSION,
};
-
- /** File extensions for term vector support */
- public static final String VECTOR_EXTENSIONS[] = new String[] {
- VECTORS_INDEX_EXTENSION,
- VECTORS_DOCUMENTS_EXTENSION,
- VECTORS_FIELDS_EXTENSION
- };
/**
* Computes the full file name from base, extension and generation. If the
@@ -157,21 +132,6 @@ public final class IndexFileNames {
}
/**
- * Returns true if the provided filename is one of the doc store files (ends
- * with an extension in {@link #STORE_INDEX_EXTENSIONS}).
- */
- // TODO: this method is stupid.
- public static boolean isDocStoreFile(String fileName) {
- if (fileName.endsWith(COMPOUND_FILE_STORE_EXTENSION))
- return true;
- for (String ext : STORE_INDEX_EXTENSIONS) {
- if (fileName.endsWith(ext))
- return true;
- }
- return false;
- }
-
- /**
* Returns a file name that includes the given segment name, your own custom
* name and extension. The format of the filename is:
* <segmentName>(_<name>)(.<ext>).
Modified: lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/IndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/IndexWriter.java?rev=1200692&r1=1200691&r2=1200692&view=diff
==============================================================================
--- lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/IndexWriter.java (original)
+++ lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/IndexWriter.java Fri Nov 11 02:11:52 2011
@@ -2594,6 +2594,7 @@ public class IndexWriter implements Clos
}
/** Copies the segment files as-is into the IndexWriter's directory. */
+ // nocommit: are we testing all the cases here?!
private void copySegmentAsIs(SegmentInfo info, String segName,
Map<String, String> dsNames, Set<String> dsFilesCopied, IOContext context)
throws IOException {
@@ -2620,7 +2621,8 @@ public class IndexWriter implements Clos
// Copy the segment files
for (String file: info.files()) {
final String newFileName;
- if (IndexFileNames.isDocStoreFile(file)) {
+ // nocommit: this method is slow!
+ if (info.isDocStoreFile(file)) {
newFileName = newDsName + IndexFileNames.stripSegmentName(file);
if (dsFilesCopied.contains(newFileName)) {
continue;
Modified: lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/SegmentInfo.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/SegmentInfo.java?rev=1200692&r1=1200691&r2=1200692&view=diff
==============================================================================
--- lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/SegmentInfo.java (original)
+++ lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/SegmentInfo.java Fri Nov 11 02:11:52 2011
@@ -29,6 +29,7 @@ import java.util.Set;
import org.apache.lucene.index.codecs.Codec;
import org.apache.lucene.index.codecs.DefaultSegmentInfosWriter;
+import org.apache.lucene.index.codecs.DefaultTermVectorsReader;
import org.apache.lucene.store.CompoundFileDirectory;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
@@ -252,7 +253,8 @@ public final class SegmentInfo implement
dirToTest = dir;
}
try {
- hasVectors = dirToTest.fileExists(IndexFileNames.segmentFileName(storesSegment, "", IndexFileNames.VECTORS_INDEX_EXTENSION)) ? YES : NO;
+ // TODO: remove this manual file check or push to preflex codec
+ hasVectors = dirToTest.fileExists(IndexFileNames.segmentFileName(storesSegment, "", DefaultTermVectorsReader.VECTORS_INDEX_EXTENSION)) ? YES : NO;
} finally {
if (isCompoundFile) {
dirToTest.close();
@@ -330,10 +332,11 @@ public final class SegmentInfo implement
}
// TODO: a little messy, but sizeInBytes above that uses this is the real problem.
- private boolean isDocStoreFile(String fileName) throws IOException {
+ boolean isDocStoreFile(String fileName) throws IOException {
Set<String> docStoreFiles = new HashSet<String>();
codec.storedFieldsFormat().files(dir, this, docStoreFiles);
- return IndexFileNames.isDocStoreFile(fileName) || docStoreFiles.contains(fileName);
+ codec.termVectorsFormat().files(dir, this, docStoreFiles);
+ return fileName.endsWith(IndexFileNames.COMPOUND_FILE_STORE_EXTENSION) || docStoreFiles.contains(fileName);
}
public boolean getHasVectors() throws IOException {
@@ -658,18 +661,6 @@ public final class SegmentInfo implement
// TODO: push this out into preflex fieldsFormat?
if (docStoreIsCompoundFile) {
fileSet.add(IndexFileNames.segmentFileName(docStoreSegment, "", IndexFileNames.COMPOUND_FILE_STORE_EXTENSION));
- } else {
- if (getHasVectors()) {
- fileSet.add(IndexFileNames.segmentFileName(docStoreSegment, "", IndexFileNames.VECTORS_INDEX_EXTENSION));
- fileSet.add(IndexFileNames.segmentFileName(docStoreSegment, "", IndexFileNames.VECTORS_DOCUMENTS_EXTENSION));
- fileSet.add(IndexFileNames.segmentFileName(docStoreSegment, "", IndexFileNames.VECTORS_FIELDS_EXTENSION));
- }
- }
- } else if (!useCompoundFile) {
- if (getHasVectors()) {
- fileSet.add(IndexFileNames.segmentFileName(name, "", IndexFileNames.VECTORS_INDEX_EXTENSION));
- fileSet.add(IndexFileNames.segmentFileName(name, "", IndexFileNames.VECTORS_DOCUMENTS_EXTENSION));
- fileSet.add(IndexFileNames.segmentFileName(name, "", IndexFileNames.VECTORS_FIELDS_EXTENSION));
}
}
Copied: lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/TermVectorsConsumer.java (from r1200665, lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriter.java)
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/TermVectorsConsumer.java?p2=lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/TermVectorsConsumer.java&p1=lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriter.java&r1=1200665&r2=1200692&rev=1200692&view=diff
==============================================================================
--- lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriter.java (original)
+++ lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/TermVectorsConsumer.java Fri Nov 11 02:11:52 2011
@@ -28,7 +28,7 @@ import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.RamUsageEstimator;
-final class TermVectorsTermsWriter extends TermsHashConsumer {
+final class TermVectorsConsumer extends TermsHashConsumer {
TermVectorsWriter writer;
final DocumentsWriterPerThread docWriter;
@@ -42,7 +42,7 @@ final class TermVectorsTermsWriter exten
final ByteSliceReader vectorSliceReader = new ByteSliceReader();
boolean hasVectors;
- public TermVectorsTermsWriter(DocumentsWriterPerThread docWriter) {
+ public TermVectorsConsumer(DocumentsWriterPerThread docWriter) {
this.docWriter = docWriter;
docState = docWriter.docState;
}
@@ -63,7 +63,7 @@ final class TermVectorsTermsWriter exten
}
for (final TermsHashConsumerPerField field : fieldsToFlush.values() ) {
- TermVectorsTermsWriterPerField perField = (TermVectorsTermsWriterPerField) field;
+ TermVectorsConsumerPerField perField = (TermVectorsConsumerPerField) field;
perField.termsHashPerField.reset();
perField.shrinkHash();
}
@@ -134,22 +134,22 @@ final class TermVectorsTermsWriter exten
int numVectorFields;
- TermVectorsTermsWriterPerField[] perFields;
+ TermVectorsConsumerPerField[] perFields;
void reset() {
numVectorFields = 0;
- perFields = new TermVectorsTermsWriterPerField[1];
+ perFields = new TermVectorsConsumerPerField[1];
}
@Override
public TermsHashConsumerPerField addField(TermsHashPerField termsHashPerField, FieldInfo fieldInfo) {
- return new TermVectorsTermsWriterPerField(termsHashPerField, this, fieldInfo);
+ return new TermVectorsConsumerPerField(termsHashPerField, this, fieldInfo);
}
- void addFieldToFlush(TermVectorsTermsWriterPerField fieldToFlush) {
+ void addFieldToFlush(TermVectorsConsumerPerField fieldToFlush) {
if (numVectorFields == perFields.length) {
int newSize = ArrayUtil.oversize(numVectorFields + 1, RamUsageEstimator.NUM_BYTES_OBJECT_REF);
- TermVectorsTermsWriterPerField[] newArray = new TermVectorsTermsWriterPerField[newSize];
+ TermVectorsConsumerPerField[] newArray = new TermVectorsConsumerPerField[newSize];
System.arraycopy(perFields, 0, newArray, 0, numVectorFields);
perFields = newArray;
}
Copied: lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/TermVectorsConsumerPerField.java (from r1200665, lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriterPerField.java)
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/TermVectorsConsumerPerField.java?p2=lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/TermVectorsConsumerPerField.java&p1=lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriterPerField.java&r1=1200665&r2=1200692&rev=1200692&view=diff
==============================================================================
--- lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriterPerField.java (original)
+++ lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/TermVectorsConsumerPerField.java Fri Nov 11 02:11:52 2011
@@ -25,10 +25,10 @@ import org.apache.lucene.util.ByteBlockP
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.RamUsageEstimator;
-final class TermVectorsTermsWriterPerField extends TermsHashConsumerPerField {
+final class TermVectorsConsumerPerField extends TermsHashConsumerPerField {
final TermsHashPerField termsHashPerField;
- final TermVectorsTermsWriter termsWriter;
+ final TermVectorsConsumer termsWriter;
final FieldInfo fieldInfo;
final DocumentsWriterPerThread.DocState docState;
final FieldInvertState fieldState;
@@ -40,7 +40,7 @@ final class TermVectorsTermsWriterPerFie
int maxNumPostings;
OffsetAttribute offsetAttribute = null;
- public TermVectorsTermsWriterPerField(TermsHashPerField termsHashPerField, TermVectorsTermsWriter termsWriter, FieldInfo fieldInfo) {
+ public TermVectorsConsumerPerField(TermsHashPerField termsHashPerField, TermVectorsConsumer termsWriter, FieldInfo fieldInfo) {
this.termsHashPerField = termsHashPerField;
this.termsWriter = termsWriter;
this.fieldInfo = fieldInfo;
Modified: lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/TermsHash.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/TermsHash.java?rev=1200692&r1=1200691&r2=1200692&view=diff
==============================================================================
--- lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/TermsHash.java (original)
+++ lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/TermsHash.java Fri Nov 11 02:11:52 2011
@@ -29,7 +29,7 @@ import org.apache.lucene.util.BytesRef;
* field. It stores these tokens in a hash table, and
* allocates separate byte streams per token. Consumers of
* this class, eg {@link FreqProxTermsWriter} and {@link
- * TermVectorsTermsWriter}, write their own byte streams
+ * TermVectorsConsumer}, write their own byte streams
* under each term.
*/
final class TermsHash extends InvertedDocConsumer {
Modified: lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/codecs/Codec.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/codecs/Codec.java?rev=1200692&r1=1200691&r2=1200692&view=diff
==============================================================================
--- lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/codecs/Codec.java (original)
+++ lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/codecs/Codec.java Fri Nov 11 02:11:52 2011
@@ -46,8 +46,7 @@ public abstract class Codec implements N
public void files(Directory dir, SegmentInfo info, Set<String> files) throws IOException {
postingsFormat().files(dir, info, "", files);
storedFieldsFormat().files(dir, info, files);
- // nocommit: not yet
- // termVectorsFormat().files(dir, info, files);
+ termVectorsFormat().files(dir, info, files);
// TODO: segmentInfosFormat should be allowed to declare additional files
// if it wants, in addition to segments_N
docValuesFormat().files(dir, info, files);
Modified: lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/codecs/DefaultTermVectorsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/codecs/DefaultTermVectorsFormat.java?rev=1200692&r1=1200691&r2=1200692&view=diff
==============================================================================
--- lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/codecs/DefaultTermVectorsFormat.java (original)
+++ lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/codecs/DefaultTermVectorsFormat.java Fri Nov 11 02:11:52 2011
@@ -39,6 +39,6 @@ public class DefaultTermVectorsFormat ex
@Override
public void files(Directory dir, SegmentInfo info, Set<String> files) throws IOException {
- // nocommit impl
+ DefaultTermVectorsReader.files(dir, info, files);
}
}
Modified: lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/codecs/DefaultTermVectorsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/codecs/DefaultTermVectorsReader.java?rev=1200692&r1=1200691&r2=1200692&view=diff
==============================================================================
--- lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/codecs/DefaultTermVectorsReader.java (original)
+++ lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/codecs/DefaultTermVectorsReader.java Fri Nov 11 02:11:52 2011
@@ -22,6 +22,7 @@ import org.apache.lucene.index.FieldInfo
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.IndexFormatTooNewException;
import org.apache.lucene.index.IndexFormatTooOldException;
+import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.TermFreqVector;
import org.apache.lucene.index.TermVectorMapper;
import org.apache.lucene.index.TermVectorOffsetInfo;
@@ -34,6 +35,7 @@ import org.apache.lucene.util.IOUtils;
import java.io.IOException;
import java.util.Arrays;
+import java.util.Set;
public class DefaultTermVectorsReader extends TermVectorsReader {
@@ -45,8 +47,7 @@ public class DefaultTermVectorsReader ex
// NOTE: always change this if you switch to a new format!
// whenever you add a new format, make it 1 larger (positive version logic)!
- // nocommit: not public
- public static final int FORMAT_CURRENT = FORMAT_UTF8_LENGTH_IN_BYTES;
+ static final int FORMAT_CURRENT = FORMAT_UTF8_LENGTH_IN_BYTES;
// when removing support for old versions, leave the last supported version here
static final int FORMAT_MINIMUM = FORMAT_UTF8_LENGTH_IN_BYTES;
@@ -54,11 +55,22 @@ public class DefaultTermVectorsReader ex
//The size in bytes that the FORMAT_VERSION will take up at the beginning of each file
static final int FORMAT_SIZE = 4;
- // nocommit: not public
- public static final byte STORE_POSITIONS_WITH_TERMVECTOR = 0x1;
- // nocommit: not public
- public static final byte STORE_OFFSET_WITH_TERMVECTOR = 0x2;
+ static final byte STORE_POSITIONS_WITH_TERMVECTOR = 0x1;
+
+ static final byte STORE_OFFSET_WITH_TERMVECTOR = 0x2;
+ /** Extension of vectors fields file */
+ // TODO: make pkg-private after we remove/refactor fileExists check in SI or drop 3.x support
+ public static final String VECTORS_FIELDS_EXTENSION = "tvf";
+
+ /** Extension of vectors documents file */
+ // TODO: make pkg-private after we remove/refactor fileExists check in SI or drop 3.x support
+ public static final String VECTORS_DOCUMENTS_EXTENSION = "tvd";
+
+ /** Extension of vectors index file */
+ // TODO: make pkg-private after we remove/refactor fileExists check in SI or drop 3.x support
+ public static final String VECTORS_INDEX_EXTENSION = "tvx";
+
private FieldInfos fieldInfos;
private IndexInput tvx;
@@ -96,13 +108,13 @@ public class DefaultTermVectorsReader ex
boolean success = false;
try {
- String idxName = IndexFileNames.segmentFileName(segment, "", IndexFileNames.VECTORS_INDEX_EXTENSION);
+ String idxName = IndexFileNames.segmentFileName(segment, "", VECTORS_INDEX_EXTENSION);
tvx = d.openInput(idxName, context);
format = checkValidFormat(tvx, idxName);
- String fn = IndexFileNames.segmentFileName(segment, "", IndexFileNames.VECTORS_DOCUMENTS_EXTENSION);
+ String fn = IndexFileNames.segmentFileName(segment, "", VECTORS_DOCUMENTS_EXTENSION);
tvd = d.openInput(fn, context);
final int tvdFormat = checkValidFormat(tvd, fn);
- fn = IndexFileNames.segmentFileName(segment, "", IndexFileNames.VECTORS_FIELDS_EXTENSION);
+ fn = IndexFileNames.segmentFileName(segment, "", VECTORS_FIELDS_EXTENSION);
tvf = d.openInput(fn, context);
final int tvfFormat = checkValidFormat(tvf, fn);
@@ -501,5 +513,22 @@ public class DefaultTermVectorsReader ex
return new DefaultTermVectorsReader(fieldInfos, cloneTvx, cloneTvd, cloneTvf, size, numTotalDocs, docStoreOffset, format);
}
+
+ public static void files(Directory dir, SegmentInfo info, Set<String> files) throws IOException {
+ if (info.getHasVectors()) {
+ if (info.getDocStoreOffset() != -1) {
+ assert info.getDocStoreSegment() != null;
+ if (!info.getDocStoreIsCompoundFile()) {
+ files.add(IndexFileNames.segmentFileName(info.getDocStoreSegment(), "", VECTORS_INDEX_EXTENSION));
+ files.add(IndexFileNames.segmentFileName(info.getDocStoreSegment(), "", VECTORS_FIELDS_EXTENSION));
+ files.add(IndexFileNames.segmentFileName(info.getDocStoreSegment(), "", VECTORS_DOCUMENTS_EXTENSION));
+ }
+ } else {
+ files.add(IndexFileNames.segmentFileName(info.name, "", VECTORS_INDEX_EXTENSION));
+ files.add(IndexFileNames.segmentFileName(info.name, "", VECTORS_FIELDS_EXTENSION));
+ files.add(IndexFileNames.segmentFileName(info.name, "", VECTORS_DOCUMENTS_EXTENSION));
+ }
+ }
+ }
}
Modified: lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/codecs/DefaultTermVectorsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/codecs/DefaultTermVectorsWriter.java?rev=1200692&r1=1200691&r2=1200692&view=diff
==============================================================================
--- lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/codecs/DefaultTermVectorsWriter.java (original)
+++ lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/codecs/DefaultTermVectorsWriter.java Fri Nov 11 02:11:52 2011
@@ -48,11 +48,11 @@ public final class DefaultTermVectorsWri
boolean success = false;
try {
// Open files for TermVector storage
- tvx = directory.createOutput(IndexFileNames.segmentFileName(segment, "", IndexFileNames.VECTORS_INDEX_EXTENSION), context);
+ tvx = directory.createOutput(IndexFileNames.segmentFileName(segment, "", DefaultTermVectorsReader.VECTORS_INDEX_EXTENSION), context);
tvx.writeInt(DefaultTermVectorsReader.FORMAT_CURRENT);
- tvd = directory.createOutput(IndexFileNames.segmentFileName(segment, "", IndexFileNames.VECTORS_DOCUMENTS_EXTENSION), context);
+ tvd = directory.createOutput(IndexFileNames.segmentFileName(segment, "", DefaultTermVectorsReader.VECTORS_DOCUMENTS_EXTENSION), context);
tvd.writeInt(DefaultTermVectorsReader.FORMAT_CURRENT);
- tvf = directory.createOutput(IndexFileNames.segmentFileName(segment, "", IndexFileNames.VECTORS_FIELDS_EXTENSION), context);
+ tvf = directory.createOutput(IndexFileNames.segmentFileName(segment, "", DefaultTermVectorsReader.VECTORS_FIELDS_EXTENSION), context);
tvf.writeInt(DefaultTermVectorsReader.FORMAT_CURRENT);
success = true;
} finally {
@@ -254,15 +254,15 @@ public final class DefaultTermVectorsWri
} catch (IOException ignored) {}
try {
- directory.deleteFile(IndexFileNames.segmentFileName(segment, "", IndexFileNames.VECTORS_INDEX_EXTENSION));
+ directory.deleteFile(IndexFileNames.segmentFileName(segment, "", DefaultTermVectorsReader.VECTORS_INDEX_EXTENSION));
} catch (IOException ignored) {}
try {
- directory.deleteFile(IndexFileNames.segmentFileName(segment, "", IndexFileNames.VECTORS_DOCUMENTS_EXTENSION));
+ directory.deleteFile(IndexFileNames.segmentFileName(segment, "", DefaultTermVectorsReader.VECTORS_DOCUMENTS_EXTENSION));
} catch (IOException ignored) {}
try {
- directory.deleteFile(IndexFileNames.segmentFileName(segment, "", IndexFileNames.VECTORS_FIELDS_EXTENSION));
+ directory.deleteFile(IndexFileNames.segmentFileName(segment, "", DefaultTermVectorsReader.VECTORS_FIELDS_EXTENSION));
} catch (IOException ignored) {}
}
Modified: lucene/dev/branches/lucene2621/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2621/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java?rev=1200692&r1=1200691&r2=1200692&view=diff
==============================================================================
--- lucene/dev/branches/lucene2621/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java (original)
+++ lucene/dev/branches/lucene2621/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java Fri Nov 11 02:11:52 2011
@@ -42,6 +42,7 @@ import org.apache.lucene.document.String
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.codecs.Codec;
+import org.apache.lucene.index.codecs.DefaultTermVectorsReader;
import org.apache.lucene.index.codecs.simpletext.SimpleTextCodec;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.FieldCache;
@@ -1736,10 +1737,11 @@ public class TestIndexWriter extends Luc
assertNoUnreferencedFiles(dir, "no tv files");
String[] files = dir.listAll();
+ // TODO: this check should use codec's files()
for(String file : files) {
- assertTrue(!file.endsWith(IndexFileNames.VECTORS_FIELDS_EXTENSION));
- assertTrue(!file.endsWith(IndexFileNames.VECTORS_INDEX_EXTENSION));
- assertTrue(!file.endsWith(IndexFileNames.VECTORS_DOCUMENTS_EXTENSION));
+ assertTrue(!file.endsWith(DefaultTermVectorsReader.VECTORS_FIELDS_EXTENSION));
+ assertTrue(!file.endsWith(DefaultTermVectorsReader.VECTORS_INDEX_EXTENSION));
+ assertTrue(!file.endsWith(DefaultTermVectorsReader.VECTORS_DOCUMENTS_EXTENSION));
}
dir.close();
Modified: lucene/dev/branches/lucene2621/lucene/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2621/lucene/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java?rev=1200692&r1=1200691&r2=1200692&view=diff
==============================================================================
--- lucene/dev/branches/lucene2621/lucene/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java (original)
+++ lucene/dev/branches/lucene2621/lucene/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java Fri Nov 11 02:11:52 2011
@@ -538,7 +538,7 @@ public class TestIndexWriterExceptions e
boolean sawAppend = false;
boolean sawFlush = false;
for (int i = 0; i < trace.length; i++) {
- if ("org.apache.lucene.index.FreqProxTermsWriterPerField".equals(trace[i].getClassName()) && "flush".equals(trace[i].getMethodName()))
+ if (FreqProxTermsWriterPerField.class.getName().equals(trace[i].getClassName()) && "flush".equals(trace[i].getMethodName()))
sawAppend = true;
if ("flush".equals(trace[i].getMethodName()))
sawFlush = true;
@@ -797,7 +797,7 @@ public class TestIndexWriterExceptions e
if (doFail) {
StackTraceElement[] trace = new Exception().getStackTrace();
for (int i = 0; i < trace.length; i++) {
- if (doFail && "org.apache.lucene.store.MockDirectoryWrapper".equals(trace[i].getClassName()) && "sync".equals(trace[i].getMethodName())) {
+ if (doFail && MockDirectoryWrapper.class.getName().equals(trace[i].getClassName()) && "sync".equals(trace[i].getMethodName())) {
didFail = true;
throw new IOException("now failing on purpose during sync");
}
@@ -871,11 +871,11 @@ public class TestIndexWriterExceptions e
boolean isDelete = false;
boolean isInGlobalFieldMap = false;
for (int i = 0; i < trace.length; i++) {
- if ("org.apache.lucene.index.SegmentInfos".equals(trace[i].getClassName()) && stage.equals(trace[i].getMethodName()))
+ if (SegmentInfos.class.getName().equals(trace[i].getClassName()) && stage.equals(trace[i].getMethodName()))
isCommit = true;
- if ("org.apache.lucene.store.MockDirectoryWrapper".equals(trace[i].getClassName()) && "deleteFile".equals(trace[i].getMethodName()))
+ if (MockDirectoryWrapper.class.getName().equals(trace[i].getClassName()) && "deleteFile".equals(trace[i].getMethodName()))
isDelete = true;
- if ("org.apache.lucene.index.SegmentInfos".equals(trace[i].getClassName()) && "writeGlobalFieldMap".equals(trace[i].getMethodName()))
+ if (SegmentInfos.class.getName().equals(trace[i].getClassName()) && "writeGlobalFieldMap".equals(trace[i].getMethodName()))
isInGlobalFieldMap = true;
}
@@ -1309,9 +1309,9 @@ public class TestIndexWriterExceptions e
boolean failOnInit = false;
boolean failOnfinish = false;
for (int i = 0; i < trace.length; i++) {
- if ("org.apache.lucene.index.TermVectorsTermsWriter".equals(trace[i].getClassName()) && stage.equals(trace[i].getMethodName()))
+ if (TermVectorsConsumer.class.getName().equals(trace[i].getClassName()) && stage.equals(trace[i].getMethodName()))
failOnInit = true;
- if ("org.apache.lucene.index.TermVectorsTermsWriter".equals(trace[i].getClassName()) && stage.equals(trace[i].getMethodName()))
+ if (TermVectorsConsumer.class.getName().equals(trace[i].getClassName()) && stage.equals(trace[i].getMethodName()))
failOnfinish = true;
}
Modified: lucene/dev/branches/lucene2621/lucene/src/test/org/apache/lucene/index/TestTermVectorsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2621/lucene/src/test/org/apache/lucene/index/TestTermVectorsReader.java?rev=1200692&r1=1200691&r2=1200692&view=diff
==============================================================================
--- lucene/dev/branches/lucene2621/lucene/src/test/org/apache/lucene/index/TestTermVectorsReader.java (original)
+++ lucene/dev/branches/lucene2621/lucene/src/test/org/apache/lucene/index/TestTermVectorsReader.java Fri Nov 11 02:11:52 2011
@@ -32,6 +32,7 @@ import org.apache.lucene.document.Docume
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.TextField;
+import org.apache.lucene.index.codecs.Codec;
import org.apache.lucene.index.codecs.DefaultTermVectorsReader;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
@@ -185,8 +186,10 @@ public class TestTermVectorsReader exten
public void test() throws IOException {
//Check to see the files were created properly in setup
- assertTrue(dir.fileExists(IndexFileNames.segmentFileName(seg, "", IndexFileNames.VECTORS_DOCUMENTS_EXTENSION)));
- assertTrue(dir.fileExists(IndexFileNames.segmentFileName(seg, "", IndexFileNames.VECTORS_INDEX_EXTENSION)));
+ // TODO: fix this or at least add 'or' simpletext's extension
+ assumeFalse("test makes assumptions about filenames", Codec.getDefault().getName().equals("SimpleText"));
+ assertTrue(dir.fileExists(IndexFileNames.segmentFileName(seg, "", DefaultTermVectorsReader.VECTORS_DOCUMENTS_EXTENSION)));
+ assertTrue(dir.fileExists(IndexFileNames.segmentFileName(seg, "", DefaultTermVectorsReader.VECTORS_INDEX_EXTENSION)));
}
public void testReader() throws IOException {