You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by sh...@apache.org on 2010/05/30 14:25:53 UTC
svn commit: r949509 [1/2] - in /lucene/dev/trunk/lucene: ./
src/java/org/apache/lucene/index/
src/java/org/apache/lucene/index/codecs/preflex/
src/java/org/apache/lucene/store/ src/test/org/apache/lucene/index/
Author: shaie
Date: Sun May 30 12:25:52 2010
New Revision: 949509
URL: http://svn.apache.org/viewvc?rev=949509&view=rev
Log:
LUCENE-2480: Remove support for pre-3.0 indexes
Removed:
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/index.19.cfs.zip
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/index.19.nocfs.zip
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/index.20.cfs.zip
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/index.20.nocfs.zip
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/index.21.cfs.zip
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/index.21.nocfs.zip
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/index.22.cfs.zip
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/index.22.nocfs.zip
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/index.23.cfs.zip
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/index.23.nocfs.zip
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/index.24.cfs.zip
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/index.24.nocfs.zip
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/index.29.cfs.zip
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/index.29.nocfs.zip
Modified:
lucene/dev/trunk/lucene/CHANGES.txt
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/CheckIndex.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldInfos.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldsReader.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldsWriter.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexFileNameFilter.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexFileNames.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexWriter.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentInfo.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentInfos.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentReader.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/TermVectorsReader.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/preflex/SegmentTermEnum.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/preflex/TermBuffer.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/store/DataInput.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestCodecs.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestDoc.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexFileDeleter.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexInput.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestSegmentMerger.java
Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=949509&r1=949508&r2=949509&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Sun May 30 12:25:52 2010
@@ -74,6 +74,10 @@ Changes in backwards compatibility polic
character. Furthermore, the rest of the automaton package and RegexpQuery use
true Unicode codepoint representation. (Robert Muir, Mike McCandless)
+* LUCENE-2480: Though not a change in backwards compatibility policy, pre-3.0
+ indexes are no longer supported. You should upgrade to 3.x first, then run
+ optimize(), or reindex. (Shai Erera, Earwin Burrfoot)
+
Changes in runtime behavior
* LUCENE-2421: NativeFSLockFactory does not throw LockReleaseFailedException if
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/CheckIndex.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/CheckIndex.java?rev=949509&r1=949508&r2=949509&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/CheckIndex.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/CheckIndex.java Sun May 30 12:25:52 2010
@@ -17,6 +17,7 @@ package org.apache.lucene.index;
* limitations under the License.
*/
+import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
@@ -342,33 +343,13 @@ public class CheckIndex {
String sFormat = "";
boolean skip = false;
- if (format == SegmentInfos.FORMAT)
- sFormat = "FORMAT [Lucene Pre-2.1]";
- if (format == SegmentInfos.FORMAT_LOCKLESS)
- sFormat = "FORMAT_LOCKLESS [Lucene 2.1]";
- else if (format == SegmentInfos.FORMAT_SINGLE_NORM_FILE)
- sFormat = "FORMAT_SINGLE_NORM_FILE [Lucene 2.2]";
- else if (format == SegmentInfos.FORMAT_SHARED_DOC_STORE)
- sFormat = "FORMAT_SHARED_DOC_STORE [Lucene 2.3]";
- else {
- if (format == SegmentInfos.FORMAT_CHECKSUM)
- sFormat = "FORMAT_CHECKSUM [Lucene 2.4]";
- else if (format == SegmentInfos.FORMAT_DEL_COUNT)
- sFormat = "FORMAT_DEL_COUNT [Lucene 2.4]";
- else if (format == SegmentInfos.FORMAT_HAS_PROX)
- sFormat = "FORMAT_HAS_PROX [Lucene 2.4]";
- else if (format == SegmentInfos.FORMAT_USER_DATA)
- sFormat = "FORMAT_USER_DATA [Lucene 2.9]";
- else if (format == SegmentInfos.FORMAT_DIAGNOSTICS)
- sFormat = "FORMAT_DIAGNOSTICS [Lucene 2.9]";
- else if (format == SegmentInfos.FORMAT_FLEX_POSTINGS)
- sFormat = "FORMAT_FLEX_POSTINGS [Lucene 3.1]";
- else if (format < SegmentInfos.CURRENT_FORMAT) {
- sFormat = "int=" + format + " [newer version of Lucene than this tool]";
- skip = true;
- } else {
- sFormat = format + " [Lucene 1.3 or prior]";
- }
+ if (format == SegmentInfos.FORMAT_DIAGNOSTICS)
+ sFormat = "FORMAT_DIAGNOSTICS [Lucene 2.9]";
+ else if (format == SegmentInfos.FORMAT_4_0)
+ sFormat = "FORMAT_FLEX_POSTINGS [Lucene 4.0]";
+ else if (format < SegmentInfos.CURRENT_FORMAT) {
+ sFormat = "int=" + format + " [newer version of Lucene than this tool]";
+ skip = true;
}
result.segmentsFileName = segmentsFileName;
@@ -656,7 +637,7 @@ public class CheckIndex {
int lastDoc = -1;
while(true) {
final int doc = docs2.nextDoc();
- if (doc == DocsEnum.NO_MORE_DOCS) {
+ if (doc == DocIdSetIterator.NO_MORE_DOCS) {
break;
}
final int freq = docs2.freq();
@@ -698,7 +679,7 @@ public class CheckIndex {
if (reader.hasDeletions()) {
final DocsEnum docsNoDel = terms.docs(null, docs);
int count = 0;
- while(docsNoDel.nextDoc() != DocsEnum.NO_MORE_DOCS) {
+ while(docsNoDel.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
count++;
}
if (count != docFreq) {
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java?rev=949509&r1=949508&r2=949509&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java Sun May 30 12:25:52 2010
@@ -639,10 +639,11 @@ final class DocumentsWriter {
consumer.flush(threads, flushState);
if (infoStream != null) {
- SegmentInfo si = new SegmentInfo(flushState.segmentName, flushState.numDocs, directory, flushState.codec);
- si.setHasProx(hasProx());
+ SegmentInfo si = new SegmentInfo(flushState.segmentName,
+ flushState.numDocs, directory, false, -1, flushState.segmentName,
+ false, hasProx(), flushState.codec);
final long newSegmentSize = si.sizeInBytes();
- String message = " ramUsed=" + nf.format(((double) numBytesUsed)/1024./1024.) + " MB" +
+ String message = " ramUsed=" + nf.format(numBytesUsed/1024./1024.) + " MB" +
" newFlushedSize=" + newSegmentSize +
" docs/MB=" + nf.format(numDocsInRAM/(newSegmentSize/1024./1024.)) +
" new/old=" + nf.format(100.0*newSegmentSize/numBytesUsed) + "%";
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldInfos.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldInfos.java?rev=949509&r1=949508&r2=949509&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldInfos.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldInfos.java Sun May 30 12:25:52 2010
@@ -36,9 +36,6 @@ import java.util.*;
*/
public final class FieldInfos {
- // Used internally (ie not written to *.fnm files) for pre-2.9 files
- public static final int FORMAT_PRE = -1;
-
// First used in 2.9; prior to 2.9 there was no format header
public static final int FORMAT_START = -2;
@@ -68,29 +65,7 @@ public final class FieldInfos {
FieldInfos(Directory d, String name) throws IOException {
IndexInput input = d.openInput(name);
try {
- try {
- read(input, name);
- } catch (IOException ioe) {
- if (format == FORMAT_PRE) {
- // LUCENE-1623: FORMAT_PRE (before there was a
- // format) may be 2.3.2 (pre-utf8) or 2.4.x (utf8)
- // encoding; retry with input set to pre-utf8
- input.seek(0);
- input.setModifiedUTF8StringsMode();
- byNumber.clear();
- byName.clear();
- try {
- read(input, name);
- } catch (Throwable t) {
- // Ignore any new exception & throw original IOE
- throw ioe;
- }
- } else {
- // The IOException cannot be caused by
- // LUCENE-1623, so re-throw it
- throw ioe;
- }
- }
+ read(input, name);
} finally {
input.close();
}
@@ -330,25 +305,13 @@ public final class FieldInfos {
}
private void read(IndexInput input, String fileName) throws IOException {
- int firstInt = input.readVInt();
+ format = input.readVInt();
- if (firstInt < 0) {
- // This is a real format
- format = firstInt;
- } else {
- format = FORMAT_PRE;
- }
-
- if (format != FORMAT_PRE & format != FORMAT_START) {
+ if (format > FORMAT_START) {
throw new CorruptIndexException("unrecognized format " + format + " in file \"" + fileName + "\"");
}
- int size;
- if (format == FORMAT_PRE) {
- size = firstInt;
- } else {
- size = input.readVInt(); //read in the size
- }
+ final int size = input.readVInt(); //read in the size
for (int i = 0; i < size; i++) {
String name = StringHelper.intern(input.readString());
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldsReader.java?rev=949509&r1=949508&r2=949509&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldsReader.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldsReader.java Sun May 30 12:25:52 2010
@@ -19,7 +19,6 @@ package org.apache.lucene.index;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.AbstractField;
-import org.apache.lucene.document.CompressionTools;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldSelector;
@@ -33,7 +32,6 @@ import org.apache.lucene.util.CloseableT
import java.io.IOException;
import java.io.Reader;
-import java.util.zip.DataFormatException;
/**
* Class responsible for access to stored document fields.
@@ -41,6 +39,8 @@ import java.util.zip.DataFormatException
* It uses <segment>.fdt and <segment>.fdx; files.
*/
final class FieldsReader implements Cloneable {
+ private final static int FORMAT_SIZE = 4;
+
private final FieldInfos fieldInfos;
// The main fieldStream, used only for cloning.
@@ -56,7 +56,6 @@ final class FieldsReader implements Clon
private int size;
private boolean closed;
private final int format;
- private final int formatSize;
// The docID offset where our docs begin in the index
// file. This will be 0 if we have our own private file.
@@ -73,17 +72,16 @@ final class FieldsReader implements Clon
@Override
public Object clone() {
ensureOpen();
- return new FieldsReader(fieldInfos, numTotalDocs, size, format, formatSize, docStoreOffset, cloneableFieldsStream, cloneableIndexStream);
+ return new FieldsReader(fieldInfos, numTotalDocs, size, format, docStoreOffset, cloneableFieldsStream, cloneableIndexStream);
}
// Used only by clone
- private FieldsReader(FieldInfos fieldInfos, int numTotalDocs, int size, int format, int formatSize,
- int docStoreOffset, IndexInput cloneableFieldsStream, IndexInput cloneableIndexStream) {
+ private FieldsReader(FieldInfos fieldInfos, int numTotalDocs, int size, int format, int docStoreOffset,
+ IndexInput cloneableFieldsStream, IndexInput cloneableIndexStream) {
this.fieldInfos = fieldInfos;
this.numTotalDocs = numTotalDocs;
this.size = size;
this.format = format;
- this.formatSize = formatSize;
this.docStoreOffset = docStoreOffset;
this.cloneableFieldsStream = cloneableFieldsStream;
this.cloneableIndexStream = cloneableIndexStream;
@@ -95,10 +93,6 @@ final class FieldsReader implements Clon
this(d, segment, fn, BufferedIndexInput.BUFFER_SIZE, -1, 0);
}
- FieldsReader(Directory d, String segment, FieldInfos fn, int readBufferSize) throws IOException {
- this(d, segment, fn, readBufferSize, -1, 0);
- }
-
FieldsReader(Directory d, String segment, FieldInfos fn, int readBufferSize, int docStoreOffset, int size) throws IOException {
boolean success = false;
isOriginal = true;
@@ -108,30 +102,15 @@ final class FieldsReader implements Clon
cloneableFieldsStream = d.openInput(IndexFileNames.segmentFileName(segment, "", IndexFileNames.FIELDS_EXTENSION), readBufferSize);
cloneableIndexStream = d.openInput(IndexFileNames.segmentFileName(segment, "", IndexFileNames.FIELDS_INDEX_EXTENSION), readBufferSize);
- // First version of fdx did not include a format
- // header, but, the first int will always be 0 in that
- // case
- int firstInt = cloneableIndexStream.readInt();
- if (firstInt == 0)
- format = 0;
- else
- format = firstInt;
+ format = cloneableIndexStream.readInt();
if (format > FieldsWriter.FORMAT_CURRENT)
throw new CorruptIndexException("Incompatible format version: " + format + " expected "
+ FieldsWriter.FORMAT_CURRENT + " or lower");
- if (format > FieldsWriter.FORMAT)
- formatSize = 4;
- else
- formatSize = 0;
-
- if (format < FieldsWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES)
- cloneableFieldsStream.setModifiedUTF8StringsMode();
-
fieldsStream = (IndexInput) cloneableFieldsStream.clone();
- final long indexSize = cloneableIndexStream.length()-formatSize;
+ final long indexSize = cloneableIndexStream.length() - FORMAT_SIZE;
if (docStoreOffset != -1) {
// We read only a slice out of this shared fields file
@@ -201,8 +180,8 @@ final class FieldsReader implements Clon
return size;
}
- private final void seekIndex(int docID) throws IOException {
- indexStream.seek(formatSize + (docID + docStoreOffset) * 8L);
+ private void seekIndex(int docID) throws IOException {
+ indexStream.seek(FORMAT_SIZE + (docID + docStoreOffset) * 8L);
}
boolean canReadRawDocs() {
@@ -226,34 +205,31 @@ final class FieldsReader implements Clon
FieldSelectorResult acceptField = fieldSelector == null ? FieldSelectorResult.LOAD : fieldSelector.accept(fi.name);
byte bits = fieldsStream.readByte();
- assert bits <= FieldsWriter.FIELD_IS_COMPRESSED + FieldsWriter.FIELD_IS_TOKENIZED + FieldsWriter.FIELD_IS_BINARY;
+ assert bits <= FieldsWriter.FIELD_IS_TOKENIZED + FieldsWriter.FIELD_IS_BINARY;
- boolean compressed = (bits & FieldsWriter.FIELD_IS_COMPRESSED) != 0;
- assert (compressed ? (format < FieldsWriter.FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS) : true)
- : "compressed fields are only allowed in indexes of version <= 2.9";
boolean tokenize = (bits & FieldsWriter.FIELD_IS_TOKENIZED) != 0;
boolean binary = (bits & FieldsWriter.FIELD_IS_BINARY) != 0;
//TODO: Find an alternative approach here if this list continues to grow beyond the
//list of 5 or 6 currently here. See Lucene 762 for discussion
if (acceptField.equals(FieldSelectorResult.LOAD)) {
- addField(doc, fi, binary, compressed, tokenize);
+ addField(doc, fi, binary, tokenize);
}
else if (acceptField.equals(FieldSelectorResult.LOAD_AND_BREAK)){
- addField(doc, fi, binary, compressed, tokenize);
+ addField(doc, fi, binary, tokenize);
break;//Get out of this loop
}
else if (acceptField.equals(FieldSelectorResult.LAZY_LOAD)) {
- addFieldLazy(doc, fi, binary, compressed, tokenize);
+ addFieldLazy(doc, fi, binary, tokenize);
}
else if (acceptField.equals(FieldSelectorResult.SIZE)){
- skipField(binary, compressed, addFieldSize(doc, fi, binary, compressed));
+ skipField(addFieldSize(doc, fi, binary));
}
else if (acceptField.equals(FieldSelectorResult.SIZE_AND_BREAK)){
- addFieldSize(doc, fi, binary, compressed);
+ addFieldSize(doc, fi, binary);
break;
}
else {
- skipField(binary, compressed);
+ skipField();
}
}
@@ -290,25 +266,20 @@ final class FieldsReader implements Clon
* Skip the field. We still have to read some of the information about the field, but can skip past the actual content.
* This will have the most payoff on large fields.
*/
- private void skipField(boolean binary, boolean compressed) throws IOException {
- skipField(binary, compressed, fieldsStream.readVInt());
+ private void skipField() throws IOException {
+ skipField(fieldsStream.readVInt());
}
- private void skipField(boolean binary, boolean compressed, int toRead) throws IOException {
- if (format >= FieldsWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES || binary || compressed) {
- fieldsStream.seek(fieldsStream.getFilePointer() + toRead);
- } else {
- // We need to skip chars. This will slow us down, but still better
- fieldsStream.skipChars(toRead);
- }
+ private void skipField(int toRead) throws IOException {
+ fieldsStream.seek(fieldsStream.getFilePointer() + toRead);
}
- private void addFieldLazy(Document doc, FieldInfo fi, boolean binary, boolean compressed, boolean tokenize) throws IOException {
+ private void addFieldLazy(Document doc, FieldInfo fi, boolean binary, boolean tokenize) throws IOException {
if (binary) {
int toRead = fieldsStream.readVInt();
long pointer = fieldsStream.getFilePointer();
//was: doc.add(new Fieldable(fi.name, b, Fieldable.Store.YES));
- doc.add(new LazyField(fi.name, Field.Store.YES, toRead, pointer, binary, compressed));
+ doc.add(new LazyField(fi.name, Field.Store.YES, toRead, pointer, binary));
//Need to move the pointer ahead by toRead positions
fieldsStream.seek(pointer + toRead);
} else {
@@ -317,75 +288,42 @@ final class FieldsReader implements Clon
Field.TermVector termVector = Field.TermVector.toTermVector(fi.storeTermVector, fi.storeOffsetWithTermVector, fi.storePositionWithTermVector);
AbstractField f;
- if (compressed) {
- int toRead = fieldsStream.readVInt();
- long pointer = fieldsStream.getFilePointer();
- f = new LazyField(fi.name, store, toRead, pointer, binary, compressed);
- //skip over the part that we aren't loading
- fieldsStream.seek(pointer + toRead);
- f.setOmitNorms(fi.omitNorms);
- f.setOmitTermFreqAndPositions(fi.omitTermFreqAndPositions);
- } else {
- int length = fieldsStream.readVInt();
- long pointer = fieldsStream.getFilePointer();
- //Skip ahead of where we are by the length of what is stored
- if (format >= FieldsWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES) {
- fieldsStream.seek(pointer+length);
- } else {
- fieldsStream.skipChars(length);
- }
- f = new LazyField(fi.name, store, index, termVector, length, pointer, binary, compressed);
- f.setOmitNorms(fi.omitNorms);
- f.setOmitTermFreqAndPositions(fi.omitTermFreqAndPositions);
- }
-
+ int length = fieldsStream.readVInt();
+ long pointer = fieldsStream.getFilePointer();
+ //Skip ahead of where we are by the length of what is stored
+ fieldsStream.seek(pointer+length);
+ f = new LazyField(fi.name, store, index, termVector, length, pointer, binary);
+ f.setOmitNorms(fi.omitNorms);
+ f.setOmitTermFreqAndPositions(fi.omitTermFreqAndPositions);
+
doc.add(f);
}
}
- private void addField(Document doc, FieldInfo fi, boolean binary, boolean compressed, boolean tokenize) throws CorruptIndexException, IOException {
+ private void addField(Document doc, FieldInfo fi, boolean binary, boolean tokenize) throws CorruptIndexException, IOException {
//we have a binary stored field, and it may be compressed
if (binary) {
int toRead = fieldsStream.readVInt();
final byte[] b = new byte[toRead];
fieldsStream.readBytes(b, 0, b.length);
- if (compressed) {
- doc.add(new Field(fi.name, uncompress(b)));
- } else {
- doc.add(new Field(fi.name, b));
- }
+ doc.add(new Field(fi.name, b));
} else {
Field.Store store = Field.Store.YES;
Field.Index index = Field.Index.toIndex(fi.isIndexed, tokenize);
Field.TermVector termVector = Field.TermVector.toTermVector(fi.storeTermVector, fi.storeOffsetWithTermVector, fi.storePositionWithTermVector);
AbstractField f;
- if (compressed) {
- int toRead = fieldsStream.readVInt();
+ f = new Field(fi.name, // name
+ false,
+ fieldsStream.readString(), // read value
+ store,
+ index,
+ termVector);
+ f.setOmitTermFreqAndPositions(fi.omitTermFreqAndPositions);
+ f.setOmitNorms(fi.omitNorms);
- final byte[] b = new byte[toRead];
- fieldsStream.readBytes(b, 0, b.length);
- f = new Field(fi.name, // field name
- false,
- new String(uncompress(b), "UTF-8"), // uncompress the value and add as string
- store,
- index,
- termVector);
- f.setOmitTermFreqAndPositions(fi.omitTermFreqAndPositions);
- f.setOmitNorms(fi.omitNorms);
- } else {
- f = new Field(fi.name, // name
- false,
- fieldsStream.readString(), // read value
- store,
- index,
- termVector);
- f.setOmitTermFreqAndPositions(fi.omitTermFreqAndPositions);
- f.setOmitNorms(fi.omitNorms);
- }
-
doc.add(f);
}
}
@@ -393,8 +331,8 @@ final class FieldsReader implements Clon
// Add the size of field as a byte[] containing the 4 bytes of the integer byte size (high order byte first; char = 2 bytes)
// Read just the size -- caller must skip the field content to continue reading fields
// Return the size in bytes or chars, depending on field type
- private int addFieldSize(Document doc, FieldInfo fi, boolean binary, boolean compressed) throws IOException {
- int size = fieldsStream.readVInt(), bytesize = binary || compressed ? size : 2*size;
+ private int addFieldSize(Document doc, FieldInfo fi, boolean binary) throws IOException {
+ int size = fieldsStream.readVInt(), bytesize = binary ? size : 2*size;
byte[] sizebytes = new byte[4];
sizebytes[0] = (byte) (bytesize>>>24);
sizebytes[1] = (byte) (bytesize>>>16);
@@ -411,11 +349,8 @@ final class FieldsReader implements Clon
private class LazyField extends AbstractField implements Fieldable {
private int toRead;
private long pointer;
- /** @deprecated Only kept for backward-compatbility with <3.0 indexes. Will be removed in 4.0. */
- @Deprecated
- private boolean isCompressed;
- public LazyField(String name, Field.Store store, int toRead, long pointer, boolean isBinary, boolean isCompressed) {
+ public LazyField(String name, Field.Store store, int toRead, long pointer, boolean isBinary) {
super(name, store, Field.Index.NO, Field.TermVector.NO);
this.toRead = toRead;
this.pointer = pointer;
@@ -423,10 +358,9 @@ final class FieldsReader implements Clon
if (isBinary)
binaryLength = toRead;
lazy = true;
- this.isCompressed = isCompressed;
}
- public LazyField(String name, Field.Store store, Field.Index index, Field.TermVector termVector, int toRead, long pointer, boolean isBinary, boolean isCompressed) {
+ public LazyField(String name, Field.Store store, Field.Index index, Field.TermVector termVector, int toRead, long pointer, boolean isBinary) {
super(name, store, index, termVector);
this.toRead = toRead;
this.pointer = pointer;
@@ -434,7 +368,6 @@ final class FieldsReader implements Clon
if (isBinary)
binaryLength = toRead;
lazy = true;
- this.isCompressed = isCompressed;
}
private IndexInput getFieldStream() {
@@ -474,22 +407,9 @@ final class FieldsReader implements Clon
IndexInput localFieldsStream = getFieldStream();
try {
localFieldsStream.seek(pointer);
- if (isCompressed) {
- final byte[] b = new byte[toRead];
- localFieldsStream.readBytes(b, 0, b.length);
- fieldsData = new String(uncompress(b), "UTF-8");
- } else {
- if (format >= FieldsWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES) {
- byte[] bytes = new byte[toRead];
- localFieldsStream.readBytes(bytes, 0, toRead);
- fieldsData = new String(bytes, "UTF-8");
- } else {
- //read in chars b/c we already know the length we need to read
- char[] chars = new char[toRead];
- localFieldsStream.readChars(chars, 0, toRead);
- fieldsData = new String(chars);
- }
- }
+ byte[] bytes = new byte[toRead];
+ localFieldsStream.readBytes(bytes, 0, toRead);
+ fieldsData = new String(bytes, "UTF-8");
} catch (IOException e) {
throw new FieldReaderException(e);
}
@@ -498,26 +418,6 @@ final class FieldsReader implements Clon
}
}
- public long getPointer() {
- ensureOpen();
- return pointer;
- }
-
- public void setPointer(long pointer) {
- ensureOpen();
- this.pointer = pointer;
- }
-
- public int getToRead() {
- ensureOpen();
- return toRead;
- }
-
- public void setToRead(int toRead) {
- ensureOpen();
- this.toRead = toRead;
- }
-
@Override
public byte[] getBinaryValue(byte[] result) {
ensureOpen();
@@ -538,11 +438,7 @@ final class FieldsReader implements Clon
try {
localFieldsStream.seek(pointer);
localFieldsStream.readBytes(b, 0, toRead);
- if (isCompressed == true) {
- fieldsData = uncompress(b);
- } else {
- fieldsData = b;
- }
+ fieldsData = b;
} catch (IOException e) {
throw new FieldReaderException(e);
}
@@ -556,16 +452,4 @@ final class FieldsReader implements Clon
return null;
}
}
-
- private byte[] uncompress(byte[] b)
- throws CorruptIndexException {
- try {
- return CompressionTools.decompress(b);
- } catch (DataFormatException e) {
- // this will happen if the field is not compressed
- CorruptIndexException newException = new CorruptIndexException("field data are in wrong format: " + e.toString());
- newException.initCause(e);
- throw newException;
- }
- }
}
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldsWriter.java?rev=949509&r1=949508&r2=949509&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldsWriter.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldsWriter.java Sun May 30 12:25:52 2010
@@ -31,16 +31,6 @@ final class FieldsWriter
static final byte FIELD_IS_TOKENIZED = 0x1;
static final byte FIELD_IS_BINARY = 0x2;
- /** @deprecated Kept for backwards-compatibility with <3.0 indexes; will be removed in 4.0 */
- @Deprecated
- static final byte FIELD_IS_COMPRESSED = 0x4;
-
- // Original format
- static final int FORMAT = 0;
-
- // Changed strings to UTF8
- static final int FORMAT_VERSION_UTF8_LENGTH_IN_BYTES = 1;
-
// Lucene 3.0: Removal of compressed fields
static final int FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS = 2;
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexFileNameFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexFileNameFilter.java?rev=949509&r1=949508&r2=949509&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexFileNameFilter.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexFileNameFilter.java Sun May 30 12:25:52 2010
@@ -62,8 +62,7 @@ public class IndexFileNameFilter impleme
return true;
}
} else {
- if (name.equals(IndexFileNames.DELETABLE)) return true;
- else if (name.startsWith(IndexFileNames.SEGMENTS)) return true;
+ if (name.startsWith(IndexFileNames.SEGMENTS)) return true;
}
return false;
}
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexFileNames.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexFileNames.java?rev=949509&r1=949508&r2=949509&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexFileNames.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexFileNames.java Sun May 30 12:25:52 2010
@@ -46,10 +46,6 @@ public final class IndexFileNames {
/** Name of the generation reference file name */
public static final String SEGMENTS_GEN = "segments." + GEN_EXTENSION;
- /** Name of the index deletable file (only used in
- * pre-lockless indices) */
- public static final String DELETABLE = "deletable";
-
/** Extension of norms file */
public static final String NORMS_EXTENSION = "nrm";
@@ -80,19 +76,15 @@ public final class IndexFileNames {
/** Extension of field infos */
public static final String FIELD_INFOS_EXTENSION = "fnm";
- /** Extension of plain norms */
- public static final String PLAIN_NORMS_EXTENSION = "f";
-
/** Extension of separate norms */
public static final String SEPARATE_NORMS_EXTENSION = "s";
/**
* This array contains all filename extensions used by
- * Lucene's index files, with two exceptions, namely the
- * extension made up from <code>.f</code> + a number and
- * from <code>.s</code> + a number. Also note that
- * Lucene's <code>segments_N</code> files do not have any
- * filename extension.
+ * Lucene's index files, with one exception, namely the
+ * extension made up from <code>.s</code> + a number.
+ * Also note that Lucene's <code>segments_N</code> files
+ * do not have any filename extension.
*/
public static final String INDEX_EXTENSIONS[] = new String[] {
COMPOUND_FILE_EXTENSION,
@@ -146,7 +138,7 @@ public final class IndexFileNames {
* @param ext extension of the filename
* @param gen generation
*/
- public static final String fileNameFromGeneration(String base, String ext, long gen) {
+ public static String fileNameFromGeneration(String base, String ext, long gen) {
if (gen == SegmentInfo.NO) {
return null;
} else if (gen == SegmentInfo.WITHOUT_GEN) {
@@ -168,7 +160,7 @@ public final class IndexFileNames {
* Returns true if the provided filename is one of the doc store files (ends
* with an extension in {@link #STORE_INDEX_EXTENSIONS}).
*/
- public static final boolean isDocStoreFile(String fileName) {
+ public static boolean isDocStoreFile(String fileName) {
if (fileName.endsWith(COMPOUND_FILE_STORE_EXTENSION))
return true;
for (String ext : STORE_INDEX_EXTENSIONS) {
@@ -193,7 +185,7 @@ public final class IndexFileNames {
* otherwise some structures may fail to handle them properly (such as if they
* are added to compound files).
*/
- public static final String segmentFileName(String segmentName, String name, String ext) {
+ public static String segmentFileName(String segmentName, String name, String ext) {
if (ext.length() > 0 || name.length() > 0) {
assert !ext.startsWith(".");
StringBuilder sb = new StringBuilder(segmentName.length() + 2 + name.length() + ext.length());
@@ -214,7 +206,7 @@ public final class IndexFileNames {
* Returns true if the given filename ends with the given extension. One
* should provide a <i>pure</i> extension, without '.'.
*/
- public static final boolean matchesExtension(String filename, String ext) {
+ public static boolean matchesExtension(String filename, String ext) {
// It doesn't make a difference whether we allocate a StringBuilder ourself
// or not, since there's only 1 '+' operator.
return filename.endsWith("." + ext);
@@ -229,7 +221,7 @@ public final class IndexFileNames {
* @return the filename with the segment name removed, or the given filename
* if it does not contain a '.' and '_'.
*/
- public static final String stripSegmentName(String filename) {
+ public static String stripSegmentName(String filename) {
// If it is a .del file, there's an '_' after the first character
int idx = filename.indexOf('_', 1);
if (idx == -1) {
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexWriter.java?rev=949509&r1=949508&r2=949509&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexWriter.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexWriter.java Sun May 30 12:25:52 2010
@@ -2969,8 +2969,8 @@ public class IndexWriter implements Clos
SegmentInfo info = null;
synchronized(this) {
- info = new SegmentInfo(mergedName, docCount, directory, false, true,
- -1, null, false, merger.hasProx(), merger.getCodec());
+ info = new SegmentInfo(mergedName, docCount, directory, false, -1,
+ null, false, merger.hasProx(), merger.getCodec());
setDiagnostics(info, "addIndexes(IndexReader...)");
segmentInfos.add(info);
checkpoint();
@@ -3335,10 +3335,9 @@ public class IndexWriter implements Clos
// successfully.
newSegment = new SegmentInfo(segment,
flushedDocCount,
- directory, false, true,
- docStoreOffset, docStoreSegment,
- docStoreIsCompoundFile,
- docWriter.hasProx(),
+ directory, false, docStoreOffset,
+ docStoreSegment, docStoreIsCompoundFile,
+ docWriter.hasProx(),
docWriter.getCodec());
setDiagnostics(newSegment, "flush");
@@ -3853,8 +3852,7 @@ public class IndexWriter implements Clos
// ConcurrentMergePolicy we keep deterministic segment
// names.
merge.info = new SegmentInfo(newSegmentName(), 0,
- directory, false, true,
- docStoreOffset,
+ directory, false, docStoreOffset,
docStoreSegment,
docStoreIsCompoundFile,
false,
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentInfo.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentInfo.java?rev=949509&r1=949508&r2=949509&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentInfo.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentInfo.java Sun May 30 12:25:52 2010
@@ -20,17 +20,16 @@ package org.apache.lucene.index;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.IndexInput;
-import org.apache.lucene.util.BitVector;
import org.apache.lucene.index.codecs.Codec;
import org.apache.lucene.index.codecs.CodecProvider;
import java.io.IOException;
+import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.HashSet;
import java.util.HashMap;
import java.util.ArrayList;
-import java.util.Collections;
/**
* Information about a segment such as its name, directory, and files related
@@ -42,41 +41,30 @@ public final class SegmentInfo {
static final int NO = -1; // e.g. no norms; no deletes;
static final int YES = 1; // e.g. have norms; have deletes;
- static final int CHECK_DIR = 0; // e.g. must check dir to see if there are norms/deletions
static final int WITHOUT_GEN = 0; // a file name that has no GEN in it.
public String name; // unique name in dir
public int docCount; // number of docs in seg
public Directory dir; // where segment resides
- private boolean preLockless; // true if this is a segments file written before
- // lock-less commits (2.1)
-
- private long delGen; // current generation of del file; NO if there
- // are no deletes; CHECK_DIR if it's a pre-2.1 segment
- // (and we must check filesystem); YES or higher if
- // there are deletes at generation N
-
- private long[] normGen; // current generation of each field's norm file.
- // If this array is null, for lockLess this means no
- // separate norms. For preLockLess this means we must
- // check filesystem. If this array is not null, its
- // values mean: NO says this field has no separate
- // norms; CHECK_DIR says it is a preLockLess segment and
- // filesystem must be checked; >= YES says this field
- // has separate norms with the specified generation
-
- private byte isCompoundFile; // NO if it is not; YES if it is; CHECK_DIR if it's
- // pre-2.1 (ie, must check file system to see
- // if <name>.cfs and <name>.nrm exist)
-
- private boolean hasSingleNormFile; // true if this segment maintains norms in a single file;
- // false otherwise
- // this is currently false for segments populated by DocumentWriter
- // and true for newly created merged segments (both
- // compound and non compound).
+ /*
+ * Current generation of del file:
+ * - NO if there are no deletes
+ * - YES or higher if there are deletes at generation N
+ */
+ private long delGen;
- private List<String> files; // cached list of files that this segment uses
+ /*
+ * Current generation of each field's norm file. If this array is null,
+ * means no separate norms. If this array is not null, its values mean:
+ * - NO says this field has no separate norms
+ * >= YES says this field has separate norms with the specified generation
+ */
+ private long[] normGen;
+
+ private boolean isCompoundFile;
+
+ private List<String> files; // cached list of files that this segment uses
// in the Directory
long sizeInBytes = -1; // total byte size of all of our files (computed on demand)
@@ -87,8 +75,7 @@ public final class SegmentInfo {
// other segments
private boolean docStoreIsCompoundFile; // whether doc store files are stored in compound file (*.cfx)
- private int delCount; // How many deleted docs in this segment, or -1 if not yet known
- // (if it's an older index)
+ private int delCount; // How many deleted docs in this segment
private boolean hasProx; // True if this segment has any fields with omitTermFreqAndPositions==false
@@ -97,29 +84,13 @@ public final class SegmentInfo {
private Map<String,String> diagnostics;
- public SegmentInfo(String name, int docCount, Directory dir, Codec codec) {
+ public SegmentInfo(String name, int docCount, Directory dir, boolean isCompoundFile, int docStoreOffset,
+ String docStoreSegment, boolean docStoreIsCompoundFile, boolean hasProx, Codec codec) {
this.name = name;
this.docCount = docCount;
this.dir = dir;
delGen = NO;
- isCompoundFile = CHECK_DIR;
- preLockless = true;
- hasSingleNormFile = false;
- docStoreOffset = -1;
- docStoreSegment = name;
- docStoreIsCompoundFile = false;
- delCount = 0;
- hasProx = true;
- this.codec = codec;
- }
-
- public SegmentInfo(String name, int docCount, Directory dir, boolean isCompoundFile, boolean hasSingleNormFile,
- int docStoreOffset, String docStoreSegment, boolean docStoreIsCompoundFile, boolean hasProx,
- Codec codec) {
- this(name, docCount, dir, codec);
- this.isCompoundFile = (byte) (isCompoundFile ? YES : NO);
- this.hasSingleNormFile = hasSingleNormFile;
- preLockless = false;
+ this.isCompoundFile = isCompoundFile;
this.docStoreOffset = docStoreOffset;
this.docStoreSegment = docStoreSegment;
this.docStoreIsCompoundFile = docStoreIsCompoundFile;
@@ -137,7 +108,6 @@ public final class SegmentInfo {
name = src.name;
docCount = src.docCount;
dir = src.dir;
- preLockless = src.preLockless;
delGen = src.delGen;
docStoreOffset = src.docStoreOffset;
docStoreIsCompoundFile = src.docStoreIsCompoundFile;
@@ -148,7 +118,6 @@ public final class SegmentInfo {
System.arraycopy(src.normGen, 0, normGen, 0, src.normGen.length);
}
isCompoundFile = src.isCompoundFile;
- hasSingleNormFile = src.hasSingleNormFile;
delCount = src.delCount;
codec = src.codec;
}
@@ -174,98 +143,46 @@ public final class SegmentInfo {
name = input.readString();
docCount = input.readInt();
final String codecName;
- if (format <= SegmentInfos.FORMAT_LOCKLESS) {
- delGen = input.readLong();
- if (format <= SegmentInfos.FORMAT_SHARED_DOC_STORE) {
- docStoreOffset = input.readInt();
- if (docStoreOffset != -1) {
- docStoreSegment = input.readString();
- docStoreIsCompoundFile = (1 == input.readByte());
- } else {
- docStoreSegment = name;
- docStoreIsCompoundFile = false;
- }
- } else {
- docStoreOffset = -1;
- docStoreSegment = name;
- docStoreIsCompoundFile = false;
- }
- if (format <= SegmentInfos.FORMAT_SINGLE_NORM_FILE) {
- hasSingleNormFile = (1 == input.readByte());
- } else {
- hasSingleNormFile = false;
- }
- int numNormGen = input.readInt();
- if (numNormGen == NO) {
- normGen = null;
- } else {
- normGen = new long[numNormGen];
- for(int j=0;j<numNormGen;j++) {
- normGen[j] = input.readLong();
- }
- }
- isCompoundFile = input.readByte();
- preLockless = (isCompoundFile == CHECK_DIR);
- if (format <= SegmentInfos.FORMAT_DEL_COUNT) {
- delCount = input.readInt();
- assert delCount <= docCount;
- } else
- delCount = -1;
- if (format <= SegmentInfos.FORMAT_HAS_PROX)
- hasProx = input.readByte() == 1;
- else
- hasProx = true;
-
- // System.out.println(Thread.currentThread().getName() + ": si.read hasProx=" + hasProx + " seg=" + name);
-
- if (format <= SegmentInfos.FORMAT_FLEX_POSTINGS)
- codecName = input.readString();
- else
- codecName = "PreFlex";
-
- if (format <= SegmentInfos.FORMAT_DIAGNOSTICS) {
- diagnostics = input.readStringStringMap();
- } else {
- diagnostics = Collections.<String,String>emptyMap();
- }
+ delGen = input.readLong();
+ docStoreOffset = input.readInt();
+ if (docStoreOffset != -1) {
+ docStoreSegment = input.readString();
+ docStoreIsCompoundFile = input.readByte() == YES;
} else {
- delGen = CHECK_DIR;
- normGen = null;
- isCompoundFile = CHECK_DIR;
- preLockless = true;
- hasSingleNormFile = false;
- docStoreOffset = -1;
+ docStoreSegment = name;
docStoreIsCompoundFile = false;
- docStoreSegment = null;
- delCount = -1;
- hasProx = true;
- codecName = "PreFlex";
- diagnostics = Collections.<String,String>emptyMap();
}
- codec = codecs.lookup(codecName);
- }
-
- void setNumFields(int numFields) {
- if (normGen == null) {
- // normGen is null if we loaded a pre-2.1 segment
- // file, or, if this segments file hasn't had any
- // norms set against it yet:
- normGen = new long[numFields];
-
- if (preLockless) {
- // Do nothing: thus leaving normGen[k]==CHECK_DIR (==0), so that later we know
- // we have to check filesystem for norm files, because this is prelockless.
-
- } else {
- // This is a FORMAT_LOCKLESS segment, which means
- // there are no separate norms:
- for(int i=0;i<numFields;i++) {
- normGen[i] = NO;
- }
+ if (format > SegmentInfos.FORMAT_4_0) {
+ // pre-4.0 indexes write a byte if there is a single norms file
+ assert 1 == input.readByte();
+ }
+ int numNormGen = input.readInt();
+ if (numNormGen == NO) {
+ normGen = null;
+ } else {
+ normGen = new long[numNormGen];
+ for(int j=0;j<numNormGen;j++) {
+ normGen[j] = input.readLong();
}
}
- }
+ isCompoundFile = input.readByte() == YES;
+ delCount = input.readInt();
+ assert delCount <= docCount;
+
+ hasProx = input.readByte() == YES;
+
+ // System.out.println(Thread.currentThread().getName() + ": si.read hasProx=" + hasProx + " seg=" + name);
+
+ if (format <= SegmentInfos.FORMAT_4_0)
+ codecName = input.readString();
+ else
+ codecName = "PreFlex";
+
+ diagnostics = input.readStringStringMap();
+ codec = codecs.lookup(codecName);
+ }
+
/** Returns total size in bytes of all of files used by
* this segment. */
public long sizeInBytes() throws IOException {
@@ -284,33 +201,16 @@ public final class SegmentInfo {
return sizeInBytes;
}
- public boolean hasDeletions()
- throws IOException {
+ public boolean hasDeletions() {
// Cases:
//
- // delGen == NO: this means this segment was written
- // by the LOCKLESS code and for certain does not have
- // deletions yet
- //
- // delGen == CHECK_DIR: this means this segment was written by
- // pre-LOCKLESS code which means we must check
- // directory to see if .del file exists
- //
- // delGen >= YES: this means this segment was written by
- // the LOCKLESS code and for certain has
- // deletions
+ // delGen == NO: this means this segment does not have deletions yet
+ // delGen >= YES: this means this segment has deletions
//
- if (delGen == NO) {
- return false;
- } else if (delGen >= YES) {
- return true;
- } else {
- return dir.fileExists(getDelFileName());
- }
+ return delGen != NO;
}
void advanceDelGen() {
- // delGen 0 is reserved for pre-LOCKLESS format
if (delGen == NO) {
delGen = YES;
} else {
@@ -325,14 +225,12 @@ public final class SegmentInfo {
}
@Override
- public Object clone () {
- SegmentInfo si = new SegmentInfo(name, docCount, dir, codec);
+ public Object clone() {
+ SegmentInfo si = new SegmentInfo(name, docCount, dir, isCompoundFile, docStoreOffset, docStoreSegment, docStoreIsCompoundFile, hasProx, codec);
si.isCompoundFile = isCompoundFile;
si.delGen = delGen;
si.delCount = delCount;
si.hasProx = hasProx;
- si.preLockless = preLockless;
- si.hasSingleNormFile = hasSingleNormFile;
si.diagnostics = new HashMap<String, String>(diagnostics);
if (normGen != null) {
si.normGen = normGen.clone();
@@ -350,7 +248,6 @@ public final class SegmentInfo {
// against this segment
return null;
} else {
- // If delGen is CHECK_DIR, it's the pre-lockless-commit file format
return IndexFileNames.fileNameFromGeneration(name, IndexFileNames.DELETES_EXTENSION, delGen);
}
}
@@ -360,69 +257,34 @@ public final class SegmentInfo {
*
* @param fieldNumber the field index to check
*/
- public boolean hasSeparateNorms(int fieldNumber)
- throws IOException {
- if ((normGen == null && preLockless) || (normGen != null && normGen[fieldNumber] == CHECK_DIR)) {
- // Must fallback to directory file exists check:
- String fileName = name + ".s" + fieldNumber;
- return dir.fileExists(fileName);
- } else if (normGen == null || normGen[fieldNumber] == NO) {
- return false;
- } else {
- return true;
- }
+ public boolean hasSeparateNorms(int fieldNumber) {
+ return normGen != null && normGen[fieldNumber] != NO;
}
/**
* Returns true if any fields in this segment have separate norms.
*/
- public boolean hasSeparateNorms()
- throws IOException {
+ public boolean hasSeparateNorms() {
if (normGen == null) {
- if (!preLockless) {
- // This means we were created w/ LOCKLESS code and no
- // norms are written yet:
- return false;
- } else {
- // This means this segment was saved with pre-LOCKLESS
- // code. So we must fallback to the original
- // directory list check:
- String[] result = dir.listAll();
- if (result == null)
- throw new IOException("cannot read directory " + dir + ": listAll() returned null");
-
- final String pattern = name + ".s\\d+";
- for(int i = 0; i < result.length; i++){
- String fileName = result[i];
- if (fileName.matches(pattern)) {
- return true;
- }
- }
- return false;
- }
+ return false;
} else {
- // This means this segment was saved with LOCKLESS
- // code so we first check whether any normGen's are >= 1
- // (meaning they definitely have separate norms):
- for(int i=0;i<normGen.length;i++) {
- if (normGen[i] >= YES) {
+ for (long fieldNormGen : normGen) {
+ if (fieldNormGen >= YES) {
return true;
}
}
- // Next we look for any == 0. These cases were
- // pre-LOCKLESS and must be checked in directory:
- for(int i=0;i<normGen.length;i++) {
- if (normGen[i] == CHECK_DIR) {
- if (hasSeparateNorms(i)) {
- return true;
- }
- }
- }
}
return false;
}
+ void initNormGen(int numFields) {
+ if (normGen == null) { // normGen is null if this segments file hasn't had any norms set against it yet
+ normGen = new long[numFields];
+ Arrays.fill(normGen, NO);
+ }
+ }
+
/**
* Increment the generation count for the norms file for
* this field.
@@ -443,26 +305,13 @@ public final class SegmentInfo {
*
* @param number field index
*/
- public String getNormFileName(int number) throws IOException {
- long gen;
- if (normGen == null) {
- gen = CHECK_DIR;
- } else {
- gen = normGen[number];
- }
-
+ public String getNormFileName(int number) {
if (hasSeparateNorms(number)) {
- // case 1: separate norm
- return IndexFileNames.fileNameFromGeneration(name, "s" + number, gen);
- }
-
- if (hasSingleNormFile) {
- // case 2: lockless (or nrm file exists) - single file for all norms
+ return IndexFileNames.fileNameFromGeneration(name, "s" + number, normGen[number]);
+ } else {
+ // single file for all norms
return IndexFileNames.fileNameFromGeneration(name, IndexFileNames.NORMS_EXTENSION, WITHOUT_GEN);
}
-
- // case 3: norm file for each field
- return IndexFileNames.fileNameFromGeneration(name, "f" + number, WITHOUT_GEN);
}
/**
@@ -472,11 +321,7 @@ public final class SegmentInfo {
* else, false
*/
void setUseCompoundFile(boolean isCompoundFile) {
- if (isCompoundFile) {
- this.isCompoundFile = YES;
- } else {
- this.isCompoundFile = NO;
- }
+ this.isCompoundFile = isCompoundFile;
clearFiles();
}
@@ -484,25 +329,11 @@ public final class SegmentInfo {
* Returns true if this segment is stored as a compound
* file; else, false.
*/
- public boolean getUseCompoundFile() throws IOException {
- if (isCompoundFile == NO) {
- return false;
- } else if (isCompoundFile == YES) {
- return true;
- } else {
- return dir.fileExists(IndexFileNames.segmentFileName(name, "", IndexFileNames.COMPOUND_FILE_EXTENSION));
- }
+ public boolean getUseCompoundFile() {
+ return isCompoundFile;
}
- public int getDelCount() throws IOException {
- if (delCount == -1) {
- if (hasDeletions()) {
- final String delFileName = getDelFileName();
- delCount = new BitVector(dir, delFileName).count();
- } else
- delCount = 0;
- }
- assert delCount <= docCount;
+ public int getDelCount() {
return delCount;
}
@@ -540,9 +371,7 @@ public final class SegmentInfo {
clearFiles();
}
- /**
- * Save this segment's info.
- */
+ /** Save this segment's info. */
void write(IndexOutput output)
throws IOException {
assert delCount <= docCount: "delCount=" + delCount + " docCount=" + docCount + " segment=" + name;
@@ -555,16 +384,16 @@ public final class SegmentInfo {
output.writeByte((byte) (docStoreIsCompoundFile ? 1:0));
}
- output.writeByte((byte) (hasSingleNormFile ? 1:0));
if (normGen == null) {
output.writeInt(NO);
} else {
output.writeInt(normGen.length);
- for(int j = 0; j < normGen.length; j++) {
- output.writeLong(normGen[j]);
+ for (long fieldNormGen : normGen) {
+ output.writeLong(fieldNormGen);
}
}
- output.writeByte(isCompoundFile);
+
+ output.writeByte((byte) (isCompoundFile ? YES : NO));
output.writeInt(delCount);
output.writeByte((byte) (hasProx ? 1:0));
output.writeString(codec.name);
@@ -644,51 +473,12 @@ public final class SegmentInfo {
fileSet.add(delFileName);
}
- // Careful logic for norms files
if (normGen != null) {
- for(int i=0;i<normGen.length;i++) {
+ for (int i = 0; i < normGen.length; i++) {
long gen = normGen[i];
if (gen >= YES) {
// Definitely a separate norm file, with generation:
fileSet.add(IndexFileNames.fileNameFromGeneration(name, IndexFileNames.SEPARATE_NORMS_EXTENSION + i, gen));
- } else if (NO == gen) {
- // No separate norms but maybe plain norms
- // in the non compound file case:
- if (!hasSingleNormFile && !useCompoundFile) {
- String fileName = IndexFileNames.segmentFileName(name, "", IndexFileNames.PLAIN_NORMS_EXTENSION + i);
- if (dir.fileExists(fileName)) {
- fileSet.add(fileName);
- }
- }
- } else if (CHECK_DIR == gen) {
- // Pre-2.1: we have to check file existence
- String fileName = null;
- if (useCompoundFile) {
- fileName = IndexFileNames.segmentFileName(name, "", IndexFileNames.SEPARATE_NORMS_EXTENSION + i);
- } else if (!hasSingleNormFile) {
- fileName = IndexFileNames.segmentFileName(name, "", IndexFileNames.PLAIN_NORMS_EXTENSION + i);
- }
- if (fileName != null && dir.fileExists(fileName)) {
- fileSet.add(fileName);
- }
- }
- }
- } else if (preLockless || (!hasSingleNormFile && !useCompoundFile)) {
- // Pre-2.1: we have to scan the dir to find all
- // matching _X.sN/_X.fN files for our segment:
- String prefix;
- if (useCompoundFile) {
- prefix = IndexFileNames.segmentFileName(name, "", IndexFileNames.SEPARATE_NORMS_EXTENSION);
- } else {
- prefix = IndexFileNames.segmentFileName(name, "", IndexFileNames.PLAIN_NORMS_EXTENSION);
- }
- final String pattern = prefix + "\\d+";
-
- String[] allFiles = dir.listAll();
- for(int i=0;i<allFiles.length;i++) {
- String fileName = allFiles[i];
- if (fileName.matches(pattern)) {
- fileSet.add(fileName);
}
}
}
@@ -727,16 +517,7 @@ public final class SegmentInfo {
StringBuilder s = new StringBuilder();
s.append(name).append(':');
- char cfs;
- try {
- if (getUseCompoundFile()) {
- cfs = 'c';
- } else {
- cfs = 'C';
- }
- } catch (IOException ioe) {
- cfs = '?';
- }
+ char cfs = getUseCompoundFile() ? 'c' : 'C';
s.append(cfs);
if (this.dir != dir) {
@@ -744,22 +525,9 @@ public final class SegmentInfo {
}
s.append(docCount);
- int delCount;
- try {
- delCount = getDelCount();
- } catch (IOException ioe) {
- delCount = -1;
- }
- if (delCount != -1) {
- delCount += pendingDelCount;
- }
+ int delCount = getDelCount() + pendingDelCount;
if (delCount != 0) {
- s.append('/');
- if (delCount == -1) {
- s.append('?');
- } else {
- s.append(delCount);
- }
+ s.append('/').append(delCount);
}
if (docStoreOffset != -1) {
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentInfos.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentInfos.java?rev=949509&r1=949508&r2=949509&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentInfos.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentInfos.java Sun May 30 12:25:52 2010
@@ -45,59 +45,30 @@ import java.util.Map;
*/
public final class SegmentInfos extends Vector<SegmentInfo> {
- /** The file format version, a negative number. */
- /* Works since counter, the old 1st entry, is always >= 0 */
- public static final int FORMAT = -1;
-
- /** This format adds details used for lockless commits. It differs
- * slightly from the previous format in that file names
- * are never re-used (write once). Instead, each file is
- * written to the next generation. For example,
- * segments_1, segments_2, etc. This allows us to not use
- * a commit lock. See <a
- * href="http://lucene.apache.org/java/docs/fileformats.html">file
- * formats</a> for details.
- */
- public static final int FORMAT_LOCKLESS = -2;
-
- /** This format adds a "hasSingleNormFile" flag into each segment info.
- * See <a href="http://issues.apache.org/jira/browse/LUCENE-756">LUCENE-756</a>
- * for details.
- */
- public static final int FORMAT_SINGLE_NORM_FILE = -3;
-
- /** This format allows multiple segments to share a single
- * vectors and stored fields file. */
- public static final int FORMAT_SHARED_DOC_STORE = -4;
-
- /** This format adds a checksum at the end of the file to
- * ensure all bytes were successfully written. */
- public static final int FORMAT_CHECKSUM = -5;
-
- /** This format adds the deletion count for each segment.
- * This way IndexWriter can efficiently report numDocs(). */
- public static final int FORMAT_DEL_COUNT = -6;
-
- /** This format adds the boolean hasProx to record if any
- * fields in the segment store prox information (ie, have
- * omitTermFreqAndPositions==false) */
- public static final int FORMAT_HAS_PROX = -7;
-
- /** This format adds optional commit userData (String) storage. */
- public static final int FORMAT_USER_DATA = -8;
+ /*
+ * The file format version, a negative number.
+ *
+ * NOTE: future format numbers must always be one smaller
+ * than the latest. With time, support for old formats will
+ * be removed, however the numbers should continue to decrease.
+ */
+ /** Used for the segments.gen file only! */
+ public static final int FORMAT_SEGMENTS_GEN_CURRENT = -2;
+
/** This format adds optional per-segment String
* diagnostics storage, and switches userData to Map */
public static final int FORMAT_DIAGNOSTICS = -9;
-
+
/** Each segment records whether its postings are written
* in the new flex format */
- public static final int FORMAT_FLEX_POSTINGS = -10;
+ public static final int FORMAT_4_0 = -10;
/* This must always point to the most recent file format. */
- static final int CURRENT_FORMAT = FORMAT_FLEX_POSTINGS;
+ static final int CURRENT_FORMAT = FORMAT_4_0;
public int counter = 0; // used to name new segments
+
/**
* counts how often the index has been changed by adding or deleting docs.
* starting with the current time in milliseconds forces to create unique version numbers.
@@ -132,8 +103,7 @@ public final class SegmentInfos extends
return -1;
}
long max = -1;
- for (int i = 0; i < files.length; i++) {
- String file = files[i];
+ for (String file : files) {
if (file.startsWith(IndexFileNames.SEGMENTS) && !file.equals(IndexFileNames.SEGMENTS_GEN)) {
long gen = generationFromSegmentsFileName(file);
if (gen > max) {
@@ -248,46 +218,25 @@ public final class SegmentInfos extends
try {
int format = input.readInt();
- if(format < 0){ // file contains explicit format info
- // check that it is a format we can understand
- if (format < CURRENT_FORMAT)
- throw new CorruptIndexException("Unknown format version: " + format);
- version = input.readLong(); // read version
- counter = input.readInt(); // read counter
- }
- else{ // file is in old format without explicit format info
- counter = format;
- }
+
+ // check that it is a format we can understand
+ if (format < CURRENT_FORMAT)
+ throw new CorruptIndexException("Unknown (newer than us?) format version: " + format);
+
+ version = input.readLong(); // read version
+ counter = input.readInt(); // read counter
for (int i = input.readInt(); i > 0; i--) { // read segmentInfos
add(new SegmentInfo(directory, format, input, codecs));
}
- if(format >= 0){ // in old format the version number may be at the end of the file
- if (input.getFilePointer() >= input.length())
- version = System.currentTimeMillis(); // old file format without version number
- else
- version = input.readLong(); // read version
- }
+ userData = input.readStringStringMap();
- if (format <= FORMAT_USER_DATA) {
- if (format <= FORMAT_DIAGNOSTICS) {
- userData = input.readStringStringMap();
- } else if (0 != input.readByte()) {
- userData = Collections.singletonMap("userData", input.readString());
- } else {
- userData = Collections.<String,String>emptyMap();
- }
- } else {
- userData = Collections.<String,String>emptyMap();
- }
+ final long checksumNow = input.getChecksum();
+ final long checksumThen = input.readLong();
+ if (checksumNow != checksumThen)
+ throw new CorruptIndexException("checksum mismatch in segments file");
- if (format <= FORMAT_CHECKSUM) {
- final long checksumNow = input.getChecksum();
- final long checksumThen = input.readLong();
- if (checksumNow != checksumThen)
- throw new CorruptIndexException("checksum mismatch in segments file");
- }
success = true;
}
finally {
@@ -327,7 +276,7 @@ public final class SegmentInfos extends
// before finishCommit is called
ChecksumIndexOutput pendingSegnOutput;
- private final void write(Directory directory) throws IOException {
+ private void write(Directory directory) throws IOException {
String segmentFileName = getNextSegmentFileName();
@@ -348,8 +297,8 @@ public final class SegmentInfos extends
// the index
segnOutput.writeInt(counter); // write counter
segnOutput.writeInt(size()); // write infos
- for (int i = 0; i < size(); i++) {
- info(i).write(segnOutput);
+ for (SegmentInfo si : this) {
+ si.write(segnOutput);
}
segnOutput.writeStringStringMap(userData);
segnOutput.prepareCommit();
@@ -612,7 +561,7 @@ public final class SegmentInfos extends
if (genInput != null) {
try {
int version = genInput.readInt();
- if (version == FORMAT_LOCKLESS) {
+ if (version == FORMAT_SEGMENTS_GEN_CURRENT) {
long gen0 = genInput.readLong();
long gen1 = genInput.readLong();
if (infoStream != null) {
@@ -642,10 +591,7 @@ public final class SegmentInfos extends
}
// Pick the larger of the two gen's:
- if (genA > genB)
- gen = genA;
- else
- gen = genB;
+ gen = Math.max(genA, genB);
if (gen == -1) {
// Neither approach found a generation
@@ -858,9 +804,7 @@ public final class SegmentInfos extends
// logic in SegmentInfos to kick in and load the last
// good (previous) segments_N-1 file.
- final String fileName = IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS,
- "",
- generation);
+ final String fileName = IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, "", generation);
success = false;
try {
dir.sync(Collections.singleton(fileName));
@@ -880,7 +824,7 @@ public final class SegmentInfos extends
try {
IndexOutput genOutput = dir.createOutput(IndexFileNames.SEGMENTS_GEN);
try {
- genOutput.writeInt(FORMAT_LOCKLESS);
+ genOutput.writeInt(FORMAT_SEGMENTS_GEN_CURRENT);
genOutput.writeLong(generation);
genOutput.writeLong(generation);
} finally {
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentReader.java?rev=949509&r1=949508&r2=949509&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentReader.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentReader.java Sun May 30 12:25:52 2010
@@ -728,7 +728,7 @@ public class SegmentReader extends Index
}
if (normsDirty) { // re-write norms
- si.setNumFields(core.fieldInfos.size());
+ si.initNormGen(core.fieldInfos.size());
for (final Norm norm : norms.values()) {
if (norm.dirty) {
norm.reWrite(si);
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/TermVectorsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/TermVectorsReader.java?rev=949509&r1=949508&r2=949509&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/TermVectorsReader.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/TermVectorsReader.java Sun May 30 12:25:52 2010
@@ -29,10 +29,6 @@ class TermVectorsReader implements Clone
// NOTE: if you make a new format, it must be larger than
// the current format
- static final int FORMAT_VERSION = 2;
-
- // Changes to speed up bulk merging of term vectors:
- static final int FORMAT_VERSION2 = 3;
// Changed strings to UTF8 with length-in-bytes not length-in-chars
static final int FORMAT_UTF8_LENGTH_IN_BYTES = 4;
@@ -87,13 +83,8 @@ class TermVectorsReader implements Clone
assert format == tvdFormat;
assert format == tvfFormat;
- if (format >= FORMAT_VERSION2) {
- assert (tvx.length()-FORMAT_SIZE) % 16 == 0;
- numTotalDocs = (int) (tvx.length() >> 4);
- } else {
- assert (tvx.length()-FORMAT_SIZE) % 8 == 0;
- numTotalDocs = (int) (tvx.length() >> 3);
- }
+ assert (tvx.length()-FORMAT_SIZE) % 16 == 0;
+ numTotalDocs = (int) (tvx.length() >> 4);
if (-1 == docStoreOffset) {
this.docStoreOffset = 0;
@@ -133,11 +124,8 @@ class TermVectorsReader implements Clone
return tvf;
}
- final private void seekTvx(final int docNum) throws IOException {
- if (format < FORMAT_VERSION2)
- tvx.seek((docNum + docStoreOffset) * 8L + FORMAT_SIZE);
- else
- tvx.seek((docNum + docStoreOffset) * 16L + FORMAT_SIZE);
+ private void seekTvx(final int docNum) throws IOException {
+ tvx.seek((docNum + docStoreOffset) * 16L + FORMAT_SIZE);
}
boolean canReadRawDocs() {
@@ -160,7 +148,7 @@ class TermVectorsReader implements Clone
// SegmentMerger calls canReadRawDocs() first and should
// not call us if that returns false.
- if (format < FORMAT_VERSION2)
+ if (format < FORMAT_UTF8_LENGTH_IN_BYTES)
throw new IllegalStateException("cannot read raw docs with older term vector formats");
seekTvx(startDocID);
@@ -242,11 +230,7 @@ class TermVectorsReader implements Clone
int number = 0;
int found = -1;
for (int i = 0; i < fieldCount; i++) {
- if (format >= FORMAT_VERSION)
- number = tvd.readVInt();
- else
- number += tvd.readVInt();
-
+ number = tvd.readVInt();
if (number == fieldNumber)
found = i;
}
@@ -255,11 +239,7 @@ class TermVectorsReader implements Clone
// document
if (found != -1) {
// Compute position in the tvf file
- long position;
- if (format >= FORMAT_VERSION2)
- position = tvx.readLong();
- else
- position = tvd.readVLong();
+ long position = tvx.readLong();
for (int i = 1; i <= found; i++)
position += tvd.readVLong();
@@ -292,16 +272,12 @@ class TermVectorsReader implements Clone
// Reads the String[] fields; you have to pre-seek tvd to
// the right point
- final private String[] readFields(int fieldCount) throws IOException {
+ private String[] readFields(int fieldCount) throws IOException {
int number = 0;
String[] fields = new String[fieldCount];
for (int i = 0; i < fieldCount; i++) {
- if (format >= FORMAT_VERSION)
- number = tvd.readVInt();
- else
- number += tvd.readVInt();
-
+ number = tvd.readVInt();
fields[i] = fieldInfos.fieldName(number);
}
@@ -310,13 +286,9 @@ class TermVectorsReader implements Clone
// Reads the long[] offsets into TVF; you have to pre-seek
// tvx/tvd to the right point
- final private long[] readTvfPointers(int fieldCount) throws IOException {
+ private long[] readTvfPointers(int fieldCount) throws IOException {
// Compute position in the tvf file
- long position;
- if (format >= FORMAT_VERSION2)
- position = tvx.readLong();
- else
- position = tvd.readVLong();
+ long position = tvx.readLong();
long[] tvfPointers = new long[fieldCount];
tvfPointers[0] = position;
@@ -425,32 +397,18 @@ class TermVectorsReader implements Clone
boolean storePositions;
boolean storeOffsets;
- if (format >= FORMAT_VERSION){
- byte bits = tvf.readByte();
- storePositions = (bits & STORE_POSITIONS_WITH_TERMVECTOR) != 0;
- storeOffsets = (bits & STORE_OFFSET_WITH_TERMVECTOR) != 0;
- }
- else{
- tvf.readVInt();
- storePositions = false;
- storeOffsets = false;
- }
+ byte bits = tvf.readByte();
+ storePositions = (bits & STORE_POSITIONS_WITH_TERMVECTOR) != 0;
+ storeOffsets = (bits & STORE_OFFSET_WITH_TERMVECTOR) != 0;
+
mapper.setExpectations(field, numTerms, storeOffsets, storePositions);
int start = 0;
int deltaLength = 0;
int totalLength = 0;
byte[] byteBuffer;
- char[] charBuffer;
- final boolean preUTF8 = format < FORMAT_UTF8_LENGTH_IN_BYTES;
- // init the buffers
- if (preUTF8) {
- charBuffer = new char[10];
- byteBuffer = null;
- } else {
- charBuffer = null;
- byteBuffer = new byte[20];
- }
+ // init the buffer
+ byteBuffer = new byte[20];
for (int i = 0; i < numTerms; i++) {
start = tvf.readVInt();
@@ -459,26 +417,17 @@ class TermVectorsReader implements Clone
final String term;
- if (preUTF8) {
- // Term stored as java chars
- if (charBuffer.length < totalLength) {
- charBuffer = ArrayUtil.grow(charBuffer, totalLength);
- }
- tvf.readChars(charBuffer, start, deltaLength);
- term = new String(charBuffer, 0, totalLength);
- } else {
- // Term stored as utf8 bytes
- if (byteBuffer.length < totalLength) {
- byteBuffer = ArrayUtil.grow(byteBuffer, totalLength);
- }
- tvf.readBytes(byteBuffer, start, deltaLength);
- term = new String(byteBuffer, 0, totalLength, "UTF-8");
+ // Term stored as utf8 bytes
+ if (byteBuffer.length < totalLength) {
+ byteBuffer = ArrayUtil.grow(byteBuffer, totalLength);
}
+ tvf.readBytes(byteBuffer, start, deltaLength);
+ term = new String(byteBuffer, 0, totalLength, "UTF-8");
int freq = tvf.readVInt();
int [] positions = null;
if (storePositions) { //read in the positions
//does the mapper even care about positions?
- if (mapper.isIgnoringPositions() == false) {
+ if (!mapper.isIgnoringPositions()) {
positions = new int[freq];
int prevPosition = 0;
for (int j = 0; j < freq; j++)
@@ -498,7 +447,7 @@ class TermVectorsReader implements Clone
TermVectorOffsetInfo[] offsets = null;
if (storeOffsets) {
//does the mapper even care about offsets?
- if (mapper.isIgnoringOffsets() == false) {
+ if (!mapper.isIgnoringOffsets()) {
offsets = new TermVectorOffsetInfo[freq];
int prevOffset = 0;
for (int j = 0; j < freq; j++) {
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/preflex/SegmentTermEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/preflex/SegmentTermEnum.java?rev=949509&r1=949508&r2=949509&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/preflex/SegmentTermEnum.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/preflex/SegmentTermEnum.java Sun May 30 12:25:52 2010
@@ -36,9 +36,6 @@ public final class SegmentTermEnum exten
long size;
long position = -1;
- /** The file format version, a negative number. */
- public static final int FORMAT = -3;
-
// Changed strings to true utf8 with length-in-bytes not
// length-in-chars
public static final int FORMAT_VERSION_UTF8_LENGTH_IN_BYTES = -4;
@@ -97,19 +94,11 @@ public final class SegmentTermEnum exten
} else {
indexInterval = input.readInt();
skipInterval = input.readInt();
- if (format <= FORMAT) {
- // this new format introduces multi-level skipping
- maxSkipLevels = input.readInt();
- }
+ maxSkipLevels = input.readInt();
}
assert indexInterval > 0: "indexInterval=" + indexInterval + " is negative; must be > 0";
assert skipInterval > 0: "skipInterval=" + skipInterval + " is negative; must be > 0";
}
- if (format > FORMAT_VERSION_UTF8_LENGTH_IN_BYTES) {
- termBuffer.setPreUTF8Strings();
- scanBuffer.setPreUTF8Strings();
- prevBuffer.setPreUTF8Strings();
- }
}
@Override
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/preflex/TermBuffer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/preflex/TermBuffer.java?rev=949509&r1=949508&r2=949509&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/preflex/TermBuffer.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/preflex/TermBuffer.java Sun May 30 12:25:52 2010
@@ -29,7 +29,6 @@ final class TermBuffer implements Clonea
private String field;
private Term term; // cached
- private boolean preUTF8Strings; // true if strings are stored in modified UTF8 encoding (LUCENE-510)
private boolean dirty; // true if text was set externally (ie not read via UTF8 bytes)
private UnicodeUtil.UTF16Result text = new UnicodeUtil.UTF16Result();
@@ -42,8 +41,8 @@ final class TermBuffer implements Clonea
return field.compareTo(other.field);
}
- private static final int compareChars(char[] chars1, int len1,
- char[] chars2, int len2) {
+ private static int compareChars(char[] chars1, int len1,
+ char[] chars2, int len2) {
final int end = len1 < len2 ? len1:len2;
for (int k = 0; k < end; k++) {
char c1 = chars1[k];
@@ -55,41 +54,28 @@ final class TermBuffer implements Clonea
return len1 - len2;
}
- /** Call this if the IndexInput passed to {@link #read}
- * stores terms in the "modified UTF8" (pre LUCENE-510)
- * format. */
- void setPreUTF8Strings() {
- preUTF8Strings = true;
- }
-
public final void read(IndexInput input, FieldInfos fieldInfos)
throws IOException {
this.term = null; // invalidate cache
int start = input.readVInt();
int length = input.readVInt();
int totalLength = start + length;
- if (preUTF8Strings) {
- text.setLength(totalLength);
- input.readChars(text.result, start, length);
+ if (dirty) {
+ // Fully convert all bytes since bytes is dirty
+ UnicodeUtil.UTF16toUTF8(text.result, 0, text.length, bytes);
+ if (bytes.bytes.length < totalLength)
+ bytes.bytes = new byte[totalLength];
+ bytes.length = totalLength;
+ input.readBytes(bytes.bytes, start, length);
+ UnicodeUtil.UTF8toUTF16(bytes.bytes, 0, totalLength, text);
+ dirty = false;
} else {
-
- if (dirty) {
- // Fully convert all bytes since bytes is dirty
- UnicodeUtil.UTF16toUTF8(text.result, 0, text.length, bytes);
- if (bytes.bytes.length < totalLength)
- bytes.bytes = new byte[totalLength];
- bytes.length = totalLength;
- input.readBytes(bytes.bytes, start, length);
- UnicodeUtil.UTF8toUTF16(bytes.bytes, 0, totalLength, text);
- dirty = false;
- } else {
- // Incrementally convert only the UTF8 bytes that are new:
- if (bytes.bytes.length < totalLength)
- bytes.bytes = ArrayUtil.grow(bytes.bytes, totalLength);
- bytes.length = totalLength;
- input.readBytes(bytes.bytes, start, length);
- UnicodeUtil.UTF8toUTF16(bytes.bytes, start, length, text);
- }
+ // Incrementally convert only the UTF8 bytes that are new:
+ if (bytes.bytes.length < totalLength)
+ bytes.bytes = ArrayUtil.grow(bytes.bytes, totalLength);
+ bytes.length = totalLength;
+ input.readBytes(bytes.bytes, start, length);
+ UnicodeUtil.UTF8toUTF16(bytes.bytes, start, length, text);
}
this.field = fieldInfos.fieldName(input.readVInt());
}
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/store/DataInput.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/store/DataInput.java?rev=949509&r1=949508&r2=949509&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/store/DataInput.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/store/DataInput.java Sun May 30 12:25:52 2010
@@ -29,8 +29,6 @@ import org.apache.lucene.util.RamUsageEs
* data types.
*/
public abstract class DataInput implements Cloneable {
- private boolean preUTF8Strings; // true if we are reading old (modified UTF8) string format
-
/** Reads and returns a single byte.
* @see DataOutput#writeByte(byte)
*/
@@ -114,89 +112,16 @@ public abstract class DataInput implemen
return i;
}
- /** Call this if readString should read characters stored
- * in the old modified UTF8 format (length in java chars
- * and java's modified UTF8 encoding). This is used for
- * indices written pre-2.4 See LUCENE-510 for details. */
- public void setModifiedUTF8StringsMode() {
- preUTF8Strings = true;
- }
-
/** Reads a string.
* @see DataOutput#writeString(String)
*/
public String readString() throws IOException {
- if (preUTF8Strings)
- return readModifiedUTF8String();
int length = readVInt();
final byte[] bytes = new byte[length];
readBytes(bytes, 0, length);
return new String(bytes, 0, length, "UTF-8");
}
- private String readModifiedUTF8String() throws IOException {
- int length = readVInt();
- final char[] chars = new char[length];
- readChars(chars, 0, length);
- return new String(chars, 0, length);
- }
-
- /** Reads Lucene's old "modified UTF-8" encoded
- * characters into an array.
- * @param buffer the array to read characters into
- * @param start the offset in the array to start storing characters
- * @param length the number of characters to read
- * @see DataOutput#writeChars(String,int,int)
- * @deprecated -- please use readString or readBytes
- * instead, and construct the string
- * from those utf8 bytes
- */
- @Deprecated
- public void readChars(char[] buffer, int start, int length)
- throws IOException {
- final int end = start + length;
- for (int i = start; i < end; i++) {
- byte b = readByte();
- if ((b & 0x80) == 0)
- buffer[i] = (char)(b & 0x7F);
- else if ((b & 0xE0) != 0xE0) {
- buffer[i] = (char)(((b & 0x1F) << 6)
- | (readByte() & 0x3F));
- } else {
- buffer[i] = (char)(((b & 0x0F) << 12)
- | ((readByte() & 0x3F) << 6)
- | (readByte() & 0x3F));
- }
- }
- }
-
- /**
- * Expert
- *
- * Similar to {@link #readChars(char[], int, int)} but does not do any conversion operations on the bytes it is reading in. It still
- * has to invoke {@link #readByte()} just as {@link #readChars(char[], int, int)} does, but it does not need a buffer to store anything
- * and it does not have to do any of the bitwise operations, since we don't actually care what is in the byte except to determine
- * how many more bytes to read
- * @param length The number of chars to read
- * @deprecated this method operates on old "modified utf8" encoded
- * strings
- */
- @Deprecated
- public void skipChars(int length) throws IOException{
- for (int i = 0; i < length; i++) {
- byte b = readByte();
- if ((b & 0x80) == 0){
- //do nothing, we only need one byte
- } else if ((b & 0xE0) != 0xE0) {
- readByte();//read an additional byte
- } else {
- //read two additional bytes.
- readByte();
- readByte();
- }
- }
- }
-
/** Returns a clone of this stream.
*
* <p>Clones of a stream access the same data, and are positioned at the same