You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2012/01/30 16:19:32 UTC
svn commit: r1237713 - in /lucene/dev/branches/lucene3661/lucene/src:
java/org/apache/lucene/codecs/lucene3x/
java/org/apache/lucene/codecs/lucene40/
test-framework/java/org/apache/lucene/codecs/preflexrw/
Author: rmuir
Date: Mon Jan 30 15:19:32 2012
New Revision: 1237713
URL: http://svn.apache.org/viewvc?rev=1237713&view=rev
Log:
LUCENE-3728: split stored fields into 3x/4x so more docstore stuff can go in 3x
Added:
lucene/dev/branches/lucene3661/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xStoredFieldsFormat.java (with props)
lucene/dev/branches/lucene3661/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xStoredFieldsReader.java (with props)
lucene/dev/branches/lucene3661/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexRWStoredFieldsFormat.java (with props)
lucene/dev/branches/lucene3661/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexRWStoredFieldsWriter.java (with props)
Modified:
lucene/dev/branches/lucene3661/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xCodec.java
lucene/dev/branches/lucene3661/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xSegmentInfosReader.java
lucene/dev/branches/lucene3661/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsReader.java
lucene/dev/branches/lucene3661/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsWriter.java
lucene/dev/branches/lucene3661/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexRWCodec.java
Modified: lucene/dev/branches/lucene3661/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xCodec.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3661/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xCodec.java?rev=1237713&r1=1237712&r2=1237713&view=diff
==============================================================================
--- lucene/dev/branches/lucene3661/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xCodec.java (original)
+++ lucene/dev/branches/lucene3661/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xCodec.java Mon Jan 30 15:19:32 2012
@@ -29,10 +29,8 @@ import org.apache.lucene.codecs.PerDocPr
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.SegmentInfosFormat;
import org.apache.lucene.codecs.StoredFieldsFormat;
-import org.apache.lucene.codecs.StoredFieldsWriter;
import org.apache.lucene.codecs.TermVectorsFormat;
import org.apache.lucene.codecs.lucene40.Lucene40LiveDocsFormat;
-import org.apache.lucene.codecs.lucene40.Lucene40StoredFieldsFormat;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.PerDocWriteState;
import org.apache.lucene.index.SegmentInfo;
@@ -53,13 +51,7 @@ public class Lucene3xCodec extends Codec
private final PostingsFormat postingsFormat = new Lucene3xPostingsFormat();
- // TODO: this should really be a different impl
- private final StoredFieldsFormat fieldsFormat = new Lucene40StoredFieldsFormat() {
- @Override
- public StoredFieldsWriter fieldsWriter(Directory directory, String segment, IOContext context) throws IOException {
- throw new UnsupportedOperationException("this codec can only be used for reading");
- }
- };
+ private final StoredFieldsFormat fieldsFormat = new Lucene3xStoredFieldsFormat();
private final TermVectorsFormat vectorsFormat = new Lucene3xTermVectorsFormat();
Modified: lucene/dev/branches/lucene3661/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xSegmentInfosReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3661/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xSegmentInfosReader.java?rev=1237713&r1=1237712&r2=1237713&view=diff
==============================================================================
--- lucene/dev/branches/lucene3661/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xSegmentInfosReader.java (original)
+++ lucene/dev/branches/lucene3661/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xSegmentInfosReader.java Mon Jan 30 15:19:32 2012
@@ -66,7 +66,7 @@ public class Lucene3xSegmentInfosReader
}
try {
- Lucene40StoredFieldsReader.checkCodeVersion(dir, si.getDocStoreSegment());
+ Lucene3xStoredFieldsReader.checkCodeVersion(dir, si.getDocStoreSegment());
} finally {
// If we opened the directory, close it
if (dir != directory) dir.close();
Added: lucene/dev/branches/lucene3661/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xStoredFieldsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3661/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xStoredFieldsFormat.java?rev=1237713&view=auto
==============================================================================
--- lucene/dev/branches/lucene3661/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xStoredFieldsFormat.java (added)
+++ lucene/dev/branches/lucene3661/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xStoredFieldsFormat.java Mon Jan 30 15:19:32 2012
@@ -0,0 +1,51 @@
+package org.apache.lucene.codecs.lucene3x;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Set;
+
+import org.apache.lucene.codecs.StoredFieldsFormat;
+import org.apache.lucene.codecs.StoredFieldsReader;
+import org.apache.lucene.codecs.StoredFieldsWriter;
+import org.apache.lucene.index.FieldInfos;
+import org.apache.lucene.index.SegmentInfo;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IOContext;
+
+/** @deprecated */
+@Deprecated
+public class Lucene3xStoredFieldsFormat extends StoredFieldsFormat {
+
+ @Override
+ public StoredFieldsReader fieldsReader(Directory directory, SegmentInfo si,
+ FieldInfos fn, IOContext context) throws IOException {
+ return new Lucene3xStoredFieldsReader(directory, si, fn, context);
+ }
+
+ @Override
+ public StoredFieldsWriter fieldsWriter(Directory directory, String segment,
+ IOContext context) throws IOException {
+ throw new UnsupportedOperationException("this codec can only be used for reading");
+ }
+
+ @Override
+ public void files(SegmentInfo info, Set<String> files) throws IOException {
+ Lucene3xStoredFieldsReader.files(info, files);
+ }
+}
Added: lucene/dev/branches/lucene3661/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xStoredFieldsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3661/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xStoredFieldsReader.java?rev=1237713&view=auto
==============================================================================
--- lucene/dev/branches/lucene3661/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xStoredFieldsReader.java (added)
+++ lucene/dev/branches/lucene3661/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xStoredFieldsReader.java Mon Jan 30 15:19:32 2012
@@ -0,0 +1,333 @@
+package org.apache.lucene.codecs.lucene3x;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.codecs.StoredFieldsReader;
+import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.FieldInfos;
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.IndexFormatTooNewException;
+import org.apache.lucene.index.IndexFormatTooOldException;
+import org.apache.lucene.index.SegmentInfo;
+import org.apache.lucene.index.StoredFieldVisitor;
+import org.apache.lucene.store.AlreadyClosedException;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IOContext;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.IOUtils;
+
+import java.io.Closeable;
+import java.nio.charset.Charset;
+import java.util.Set;
+
+/**
+ * Class responsible for access to stored document fields.
+ * <p/>
+ * It uses <segment>.fdt and <segment>.fdx files.
+ *
+ * @deprecated
+ */
+@Deprecated
+public final class Lucene3xStoredFieldsReader extends StoredFieldsReader implements Cloneable, Closeable {
+ private final static int FORMAT_SIZE = 4;
+
+ /** Extension of stored fields file */
+ public static final String FIELDS_EXTENSION = "fdt";
+
+ /** Extension of stored fields index file */
+ public static final String FIELDS_INDEX_EXTENSION = "fdx";
+
+ // Lucene 3.0: Removal of compressed fields
+ static final int FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS = 2;
+
+ // Lucene 3.2: NumericFields are stored in binary format
+ static final int FORMAT_LUCENE_3_2_NUMERIC_FIELDS = 3;
+
+ // NOTE: if you introduce a new format, make it 1 higher
+ // than the current one, and always change this if you
+ // switch to a new format!
+ public static final int FORMAT_CURRENT = FORMAT_LUCENE_3_2_NUMERIC_FIELDS;
+
+ // when removing support for old versions, leave the last supported version here
+ static final int FORMAT_MINIMUM = FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS;
+
+ // NOTE: bit 0 is free here! You can steal it!
+ public static final int FIELD_IS_BINARY = 1 << 1;
+
+ // the old bit 1 << 2 was compressed, is now left out
+
+ private static final int _NUMERIC_BIT_SHIFT = 3;
+ static final int FIELD_IS_NUMERIC_MASK = 0x07 << _NUMERIC_BIT_SHIFT;
+
+ public static final int FIELD_IS_NUMERIC_INT = 1 << _NUMERIC_BIT_SHIFT;
+ public static final int FIELD_IS_NUMERIC_LONG = 2 << _NUMERIC_BIT_SHIFT;
+ public static final int FIELD_IS_NUMERIC_FLOAT = 3 << _NUMERIC_BIT_SHIFT;
+ public static final int FIELD_IS_NUMERIC_DOUBLE = 4 << _NUMERIC_BIT_SHIFT;
+
+ private final FieldInfos fieldInfos;
+ private final IndexInput fieldsStream;
+ private final IndexInput indexStream;
+ private int numTotalDocs;
+ private int size;
+ private boolean closed;
+ private final int format;
+
+ // The docID offset where our docs begin in the index
+ // file. This will be 0 if we have our own private file.
+ private int docStoreOffset;
+
+ /** Returns a cloned FieldsReader that shares open
+ * IndexInputs with the original one. It is the caller's
+ * job not to close the original FieldsReader until all
+ * clones are closed (eg, currently SegmentReader manages
+ * this logic). */
+ @Override
+ public Lucene3xStoredFieldsReader clone() {
+ ensureOpen();
+ return new Lucene3xStoredFieldsReader(fieldInfos, numTotalDocs, size, format, docStoreOffset, (IndexInput)fieldsStream.clone(), (IndexInput)indexStream.clone());
+ }
+
+ /** Verifies that the code version which wrote the segment is supported. */
+ public static void checkCodeVersion(Directory dir, String segment) throws IOException {
+ final String indexStreamFN = IndexFileNames.segmentFileName(segment, "", FIELDS_INDEX_EXTENSION);
+ IndexInput idxStream = dir.openInput(indexStreamFN, IOContext.DEFAULT);
+
+ try {
+ int format = idxStream.readInt();
+ if (format < FORMAT_MINIMUM)
+ throw new IndexFormatTooOldException(idxStream, format, FORMAT_MINIMUM, FORMAT_CURRENT);
+ if (format > FORMAT_CURRENT)
+ throw new IndexFormatTooNewException(idxStream, format, FORMAT_MINIMUM, FORMAT_CURRENT);
+ } finally {
+ idxStream.close();
+ }
+ }
+
+ // Used only by clone
+ private Lucene3xStoredFieldsReader(FieldInfos fieldInfos, int numTotalDocs, int size, int format, int docStoreOffset,
+ IndexInput fieldsStream, IndexInput indexStream) {
+ this.fieldInfos = fieldInfos;
+ this.numTotalDocs = numTotalDocs;
+ this.size = size;
+ this.format = format;
+ this.docStoreOffset = docStoreOffset;
+ this.fieldsStream = fieldsStream;
+ this.indexStream = indexStream;
+ }
+
+ public Lucene3xStoredFieldsReader(Directory d, SegmentInfo si, FieldInfos fn, IOContext context) throws IOException {
+ final String segment = si.getDocStoreSegment();
+ final int docStoreOffset = si.getDocStoreOffset();
+ final int size = si.docCount;
+ boolean success = false;
+ fieldInfos = fn;
+ try {
+ fieldsStream = d.openInput(IndexFileNames.segmentFileName(segment, "", FIELDS_EXTENSION), context);
+ final String indexStreamFN = IndexFileNames.segmentFileName(segment, "", FIELDS_INDEX_EXTENSION);
+ indexStream = d.openInput(indexStreamFN, context);
+
+ format = indexStream.readInt();
+
+ if (format < FORMAT_MINIMUM)
+ throw new IndexFormatTooOldException(indexStream, format, FORMAT_MINIMUM, FORMAT_CURRENT);
+ if (format > FORMAT_CURRENT)
+ throw new IndexFormatTooNewException(indexStream, format, FORMAT_MINIMUM, FORMAT_CURRENT);
+
+ final long indexSize = indexStream.length() - FORMAT_SIZE;
+
+ if (docStoreOffset != -1) {
+ // We read only a slice out of this shared fields file
+ this.docStoreOffset = docStoreOffset;
+ this.size = size;
+
+ // Verify the file is long enough to hold all of our
+ // docs
+ assert ((int) (indexSize / 8)) >= size + this.docStoreOffset: "indexSize=" + indexSize + " size=" + size + " docStoreOffset=" + docStoreOffset;
+ } else {
+ this.docStoreOffset = 0;
+ this.size = (int) (indexSize >> 3);
+ // Verify two sources of "maxDoc" agree:
+ if (this.size != si.docCount) {
+ throw new CorruptIndexException("doc counts differ for segment " + segment + ": fieldsReader shows " + this.size + " but segmentInfo shows " + si.docCount);
+ }
+ }
+ numTotalDocs = (int) (indexSize >> 3);
+ success = true;
+ } finally {
+ // With lock-less commits, it's entirely possible (and
+ // fine) to hit a FileNotFound exception above. In
+ // this case, we want to explicitly close any subset
+ // of things that were opened so that we don't have to
+ // wait for a GC to do so.
+ if (!success) {
+ close();
+ }
+ }
+ }
+
+ /**
+ * @throws AlreadyClosedException if this FieldsReader is closed
+ */
+ private void ensureOpen() throws AlreadyClosedException {
+ if (closed) {
+ throw new AlreadyClosedException("this FieldsReader is closed");
+ }
+ }
+
+ /**
+ * Closes the underlying {@link org.apache.lucene.store.IndexInput} streams.
+ * This means that the Fields values will not be accessible.
+ *
+ * @throws IOException
+ */
+ public final void close() throws IOException {
+ if (!closed) {
+ IOUtils.close(fieldsStream, indexStream);
+ closed = true;
+ }
+ }
+
+ public final int size() {
+ return size;
+ }
+
+ private void seekIndex(int docID) throws IOException {
+ indexStream.seek(FORMAT_SIZE + (docID + docStoreOffset) * 8L);
+ }
+
+ public final void visitDocument(int n, StoredFieldVisitor visitor) throws CorruptIndexException, IOException {
+ seekIndex(n);
+ fieldsStream.seek(indexStream.readLong());
+
+ final int numFields = fieldsStream.readVInt();
+ for (int fieldIDX = 0; fieldIDX < numFields; fieldIDX++) {
+ int fieldNumber = fieldsStream.readVInt();
+ FieldInfo fieldInfo = fieldInfos.fieldInfo(fieldNumber);
+
+ int bits = fieldsStream.readByte() & 0xFF;
+ assert bits <= (FIELD_IS_NUMERIC_MASK | FIELD_IS_BINARY): "bits=" + Integer.toHexString(bits);
+
+ switch(visitor.needsField(fieldInfo)) {
+ case YES:
+ readField(visitor, fieldInfo, bits);
+ break;
+ case NO:
+ skipField(bits);
+ break;
+ case STOP:
+ return;
+ }
+ }
+ }
+
+ private void readField(StoredFieldVisitor visitor, FieldInfo info, int bits) throws IOException {
+ final int numeric = bits & FIELD_IS_NUMERIC_MASK;
+ if (numeric != 0) {
+ switch(numeric) {
+ case FIELD_IS_NUMERIC_INT:
+ visitor.intField(info, fieldsStream.readInt());
+ return;
+ case FIELD_IS_NUMERIC_LONG:
+ visitor.longField(info, fieldsStream.readLong());
+ return;
+ case FIELD_IS_NUMERIC_FLOAT:
+ visitor.floatField(info, Float.intBitsToFloat(fieldsStream.readInt()));
+ return;
+ case FIELD_IS_NUMERIC_DOUBLE:
+ visitor.doubleField(info, Double.longBitsToDouble(fieldsStream.readLong()));
+ return;
+ default:
+ throw new CorruptIndexException("Invalid numeric type: " + Integer.toHexString(numeric));
+ }
+ } else {
+ final int length = fieldsStream.readVInt();
+ byte bytes[] = new byte[length];
+ fieldsStream.readBytes(bytes, 0, length);
+ if ((bits & FIELD_IS_BINARY) != 0) {
+ visitor.binaryField(info, bytes, 0, bytes.length);
+ } else {
+ visitor.stringField(info, new String(bytes, 0, bytes.length, IOUtils.CHARSET_UTF_8));
+ }
+ }
+ }
+
+ private void skipField(int bits) throws IOException {
+ final int numeric = bits & FIELD_IS_NUMERIC_MASK;
+ if (numeric != 0) {
+ switch(numeric) {
+ case FIELD_IS_NUMERIC_INT:
+ case FIELD_IS_NUMERIC_FLOAT:
+ fieldsStream.readInt();
+ return;
+ case FIELD_IS_NUMERIC_LONG:
+ case FIELD_IS_NUMERIC_DOUBLE:
+ fieldsStream.readLong();
+ return;
+ default:
+ throw new CorruptIndexException("Invalid numeric type: " + Integer.toHexString(numeric));
+ }
+ } else {
+ final int length = fieldsStream.readVInt();
+ fieldsStream.seek(fieldsStream.getFilePointer() + length);
+ }
+ }
+
+ /** Returns the length in bytes of each raw document in a
+ * contiguous range of length numDocs starting with
+ * startDocID. Returns the IndexInput (the fieldStream),
+ * already seeked to the starting point for startDocID.*/
+ public final IndexInput rawDocs(int[] lengths, int startDocID, int numDocs) throws IOException {
+ seekIndex(startDocID);
+ long startOffset = indexStream.readLong();
+ long lastOffset = startOffset;
+ int count = 0;
+ while (count < numDocs) {
+ final long offset;
+ final int docID = docStoreOffset + startDocID + count + 1;
+ assert docID <= numTotalDocs;
+ if (docID < numTotalDocs)
+ offset = indexStream.readLong();
+ else
+ offset = fieldsStream.length();
+ lengths[count++] = (int) (offset-lastOffset);
+ lastOffset = offset;
+ }
+
+ fieldsStream.seek(startOffset);
+
+ return fieldsStream;
+ }
+
+ // TODO: split into PreFlexFieldsReader so it can handle this shared docstore crap?
+ // only preflex segments refer to these?
+ public static void files(SegmentInfo info, Set<String> files) throws IOException {
+ if (info.getDocStoreOffset() != -1) {
+ assert info.getDocStoreSegment() != null;
+ if (!info.getDocStoreIsCompoundFile()) {
+ files.add(IndexFileNames.segmentFileName(info.getDocStoreSegment(), "", FIELDS_INDEX_EXTENSION));
+ files.add(IndexFileNames.segmentFileName(info.getDocStoreSegment(), "", FIELDS_EXTENSION));
+ }
+ } else {
+ files.add(IndexFileNames.segmentFileName(info.name, "", FIELDS_INDEX_EXTENSION));
+ files.add(IndexFileNames.segmentFileName(info.name, "", FIELDS_EXTENSION));
+ }
+ }
+}
Modified: lucene/dev/branches/lucene3661/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3661/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsReader.java?rev=1237713&r1=1237712&r2=1237713&view=diff
==============================================================================
--- lucene/dev/branches/lucene3661/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsReader.java (original)
+++ lucene/dev/branches/lucene3661/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsReader.java Mon Jan 30 15:19:32 2012
@@ -24,8 +24,6 @@ import org.apache.lucene.index.CorruptIn
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexFileNames;
-import org.apache.lucene.index.IndexFormatTooNewException;
-import org.apache.lucene.index.IndexFormatTooOldException;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.StoredFieldVisitor;
import org.apache.lucene.store.AlreadyClosedException;
@@ -35,7 +33,6 @@ import org.apache.lucene.store.IndexInpu
import org.apache.lucene.util.IOUtils;
import java.io.Closeable;
-import java.nio.charset.Charset;
import java.util.Set;
/**
@@ -54,11 +51,6 @@ public final class Lucene40StoredFieldsR
private int numTotalDocs;
private int size;
private boolean closed;
- private final int format;
-
- // The docID offset where our docs begin in the index
- // file. This will be 0 if we have our own private file.
- private int docStoreOffset;
/** Returns a cloned FieldsReader that shares open
* IndexInputs with the original one. It is the caller's
@@ -68,41 +60,20 @@ public final class Lucene40StoredFieldsR
@Override
public Lucene40StoredFieldsReader clone() {
ensureOpen();
- return new Lucene40StoredFieldsReader(fieldInfos, numTotalDocs, size, format, docStoreOffset, (IndexInput)fieldsStream.clone(), (IndexInput)indexStream.clone());
- }
-
- /** Verifies that the code version which wrote the segment is supported. */
- public static void checkCodeVersion(Directory dir, String segment) throws IOException {
- final String indexStreamFN = IndexFileNames.segmentFileName(segment, "", Lucene40StoredFieldsWriter.FIELDS_INDEX_EXTENSION);
- IndexInput idxStream = dir.openInput(indexStreamFN, IOContext.DEFAULT);
-
- try {
- int format = idxStream.readInt();
- if (format < Lucene40StoredFieldsWriter.FORMAT_MINIMUM)
- throw new IndexFormatTooOldException(idxStream, format, Lucene40StoredFieldsWriter.FORMAT_MINIMUM, Lucene40StoredFieldsWriter.FORMAT_CURRENT);
- if (format > Lucene40StoredFieldsWriter.FORMAT_CURRENT)
- throw new IndexFormatTooNewException(idxStream, format, Lucene40StoredFieldsWriter.FORMAT_MINIMUM, Lucene40StoredFieldsWriter.FORMAT_CURRENT);
- } finally {
- idxStream.close();
- }
+ return new Lucene40StoredFieldsReader(fieldInfos, numTotalDocs, size, (IndexInput)fieldsStream.clone(), (IndexInput)indexStream.clone());
}
// Used only by clone
- private Lucene40StoredFieldsReader(FieldInfos fieldInfos, int numTotalDocs, int size, int format, int docStoreOffset,
- IndexInput fieldsStream, IndexInput indexStream) {
+ private Lucene40StoredFieldsReader(FieldInfos fieldInfos, int numTotalDocs, int size, IndexInput fieldsStream, IndexInput indexStream) {
this.fieldInfos = fieldInfos;
this.numTotalDocs = numTotalDocs;
this.size = size;
- this.format = format;
- this.docStoreOffset = docStoreOffset;
this.fieldsStream = fieldsStream;
this.indexStream = indexStream;
}
public Lucene40StoredFieldsReader(Directory d, SegmentInfo si, FieldInfos fn, IOContext context) throws IOException {
- final String segment = si.getDocStoreSegment();
- final int docStoreOffset = si.getDocStoreOffset();
- final int size = si.docCount;
+ final String segment = si.name;
boolean success = false;
fieldInfos = fn;
try {
@@ -110,30 +81,17 @@ public final class Lucene40StoredFieldsR
final String indexStreamFN = IndexFileNames.segmentFileName(segment, "", Lucene40StoredFieldsWriter.FIELDS_INDEX_EXTENSION);
indexStream = d.openInput(indexStreamFN, context);
- format = indexStream.readInt();
-
- if (format < Lucene40StoredFieldsWriter.FORMAT_MINIMUM)
- throw new IndexFormatTooOldException(indexStream, format, Lucene40StoredFieldsWriter.FORMAT_MINIMUM, Lucene40StoredFieldsWriter.FORMAT_CURRENT);
- if (format > Lucene40StoredFieldsWriter.FORMAT_CURRENT)
- throw new IndexFormatTooNewException(indexStream, format, Lucene40StoredFieldsWriter.FORMAT_MINIMUM, Lucene40StoredFieldsWriter.FORMAT_CURRENT);
+ // it's a 4.0 codec: so it's not too-old, it's corrupt.
+ // TODO: change this to CodecUtil.checkHeader
+ if (Lucene40StoredFieldsWriter.FORMAT_CURRENT != indexStream.readInt()) {
+ throw new CorruptIndexException("unexpected fdx header: " + indexStream);
+ }
final long indexSize = indexStream.length() - FORMAT_SIZE;
-
- if (docStoreOffset != -1) {
- // We read only a slice out of this shared fields file
- this.docStoreOffset = docStoreOffset;
- this.size = size;
-
- // Verify the file is long enough to hold all of our
- // docs
- assert ((int) (indexSize / 8)) >= size + this.docStoreOffset: "indexSize=" + indexSize + " size=" + size + " docStoreOffset=" + docStoreOffset;
- } else {
- this.docStoreOffset = 0;
- this.size = (int) (indexSize >> 3);
- // Verify two sources of "maxDoc" agree:
- if (this.size != si.docCount) {
- throw new CorruptIndexException("doc counts differ for segment " + segment + ": fieldsReader shows " + this.size + " but segmentInfo shows " + si.docCount);
- }
+ this.size = (int) (indexSize >> 3);
+ // Verify two sources of "maxDoc" agree:
+ if (this.size != si.docCount) {
+ throw new CorruptIndexException("doc counts differ for segment " + segment + ": fieldsReader shows " + this.size + " but segmentInfo shows " + si.docCount);
}
numTotalDocs = (int) (indexSize >> 3);
success = true;
@@ -176,7 +134,7 @@ public final class Lucene40StoredFieldsR
}
private void seekIndex(int docID) throws IOException {
- indexStream.seek(FORMAT_SIZE + (docID + docStoreOffset) * 8L);
+ indexStream.seek(FORMAT_SIZE + docID * 8L);
}
public final void visitDocument(int n, StoredFieldVisitor visitor) throws CorruptIndexException, IOException {
@@ -203,8 +161,6 @@ public final class Lucene40StoredFieldsR
}
}
}
-
- static final Charset UTF8 = Charset.forName("UTF-8");
private void readField(StoredFieldVisitor visitor, FieldInfo info, int bits) throws IOException {
final int numeric = bits & Lucene40StoredFieldsWriter.FIELD_IS_NUMERIC_MASK;
@@ -232,7 +188,7 @@ public final class Lucene40StoredFieldsR
if ((bits & Lucene40StoredFieldsWriter.FIELD_IS_BINARY) != 0) {
visitor.binaryField(info, bytes, 0, bytes.length);
} else {
- visitor.stringField(info, new String(bytes, 0, bytes.length, UTF8));
+ visitor.stringField(info, new String(bytes, 0, bytes.length, IOUtils.CHARSET_UTF_8));
}
}
}
@@ -269,7 +225,7 @@ public final class Lucene40StoredFieldsR
int count = 0;
while (count < numDocs) {
final long offset;
- final int docID = docStoreOffset + startDocID + count + 1;
+ final int docID = startDocID + count + 1;
assert docID <= numTotalDocs;
if (docID < numTotalDocs)
offset = indexStream.readLong();
@@ -283,19 +239,9 @@ public final class Lucene40StoredFieldsR
return fieldsStream;
}
-
- // TODO: split into PreFlexFieldsReader so it can handle this shared docstore crap?
- // only preflex segments refer to these?
+
public static void files(SegmentInfo info, Set<String> files) throws IOException {
- if (info.getDocStoreOffset() != -1) {
- assert info.getDocStoreSegment() != null;
- if (!info.getDocStoreIsCompoundFile()) {
- files.add(IndexFileNames.segmentFileName(info.getDocStoreSegment(), "", Lucene40StoredFieldsWriter.FIELDS_INDEX_EXTENSION));
- files.add(IndexFileNames.segmentFileName(info.getDocStoreSegment(), "", Lucene40StoredFieldsWriter.FIELDS_EXTENSION));
- }
- } else {
- files.add(IndexFileNames.segmentFileName(info.name, "", Lucene40StoredFieldsWriter.FIELDS_INDEX_EXTENSION));
- files.add(IndexFileNames.segmentFileName(info.name, "", Lucene40StoredFieldsWriter.FIELDS_EXTENSION));
- }
+ files.add(IndexFileNames.segmentFileName(info.name, "", Lucene40StoredFieldsWriter.FIELDS_INDEX_EXTENSION));
+ files.add(IndexFileNames.segmentFileName(info.name, "", Lucene40StoredFieldsWriter.FIELDS_EXTENSION));
}
}
Modified: lucene/dev/branches/lucene3661/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3661/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsWriter.java?rev=1237713&r1=1237712&r2=1237713&view=diff
==============================================================================
--- lucene/dev/branches/lucene3661/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsWriter.java (original)
+++ lucene/dev/branches/lucene3661/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsWriter.java Mon Jan 30 15:19:32 2012
@@ -55,10 +55,7 @@ public final class Lucene40StoredFieldsW
// currently unused: static final int FIELD_IS_NUMERIC_SHORT = 5 << _NUMERIC_BIT_SHIFT;
// currently unused: static final int FIELD_IS_NUMERIC_BYTE = 6 << _NUMERIC_BIT_SHIFT;
- // Lucene 3.0: Removal of compressed fields
- static final int FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS = 2;
-
- // Lucene 3.2: NumericFields are stored in binary format
+ // (Happens to be the same as the 3.x format for now) Lucene 3.2: NumericFields are stored in binary format
static final int FORMAT_LUCENE_3_2_NUMERIC_FIELDS = 3;
// NOTE: if you introduce a new format, make it 1 higher
@@ -67,7 +64,7 @@ public final class Lucene40StoredFieldsW
static final int FORMAT_CURRENT = FORMAT_LUCENE_3_2_NUMERIC_FIELDS;
// when removing support for old versions, leave the last supported version here
- static final int FORMAT_MINIMUM = FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS;
+ static final int FORMAT_MINIMUM = FORMAT_LUCENE_3_2_NUMERIC_FIELDS;
/** Extension of stored fields file */
public static final String FIELDS_EXTENSION = "fdt";
Modified: lucene/dev/branches/lucene3661/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexRWCodec.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3661/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexRWCodec.java?rev=1237713&r1=1237712&r2=1237713&view=diff
==============================================================================
--- lucene/dev/branches/lucene3661/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexRWCodec.java (original)
+++ lucene/dev/branches/lucene3661/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexRWCodec.java Mon Jan 30 15:19:32 2012
@@ -45,10 +45,9 @@ public class PreFlexRWCodec extends Luce
private final FieldInfosFormat fieldInfos = new PreFlexRWFieldInfosFormat();
private final TermVectorsFormat termVectors = new PreFlexRWTermVectorsFormat();
private final SegmentInfosFormat segmentInfos = new PreFlexRWSegmentInfosFormat();
+ private final StoredFieldsFormat storedFields = new PreFlexRWStoredFieldsFormat();
// TODO: this should really be a different impl
private final LiveDocsFormat liveDocs = new Lucene40LiveDocsFormat();
- // TODO: this should really be a different impl
- private final StoredFieldsFormat storedFields = new Lucene40StoredFieldsFormat();
@Override
public PostingsFormat postingsFormat() {
Added: lucene/dev/branches/lucene3661/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexRWStoredFieldsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3661/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexRWStoredFieldsFormat.java?rev=1237713&view=auto
==============================================================================
--- lucene/dev/branches/lucene3661/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexRWStoredFieldsFormat.java (added)
+++ lucene/dev/branches/lucene3661/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexRWStoredFieldsFormat.java Mon Jan 30 15:19:32 2012
@@ -0,0 +1,43 @@
+package org.apache.lucene.codecs.preflexrw;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.codecs.StoredFieldsWriter;
+import org.apache.lucene.codecs.lucene3x.Lucene3xStoredFieldsFormat;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IOContext;
+
+/**
+ * Stored fields format of the {@code preflexrw} test-framework codec.
+ * <p>
+ * Behaves like {@link Lucene3xStoredFieldsFormat} except that
+ * {@link #fieldsWriter} hands out a {@link PreFlexRWStoredFieldsWriter}.
+ *
+ * @lucene.experimental
+ */
+public class PreFlexRWStoredFieldsFormat extends Lucene3xStoredFieldsFormat {
+
+  /** Creates a {@link PreFlexRWStoredFieldsWriter} for {@code segment} in {@code directory}. */
+  @Override
+  public StoredFieldsWriter fieldsWriter(Directory directory, String segment, IOContext context) throws IOException {
+    return new PreFlexRWStoredFieldsWriter(directory, segment, context);
+  }
+
+}
Added: lucene/dev/branches/lucene3661/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexRWStoredFieldsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3661/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexRWStoredFieldsWriter.java?rev=1237713&view=auto
==============================================================================
--- lucene/dev/branches/lucene3661/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexRWStoredFieldsWriter.java (added)
+++ lucene/dev/branches/lucene3661/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexRWStoredFieldsWriter.java Mon Jan 30 15:19:32 2012
@@ -0,0 +1,192 @@
+package org.apache.lucene.codecs.preflexrw;
+
+/**
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.codecs.StoredFieldsWriter;
+import org.apache.lucene.codecs.lucene3x.Lucene3xStoredFieldsReader;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.IndexableField;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IOContext;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.IOUtils;
+
+/**
+ * Writes stored fields for one segment into two files: a fields data stream
+ * and a fields index stream, named with the extensions and stamped with the
+ * {@code FORMAT_CURRENT} version declared by {@link Lucene3xStoredFieldsReader}.
+ *
+ * @lucene.experimental
+ */
+public final class PreFlexRWStoredFieldsWriter extends StoredFieldsWriter {
+  private final Directory directory;
+  private final String segment;
+  private IndexOutput fieldsStream;
+  private IndexOutput indexStream;
+
+  /**
+   * Creates both output files for {@code segment} and writes the format
+   * version to each. If any step fails, {@link #abort()} closes and deletes
+   * whatever was created before the exception propagates.
+   */
+  public PreFlexRWStoredFieldsWriter(Directory directory, String segment, IOContext context) throws IOException {
+    assert directory != null;
+    this.directory = directory;
+    this.segment = segment;
+
+    boolean success = false;
+    try {
+      fieldsStream = directory.createOutput(IndexFileNames.segmentFileName(segment, "", Lucene3xStoredFieldsReader.FIELDS_EXTENSION), context);
+      indexStream = directory.createOutput(IndexFileNames.segmentFileName(segment, "", Lucene3xStoredFieldsReader.FIELDS_INDEX_EXTENSION), context);
+
+      fieldsStream.writeInt(Lucene3xStoredFieldsReader.FORMAT_CURRENT);
+      indexStream.writeInt(Lucene3xStoredFieldsReader.FORMAT_CURRENT);
+
+      success = true;
+    } finally {
+      if (!success) {
+        abort();
+      }
+    }
+  }
+
+  /**
+   * Begins a new document: records the current fields-stream offset as this
+   * document's entry in the index stream, then writes the number of stored
+   * fields to the fields stream.
+   */
+  @Override
+  public void startDocument(int numStoredFields) throws IOException {
+    indexStream.writeLong(fieldsStream.getFilePointer());
+    fieldsStream.writeVInt(numStoredFields);
+  }
+
+  /** Closes both streams and clears the references, even if closing throws. */
+  @Override
+  public void close() throws IOException {
+    try {
+      IOUtils.close(fieldsStream, indexStream);
+    } finally {
+      fieldsStream = indexStream = null;
+    }
+  }
+
+  /** Best-effort cleanup: closes the streams, then deletes both files. */
+  @Override
+  public void abort() {
+    try {
+      close();
+    } catch (IOException ignored) {
+      // both files are deleted right below, so a failed close does not matter
+    }
+    IOUtils.deleteFilesIgnoringExceptions(directory,
+        IndexFileNames.segmentFileName(segment, "", Lucene3xStoredFieldsReader.FIELDS_EXTENSION),
+        IndexFileNames.segmentFileName(segment, "", Lucene3xStoredFieldsReader.FIELDS_INDEX_EXTENSION));
+  }
+
+  /**
+   * Writes one stored field: the field number, a flags byte describing the
+   * value type, then the value itself. The value is looked up as numeric,
+   * then binary, then string.
+   *
+   * @throws IllegalArgumentException if the field has no numeric, binary or
+   *         string value, or if its numeric value has an unsupported type
+   */
+  @Override
+  public void writeField(FieldInfo info, IndexableField field) throws IOException {
+    fieldsStream.writeVInt(info.number);
+    int bits = 0;
+    final BytesRef bytes;
+    final String string;
+    // TODO: maybe a field should serialize itself?
+    // this way we don't bake into indexer all these
+    // specific encodings for different fields? and apps
+    // can customize...
+
+    Number number = field.numericValue();
+    if (number != null) {
+      if (number instanceof Byte || number instanceof Short || number instanceof Integer) {
+        bits |= Lucene3xStoredFieldsReader.FIELD_IS_NUMERIC_INT;
+      } else if (number instanceof Long) {
+        bits |= Lucene3xStoredFieldsReader.FIELD_IS_NUMERIC_LONG;
+      } else if (number instanceof Float) {
+        bits |= Lucene3xStoredFieldsReader.FIELD_IS_NUMERIC_FLOAT;
+      } else if (number instanceof Double) {
+        bits |= Lucene3xStoredFieldsReader.FIELD_IS_NUMERIC_DOUBLE;
+      } else {
+        throw new IllegalArgumentException("cannot store numeric type " + number.getClass());
+      }
+      string = null;
+      bytes = null;
+    } else {
+      bytes = field.binaryValue();
+      if (bytes != null) {
+        bits |= Lucene3xStoredFieldsReader.FIELD_IS_BINARY;
+        string = null;
+      } else {
+        string = field.stringValue();
+        if (string == null) {
+          throw new IllegalArgumentException("field " + field.name() + " is stored but does not have binaryValue, stringValue nor numericValue");
+        }
+      }
+    }
+
+    fieldsStream.writeByte((byte) bits);
+
+    if (bytes != null) {
+      fieldsStream.writeVInt(bytes.length);
+      fieldsStream.writeBytes(bytes.bytes, bytes.offset, bytes.length);
+    } else if (string != null) {
+      // write the value validated above rather than asking the field again
+      fieldsStream.writeString(string);
+    } else {
+      if (number instanceof Byte || number instanceof Short || number instanceof Integer) {
+        fieldsStream.writeInt(number.intValue());
+      } else if (number instanceof Long) {
+        fieldsStream.writeLong(number.longValue());
+      } else if (number instanceof Float) {
+        fieldsStream.writeInt(Float.floatToIntBits(number.floatValue()));
+      } else if (number instanceof Double) {
+        fieldsStream.writeLong(Double.doubleToLongBits(number.doubleValue()));
+      } else {
+        assert false;
+      }
+    }
+  }
+
+  /**
+   * Sanity-checks the index stream: it must hold the 4-byte format version
+   * plus one 8-byte pointer per document.
+   *
+   * @throws RuntimeException if the index stream position does not match {@code numDocs}
+   */
+  @Override
+  public void finish(int numDocs) throws IOException {
+    if (4+((long) numDocs)*8 != indexStream.getFilePointer()) {
+      // This is most likely a bug in Sun JRE 1.6.0_04/_05;
+      // we detect that the bug has struck, here, and
+      // throw an exception to prevent the corruption from
+      // entering the index. See LUCENE-1282 for
+      // details.
+      throw new RuntimeException("fdx size mismatch: docCount is " + numDocs + " but fdx file size is " + indexStream.getFilePointer() + " file=" + indexStream.toString() + "; now aborting this merge to prevent index corruption");
+    }
+  }
+}