You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by mi...@apache.org on 2009/07/09 14:53:29 UTC
svn commit: r792535 - in /lucene/java/trunk: ./ src/java/org/apache/lucene/index/ src/test/org/apache/lucene/index/
Author: mikemccand
Date: Thu Jul 9 12:53:28 2009
New Revision: 792535
URL: http://svn.apache.org/viewvc?rev=792535&view=rev
Log:
LUCENE-1727: make sure fields are stored in the exact order they were added to the document
Removed:
lucene/java/trunk/src/java/org/apache/lucene/index/StoredFieldsWriterPerField.java
Modified:
lucene/java/trunk/CHANGES.txt
lucene/java/trunk/src/java/org/apache/lucene/index/DocFieldProcessor.java
lucene/java/trunk/src/java/org/apache/lucene/index/DocFieldProcessorPerThread.java
lucene/java/trunk/src/java/org/apache/lucene/index/DocumentsWriter.java
lucene/java/trunk/src/java/org/apache/lucene/index/StoredFieldsWriter.java
lucene/java/trunk/src/java/org/apache/lucene/index/StoredFieldsWriterPerThread.java
lucene/java/trunk/src/test/org/apache/lucene/index/TestIndexWriter.java
Modified: lucene/java/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/CHANGES.txt?rev=792535&r1=792534&r2=792535&view=diff
==============================================================================
--- lucene/java/trunk/CHANGES.txt (original)
+++ lucene/java/trunk/CHANGES.txt Thu Jul 9 12:53:28 2009
@@ -130,6 +130,11 @@
9. LUCENE-1717: Fixed IndexWriter to account for RAM usage of
buffered deletions. (Mike McCandless)
+10. LUCENE-1727: Ensure that fields are stored & retrieved in the
+ exact order in which they were added to the document. This was
+ true in all Lucene releases before 2.3, but was broken in 2.3 and
+ 2.4, and is now fixed in 2.9. (Mike McCandless)
+
API Changes
1. LUCENE-1419: Add expert API to set custom indexing chain. This API is
Modified: lucene/java/trunk/src/java/org/apache/lucene/index/DocFieldProcessor.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/DocFieldProcessor.java?rev=792535&r1=792534&r2=792535&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/DocFieldProcessor.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/DocFieldProcessor.java Thu Jul 9 12:53:28 2009
@@ -36,15 +36,18 @@
final DocumentsWriter docWriter;
final FieldInfos fieldInfos = new FieldInfos();
final DocFieldConsumer consumer;
+ final StoredFieldsWriter fieldsWriter;
public DocFieldProcessor(DocumentsWriter docWriter, DocFieldConsumer consumer) {
this.docWriter = docWriter;
this.consumer = consumer;
consumer.setFieldInfos(fieldInfos);
+ fieldsWriter = new StoredFieldsWriter(docWriter, fieldInfos);
}
public void closeDocStore(SegmentWriteState state) throws IOException {
consumer.closeDocStore(state);
+ fieldsWriter.closeDocStore(state);
}
public void flush(Collection threads, SegmentWriteState state) throws IOException {
@@ -56,7 +59,7 @@
childThreadsAndFields.put(perThread.consumer, perThread.fields());
perThread.trimFields(state);
}
-
+ fieldsWriter.flush(state);
consumer.flush(childThreadsAndFields, state);
// Important to save after asking consumer to flush so
@@ -69,6 +72,7 @@
}
public void abort() {
+ fieldsWriter.abort();
consumer.abort();
}
Modified: lucene/java/trunk/src/java/org/apache/lucene/index/DocFieldProcessorPerThread.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/DocFieldProcessorPerThread.java?rev=792535&r1=792534&r2=792535&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/DocFieldProcessorPerThread.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/DocFieldProcessorPerThread.java Thu Jul 9 12:53:28 2009
@@ -23,6 +23,7 @@
import java.io.IOException;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Fieldable;
+import org.apache.lucene.util.ArrayUtil;
/**
* Gathers all Fieldables for a document under the same
@@ -50,13 +51,16 @@
int hashMask = 1;
int totalFieldCount;
+ final StoredFieldsWriterPerThread fieldsWriter;
+
final DocumentsWriter.DocState docState;
-
+
public DocFieldProcessorPerThread(DocumentsWriterThreadState threadState, DocFieldProcessor docFieldProcessor) throws IOException {
this.docState = threadState.docState;
this.docFieldProcessor = docFieldProcessor;
this.fieldInfos = docFieldProcessor.fieldInfos;
this.consumer = docFieldProcessor.consumer.addThread(this);
+ fieldsWriter = docFieldProcessor.fieldsWriter.addThread(docState);
}
public void abort() {
@@ -68,6 +72,7 @@
field = next;
}
}
+ fieldsWriter.abort();
consumer.abort();
}
@@ -148,6 +153,8 @@
public DocumentsWriter.DocWriter processDocument() throws IOException {
consumer.startDocument();
+ fieldsWriter.startDocument();
+
final Document doc = docState.doc;
assert docFieldProcessor.docWriter.writer.testPoint("DocumentsWriter.ThreadState.init start");
@@ -220,6 +227,9 @@
}
fp.fields[fp.fieldCount++] = field;
+ if (field.isStored()) {
+ fieldsWriter.addField(field, fp.fieldInfo);
+ }
}
// If we are writing vectors then we must visit
@@ -236,7 +246,21 @@
if (docState.maxTermPrefix != null && docState.infoStream != null)
docState.infoStream.println("WARNING: document contains at least one immense term (longer than the max length " + DocumentsWriter.MAX_TERM_LENGTH + "), all of which were skipped. Please correct the analyzer to not produce such terms. The prefix of the first immense term is: '" + docState.maxTermPrefix + "...'");
- return consumer.finishDocument();
+ final DocumentsWriter.DocWriter one = fieldsWriter.finishDocument();
+ final DocumentsWriter.DocWriter two = consumer.finishDocument();
+ if (one == null) {
+ return two;
+ } else if (two == null) {
+ return one;
+ } else {
+ PerDoc both = getPerDoc();
+ both.docID = docState.docID;
+ assert one.docID == docState.docID;
+ assert two.docID == docState.docID;
+ both.one = one;
+ both.two = two;
+ return both;
+ }
}
void quickSort(DocFieldProcessorPerField[] array, int lo, int hi) {
@@ -299,4 +323,62 @@
quickSort(array, lo, left);
quickSort(array, left + 1, hi);
}
+
+ PerDoc[] docFreeList = new PerDoc[1];
+ int freeCount;
+ int allocCount;
+
+ synchronized PerDoc getPerDoc() {
+ if (freeCount == 0) {
+ allocCount++;
+ if (allocCount > docFreeList.length) {
+ // Grow our free list up front to make sure we have
+ // enough space to recycle all outstanding PerDoc
+ // instances
+ assert allocCount == 1+docFreeList.length;
+ docFreeList = new PerDoc[ArrayUtil.getNextSize(allocCount)];
+ }
+ return new PerDoc();
+ } else
+ return docFreeList[--freeCount];
+ }
+
+ synchronized void freePerDoc(PerDoc perDoc) {
+ assert freeCount < docFreeList.length;
+ docFreeList[freeCount++] = perDoc;
+ }
+
+ class PerDoc extends DocumentsWriter.DocWriter {
+
+ DocumentsWriter.DocWriter one;
+ DocumentsWriter.DocWriter two;
+
+ public long sizeInBytes() {
+ return one.sizeInBytes() + two.sizeInBytes();
+ }
+
+ public void finish() throws IOException {
+ try {
+ try {
+ one.finish();
+ } finally {
+ two.finish();
+ }
+ } finally {
+ freePerDoc(this);
+ }
+ }
+
+ public void abort() {
+ try {
+ try {
+ one.abort();
+ } finally {
+ two.abort();
+ }
+ } finally {
+ freePerDoc(this);
+ }
+ }
+ }
}
Modified: lucene/java/trunk/src/java/org/apache/lucene/index/DocumentsWriter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/DocumentsWriter.java?rev=792535&r1=792534&r2=792535&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/DocumentsWriter.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/DocumentsWriter.java Thu Jul 9 12:53:28 2009
@@ -214,9 +214,7 @@
new TermsHash(documentsWriter, false, termVectorsWriter, null));
final NormsWriter normsWriter = new NormsWriter();
final DocInverter docInverter = new DocInverter(termsHash, normsWriter);
- final StoredFieldsWriter fieldsWriter = new StoredFieldsWriter(documentsWriter);
- final DocFieldConsumers docFieldConsumers = new DocFieldConsumers(docInverter, fieldsWriter);
- return new DocFieldProcessor(documentsWriter, docFieldConsumers);
+ return new DocFieldProcessor(documentsWriter, docInverter);
}
};
Modified: lucene/java/trunk/src/java/org/apache/lucene/index/StoredFieldsWriter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/StoredFieldsWriter.java?rev=792535&r1=792534&r2=792535&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/StoredFieldsWriter.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/StoredFieldsWriter.java Thu Jul 9 12:53:28 2009
@@ -17,30 +17,31 @@
* limitations under the License.
*/
-import java.util.Map;
import java.io.IOException;
import org.apache.lucene.store.RAMOutputStream;
import org.apache.lucene.util.ArrayUtil;
/** This is a DocFieldConsumer that writes stored fields. */
-final class StoredFieldsWriter extends DocFieldConsumer {
+final class StoredFieldsWriter {
FieldsWriter fieldsWriter;
final DocumentsWriter docWriter;
+ final FieldInfos fieldInfos;
int lastDocID;
PerDoc[] docFreeList = new PerDoc[1];
int freeCount;
- public StoredFieldsWriter(DocumentsWriter docWriter) {
+ public StoredFieldsWriter(DocumentsWriter docWriter, FieldInfos fieldInfos) {
this.docWriter = docWriter;
+ this.fieldInfos = fieldInfos;
}
- public DocFieldConsumerPerThread addThread(DocFieldProcessorPerThread docFieldProcessorPerThread) throws IOException {
- return new StoredFieldsWriterPerThread(docFieldProcessorPerThread, this);
+ public StoredFieldsWriterPerThread addThread(DocumentsWriter.DocState docState) throws IOException {
+ return new StoredFieldsWriterPerThread(docState, this);
}
- synchronized public void flush(Map threadsAndFields, SegmentWriteState state) throws IOException {
+ synchronized public void flush(SegmentWriteState state) throws IOException {
if (state.numDocsInStore > 0) {
// It's possible that all documents seen in this segment
Modified: lucene/java/trunk/src/java/org/apache/lucene/index/StoredFieldsWriterPerThread.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/StoredFieldsWriterPerThread.java?rev=792535&r1=792534&r2=792535&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/StoredFieldsWriterPerThread.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/StoredFieldsWriterPerThread.java Thu Jul 9 12:53:28 2009
@@ -19,8 +19,9 @@
import java.io.IOException;
import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.document.Fieldable;
-final class StoredFieldsWriterPerThread extends DocFieldConsumerPerThread {
+final class StoredFieldsWriterPerThread {
final FieldsWriter localFieldsWriter;
final StoredFieldsWriter storedFieldsWriter;
@@ -28,9 +29,9 @@
StoredFieldsWriter.PerDoc doc;
- public StoredFieldsWriterPerThread(DocFieldProcessorPerThread docFieldProcessorPerThread, StoredFieldsWriter storedFieldsWriter) throws IOException {
+ public StoredFieldsWriterPerThread(DocumentsWriter.DocState docState, StoredFieldsWriter storedFieldsWriter) throws IOException {
this.storedFieldsWriter = storedFieldsWriter;
- this.docState = docFieldProcessorPerThread.docState;
+ this.docState = docState;
localFieldsWriter = new FieldsWriter((IndexOutput) null, (IndexOutput) null, storedFieldsWriter.fieldInfos);
}
@@ -44,6 +45,21 @@
}
}
+ public void addField(Fieldable field, FieldInfo fieldInfo) throws IOException {
+ if (doc == null) {
+ doc = storedFieldsWriter.getPerDoc();
+ doc.docID = docState.docID;
+ localFieldsWriter.setFieldsStream(doc.fdt);
+ assert doc.numStoredFields == 0: "doc.numStoredFields=" + doc.numStoredFields;
+ assert 0 == doc.fdt.length();
+ assert 0 == doc.fdt.getFilePointer();
+ }
+
+ localFieldsWriter.writeField(fieldInfo, field);
+ assert docState.testPoint("StoredFieldsWriterPerThread.processFields.writeField");
+ doc.numStoredFields++;
+ }
+
public DocumentsWriter.DocWriter finishDocument() {
// If there were any stored fields in this doc, doc will
// be non-null; else it's null.
@@ -60,8 +76,4 @@
doc = null;
}
}
-
- public DocFieldConsumerPerField addField(FieldInfo fieldInfo) {
- return new StoredFieldsWriterPerField(this, fieldInfo);
- }
}
Modified: lucene/java/trunk/src/test/org/apache/lucene/index/TestIndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/index/TestIndexWriter.java?rev=792535&r1=792534&r2=792535&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/index/TestIndexWriter.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/index/TestIndexWriter.java Thu Jul 9 12:53:28 2009
@@ -24,6 +24,7 @@
import java.util.Random;
import java.util.Map;
import java.util.HashMap;
+import java.util.Iterator;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.SinkTokenizer;
@@ -4408,4 +4409,36 @@
dir.close();
}
+
+ // LUCENE-1727: make sure doc fields are stored in order
+ public void testStoredFieldsOrder() throws Throwable {
+ Directory d = new MockRAMDirectory();
+ IndexWriter w = new IndexWriter(d, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED);
+ Document doc = new Document();
+ doc.add(new Field("zzz", "a b c", Field.Store.YES, Field.Index.NO));
+ doc.add(new Field("aaa", "a b c", Field.Store.YES, Field.Index.NO));
+ doc.add(new Field("zzz", "1 2 3", Field.Store.YES, Field.Index.NO));
+ w.addDocument(doc);
+ IndexReader r = w.getReader();
+ doc = r.document(0);
+ Iterator it = doc.getFields().iterator();
+ assertTrue(it.hasNext());
+ Field f = (Field) it.next();
+ assertEquals(f.name(), "zzz");
+ assertEquals(f.stringValue(), "a b c");
+
+ assertTrue(it.hasNext());
+ f = (Field) it.next();
+ assertEquals(f.name(), "aaa");
+ assertEquals(f.stringValue(), "a b c");
+
+ assertTrue(it.hasNext());
+ f = (Field) it.next();
+ assertEquals(f.name(), "zzz");
+ assertEquals(f.stringValue(), "1 2 3");
+ assertFalse(it.hasNext());
+ r.close();
+ w.close();
+ d.close();
+ }
}