You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by mi...@apache.org on 2009/07/09 14:53:29 UTC

svn commit: r792535 - in /lucene/java/trunk: ./ src/java/org/apache/lucene/index/ src/test/org/apache/lucene/index/

Author: mikemccand
Date: Thu Jul  9 12:53:28 2009
New Revision: 792535

URL: http://svn.apache.org/viewvc?rev=792535&view=rev
Log:
LUCENE-1727: make sure fields are stored in the exact order they were added to the document

Removed:
    lucene/java/trunk/src/java/org/apache/lucene/index/StoredFieldsWriterPerField.java
Modified:
    lucene/java/trunk/CHANGES.txt
    lucene/java/trunk/src/java/org/apache/lucene/index/DocFieldProcessor.java
    lucene/java/trunk/src/java/org/apache/lucene/index/DocFieldProcessorPerThread.java
    lucene/java/trunk/src/java/org/apache/lucene/index/DocumentsWriter.java
    lucene/java/trunk/src/java/org/apache/lucene/index/StoredFieldsWriter.java
    lucene/java/trunk/src/java/org/apache/lucene/index/StoredFieldsWriterPerThread.java
    lucene/java/trunk/src/test/org/apache/lucene/index/TestIndexWriter.java

Modified: lucene/java/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/CHANGES.txt?rev=792535&r1=792534&r2=792535&view=diff
==============================================================================
--- lucene/java/trunk/CHANGES.txt (original)
+++ lucene/java/trunk/CHANGES.txt Thu Jul  9 12:53:28 2009
@@ -130,6 +130,11 @@
  9. LUCENE-1717: Fixed IndexWriter to account for RAM usage of
     buffered deletions.  (Mike McCandless)
 
+10. LUCENE-1727: Ensure that fields are stored & retrieved in the
+    exact order in which they were added to the document.  This was
+    true in all Lucene releases before 2.3, but was broken in 2.3 and
+    2.4, and is now fixed in 2.9.  (Mike McCandless)
+
 API Changes
 
 1. LUCENE-1419: Add expert API to set custom indexing chain. This API is 

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/DocFieldProcessor.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/DocFieldProcessor.java?rev=792535&r1=792534&r2=792535&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/DocFieldProcessor.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/DocFieldProcessor.java Thu Jul  9 12:53:28 2009
@@ -36,15 +36,18 @@
   final DocumentsWriter docWriter;
   final FieldInfos fieldInfos = new FieldInfos();
   final DocFieldConsumer consumer;
+  final StoredFieldsWriter fieldsWriter;
 
   public DocFieldProcessor(DocumentsWriter docWriter, DocFieldConsumer consumer) {
     this.docWriter = docWriter;
     this.consumer = consumer;
     consumer.setFieldInfos(fieldInfos);
+    fieldsWriter = new StoredFieldsWriter(docWriter, fieldInfos);
   }
 
   public void closeDocStore(SegmentWriteState state) throws IOException {
     consumer.closeDocStore(state);
+    fieldsWriter.closeDocStore(state);
   }
 
   public void flush(Collection threads, SegmentWriteState state) throws IOException {
@@ -56,7 +59,7 @@
       childThreadsAndFields.put(perThread.consumer, perThread.fields());
       perThread.trimFields(state);
     }
-
+    fieldsWriter.flush(state);
     consumer.flush(childThreadsAndFields, state);
 
     // Important to save after asking consumer to flush so
@@ -69,6 +72,7 @@
   }
 
   public void abort() {
+    fieldsWriter.abort();
     consumer.abort();
   }
 

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/DocFieldProcessorPerThread.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/DocFieldProcessorPerThread.java?rev=792535&r1=792534&r2=792535&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/DocFieldProcessorPerThread.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/DocFieldProcessorPerThread.java Thu Jul  9 12:53:28 2009
@@ -23,6 +23,7 @@
 import java.io.IOException;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Fieldable;
+import org.apache.lucene.util.ArrayUtil;
 
 /**
  * Gathers all Fieldables for a document under the same
@@ -50,13 +51,16 @@
   int hashMask = 1;
   int totalFieldCount;
 
+  final StoredFieldsWriterPerThread fieldsWriter;
+
   final DocumentsWriter.DocState docState;
-  
+
   public DocFieldProcessorPerThread(DocumentsWriterThreadState threadState, DocFieldProcessor docFieldProcessor) throws IOException {
     this.docState = threadState.docState;
     this.docFieldProcessor = docFieldProcessor;
     this.fieldInfos = docFieldProcessor.fieldInfos;
     this.consumer = docFieldProcessor.consumer.addThread(this);
+    fieldsWriter = docFieldProcessor.fieldsWriter.addThread(docState);
   }
 
   public void abort() {
@@ -68,6 +72,7 @@
         field = next;
       }
     }
+    fieldsWriter.abort();
     consumer.abort();
   }
 
@@ -148,6 +153,8 @@
   public DocumentsWriter.DocWriter processDocument() throws IOException {
 
     consumer.startDocument();
+    fieldsWriter.startDocument();
+
     final Document doc = docState.doc;
 
     assert docFieldProcessor.docWriter.writer.testPoint("DocumentsWriter.ThreadState.init start");
@@ -220,6 +227,9 @@
       }
 
       fp.fields[fp.fieldCount++] = field;
+      if (field.isStored()) {
+        fieldsWriter.addField(field, fp.fieldInfo);
+      }
     }
 
     // If we are writing vectors then we must visit
@@ -236,7 +246,21 @@
     if (docState.maxTermPrefix != null && docState.infoStream != null)
       docState.infoStream.println("WARNING: document contains at least one immense term (longer than the max length " + DocumentsWriter.MAX_TERM_LENGTH + "), all of which were skipped.  Please correct the analyzer to not produce such terms.  The prefix of the first immense term is: '" + docState.maxTermPrefix + "...'"); 
 
-    return consumer.finishDocument();
+    final DocumentsWriter.DocWriter one = fieldsWriter.finishDocument();
+    final DocumentsWriter.DocWriter two = consumer.finishDocument();
+    if (one == null) {
+      return two;
+    } else if (two == null) {
+      return one;
+    } else {
+      PerDoc both = getPerDoc();
+      both.docID = docState.docID;
+      assert one.docID == docState.docID;
+      assert two.docID == docState.docID;
+      both.one = one;
+      both.two = two;
+      return both;
+    }
   }
 
   void quickSort(DocFieldProcessorPerField[] array, int lo, int hi) {
@@ -299,4 +323,62 @@
     quickSort(array, lo, left);
     quickSort(array, left + 1, hi);
   }
+
+  PerDoc[] docFreeList = new PerDoc[1];
+  int freeCount;
+  int allocCount;
+
+  synchronized PerDoc getPerDoc() {
+    if (freeCount == 0) {
+      allocCount++;
+      if (allocCount > docFreeList.length) {
+        // Grow our free list up front to make sure we have
+        // enough space to recycle all outstanding PerDoc
+        // instances
+        assert allocCount == 1+docFreeList.length;
+        docFreeList = new PerDoc[ArrayUtil.getNextSize(allocCount)];
+      }
+      return new PerDoc();
+    } else
+      return docFreeList[--freeCount];
+  }
+
+  synchronized void freePerDoc(PerDoc perDoc) {
+    assert freeCount < docFreeList.length;
+    docFreeList[freeCount++] = perDoc;
+  }
+
+  class PerDoc extends DocumentsWriter.DocWriter {
+
+    DocumentsWriter.DocWriter one;
+    DocumentsWriter.DocWriter two;
+
+    public long sizeInBytes() {
+      return one.sizeInBytes() + two.sizeInBytes();
+    }
+
+    public void finish() throws IOException {
+      try {
+        try {
+          one.finish();
+        } finally {
+          two.finish();
+        }
+      } finally {
+        freePerDoc(this);
+      }
+    }
+
+    public void abort() {
+      try {
+        try {
+          one.abort();
+        } finally {
+          two.abort();
+        }
+      } finally {
+        freePerDoc(this);
+      }
+    }
+  }
 }

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/DocumentsWriter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/DocumentsWriter.java?rev=792535&r1=792534&r2=792535&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/DocumentsWriter.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/DocumentsWriter.java Thu Jul  9 12:53:28 2009
@@ -214,9 +214,7 @@
                                                            new TermsHash(documentsWriter, false, termVectorsWriter, null));
       final NormsWriter normsWriter = new NormsWriter();
       final DocInverter docInverter = new DocInverter(termsHash, normsWriter);
-      final StoredFieldsWriter fieldsWriter = new StoredFieldsWriter(documentsWriter);
-      final DocFieldConsumers docFieldConsumers = new DocFieldConsumers(docInverter, fieldsWriter);
-      return new DocFieldProcessor(documentsWriter, docFieldConsumers);
+      return new DocFieldProcessor(documentsWriter, docInverter);
     }
   };
 

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/StoredFieldsWriter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/StoredFieldsWriter.java?rev=792535&r1=792534&r2=792535&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/StoredFieldsWriter.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/StoredFieldsWriter.java Thu Jul  9 12:53:28 2009
@@ -17,30 +17,31 @@
  * limitations under the License.
  */
 
-import java.util.Map;
 import java.io.IOException;
 import org.apache.lucene.store.RAMOutputStream;
 import org.apache.lucene.util.ArrayUtil;
 
 /** This is a DocFieldConsumer that writes stored fields. */
-final class StoredFieldsWriter extends DocFieldConsumer {
+final class StoredFieldsWriter {
 
   FieldsWriter fieldsWriter;
   final DocumentsWriter docWriter;
+  final FieldInfos fieldInfos;
   int lastDocID;
 
   PerDoc[] docFreeList = new PerDoc[1];
   int freeCount;
 
-  public StoredFieldsWriter(DocumentsWriter docWriter) {
+  public StoredFieldsWriter(DocumentsWriter docWriter, FieldInfos fieldInfos) {
     this.docWriter = docWriter;
+    this.fieldInfos = fieldInfos;
   }
 
-  public DocFieldConsumerPerThread addThread(DocFieldProcessorPerThread docFieldProcessorPerThread) throws IOException {
-    return new StoredFieldsWriterPerThread(docFieldProcessorPerThread, this);
+  public StoredFieldsWriterPerThread addThread(DocumentsWriter.DocState docState) throws IOException {
+    return new StoredFieldsWriterPerThread(docState, this);
   }
 
-  synchronized public void flush(Map threadsAndFields, SegmentWriteState state) throws IOException {
+  synchronized public void flush(SegmentWriteState state) throws IOException {
 
     if (state.numDocsInStore > 0) {
       // It's possible that all documents seen in this segment

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/StoredFieldsWriterPerThread.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/StoredFieldsWriterPerThread.java?rev=792535&r1=792534&r2=792535&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/StoredFieldsWriterPerThread.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/StoredFieldsWriterPerThread.java Thu Jul  9 12:53:28 2009
@@ -19,8 +19,9 @@
 
 import java.io.IOException;
 import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.document.Fieldable;
 
-final class StoredFieldsWriterPerThread extends DocFieldConsumerPerThread {
+final class StoredFieldsWriterPerThread {
 
   final FieldsWriter localFieldsWriter;
   final StoredFieldsWriter storedFieldsWriter;
@@ -28,9 +29,9 @@
 
   StoredFieldsWriter.PerDoc doc;
 
-  public StoredFieldsWriterPerThread(DocFieldProcessorPerThread docFieldProcessorPerThread, StoredFieldsWriter storedFieldsWriter) throws IOException {
+  public StoredFieldsWriterPerThread(DocumentsWriter.DocState docState, StoredFieldsWriter storedFieldsWriter) throws IOException {
     this.storedFieldsWriter = storedFieldsWriter;
-    this.docState = docFieldProcessorPerThread.docState;
+    this.docState = docState;
     localFieldsWriter = new FieldsWriter((IndexOutput) null, (IndexOutput) null, storedFieldsWriter.fieldInfos);
   }
 
@@ -44,6 +45,21 @@
     }
   }
 
+  public void addField(Fieldable field, FieldInfo fieldInfo) throws IOException {
+    if (doc == null) {
+      doc = storedFieldsWriter.getPerDoc();
+      doc.docID = docState.docID;
+      localFieldsWriter.setFieldsStream(doc.fdt);
+      assert doc.numStoredFields == 0: "doc.numStoredFields=" + doc.numStoredFields;
+      assert 0 == doc.fdt.length();
+      assert 0 == doc.fdt.getFilePointer();
+    }
+
+    localFieldsWriter.writeField(fieldInfo, field);
+    assert docState.testPoint("StoredFieldsWriterPerThread.processFields.writeField");
+    doc.numStoredFields++;
+  }
+
   public DocumentsWriter.DocWriter finishDocument() {
     // If there were any stored fields in this doc, doc will
     // be non-null; else it's null.
@@ -60,8 +76,4 @@
       doc = null;
     }
   }
-
-  public DocFieldConsumerPerField addField(FieldInfo fieldInfo) {
-    return new StoredFieldsWriterPerField(this, fieldInfo);
-  }
 }

Modified: lucene/java/trunk/src/test/org/apache/lucene/index/TestIndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/index/TestIndexWriter.java?rev=792535&r1=792534&r2=792535&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/index/TestIndexWriter.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/index/TestIndexWriter.java Thu Jul  9 12:53:28 2009
@@ -24,6 +24,7 @@
 import java.util.Random;
 import java.util.Map;
 import java.util.HashMap;
+import java.util.Iterator;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.SinkTokenizer;
@@ -4408,4 +4409,36 @@
     dir.close();
 
   }
+
+  // LUCENE-1727: make sure doc fields are stored in order
+  public void testStoredFieldsOrder() throws Throwable {
+    Directory d = new MockRAMDirectory();
+    IndexWriter w = new IndexWriter(d, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED);
+    Document doc = new Document();
+    doc.add(new Field("zzz", "a b c", Field.Store.YES, Field.Index.NO));
+    doc.add(new Field("aaa", "a b c", Field.Store.YES, Field.Index.NO));
+    doc.add(new Field("zzz", "1 2 3", Field.Store.YES, Field.Index.NO));
+    w.addDocument(doc);
+    IndexReader r = w.getReader();
+    doc = r.document(0);
+    Iterator it = doc.getFields().iterator();
+    assertTrue(it.hasNext());
+    Field f = (Field) it.next();
+    assertEquals(f.name(), "zzz");
+    assertEquals(f.stringValue(), "a b c");
+
+    assertTrue(it.hasNext());
+    f = (Field) it.next();
+    assertEquals(f.name(), "aaa");
+    assertEquals(f.stringValue(), "a b c");
+
+    assertTrue(it.hasNext());
+    f = (Field) it.next();
+    assertEquals(f.name(), "zzz");
+    assertEquals(f.stringValue(), "1 2 3");
+    assertFalse(it.hasNext());
+    r.close();
+    w.close();
+    d.close();
+  }
 }