You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2011/10/14 21:23:30 UTC

svn commit: r1183464 - in /lucene/dev/trunk/lucene: ./ src/java/org/apache/lucene/index/ src/java/org/apache/lucene/index/codecs/simpletext/ src/test-framework/org/apache/lucene/store/ src/test/org/apache/lucene/index/

Author: mikemccand
Date: Fri Oct 14 19:23:29 2011
New Revision: 1183464

URL: http://svn.apache.org/viewvc?rev=1183464&view=rev
Log:
LUCENE-3515: fix over-cloning during merging

Added:
    lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestForTooMuchCloning.java   (with props)
Modified:
    lucene/dev/trunk/lucene/CHANGES.txt
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MultiDocsAndPositionsEnum.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MultiDocsEnum.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java
    lucene/dev/trunk/lucene/src/test-framework/org/apache/lucene/store/MockDirectoryWrapper.java
    lucene/dev/trunk/lucene/src/test-framework/org/apache/lucene/store/MockIndexInputWrapper.java
    lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestRollingUpdates.java

Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1183464&r1=1183463&r2=1183464&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Fri Oct 14 19:23:29 2011
@@ -596,6 +596,11 @@ Bug fixes
   rarely cause deletions to be incorrectly applied.  (Yonik Seeley,
   Simon Willnauer, Mike McCandless)
 
+* LUCENE-3515: Fix terrible merge performance versus 3.x, especially
+  when the directory isn't MMapDirectory, due to failing to reuse
+  DocsAndPositionsEnum while merging (Marc Sturlese, Erick Erickson,
+  Robert Muir, Simon Willnauer, Mike McCandless)
+
 ======================= Lucene 3.5.0 =======================
 
 Changes in backwards compatibility policy

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MultiDocsAndPositionsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MultiDocsAndPositionsEnum.java?rev=1183464&r1=1183463&r2=1183464&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MultiDocsAndPositionsEnum.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MultiDocsAndPositionsEnum.java Fri Oct 14 19:23:29 2011
@@ -29,6 +29,8 @@ import java.io.IOException;
  */
 
 public final class MultiDocsAndPositionsEnum extends DocsAndPositionsEnum {
+  private final MultiTermsEnum parent;
+  final DocsAndPositionsEnum[] subDocsAndPositionsEnum;
   private EnumWithSlice[] subs;
   int numSubs;
   int upto;
@@ -36,7 +38,16 @@ public final class MultiDocsAndPositions
   int currentBase;
   int doc = -1;
 
-  MultiDocsAndPositionsEnum reset(final EnumWithSlice[] subs, final int numSubs) throws IOException {
+  public MultiDocsAndPositionsEnum(MultiTermsEnum parent, int subReaderCount) {
+    this.parent = parent;
+    subDocsAndPositionsEnum = new DocsAndPositionsEnum[subReaderCount];
+  }
+
+  public boolean canReuse(MultiTermsEnum parent) {
+    return this.parent == parent;
+  }
+
+  public MultiDocsAndPositionsEnum reset(final EnumWithSlice[] subs, final int numSubs) throws IOException {
     this.numSubs = numSubs;
     this.subs = new EnumWithSlice[subs.length];
     for(int i=0;i<subs.length;i++) {

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MultiDocsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MultiDocsEnum.java?rev=1183464&r1=1183463&r2=1183464&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MultiDocsEnum.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MultiDocsEnum.java Fri Oct 14 19:23:29 2011
@@ -27,6 +27,8 @@ import java.io.IOException;
  */
 
 public final class MultiDocsEnum extends DocsEnum {
+  private final MultiTermsEnum parent;
+  final DocsEnum[] subDocsEnum;
   private EnumWithSlice[] subs;
   int numSubs;
   int upto;
@@ -34,6 +36,11 @@ public final class MultiDocsEnum extends
   int currentBase;
   int doc = -1;
 
+  public MultiDocsEnum(MultiTermsEnum parent, int subReaderCount) {
+    this.parent = parent;
+    subDocsEnum = new DocsEnum[subReaderCount];
+  }
+
   MultiDocsEnum reset(final EnumWithSlice[] subs, final int numSubs) throws IOException {
     this.numSubs = numSubs;
 
@@ -48,6 +55,10 @@ public final class MultiDocsEnum extends
     return this;
   }
 
+  public boolean canReuse(MultiTermsEnum parent) {
+    return this.parent == parent;
+  }
+
   public int getNumSubs() {
     return numSubs;
   }

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java?rev=1183464&r1=1183463&r2=1183464&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java Fri Oct 14 19:23:29 2011
@@ -77,7 +77,7 @@ public final class MultiTermsEnum extend
     subDocs = new MultiDocsEnum.EnumWithSlice[slices.length];
     subDocsAndPositions = new MultiDocsAndPositionsEnum.EnumWithSlice[slices.length];
     for(int i=0;i<slices.length;i++) {
-      subs[i] = new TermsEnumWithSlice(slices[i]);
+      subs[i] = new TermsEnumWithSlice(i, slices[i]);
       subDocs[i] = new MultiDocsEnum.EnumWithSlice();
       subDocs[i].slice = slices[i];
       subDocsAndPositions[i] = new MultiDocsAndPositionsEnum.EnumWithSlice();
@@ -347,11 +347,16 @@ public final class MultiTermsEnum extend
 
   @Override
   public DocsEnum docs(Bits liveDocs, DocsEnum reuse) throws IOException {
-    final MultiDocsEnum docsEnum;
-    if (reuse != null) {
+    MultiDocsEnum docsEnum;
+    // Can only reuse if incoming enum is also a MultiDocsEnum
+    if (reuse != null && reuse instanceof MultiDocsEnum) {
       docsEnum = (MultiDocsEnum) reuse;
+      // ... and was previously created w/ this MultiTermsEnum:
+      if (!docsEnum.canReuse(this)) {
+        docsEnum = new MultiDocsEnum(this, subs.length);
+      }
     } else {
-      docsEnum = new MultiDocsEnum();
+      docsEnum = new MultiDocsEnum(this, subs.length);
     }
     
     final MultiBits multiLiveDocs;
@@ -390,8 +395,11 @@ public final class MultiTermsEnum extend
         b = null;
       }
 
-      final DocsEnum subDocsEnum = entry.terms.docs(b, null);
+      assert entry.index < docsEnum.subDocsEnum.length: entry.index + " vs " + docsEnum.subDocsEnum.length + "; " + subs.length;
+      final DocsEnum subDocsEnum = entry.terms.docs(b, docsEnum.subDocsEnum[entry.index]);
+
       if (subDocsEnum != null) {
+        docsEnum.subDocsEnum[entry.index] = subDocsEnum;
         subDocs[upto].docsEnum = subDocsEnum;
         subDocs[upto].slice = entry.subSlice;
 
@@ -408,11 +416,16 @@ public final class MultiTermsEnum extend
 
   @Override
   public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse) throws IOException {
-    final MultiDocsAndPositionsEnum docsAndPositionsEnum;
-    if (reuse != null) {
+    MultiDocsAndPositionsEnum docsAndPositionsEnum;
+    // Can only reuse if incoming enum is also a MultiDocsAndPositionsEnum
+    if (reuse != null && reuse instanceof MultiDocsAndPositionsEnum) {
       docsAndPositionsEnum = (MultiDocsAndPositionsEnum) reuse;
+      // ... and was previously created w/ this MultiTermsEnum:
+      if (!docsAndPositionsEnum.canReuse(this)) {
+        docsAndPositionsEnum = new MultiDocsAndPositionsEnum(this, subs.length);
+      }
     } else {
-      docsAndPositionsEnum = new MultiDocsAndPositionsEnum();
+      docsAndPositionsEnum = new MultiDocsAndPositionsEnum(this, subs.length);
     }
     
     final MultiBits multiLiveDocs;
@@ -452,9 +465,11 @@ public final class MultiTermsEnum extend
         b = null;
       }
 
-      final DocsAndPositionsEnum subPostings = entry.terms.docsAndPositions(b, null);
+      assert entry.index < docsAndPositionsEnum.subDocsAndPositionsEnum.length: entry.index + " vs " + docsAndPositionsEnum.subDocsAndPositionsEnum.length + "; " + subs.length;
+      final DocsAndPositionsEnum subPostings = entry.terms.docsAndPositions(b, docsAndPositionsEnum.subDocsAndPositionsEnum[entry.index]);
 
       if (subPostings != null) {
+        docsAndPositionsEnum.subDocsAndPositionsEnum[entry.index] = subPostings;
         subDocsAndPositions[upto].docsAndPositionsEnum = subPostings;
         subDocsAndPositions[upto].slice = entry.subSlice;
         upto++;
@@ -479,9 +494,11 @@ public final class MultiTermsEnum extend
     private final ReaderUtil.Slice subSlice;
     private TermsEnum terms;
     public BytesRef current;
+    final int index;
 
-    public TermsEnumWithSlice(ReaderUtil.Slice subSlice) {
+    public TermsEnumWithSlice(int index, ReaderUtil.Slice subSlice) {
       this.subSlice = subSlice;
+      this.index = index;
       assert subSlice.length >= 0: "length=" + subSlice.length;
     }
 

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java?rev=1183464&r1=1183463&r2=1183464&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java Fri Oct 14 19:23:29 2011
@@ -216,7 +216,7 @@ class SimpleTextFieldsReader extends Fie
     @Override
     public DocsEnum docs(Bits liveDocs, DocsEnum reuse) throws IOException {
       SimpleTextDocsEnum docsEnum;
-      if (reuse != null && reuse instanceof SimpleTextDocsEnum && ((SimpleTextDocsEnum) reuse).canReuse(in)) {
+      if (reuse != null && reuse instanceof SimpleTextDocsEnum && ((SimpleTextDocsEnum) reuse).canReuse(SimpleTextFieldsReader.this.in)) {
         docsEnum = (SimpleTextDocsEnum) reuse;
       } else {
         docsEnum = new SimpleTextDocsEnum();
@@ -231,7 +231,7 @@ class SimpleTextFieldsReader extends Fie
       }
 
       SimpleTextDocsAndPositionsEnum docsAndPositionsEnum;
-      if (reuse != null && reuse instanceof SimpleTextDocsAndPositionsEnum && ((SimpleTextDocsAndPositionsEnum) reuse).canReuse(in)) {
+      if (reuse != null && reuse instanceof SimpleTextDocsAndPositionsEnum && ((SimpleTextDocsAndPositionsEnum) reuse).canReuse(SimpleTextFieldsReader.this.in)) {
         docsAndPositionsEnum = (SimpleTextDocsAndPositionsEnum) reuse;
       } else {
         docsAndPositionsEnum = new SimpleTextDocsAndPositionsEnum();
@@ -249,7 +249,7 @@ class SimpleTextFieldsReader extends Fie
     private final IndexInput inStart;
     private final IndexInput in;
     private boolean omitTF;
-    private int docID;
+    private int docID = -1;
     private int tf;
     private Bits liveDocs;
     private final BytesRef scratch = new BytesRef(10);
@@ -268,6 +268,7 @@ class SimpleTextFieldsReader extends Fie
       this.liveDocs = liveDocs;
       in.seek(fp);
       this.omitTF = omitTF;
+      docID = -1;
       if (omitTF) {
         tf = 1;
       }

Modified: lucene/dev/trunk/lucene/src/test-framework/org/apache/lucene/store/MockDirectoryWrapper.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test-framework/org/apache/lucene/store/MockDirectoryWrapper.java?rev=1183464&r1=1183463&r2=1183464&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test-framework/org/apache/lucene/store/MockDirectoryWrapper.java (original)
+++ lucene/dev/trunk/lucene/src/test-framework/org/apache/lucene/store/MockDirectoryWrapper.java Fri Oct 14 19:23:29 2011
@@ -30,6 +30,7 @@ import java.util.Iterator;
 import java.util.Map;
 import java.util.Random;
 import java.util.Set;
+import java.util.concurrent.atomic.AtomicInteger;
 
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.codecs.CodecProvider;
@@ -74,6 +75,8 @@ public class MockDirectoryWrapper extend
   private ThrottledIndexOutput throttledOutput;
   private Throttling throttling = Throttling.SOMETIMES;
 
+  final AtomicInteger inputCloneCount = new AtomicInteger();
+
   // use this for tracking files for crash.
   // additionally: provides debugging information in case you leave one open
   private Map<Closeable,Exception> openFileHandles = Collections.synchronizedMap(new IdentityHashMap<Closeable,Exception>());
@@ -117,6 +120,10 @@ public class MockDirectoryWrapper extend
     init();
   }
 
+  public int getInputCloneCount() {
+    return inputCloneCount.get();
+  }
+
   public void setTrackDiskUsage(boolean v) {
     trackDiskUsage = v;
   }

Modified: lucene/dev/trunk/lucene/src/test-framework/org/apache/lucene/store/MockIndexInputWrapper.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test-framework/org/apache/lucene/store/MockIndexInputWrapper.java?rev=1183464&r1=1183463&r2=1183464&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test-framework/org/apache/lucene/store/MockIndexInputWrapper.java (original)
+++ lucene/dev/trunk/lucene/src/test-framework/org/apache/lucene/store/MockIndexInputWrapper.java Fri Oct 14 19:23:29 2011
@@ -57,6 +57,7 @@ public class MockIndexInputWrapper exten
 
   @Override
   public Object clone() {
+    dir.inputCloneCount.incrementAndGet();
     IndexInput iiclone = (IndexInput) delegate.clone();
     MockIndexInputWrapper clone = new MockIndexInputWrapper(dir, name, iiclone);
     clone.isClone = true;

Added: lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestForTooMuchCloning.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestForTooMuchCloning.java?rev=1183464&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestForTooMuchCloning.java (added)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestForTooMuchCloning.java Fri Oct 14 19:23:29 2011
@@ -0,0 +1,83 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.*;
+
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.TextField;
+import org.apache.lucene.index.codecs.CodecProvider;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.TermRangeQuery;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.store.MockDirectoryWrapper;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util._TestUtil;
+
+public class TestForTooMuchCloning extends LuceneTestCase {
+
+  // Make sure we don't clone IndexInputs too frequently
+  // during merging:
+  public void test() throws Exception {
+    String codec = CodecProvider.getDefault().getFieldCodec("field");
+    // TODO: once LUCENE-3517 is fixed, remove this:
+    assumeFalse("PulsingCodec fails this test because of over-cloning", codec.equals("Pulsing") || codec.equals("MockRandom"));
+    final MockDirectoryWrapper dir = newDirectory();
+    final TieredMergePolicy tmp = new TieredMergePolicy();
+    tmp.setMaxMergeAtOnce(2);
+    final RandomIndexWriter w = new RandomIndexWriter(random, dir,
+                                                      newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(2).setMergePolicy(tmp));
+    final int numDocs = 20;
+    for(int docs=0;docs<numDocs;docs++) {
+      StringBuilder sb = new StringBuilder();
+      for(int terms=0;terms<100;terms++) {
+        sb.append(_TestUtil.randomRealisticUnicodeString(random));
+        sb.append(' ');
+      }
+      final Document doc = new Document();
+      doc.add(new TextField("field", sb.toString()));
+      w.addDocument(doc);
+    }
+    final IndexReader r = w.getReader();
+    w.close();
+
+    final int cloneCount = dir.getInputCloneCount();
+    //System.out.println("merge clone count=" + cloneCount);
+    assertTrue("too many calls to IndexInput.clone during merging: " + dir.getInputCloneCount(), cloneCount < 500);
+
+    final IndexSearcher s = new IndexSearcher(r);
+
+    // MTQ that matches all terms so the AUTO_REWRITE should
+    // cutover to filter rewrite and reuse a single DocsEnum
+    // across all terms;
+    final TopDocs hits = s.search(new TermRangeQuery("field",
+                                                     new BytesRef(),
+                                                     new BytesRef("\uFFFF"),
+                                                     true,
+                                                     true), 10);
+    assertTrue(hits.totalHits > 0);
+    final int queryCloneCount = dir.getInputCloneCount() - cloneCount;
+    //System.out.println("query clone count=" + queryCloneCount);
+    assertTrue("too many calls to IndexInput.clone during TermRangeQuery: " + queryCloneCount, queryCloneCount < 50);
+    s.close();
+    r.close();
+    dir.close();
+  }
+}

Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestRollingUpdates.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestRollingUpdates.java?rev=1183464&r1=1183463&r2=1183464&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestRollingUpdates.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestRollingUpdates.java Fri Oct 14 19:23:29 2011
@@ -42,6 +42,7 @@ public class TestRollingUpdates extends 
     }
 
     final IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setCodecProvider(provider));
+    w.setInfoStream(VERBOSE ? System.out : null);
     final int SIZE = atLeast(TEST_NIGHTLY ? 100 : 20);
     int id = 0;
     IndexReader r = null;