You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2011/10/14 21:23:30 UTC
svn commit: r1183464 - in /lucene/dev/trunk/lucene: ./
src/java/org/apache/lucene/index/
src/java/org/apache/lucene/index/codecs/simpletext/
src/test-framework/org/apache/lucene/store/ src/test/org/apache/lucene/index/
Author: mikemccand
Date: Fri Oct 14 19:23:29 2011
New Revision: 1183464
URL: http://svn.apache.org/viewvc?rev=1183464&view=rev
Log:
LUCENE-3515: fix over-cloning during merging
Added:
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestForTooMuchCloning.java (with props)
Modified:
lucene/dev/trunk/lucene/CHANGES.txt
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MultiDocsAndPositionsEnum.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MultiDocsEnum.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java
lucene/dev/trunk/lucene/src/test-framework/org/apache/lucene/store/MockDirectoryWrapper.java
lucene/dev/trunk/lucene/src/test-framework/org/apache/lucene/store/MockIndexInputWrapper.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestRollingUpdates.java
Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1183464&r1=1183463&r2=1183464&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Fri Oct 14 19:23:29 2011
@@ -596,6 +596,11 @@ Bug fixes
rarely cause deletions to be incorrectly applied. (Yonik Seeley,
Simon Willnauer, Mike McCandless)
+* LUCENE-3515: Fix terrible merge performance versus 3.x, especially
+ when the directory isn't MMapDirectory, due to failing to reuse
+ DocsAndPositionsEnum while merging (Marc Sturlese, Erick Erickson,
+ Robert Muir, Simon Willnauer, Mike McCandless)
+
======================= Lucene 3.5.0 =======================
Changes in backwards compatibility policy
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MultiDocsAndPositionsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MultiDocsAndPositionsEnum.java?rev=1183464&r1=1183463&r2=1183464&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MultiDocsAndPositionsEnum.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MultiDocsAndPositionsEnum.java Fri Oct 14 19:23:29 2011
@@ -29,6 +29,8 @@ import java.io.IOException;
*/
public final class MultiDocsAndPositionsEnum extends DocsAndPositionsEnum {
+ private final MultiTermsEnum parent;
+ final DocsAndPositionsEnum[] subDocsAndPositionsEnum;
private EnumWithSlice[] subs;
int numSubs;
int upto;
@@ -36,7 +38,16 @@ public final class MultiDocsAndPositions
int currentBase;
int doc = -1;
- MultiDocsAndPositionsEnum reset(final EnumWithSlice[] subs, final int numSubs) throws IOException {
+ public MultiDocsAndPositionsEnum(MultiTermsEnum parent, int subReaderCount) {
+ this.parent = parent;
+ subDocsAndPositionsEnum = new DocsAndPositionsEnum[subReaderCount];
+ }
+
+ public boolean canReuse(MultiTermsEnum parent) {
+ return this.parent == parent;
+ }
+
+ public MultiDocsAndPositionsEnum reset(final EnumWithSlice[] subs, final int numSubs) throws IOException {
this.numSubs = numSubs;
this.subs = new EnumWithSlice[subs.length];
for(int i=0;i<subs.length;i++) {
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MultiDocsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MultiDocsEnum.java?rev=1183464&r1=1183463&r2=1183464&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MultiDocsEnum.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MultiDocsEnum.java Fri Oct 14 19:23:29 2011
@@ -27,6 +27,8 @@ import java.io.IOException;
*/
public final class MultiDocsEnum extends DocsEnum {
+ private final MultiTermsEnum parent;
+ final DocsEnum[] subDocsEnum;
private EnumWithSlice[] subs;
int numSubs;
int upto;
@@ -34,6 +36,11 @@ public final class MultiDocsEnum extends
int currentBase;
int doc = -1;
+ public MultiDocsEnum(MultiTermsEnum parent, int subReaderCount) {
+ this.parent = parent;
+ subDocsEnum = new DocsEnum[subReaderCount];
+ }
+
MultiDocsEnum reset(final EnumWithSlice[] subs, final int numSubs) throws IOException {
this.numSubs = numSubs;
@@ -48,6 +55,10 @@ public final class MultiDocsEnum extends
return this;
}
+ public boolean canReuse(MultiTermsEnum parent) {
+ return this.parent == parent;
+ }
+
public int getNumSubs() {
return numSubs;
}
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java?rev=1183464&r1=1183463&r2=1183464&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java Fri Oct 14 19:23:29 2011
@@ -77,7 +77,7 @@ public final class MultiTermsEnum extend
subDocs = new MultiDocsEnum.EnumWithSlice[slices.length];
subDocsAndPositions = new MultiDocsAndPositionsEnum.EnumWithSlice[slices.length];
for(int i=0;i<slices.length;i++) {
- subs[i] = new TermsEnumWithSlice(slices[i]);
+ subs[i] = new TermsEnumWithSlice(i, slices[i]);
subDocs[i] = new MultiDocsEnum.EnumWithSlice();
subDocs[i].slice = slices[i];
subDocsAndPositions[i] = new MultiDocsAndPositionsEnum.EnumWithSlice();
@@ -347,11 +347,16 @@ public final class MultiTermsEnum extend
@Override
public DocsEnum docs(Bits liveDocs, DocsEnum reuse) throws IOException {
- final MultiDocsEnum docsEnum;
- if (reuse != null) {
+ MultiDocsEnum docsEnum;
+ // Can only reuse if incoming enum is also a MultiDocsEnum
+ if (reuse != null && reuse instanceof MultiDocsEnum) {
docsEnum = (MultiDocsEnum) reuse;
+ // ... and was previously created w/ this MultiTermsEnum:
+ if (!docsEnum.canReuse(this)) {
+ docsEnum = new MultiDocsEnum(this, subs.length);
+ }
} else {
- docsEnum = new MultiDocsEnum();
+ docsEnum = new MultiDocsEnum(this, subs.length);
}
final MultiBits multiLiveDocs;
@@ -390,8 +395,11 @@ public final class MultiTermsEnum extend
b = null;
}
- final DocsEnum subDocsEnum = entry.terms.docs(b, null);
+ assert entry.index < docsEnum.subDocsEnum.length: entry.index + " vs " + docsEnum.subDocsEnum.length + "; " + subs.length;
+ final DocsEnum subDocsEnum = entry.terms.docs(b, docsEnum.subDocsEnum[entry.index]);
+
if (subDocsEnum != null) {
+ docsEnum.subDocsEnum[entry.index] = subDocsEnum;
subDocs[upto].docsEnum = subDocsEnum;
subDocs[upto].slice = entry.subSlice;
@@ -408,11 +416,16 @@ public final class MultiTermsEnum extend
@Override
public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse) throws IOException {
- final MultiDocsAndPositionsEnum docsAndPositionsEnum;
- if (reuse != null) {
+ MultiDocsAndPositionsEnum docsAndPositionsEnum;
+ // Can only reuse if incoming enum is also a MultiDocsAndPositionsEnum
+ if (reuse != null && reuse instanceof MultiDocsAndPositionsEnum) {
docsAndPositionsEnum = (MultiDocsAndPositionsEnum) reuse;
+ // ... and was previously created w/ this MultiTermsEnum:
+ if (!docsAndPositionsEnum.canReuse(this)) {
+ docsAndPositionsEnum = new MultiDocsAndPositionsEnum(this, subs.length);
+ }
} else {
- docsAndPositionsEnum = new MultiDocsAndPositionsEnum();
+ docsAndPositionsEnum = new MultiDocsAndPositionsEnum(this, subs.length);
}
final MultiBits multiLiveDocs;
@@ -452,9 +465,11 @@ public final class MultiTermsEnum extend
b = null;
}
- final DocsAndPositionsEnum subPostings = entry.terms.docsAndPositions(b, null);
+ assert entry.index < docsAndPositionsEnum.subDocsAndPositionsEnum.length: entry.index + " vs " + docsAndPositionsEnum.subDocsAndPositionsEnum.length + "; " + subs.length;
+ final DocsAndPositionsEnum subPostings = entry.terms.docsAndPositions(b, docsAndPositionsEnum.subDocsAndPositionsEnum[entry.index]);
if (subPostings != null) {
+ docsAndPositionsEnum.subDocsAndPositionsEnum[entry.index] = subPostings;
subDocsAndPositions[upto].docsAndPositionsEnum = subPostings;
subDocsAndPositions[upto].slice = entry.subSlice;
upto++;
@@ -479,9 +494,11 @@ public final class MultiTermsEnum extend
private final ReaderUtil.Slice subSlice;
private TermsEnum terms;
public BytesRef current;
+ final int index;
- public TermsEnumWithSlice(ReaderUtil.Slice subSlice) {
+ public TermsEnumWithSlice(int index, ReaderUtil.Slice subSlice) {
this.subSlice = subSlice;
+ this.index = index;
assert subSlice.length >= 0: "length=" + subSlice.length;
}
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java?rev=1183464&r1=1183463&r2=1183464&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java Fri Oct 14 19:23:29 2011
@@ -216,7 +216,7 @@ class SimpleTextFieldsReader extends Fie
@Override
public DocsEnum docs(Bits liveDocs, DocsEnum reuse) throws IOException {
SimpleTextDocsEnum docsEnum;
- if (reuse != null && reuse instanceof SimpleTextDocsEnum && ((SimpleTextDocsEnum) reuse).canReuse(in)) {
+ if (reuse != null && reuse instanceof SimpleTextDocsEnum && ((SimpleTextDocsEnum) reuse).canReuse(SimpleTextFieldsReader.this.in)) {
docsEnum = (SimpleTextDocsEnum) reuse;
} else {
docsEnum = new SimpleTextDocsEnum();
@@ -231,7 +231,7 @@ class SimpleTextFieldsReader extends Fie
}
SimpleTextDocsAndPositionsEnum docsAndPositionsEnum;
- if (reuse != null && reuse instanceof SimpleTextDocsAndPositionsEnum && ((SimpleTextDocsAndPositionsEnum) reuse).canReuse(in)) {
+ if (reuse != null && reuse instanceof SimpleTextDocsAndPositionsEnum && ((SimpleTextDocsAndPositionsEnum) reuse).canReuse(SimpleTextFieldsReader.this.in)) {
docsAndPositionsEnum = (SimpleTextDocsAndPositionsEnum) reuse;
} else {
docsAndPositionsEnum = new SimpleTextDocsAndPositionsEnum();
@@ -249,7 +249,7 @@ class SimpleTextFieldsReader extends Fie
private final IndexInput inStart;
private final IndexInput in;
private boolean omitTF;
- private int docID;
+ private int docID = -1;
private int tf;
private Bits liveDocs;
private final BytesRef scratch = new BytesRef(10);
@@ -268,6 +268,7 @@ class SimpleTextFieldsReader extends Fie
this.liveDocs = liveDocs;
in.seek(fp);
this.omitTF = omitTF;
+ docID = -1;
if (omitTF) {
tf = 1;
}
Modified: lucene/dev/trunk/lucene/src/test-framework/org/apache/lucene/store/MockDirectoryWrapper.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test-framework/org/apache/lucene/store/MockDirectoryWrapper.java?rev=1183464&r1=1183463&r2=1183464&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test-framework/org/apache/lucene/store/MockDirectoryWrapper.java (original)
+++ lucene/dev/trunk/lucene/src/test-framework/org/apache/lucene/store/MockDirectoryWrapper.java Fri Oct 14 19:23:29 2011
@@ -30,6 +30,7 @@ import java.util.Iterator;
import java.util.Map;
import java.util.Random;
import java.util.Set;
+import java.util.concurrent.atomic.AtomicInteger;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.codecs.CodecProvider;
@@ -74,6 +75,8 @@ public class MockDirectoryWrapper extend
private ThrottledIndexOutput throttledOutput;
private Throttling throttling = Throttling.SOMETIMES;
+ final AtomicInteger inputCloneCount = new AtomicInteger();
+
// use this for tracking files for crash.
// additionally: provides debugging information in case you leave one open
private Map<Closeable,Exception> openFileHandles = Collections.synchronizedMap(new IdentityHashMap<Closeable,Exception>());
@@ -117,6 +120,10 @@ public class MockDirectoryWrapper extend
init();
}
+ public int getInputCloneCount() {
+ return inputCloneCount.get();
+ }
+
public void setTrackDiskUsage(boolean v) {
trackDiskUsage = v;
}
Modified: lucene/dev/trunk/lucene/src/test-framework/org/apache/lucene/store/MockIndexInputWrapper.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test-framework/org/apache/lucene/store/MockIndexInputWrapper.java?rev=1183464&r1=1183463&r2=1183464&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test-framework/org/apache/lucene/store/MockIndexInputWrapper.java (original)
+++ lucene/dev/trunk/lucene/src/test-framework/org/apache/lucene/store/MockIndexInputWrapper.java Fri Oct 14 19:23:29 2011
@@ -57,6 +57,7 @@ public class MockIndexInputWrapper exten
@Override
public Object clone() {
+ dir.inputCloneCount.incrementAndGet();
IndexInput iiclone = (IndexInput) delegate.clone();
MockIndexInputWrapper clone = new MockIndexInputWrapper(dir, name, iiclone);
clone.isClone = true;
Added: lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestForTooMuchCloning.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestForTooMuchCloning.java?rev=1183464&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestForTooMuchCloning.java (added)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestForTooMuchCloning.java Fri Oct 14 19:23:29 2011
@@ -0,0 +1,83 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.*;
+
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.TextField;
+import org.apache.lucene.index.codecs.CodecProvider;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.TermRangeQuery;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.store.MockDirectoryWrapper;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util._TestUtil;
+
+public class TestForTooMuchCloning extends LuceneTestCase {
+
+ // Make sure we don't clone IndexInputs too frequently
+ // during merging:
+ public void test() throws Exception {
+ String codec = CodecProvider.getDefault().getFieldCodec("field");
+ // TODO: once LUCENE-3517 is fixed, remove this:
+ assumeFalse("PulsingCodec fails this test because of over-cloning", codec.equals("Pulsing") || codec.equals("MockRandom"));
+ final MockDirectoryWrapper dir = newDirectory();
+ final TieredMergePolicy tmp = new TieredMergePolicy();
+ tmp.setMaxMergeAtOnce(2);
+ final RandomIndexWriter w = new RandomIndexWriter(random, dir,
+ newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(2).setMergePolicy(tmp));
+ final int numDocs = 20;
+ for(int docs=0;docs<numDocs;docs++) {
+ StringBuilder sb = new StringBuilder();
+ for(int terms=0;terms<100;terms++) {
+ sb.append(_TestUtil.randomRealisticUnicodeString(random));
+ sb.append(' ');
+ }
+ final Document doc = new Document();
+ doc.add(new TextField("field", sb.toString()));
+ w.addDocument(doc);
+ }
+ final IndexReader r = w.getReader();
+ w.close();
+
+ final int cloneCount = dir.getInputCloneCount();
+ //System.out.println("merge clone count=" + cloneCount);
+ assertTrue("too many calls to IndexInput.clone during merging: " + dir.getInputCloneCount(), cloneCount < 500);
+
+ final IndexSearcher s = new IndexSearcher(r);
+
+ // MTQ that matches all terms so the AUTO_REWRITE should
+ // cutover to filter rewrite and reuse a single DocsEnum
+ // across all terms;
+ final TopDocs hits = s.search(new TermRangeQuery("field",
+ new BytesRef(),
+ new BytesRef("\uFFFF"),
+ true,
+ true), 10);
+ assertTrue(hits.totalHits > 0);
+ final int queryCloneCount = dir.getInputCloneCount() - cloneCount;
+ //System.out.println("query clone count=" + queryCloneCount);
+ assertTrue("too many calls to IndexInput.clone during TermRangeQuery: " + queryCloneCount, queryCloneCount < 50);
+ s.close();
+ r.close();
+ dir.close();
+ }
+}
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestRollingUpdates.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestRollingUpdates.java?rev=1183464&r1=1183463&r2=1183464&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestRollingUpdates.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestRollingUpdates.java Fri Oct 14 19:23:29 2011
@@ -42,6 +42,7 @@ public class TestRollingUpdates extends
}
final IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setCodecProvider(provider));
+ w.setInfoStream(VERBOSE ? System.out : null);
final int SIZE = atLeast(TEST_NIGHTLY ? 100 : 20);
int id = 0;
IndexReader r = null;