You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2010/07/24 15:40:22 UTC
svn commit: r978872 - in /lucene/dev/branches/preflexfixes/lucene/src:
java/org/apache/lucene/index/codecs/preflex/ test/org/apache/lucene/index/
test/org/apache/lucene/index/codecs/preflex/
test/org/apache/lucene/index/codecs/preflexrw/ test/org/apach...
Author: mikemccand
Date: Sat Jul 24 13:40:22 2010
New Revision: 978872
URL: http://svn.apache.org/viewvc?rev=978872&view=rev
Log:
LUCENE-2554: make PreFlexRW codec use finer-grained impersonation, so we can test dancing NRT/deletions too
Modified:
lucene/dev/branches/preflexfixes/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexCodec.java
lucene/dev/branches/preflexfixes/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java
lucene/dev/branches/preflexfixes/lucene/src/test/org/apache/lucene/index/TestStressIndexing2.java
lucene/dev/branches/preflexfixes/lucene/src/test/org/apache/lucene/index/codecs/preflex/TestSurrogates.java
lucene/dev/branches/preflexfixes/lucene/src/test/org/apache/lucene/index/codecs/preflexrw/PreFlexRWCodec.java
lucene/dev/branches/preflexfixes/lucene/src/test/org/apache/lucene/util/LuceneTestCase.java
lucene/dev/branches/preflexfixes/lucene/src/test/org/apache/lucene/util/LuceneTestCaseJ4.java
Modified: lucene/dev/branches/preflexfixes/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexCodec.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/preflexfixes/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexCodec.java?rev=978872&r1=978871&r2=978872&view=diff
==============================================================================
--- lucene/dev/branches/preflexfixes/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexCodec.java (original)
+++ lucene/dev/branches/preflexfixes/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexCodec.java Sat Jul 24 13:40:22 2010
@@ -62,7 +62,7 @@ public class PreFlexCodec extends Codec
@Override
public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
- return new PreFlexFields(state.dir, state.fieldInfos, state.segmentInfo, state.readBufferSize, state.termsIndexDivisor, true);
+ return new PreFlexFields(state.dir, state.fieldInfos, state.segmentInfo, state.readBufferSize, state.termsIndexDivisor);
}
@Override
Modified: lucene/dev/branches/preflexfixes/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/preflexfixes/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java?rev=978872&r1=978871&r2=978872&view=diff
==============================================================================
--- lucene/dev/branches/preflexfixes/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java (original)
+++ lucene/dev/branches/preflexfixes/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java Sat Jul 24 13:40:22 2010
@@ -58,19 +58,11 @@ public class PreFlexFields extends Field
private final Directory dir;
private final int readBufferSize;
private Directory cfsReader;
- private final boolean unicodeSortOrder;
- // If unicodeSortOrder is true, we do the surrogates dance
- // so that the terms are sorted by unicode sort order.
- // This should be true when segments are used for "normal"
- // searching; it's only false during testing, to create a
- // pre-flex index, using the preflexrw codec under
- // src/test.
- public PreFlexFields(Directory dir, FieldInfos fieldInfos, SegmentInfo info, int readBufferSize, int indexDivisor, boolean unicodeSortOrder)
+ public PreFlexFields(Directory dir, FieldInfos fieldInfos, SegmentInfo info, int readBufferSize, int indexDivisor)
throws IOException {
si = info;
- this.unicodeSortOrder = unicodeSortOrder;
// NOTE: we must always load terms index, even for
// "sequential" scan during merging, because what is
@@ -114,6 +106,15 @@ public class PreFlexFields extends Field
this.dir = dir;
}
+ // If this returns true, we do the surrogates dance so that the
+ // terms are sorted by unicode sort order. This should be
+ // true when segments are used for "normal" searching;
+ // it's only false during testing, to create a pre-flex
+ // index, using the test-only PreFlexRW.
+ protected boolean sortTermsByUnicode() {
+ return true;
+ }
+
static void files(Directory dir, SegmentInfo info, Collection<String> files) throws IOException {
files.add(IndexFileNames.segmentFileName(info.name, "", PreFlexCodec.TERMS_EXTENSION));
files.add(IndexFileNames.segmentFileName(info.name, "", PreFlexCodec.TERMS_INDEX_EXTENSION));
@@ -241,7 +242,7 @@ public class PreFlexFields extends Field
public Comparator<BytesRef> getComparator() {
// Pre-flex indexes always sorted in UTF16 order, but
// we remap on-the-fly to unicode order
- if (unicodeSortOrder) {
+ if (sortTermsByUnicode()) {
return BytesRef.getUTF8SortedAsUnicodeComparator();
} else {
return BytesRef.getUTF8SortedAsUTF16Comparator();
@@ -692,6 +693,8 @@ public class PreFlexFields extends Field
}
}
+ private boolean unicodeSortOrder;
+
void reset(FieldInfo fieldInfo) throws IOException {
//System.out.println("pff.reset te=" + termEnum);
this.fieldInfo = fieldInfo;
@@ -705,6 +708,8 @@ public class PreFlexFields extends Field
}
skipNext = true;
+ unicodeSortOrder = sortTermsByUnicode();
+
final Term t = termEnum.term();
if (t != null && t.field() == fieldInfo.name) {
newSuffixStart = 0;
Modified: lucene/dev/branches/preflexfixes/lucene/src/test/org/apache/lucene/index/TestStressIndexing2.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/preflexfixes/lucene/src/test/org/apache/lucene/index/TestStressIndexing2.java?rev=978872&r1=978871&r2=978872&view=diff
==============================================================================
--- lucene/dev/branches/preflexfixes/lucene/src/test/org/apache/lucene/index/TestStressIndexing2.java (original)
+++ lucene/dev/branches/preflexfixes/lucene/src/test/org/apache/lucene/index/TestStressIndexing2.java Sat Jul 24 13:40:22 2010
@@ -25,7 +25,6 @@ import java.util.List;
import java.util.Map;
import java.util.Random;
import org.apache.lucene.util.*;
-import org.apache.lucene.index.codecs.preflexrw.PreFlexRWCodec;
import junit.framework.Assert;
@@ -263,10 +262,7 @@ public class TestStressIndexing2 extends
}
public static void verifyEquals(Random r, IndexReader r1, Directory dir2, String idField) throws Throwable {
- // When we're testing w/ PreFlex codec, we must open
- // this reader with UTF16 terms since incoming NRT
- // reader is sorted this way:
- IndexReader r2 = IndexReader.open(dir2, null, true, _TestUtil.nextInt(r, 1, 3), _TestUtil.alwaysCodec(new PreFlexRWCodec("utf16")));
+ IndexReader r2 = IndexReader.open(dir2);
verifyEquals(r1, r2, idField);
r2.close();
}
Modified: lucene/dev/branches/preflexfixes/lucene/src/test/org/apache/lucene/index/codecs/preflex/TestSurrogates.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/preflexfixes/lucene/src/test/org/apache/lucene/index/codecs/preflex/TestSurrogates.java?rev=978872&r1=978871&r2=978872&view=diff
==============================================================================
--- lucene/dev/branches/preflexfixes/lucene/src/test/org/apache/lucene/index/codecs/preflex/TestSurrogates.java (original)
+++ lucene/dev/branches/preflexfixes/lucene/src/test/org/apache/lucene/index/codecs/preflex/TestSurrogates.java Sat Jul 24 13:40:22 2010
@@ -63,10 +63,14 @@ public class TestSurrogates extends Luce
private String getRandomString(Random r) {
String s;
- if (r.nextInt(3) == 1) {
- s = makeDifficultRandomUnicodeString(r);
+ if (r.nextInt(5) == 1) {
+ if (r.nextInt(3) == 1) {
+ s = makeDifficultRandomUnicodeString(r);
+ } else {
+ s = _TestUtil.randomUnicodeString(r);
+ }
} else {
- s = _TestUtil.randomUnicodeString(r);
+ s = _TestUtil.randomRealisticUnicodeString(r);
}
return s;
}
@@ -272,7 +276,7 @@ public class TestSurrogates extends Luce
RandomIndexWriter w = new RandomIndexWriter(r,
dir,
newIndexWriterConfig(r, TEST_VERSION_CURRENT,
- new MockAnalyzer()).setCodecProvider(_TestUtil.alwaysCodec(new PreFlexRWCodec(null))));
+ new MockAnalyzer()).setCodecProvider(_TestUtil.alwaysCodec(new PreFlexRWCodec())));
final int numField = _TestUtil.nextInt(r, 2, 5);
Modified: lucene/dev/branches/preflexfixes/lucene/src/test/org/apache/lucene/index/codecs/preflexrw/PreFlexRWCodec.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/preflexfixes/lucene/src/test/org/apache/lucene/index/codecs/preflexrw/PreFlexRWCodec.java?rev=978872&r1=978871&r2=978872&view=diff
==============================================================================
--- lucene/dev/branches/preflexfixes/lucene/src/test/org/apache/lucene/index/codecs/preflexrw/PreFlexRWCodec.java (original)
+++ lucene/dev/branches/preflexfixes/lucene/src/test/org/apache/lucene/index/codecs/preflexrw/PreFlexRWCodec.java Sat Jul 24 13:40:22 2010
@@ -25,6 +25,7 @@ import org.apache.lucene.index.codecs.pr
import org.apache.lucene.index.codecs.preflex.PreFlexFields;
import org.apache.lucene.index.codecs.FieldsConsumer;
import org.apache.lucene.index.codecs.FieldsProducer;
+import org.apache.lucene.util.LuceneTestCaseJ4;
/** Codec, only for testing, that can write and read the
* pre-flex index format.
@@ -33,20 +34,14 @@ import org.apache.lucene.index.codecs.Fi
*/
public class PreFlexRWCodec extends PreFlexCodec {
- private final String termSortOrder;
-
- // termSortOrder should be null (dynamically deteremined
- // by stack), "codepoint" or "utf16"
- public PreFlexRWCodec(String termSortOrder) {
+ public PreFlexRWCodec() {
// NOTE: we impersonate the PreFlex codec so that it can
// read the segments we write!
super();
- this.termSortOrder = termSortOrder;
}
@Override
public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
- System.out.println("PFW");
return new PreFlexFieldsWriter(state);
}
@@ -56,23 +51,27 @@ public class PreFlexRWCodec extends PreF
// Whenever IW opens readers, eg for merging, we have to
// keep terms order in UTF16:
- boolean unicodeSortOrder;
- if (termSortOrder == null) {
- unicodeSortOrder = true;
-
- StackTraceElement[] trace = new Exception().getStackTrace();
- for (int i = 0; i < trace.length; i++) {
- //System.out.println(trace[i].getClassName());
- if ("org.apache.lucene.index.IndexWriter".equals(trace[i].getClassName())) {
- unicodeSortOrder = false;
- break;
+ return new PreFlexFields(state.dir, state.fieldInfos, state.segmentInfo, state.readBufferSize, state.termsIndexDivisor) {
+ @Override
+ protected boolean sortTermsByUnicode() {
+ // We carefully peek into the stack trace above us: if
+ // we are part of a "merge", we must sort by UTF16:
+ boolean unicodeSortOrder = true;
+
+ StackTraceElement[] trace = new Exception().getStackTrace();
+ for (int i = 0; i < trace.length; i++) {
+ //System.out.println(trace[i].getClassName());
+ if ("merge".equals(trace[i].getMethodName())) {
+ unicodeSortOrder = false;
+ if (LuceneTestCaseJ4.VERBOSE) {
+ System.out.println("NOTE: PreFlexRW codec: forcing legacy UTF16 term sort order");
+ }
+ break;
+ }
}
- }
- //System.out.println("PRW: " + unicodeSortOrder);
- } else {
- unicodeSortOrder = termSortOrder.equals("codepoint");
- }
- return new PreFlexFields(state.dir, state.fieldInfos, state.segmentInfo, state.readBufferSize, state.termsIndexDivisor, unicodeSortOrder);
+ return unicodeSortOrder;
+ }
+ };
}
}
Modified: lucene/dev/branches/preflexfixes/lucene/src/test/org/apache/lucene/util/LuceneTestCase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/preflexfixes/lucene/src/test/org/apache/lucene/util/LuceneTestCase.java?rev=978872&r1=978871&r2=978872&view=diff
==============================================================================
--- lucene/dev/branches/preflexfixes/lucene/src/test/org/apache/lucene/util/LuceneTestCase.java (original)
+++ lucene/dev/branches/preflexfixes/lucene/src/test/org/apache/lucene/util/LuceneTestCase.java Sat Jul 24 13:40:22 2010
@@ -128,7 +128,7 @@ public abstract class LuceneTestCase ext
// test-only PreFlexRW codec (since core PreFlex can
// only read segments):
if (codec.equals("PreFlex")) {
- CodecProvider.getDefault().register(new PreFlexRWCodec(null));
+ CodecProvider.getDefault().register(new PreFlexRWCodec());
}
CodecProvider.setDefaultCodec(codec);
}
@@ -158,7 +158,7 @@ public abstract class LuceneTestCase ext
BooleanQuery.setMaxClauseCount(savedBoolMaxClauseCount);
// Restore read-only PreFlex codec:
if (codec.equals("PreFlex")) {
- CodecProvider.getDefault().unregister(new PreFlexRWCodec(null));
+ CodecProvider.getDefault().unregister(new PreFlexRWCodec());
CodecProvider.getDefault().register(new PreFlexCodec());
}
CodecProvider.setDefaultCodec(savedDefaultCodec);
Modified: lucene/dev/branches/preflexfixes/lucene/src/test/org/apache/lucene/util/LuceneTestCaseJ4.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/preflexfixes/lucene/src/test/org/apache/lucene/util/LuceneTestCaseJ4.java?rev=978872&r1=978871&r2=978872&view=diff
==============================================================================
--- lucene/dev/branches/preflexfixes/lucene/src/test/org/apache/lucene/util/LuceneTestCaseJ4.java (original)
+++ lucene/dev/branches/preflexfixes/lucene/src/test/org/apache/lucene/util/LuceneTestCaseJ4.java Sat Jul 24 13:40:22 2010
@@ -152,7 +152,7 @@ public class LuceneTestCaseJ4 {
// test-only PreFlexRW codec (since core PreFlex can
// only read segments):
if (codec.equals("PreFlex")) {
- CodecProvider.getDefault().register(new PreFlexRWCodec(null));
+ CodecProvider.getDefault().register(new PreFlexRWCodec());
}
CodecProvider.setDefaultCodec(codec);
}
@@ -161,7 +161,7 @@ public class LuceneTestCaseJ4 {
public static void afterClassLuceneTestCaseJ4() {
// Restore read-only PreFlex codec:
if (codec.equals("PreFlex")) {
- CodecProvider.getDefault().unregister(new PreFlexRWCodec(null));
+ CodecProvider.getDefault().unregister(new PreFlexRWCodec());
CodecProvider.getDefault().register(new PreFlexCodec());
}
CodecProvider.setDefaultCodec(savedDefaultCodec);