You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2010/07/24 15:40:22 UTC

svn commit: r978872 - in /lucene/dev/branches/preflexfixes/lucene/src: java/org/apache/lucene/index/codecs/preflex/ test/org/apache/lucene/index/ test/org/apache/lucene/index/codecs/preflex/ test/org/apache/lucene/index/codecs/preflexrw/ test/org/apach...

Author: mikemccand
Date: Sat Jul 24 13:40:22 2010
New Revision: 978872

URL: http://svn.apache.org/viewvc?rev=978872&view=rev
Log:
LUCENE-2554: make PreFlexRW codec use finer-grained impersonation, so we can test dancing NRT/deletions too

Modified:
    lucene/dev/branches/preflexfixes/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexCodec.java
    lucene/dev/branches/preflexfixes/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java
    lucene/dev/branches/preflexfixes/lucene/src/test/org/apache/lucene/index/TestStressIndexing2.java
    lucene/dev/branches/preflexfixes/lucene/src/test/org/apache/lucene/index/codecs/preflex/TestSurrogates.java
    lucene/dev/branches/preflexfixes/lucene/src/test/org/apache/lucene/index/codecs/preflexrw/PreFlexRWCodec.java
    lucene/dev/branches/preflexfixes/lucene/src/test/org/apache/lucene/util/LuceneTestCase.java
    lucene/dev/branches/preflexfixes/lucene/src/test/org/apache/lucene/util/LuceneTestCaseJ4.java

Modified: lucene/dev/branches/preflexfixes/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexCodec.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/preflexfixes/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexCodec.java?rev=978872&r1=978871&r2=978872&view=diff
==============================================================================
--- lucene/dev/branches/preflexfixes/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexCodec.java (original)
+++ lucene/dev/branches/preflexfixes/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexCodec.java Sat Jul 24 13:40:22 2010
@@ -62,7 +62,7 @@ public class PreFlexCodec extends Codec 
 
   @Override
   public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
-    return new PreFlexFields(state.dir, state.fieldInfos, state.segmentInfo, state.readBufferSize, state.termsIndexDivisor, true);
+    return new PreFlexFields(state.dir, state.fieldInfos, state.segmentInfo, state.readBufferSize, state.termsIndexDivisor);
   }
 
   @Override

Modified: lucene/dev/branches/preflexfixes/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/preflexfixes/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java?rev=978872&r1=978871&r2=978872&view=diff
==============================================================================
--- lucene/dev/branches/preflexfixes/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java (original)
+++ lucene/dev/branches/preflexfixes/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java Sat Jul 24 13:40:22 2010
@@ -58,19 +58,11 @@ public class PreFlexFields extends Field
   private final Directory dir;
   private final int readBufferSize;
   private Directory cfsReader;
-  private final boolean unicodeSortOrder;
 
-  // If unicodeSortOrder is true, we do the surrogates dance
-  // so that the terms are sorted by unicode sort order.
-  // This should be true when segments are used for "normal"
-  // searching; it's only false during testing, to create a
-  // pre-flex index, using the preflexrw codec under
-  // src/test.
-  public PreFlexFields(Directory dir, FieldInfos fieldInfos, SegmentInfo info, int readBufferSize, int indexDivisor, boolean unicodeSortOrder)
+  public PreFlexFields(Directory dir, FieldInfos fieldInfos, SegmentInfo info, int readBufferSize, int indexDivisor)
     throws IOException {
 
     si = info;
-    this.unicodeSortOrder = unicodeSortOrder;
 
     // NOTE: we must always load terms index, even for
     // "sequential" scan during merging, because what is
@@ -114,6 +106,15 @@ public class PreFlexFields extends Field
     this.dir = dir;
   }
 
+  // If this returns true, we do the surrogates dance so that the
+  // terms are sorted by unicode sort order.  This should be
+  // true when segments are used for "normal" searching;
+  // it's only false during testing, to create a pre-flex
+  // index, using the test-only PreFlexRW.
+  protected boolean sortTermsByUnicode() {
+    return true;
+  }
+
   static void files(Directory dir, SegmentInfo info, Collection<String> files) throws IOException {
     files.add(IndexFileNames.segmentFileName(info.name, "", PreFlexCodec.TERMS_EXTENSION));
     files.add(IndexFileNames.segmentFileName(info.name, "", PreFlexCodec.TERMS_INDEX_EXTENSION));
@@ -241,7 +242,7 @@ public class PreFlexFields extends Field
     public Comparator<BytesRef> getComparator() {
       // Pre-flex indexes always sorted in UTF16 order, but
       // we remap on-the-fly to unicode order
-      if (unicodeSortOrder) {
+      if (sortTermsByUnicode()) {
         return BytesRef.getUTF8SortedAsUnicodeComparator();
       } else {
         return BytesRef.getUTF8SortedAsUTF16Comparator();
@@ -692,6 +693,8 @@ public class PreFlexFields extends Field
       }
     }
 
+    private boolean unicodeSortOrder;
+
     void reset(FieldInfo fieldInfo) throws IOException {
       //System.out.println("pff.reset te=" + termEnum);
       this.fieldInfo = fieldInfo;
@@ -705,6 +708,8 @@ public class PreFlexFields extends Field
       }
       skipNext = true;
 
+      unicodeSortOrder = sortTermsByUnicode();
+
       final Term t = termEnum.term();
       if (t != null && t.field() == fieldInfo.name) {
         newSuffixStart = 0;

Modified: lucene/dev/branches/preflexfixes/lucene/src/test/org/apache/lucene/index/TestStressIndexing2.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/preflexfixes/lucene/src/test/org/apache/lucene/index/TestStressIndexing2.java?rev=978872&r1=978871&r2=978872&view=diff
==============================================================================
--- lucene/dev/branches/preflexfixes/lucene/src/test/org/apache/lucene/index/TestStressIndexing2.java (original)
+++ lucene/dev/branches/preflexfixes/lucene/src/test/org/apache/lucene/index/TestStressIndexing2.java Sat Jul 24 13:40:22 2010
@@ -25,7 +25,6 @@ import java.util.List;
 import java.util.Map;
 import java.util.Random;
 import org.apache.lucene.util.*;
-import org.apache.lucene.index.codecs.preflexrw.PreFlexRWCodec;
 
 import junit.framework.Assert;
 
@@ -263,10 +262,7 @@ public class TestStressIndexing2 extends
   }
   
   public static void verifyEquals(Random r, IndexReader r1, Directory dir2, String idField) throws Throwable {
-    // When we're testing w/ PreFlex codec, we must open
-    // this reader with UTF16 terms since incoming NRT
-    // reader is sorted this way:
-    IndexReader r2 = IndexReader.open(dir2, null, true, _TestUtil.nextInt(r, 1, 3), _TestUtil.alwaysCodec(new PreFlexRWCodec("utf16")));
+    IndexReader r2 = IndexReader.open(dir2);
     verifyEquals(r1, r2, idField);
     r2.close();
   }

Modified: lucene/dev/branches/preflexfixes/lucene/src/test/org/apache/lucene/index/codecs/preflex/TestSurrogates.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/preflexfixes/lucene/src/test/org/apache/lucene/index/codecs/preflex/TestSurrogates.java?rev=978872&r1=978871&r2=978872&view=diff
==============================================================================
--- lucene/dev/branches/preflexfixes/lucene/src/test/org/apache/lucene/index/codecs/preflex/TestSurrogates.java (original)
+++ lucene/dev/branches/preflexfixes/lucene/src/test/org/apache/lucene/index/codecs/preflex/TestSurrogates.java Sat Jul 24 13:40:22 2010
@@ -63,10 +63,14 @@ public class TestSurrogates extends Luce
 
   private String getRandomString(Random r) {
     String s;
-    if (r.nextInt(3) == 1) {
-      s = makeDifficultRandomUnicodeString(r);
+    if (r.nextInt(5) == 1) {
+      if (r.nextInt(3) == 1) {
+        s = makeDifficultRandomUnicodeString(r);
+      } else {
+        s = _TestUtil.randomUnicodeString(r);
+      }
     } else {
-      s = _TestUtil.randomUnicodeString(r);
+      s = _TestUtil.randomRealisticUnicodeString(r);
     }
     return s;
   }
@@ -272,7 +276,7 @@ public class TestSurrogates extends Luce
     RandomIndexWriter w = new RandomIndexWriter(r,
                                                 dir,
                                                 newIndexWriterConfig(r, TEST_VERSION_CURRENT,
-                                                                      new MockAnalyzer()).setCodecProvider(_TestUtil.alwaysCodec(new PreFlexRWCodec(null))));
+                                                                      new MockAnalyzer()).setCodecProvider(_TestUtil.alwaysCodec(new PreFlexRWCodec())));
 
     final int numField = _TestUtil.nextInt(r, 2, 5);
 

Modified: lucene/dev/branches/preflexfixes/lucene/src/test/org/apache/lucene/index/codecs/preflexrw/PreFlexRWCodec.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/preflexfixes/lucene/src/test/org/apache/lucene/index/codecs/preflexrw/PreFlexRWCodec.java?rev=978872&r1=978871&r2=978872&view=diff
==============================================================================
--- lucene/dev/branches/preflexfixes/lucene/src/test/org/apache/lucene/index/codecs/preflexrw/PreFlexRWCodec.java (original)
+++ lucene/dev/branches/preflexfixes/lucene/src/test/org/apache/lucene/index/codecs/preflexrw/PreFlexRWCodec.java Sat Jul 24 13:40:22 2010
@@ -25,6 +25,7 @@ import org.apache.lucene.index.codecs.pr
 import org.apache.lucene.index.codecs.preflex.PreFlexFields;
 import org.apache.lucene.index.codecs.FieldsConsumer;
 import org.apache.lucene.index.codecs.FieldsProducer;
+import org.apache.lucene.util.LuceneTestCaseJ4;
 
 /** Codec, only for testing, that can write and read the
  *  pre-flex index format.
@@ -33,20 +34,14 @@ import org.apache.lucene.index.codecs.Fi
  */
 public class PreFlexRWCodec extends PreFlexCodec {
 
-  private final String termSortOrder;
-
-  // termSortOrder should be null (dynamically deteremined
-  // by stack), "codepoint" or "utf16" 
-  public PreFlexRWCodec(String termSortOrder) {
+  public PreFlexRWCodec() {
     // NOTE: we impersonate the PreFlex codec so that it can
     // read the segments we write!
     super();
-    this.termSortOrder = termSortOrder;
   }
   
   @Override
   public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
-    System.out.println("PFW");
     return new PreFlexFieldsWriter(state);
   }
 
@@ -56,23 +51,27 @@ public class PreFlexRWCodec extends PreF
     // Whenever IW opens readers, eg for merging, we have to
     // keep terms order in UTF16:
 
-    boolean unicodeSortOrder;
-    if (termSortOrder == null) {
-      unicodeSortOrder = true;
-
-      StackTraceElement[] trace = new Exception().getStackTrace();
-      for (int i = 0; i < trace.length; i++) {
-        //System.out.println(trace[i].getClassName());
-        if ("org.apache.lucene.index.IndexWriter".equals(trace[i].getClassName())) {
-          unicodeSortOrder = false;
-          break;
+    return new PreFlexFields(state.dir, state.fieldInfos, state.segmentInfo, state.readBufferSize, state.termsIndexDivisor) {
+      @Override
+      protected boolean sortTermsByUnicode() {
+        // We carefully peek into stack trace above us: if
+        // we are part of a "merge", we must sort by UTF16:
+        boolean unicodeSortOrder = true;
+
+        StackTraceElement[] trace = new Exception().getStackTrace();
+        for (int i = 0; i < trace.length; i++) {
+          //System.out.println(trace[i].getClassName());
+          if ("merge".equals(trace[i].getMethodName())) {
+            unicodeSortOrder = false;
+            if (LuceneTestCaseJ4.VERBOSE) {
+              System.out.println("NOTE: PreFlexRW codec: forcing legacy UTF16 term sort order");
+            }
+            break;
+          }
         }
-      }
-      //System.out.println("PRW: " + unicodeSortOrder);
-    } else {
-      unicodeSortOrder = termSortOrder.equals("codepoint");
-    }
 
-    return new PreFlexFields(state.dir, state.fieldInfos, state.segmentInfo, state.readBufferSize, state.termsIndexDivisor, unicodeSortOrder);
+        return unicodeSortOrder;
+      }
+    };
   }
 }

Modified: lucene/dev/branches/preflexfixes/lucene/src/test/org/apache/lucene/util/LuceneTestCase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/preflexfixes/lucene/src/test/org/apache/lucene/util/LuceneTestCase.java?rev=978872&r1=978871&r2=978872&view=diff
==============================================================================
--- lucene/dev/branches/preflexfixes/lucene/src/test/org/apache/lucene/util/LuceneTestCase.java (original)
+++ lucene/dev/branches/preflexfixes/lucene/src/test/org/apache/lucene/util/LuceneTestCase.java Sat Jul 24 13:40:22 2010
@@ -128,7 +128,7 @@ public abstract class LuceneTestCase ext
     // test-only PreFlexRW codec (since core PreFlex can
     // only read segments):
     if (codec.equals("PreFlex")) {
-      CodecProvider.getDefault().register(new PreFlexRWCodec(null));
+      CodecProvider.getDefault().register(new PreFlexRWCodec());
     } 
     CodecProvider.setDefaultCodec(codec);
   }
@@ -158,7 +158,7 @@ public abstract class LuceneTestCase ext
     BooleanQuery.setMaxClauseCount(savedBoolMaxClauseCount);
     // Restore read-only PreFlex codec:
     if (codec.equals("PreFlex")) {
-      CodecProvider.getDefault().unregister(new PreFlexRWCodec(null));
+      CodecProvider.getDefault().unregister(new PreFlexRWCodec());
       CodecProvider.getDefault().register(new PreFlexCodec());
     } 
     CodecProvider.setDefaultCodec(savedDefaultCodec);

Modified: lucene/dev/branches/preflexfixes/lucene/src/test/org/apache/lucene/util/LuceneTestCaseJ4.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/preflexfixes/lucene/src/test/org/apache/lucene/util/LuceneTestCaseJ4.java?rev=978872&r1=978871&r2=978872&view=diff
==============================================================================
--- lucene/dev/branches/preflexfixes/lucene/src/test/org/apache/lucene/util/LuceneTestCaseJ4.java (original)
+++ lucene/dev/branches/preflexfixes/lucene/src/test/org/apache/lucene/util/LuceneTestCaseJ4.java Sat Jul 24 13:40:22 2010
@@ -152,7 +152,7 @@ public class LuceneTestCaseJ4 {
     // test-only PreFlexRW codec (since core PreFlex can
     // only read segments):
     if (codec.equals("PreFlex")) {
-      CodecProvider.getDefault().register(new PreFlexRWCodec(null));
+      CodecProvider.getDefault().register(new PreFlexRWCodec());
     } 
     CodecProvider.setDefaultCodec(codec);
   }
@@ -161,7 +161,7 @@ public class LuceneTestCaseJ4 {
   public static void afterClassLuceneTestCaseJ4() {
     // Restore read-only PreFlex codec:
     if (codec.equals("PreFlex")) {
-      CodecProvider.getDefault().unregister(new PreFlexRWCodec(null));
+      CodecProvider.getDefault().unregister(new PreFlexRWCodec());
       CodecProvider.getDefault().register(new PreFlexCodec());
     }
     CodecProvider.setDefaultCodec(savedDefaultCodec);