You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2011/05/09 17:24:23 UTC

svn commit: r1101062 [9/21] - in /lucene/dev/branches/bulkpostings: ./ dev-tools/ dev-tools/eclipse/ dev-tools/idea/.idea/ dev-tools/idea/lucene/contrib/ant/ dev-tools/idea/lucene/contrib/db/bdb-je/ dev-tools/idea/lucene/contrib/db/bdb/ dev-tools/idea/...

Modified: lucene/dev/branches/bulkpostings/lucene/src/test-framework/org/apache/lucene/analysis/MockAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test-framework/org/apache/lucene/analysis/MockAnalyzer.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test-framework/org/apache/lucene/analysis/MockAnalyzer.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test-framework/org/apache/lucene/analysis/MockAnalyzer.java Mon May  9 15:24:04 2011
@@ -19,10 +19,10 @@ package org.apache.lucene.analysis;
 
 import java.io.IOException;
 import java.io.Reader;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Random;
 
-import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
-import org.apache.lucene.index.Payload;
 import org.apache.lucene.util.automaton.CharacterRunAutomaton;
 
 /**
@@ -33,67 +33,50 @@ public final class MockAnalyzer extends 
   private final boolean lowerCase;
   private final CharacterRunAutomaton filter;
   private final boolean enablePositionIncrements;
-  private final boolean payload;
   private int positionIncrementGap;
-
-  /**
-   * Calls {@link #MockAnalyzer(CharacterRunAutomaton, boolean, CharacterRunAutomaton, boolean, boolean) 
-   * MockAnalyzer(runAutomaton, lowerCase, filter, enablePositionIncrements, true}).
-   */
-  public MockAnalyzer(CharacterRunAutomaton runAutomaton, boolean lowerCase, CharacterRunAutomaton filter, boolean enablePositionIncrements) {
-    this(runAutomaton, lowerCase, filter, enablePositionIncrements, true);    
-  }
+  private final Random random;
+  private Map<String,Integer> previousMappings = new HashMap<String,Integer>();
 
   /**
    * Creates a new MockAnalyzer.
    * 
+   * @param random Random for payloads behavior
    * @param runAutomaton DFA describing how tokenization should happen (e.g. [a-zA-Z]+)
    * @param lowerCase true if the tokenizer should lowercase terms
    * @param filter DFA describing how terms should be filtered (set of stopwords, etc)
    * @param enablePositionIncrements true if position increments should reflect filtered terms.
-   * @param payload if payloads should be added containing the positions (for testing)
    */
-  public MockAnalyzer(CharacterRunAutomaton runAutomaton, boolean lowerCase, CharacterRunAutomaton filter, boolean enablePositionIncrements, boolean payload) {
+  public MockAnalyzer(Random random, CharacterRunAutomaton runAutomaton, boolean lowerCase, CharacterRunAutomaton filter, boolean enablePositionIncrements) {
+    this.random = random;
     this.runAutomaton = runAutomaton;
     this.lowerCase = lowerCase;
     this.filter = filter;
     this.enablePositionIncrements = enablePositionIncrements;
-    this.payload = payload;
   }
 
   /**
-   * Calls {@link #MockAnalyzer(CharacterRunAutomaton, boolean, CharacterRunAutomaton, boolean, boolean) 
-   * MockAnalyzer(runAutomaton, lowerCase, MockTokenFilter.EMPTY_STOPSET, false, true}).
+   * Calls {@link #MockAnalyzer(Random, CharacterRunAutomaton, boolean, CharacterRunAutomaton, boolean) 
+   * MockAnalyzer(random, runAutomaton, lowerCase, MockTokenFilter.EMPTY_STOPSET, false}).
    */
-  public MockAnalyzer(CharacterRunAutomaton runAutomaton, boolean lowerCase) {
-    this(runAutomaton, lowerCase, MockTokenFilter.EMPTY_STOPSET, false, true);
+  public MockAnalyzer(Random random, CharacterRunAutomaton runAutomaton, boolean lowerCase) {
+    this(random, runAutomaton, lowerCase, MockTokenFilter.EMPTY_STOPSET, false);
   }
 
-  /**
-   * Calls {@link #MockAnalyzer(CharacterRunAutomaton, boolean, CharacterRunAutomaton, boolean, boolean) 
-   * MockAnalyzer(runAutomaton, lowerCase, MockTokenFilter.EMPTY_STOPSET, false, payload}).
-   */
-  public MockAnalyzer(CharacterRunAutomaton runAutomaton, boolean lowerCase, boolean payload) {
-    this(runAutomaton, lowerCase, MockTokenFilter.EMPTY_STOPSET, false, payload);
-  }
-  
   /** 
    * Create a Whitespace-lowercasing analyzer with no stopwords removal.
    * <p>
-   * Calls {@link #MockAnalyzer(CharacterRunAutomaton, boolean, CharacterRunAutomaton, boolean, boolean) 
-   * MockAnalyzer(MockTokenizer.WHITESPACE, true, MockTokenFilter.EMPTY_STOPSET, false, true}).
+   * Calls {@link #MockAnalyzer(Random, CharacterRunAutomaton, boolean, CharacterRunAutomaton, boolean) 
+   * MockAnalyzer(random, MockTokenizer.WHITESPACE, true, MockTokenFilter.EMPTY_STOPSET, false}).
    */
-  public MockAnalyzer() {
-    this(MockTokenizer.WHITESPACE, true);
+  public MockAnalyzer(Random random) {
+    this(random, MockTokenizer.WHITESPACE, true);
   }
 
   @Override
   public TokenStream tokenStream(String fieldName, Reader reader) {
     MockTokenizer tokenizer = new MockTokenizer(reader, runAutomaton, lowerCase);
     TokenFilter filt = new MockTokenFilter(tokenizer, filter, enablePositionIncrements);
-    if (payload){
-      filt = new SimplePayloadFilter(filt, fieldName);
-    }
+    filt = maybePayload(filt, fieldName);
     return filt;
   }
 
@@ -105,15 +88,19 @@ public final class MockAnalyzer extends 
   @Override
   public TokenStream reusableTokenStream(String fieldName, Reader reader)
       throws IOException {
-    SavedStreams saved = (SavedStreams) getPreviousTokenStream();
+    @SuppressWarnings("unchecked") Map<String,SavedStreams> map = (Map) getPreviousTokenStream();
+    if (map == null) {
+      map = new HashMap<String,SavedStreams>();
+      setPreviousTokenStream(map);
+    }
+    
+    SavedStreams saved = map.get(fieldName);
     if (saved == null) {
       saved = new SavedStreams();
       saved.tokenizer = new MockTokenizer(reader, runAutomaton, lowerCase);
       saved.filter = new MockTokenFilter(saved.tokenizer, filter, enablePositionIncrements);
-      if (payload){
-        saved.filter = new SimplePayloadFilter(saved.filter, fieldName);
-      }
-      setPreviousTokenStream(saved);
+      saved.filter = maybePayload(saved.filter, fieldName);
+      map.put(fieldName, saved);
       return saved.filter;
     } else {
       saved.tokenizer.reset(reader);
@@ -122,6 +109,28 @@ public final class MockAnalyzer extends 
     }
   }
   
+  private synchronized TokenFilter maybePayload(TokenFilter stream, String fieldName) {
+    Integer val = previousMappings.get(fieldName);
+    if (val == null) {
+      switch(random.nextInt(3)) {
+        case 0: val = -1; // no payloads
+                break;
+        case 1: val = Integer.MAX_VALUE; // variable length payload
+                break;
+        case 2: val = random.nextInt(12); // fixed length payload
+                break;
+      }
+      previousMappings.put(fieldName, val); // save it so we are consistent for this field
+    }
+    
+    if (val == -1)
+      return stream;
+    else if (val == Integer.MAX_VALUE)
+      return new MockVariableLengthPayloadFilter(random, stream);
+    else
+      return new MockFixedLengthPayloadFilter(random, stream, val);
+  }
+  
   public void setPositionIncrementGap(int positionIncrementGap){
     this.positionIncrementGap = positionIncrementGap;
   }
@@ -131,35 +140,3 @@ public final class MockAnalyzer extends 
     return positionIncrementGap;
   }
 }
-
-final class SimplePayloadFilter extends TokenFilter {
-  String fieldName;
-  int pos;
-  final PayloadAttribute payloadAttr;
-  final CharTermAttribute termAttr;
-
-  public SimplePayloadFilter(TokenStream input, String fieldName) {
-    super(input);
-    this.fieldName = fieldName;
-    pos = 0;
-    payloadAttr = input.addAttribute(PayloadAttribute.class);
-    termAttr = input.addAttribute(CharTermAttribute.class);
-  }
-
-  @Override
-  public boolean incrementToken() throws IOException {
-    if (input.incrementToken()) {
-      payloadAttr.setPayload(new Payload(("pos: " + pos).getBytes()));
-      pos++;
-      return true;
-    } else {
-      return false;
-    }
-  }
-
-  @Override
-  public void reset() throws IOException {
-    super.reset();
-    pos = 0;
-  }
-}

Modified: lucene/dev/branches/bulkpostings/lucene/src/test-framework/org/apache/lucene/analysis/MockTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test-framework/org/apache/lucene/analysis/MockTokenizer.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test-framework/org/apache/lucene/analysis/MockTokenizer.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test-framework/org/apache/lucene/analysis/MockTokenizer.java Mon May  9 15:24:04 2011
@@ -20,14 +20,15 @@ package org.apache.lucene.analysis;
 import java.io.IOException;
 import java.io.Reader;
 
-import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.util.automaton.CharacterRunAutomaton;
 import org.apache.lucene.util.automaton.RegExp;
 
 /**
  * Automaton-based tokenizer for testing. Optionally lowercases.
  */
-public class MockTokenizer extends CharTokenizer {
+public class MockTokenizer extends Tokenizer {
   /** Acts Similar to WhitespaceTokenizer */
   public static final CharacterRunAutomaton WHITESPACE = 
     new CharacterRunAutomaton(new RegExp("[^ \t\r\n]+").toAutomaton());
@@ -45,21 +46,67 @@ public class MockTokenizer extends CharT
   private final boolean lowerCase;
   private int state;
 
+  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+  private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
+  int off = 0;
+
   public MockTokenizer(AttributeFactory factory, Reader input, CharacterRunAutomaton runAutomaton, boolean lowerCase) {
-    super(LuceneTestCase.TEST_VERSION_CURRENT, factory, input);
+    super(factory, input);
     this.runAutomaton = runAutomaton;
     this.lowerCase = lowerCase;
     this.state = runAutomaton.getInitialState();
   }
 
   public MockTokenizer(Reader input, CharacterRunAutomaton runAutomaton, boolean lowerCase) {
-    super(LuceneTestCase.TEST_VERSION_CURRENT, input);
+    super(input);
     this.runAutomaton = runAutomaton;
     this.lowerCase = lowerCase;
     this.state = runAutomaton.getInitialState();
   }
   
   @Override
+  public final boolean incrementToken() throws IOException {
+    clearAttributes();
+    for (;;) {
+      int startOffset = off;
+      int cp = readCodePoint();
+      if (cp < 0) {
+        break;
+      } else if (isTokenChar(cp)) {
+        int endOffset;
+        do {
+          char chars[] = Character.toChars(normalize(cp));
+          for (int i = 0; i < chars.length; i++)
+            termAtt.append(chars[i]);
+          endOffset = off;
+          cp = readCodePoint();
+        } while (cp >= 0 && isTokenChar(cp));
+        offsetAtt.setOffset(startOffset, endOffset);
+        return true;
+      }
+    }
+    return false;
+  }
+
+  protected int readCodePoint() throws IOException {
+    int ch = input.read();
+    if (ch < 0) {
+      return ch;
+    } else {
+      assert !Character.isLowSurrogate((char) ch);
+      off++;
+      if (Character.isHighSurrogate((char) ch)) {
+        int ch2 = input.read();
+        if (ch2 >= 0) {
+          off++;
+          assert Character.isLowSurrogate((char) ch2);
+          return Character.toCodePoint((char) ch, (char) ch2);
+        }
+      }
+      return ch;
+    }
+  }
+
   protected boolean isTokenChar(int c) {
     state = runAutomaton.step(state, c);
     if (state < 0) {
@@ -70,7 +117,6 @@ public class MockTokenizer extends CharT
     }
   }
   
-  @Override
   protected int normalize(int c) {
     return lowerCase ? Character.toLowerCase(c) : c;
   }
@@ -79,5 +125,12 @@ public class MockTokenizer extends CharT
   public void reset() throws IOException {
     super.reset();
     state = runAutomaton.getInitialState();
+    off = 0;
+  }
+
+  @Override
+  public void end() throws IOException {
+    int finalOffset = correctOffset(off);
+    offsetAtt.setOffset(finalOffset, finalOffset);
   }
 }

Modified: lucene/dev/branches/bulkpostings/lucene/src/test-framework/org/apache/lucene/index/DocHelper.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test-framework/org/apache/lucene/index/DocHelper.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test-framework/org/apache/lucene/index/DocHelper.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test-framework/org/apache/lucene/index/DocHelper.java Mon May  9 15:24:04 2011
@@ -21,6 +21,7 @@ import java.io.IOException;
 import java.io.UnsupportedEncodingException;
 import java.util.HashMap;
 import java.util.Map;
+import java.util.Random;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.MockAnalyzer;
@@ -30,6 +31,8 @@ import org.apache.lucene.document.Field;
 import org.apache.lucene.document.Fieldable;
 import org.apache.lucene.search.SimilarityProvider;
 import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.LuceneTestCase;
+
 import static org.apache.lucene.util.LuceneTestCase.TEST_VERSION_CURRENT;
 
 class DocHelper {
@@ -218,9 +221,9 @@ class DocHelper {
    * @param doc
    * @throws IOException
    */ 
-  public static SegmentInfo writeDoc(Directory dir, Document doc) throws IOException
+  public static SegmentInfo writeDoc(Random random, Directory dir, Document doc) throws IOException
   {
-    return writeDoc(dir, new MockAnalyzer(MockTokenizer.WHITESPACE, false), null, doc);
+    return writeDoc(random, dir, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false), null, doc);
   }
 
   /**
@@ -233,8 +236,8 @@ class DocHelper {
    * @param doc
    * @throws IOException
    */ 
-  public static SegmentInfo writeDoc(Directory dir, Analyzer analyzer, SimilarityProvider similarity, Document doc) throws IOException {
-    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(
+  public static SegmentInfo writeDoc(Random random, Directory dir, Analyzer analyzer, SimilarityProvider similarity, Document doc) throws IOException {
+    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig( /* LuceneTestCase.newIndexWriterConfig(random, */ 
         TEST_VERSION_CURRENT, analyzer).setSimilarityProvider(similarity));
     //writer.setUseCompoundFile(false);
     writer.addDocument(doc);

Modified: lucene/dev/branches/bulkpostings/lucene/src/test-framework/org/apache/lucene/index/RandomIndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test-framework/org/apache/lucene/index/RandomIndexWriter.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test-framework/org/apache/lucene/index/RandomIndexWriter.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test-framework/org/apache/lucene/index/RandomIndexWriter.java Mon May  9 15:24:04 2011
@@ -42,6 +42,7 @@ public class RandomIndexWriter implement
   private final Random r;
   int docCount;
   int flushAt;
+  private double flushAtFactor = 1.0;
   private boolean getReaderCalled;
 
   // Randomly calls Thread.yield so we mixup thread scheduling
@@ -67,7 +68,7 @@ public class RandomIndexWriter implement
 
   /** create a RandomIndexWriter with a random config: Uses TEST_VERSION_CURRENT and MockAnalyzer */
   public RandomIndexWriter(Random r, Directory dir) throws IOException {
-    this(r, dir, LuceneTestCase.newIndexWriterConfig(r, LuceneTestCase.TEST_VERSION_CURRENT, new MockAnalyzer()));
+    this(r, dir, LuceneTestCase.newIndexWriterConfig(r, LuceneTestCase.TEST_VERSION_CURRENT, new MockAnalyzer(r)));
   }
   
   /** create a RandomIndexWriter with a random config: Uses TEST_VERSION_CURRENT */
@@ -98,12 +99,20 @@ public class RandomIndexWriter implement
    */
   public void addDocument(Document doc) throws IOException {
     w.addDocument(doc);
+    maybeCommit();
+  }
+
+  private void maybeCommit() throws IOException {
     if (docCount++ == flushAt) {
       if (LuceneTestCase.VERBOSE) {
-        System.out.println("RIW.addDocument: now doing a commit");
+        System.out.println("RIW.add/updateDocument: now doing a commit at docCount=" + docCount);
       }
       w.commit();
-      flushAt += _TestUtil.nextInt(r, 10, 1000);
+      flushAt += _TestUtil.nextInt(r, (int) (flushAtFactor * 10), (int) (flushAtFactor * 1000));
+      if (flushAtFactor < 2e6) {
+        // gradually but exponentially increase time b/w flushes
+        flushAtFactor *= 1.05;
+      }
     }
   }
   
@@ -113,13 +122,7 @@ public class RandomIndexWriter implement
    */
   public void updateDocument(Term t, Document doc) throws IOException {
     w.updateDocument(t, doc);
-    if (docCount++ == flushAt) {
-      if (LuceneTestCase.VERBOSE) {
-        System.out.println("RIW.updateDocument: now doing a commit");
-      }
-      w.commit();
-      flushAt += _TestUtil.nextInt(r, 10, 1000);
-    }
+    maybeCommit();
   }
   
   public void addIndexes(Directory... dirs) throws CorruptIndexException, IOException {
@@ -181,7 +184,7 @@ public class RandomIndexWriter implement
         System.out.println("RIW.getReader: open new reader");
       }
       w.commit();
-      return IndexReader.open(w.getDirectory(), new KeepOnlyLastCommitDeletionPolicy(), r.nextBoolean(), _TestUtil.nextInt(r, 1, 10));
+      return IndexReader.open(w.getDirectory(), new KeepOnlyLastCommitDeletionPolicy(), r.nextBoolean(), _TestUtil.nextInt(r, 1, 10), w.getConfig().getCodecProvider());
     }
   }
 

Modified: lucene/dev/branches/bulkpostings/lucene/src/test-framework/org/apache/lucene/index/codecs/mockrandom/MockRandomCodec.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test-framework/org/apache/lucene/index/codecs/mockrandom/MockRandomCodec.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test-framework/org/apache/lucene/index/codecs/mockrandom/MockRandomCodec.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test-framework/org/apache/lucene/index/codecs/mockrandom/MockRandomCodec.java Mon May  9 15:24:04 2011
@@ -120,7 +120,14 @@ public class MockRandomCodec extends Cod
 
   @Override
   public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
-
+    // we pull this before the seed intentionally: because its not consumed at runtime
+    // (the skipInterval is written into postings header)
+    int skipInterval = _TestUtil.nextInt(seedRandom, 2, 10);
+    
+    if (LuceneTestCase.VERBOSE) {
+      System.out.println("MockRandomCodec: skipInterval=" + skipInterval);
+    }
+    
     final long seed = seedRandom.nextLong();
 
     if (LuceneTestCase.VERBOSE) {
@@ -136,12 +143,12 @@ public class MockRandomCodec extends Cod
     PostingsWriterBase postingsWriter;
 
     if (random.nextBoolean()) {
-      postingsWriter = new SepPostingsWriterImpl(state, new MockIntStreamFactory(random));
+      postingsWriter = new SepPostingsWriterImpl(state, new MockIntStreamFactory(random), skipInterval);
     } else {
       if (LuceneTestCase.VERBOSE) {
         System.out.println("MockRandomCodec: writing Standard postings");
       }
-      postingsWriter = new StandardPostingsWriter(state);
+      postingsWriter = new StandardPostingsWriter(state, skipInterval);
     }
 
     if (random.nextBoolean()) {

Modified: lucene/dev/branches/bulkpostings/lucene/src/test-framework/org/apache/lucene/search/QueryUtils.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test-framework/org/apache/lucene/search/QueryUtils.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test-framework/org/apache/lucene/search/QueryUtils.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test-framework/org/apache/lucene/search/QueryUtils.java Mon May  9 15:24:04 2011
@@ -2,13 +2,14 @@ package org.apache.lucene.search;
 
 import java.io.IOException;
 import java.util.Random;
+import java.lang.reflect.Method;
 
 import junit.framework.Assert;
 
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.document.Document;
-import org.apache.lucene.index.IndexReader.AtomicReaderContext;
 import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexReader.AtomicReaderContext;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.MultiReader;
@@ -166,7 +167,7 @@ public class QueryUtils {
     throws IOException {
     Directory d = new MockDirectoryWrapper(random, new RAMDirectory());
       IndexWriter w = new IndexWriter(d, new IndexWriterConfig(
-        TEST_VERSION_CURRENT, new MockAnalyzer()));
+        TEST_VERSION_CURRENT, new MockAnalyzer(random)));
       for (int i = 0; i < numDeletedDocs; i++) {
         w.addDocument(new Document());
       }
@@ -309,7 +310,7 @@ public class QueryUtils {
           // confirm that skipping beyond the last doc, on the
           // previous reader, hits NO_MORE_DOCS
           final IndexReader previousReader = lastReader[0];
-          IndexSearcher indexSearcher = LuceneTestCase.newSearcher(previousReader);
+          IndexSearcher indexSearcher = LuceneTestCase.newSearcher(previousReader, false);
           Weight w = q.weight(indexSearcher);
           Scorer scorer = w.scorer((AtomicReaderContext)previousReader.getTopReaderContext(), ScorerContext.def());
           if (scorer != null) {

Modified: lucene/dev/branches/bulkpostings/lucene/src/test-framework/org/apache/lucene/store/MockDirectoryWrapper.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test-framework/org/apache/lucene/store/MockDirectoryWrapper.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test-framework/org/apache/lucene/store/MockDirectoryWrapper.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test-framework/org/apache/lucene/store/MockDirectoryWrapper.java Mon May  9 15:24:04 2011
@@ -32,7 +32,9 @@ import java.util.Random;
 import java.util.Set;
 
 import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.codecs.CodecProvider;
 import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.ThrottledIndexOutput;
 import org.apache.lucene.util._TestUtil;
 
 /**
@@ -68,6 +70,7 @@ public class MockDirectoryWrapper extend
   private Set<String> createdFiles;
   Set<String> openFilesForWrite = new HashSet<String>();
   volatile boolean crashed;
+  private ThrottledIndexOutput throttledOutput;
 
   // use this for tracking files for crash.
   // additionally: provides debugging information in case you leave one open
@@ -113,6 +116,10 @@ public class MockDirectoryWrapper extend
   public void setPreventDoubleWrite(boolean value) {
     preventDoubleWrite = value;
   }
+  
+  public void setThrottledIndexOutput(ThrottledIndexOutput throttledOutput) {
+    this.throttledOutput = throttledOutput;
+  }
 
   @Override
   public synchronized void sync(Collection<String> names) throws IOException {
@@ -347,7 +354,7 @@ public class MockDirectoryWrapper extend
     IndexOutput io = new MockIndexOutputWrapper(this, delegate.createOutput(name), name);
     openFileHandles.put(io, new RuntimeException("unclosed IndexOutput"));
     openFilesForWrite.add(name);
-    return io;
+    return throttledOutput == null ? io : throttledOutput.newFromDelegate(io);
   }
 
   @Override
@@ -419,12 +426,30 @@ public class MockDirectoryWrapper extend
       throw new RuntimeException("MockDirectoryWrapper: cannot close: there are still open files: " + openFiles, cause);
     }
     open = false;
-    if (checkIndexOnClose && IndexReader.indexExists(this)) {
-      _TestUtil.checkIndex(this);
+    if (checkIndexOnClose) {
+      if (LuceneTestCase.VERBOSE) {
+        System.out.println("\nNOTE: MockDirectoryWrapper: now run CheckIndex");
+      } 
+      if (codecProvider != null) {
+        if (IndexReader.indexExists(this, codecProvider)) {
+          _TestUtil.checkIndex(this, codecProvider);
+        }
+      } else {
+        if (IndexReader.indexExists(this)) {
+          _TestUtil.checkIndex(this);
+        }
+      }
     }
     delegate.close();
   }
 
+  private CodecProvider codecProvider;
+
+  // We pass this CodecProvider to checkIndex when dir is closed...
+  public void setCodecProvider(CodecProvider cp) {
+    codecProvider = cp;
+  }
+
   boolean open = true;
   
   public synchronized boolean isOpen() {
@@ -559,4 +584,5 @@ public class MockDirectoryWrapper extend
     maybeYield();
     delegate.copy(to, src, dest);
   }
+  
 }

Modified: lucene/dev/branches/bulkpostings/lucene/src/test-framework/org/apache/lucene/util/LineFileDocs.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test-framework/org/apache/lucene/util/LineFileDocs.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test-framework/org/apache/lucene/util/LineFileDocs.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test-framework/org/apache/lucene/util/LineFileDocs.java Mon May  9 15:24:04 2011
@@ -24,7 +24,6 @@ import java.io.IOException;
 import java.io.BufferedReader;
 import java.io.InputStreamReader;
 import java.io.InputStream;
-import java.io.BufferedInputStream;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.zip.GZIPInputStream;
 import java.util.Random;
@@ -79,8 +78,7 @@ public class LineFileDocs implements Clo
       size *= 2.8;
     }
 
-    final InputStream in = new BufferedInputStream(is, BUFFER_SIZE);
-    reader = new BufferedReader(new InputStreamReader(in, "UTF-8"), BUFFER_SIZE);
+    reader = new BufferedReader(new InputStreamReader(is, "UTF-8"), BUFFER_SIZE);
 
     // Override sizes for currently "known" line files:
     if (path.equals("europarl.lines.txt.gz")) {
@@ -128,7 +126,7 @@ public class LineFileDocs implements Clo
       body = new Field("body", "", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
       doc.add(body);
 
-      id = new Field("id", "", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS);
+      id = new Field("docid", "", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS);
       doc.add(id);
 
       date = new Field("date", "", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS);

Modified: lucene/dev/branches/bulkpostings/lucene/src/test-framework/org/apache/lucene/util/LuceneTestCase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test-framework/org/apache/lucene/util/LuceneTestCase.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test-framework/org/apache/lucene/util/LuceneTestCase.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test-framework/org/apache/lucene/util/LuceneTestCase.java Mon May  9 15:24:04 2011
@@ -116,7 +116,7 @@ public abstract class LuceneTestCase ext
    * If this is set, it is the only method that should run.
    */
   static final String TEST_METHOD;
-  
+
   /** Create indexes in this directory, optimally use a subdir, named after the test */
   public static final File TEMP_DIR;
   static {
@@ -128,12 +128,17 @@ public abstract class LuceneTestCase ext
     TEMP_DIR = new File(s);
     TEMP_DIR.mkdirs();
   }
+  
+  /** set of directories we created, in afterclass we try to clean these up */
+  static final Set<String> tempDirs = Collections.synchronizedSet(new HashSet<String>());
 
   // by default we randomly pick a different codec for
   // each test case (non-J4 tests) and each test class (J4
   // tests)
   /** Gets the codec to run tests with. */
   public static final String TEST_CODEC = System.getProperty("tests.codec", "randomPerField");
+  /** Gets the codecprovider to run tests with */
+  public static final String TEST_CODECPROVIDER = System.getProperty("tests.codecprovider", "random");
   /** Gets the locale to run tests with */
   public static final String TEST_LOCALE = System.getProperty("tests.locale", "random");
   /** Gets the timezone to run tests with */
@@ -160,11 +165,11 @@ public abstract class LuceneTestCase ext
    * multiply it by the number of iterations
    */
   public static final int RANDOM_MULTIPLIER = Integer.parseInt(System.getProperty("tests.multiplier", "1"));
-  
+
   private int savedBoolMaxClauseCount;
 
   private volatile Thread.UncaughtExceptionHandler savedUncaughtExceptionHandler = null;
-  
+
   /** Used to track if setUp and tearDown are called correctly from subclasses */
   private boolean setup;
 
@@ -186,28 +191,28 @@ public abstract class LuceneTestCase ext
   private static class UncaughtExceptionEntry {
     public final Thread thread;
     public final Throwable exception;
-    
+
     public UncaughtExceptionEntry(Thread thread, Throwable exception) {
       this.thread = thread;
       this.exception = exception;
     }
   }
   private List<UncaughtExceptionEntry> uncaughtExceptions = Collections.synchronizedList(new ArrayList<UncaughtExceptionEntry>());
-  
+
   // saves default codec: we do this statically as many build indexes in @beforeClass
   private static String savedDefaultCodec;
   // default codec: not set when we use a per-field provider.
   private static Codec codec;
   // default codec provider
   private static CodecProvider savedCodecProvider;
-  
+
   private static Locale locale;
   private static Locale savedLocale;
   private static TimeZone timeZone;
   private static TimeZone savedTimeZone;
-  
+
   private static Map<MockDirectoryWrapper,StackTraceElement[]> stores;
-  
+
   private static final String[] TEST_CODECS = new String[] {"MockSep", "MockFixedIntBlock", "MockVariableIntBlock", "MockRandom"};
 
   private static void swapCodec(Codec c, CodecProvider cp) {
@@ -285,7 +290,7 @@ public abstract class LuceneTestCase ext
 
   // randomly picks from core and test codecs
   static String pickRandomCodec(Random rnd) {
-    int idx = rnd.nextInt(CodecProvider.CORE_CODECS.length + 
+    int idx = rnd.nextInt(CodecProvider.CORE_CODECS.length +
                           TEST_CODECS.length);
     if (idx < CodecProvider.CORE_CODECS.length) {
       return CodecProvider.CORE_CODECS[idx];
@@ -318,22 +323,46 @@ public abstract class LuceneTestCase ext
   /** @deprecated (4.0) until we fix no-fork problems in solr tests */
   @Deprecated
   private static List<String> testClassesRun = new ArrayList<String>();
-  
+
   @BeforeClass
   public static void beforeClassLuceneTestCaseJ4() {
     staticSeed = "random".equals(TEST_SEED) ? seedRand.nextLong() : TwoLongs.fromString(TEST_SEED).l1;
     random.setSeed(staticSeed);
+    tempDirs.clear();
     stores = Collections.synchronizedMap(new IdentityHashMap<MockDirectoryWrapper,StackTraceElement[]>());
     savedCodecProvider = CodecProvider.getDefault();
-    if ("randomPerField".equals(TEST_CODEC)) {
-      if (random.nextInt(4) == 0) { // preflex-only setup
-        codec = installTestCodecs("PreFlex", CodecProvider.getDefault());
-      } else { // per-field setup
-        CodecProvider.setDefault(new RandomCodecProvider(random));
+    if ("random".equals(TEST_CODECPROVIDER)) {
+      if ("randomPerField".equals(TEST_CODEC)) {
+        if (random.nextInt(4) == 0) { // preflex-only setup
+          codec = installTestCodecs("PreFlex", CodecProvider.getDefault());
+        } else { // per-field setup
+          CodecProvider.setDefault(new RandomCodecProvider(random));
+          codec = installTestCodecs(TEST_CODEC, CodecProvider.getDefault());
+        }
+      } else { // ordinary setup
         codec = installTestCodecs(TEST_CODEC, CodecProvider.getDefault());
       }
-    } else { // ordinary setup
-      codec = installTestCodecs(TEST_CODEC, CodecProvider.getDefault());
+    } else {
+      // someone specified their own codecprovider by class
+      try {
+        Class<? extends CodecProvider> cpClazz = Class.forName(TEST_CODECPROVIDER).asSubclass(CodecProvider.class);
+        CodecProvider cp = cpClazz.newInstance();
+        String codecName;
+        if (TEST_CODEC.startsWith("random")) { // TODO: somehow do random per-field?!
+          Set<String> codecSet = cp.listAll();
+          String availableCodecs[] = codecSet.toArray(new String[codecSet.size()]);
+          codecName = availableCodecs[random.nextInt(availableCodecs.length)];
+        } else {
+          codecName = TEST_CODEC;
+        }
+        
+        codec = cp.lookup(codecName);
+        cp.setDefaultFieldCodec(codecName);
+        CodecProvider.setDefault(cp);
+      } catch (Exception e) {
+        System.err.println("Could not instantiate CodecProvider: " + TEST_CODECPROVIDER);
+        throw new RuntimeException(e);
+      }
     }
     savedLocale = Locale.getDefault();
     locale = TEST_LOCALE.equals("random") ? randomLocale(random) : localeForName(TEST_LOCALE);
@@ -343,7 +372,7 @@ public abstract class LuceneTestCase ext
     TimeZone.setDefault(timeZone);
     testsFailed = false;
   }
-  
+
   @AfterClass
   public static void afterClassLuceneTestCaseJ4() {
     if (! "false".equals(TEST_CLEAN_THREADS)) {
@@ -356,16 +385,13 @@ public abstract class LuceneTestCase ext
     String codecDescription;
     CodecProvider cp = CodecProvider.getDefault();
 
-    if ("randomPerField".equals(TEST_CODEC)) {
-      if (cp instanceof RandomCodecProvider)
-        codecDescription = cp.toString();
-      else 
-        codecDescription = "PreFlex";
+    if ("randomPerField".equals(TEST_CODEC) && cp instanceof RandomCodecProvider) {
+      codecDescription = cp.toString();
     } else {
       codecDescription = codec.toString();
     }
-    
-    if (CodecProvider.getDefault() == savedCodecProvider)
+
+    if ("random".equals(TEST_CODECPROVIDER) && CodecProvider.getDefault() == savedCodecProvider)
       removeTestCodecs(codec, CodecProvider.getDefault());
     CodecProvider.setDefault(savedCodecProvider);
     Locale.setDefault(savedLocale);
@@ -394,14 +420,14 @@ public abstract class LuceneTestCase ext
     stores = null;
     // if verbose or tests failed, report some information back
     if (VERBOSE || testsFailed)
-      System.err.println("NOTE: test params are: codec=" + codecDescription + 
-        ", locale=" + locale + 
+      System.err.println("NOTE: test params are: codec=" + codecDescription +
+        ", locale=" + locale +
         ", timezone=" + (timeZone == null ? "(null)" : timeZone.getID()));
     if (testsFailed) {
       System.err.println("NOTE: all tests run in this JVM:");
       System.err.println(Arrays.toString(testClassesRun.toArray()));
-      System.err.println("NOTE: " + System.getProperty("os.name") + " " 
-          + System.getProperty("os.version") + " " 
+      System.err.println("NOTE: " + System.getProperty("os.name") + " "
+          + System.getProperty("os.version") + " "
           + System.getProperty("os.arch") + "/"
           + System.getProperty("java.vendor") + " "
           + System.getProperty("java.version") + " "
@@ -411,10 +437,20 @@ public abstract class LuceneTestCase ext
           + "free=" + Runtime.getRuntime().freeMemory() + ","
           + "total=" + Runtime.getRuntime().totalMemory());
     }
+    // clear out any temp directories if we can
+    if (!testsFailed) {
+      for (String path : tempDirs) {
+        try {
+          _TestUtil.rmDir(new File(path));
+        } catch (IOException e) {
+          e.printStackTrace();
+        }
+      }
+    }
   }
 
   private static boolean testsFailed; /* true if any tests failed */
-  
+
   // This is how we get control when errors occur.
   // Think of this as start/end/success/failed
   // events.
@@ -449,7 +485,7 @@ public abstract class LuceneTestCase ext
       LuceneTestCase.this.name = method.getName();
       super.starting(method);
     }
-    
+
   };
 
   @Before
@@ -467,7 +503,7 @@ public abstract class LuceneTestCase ext
           savedUncaughtExceptionHandler.uncaughtException(t, e);
       }
     });
-    
+
     savedBoolMaxClauseCount = BooleanQuery.getMaxClauseCount();
   }
 
@@ -499,7 +535,7 @@ public abstract class LuceneTestCase ext
     if ("perMethod".equals(TEST_CLEAN_THREADS)) {
       int rogueThreads = threadCleanup("test method: '" + getName() + "'");
       if (rogueThreads > 0) {
-        System.err.println("RESOURCE LEAK: test method: '" + getName() 
+        System.err.println("RESOURCE LEAK: test method: '" + getName()
             + "' left " + rogueThreads + " thread(s) running");
         // TODO: fail, but print seed for now.
         if (!testsFailed && uncaughtExceptions.isEmpty()) {
@@ -521,18 +557,18 @@ public abstract class LuceneTestCase ext
         fail("Some threads threw uncaught exceptions!");
       }
 
-      // calling assertSaneFieldCaches here isn't as useful as having test 
-      // classes call it directly from the scope where the index readers 
-      // are used, because they could be gc'ed just before this tearDown 
+      // calling assertSaneFieldCaches here isn't as useful as having test
+      // classes call it directly from the scope where the index readers
+      // are used, because they could be gc'ed just before this tearDown
       // method is called.
       //
       // But it's better then nothing.
       //
-      // If you are testing functionality that you know for a fact 
-      // "violates" FieldCache sanity, then you should either explicitly 
+      // If you are testing functionality that you know for a fact
+      // "violates" FieldCache sanity, then you should either explicitly
       // call purgeFieldCache at the end of your test method, or refactor
-      // your Test class so that the inconsistant FieldCache usages are 
-      // isolated in distinct test methods  
+      // your Test class so that the inconsistant FieldCache usages are
+      // isolated in distinct test methods
       assertSaneFieldCaches(getTestLabel());
 
     } finally {
@@ -543,14 +579,14 @@ public abstract class LuceneTestCase ext
   private final static int THREAD_STOP_GRACE_MSEC = 50;
   // jvm-wide list of 'rogue threads' we found, so they only get reported once.
   private final static IdentityHashMap<Thread,Boolean> rogueThreads = new IdentityHashMap<Thread,Boolean>();
-  
+
   static {
     // just a hack for things like eclipse test-runner threads
     for (Thread t : Thread.getAllStackTraces().keySet()) {
       rogueThreads.put(t, true);
     }
   }
-  
+
   /**
    * Looks for leftover running threads, trying to kill them off,
    * so they don't fail future tests.
@@ -561,20 +597,20 @@ public abstract class LuceneTestCase ext
     Thread[] stillRunning = new Thread[Thread.activeCount()+1];
     int threadCount = 0;
     int rogueCount = 0;
-    
+
     if ((threadCount = Thread.enumerate(stillRunning)) > 1) {
       while (threadCount == stillRunning.length) {
         // truncated response
         stillRunning = new Thread[stillRunning.length*2];
         threadCount = Thread.enumerate(stillRunning);
       }
-      
+
       for (int i = 0; i < threadCount; i++) {
         Thread t = stillRunning[i];
-          
-        if (t.isAlive() && 
-            !rogueThreads.containsKey(t) && 
-            t != Thread.currentThread() && 
+
+        if (t.isAlive() &&
+            !rogueThreads.containsKey(t) &&
+            t != Thread.currentThread() &&
             /* its ok to keep your searcher across test cases */
             (t.getName().startsWith("LuceneTestCase") && context.startsWith("test method")) == false) {
           System.err.println("WARNING: " + context  + " left thread running: " + t);
@@ -599,7 +635,7 @@ public abstract class LuceneTestCase ext
     }
     return rogueCount;
   }
-  
+
   /**
    * Asserts that FieldCacheSanityChecker does not detect any
    * problems with FieldCache.DEFAULT.
@@ -642,13 +678,13 @@ public abstract class LuceneTestCase ext
 
     }
   }
-  
+
   // @deprecated (4.0) These deprecated methods should be removed soon, when all tests using no Epsilon are fixed:
   @Deprecated
   static public void assertEquals(double expected, double actual) {
     assertEquals(null, expected, actual);
   }
-   
+
   @Deprecated
   static public void assertEquals(String message, double expected, double actual) {
     assertEquals(message, Double.valueOf(expected), Double.valueOf(actual));
@@ -663,18 +699,18 @@ public abstract class LuceneTestCase ext
   static public void assertEquals(String message, float expected, float actual) {
     assertEquals(message, Float.valueOf(expected), Float.valueOf(actual));
   }
-  
+
   // Replacement for Assume jUnit class, so we can add a message with explanation:
-  
+
   private static final class TestIgnoredException extends RuntimeException {
     TestIgnoredException(String msg) {
       super(msg);
     }
-    
+
     TestIgnoredException(String msg, Throwable t) {
       super(msg, t);
     }
-    
+
     @Override
     public String getMessage() {
       StringBuilder sb = new StringBuilder(super.getMessage());
@@ -682,7 +718,7 @@ public abstract class LuceneTestCase ext
         sb.append(" - ").append(getCause());
       return sb.toString();
     }
-    
+
     // only this one is called by our code, exception is not used outside this class:
     @Override
     public void printStackTrace(PrintStream s) {
@@ -694,19 +730,19 @@ public abstract class LuceneTestCase ext
       }
     }
   }
-  
+
   public static void assumeTrue(String msg, boolean b) {
     Assume.assumeNoException(b ? null : new TestIgnoredException(msg));
   }
- 
+
   public static void assumeFalse(String msg, boolean b) {
     assumeTrue(msg, !b);
   }
-  
+
   public static void assumeNoException(String msg, Exception e) {
     Assume.assumeNoException(e == null ? null : new TestIgnoredException(msg, e));
   }
- 
+
   public static <T> Set<T> asSet(T... args) {
     return new HashSet<T>(Arrays.asList(args));
   }
@@ -764,13 +800,15 @@ public abstract class LuceneTestCase ext
       c.setTermIndexInterval(_TestUtil.nextInt(r, 1, 1000));
     }
     if (r.nextBoolean()) {
-      c.setMaxThreadStates(_TestUtil.nextInt(r, 1, 20));
+      c.setIndexerThreadPool(new ThreadAffinityDocumentsWriterThreadPool(_TestUtil.nextInt(r, 1, 20)));
     }
 
     if (r.nextBoolean()) {
-      c.setMergePolicy(new MockRandomMergePolicy(r));
-    } else {
+      c.setMergePolicy(newTieredMergePolicy());
+    } else if (r.nextBoolean()) {
       c.setMergePolicy(newLogMergePolicy());
+    } else {
+      c.setMergePolicy(new MockRandomMergePolicy(r));
     }
 
     c.setReaderPooling(r.nextBoolean());
@@ -782,6 +820,10 @@ public abstract class LuceneTestCase ext
     return newLogMergePolicy(random);
   }
 
+  public static TieredMergePolicy newTieredMergePolicy() {
+    return newTieredMergePolicy(random);
+  }
+
   public static LogMergePolicy newLogMergePolicy(Random r) {
     LogMergePolicy logmp = r.nextBoolean() ? new LogDocMergePolicy() : new LogByteSizeMergePolicy();
     logmp.setUseCompoundFile(r.nextBoolean());
@@ -794,17 +836,22 @@ public abstract class LuceneTestCase ext
     return logmp;
   }
 
-  public static LogMergePolicy newInOrderLogMergePolicy() {
-    LogMergePolicy logmp = newLogMergePolicy();
-    logmp.setRequireContiguousMerge(true);
-    return logmp;
-  }
-
-  public static LogMergePolicy newInOrderLogMergePolicy(int mergeFactor) {
-    LogMergePolicy logmp = newLogMergePolicy();
-    logmp.setMergeFactor(mergeFactor);
-    logmp.setRequireContiguousMerge(true);
-    return logmp;
+  public static TieredMergePolicy newTieredMergePolicy(Random r) {
+    TieredMergePolicy tmp = new TieredMergePolicy();
+    if (r.nextInt(3) == 2) {
+      tmp.setMaxMergeAtOnce(2);
+      tmp.setMaxMergeAtOnceExplicit(2);
+    } else {
+      tmp.setMaxMergeAtOnce(_TestUtil.nextInt(r, 2, 20));
+      tmp.setMaxMergeAtOnceExplicit(_TestUtil.nextInt(r, 2, 30));
+    }
+    tmp.setMaxMergedSegmentMB(0.2 + r.nextDouble() * 2.0);
+    tmp.setFloorSegmentMB(0.2 + r.nextDouble() * 2.0);
+    tmp.setExpungeDeletesPctAllowed(0.0 + r.nextDouble() * 30.0);
+    tmp.setSegmentsPerTier(_TestUtil.nextInt(r, 2, 20));
+    tmp.setUseCompoundFile(r.nextBoolean());
+    tmp.setNoCFSRatio(0.1 + r.nextDouble()*0.8);
+    return tmp;
   }
 
   public static LogMergePolicy newLogMergePolicy(boolean useCFS) {
@@ -839,7 +886,7 @@ public abstract class LuceneTestCase ext
   public static MockDirectoryWrapper newDirectory() throws IOException {
     return newDirectory(random);
   }
-  
+
   /**
    * Returns a new Directory instance, using the specified random.
    * See {@link #newDirectory()} for more information.
@@ -850,7 +897,7 @@ public abstract class LuceneTestCase ext
     stores.put(dir, Thread.currentThread().getStackTrace());
     return dir;
   }
-  
+
   /**
    * Returns a new Directory instance, with contents copied from the
    * provided directory. See {@link #newDirectory()} for more
@@ -859,23 +906,23 @@ public abstract class LuceneTestCase ext
   public static MockDirectoryWrapper newDirectory(Directory d) throws IOException {
     return newDirectory(random, d);
   }
-  
+
   /** Returns a new FSDirectory instance over the given file, which must be a folder. */
   public static MockDirectoryWrapper newFSDirectory(File f) throws IOException {
     return newFSDirectory(f, null);
   }
-  
+
   /** Returns a new FSDirectory instance over the given file, which must be a folder. */
   public static MockDirectoryWrapper newFSDirectory(File f, LockFactory lf) throws IOException {
     String fsdirClass = TEST_DIRECTORY;
     if (fsdirClass.equals("random")) {
       fsdirClass = FS_DIRECTORIES[random.nextInt(FS_DIRECTORIES.length)];
     }
-    
+
     if (fsdirClass.indexOf(".") == -1) {// if not fully qualified, assume .store
       fsdirClass = "org.apache.lucene.store." + fsdirClass;
     }
-    
+
     Class<? extends FSDirectory> clazz;
     try {
       try {
@@ -883,11 +930,11 @@ public abstract class LuceneTestCase ext
       } catch (ClassCastException e) {
         // TEST_DIRECTORY is not a sub-class of FSDirectory, so draw one at random
         fsdirClass = FS_DIRECTORIES[random.nextInt(FS_DIRECTORIES.length)];
-        
+
         if (fsdirClass.indexOf(".") == -1) {// if not fully qualified, assume .store
           fsdirClass = "org.apache.lucene.store." + fsdirClass;
         }
-        
+
         clazz = Class.forName(fsdirClass).asSubclass(FSDirectory.class);
       }
       MockDirectoryWrapper dir = new MockDirectoryWrapper(random, newFSDirectoryImpl(clazz, f, lf));
@@ -897,7 +944,7 @@ public abstract class LuceneTestCase ext
       throw new RuntimeException(e);
     }
   }
-  
+
   /**
    * Returns a new Directory instance, using the specified random
    * with contents copied from the provided directory. See 
@@ -955,44 +1002,44 @@ public abstract class LuceneTestCase ext
   public static Field newField(Random random, String name, String value, Store store, Index index, TermVector tv) {
     if (!index.isIndexed())
       return new Field(name, value, store, index);
-    
+
     if (!store.isStored() && random.nextBoolean())
       store = Store.YES; // randomly store it
-    
+
     tv = randomTVSetting(random, tv);
-    
+
     return new Field(name, value, store, index, tv);
   }
-  
-  static final TermVector tvSettings[] = { 
-    TermVector.NO, TermVector.YES, TermVector.WITH_OFFSETS, 
-    TermVector.WITH_POSITIONS, TermVector.WITH_POSITIONS_OFFSETS 
+
+  static final TermVector tvSettings[] = {
+    TermVector.NO, TermVector.YES, TermVector.WITH_OFFSETS,
+    TermVector.WITH_POSITIONS, TermVector.WITH_POSITIONS_OFFSETS
   };
-  
+
   private static TermVector randomTVSetting(Random random, TermVector minimum) {
     switch(minimum) {
       case NO: return tvSettings[_TestUtil.nextInt(random, 0, tvSettings.length-1)];
       case YES: return tvSettings[_TestUtil.nextInt(random, 1, tvSettings.length-1)];
-      case WITH_OFFSETS: return random.nextBoolean() ? TermVector.WITH_OFFSETS 
+      case WITH_OFFSETS: return random.nextBoolean() ? TermVector.WITH_OFFSETS
           : TermVector.WITH_POSITIONS_OFFSETS;
-      case WITH_POSITIONS: return random.nextBoolean() ? TermVector.WITH_POSITIONS 
+      case WITH_POSITIONS: return random.nextBoolean() ? TermVector.WITH_POSITIONS
           : TermVector.WITH_POSITIONS_OFFSETS;
       default: return TermVector.WITH_POSITIONS_OFFSETS;
     }
   }
-  
+
   /** return a random Locale from the available locales on the system */
   public static Locale randomLocale(Random random) {
     Locale locales[] = Locale.getAvailableLocales();
     return locales[random.nextInt(locales.length)];
   }
-  
+
   /** return a random TimeZone from the available timezones on the system */
   public static TimeZone randomTimeZone(Random random) {
     String tzIds[] = TimeZone.getAvailableIDs();
     return TimeZone.getTimeZone(tzIds[random.nextInt(tzIds.length)]);
   }
-  
+
   /** return a Locale object equivalent to its programmatic name */
   public static Locale localeForName(String localeName) {
     String elements[] = localeName.split("\\_");
@@ -1014,7 +1061,7 @@ public abstract class LuceneTestCase ext
     "RAMDirectory",
     FS_DIRECTORIES[0], FS_DIRECTORIES[1], FS_DIRECTORIES[2]
   };
-  
+
   public static String randomDirectory(Random random) {
     if (random.nextInt(10) == 0) {
       return CORE_DIRECTORIES[random.nextInt(CORE_DIRECTORIES.length)];
@@ -1026,20 +1073,21 @@ public abstract class LuceneTestCase ext
   private static Directory newFSDirectoryImpl(
       Class<? extends FSDirectory> clazz, File file, LockFactory lockFactory)
       throws IOException {
+    FSDirectory d = null;
     try {
       // Assuming every FSDirectory has a ctor(File), but not all may take a
       // LockFactory too, so setting it afterwards.
       Constructor<? extends FSDirectory> ctor = clazz.getConstructor(File.class);
-      FSDirectory d = ctor.newInstance(file);
-      if (lockFactory != null) {
-        d.setLockFactory(lockFactory);
-      }
-      return d;
+      d = ctor.newInstance(file);
     } catch (Exception e) {
-      return FSDirectory.open(file);
+      d = FSDirectory.open(file);
+    }
+    if (lockFactory != null) {
+      d.setLockFactory(lockFactory);
     }
+    return d;
   }
-  
+
   static Directory newDirectoryImpl(Random random, String clazzName) {
     if (clazzName.equals("random"))
       clazzName = randomDirectory(random);
@@ -1052,6 +1100,7 @@ public abstract class LuceneTestCase ext
         final File tmpFile = File.createTempFile("test", "tmp", TEMP_DIR);
         tmpFile.delete();
         tmpFile.mkdir();
+        tempDirs.add(tmpFile.getAbsolutePath());
         return newFSDirectoryImpl(clazz.asSubclass(FSDirectory.class), tmpFile, null);
       }
 
@@ -1059,18 +1108,31 @@ public abstract class LuceneTestCase ext
       return clazz.newInstance();
     } catch (Exception e) {
       throw new RuntimeException(e);
-    } 
+    }
   }
-  
+
   /** create a new searcher over the reader.
    * This searcher might randomly use threads. */
   public static IndexSearcher newSearcher(IndexReader r) throws IOException {
+    return newSearcher(r, true);
+  }
+  
+  /** create a new searcher over the reader.
+   * This searcher might randomly use threads.
+   * if <code>maybeWrap</code> is true, this searcher might wrap the reader
+   * with one that returns null for getSequentialSubReaders.
+   */
+  public static IndexSearcher newSearcher(IndexReader r, boolean maybeWrap) throws IOException {
     if (random.nextBoolean()) {
-      return new IndexSearcher(r);
+      if (maybeWrap && random.nextBoolean()) {
+        return new IndexSearcher(new SlowMultiReaderWrapper(r));
+      } else {
+        return new IndexSearcher(r);
+      }
     } else {
       int threads = 0;
-      final ExecutorService ex = (random.nextBoolean()) ? null 
-          : Executors.newFixedThreadPool(threads = _TestUtil.nextInt(random, 1, 8), 
+      final ExecutorService ex = (random.nextBoolean()) ? null
+          : Executors.newFixedThreadPool(threads = _TestUtil.nextInt(random, 1, 8),
                       new NamedThreadFactory("LuceneTestCase"));
       if (ex != null && VERBOSE) {
         System.out.println("NOTE: newSearcher using ExecutorService with " + threads + " threads");
@@ -1095,12 +1157,12 @@ public abstract class LuceneTestCase ext
   public String getName() {
     return this.name;
   }
-  
+
   /** Gets a resource from the classpath as {@link File}. This method should only be used,
    * if a real file is needed. To get a stream, code should prefer
    * {@link Class#getResourceAsStream} using {@code this.getClass()}.
    */
-  
+
   protected File getDataFile(String name) throws IOException {
     try {
       return new File(this.getClass().getResource(name).toURI());
@@ -1111,11 +1173,11 @@ public abstract class LuceneTestCase ext
 
   // We get here from InterceptTestCaseEvents on the 'failed' event....
   public void reportAdditionalFailureInfo() {
-    System.err.println("NOTE: reproduce with: ant test -Dtestcase=" + getClass().getSimpleName() 
+    System.err.println("NOTE: reproduce with: ant test -Dtestcase=" + getClass().getSimpleName()
         + " -Dtestmethod=" + getName() + " -Dtests.seed=" + new TwoLongs(staticSeed, seed)
         + reproduceWithExtraParams());
   }
-  
+
   // extra params that were overridden needed to reproduce the command
   private String reproduceWithExtraParams() {
     StringBuilder sb = new StringBuilder();
@@ -1131,12 +1193,12 @@ public abstract class LuceneTestCase ext
   private static long staticSeed;
   // seed for individual test methods, changed in @before
   private long seed;
-  
+
   private static final Random seedRand = new Random();
   protected static final Random random = new Random(0);
 
   private String name = "<unknown>";
-  
+
   /**
    * Annotation for tests that should only be run during nightly builds.
    */
@@ -1144,7 +1206,7 @@ public abstract class LuceneTestCase ext
   @Inherited
   @Retention(RetentionPolicy.RUNTIME)
   public @interface Nightly {}
-  
+
   /** optionally filters the tests to be run by TEST_METHOD */
   public static class LuceneTestCaseRunner extends BlockJUnit4ClassRunner {
     private List<FrameworkMethod> testMethods;
@@ -1174,11 +1236,11 @@ public abstract class LuceneTestCase ext
           testMethods.add(new FrameworkMethod(m));
         }
       }
-      
+
       if (testMethods.isEmpty()) {
         throw new RuntimeException("No runnable methods!");
       }
-      
+
       if (TEST_NIGHTLY == false) {
         if (getTestClass().getJavaClass().isAnnotationPresent(Nightly.class)) {
           /* the test class is annotated with nightly, remove all methods */
@@ -1213,17 +1275,15 @@ public abstract class LuceneTestCase ext
       
       // only print iteration info if the user requested more than one iterations
       boolean verbose = VERBOSE && TEST_ITER > 1;
-      int lastIterFailed = -1;
       for (int i = 0; i < TEST_ITER; i++) {
         if (verbose) {
           System.out.println("\nNOTE: running iter=" + (1+i) + " of " + TEST_ITER);
         }
         super.runChild(arg0, arg1);
         if (testsFailed) {
-          lastIterFailed = i;
-          if (i == TEST_ITER_MIN - 1) {
+          if (i >= TEST_ITER_MIN - 1) {
             if (verbose) {
-              System.out.println("\nNOTE: iteration " + lastIterFailed + " failed !");
+              System.out.println("\nNOTE: iteration " + i + " failed !");
             }
             break;
           }
@@ -1241,9 +1301,9 @@ public abstract class LuceneTestCase ext
         @Override
         public boolean shouldRun(Description d) {
           return TEST_METHOD == null || d.getMethodName().equals(TEST_METHOD);
-        }     
+        }
       };
-      
+
       try {
         f.apply(this);
       } catch (NoTestsRemainException e) {
@@ -1251,12 +1311,12 @@ public abstract class LuceneTestCase ext
       }
     }
   }
-  
+
   private static class RandomCodecProvider extends CodecProvider {
     private List<Codec> knownCodecs = new ArrayList<Codec>();
     private Map<String,Codec> previousMappings = new HashMap<String,Codec>();
     private final int perFieldSeed;
-    
+
     RandomCodecProvider(Random random) {
       this.perFieldSeed = random.nextInt();
       register(new StandardCodec());
@@ -1288,13 +1348,13 @@ public abstract class LuceneTestCase ext
       }
       return codec.name;
     }
-    
+
     @Override
     public synchronized String toString() {
       return "RandomCodecProvider: " + previousMappings.toString();
     }
   }
-  
+
   @Ignore("just a hack")
   public final void alwaysIgnoredTestMethod() {}
 }

Modified: lucene/dev/branches/bulkpostings/lucene/src/test-framework/org/apache/lucene/util/_TestUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test-framework/org/apache/lucene/util/_TestUtil.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test-framework/org/apache/lucene/util/_TestUtil.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test-framework/org/apache/lucene/util/_TestUtil.java Mon May  9 15:24:04 2011
@@ -34,8 +34,6 @@ import java.util.HashMap;
 import java.util.zip.ZipEntry;
 import java.util.zip.ZipFile;
 
-import org.junit.Assert;
-
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Fieldable;
 import org.apache.lucene.index.CheckIndex;
@@ -43,17 +41,22 @@ import org.apache.lucene.index.Concurren
 import org.apache.lucene.index.FieldInfos;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.LogMergePolicy;
+import org.apache.lucene.index.MergePolicy;
 import org.apache.lucene.index.MergeScheduler;
+import org.apache.lucene.index.TieredMergePolicy;
 import org.apache.lucene.index.codecs.Codec;
 import org.apache.lucene.index.codecs.CodecProvider;
 import org.apache.lucene.store.Directory;
+import org.junit.Assert;
 
 public class _TestUtil {
 
   /** Returns temp dir, containing String arg in its name;
    *  does not create the directory. */
   public static File getTempDir(String desc) {
-    return new File(LuceneTestCase.TEMP_DIR, desc + "." + new Random().nextLong());
+    File f = new File(LuceneTestCase.TEMP_DIR, desc + "." + new Random().nextLong());
+    LuceneTestCase.tempDirs.add(f.getAbsolutePath());
+    return f;
   }
 
   /**
@@ -88,6 +91,7 @@ public class _TestUtil {
     rmDir(destDir);
     
     destDir.mkdir();
+    LuceneTestCase.tempDirs.add(destDir.getAbsolutePath());
     
     while (entries.hasMoreElements()) {
       ZipEntry entry = entries.nextElement();
@@ -157,6 +161,19 @@ public class _TestUtil {
     return start + r.nextInt(end-start+1);
   }
 
+  public static String randomSimpleString(Random r) {
+    final int end = r.nextInt(10);
+    if (end == 0) {
+      // allow 0 length
+      return "";
+    }
+    final char[] buffer = new char[end];
+    for (int i = 0; i < end; i++) {
+      buffer[i] = (char) _TestUtil.nextInt(r, 97, 102);
+    }
+    return new String(buffer, 0, end);
+  }
+
   /** Returns random string, including full unicode range. */
   public static String randomUnicodeString(Random r) {
     return randomUnicodeString(r, 20);
@@ -172,22 +189,35 @@ public class _TestUtil {
       return "";
     }
     final char[] buffer = new char[end];
-    for (int i = 0; i < end; i++) {
-      int t = r.nextInt(5);
+    randomFixedLengthUnicodeString(r, buffer, 0, buffer.length);
+    return new String(buffer, 0, end);
+  }
 
-      if (0 == t && i < end - 1) {
+  /**
+   * Fills provided char[] with valid random unicode code
+   * unit sequence.
+   */
+  public static void randomFixedLengthUnicodeString(Random random, char[] chars, int offset, int length) {
+    int i = offset;
+    final int end = offset + length;
+    while(i < end) {
+      final int t = random.nextInt(5);
+      if (0 == t && i < length - 1) {
         // Make a surrogate pair
         // High surrogate
-        buffer[i++] = (char) nextInt(r, 0xd800, 0xdbff);
+        chars[i++] = (char) nextInt(random, 0xd800, 0xdbff);
         // Low surrogate
-        buffer[i] = (char) nextInt(r, 0xdc00, 0xdfff);
+        chars[i++] = (char) nextInt(random, 0xdc00, 0xdfff);
+      } else if (t <= 1) {
+        chars[i++] = (char) random.nextInt(0x80);
+      } else if (2 == t) {
+        chars[i++] = (char) nextInt(random, 0x80, 0x800);
+      } else if (3 == t) {
+        chars[i++] = (char) nextInt(random, 0x800, 0xd7ff);
+      } else if (4 == t) {
+        chars[i++] = (char) nextInt(random, 0xe000, 0xffff);
       }
-      else if (t <= 1) buffer[i] = (char) r.nextInt(0x80);
-      else if (2 == t) buffer[i] = (char) nextInt(r, 0x80, 0x800);
-      else if (3 == t) buffer[i] = (char) nextInt(r, 0x800, 0xd7ff);
-      else if (4 == t) buffer[i] = (char) nextInt(r, 0xe000, 0xffff);
     }
-    return new String(buffer, 0, end);
   }
 
   private static final int[] blockStarts = {
@@ -294,9 +324,14 @@ public class _TestUtil {
    * count lowish */
   public static void reduceOpenFiles(IndexWriter w) {
     // keep number of open files lowish
-    LogMergePolicy lmp = (LogMergePolicy) w.getConfig().getMergePolicy();
-    lmp.setMergeFactor(Math.min(5, lmp.getMergeFactor()));
-
+    MergePolicy mp = w.getConfig().getMergePolicy();
+    if (mp instanceof LogMergePolicy) {
+      LogMergePolicy lmp = (LogMergePolicy) mp;
+      lmp.setMergeFactor(Math.min(5, lmp.getMergeFactor()));
+    } else if (mp instanceof TieredMergePolicy) {
+      TieredMergePolicy tmp = (TieredMergePolicy) mp;
+      tmp.setMaxMergeAtOnce(Math.min(5, tmp.getMaxMergeAtOnce()));
+    }
     MergeScheduler ms = w.getConfig().getMergeScheduler();
     if (ms instanceof ConcurrentMergeScheduler) {
       ((ConcurrentMergeScheduler) ms).setMaxThreadCount(2);

Modified: lucene/dev/branches/bulkpostings/lucene/src/test-framework/overview.html
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test-framework/overview.html?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test-framework/overview.html (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test-framework/overview.html Mon May  9 15:24:04 2011
@@ -1,28 +1,28 @@
-<html>
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements.  See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License.  You may obtain a copy of the License at
-
-     http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-<head>
-   <title>Apache Lucene Test Framework API</title>
-</head>
-<body>
-<p>
-  The Lucene Test Framework is used by Lucene as the basis for its tests.  
-  The framework can also be used for testing third-party code that uses
-  the Lucene API. 
-</p>
-</body>
-</html>
+<html>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<head>
+   <title>Apache Lucene Test Framework API</title>
+</head>
+<body>
+<p>
+  The Lucene Test Framework is used by Lucene as the basis for its tests.  
+  The framework can also be used for testing third-party code that uses
+  the Lucene API. 
+</p>
+</body>
+</html>

Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/TestDemo.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/TestDemo.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/TestDemo.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/TestDemo.java Mon May  9 15:24:04 2011
@@ -43,13 +43,13 @@ import org.apache.lucene.util.LuceneTest
 public class TestDemo extends LuceneTestCase {
 
   public void testDemo() throws IOException, ParseException {
-    Analyzer analyzer = new MockAnalyzer();
+    Analyzer analyzer = new MockAnalyzer(random);
 
     // Store the index in memory:
     Directory directory = newDirectory();
     // To store an index on disk, use this instead:
     //Directory directory = FSDirectory.open("/tmp/testindex");
-    RandomIndexWriter iwriter = new RandomIndexWriter(random, directory);
+    RandomIndexWriter iwriter = new RandomIndexWriter(random, directory, analyzer);
     iwriter.w.setInfoStream(VERBOSE ? System.out : null);
     Document doc = new Document();
     String longTerm = "longtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongterm";

Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/TestExternalCodecs.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/TestExternalCodecs.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/TestExternalCodecs.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/TestExternalCodecs.java Mon May  9 15:24:04 2011
@@ -587,7 +587,9 @@ public class TestExternalCodecs extends 
     public FieldsProducer fieldsProducer(SegmentReadState readState)
       throws IOException {
     
-      return state.get(readState.segmentInfo.name);
+      synchronized(state) {
+        return state.get(readState.segmentInfo.name);
+      }
     }
 
     @Override
@@ -612,7 +614,7 @@ public class TestExternalCodecs extends 
     dir.setCheckIndexOnClose(false); // we use a custom codec provider
     IndexWriter w = new IndexWriter(
         dir,
-        newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.WHITESPACE, true, true)).
+        newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).
             setCodecProvider(provider).
             setMergePolicy(newLogMergePolicy(3))
     );

Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/TestMergeSchedulerExternal.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/TestMergeSchedulerExternal.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/TestMergeSchedulerExternal.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/TestMergeSchedulerExternal.java Mon May  9 15:24:04 2011
@@ -19,17 +19,21 @@ package org.apache.lucene;
 import java.io.IOException;
 
 import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.MockDirectoryWrapper;
+import org.apache.lucene.store.RAMDirectory;
+import org.apache.lucene.index.CorruptIndexException;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.LogMergePolicy;
 import org.apache.lucene.index.MergePolicy;
 import org.apache.lucene.index.ConcurrentMergeScheduler;
+import org.apache.lucene.index.MergeScheduler;
+import org.apache.lucene.index.MergePolicy.OneMerge;
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 
-
 /**
  * Holds tests cases to verify external APIs are accessible
  * while not being in org.apache.lucene.index package.
@@ -90,7 +94,7 @@ public class TestMergeSchedulerExternal 
     doc.add(idField);
     
     IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(
-        TEST_VERSION_CURRENT, new MockAnalyzer()).setMergeScheduler(new MyMergeScheduler())
+        TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergeScheduler(new MyMergeScheduler())
         .setMaxBufferedDocs(2).setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH)
         .setMergePolicy(newLogMergePolicy()));
     LogMergePolicy logMP = (LogMergePolicy) writer.getConfig().getMergePolicy();
@@ -106,4 +110,40 @@ public class TestMergeSchedulerExternal 
     assertTrue(excCalled);
     dir.close();
   }
+  
+  private static class ReportingMergeScheduler extends MergeScheduler {
+
+    @Override
+    public void merge(IndexWriter writer) throws CorruptIndexException, IOException {
+      OneMerge merge = null;
+      while ((merge = writer.getNextMerge()) != null) {
+        if (VERBOSE) {
+          System.out.println("executing merge " + merge.segString(writer.getDirectory()));
+        }
+        writer.merge(merge);
+      }
+    }
+
+    @Override
+    public void close() throws CorruptIndexException, IOException {}
+    
+  }
+
+  public void testCustomMergeScheduler() throws Exception {
+    // we don't really need to execute anything, just to make sure the custom MS
+    // compiles. But ensure that it can be used as well, e.g., no other hidden
+    // dependencies or something. Therefore, don't use any random API !
+    Directory dir = new RAMDirectory();
+    IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, null);
+    conf.setMergeScheduler(new ReportingMergeScheduler());
+    IndexWriter writer = new IndexWriter(dir, conf);
+    writer.addDocument(new Document());
+    writer.commit(); // trigger flush
+    writer.addDocument(new Document());
+    writer.commit(); // trigger flush
+    writer.optimize();
+    writer.close();
+    dir.close();
+  }
+  
 }

Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/TestSearch.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/TestSearch.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/TestSearch.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/TestSearch.java Mon May  9 15:24:04 2011
@@ -72,7 +72,7 @@ public class TestSearch extends LuceneTe
     private void doTestSearch(Random random, PrintWriter out, boolean useCompoundFile)
     throws Exception {
       Directory directory = newDirectory();
-      Analyzer analyzer = new MockAnalyzer();
+      Analyzer analyzer = new MockAnalyzer(random);
       IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
       MergePolicy mp = conf.getMergePolicy();
       if (mp instanceof LogMergePolicy) {

Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/TestSearchForDuplicates.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/TestSearchForDuplicates.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/TestSearchForDuplicates.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/TestSearchForDuplicates.java Mon May  9 15:24:04 2011
@@ -78,7 +78,7 @@ public class TestSearchForDuplicates ext
 
   private void doTest(Random random, PrintWriter out, boolean useCompoundFiles) throws Exception {
       Directory directory = newDirectory();
-      Analyzer analyzer = new MockAnalyzer();
+      Analyzer analyzer = new MockAnalyzer(random);
       IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
       final MergePolicy mp = conf.getMergePolicy();
       if (mp instanceof LogMergePolicy) {

Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/analysis/TestMockAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/analysis/TestMockAnalyzer.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/analysis/TestMockAnalyzer.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/analysis/TestMockAnalyzer.java Mon May  9 15:24:04 2011
@@ -1,5 +1,6 @@
 package org.apache.lucene.analysis;
 
+import java.io.StringReader;
 import java.util.Arrays;
 
 import org.apache.lucene.util.automaton.Automaton;
@@ -29,7 +30,7 @@ public class TestMockAnalyzer extends Ba
 
   /** Test a configuration that behaves a lot like WhitespaceAnalyzer */
   public void testWhitespace() throws Exception {
-    Analyzer a = new MockAnalyzer();
+    Analyzer a = new MockAnalyzer(random);
     assertAnalyzesTo(a, "A bc defg hiJklmn opqrstuv wxy z ",
         new String[] { "a", "bc", "defg", "hijklmn", "opqrstuv", "wxy", "z" });
     assertAnalyzesToReuse(a, "aba cadaba shazam",
@@ -40,7 +41,7 @@ public class TestMockAnalyzer extends Ba
   
   /** Test a configuration that behaves a lot like SimpleAnalyzer */
   public void testSimple() throws Exception {
-    Analyzer a = new MockAnalyzer(MockTokenizer.SIMPLE, true);
+    Analyzer a = new MockAnalyzer(random, MockTokenizer.SIMPLE, true);
     assertAnalyzesTo(a, "a-bc123 defg+hijklmn567opqrstuv78wxy_z ",
         new String[] { "a", "bc", "defg", "hijklmn", "opqrstuv", "wxy", "z" });
     assertAnalyzesToReuse(a, "aba4cadaba-Shazam",
@@ -51,7 +52,7 @@ public class TestMockAnalyzer extends Ba
   
   /** Test a configuration that behaves a lot like KeywordAnalyzer */
   public void testKeyword() throws Exception {
-    Analyzer a = new MockAnalyzer(MockTokenizer.KEYWORD, false);
+    Analyzer a = new MockAnalyzer(random, MockTokenizer.KEYWORD, false);
     assertAnalyzesTo(a, "a-bc123 defg+hijklmn567opqrstuv78wxy_z ",
         new String[] { "a-bc123 defg+hijklmn567opqrstuv78wxy_z " });
     assertAnalyzesToReuse(a, "aba4cadaba-Shazam",
@@ -62,13 +63,13 @@ public class TestMockAnalyzer extends Ba
   
   /** Test a configuration that behaves a lot like StopAnalyzer */
   public void testStop() throws Exception {
-    Analyzer a = new MockAnalyzer(MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true);
+    Analyzer a = new MockAnalyzer(random, MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true);
     assertAnalyzesTo(a, "the quick brown a fox",
         new String[] { "quick", "brown", "fox" },
         new int[] { 2, 1, 2 });
     
     // disable positions
-    a = new MockAnalyzer(MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, false);
+    a = new MockAnalyzer(random, MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, false);
     assertAnalyzesTo(a, "the quick brown a fox",
         new String[] { "quick", "brown", "fox" },
         new int[] { 1, 1, 1 });
@@ -81,7 +82,7 @@ public class TestMockAnalyzer extends Ba
           BasicOperations.complement(
               Automaton.union(
                   Arrays.asList(BasicAutomata.makeString("foo"), BasicAutomata.makeString("bar")))));
-    Analyzer a = new MockAnalyzer(MockTokenizer.SIMPLE, true, keepWords, true);
+    Analyzer a = new MockAnalyzer(random, MockTokenizer.SIMPLE, true, keepWords, true);
     assertAnalyzesTo(a, "quick foo brown bar bar fox foo",
         new String[] { "foo", "bar", "bar", "foo" },
         new int[] { 2, 2, 1, 2 });
@@ -90,9 +91,28 @@ public class TestMockAnalyzer extends Ba
   /** Test a configuration that behaves a lot like LengthFilter */
   public void testLength() throws Exception {
     CharacterRunAutomaton length5 = new CharacterRunAutomaton(new RegExp(".{5,}").toAutomaton());
-    Analyzer a = new MockAnalyzer(MockTokenizer.WHITESPACE, true, length5, true);
+    Analyzer a = new MockAnalyzer(random, MockTokenizer.WHITESPACE, true, length5, true);
     assertAnalyzesTo(a, "ok toolong fine notfine",
         new String[] { "ok", "fine" },
         new int[] { 1, 2 });
   }
+  
+  public void testLUCENE_3042() throws Exception {
+    String testString = "t";
+    
+    Analyzer analyzer = new MockAnalyzer(random);
+    TokenStream stream = analyzer.reusableTokenStream("dummy", new StringReader(testString));
+    stream.reset();
+    while (stream.incrementToken()) {
+      // consume
+    }
+    stream.end();
+    
+    assertAnalyzesToReuse(analyzer, testString, new String[] { "t" });
+  }
+
+  /** blast some random strings through the analyzer */
+  public void testRandomStrings() throws Exception {
+    checkRandomData(random, new MockAnalyzer(random), 10000*RANDOM_MULTIPLIER);
+  }
 }

Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/Test2BTerms.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/Test2BTerms.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/Test2BTerms.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/Test2BTerms.java Mon May  9 15:24:04 2011
@@ -19,11 +19,18 @@ package org.apache.lucene.index;
 
 import org.apache.lucene.util.*;
 import org.apache.lucene.store.*;
+import org.apache.lucene.search.*;
 import org.apache.lucene.analysis.*;
 import org.apache.lucene.analysis.tokenattributes.*;
 import org.apache.lucene.document.*;
 import org.apache.lucene.index.codecs.CodecProvider;
+import java.io.File;
 import java.io.IOException;
+import java.io.UnsupportedEncodingException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Random;
 import org.junit.Ignore;
 
 // NOTE: this test will fail w/ PreFlexRW codec!  (Because
@@ -36,7 +43,7 @@ import org.junit.Ignore;
 //
 //   ant compile-test
 //
-//   java -server -Xmx2g -Xms2g -d64 -cp .:lib/junit-4.7.jar:./build/classes/test:./build/classes/java -Dlucene.version=4.0-dev -Dtests.directory=SimpleFSDirectory -Dtests.codec=Standard -DtempDir=build -ea org.junit.runner.JUnitCore org.apache.lucene.index.Test2BTerms
+//   java -server -Xmx8g -d64 -cp .:lib/junit-4.7.jar:./build/classes/test:./build/classes/test-framework:./build/classes/java -Dlucene.version=4.0-dev -Dtests.directory=MMapDirectory -DtempDir=build -ea org.junit.runner.JUnitCore org.apache.lucene.index.Test2BTerms
 //
 
 public class Test2BTerms extends LuceneTestCase {
@@ -45,17 +52,21 @@ public class Test2BTerms extends LuceneT
 
   private final static BytesRef bytes = new BytesRef(TOKEN_LEN);
 
-  private static final class MyTokenStream extends TokenStream {
+  private final static class MyTokenStream extends TokenStream {
 
     private final int tokensPerDoc;
     private int tokenCount;
-    private int byteUpto;
+    public final List<BytesRef> savedTerms = new ArrayList<BytesRef>();
+    private int nextSave;
+    private final Random random;
 
-    public MyTokenStream(int tokensPerDoc) {
+    public MyTokenStream(Random random, int tokensPerDoc) {
       super(new MyAttributeFactory(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY));
       this.tokensPerDoc = tokensPerDoc;
       addAttribute(TermToBytesRefAttribute.class);
       bytes.length = TOKEN_LEN;
+      this.random = random;
+      nextSave = _TestUtil.nextInt(random, 500000, 1000000);
     }
     
     @Override
@@ -65,6 +76,11 @@ public class Test2BTerms extends LuceneT
       }
       random.nextBytes(bytes.bytes);
       tokenCount++;
+      if (--nextSave == 0) {
+        savedTerms.add(new BytesRef(bytes));
+        System.out.println("TEST: save term=" + bytes);
+        nextSave = _TestUtil.nextInt(random, 500000, 1000000);
+      }
       return true;
     }
 
@@ -131,47 +147,122 @@ public class Test2BTerms extends LuceneT
       throw new RuntimeException("thist test cannot run with PreFlex codec");
     }
 
-    long TERM_COUNT = ((long) Integer.MAX_VALUE) + 100000000;
+    final long TERM_COUNT = ((long) Integer.MAX_VALUE) + 100000000;
+
+    final int TERMS_PER_DOC = _TestUtil.nextInt(random, 100000, 1000000);
 
-    int TERMS_PER_DOC = 1000000;
+    List<BytesRef> savedTerms = null;
 
     Directory dir = newFSDirectory(_TestUtil.getTempDir("2BTerms"));
-    IndexWriter w = new IndexWriter(
-        dir,
-        new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).
-            setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH).
-            setRAMBufferSizeMB(256.0).
-            setMergeScheduler(new ConcurrentMergeScheduler()).
-            setMergePolicy(newLogMergePolicy(false, 10))
-    );
-
-    MergePolicy mp = w.getConfig().getMergePolicy();
-    if (mp instanceof LogByteSizeMergePolicy) {
-      // 1 petabyte:
-      ((LogByteSizeMergePolicy) mp).setMaxMergeMB(1024*1024*1024);
-    }
-
-    Document doc = new Document();
-    Field field = new Field("field", new MyTokenStream(TERMS_PER_DOC));
-    field.setOmitTermFreqAndPositions(true);
-    field.setOmitNorms(true);
-    doc.add(field);
-    //w.setInfoStream(System.out);
-    final int numDocs = (int) (TERM_COUNT/TERMS_PER_DOC);
-    for(int i=0;i<numDocs;i++) {
-      final long t0 = System.currentTimeMillis();
-      w.addDocument(doc);
-      System.out.println(i + " of " + numDocs + " " + (System.currentTimeMillis()-t0) + " msec");
-    }
-    System.out.println("now optimize...");
-    w.optimize();
-    w.close();
+    //Directory dir = newFSDirectory(new File("/p/lucene/indices/2bindex"));
 
-    System.out.println("now CheckIndex...");
+    if (true) {
+
+      IndexWriter w = new IndexWriter(dir,
+                                      new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))
+                                      .setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
+                                      .setRAMBufferSizeMB(256.0)
+                                      .setMergeScheduler(new ConcurrentMergeScheduler())
+                                      .setMergePolicy(newLogMergePolicy(false, 10))
+                                      .setOpenMode(IndexWriterConfig.OpenMode.CREATE));
+
+      MergePolicy mp = w.getConfig().getMergePolicy();
+      if (mp instanceof LogByteSizeMergePolicy) {
+        // 1 petabyte:
+        ((LogByteSizeMergePolicy) mp).setMaxMergeMB(1024*1024*1024);
+      }
+
+      Document doc = new Document();
+      final MyTokenStream ts = new MyTokenStream(random, TERMS_PER_DOC);
+      Field field = new Field("field", ts);
+      field.setOmitTermFreqAndPositions(true);
+      field.setOmitNorms(true);
+      doc.add(field);
+      //w.setInfoStream(System.out);
+      final int numDocs = (int) (TERM_COUNT/TERMS_PER_DOC);
+
+      System.out.println("TERMS_PER_DOC=" + TERMS_PER_DOC);
+      System.out.println("numDocs=" + numDocs);
+
+      for(int i=0;i<numDocs;i++) {
+        final long t0 = System.currentTimeMillis();
+        w.addDocument(doc);
+        System.out.println(i + " of " + numDocs + " " + (System.currentTimeMillis()-t0) + " msec");
+      }
+      savedTerms = ts.savedTerms;
+
+      System.out.println("TEST: optimize");
+      w.optimize();
+      System.out.println("TEST: close writer");
+      w.close();
+    }
+
+    System.out.println("TEST: open reader");
+    final IndexReader r = IndexReader.open(dir);
+    if (savedTerms == null) {
+      savedTerms = findTerms(r);
+    }
+    final int numSavedTerms = savedTerms.size();
+    final List<BytesRef> bigOrdTerms = new ArrayList<BytesRef>(savedTerms.subList(numSavedTerms-10, numSavedTerms));
+    System.out.println("TEST: test big ord terms...");
+    testSavedTerms(r, bigOrdTerms);
+    System.out.println("TEST: test all saved terms...");
+    testSavedTerms(r, savedTerms);
+    r.close();
+
+    System.out.println("TEST: now CheckIndex...");
     CheckIndex.Status status = _TestUtil.checkIndex(dir);
     final long tc = status.segmentInfos.get(0).termIndexStatus.termCount;
     assertTrue("count " + tc + " is not > " + Integer.MAX_VALUE, tc > Integer.MAX_VALUE);
 
     dir.close();
+    System.out.println("TEST: done!");
+  }
+
+  private List<BytesRef> findTerms(IndexReader r) throws IOException {
+    System.out.println("TEST: findTerms");
+    final TermsEnum termsEnum = MultiFields.getTerms(r, "field").iterator();
+    final List<BytesRef> savedTerms = new ArrayList<BytesRef>();
+    int nextSave = _TestUtil.nextInt(random, 500000, 1000000);
+    BytesRef term;
+    while((term = termsEnum.next()) != null) {
+      if (--nextSave == 0) {
+        savedTerms.add(new BytesRef(term));
+        System.out.println("TEST: add " + term);
+        nextSave = _TestUtil.nextInt(random, 500000, 1000000);
+      }
+    }
+    return savedTerms;
+  }
+
+  private void testSavedTerms(IndexReader r, List<BytesRef> terms) throws IOException {
+    System.out.println("TEST: run " + terms.size() + " terms on reader=" + r);
+    IndexSearcher s = new IndexSearcher(r);
+    Collections.shuffle(terms);
+    TermsEnum termsEnum = MultiFields.getTerms(r, "field").iterator();
+    boolean failed = false;
+    for(int iter=0;iter<10*terms.size();iter++) {
+      final BytesRef term = terms.get(random.nextInt(terms.size()));
+      System.out.println("TEST: search " + term);
+      final long t0 = System.currentTimeMillis();
+      final int count = s.search(new TermQuery(new Term("field", term)), 1).totalHits;
+      if (count <= 0) {
+        System.out.println("  FAILED: count=" + count);
+        failed = true;
+      }
+      final long t1 = System.currentTimeMillis();
+      System.out.println("  took " + (t1-t0) + " millis");
+
+      TermsEnum.SeekStatus result = termsEnum.seek(term);
+      if (result != TermsEnum.SeekStatus.FOUND) {
+        if (result == TermsEnum.SeekStatus.END) {
+          System.out.println("  FAILED: got END");
+        } else {
+          System.out.println("  FAILED: wrong term: got " + termsEnum.term());
+        }
+        failed = true;
+      }
+    }
+    assertFalse(failed);
   }
 }