You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2011/05/09 17:24:23 UTC
svn commit: r1101062 [9/21] - in /lucene/dev/branches/bulkpostings: ./
dev-tools/ dev-tools/eclipse/ dev-tools/idea/.idea/
dev-tools/idea/lucene/contrib/ant/ dev-tools/idea/lucene/contrib/db/bdb-je/
dev-tools/idea/lucene/contrib/db/bdb/ dev-tools/idea/...
Modified: lucene/dev/branches/bulkpostings/lucene/src/test-framework/org/apache/lucene/analysis/MockAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test-framework/org/apache/lucene/analysis/MockAnalyzer.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test-framework/org/apache/lucene/analysis/MockAnalyzer.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test-framework/org/apache/lucene/analysis/MockAnalyzer.java Mon May 9 15:24:04 2011
@@ -19,10 +19,10 @@ package org.apache.lucene.analysis;
import java.io.IOException;
import java.io.Reader;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Random;
-import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
-import org.apache.lucene.index.Payload;
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
/**
@@ -33,67 +33,50 @@ public final class MockAnalyzer extends
private final boolean lowerCase;
private final CharacterRunAutomaton filter;
private final boolean enablePositionIncrements;
- private final boolean payload;
private int positionIncrementGap;
-
- /**
- * Calls {@link #MockAnalyzer(CharacterRunAutomaton, boolean, CharacterRunAutomaton, boolean, boolean)
- * MockAnalyzer(runAutomaton, lowerCase, filter, enablePositionIncrements, true}).
- */
- public MockAnalyzer(CharacterRunAutomaton runAutomaton, boolean lowerCase, CharacterRunAutomaton filter, boolean enablePositionIncrements) {
- this(runAutomaton, lowerCase, filter, enablePositionIncrements, true);
- }
+ private final Random random;
+ private Map<String,Integer> previousMappings = new HashMap<String,Integer>();
/**
* Creates a new MockAnalyzer.
*
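+ * @param random source of randomness used to pick, per field, whether tokens get no payload, a variable-length payload, or a fixed-length payload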
* @param runAutomaton DFA describing how tokenization should happen (e.g. [a-zA-Z]+)
* @param lowerCase true if the tokenizer should lowercase terms
* @param filter DFA describing how terms should be filtered (set of stopwords, etc)
* @param enablePositionIncrements true if position increments should reflect filtered terms.
- * @param payload if payloads should be added containing the positions (for testing)
*/
- public MockAnalyzer(CharacterRunAutomaton runAutomaton, boolean lowerCase, CharacterRunAutomaton filter, boolean enablePositionIncrements, boolean payload) {
+ public MockAnalyzer(Random random, CharacterRunAutomaton runAutomaton, boolean lowerCase, CharacterRunAutomaton filter, boolean enablePositionIncrements) {
+ this.random = random;
this.runAutomaton = runAutomaton;
this.lowerCase = lowerCase;
this.filter = filter;
this.enablePositionIncrements = enablePositionIncrements;
- this.payload = payload;
}
/**
- * Calls {@link #MockAnalyzer(CharacterRunAutomaton, boolean, CharacterRunAutomaton, boolean, boolean)
- * MockAnalyzer(runAutomaton, lowerCase, MockTokenFilter.EMPTY_STOPSET, false, true}).
+ * Calls {@link #MockAnalyzer(Random, CharacterRunAutomaton, boolean, CharacterRunAutomaton, boolean)
+ * MockAnalyzer(random, runAutomaton, lowerCase, MockTokenFilter.EMPTY_STOPSET, false}).
*/
- public MockAnalyzer(CharacterRunAutomaton runAutomaton, boolean lowerCase) {
- this(runAutomaton, lowerCase, MockTokenFilter.EMPTY_STOPSET, false, true);
+ public MockAnalyzer(Random random, CharacterRunAutomaton runAutomaton, boolean lowerCase) {
+ this(random, runAutomaton, lowerCase, MockTokenFilter.EMPTY_STOPSET, false);
}
- /**
- * Calls {@link #MockAnalyzer(CharacterRunAutomaton, boolean, CharacterRunAutomaton, boolean, boolean)
- * MockAnalyzer(runAutomaton, lowerCase, MockTokenFilter.EMPTY_STOPSET, false, payload}).
- */
- public MockAnalyzer(CharacterRunAutomaton runAutomaton, boolean lowerCase, boolean payload) {
- this(runAutomaton, lowerCase, MockTokenFilter.EMPTY_STOPSET, false, payload);
- }
-
/**
* Create a Whitespace-lowercasing analyzer with no stopwords removal.
* <p>
- * Calls {@link #MockAnalyzer(CharacterRunAutomaton, boolean, CharacterRunAutomaton, boolean, boolean)
- * MockAnalyzer(MockTokenizer.WHITESPACE, true, MockTokenFilter.EMPTY_STOPSET, false, true}).
+ * Calls {@link #MockAnalyzer(Random, CharacterRunAutomaton, boolean, CharacterRunAutomaton, boolean)
+ * MockAnalyzer(random, MockTokenizer.WHITESPACE, true, MockTokenFilter.EMPTY_STOPSET, false}).
*/
- public MockAnalyzer() {
- this(MockTokenizer.WHITESPACE, true);
+ public MockAnalyzer(Random random) {
+ this(random, MockTokenizer.WHITESPACE, true);
}
@Override
public TokenStream tokenStream(String fieldName, Reader reader) {
MockTokenizer tokenizer = new MockTokenizer(reader, runAutomaton, lowerCase);
TokenFilter filt = new MockTokenFilter(tokenizer, filter, enablePositionIncrements);
- if (payload){
- filt = new SimplePayloadFilter(filt, fieldName);
- }
+ filt = maybePayload(filt, fieldName);
return filt;
}
@@ -105,15 +88,19 @@ public final class MockAnalyzer extends
@Override
public TokenStream reusableTokenStream(String fieldName, Reader reader)
throws IOException {
- SavedStreams saved = (SavedStreams) getPreviousTokenStream();
+ @SuppressWarnings("unchecked") Map<String,SavedStreams> map = (Map) getPreviousTokenStream();
+ if (map == null) {
+ map = new HashMap<String,SavedStreams>();
+ setPreviousTokenStream(map);
+ }
+
+ SavedStreams saved = map.get(fieldName);
if (saved == null) {
saved = new SavedStreams();
saved.tokenizer = new MockTokenizer(reader, runAutomaton, lowerCase);
saved.filter = new MockTokenFilter(saved.tokenizer, filter, enablePositionIncrements);
- if (payload){
- saved.filter = new SimplePayloadFilter(saved.filter, fieldName);
- }
- setPreviousTokenStream(saved);
+ saved.filter = maybePayload(saved.filter, fieldName);
+ map.put(fieldName, saved);
return saved.filter;
} else {
saved.tokenizer.reset(reader);
@@ -122,6 +109,28 @@ public final class MockAnalyzer extends
}
}
+ private synchronized TokenFilter maybePayload(TokenFilter stream, String fieldName) {
+ Integer val = previousMappings.get(fieldName);
+ if (val == null) {
+ switch(random.nextInt(3)) {
+ case 0: val = -1; // no payloads
+ break;
+ case 1: val = Integer.MAX_VALUE; // variable length payload
+ break;
+ case 2: val = random.nextInt(12); // fixed length payload
+ break;
+ }
+ previousMappings.put(fieldName, val); // save it so we are consistent for this field
+ }
+
+ if (val == -1)
+ return stream;
+ else if (val == Integer.MAX_VALUE)
+ return new MockVariableLengthPayloadFilter(random, stream);
+ else
+ return new MockFixedLengthPayloadFilter(random, stream, val);
+ }
+
public void setPositionIncrementGap(int positionIncrementGap){
this.positionIncrementGap = positionIncrementGap;
}
@@ -131,35 +140,3 @@ public final class MockAnalyzer extends
return positionIncrementGap;
}
}
-
-final class SimplePayloadFilter extends TokenFilter {
- String fieldName;
- int pos;
- final PayloadAttribute payloadAttr;
- final CharTermAttribute termAttr;
-
- public SimplePayloadFilter(TokenStream input, String fieldName) {
- super(input);
- this.fieldName = fieldName;
- pos = 0;
- payloadAttr = input.addAttribute(PayloadAttribute.class);
- termAttr = input.addAttribute(CharTermAttribute.class);
- }
-
- @Override
- public boolean incrementToken() throws IOException {
- if (input.incrementToken()) {
- payloadAttr.setPayload(new Payload(("pos: " + pos).getBytes()));
- pos++;
- return true;
- } else {
- return false;
- }
- }
-
- @Override
- public void reset() throws IOException {
- super.reset();
- pos = 0;
- }
-}
Modified: lucene/dev/branches/bulkpostings/lucene/src/test-framework/org/apache/lucene/analysis/MockTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test-framework/org/apache/lucene/analysis/MockTokenizer.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test-framework/org/apache/lucene/analysis/MockTokenizer.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test-framework/org/apache/lucene/analysis/MockTokenizer.java Mon May 9 15:24:04 2011
@@ -20,14 +20,15 @@ package org.apache.lucene.analysis;
import java.io.IOException;
import java.io.Reader;
-import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
import org.apache.lucene.util.automaton.RegExp;
/**
* Automaton-based tokenizer for testing. Optionally lowercases.
*/
-public class MockTokenizer extends CharTokenizer {
+public class MockTokenizer extends Tokenizer {
/** Acts Similar to WhitespaceTokenizer */
public static final CharacterRunAutomaton WHITESPACE =
new CharacterRunAutomaton(new RegExp("[^ \t\r\n]+").toAutomaton());
@@ -45,21 +46,67 @@ public class MockTokenizer extends CharT
private final boolean lowerCase;
private int state;
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+ private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
+ int off = 0;
+
public MockTokenizer(AttributeFactory factory, Reader input, CharacterRunAutomaton runAutomaton, boolean lowerCase) {
- super(LuceneTestCase.TEST_VERSION_CURRENT, factory, input);
+ super(factory, input);
this.runAutomaton = runAutomaton;
this.lowerCase = lowerCase;
this.state = runAutomaton.getInitialState();
}
public MockTokenizer(Reader input, CharacterRunAutomaton runAutomaton, boolean lowerCase) {
- super(LuceneTestCase.TEST_VERSION_CURRENT, input);
+ super(input);
this.runAutomaton = runAutomaton;
this.lowerCase = lowerCase;
this.state = runAutomaton.getInitialState();
}
@Override
+ public final boolean incrementToken() throws IOException {
+ clearAttributes();
+ for (;;) {
+ int startOffset = off;
+ int cp = readCodePoint();
+ if (cp < 0) {
+ break;
+ } else if (isTokenChar(cp)) {
+ int endOffset;
+ do {
+ char chars[] = Character.toChars(normalize(cp));
+ for (int i = 0; i < chars.length; i++)
+ termAtt.append(chars[i]);
+ endOffset = off;
+ cp = readCodePoint();
+ } while (cp >= 0 && isTokenChar(cp));
+ offsetAtt.setOffset(startOffset, endOffset);
+ return true;
+ }
+ }
+ return false;
+ }
+
+ protected int readCodePoint() throws IOException {
+ int ch = input.read();
+ if (ch < 0) {
+ return ch;
+ } else {
+ assert !Character.isLowSurrogate((char) ch);
+ off++;
+ if (Character.isHighSurrogate((char) ch)) {
+ int ch2 = input.read();
+ if (ch2 >= 0) {
+ off++;
+ assert Character.isLowSurrogate((char) ch2);
+ return Character.toCodePoint((char) ch, (char) ch2);
+ }
+ }
+ return ch;
+ }
+ }
+
protected boolean isTokenChar(int c) {
state = runAutomaton.step(state, c);
if (state < 0) {
@@ -70,7 +117,6 @@ public class MockTokenizer extends CharT
}
}
- @Override
protected int normalize(int c) {
return lowerCase ? Character.toLowerCase(c) : c;
}
@@ -79,5 +125,12 @@ public class MockTokenizer extends CharT
public void reset() throws IOException {
super.reset();
state = runAutomaton.getInitialState();
+ off = 0;
+ }
+
+ @Override
+ public void end() throws IOException {
+ int finalOffset = correctOffset(off);
+ offsetAtt.setOffset(finalOffset, finalOffset);
}
}
Modified: lucene/dev/branches/bulkpostings/lucene/src/test-framework/org/apache/lucene/index/DocHelper.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test-framework/org/apache/lucene/index/DocHelper.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test-framework/org/apache/lucene/index/DocHelper.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test-framework/org/apache/lucene/index/DocHelper.java Mon May 9 15:24:04 2011
@@ -21,6 +21,7 @@ import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.util.HashMap;
import java.util.Map;
+import java.util.Random;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
@@ -30,6 +31,8 @@ import org.apache.lucene.document.Field;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.search.SimilarityProvider;
import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.LuceneTestCase;
+
import static org.apache.lucene.util.LuceneTestCase.TEST_VERSION_CURRENT;
class DocHelper {
@@ -218,9 +221,9 @@ class DocHelper {
* @param doc
* @throws IOException
*/
- public static SegmentInfo writeDoc(Directory dir, Document doc) throws IOException
+ public static SegmentInfo writeDoc(Random random, Directory dir, Document doc) throws IOException
{
- return writeDoc(dir, new MockAnalyzer(MockTokenizer.WHITESPACE, false), null, doc);
+ return writeDoc(random, dir, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false), null, doc);
}
/**
@@ -233,8 +236,8 @@ class DocHelper {
* @param doc
* @throws IOException
*/
- public static SegmentInfo writeDoc(Directory dir, Analyzer analyzer, SimilarityProvider similarity, Document doc) throws IOException {
- IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(
+ public static SegmentInfo writeDoc(Random random, Directory dir, Analyzer analyzer, SimilarityProvider similarity, Document doc) throws IOException {
+ IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig( /* LuceneTestCase.newIndexWriterConfig(random, */
TEST_VERSION_CURRENT, analyzer).setSimilarityProvider(similarity));
//writer.setUseCompoundFile(false);
writer.addDocument(doc);
Modified: lucene/dev/branches/bulkpostings/lucene/src/test-framework/org/apache/lucene/index/RandomIndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test-framework/org/apache/lucene/index/RandomIndexWriter.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test-framework/org/apache/lucene/index/RandomIndexWriter.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test-framework/org/apache/lucene/index/RandomIndexWriter.java Mon May 9 15:24:04 2011
@@ -42,6 +42,7 @@ public class RandomIndexWriter implement
private final Random r;
int docCount;
int flushAt;
+ private double flushAtFactor = 1.0;
private boolean getReaderCalled;
// Randomly calls Thread.yield so we mixup thread scheduling
@@ -67,7 +68,7 @@ public class RandomIndexWriter implement
/** create a RandomIndexWriter with a random config: Uses TEST_VERSION_CURRENT and MockAnalyzer */
public RandomIndexWriter(Random r, Directory dir) throws IOException {
- this(r, dir, LuceneTestCase.newIndexWriterConfig(r, LuceneTestCase.TEST_VERSION_CURRENT, new MockAnalyzer()));
+ this(r, dir, LuceneTestCase.newIndexWriterConfig(r, LuceneTestCase.TEST_VERSION_CURRENT, new MockAnalyzer(r)));
}
/** create a RandomIndexWriter with a random config: Uses TEST_VERSION_CURRENT */
@@ -98,12 +99,20 @@ public class RandomIndexWriter implement
*/
public void addDocument(Document doc) throws IOException {
w.addDocument(doc);
+ maybeCommit();
+ }
+
+ private void maybeCommit() throws IOException {
if (docCount++ == flushAt) {
if (LuceneTestCase.VERBOSE) {
- System.out.println("RIW.addDocument: now doing a commit");
+ System.out.println("RIW.add/updateDocument: now doing a commit at docCount=" + docCount);
}
w.commit();
- flushAt += _TestUtil.nextInt(r, 10, 1000);
+ flushAt += _TestUtil.nextInt(r, (int) (flushAtFactor * 10), (int) (flushAtFactor * 1000));
+ if (flushAtFactor < 2e6) {
+ // gradually but exponentially increase time between flushes
+ flushAtFactor *= 1.05;
+ }
}
}
@@ -113,13 +122,7 @@ public class RandomIndexWriter implement
*/
public void updateDocument(Term t, Document doc) throws IOException {
w.updateDocument(t, doc);
- if (docCount++ == flushAt) {
- if (LuceneTestCase.VERBOSE) {
- System.out.println("RIW.updateDocument: now doing a commit");
- }
- w.commit();
- flushAt += _TestUtil.nextInt(r, 10, 1000);
- }
+ maybeCommit();
}
public void addIndexes(Directory... dirs) throws CorruptIndexException, IOException {
@@ -181,7 +184,7 @@ public class RandomIndexWriter implement
System.out.println("RIW.getReader: open new reader");
}
w.commit();
- return IndexReader.open(w.getDirectory(), new KeepOnlyLastCommitDeletionPolicy(), r.nextBoolean(), _TestUtil.nextInt(r, 1, 10));
+ return IndexReader.open(w.getDirectory(), new KeepOnlyLastCommitDeletionPolicy(), r.nextBoolean(), _TestUtil.nextInt(r, 1, 10), w.getConfig().getCodecProvider());
}
}
Modified: lucene/dev/branches/bulkpostings/lucene/src/test-framework/org/apache/lucene/index/codecs/mockrandom/MockRandomCodec.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test-framework/org/apache/lucene/index/codecs/mockrandom/MockRandomCodec.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test-framework/org/apache/lucene/index/codecs/mockrandom/MockRandomCodec.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test-framework/org/apache/lucene/index/codecs/mockrandom/MockRandomCodec.java Mon May 9 15:24:04 2011
@@ -120,7 +120,14 @@ public class MockRandomCodec extends Cod
@Override
public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
-
+ // we pull this before the seed intentionally: because it's not consumed at runtime
+ // (the skipInterval is written into postings header)
+ int skipInterval = _TestUtil.nextInt(seedRandom, 2, 10);
+
+ if (LuceneTestCase.VERBOSE) {
+ System.out.println("MockRandomCodec: skipInterval=" + skipInterval);
+ }
+
final long seed = seedRandom.nextLong();
if (LuceneTestCase.VERBOSE) {
@@ -136,12 +143,12 @@ public class MockRandomCodec extends Cod
PostingsWriterBase postingsWriter;
if (random.nextBoolean()) {
- postingsWriter = new SepPostingsWriterImpl(state, new MockIntStreamFactory(random));
+ postingsWriter = new SepPostingsWriterImpl(state, new MockIntStreamFactory(random), skipInterval);
} else {
if (LuceneTestCase.VERBOSE) {
System.out.println("MockRandomCodec: writing Standard postings");
}
- postingsWriter = new StandardPostingsWriter(state);
+ postingsWriter = new StandardPostingsWriter(state, skipInterval);
}
if (random.nextBoolean()) {
Modified: lucene/dev/branches/bulkpostings/lucene/src/test-framework/org/apache/lucene/search/QueryUtils.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test-framework/org/apache/lucene/search/QueryUtils.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test-framework/org/apache/lucene/search/QueryUtils.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test-framework/org/apache/lucene/search/QueryUtils.java Mon May 9 15:24:04 2011
@@ -2,13 +2,14 @@ package org.apache.lucene.search;
import java.io.IOException;
import java.util.Random;
+import java.lang.reflect.Method;
import junit.framework.Assert;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
-import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.MultiReader;
@@ -166,7 +167,7 @@ public class QueryUtils {
throws IOException {
Directory d = new MockDirectoryWrapper(random, new RAMDirectory());
IndexWriter w = new IndexWriter(d, new IndexWriterConfig(
- TEST_VERSION_CURRENT, new MockAnalyzer()));
+ TEST_VERSION_CURRENT, new MockAnalyzer(random)));
for (int i = 0; i < numDeletedDocs; i++) {
w.addDocument(new Document());
}
@@ -309,7 +310,7 @@ public class QueryUtils {
// confirm that skipping beyond the last doc, on the
// previous reader, hits NO_MORE_DOCS
final IndexReader previousReader = lastReader[0];
- IndexSearcher indexSearcher = LuceneTestCase.newSearcher(previousReader);
+ IndexSearcher indexSearcher = LuceneTestCase.newSearcher(previousReader, false);
Weight w = q.weight(indexSearcher);
Scorer scorer = w.scorer((AtomicReaderContext)previousReader.getTopReaderContext(), ScorerContext.def());
if (scorer != null) {
Modified: lucene/dev/branches/bulkpostings/lucene/src/test-framework/org/apache/lucene/store/MockDirectoryWrapper.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test-framework/org/apache/lucene/store/MockDirectoryWrapper.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test-framework/org/apache/lucene/store/MockDirectoryWrapper.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test-framework/org/apache/lucene/store/MockDirectoryWrapper.java Mon May 9 15:24:04 2011
@@ -32,7 +32,9 @@ import java.util.Random;
import java.util.Set;
import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.codecs.CodecProvider;
import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.ThrottledIndexOutput;
import org.apache.lucene.util._TestUtil;
/**
@@ -68,6 +70,7 @@ public class MockDirectoryWrapper extend
private Set<String> createdFiles;
Set<String> openFilesForWrite = new HashSet<String>();
volatile boolean crashed;
+ private ThrottledIndexOutput throttledOutput;
// use this for tracking files for crash.
// additionally: provides debugging information in case you leave one open
@@ -113,6 +116,10 @@ public class MockDirectoryWrapper extend
public void setPreventDoubleWrite(boolean value) {
preventDoubleWrite = value;
}
+
+ public void setThrottledIndexOutput(ThrottledIndexOutput throttledOutput) {
+ this.throttledOutput = throttledOutput;
+ }
@Override
public synchronized void sync(Collection<String> names) throws IOException {
@@ -347,7 +354,7 @@ public class MockDirectoryWrapper extend
IndexOutput io = new MockIndexOutputWrapper(this, delegate.createOutput(name), name);
openFileHandles.put(io, new RuntimeException("unclosed IndexOutput"));
openFilesForWrite.add(name);
- return io;
+ return throttledOutput == null ? io : throttledOutput.newFromDelegate(io);
}
@Override
@@ -419,12 +426,30 @@ public class MockDirectoryWrapper extend
throw new RuntimeException("MockDirectoryWrapper: cannot close: there are still open files: " + openFiles, cause);
}
open = false;
- if (checkIndexOnClose && IndexReader.indexExists(this)) {
- _TestUtil.checkIndex(this);
+ if (checkIndexOnClose) {
+ if (LuceneTestCase.VERBOSE) {
+ System.out.println("\nNOTE: MockDirectoryWrapper: now run CheckIndex");
+ }
+ if (codecProvider != null) {
+ if (IndexReader.indexExists(this, codecProvider)) {
+ _TestUtil.checkIndex(this, codecProvider);
+ }
+ } else {
+ if (IndexReader.indexExists(this)) {
+ _TestUtil.checkIndex(this);
+ }
+ }
}
delegate.close();
}
+ private CodecProvider codecProvider;
+
+ // We pass this CodecProvider to checkIndex when dir is closed...
+ public void setCodecProvider(CodecProvider cp) {
+ codecProvider = cp;
+ }
+
boolean open = true;
public synchronized boolean isOpen() {
@@ -559,4 +584,5 @@ public class MockDirectoryWrapper extend
maybeYield();
delegate.copy(to, src, dest);
}
+
}
Modified: lucene/dev/branches/bulkpostings/lucene/src/test-framework/org/apache/lucene/util/LineFileDocs.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test-framework/org/apache/lucene/util/LineFileDocs.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test-framework/org/apache/lucene/util/LineFileDocs.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test-framework/org/apache/lucene/util/LineFileDocs.java Mon May 9 15:24:04 2011
@@ -24,7 +24,6 @@ import java.io.IOException;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.io.InputStream;
-import java.io.BufferedInputStream;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.zip.GZIPInputStream;
import java.util.Random;
@@ -79,8 +78,7 @@ public class LineFileDocs implements Clo
size *= 2.8;
}
- final InputStream in = new BufferedInputStream(is, BUFFER_SIZE);
- reader = new BufferedReader(new InputStreamReader(in, "UTF-8"), BUFFER_SIZE);
+ reader = new BufferedReader(new InputStreamReader(is, "UTF-8"), BUFFER_SIZE);
// Override sizes for currently "known" line files:
if (path.equals("europarl.lines.txt.gz")) {
@@ -128,7 +126,7 @@ public class LineFileDocs implements Clo
body = new Field("body", "", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
doc.add(body);
- id = new Field("id", "", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS);
+ id = new Field("docid", "", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS);
doc.add(id);
date = new Field("date", "", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS);
Modified: lucene/dev/branches/bulkpostings/lucene/src/test-framework/org/apache/lucene/util/LuceneTestCase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test-framework/org/apache/lucene/util/LuceneTestCase.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test-framework/org/apache/lucene/util/LuceneTestCase.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test-framework/org/apache/lucene/util/LuceneTestCase.java Mon May 9 15:24:04 2011
@@ -116,7 +116,7 @@ public abstract class LuceneTestCase ext
* If this is set, it is the only method that should run.
*/
static final String TEST_METHOD;
-
+
/** Create indexes in this directory, optimally use a subdir, named after the test */
public static final File TEMP_DIR;
static {
@@ -128,12 +128,17 @@ public abstract class LuceneTestCase ext
TEMP_DIR = new File(s);
TEMP_DIR.mkdirs();
}
+
+ /** set of directories we created, in afterclass we try to clean these up */
+ static final Set<String> tempDirs = Collections.synchronizedSet(new HashSet<String>());
// by default we randomly pick a different codec for
// each test case (non-J4 tests) and each test class (J4
// tests)
/** Gets the codec to run tests with. */
public static final String TEST_CODEC = System.getProperty("tests.codec", "randomPerField");
+ /** Gets the codecprovider to run tests with */
+ public static final String TEST_CODECPROVIDER = System.getProperty("tests.codecprovider", "random");
/** Gets the locale to run tests with */
public static final String TEST_LOCALE = System.getProperty("tests.locale", "random");
/** Gets the timezone to run tests with */
@@ -160,11 +165,11 @@ public abstract class LuceneTestCase ext
* multiply it by the number of iterations
*/
public static final int RANDOM_MULTIPLIER = Integer.parseInt(System.getProperty("tests.multiplier", "1"));
-
+
private int savedBoolMaxClauseCount;
private volatile Thread.UncaughtExceptionHandler savedUncaughtExceptionHandler = null;
-
+
/** Used to track if setUp and tearDown are called correctly from subclasses */
private boolean setup;
@@ -186,28 +191,28 @@ public abstract class LuceneTestCase ext
private static class UncaughtExceptionEntry {
public final Thread thread;
public final Throwable exception;
-
+
public UncaughtExceptionEntry(Thread thread, Throwable exception) {
this.thread = thread;
this.exception = exception;
}
}
private List<UncaughtExceptionEntry> uncaughtExceptions = Collections.synchronizedList(new ArrayList<UncaughtExceptionEntry>());
-
+
// saves default codec: we do this statically as many build indexes in @beforeClass
private static String savedDefaultCodec;
// default codec: not set when we use a per-field provider.
private static Codec codec;
// default codec provider
private static CodecProvider savedCodecProvider;
-
+
private static Locale locale;
private static Locale savedLocale;
private static TimeZone timeZone;
private static TimeZone savedTimeZone;
-
+
private static Map<MockDirectoryWrapper,StackTraceElement[]> stores;
-
+
private static final String[] TEST_CODECS = new String[] {"MockSep", "MockFixedIntBlock", "MockVariableIntBlock", "MockRandom"};
private static void swapCodec(Codec c, CodecProvider cp) {
@@ -285,7 +290,7 @@ public abstract class LuceneTestCase ext
// randomly picks from core and test codecs
static String pickRandomCodec(Random rnd) {
- int idx = rnd.nextInt(CodecProvider.CORE_CODECS.length +
+ int idx = rnd.nextInt(CodecProvider.CORE_CODECS.length +
TEST_CODECS.length);
if (idx < CodecProvider.CORE_CODECS.length) {
return CodecProvider.CORE_CODECS[idx];
@@ -318,22 +323,46 @@ public abstract class LuceneTestCase ext
/** @deprecated (4.0) until we fix no-fork problems in solr tests */
@Deprecated
private static List<String> testClassesRun = new ArrayList<String>();
-
+
@BeforeClass
public static void beforeClassLuceneTestCaseJ4() {
staticSeed = "random".equals(TEST_SEED) ? seedRand.nextLong() : TwoLongs.fromString(TEST_SEED).l1;
random.setSeed(staticSeed);
+ tempDirs.clear();
stores = Collections.synchronizedMap(new IdentityHashMap<MockDirectoryWrapper,StackTraceElement[]>());
savedCodecProvider = CodecProvider.getDefault();
- if ("randomPerField".equals(TEST_CODEC)) {
- if (random.nextInt(4) == 0) { // preflex-only setup
- codec = installTestCodecs("PreFlex", CodecProvider.getDefault());
- } else { // per-field setup
- CodecProvider.setDefault(new RandomCodecProvider(random));
+ if ("random".equals(TEST_CODECPROVIDER)) {
+ if ("randomPerField".equals(TEST_CODEC)) {
+ if (random.nextInt(4) == 0) { // preflex-only setup
+ codec = installTestCodecs("PreFlex", CodecProvider.getDefault());
+ } else { // per-field setup
+ CodecProvider.setDefault(new RandomCodecProvider(random));
+ codec = installTestCodecs(TEST_CODEC, CodecProvider.getDefault());
+ }
+ } else { // ordinary setup
codec = installTestCodecs(TEST_CODEC, CodecProvider.getDefault());
}
- } else { // ordinary setup
- codec = installTestCodecs(TEST_CODEC, CodecProvider.getDefault());
+ } else {
+ // someone specified their own codecprovider by class
+ try {
+ Class<? extends CodecProvider> cpClazz = Class.forName(TEST_CODECPROVIDER).asSubclass(CodecProvider.class);
+ CodecProvider cp = cpClazz.newInstance();
+ String codecName;
+ if (TEST_CODEC.startsWith("random")) { // TODO: somehow do random per-field?!
+ Set<String> codecSet = cp.listAll();
+ String availableCodecs[] = codecSet.toArray(new String[codecSet.size()]);
+ codecName = availableCodecs[random.nextInt(availableCodecs.length)];
+ } else {
+ codecName = TEST_CODEC;
+ }
+
+ codec = cp.lookup(codecName);
+ cp.setDefaultFieldCodec(codecName);
+ CodecProvider.setDefault(cp);
+ } catch (Exception e) {
+ System.err.println("Could not instantiate CodecProvider: " + TEST_CODECPROVIDER);
+ throw new RuntimeException(e);
+ }
}
savedLocale = Locale.getDefault();
locale = TEST_LOCALE.equals("random") ? randomLocale(random) : localeForName(TEST_LOCALE);
@@ -343,7 +372,7 @@ public abstract class LuceneTestCase ext
TimeZone.setDefault(timeZone);
testsFailed = false;
}
-
+
@AfterClass
public static void afterClassLuceneTestCaseJ4() {
if (! "false".equals(TEST_CLEAN_THREADS)) {
@@ -356,16 +385,13 @@ public abstract class LuceneTestCase ext
String codecDescription;
CodecProvider cp = CodecProvider.getDefault();
- if ("randomPerField".equals(TEST_CODEC)) {
- if (cp instanceof RandomCodecProvider)
- codecDescription = cp.toString();
- else
- codecDescription = "PreFlex";
+ if ("randomPerField".equals(TEST_CODEC) && cp instanceof RandomCodecProvider) {
+ codecDescription = cp.toString();
} else {
codecDescription = codec.toString();
}
-
- if (CodecProvider.getDefault() == savedCodecProvider)
+
+ if ("random".equals(TEST_CODECPROVIDER) && CodecProvider.getDefault() == savedCodecProvider)
removeTestCodecs(codec, CodecProvider.getDefault());
CodecProvider.setDefault(savedCodecProvider);
Locale.setDefault(savedLocale);
@@ -394,14 +420,14 @@ public abstract class LuceneTestCase ext
stores = null;
// if verbose or tests failed, report some information back
if (VERBOSE || testsFailed)
- System.err.println("NOTE: test params are: codec=" + codecDescription +
- ", locale=" + locale +
+ System.err.println("NOTE: test params are: codec=" + codecDescription +
+ ", locale=" + locale +
", timezone=" + (timeZone == null ? "(null)" : timeZone.getID()));
if (testsFailed) {
System.err.println("NOTE: all tests run in this JVM:");
System.err.println(Arrays.toString(testClassesRun.toArray()));
- System.err.println("NOTE: " + System.getProperty("os.name") + " "
- + System.getProperty("os.version") + " "
+ System.err.println("NOTE: " + System.getProperty("os.name") + " "
+ + System.getProperty("os.version") + " "
+ System.getProperty("os.arch") + "/"
+ System.getProperty("java.vendor") + " "
+ System.getProperty("java.version") + " "
@@ -411,10 +437,20 @@ public abstract class LuceneTestCase ext
+ "free=" + Runtime.getRuntime().freeMemory() + ","
+ "total=" + Runtime.getRuntime().totalMemory());
}
+ // clear out any temp directories if we can
+ if (!testsFailed) {
+ for (String path : tempDirs) {
+ try {
+ _TestUtil.rmDir(new File(path));
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+ }
+ }
}
private static boolean testsFailed; /* true if any tests failed */
-
+
// This is how we get control when errors occur.
// Think of this as start/end/success/failed
// events.
@@ -449,7 +485,7 @@ public abstract class LuceneTestCase ext
LuceneTestCase.this.name = method.getName();
super.starting(method);
}
-
+
};
@Before
@@ -467,7 +503,7 @@ public abstract class LuceneTestCase ext
savedUncaughtExceptionHandler.uncaughtException(t, e);
}
});
-
+
savedBoolMaxClauseCount = BooleanQuery.getMaxClauseCount();
}
@@ -499,7 +535,7 @@ public abstract class LuceneTestCase ext
if ("perMethod".equals(TEST_CLEAN_THREADS)) {
int rogueThreads = threadCleanup("test method: '" + getName() + "'");
if (rogueThreads > 0) {
- System.err.println("RESOURCE LEAK: test method: '" + getName()
+ System.err.println("RESOURCE LEAK: test method: '" + getName()
+ "' left " + rogueThreads + " thread(s) running");
// TODO: fail, but print seed for now.
if (!testsFailed && uncaughtExceptions.isEmpty()) {
@@ -521,18 +557,18 @@ public abstract class LuceneTestCase ext
fail("Some threads threw uncaught exceptions!");
}
- // calling assertSaneFieldCaches here isn't as useful as having test
- // classes call it directly from the scope where the index readers
- // are used, because they could be gc'ed just before this tearDown
+ // calling assertSaneFieldCaches here isn't as useful as having test
+ // classes call it directly from the scope where the index readers
+ // are used, because they could be gc'ed just before this tearDown
// method is called.
//
// But it's better then nothing.
//
- // If you are testing functionality that you know for a fact
- // "violates" FieldCache sanity, then you should either explicitly
+ // If you are testing functionality that you know for a fact
+ // "violates" FieldCache sanity, then you should either explicitly
// call purgeFieldCache at the end of your test method, or refactor
- // your Test class so that the inconsistant FieldCache usages are
- // isolated in distinct test methods
+ // your Test class so that the inconsistant FieldCache usages are
+ // isolated in distinct test methods
assertSaneFieldCaches(getTestLabel());
} finally {
@@ -543,14 +579,14 @@ public abstract class LuceneTestCase ext
private final static int THREAD_STOP_GRACE_MSEC = 50;
// jvm-wide list of 'rogue threads' we found, so they only get reported once.
private final static IdentityHashMap<Thread,Boolean> rogueThreads = new IdentityHashMap<Thread,Boolean>();
-
+
static {
// just a hack for things like eclipse test-runner threads
for (Thread t : Thread.getAllStackTraces().keySet()) {
rogueThreads.put(t, true);
}
}
-
+
/**
* Looks for leftover running threads, trying to kill them off,
* so they don't fail future tests.
@@ -561,20 +597,20 @@ public abstract class LuceneTestCase ext
Thread[] stillRunning = new Thread[Thread.activeCount()+1];
int threadCount = 0;
int rogueCount = 0;
-
+
if ((threadCount = Thread.enumerate(stillRunning)) > 1) {
while (threadCount == stillRunning.length) {
// truncated response
stillRunning = new Thread[stillRunning.length*2];
threadCount = Thread.enumerate(stillRunning);
}
-
+
for (int i = 0; i < threadCount; i++) {
Thread t = stillRunning[i];
-
- if (t.isAlive() &&
- !rogueThreads.containsKey(t) &&
- t != Thread.currentThread() &&
+
+ if (t.isAlive() &&
+ !rogueThreads.containsKey(t) &&
+ t != Thread.currentThread() &&
/* its ok to keep your searcher across test cases */
(t.getName().startsWith("LuceneTestCase") && context.startsWith("test method")) == false) {
System.err.println("WARNING: " + context + " left thread running: " + t);
@@ -599,7 +635,7 @@ public abstract class LuceneTestCase ext
}
return rogueCount;
}
-
+
/**
* Asserts that FieldCacheSanityChecker does not detect any
* problems with FieldCache.DEFAULT.
@@ -642,13 +678,13 @@ public abstract class LuceneTestCase ext
}
}
-
+
// @deprecated (4.0) These deprecated methods should be removed soon, when all tests using no Epsilon are fixed:
@Deprecated
static public void assertEquals(double expected, double actual) {
assertEquals(null, expected, actual);
}
-
+
@Deprecated
static public void assertEquals(String message, double expected, double actual) {
assertEquals(message, Double.valueOf(expected), Double.valueOf(actual));
@@ -663,18 +699,18 @@ public abstract class LuceneTestCase ext
static public void assertEquals(String message, float expected, float actual) {
assertEquals(message, Float.valueOf(expected), Float.valueOf(actual));
}
-
+
// Replacement for Assume jUnit class, so we can add a message with explanation:
-
+
private static final class TestIgnoredException extends RuntimeException {
TestIgnoredException(String msg) {
super(msg);
}
-
+
TestIgnoredException(String msg, Throwable t) {
super(msg, t);
}
-
+
@Override
public String getMessage() {
StringBuilder sb = new StringBuilder(super.getMessage());
@@ -682,7 +718,7 @@ public abstract class LuceneTestCase ext
sb.append(" - ").append(getCause());
return sb.toString();
}
-
+
// only this one is called by our code, exception is not used outside this class:
@Override
public void printStackTrace(PrintStream s) {
@@ -694,19 +730,19 @@ public abstract class LuceneTestCase ext
}
}
}
-
+
public static void assumeTrue(String msg, boolean b) {
Assume.assumeNoException(b ? null : new TestIgnoredException(msg));
}
-
+
public static void assumeFalse(String msg, boolean b) {
assumeTrue(msg, !b);
}
-
+
public static void assumeNoException(String msg, Exception e) {
Assume.assumeNoException(e == null ? null : new TestIgnoredException(msg, e));
}
-
+
public static <T> Set<T> asSet(T... args) {
return new HashSet<T>(Arrays.asList(args));
}
@@ -764,13 +800,15 @@ public abstract class LuceneTestCase ext
c.setTermIndexInterval(_TestUtil.nextInt(r, 1, 1000));
}
if (r.nextBoolean()) {
- c.setMaxThreadStates(_TestUtil.nextInt(r, 1, 20));
+ c.setIndexerThreadPool(new ThreadAffinityDocumentsWriterThreadPool(_TestUtil.nextInt(r, 1, 20)));
}
if (r.nextBoolean()) {
- c.setMergePolicy(new MockRandomMergePolicy(r));
- } else {
+ c.setMergePolicy(newTieredMergePolicy());
+ } else if (r.nextBoolean()) {
c.setMergePolicy(newLogMergePolicy());
+ } else {
+ c.setMergePolicy(new MockRandomMergePolicy(r));
}
c.setReaderPooling(r.nextBoolean());
@@ -782,6 +820,10 @@ public abstract class LuceneTestCase ext
return newLogMergePolicy(random);
}
+ public static TieredMergePolicy newTieredMergePolicy() {
+ return newTieredMergePolicy(random);
+ }
+
public static LogMergePolicy newLogMergePolicy(Random r) {
LogMergePolicy logmp = r.nextBoolean() ? new LogDocMergePolicy() : new LogByteSizeMergePolicy();
logmp.setUseCompoundFile(r.nextBoolean());
@@ -794,17 +836,22 @@ public abstract class LuceneTestCase ext
return logmp;
}
- public static LogMergePolicy newInOrderLogMergePolicy() {
- LogMergePolicy logmp = newLogMergePolicy();
- logmp.setRequireContiguousMerge(true);
- return logmp;
- }
-
- public static LogMergePolicy newInOrderLogMergePolicy(int mergeFactor) {
- LogMergePolicy logmp = newLogMergePolicy();
- logmp.setMergeFactor(mergeFactor);
- logmp.setRequireContiguousMerge(true);
- return logmp;
+ public static TieredMergePolicy newTieredMergePolicy(Random r) {
+ TieredMergePolicy tmp = new TieredMergePolicy();
+ if (r.nextInt(3) == 2) {
+ tmp.setMaxMergeAtOnce(2);
+ tmp.setMaxMergeAtOnceExplicit(2);
+ } else {
+ tmp.setMaxMergeAtOnce(_TestUtil.nextInt(r, 2, 20));
+ tmp.setMaxMergeAtOnceExplicit(_TestUtil.nextInt(r, 2, 30));
+ }
+ tmp.setMaxMergedSegmentMB(0.2 + r.nextDouble() * 2.0);
+ tmp.setFloorSegmentMB(0.2 + r.nextDouble() * 2.0);
+ tmp.setExpungeDeletesPctAllowed(0.0 + r.nextDouble() * 30.0);
+ tmp.setSegmentsPerTier(_TestUtil.nextInt(r, 2, 20));
+ tmp.setUseCompoundFile(r.nextBoolean());
+ tmp.setNoCFSRatio(0.1 + r.nextDouble()*0.8);
+ return tmp;
}
public static LogMergePolicy newLogMergePolicy(boolean useCFS) {
@@ -839,7 +886,7 @@ public abstract class LuceneTestCase ext
public static MockDirectoryWrapper newDirectory() throws IOException {
return newDirectory(random);
}
-
+
/**
* Returns a new Directory instance, using the specified random.
* See {@link #newDirectory()} for more information.
@@ -850,7 +897,7 @@ public abstract class LuceneTestCase ext
stores.put(dir, Thread.currentThread().getStackTrace());
return dir;
}
-
+
/**
* Returns a new Directory instance, with contents copied from the
* provided directory. See {@link #newDirectory()} for more
@@ -859,23 +906,23 @@ public abstract class LuceneTestCase ext
public static MockDirectoryWrapper newDirectory(Directory d) throws IOException {
return newDirectory(random, d);
}
-
+
/** Returns a new FSDirectory instance over the given file, which must be a folder. */
public static MockDirectoryWrapper newFSDirectory(File f) throws IOException {
return newFSDirectory(f, null);
}
-
+
/** Returns a new FSDirectory instance over the given file, which must be a folder. */
public static MockDirectoryWrapper newFSDirectory(File f, LockFactory lf) throws IOException {
String fsdirClass = TEST_DIRECTORY;
if (fsdirClass.equals("random")) {
fsdirClass = FS_DIRECTORIES[random.nextInt(FS_DIRECTORIES.length)];
}
-
+
if (fsdirClass.indexOf(".") == -1) {// if not fully qualified, assume .store
fsdirClass = "org.apache.lucene.store." + fsdirClass;
}
-
+
Class<? extends FSDirectory> clazz;
try {
try {
@@ -883,11 +930,11 @@ public abstract class LuceneTestCase ext
} catch (ClassCastException e) {
// TEST_DIRECTORY is not a sub-class of FSDirectory, so draw one at random
fsdirClass = FS_DIRECTORIES[random.nextInt(FS_DIRECTORIES.length)];
-
+
if (fsdirClass.indexOf(".") == -1) {// if not fully qualified, assume .store
fsdirClass = "org.apache.lucene.store." + fsdirClass;
}
-
+
clazz = Class.forName(fsdirClass).asSubclass(FSDirectory.class);
}
MockDirectoryWrapper dir = new MockDirectoryWrapper(random, newFSDirectoryImpl(clazz, f, lf));
@@ -897,7 +944,7 @@ public abstract class LuceneTestCase ext
throw new RuntimeException(e);
}
}
-
+
/**
* Returns a new Directory instance, using the specified random
* with contents copied from the provided directory. See
@@ -955,44 +1002,44 @@ public abstract class LuceneTestCase ext
public static Field newField(Random random, String name, String value, Store store, Index index, TermVector tv) {
if (!index.isIndexed())
return new Field(name, value, store, index);
-
+
if (!store.isStored() && random.nextBoolean())
store = Store.YES; // randomly store it
-
+
tv = randomTVSetting(random, tv);
-
+
return new Field(name, value, store, index, tv);
}
-
- static final TermVector tvSettings[] = {
- TermVector.NO, TermVector.YES, TermVector.WITH_OFFSETS,
- TermVector.WITH_POSITIONS, TermVector.WITH_POSITIONS_OFFSETS
+
+ static final TermVector tvSettings[] = {
+ TermVector.NO, TermVector.YES, TermVector.WITH_OFFSETS,
+ TermVector.WITH_POSITIONS, TermVector.WITH_POSITIONS_OFFSETS
};
-
+
private static TermVector randomTVSetting(Random random, TermVector minimum) {
switch(minimum) {
case NO: return tvSettings[_TestUtil.nextInt(random, 0, tvSettings.length-1)];
case YES: return tvSettings[_TestUtil.nextInt(random, 1, tvSettings.length-1)];
- case WITH_OFFSETS: return random.nextBoolean() ? TermVector.WITH_OFFSETS
+ case WITH_OFFSETS: return random.nextBoolean() ? TermVector.WITH_OFFSETS
: TermVector.WITH_POSITIONS_OFFSETS;
- case WITH_POSITIONS: return random.nextBoolean() ? TermVector.WITH_POSITIONS
+ case WITH_POSITIONS: return random.nextBoolean() ? TermVector.WITH_POSITIONS
: TermVector.WITH_POSITIONS_OFFSETS;
default: return TermVector.WITH_POSITIONS_OFFSETS;
}
}
-
+
/** return a random Locale from the available locales on the system */
public static Locale randomLocale(Random random) {
Locale locales[] = Locale.getAvailableLocales();
return locales[random.nextInt(locales.length)];
}
-
+
/** return a random TimeZone from the available timezones on the system */
public static TimeZone randomTimeZone(Random random) {
String tzIds[] = TimeZone.getAvailableIDs();
return TimeZone.getTimeZone(tzIds[random.nextInt(tzIds.length)]);
}
-
+
/** return a Locale object equivalent to its programmatic name */
public static Locale localeForName(String localeName) {
String elements[] = localeName.split("\\_");
@@ -1014,7 +1061,7 @@ public abstract class LuceneTestCase ext
"RAMDirectory",
FS_DIRECTORIES[0], FS_DIRECTORIES[1], FS_DIRECTORIES[2]
};
-
+
public static String randomDirectory(Random random) {
if (random.nextInt(10) == 0) {
return CORE_DIRECTORIES[random.nextInt(CORE_DIRECTORIES.length)];
@@ -1026,20 +1073,21 @@ public abstract class LuceneTestCase ext
private static Directory newFSDirectoryImpl(
Class<? extends FSDirectory> clazz, File file, LockFactory lockFactory)
throws IOException {
+ FSDirectory d = null;
try {
// Assuming every FSDirectory has a ctor(File), but not all may take a
// LockFactory too, so setting it afterwards.
Constructor<? extends FSDirectory> ctor = clazz.getConstructor(File.class);
- FSDirectory d = ctor.newInstance(file);
- if (lockFactory != null) {
- d.setLockFactory(lockFactory);
- }
- return d;
+ d = ctor.newInstance(file);
} catch (Exception e) {
- return FSDirectory.open(file);
+ d = FSDirectory.open(file);
+ }
+ if (lockFactory != null) {
+ d.setLockFactory(lockFactory);
}
+ return d;
}
-
+
static Directory newDirectoryImpl(Random random, String clazzName) {
if (clazzName.equals("random"))
clazzName = randomDirectory(random);
@@ -1052,6 +1100,7 @@ public abstract class LuceneTestCase ext
final File tmpFile = File.createTempFile("test", "tmp", TEMP_DIR);
tmpFile.delete();
tmpFile.mkdir();
+ tempDirs.add(tmpFile.getAbsolutePath());
return newFSDirectoryImpl(clazz.asSubclass(FSDirectory.class), tmpFile, null);
}
@@ -1059,18 +1108,31 @@ public abstract class LuceneTestCase ext
return clazz.newInstance();
} catch (Exception e) {
throw new RuntimeException(e);
- }
+ }
}
-
+
/** create a new searcher over the reader.
* This searcher might randomly use threads. */
public static IndexSearcher newSearcher(IndexReader r) throws IOException {
+ return newSearcher(r, true);
+ }
+
+ /** create a new searcher over the reader.
+ * This searcher might randomly use threads.
+ * if <code>maybeWrap</code> is true, this searcher might wrap the reader
+ * with one that returns null for getSequentialSubReaders.
+ */
+ public static IndexSearcher newSearcher(IndexReader r, boolean maybeWrap) throws IOException {
if (random.nextBoolean()) {
- return new IndexSearcher(r);
+ if (maybeWrap && random.nextBoolean()) {
+ return new IndexSearcher(new SlowMultiReaderWrapper(r));
+ } else {
+ return new IndexSearcher(r);
+ }
} else {
int threads = 0;
- final ExecutorService ex = (random.nextBoolean()) ? null
- : Executors.newFixedThreadPool(threads = _TestUtil.nextInt(random, 1, 8),
+ final ExecutorService ex = (random.nextBoolean()) ? null
+ : Executors.newFixedThreadPool(threads = _TestUtil.nextInt(random, 1, 8),
new NamedThreadFactory("LuceneTestCase"));
if (ex != null && VERBOSE) {
System.out.println("NOTE: newSearcher using ExecutorService with " + threads + " threads");
@@ -1095,12 +1157,12 @@ public abstract class LuceneTestCase ext
public String getName() {
return this.name;
}
-
+
/** Gets a resource from the classpath as {@link File}. This method should only be used,
* if a real file is needed. To get a stream, code should prefer
* {@link Class#getResourceAsStream} using {@code this.getClass()}.
*/
-
+
protected File getDataFile(String name) throws IOException {
try {
return new File(this.getClass().getResource(name).toURI());
@@ -1111,11 +1173,11 @@ public abstract class LuceneTestCase ext
// We get here from InterceptTestCaseEvents on the 'failed' event....
public void reportAdditionalFailureInfo() {
- System.err.println("NOTE: reproduce with: ant test -Dtestcase=" + getClass().getSimpleName()
+ System.err.println("NOTE: reproduce with: ant test -Dtestcase=" + getClass().getSimpleName()
+ " -Dtestmethod=" + getName() + " -Dtests.seed=" + new TwoLongs(staticSeed, seed)
+ reproduceWithExtraParams());
}
-
+
// extra params that were overridden needed to reproduce the command
private String reproduceWithExtraParams() {
StringBuilder sb = new StringBuilder();
@@ -1131,12 +1193,12 @@ public abstract class LuceneTestCase ext
private static long staticSeed;
// seed for individual test methods, changed in @before
private long seed;
-
+
private static final Random seedRand = new Random();
protected static final Random random = new Random(0);
private String name = "<unknown>";
-
+
/**
* Annotation for tests that should only be run during nightly builds.
*/
@@ -1144,7 +1206,7 @@ public abstract class LuceneTestCase ext
@Inherited
@Retention(RetentionPolicy.RUNTIME)
public @interface Nightly {}
-
+
/** optionally filters the tests to be run by TEST_METHOD */
public static class LuceneTestCaseRunner extends BlockJUnit4ClassRunner {
private List<FrameworkMethod> testMethods;
@@ -1174,11 +1236,11 @@ public abstract class LuceneTestCase ext
testMethods.add(new FrameworkMethod(m));
}
}
-
+
if (testMethods.isEmpty()) {
throw new RuntimeException("No runnable methods!");
}
-
+
if (TEST_NIGHTLY == false) {
if (getTestClass().getJavaClass().isAnnotationPresent(Nightly.class)) {
/* the test class is annotated with nightly, remove all methods */
@@ -1213,17 +1275,15 @@ public abstract class LuceneTestCase ext
// only print iteration info if the user requested more than one iterations
boolean verbose = VERBOSE && TEST_ITER > 1;
- int lastIterFailed = -1;
for (int i = 0; i < TEST_ITER; i++) {
if (verbose) {
System.out.println("\nNOTE: running iter=" + (1+i) + " of " + TEST_ITER);
}
super.runChild(arg0, arg1);
if (testsFailed) {
- lastIterFailed = i;
- if (i == TEST_ITER_MIN - 1) {
+ if (i >= TEST_ITER_MIN - 1) {
if (verbose) {
- System.out.println("\nNOTE: iteration " + lastIterFailed + " failed !");
+ System.out.println("\nNOTE: iteration " + i + " failed !");
}
break;
}
@@ -1241,9 +1301,9 @@ public abstract class LuceneTestCase ext
@Override
public boolean shouldRun(Description d) {
return TEST_METHOD == null || d.getMethodName().equals(TEST_METHOD);
- }
+ }
};
-
+
try {
f.apply(this);
} catch (NoTestsRemainException e) {
@@ -1251,12 +1311,12 @@ public abstract class LuceneTestCase ext
}
}
}
-
+
private static class RandomCodecProvider extends CodecProvider {
private List<Codec> knownCodecs = new ArrayList<Codec>();
private Map<String,Codec> previousMappings = new HashMap<String,Codec>();
private final int perFieldSeed;
-
+
RandomCodecProvider(Random random) {
this.perFieldSeed = random.nextInt();
register(new StandardCodec());
@@ -1288,13 +1348,13 @@ public abstract class LuceneTestCase ext
}
return codec.name;
}
-
+
@Override
public synchronized String toString() {
return "RandomCodecProvider: " + previousMappings.toString();
}
}
-
+
@Ignore("just a hack")
public final void alwaysIgnoredTestMethod() {}
}
Modified: lucene/dev/branches/bulkpostings/lucene/src/test-framework/org/apache/lucene/util/_TestUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test-framework/org/apache/lucene/util/_TestUtil.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test-framework/org/apache/lucene/util/_TestUtil.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test-framework/org/apache/lucene/util/_TestUtil.java Mon May 9 15:24:04 2011
@@ -34,8 +34,6 @@ import java.util.HashMap;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;
-import org.junit.Assert;
-
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.CheckIndex;
@@ -43,17 +41,22 @@ import org.apache.lucene.index.Concurren
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.LogMergePolicy;
+import org.apache.lucene.index.MergePolicy;
import org.apache.lucene.index.MergeScheduler;
+import org.apache.lucene.index.TieredMergePolicy;
import org.apache.lucene.index.codecs.Codec;
import org.apache.lucene.index.codecs.CodecProvider;
import org.apache.lucene.store.Directory;
+import org.junit.Assert;
public class _TestUtil {
/** Returns temp dir, containing String arg in its name;
* does not create the directory. */
public static File getTempDir(String desc) {
- return new File(LuceneTestCase.TEMP_DIR, desc + "." + new Random().nextLong());
+ File f = new File(LuceneTestCase.TEMP_DIR, desc + "." + new Random().nextLong());
+ LuceneTestCase.tempDirs.add(f.getAbsolutePath());
+ return f;
}
/**
@@ -88,6 +91,7 @@ public class _TestUtil {
rmDir(destDir);
destDir.mkdir();
+ LuceneTestCase.tempDirs.add(destDir.getAbsolutePath());
while (entries.hasMoreElements()) {
ZipEntry entry = entries.nextElement();
@@ -157,6 +161,19 @@ public class _TestUtil {
return start + r.nextInt(end-start+1);
}
+ public static String randomSimpleString(Random r) {
+ final int end = r.nextInt(10);
+ if (end == 0) {
+ // allow 0 length
+ return "";
+ }
+ final char[] buffer = new char[end];
+ for (int i = 0; i < end; i++) {
+ buffer[i] = (char) _TestUtil.nextInt(r, 97, 102);
+ }
+ return new String(buffer, 0, end);
+ }
+
/** Returns random string, including full unicode range. */
public static String randomUnicodeString(Random r) {
return randomUnicodeString(r, 20);
@@ -172,22 +189,35 @@ public class _TestUtil {
return "";
}
final char[] buffer = new char[end];
- for (int i = 0; i < end; i++) {
- int t = r.nextInt(5);
+ randomFixedLengthUnicodeString(r, buffer, 0, buffer.length);
+ return new String(buffer, 0, end);
+ }
- if (0 == t && i < end - 1) {
+ /**
+ * Fills provided char[] with valid random unicode code
+ * unit sequence.
+ */
+ public static void randomFixedLengthUnicodeString(Random random, char[] chars, int offset, int length) {
+ int i = offset;
+ final int end = offset + length;
+ while(i < end) {
+ final int t = random.nextInt(5);
+ if (0 == t && i < length - 1) {
// Make a surrogate pair
// High surrogate
- buffer[i++] = (char) nextInt(r, 0xd800, 0xdbff);
+ chars[i++] = (char) nextInt(random, 0xd800, 0xdbff);
// Low surrogate
- buffer[i] = (char) nextInt(r, 0xdc00, 0xdfff);
+ chars[i++] = (char) nextInt(random, 0xdc00, 0xdfff);
+ } else if (t <= 1) {
+ chars[i++] = (char) random.nextInt(0x80);
+ } else if (2 == t) {
+ chars[i++] = (char) nextInt(random, 0x80, 0x800);
+ } else if (3 == t) {
+ chars[i++] = (char) nextInt(random, 0x800, 0xd7ff);
+ } else if (4 == t) {
+ chars[i++] = (char) nextInt(random, 0xe000, 0xffff);
}
- else if (t <= 1) buffer[i] = (char) r.nextInt(0x80);
- else if (2 == t) buffer[i] = (char) nextInt(r, 0x80, 0x800);
- else if (3 == t) buffer[i] = (char) nextInt(r, 0x800, 0xd7ff);
- else if (4 == t) buffer[i] = (char) nextInt(r, 0xe000, 0xffff);
}
- return new String(buffer, 0, end);
}
private static final int[] blockStarts = {
@@ -294,9 +324,14 @@ public class _TestUtil {
* count lowish */
public static void reduceOpenFiles(IndexWriter w) {
// keep number of open files lowish
- LogMergePolicy lmp = (LogMergePolicy) w.getConfig().getMergePolicy();
- lmp.setMergeFactor(Math.min(5, lmp.getMergeFactor()));
-
+ MergePolicy mp = w.getConfig().getMergePolicy();
+ if (mp instanceof LogMergePolicy) {
+ LogMergePolicy lmp = (LogMergePolicy) mp;
+ lmp.setMergeFactor(Math.min(5, lmp.getMergeFactor()));
+ } else if (mp instanceof TieredMergePolicy) {
+ TieredMergePolicy tmp = (TieredMergePolicy) mp;
+ tmp.setMaxMergeAtOnce(Math.min(5, tmp.getMaxMergeAtOnce()));
+ }
MergeScheduler ms = w.getConfig().getMergeScheduler();
if (ms instanceof ConcurrentMergeScheduler) {
((ConcurrentMergeScheduler) ms).setMaxThreadCount(2);
Modified: lucene/dev/branches/bulkpostings/lucene/src/test-framework/overview.html
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test-framework/overview.html?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test-framework/overview.html (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test-framework/overview.html Mon May 9 15:24:04 2011
@@ -1,28 +1,28 @@
-<html>
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-<head>
- <title>Apache Lucene Test Framework API</title>
-</head>
-<body>
-<p>
- The Lucene Test Framework is used by Lucene as the basis for its tests.
- The framework can also be used for testing third-party code that uses
- the Lucene API.
-</p>
-</body>
-</html>
+<html>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<head>
+ <title>Apache Lucene Test Framework API</title>
+</head>
+<body>
+<p>
+ The Lucene Test Framework is used by Lucene as the basis for its tests.
+ The framework can also be used for testing third-party code that uses
+ the Lucene API.
+</p>
+</body>
+</html>
Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/TestDemo.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/TestDemo.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/TestDemo.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/TestDemo.java Mon May 9 15:24:04 2011
@@ -43,13 +43,13 @@ import org.apache.lucene.util.LuceneTest
public class TestDemo extends LuceneTestCase {
public void testDemo() throws IOException, ParseException {
- Analyzer analyzer = new MockAnalyzer();
+ Analyzer analyzer = new MockAnalyzer(random);
// Store the index in memory:
Directory directory = newDirectory();
// To store an index on disk, use this instead:
//Directory directory = FSDirectory.open("/tmp/testindex");
- RandomIndexWriter iwriter = new RandomIndexWriter(random, directory);
+ RandomIndexWriter iwriter = new RandomIndexWriter(random, directory, analyzer);
iwriter.w.setInfoStream(VERBOSE ? System.out : null);
Document doc = new Document();
String longTerm = "longtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongterm";
Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/TestExternalCodecs.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/TestExternalCodecs.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/TestExternalCodecs.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/TestExternalCodecs.java Mon May 9 15:24:04 2011
@@ -587,7 +587,9 @@ public class TestExternalCodecs extends
public FieldsProducer fieldsProducer(SegmentReadState readState)
throws IOException {
- return state.get(readState.segmentInfo.name);
+ synchronized(state) {
+ return state.get(readState.segmentInfo.name);
+ }
}
@Override
@@ -612,7 +614,7 @@ public class TestExternalCodecs extends
dir.setCheckIndexOnClose(false); // we use a custom codec provider
IndexWriter w = new IndexWriter(
dir,
- newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.WHITESPACE, true, true)).
+ newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).
setCodecProvider(provider).
setMergePolicy(newLogMergePolicy(3))
);
Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/TestMergeSchedulerExternal.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/TestMergeSchedulerExternal.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/TestMergeSchedulerExternal.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/TestMergeSchedulerExternal.java Mon May 9 15:24:04 2011
@@ -19,17 +19,21 @@ package org.apache.lucene;
import java.io.IOException;
import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.store.Directory;
import org.apache.lucene.store.MockDirectoryWrapper;
+import org.apache.lucene.store.RAMDirectory;
+import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LogMergePolicy;
import org.apache.lucene.index.MergePolicy;
import org.apache.lucene.index.ConcurrentMergeScheduler;
+import org.apache.lucene.index.MergeScheduler;
+import org.apache.lucene.index.MergePolicy.OneMerge;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
-
/**
* Holds test cases to verify external APIs are accessible
* while not being in org.apache.lucene.index package.
@@ -90,7 +94,7 @@ public class TestMergeSchedulerExternal
doc.add(idField);
IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(
- TEST_VERSION_CURRENT, new MockAnalyzer()).setMergeScheduler(new MyMergeScheduler())
+ TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergeScheduler(new MyMergeScheduler())
.setMaxBufferedDocs(2).setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH)
.setMergePolicy(newLogMergePolicy()));
LogMergePolicy logMP = (LogMergePolicy) writer.getConfig().getMergePolicy();
@@ -106,4 +110,40 @@ public class TestMergeSchedulerExternal
assertTrue(excCalled);
dir.close();
}
+
+ /**
+  * Minimal {@link MergeScheduler} declared outside the index package. It runs
+  * each pending merge inline, in whatever thread calls {@link #merge}, and prints
+  * the merge when VERBOSE is enabled.
+  */
+ private static class ReportingMergeScheduler extends MergeScheduler {
+
+   /** Drains the writer's pending merges one at a time until none is left. */
+   @Override
+   public void merge(IndexWriter writer) throws CorruptIndexException, IOException {
+     for (OneMerge pending = writer.getNextMerge(); pending != null; pending = writer.getNextMerge()) {
+       if (VERBOSE) {
+         System.out.println("executing merge " + pending.segString(writer.getDirectory()));
+       }
+       writer.merge(pending);
+     }
+   }
+
+   /** This scheduler holds no state, so there is nothing to release. */
+   @Override
+   public void close() throws CorruptIndexException, IOException {
+   }
+
+ }
+
+ /**
+  * Smoke test for a merge scheduler implemented outside the index package:
+  * the main point is that {@code ReportingMergeScheduler} compiles, but it is
+  * also plugged into a real IndexWriter to catch hidden dependencies.
+  */
+ public void testCustomMergeScheduler() throws Exception {
+   // Deliberately built without any of the randomized test helpers, so that the
+   // custom scheduler is the only non-default piece in play.
+   final Directory directory = new RAMDirectory();
+   final IndexWriterConfig config = new IndexWriterConfig(TEST_VERSION_CURRENT, null);
+   config.setMergeScheduler(new ReportingMergeScheduler());
+   final IndexWriter indexWriter = new IndexWriter(directory, config);
+   // Two separate add+commit rounds; presumably this leaves more than one
+   // segment so that optimize() has something to hand to the scheduler.
+   for (int round = 0; round < 2; round++) {
+     indexWriter.addDocument(new Document());
+     indexWriter.commit(); // trigger flush
+   }
+   indexWriter.optimize();
+   indexWriter.close();
+   directory.close();
+ }
+
}
Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/TestSearch.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/TestSearch.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/TestSearch.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/TestSearch.java Mon May 9 15:24:04 2011
@@ -72,7 +72,7 @@ public class TestSearch extends LuceneTe
private void doTestSearch(Random random, PrintWriter out, boolean useCompoundFile)
throws Exception {
Directory directory = newDirectory();
- Analyzer analyzer = new MockAnalyzer();
+ Analyzer analyzer = new MockAnalyzer(random);
IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
MergePolicy mp = conf.getMergePolicy();
if (mp instanceof LogMergePolicy) {
Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/TestSearchForDuplicates.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/TestSearchForDuplicates.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/TestSearchForDuplicates.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/TestSearchForDuplicates.java Mon May 9 15:24:04 2011
@@ -78,7 +78,7 @@ public class TestSearchForDuplicates ext
private void doTest(Random random, PrintWriter out, boolean useCompoundFiles) throws Exception {
Directory directory = newDirectory();
- Analyzer analyzer = new MockAnalyzer();
+ Analyzer analyzer = new MockAnalyzer(random);
IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
final MergePolicy mp = conf.getMergePolicy();
if (mp instanceof LogMergePolicy) {
Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/analysis/TestMockAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/analysis/TestMockAnalyzer.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/analysis/TestMockAnalyzer.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/analysis/TestMockAnalyzer.java Mon May 9 15:24:04 2011
@@ -1,5 +1,6 @@
package org.apache.lucene.analysis;
+import java.io.StringReader;
import java.util.Arrays;
import org.apache.lucene.util.automaton.Automaton;
@@ -29,7 +30,7 @@ public class TestMockAnalyzer extends Ba
/** Test a configuration that behaves a lot like WhitespaceAnalyzer */
public void testWhitespace() throws Exception {
- Analyzer a = new MockAnalyzer();
+ Analyzer a = new MockAnalyzer(random);
assertAnalyzesTo(a, "A bc defg hiJklmn opqrstuv wxy z ",
new String[] { "a", "bc", "defg", "hijklmn", "opqrstuv", "wxy", "z" });
assertAnalyzesToReuse(a, "aba cadaba shazam",
@@ -40,7 +41,7 @@ public class TestMockAnalyzer extends Ba
/** Test a configuration that behaves a lot like SimpleAnalyzer */
public void testSimple() throws Exception {
- Analyzer a = new MockAnalyzer(MockTokenizer.SIMPLE, true);
+ Analyzer a = new MockAnalyzer(random, MockTokenizer.SIMPLE, true);
assertAnalyzesTo(a, "a-bc123 defg+hijklmn567opqrstuv78wxy_z ",
new String[] { "a", "bc", "defg", "hijklmn", "opqrstuv", "wxy", "z" });
assertAnalyzesToReuse(a, "aba4cadaba-Shazam",
@@ -51,7 +52,7 @@ public class TestMockAnalyzer extends Ba
/** Test a configuration that behaves a lot like KeywordAnalyzer */
public void testKeyword() throws Exception {
- Analyzer a = new MockAnalyzer(MockTokenizer.KEYWORD, false);
+ Analyzer a = new MockAnalyzer(random, MockTokenizer.KEYWORD, false);
assertAnalyzesTo(a, "a-bc123 defg+hijklmn567opqrstuv78wxy_z ",
new String[] { "a-bc123 defg+hijklmn567opqrstuv78wxy_z " });
assertAnalyzesToReuse(a, "aba4cadaba-Shazam",
@@ -62,13 +63,13 @@ public class TestMockAnalyzer extends Ba
/** Test a configuration that behaves a lot like StopAnalyzer */
public void testStop() throws Exception {
- Analyzer a = new MockAnalyzer(MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true);
+ Analyzer a = new MockAnalyzer(random, MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true);
assertAnalyzesTo(a, "the quick brown a fox",
new String[] { "quick", "brown", "fox" },
new int[] { 2, 1, 2 });
// disable positions
- a = new MockAnalyzer(MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, false);
+ a = new MockAnalyzer(random, MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, false);
assertAnalyzesTo(a, "the quick brown a fox",
new String[] { "quick", "brown", "fox" },
new int[] { 1, 1, 1 });
@@ -81,7 +82,7 @@ public class TestMockAnalyzer extends Ba
BasicOperations.complement(
Automaton.union(
Arrays.asList(BasicAutomata.makeString("foo"), BasicAutomata.makeString("bar")))));
- Analyzer a = new MockAnalyzer(MockTokenizer.SIMPLE, true, keepWords, true);
+ Analyzer a = new MockAnalyzer(random, MockTokenizer.SIMPLE, true, keepWords, true);
assertAnalyzesTo(a, "quick foo brown bar bar fox foo",
new String[] { "foo", "bar", "bar", "foo" },
new int[] { 2, 2, 1, 2 });
@@ -90,9 +91,28 @@ public class TestMockAnalyzer extends Ba
/** Test a configuration that behaves a lot like LengthFilter */
public void testLength() throws Exception {
CharacterRunAutomaton length5 = new CharacterRunAutomaton(new RegExp(".{5,}").toAutomaton());
- Analyzer a = new MockAnalyzer(MockTokenizer.WHITESPACE, true, length5, true);
+ Analyzer a = new MockAnalyzer(random, MockTokenizer.WHITESPACE, true, length5, true);
assertAnalyzesTo(a, "ok toolong fine notfine",
new String[] { "ok", "fine" },
new int[] { 1, 2 });
}
+
+ /**
+  * Test named after issue LUCENE-3042: drains a reusable token stream over a
+  * one-character input by hand, then checks that analyzing the same text again
+  * through the reuse path still produces the single token "t".
+  */
+ public void testLUCENE_3042() throws Exception {
+   final String input = "t";
+   final Analyzer mock = new MockAnalyzer(random);
+
+   // First pass: consume the reusable stream manually, from reset() to end().
+   final TokenStream tokens = mock.reusableTokenStream("dummy", new StringReader(input));
+   tokens.reset();
+   for (boolean more = tokens.incrementToken(); more; more = tokens.incrementToken()) {
+     // consume
+   }
+   tokens.end();
+
+   // Second pass: the same analyzer instance must still tokenize correctly.
+   assertAnalyzesToReuse(mock, input, new String[] { "t" });
+ }
+
+ /**
+  * Pushes random strings through a {@link MockAnalyzer} using checkRandomData;
+  * the iteration count scales with RANDOM_MULTIPLIER.
+  */
+ public void testRandomStrings() throws Exception {
+   final MockAnalyzer analyzer = new MockAnalyzer(random);
+   final int iterations = 10000*RANDOM_MULTIPLIER;
+   checkRandomData(random, analyzer, iterations);
+ }
}
Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/Test2BTerms.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/Test2BTerms.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/Test2BTerms.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/Test2BTerms.java Mon May 9 15:24:04 2011
@@ -19,11 +19,18 @@ package org.apache.lucene.index;
import org.apache.lucene.util.*;
import org.apache.lucene.store.*;
+import org.apache.lucene.search.*;
import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.tokenattributes.*;
import org.apache.lucene.document.*;
import org.apache.lucene.index.codecs.CodecProvider;
+import java.io.File;
import java.io.IOException;
+import java.io.UnsupportedEncodingException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Random;
import org.junit.Ignore;
// NOTE: this test will fail w/ PreFlexRW codec! (Because
@@ -36,7 +43,7 @@ import org.junit.Ignore;
//
// ant compile-test
//
-// java -server -Xmx2g -Xms2g -d64 -cp .:lib/junit-4.7.jar:./build/classes/test:./build/classes/java -Dlucene.version=4.0-dev -Dtests.directory=SimpleFSDirectory -Dtests.codec=Standard -DtempDir=build -ea org.junit.runner.JUnitCore org.apache.lucene.index.Test2BTerms
+// java -server -Xmx8g -d64 -cp .:lib/junit-4.7.jar:./build/classes/test:./build/classes/test-framework:./build/classes/java -Dlucene.version=4.0-dev -Dtests.directory=MMapDirectory -DtempDir=build -ea org.junit.runner.JUnitCore org.apache.lucene.index.Test2BTerms
//
public class Test2BTerms extends LuceneTestCase {
@@ -45,17 +52,21 @@ public class Test2BTerms extends LuceneT
private final static BytesRef bytes = new BytesRef(TOKEN_LEN);
- private static final class MyTokenStream extends TokenStream {
+ private final static class MyTokenStream extends TokenStream {
private final int tokensPerDoc;
private int tokenCount;
- private int byteUpto;
+ public final List<BytesRef> savedTerms = new ArrayList<BytesRef>();
+ private int nextSave;
+ private final Random random;
- public MyTokenStream(int tokensPerDoc) {
+ public MyTokenStream(Random random, int tokensPerDoc) {
super(new MyAttributeFactory(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY));
this.tokensPerDoc = tokensPerDoc;
addAttribute(TermToBytesRefAttribute.class);
bytes.length = TOKEN_LEN;
+ this.random = random;
+ nextSave = _TestUtil.nextInt(random, 500000, 1000000);
}
@Override
@@ -65,6 +76,11 @@ public class Test2BTerms extends LuceneT
}
random.nextBytes(bytes.bytes);
tokenCount++;
+ if (--nextSave == 0) {
+ savedTerms.add(new BytesRef(bytes));
+ System.out.println("TEST: save term=" + bytes);
+ nextSave = _TestUtil.nextInt(random, 500000, 1000000);
+ }
return true;
}
@@ -131,47 +147,122 @@ public class Test2BTerms extends LuceneT
throw new RuntimeException("thist test cannot run with PreFlex codec");
}
- long TERM_COUNT = ((long) Integer.MAX_VALUE) + 100000000;
+ final long TERM_COUNT = ((long) Integer.MAX_VALUE) + 100000000;
+
+ final int TERMS_PER_DOC = _TestUtil.nextInt(random, 100000, 1000000);
- int TERMS_PER_DOC = 1000000;
+ List<BytesRef> savedTerms = null;
Directory dir = newFSDirectory(_TestUtil.getTempDir("2BTerms"));
- IndexWriter w = new IndexWriter(
- dir,
- new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).
- setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH).
- setRAMBufferSizeMB(256.0).
- setMergeScheduler(new ConcurrentMergeScheduler()).
- setMergePolicy(newLogMergePolicy(false, 10))
- );
-
- MergePolicy mp = w.getConfig().getMergePolicy();
- if (mp instanceof LogByteSizeMergePolicy) {
- // 1 petabyte:
- ((LogByteSizeMergePolicy) mp).setMaxMergeMB(1024*1024*1024);
- }
-
- Document doc = new Document();
- Field field = new Field("field", new MyTokenStream(TERMS_PER_DOC));
- field.setOmitTermFreqAndPositions(true);
- field.setOmitNorms(true);
- doc.add(field);
- //w.setInfoStream(System.out);
- final int numDocs = (int) (TERM_COUNT/TERMS_PER_DOC);
- for(int i=0;i<numDocs;i++) {
- final long t0 = System.currentTimeMillis();
- w.addDocument(doc);
- System.out.println(i + " of " + numDocs + " " + (System.currentTimeMillis()-t0) + " msec");
- }
- System.out.println("now optimize...");
- w.optimize();
- w.close();
+ //Directory dir = newFSDirectory(new File("/p/lucene/indices/2bindex"));
- System.out.println("now CheckIndex...");
+ if (true) {
+
+ IndexWriter w = new IndexWriter(dir,
+ new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))
+ .setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
+ .setRAMBufferSizeMB(256.0)
+ .setMergeScheduler(new ConcurrentMergeScheduler())
+ .setMergePolicy(newLogMergePolicy(false, 10))
+ .setOpenMode(IndexWriterConfig.OpenMode.CREATE));
+
+ MergePolicy mp = w.getConfig().getMergePolicy();
+ if (mp instanceof LogByteSizeMergePolicy) {
+ // 1 petabyte:
+ ((LogByteSizeMergePolicy) mp).setMaxMergeMB(1024*1024*1024);
+ }
+
+ Document doc = new Document();
+ final MyTokenStream ts = new MyTokenStream(random, TERMS_PER_DOC);
+ Field field = new Field("field", ts);
+ field.setOmitTermFreqAndPositions(true);
+ field.setOmitNorms(true);
+ doc.add(field);
+ //w.setInfoStream(System.out);
+ final int numDocs = (int) (TERM_COUNT/TERMS_PER_DOC);
+
+ System.out.println("TERMS_PER_DOC=" + TERMS_PER_DOC);
+ System.out.println("numDocs=" + numDocs);
+
+ for(int i=0;i<numDocs;i++) {
+ final long t0 = System.currentTimeMillis();
+ w.addDocument(doc);
+ System.out.println(i + " of " + numDocs + " " + (System.currentTimeMillis()-t0) + " msec");
+ }
+ savedTerms = ts.savedTerms;
+
+ System.out.println("TEST: optimize");
+ w.optimize();
+ System.out.println("TEST: close writer");
+ w.close();
+ }
+
+ System.out.println("TEST: open reader");
+ final IndexReader r = IndexReader.open(dir);
+ if (savedTerms == null) {
+ savedTerms = findTerms(r);
+ }
+ final int numSavedTerms = savedTerms.size();
+ final List<BytesRef> bigOrdTerms = new ArrayList<BytesRef>(savedTerms.subList(numSavedTerms-10, numSavedTerms));
+ System.out.println("TEST: test big ord terms...");
+ testSavedTerms(r, bigOrdTerms);
+ System.out.println("TEST: test all saved terms...");
+ testSavedTerms(r, savedTerms);
+ r.close();
+
+ System.out.println("TEST: now CheckIndex...");
CheckIndex.Status status = _TestUtil.checkIndex(dir);
final long tc = status.segmentInfos.get(0).termIndexStatus.termCount;
assertTrue("count " + tc + " is not > " + Integer.MAX_VALUE, tc > Integer.MAX_VALUE);
dir.close();
+ System.out.println("TEST: done!");
+ }
+
+ /**
+  * Samples terms of "field" from an existing index: walks the whole terms enum
+  * and keeps a copy of one term after each randomly sized gap (the gap is drawn
+  * with {@code _TestUtil.nextInt(random, 500000, 1000000)}).
+  *
+  * @param r reader whose "field" terms are enumerated
+  * @return copies of the sampled terms, in enumeration order
+  * @throws IOException if enumerating the terms fails
+  */
+ private List<BytesRef> findTerms(IndexReader r) throws IOException {
+   System.out.println("TEST: findTerms");
+   final TermsEnum allTerms = MultiFields.getTerms(r, "field").iterator();
+   final List<BytesRef> sampled = new ArrayList<BytesRef>();
+   int untilNextSample = _TestUtil.nextInt(random, 500000, 1000000);
+   for (BytesRef current = allTerms.next(); current != null; current = allTerms.next()) {
+     untilNextSample--;
+     if (untilNextSample != 0) {
+       continue;
+     }
+     // Store a copy rather than the instance handed out by the enum
+     // (presumably the enum may reuse it between calls -- confirm).
+     sampled.add(new BytesRef(current));
+     System.out.println("TEST: add " + current);
+     untilNextSample = _TestUtil.nextInt(random, 500000, 1000000);
+   }
+   return sampled;
+ }
+
+ /**
+  * Verifies that previously saved terms can still be found in the index.
+  * Runs {@code 10 * terms.size()} iterations; each one picks a random saved term
+  * and looks it up twice: with a {@code TermQuery} (must report at least one hit)
+  * and with a direct {@code TermsEnum.seek} (must report FOUND). Every miss is
+  * printed, and the method asserts at the end that no lookup failed.
+  *
+  * <p>Note: {@code terms} is shuffled in place.
+  *
+  * @param r reader over the index that should contain the terms in "field"
+  * @param terms terms expected to be present; reordered by this method
+  * @throws IOException if searching or seeking fails
+  */
+ private void testSavedTerms(IndexReader r, List<BytesRef> terms) throws IOException {
+   System.out.println("TEST: run " + terms.size() + " terms on reader=" + r);
+   IndexSearcher s = new IndexSearcher(r);
+   // Shuffle with the test's own Random instead of the JDK default source, so the
+   // order is driven by the same generator as every other random choice here.
+   Collections.shuffle(terms, random);
+   TermsEnum termsEnum = MultiFields.getTerms(r, "field").iterator();
+   boolean failed = false;
+   for(int iter=0;iter<10*terms.size();iter++) {
+     final BytesRef term = terms.get(random.nextInt(terms.size()));
+     System.out.println("TEST: search " + term);
+
+     // Lookup 1: through the search API.
+     final long t0 = System.currentTimeMillis();
+     final int count = s.search(new TermQuery(new Term("field", term)), 1).totalHits;
+     if (count <= 0) {
+       System.out.println(" FAILED: count=" + count);
+       failed = true;
+     }
+     final long t1 = System.currentTimeMillis();
+     System.out.println(" took " + (t1-t0) + " millis");
+
+     // Lookup 2: directly against the terms dictionary.
+     TermsEnum.SeekStatus result = termsEnum.seek(term);
+     if (result != TermsEnum.SeekStatus.FOUND) {
+       if (result == TermsEnum.SeekStatus.END) {
+         System.out.println(" FAILED: got END");
+       } else {
+         System.out.println(" FAILED: wrong term: got " + termsEnum.term());
+       }
+       failed = true;
+     }
+   }
+   // Report all misses first, then fail once at the end.
+   assertFalse(failed);
+ }
}