You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2014/11/27 03:13:41 UTC
svn commit: r1642007 - in /lucene/dev/branches/branch_5x: ./ lucene/
lucene/analysis/
lucene/analysis/common/src/test/org/apache/lucene/analysis/core/
lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/
lucene/analysis/common/src/...
Author: rmuir
Date: Thu Nov 27 02:13:40 2014
New Revision: 1642007
URL: http://svn.apache.org/r1642007
Log:
backport analysis test speedups
Modified:
lucene/dev/branches/branch_5x/ (props changed)
lucene/dev/branches/branch_5x/lucene/ (props changed)
lucene/dev/branches/branch_5x/lucene/analysis/ (props changed)
lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestBugInSomething.java
lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestFactories.java
lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java
lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestWordDelimiterFilter.java
lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenizerTest.java
lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenizerTest.java
lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/snowball/TestSnowball.java
lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/snowball/TestSnowballVocab.java
lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/TestStandardAnalyzer.java
lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/TestUAX29URLEmailTokenizer.java
lucene/dev/branches/branch_5x/lucene/test-framework/ (props changed)
lucene/dev/branches/branch_5x/lucene/test-framework/src/java/org/apache/lucene/util/TestUtil.java
Modified: lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestBugInSomething.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestBugInSomething.java?rev=1642007&r1=1642006&r2=1642007&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestBugInSomething.java (original)
+++ lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestBugInSomething.java Thu Nov 27 02:13:40 2014
@@ -246,6 +246,7 @@ public class TestBugInSomething extends
}
// LUCENE-5269
+ @Slow
public void testUnicodeShinglesAndNgrams() throws Exception {
Analyzer analyzer = new Analyzer() {
@Override
Modified: lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestFactories.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestFactories.java?rev=1642007&r1=1642006&r2=1642007&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestFactories.java (original)
+++ lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestFactories.java Thu Nov 27 02:13:40 2014
@@ -77,7 +77,7 @@ public class TestFactories extends BaseT
// beast it just a little, it shouldnt throw exceptions:
// (it should have thrown them in initialize)
- checkRandomData(random(), new FactoryAnalyzer(factory, null, null), 100, 20, false, false);
+ checkRandomData(random(), new FactoryAnalyzer(factory, null, null), 20, 20, false, false);
}
}
@@ -97,7 +97,7 @@ public class TestFactories extends BaseT
// beast it just a little, it shouldnt throw exceptions:
// (it should have thrown them in initialize)
- checkRandomData(random(), new FactoryAnalyzer(assertingTokenizer, factory, null), 100, 20, false, false);
+ checkRandomData(random(), new FactoryAnalyzer(assertingTokenizer, factory, null), 20, 20, false, false);
}
}
@@ -117,7 +117,7 @@ public class TestFactories extends BaseT
// beast it just a little, it shouldnt throw exceptions:
// (it should have thrown them in initialize)
- checkRandomData(random(), new FactoryAnalyzer(assertingTokenizer, null, factory), 100, 20, false, false);
+ checkRandomData(random(), new FactoryAnalyzer(assertingTokenizer, null, factory), 20, 20, false, false);
}
}
Modified: lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java?rev=1642007&r1=1642006&r2=1642007&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java (original)
+++ lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java Thu Nov 27 02:13:40 2014
@@ -900,7 +900,7 @@ public class TestRandomChains extends Ba
}
public void testRandomChains() throws Throwable {
- int numIterations = atLeast(20);
+ int numIterations = TEST_NIGHTLY ? atLeast(20) : 3;
Random random = random();
for (int i = 0; i < numIterations; i++) {
MockRandomAnalyzer a = new MockRandomAnalyzer(random.nextLong());
@@ -919,7 +919,7 @@ public class TestRandomChains extends Ba
// we might regret this decision...
public void testRandomChainsWithLargeStrings() throws Throwable {
- int numIterations = atLeast(20);
+ int numIterations = TEST_NIGHTLY ? atLeast(20) : 3;
Random random = random();
for (int i = 0; i < numIterations; i++) {
MockRandomAnalyzer a = new MockRandomAnalyzer(random.nextLong());
Modified: lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestWordDelimiterFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestWordDelimiterFilter.java?rev=1642007&r1=1642006&r2=1642007&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestWordDelimiterFilter.java (original)
+++ lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestWordDelimiterFilter.java Thu Nov 27 02:13:40 2014
@@ -355,7 +355,7 @@ public class TestWordDelimiterFilter ext
}
};
// TODO: properly support positionLengthAttribute
- checkRandomData(random(), a, 1000*RANDOM_MULTIPLIER, 20, false, false);
+ checkRandomData(random(), a, 200*RANDOM_MULTIPLIER, 20, false, false);
}
}
@@ -380,7 +380,7 @@ public class TestWordDelimiterFilter ext
}
};
// TODO: properly support positionLengthAttribute
- checkRandomData(random(), a, 100*RANDOM_MULTIPLIER, 8192, false, false);
+ checkRandomData(random(), a, 20*RANDOM_MULTIPLIER, 8192, false, false);
}
}
Modified: lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenizerTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenizerTest.java?rev=1642007&r1=1642006&r2=1642007&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenizerTest.java (original)
+++ lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenizerTest.java Thu Nov 27 02:13:40 2014
@@ -99,7 +99,8 @@ public class EdgeNGramTokenizerTest exte
/** blast some random strings through the analyzer */
public void testRandomStrings() throws Exception {
- for (int i = 0; i < 10; i++) {
+ int numIters = TEST_NIGHTLY ? 10 : 1;
+ for (int i = 0; i < numIters; i++) {
final int min = TestUtil.nextInt(random(), 2, 10);
final int max = TestUtil.nextInt(random(), min, 20);
Modified: lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenizerTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenizerTest.java?rev=1642007&r1=1642006&r2=1642007&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenizerTest.java (original)
+++ lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenizerTest.java Thu Nov 27 02:13:40 2014
@@ -110,7 +110,8 @@ public class NGramTokenizerTest extends
/** blast some random strings through the analyzer */
public void testRandomStrings() throws Exception {
- for (int i = 0; i < 10; i++) {
+ int numIters = TEST_NIGHTLY ? 10 : 1;
+ for (int i = 0; i < numIters; i++) {
final int min = TestUtil.nextInt(random(), 2, 10);
final int max = TestUtil.nextInt(random(), min, 20);
Analyzer a = new Analyzer() {
Modified: lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/snowball/TestSnowball.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/snowball/TestSnowball.java?rev=1642007&r1=1642006&r2=1642007&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/snowball/TestSnowball.java (original)
+++ lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/snowball/TestSnowball.java Thu Nov 27 02:13:40 2014
@@ -130,6 +130,6 @@ public class TestSnowball extends BaseTo
return new TokenStreamComponents(t, new SnowballFilter(t, snowballLanguage));
}
};
- checkRandomData(random(), a, 1000*RANDOM_MULTIPLIER);
+ checkRandomData(random(), a, 100*RANDOM_MULTIPLIER);
}
}
\ No newline at end of file
Modified: lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/snowball/TestSnowballVocab.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/snowball/TestSnowballVocab.java?rev=1642007&r1=1642006&r2=1642007&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/snowball/TestSnowballVocab.java (original)
+++ lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/snowball/TestSnowballVocab.java Thu Nov 27 02:13:40 2014
@@ -24,12 +24,14 @@ import org.apache.lucene.analysis.Analyz
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.LuceneTestCase.Slow;
import static org.apache.lucene.analysis.VocabularyAssert.*;
/**
* Test the snowball filters against the snowball data tests
*/
+@Slow
public class TestSnowballVocab extends LuceneTestCase {
/**
* Run all languages against their snowball vocabulary tests.
Modified: lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/TestStandardAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/TestStandardAnalyzer.java?rev=1642007&r1=1642006&r2=1642007&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/TestStandardAnalyzer.java (original)
+++ lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/TestStandardAnalyzer.java Thu Nov 27 02:13:40 2014
@@ -35,6 +35,7 @@ import org.apache.lucene.util.Version;
public class TestStandardAnalyzer extends BaseTokenStreamTestCase {
// LUCENE-5897: slow tokenization of strings of the form (\p{WB:ExtendNumLet}[\p{WB:Format}\p{WB:Extend}]*)+
+ @Slow
public void testLargePartiallyMatchingToken() throws Exception {
// TODO: get these lists of chars matching a property from ICU4J
// http://www.unicode.org/Public/6.3.0/ucd/auxiliary/WordBreakProperty.txt
Modified: lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/TestUAX29URLEmailTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/TestUAX29URLEmailTokenizer.java?rev=1642007&r1=1642006&r2=1642007&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/TestUAX29URLEmailTokenizer.java (original)
+++ lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/TestUAX29URLEmailTokenizer.java Thu Nov 27 02:13:40 2014
@@ -39,6 +39,7 @@ import java.util.Random;
public class TestUAX29URLEmailTokenizer extends BaseTokenStreamTestCase {
// LUCENE-5440: extremely slow tokenization of text matching email <local-part> (before the '@')
+ @Slow
public void testLongEMAILatomText() throws Exception {
// EMAILatomText = [A-Za-z0-9!#$%&'*+-/=?\^_`{|}~]
char[] emailAtomChars
Modified: lucene/dev/branches/branch_5x/lucene/test-framework/src/java/org/apache/lucene/util/TestUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/test-framework/src/java/org/apache/lucene/util/TestUtil.java?rev=1642007&r1=1642006&r2=1642007&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/test-framework/src/java/org/apache/lucene/util/TestUtil.java (original)
+++ lucene/dev/branches/branch_5x/lucene/test-framework/src/java/org/apache/lucene/util/TestUtil.java Thu Nov 27 02:13:40 2014
@@ -17,6 +17,7 @@ package org.apache.lucene.util;
* limitations under the License.
*/
+import java.io.BufferedInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
@@ -94,6 +95,7 @@ import org.apache.lucene.store.FSDirecto
import org.apache.lucene.store.FilterDirectory;
import org.apache.lucene.store.NoLockFactory;
import org.junit.Assert;
+
import com.carrotsearch.randomizedtesting.generators.RandomInts;
import com.carrotsearch.randomizedtesting.generators.RandomPicks;
@@ -111,6 +113,7 @@ public final class TestUtil {
* Closes the given InputStream after extracting!
*/
public static void unzip(InputStream in, Path destDir) throws IOException {
+ in = new BufferedInputStream(in);
IOUtils.rm(destDir);
Files.createDirectory(destDir);