You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2014/11/27 03:13:41 UTC

svn commit: r1642007 - in /lucene/dev/branches/branch_5x: ./ lucene/ lucene/analysis/ lucene/analysis/common/src/test/org/apache/lucene/analysis/core/ lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/ lucene/analysis/common/src/...

Author: rmuir
Date: Thu Nov 27 02:13:40 2014
New Revision: 1642007

URL: http://svn.apache.org/r1642007
Log:
backport analysis test speedups

Modified:
    lucene/dev/branches/branch_5x/   (props changed)
    lucene/dev/branches/branch_5x/lucene/   (props changed)
    lucene/dev/branches/branch_5x/lucene/analysis/   (props changed)
    lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestBugInSomething.java
    lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestFactories.java
    lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java
    lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestWordDelimiterFilter.java
    lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenizerTest.java
    lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenizerTest.java
    lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/snowball/TestSnowball.java
    lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/snowball/TestSnowballVocab.java
    lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/TestStandardAnalyzer.java
    lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/TestUAX29URLEmailTokenizer.java
    lucene/dev/branches/branch_5x/lucene/test-framework/   (props changed)
    lucene/dev/branches/branch_5x/lucene/test-framework/src/java/org/apache/lucene/util/TestUtil.java

Modified: lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestBugInSomething.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestBugInSomething.java?rev=1642007&r1=1642006&r2=1642007&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestBugInSomething.java (original)
+++ lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestBugInSomething.java Thu Nov 27 02:13:40 2014
@@ -246,6 +246,7 @@ public class TestBugInSomething extends 
   }
   
   // LUCENE-5269
+  @Slow
   public void testUnicodeShinglesAndNgrams() throws Exception {
     Analyzer analyzer = new Analyzer() {
       @Override

Modified: lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestFactories.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestFactories.java?rev=1642007&r1=1642006&r2=1642007&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestFactories.java (original)
+++ lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestFactories.java Thu Nov 27 02:13:40 2014
@@ -77,7 +77,7 @@ public class TestFactories extends BaseT
       
       // beast it just a little, it shouldnt throw exceptions:
       // (it should have thrown them in initialize)
-      checkRandomData(random(), new FactoryAnalyzer(factory, null, null), 100, 20, false, false);
+      checkRandomData(random(), new FactoryAnalyzer(factory, null, null), 20, 20, false, false);
     }
   }
   
@@ -97,7 +97,7 @@ public class TestFactories extends BaseT
       
       // beast it just a little, it shouldnt throw exceptions:
       // (it should have thrown them in initialize)
-      checkRandomData(random(), new FactoryAnalyzer(assertingTokenizer, factory, null), 100, 20, false, false);
+      checkRandomData(random(), new FactoryAnalyzer(assertingTokenizer, factory, null), 20, 20, false, false);
     }
   }
   
@@ -117,7 +117,7 @@ public class TestFactories extends BaseT
       
       // beast it just a little, it shouldnt throw exceptions:
       // (it should have thrown them in initialize)
-      checkRandomData(random(), new FactoryAnalyzer(assertingTokenizer, null, factory), 100, 20, false, false);
+      checkRandomData(random(), new FactoryAnalyzer(assertingTokenizer, null, factory), 20, 20, false, false);
     }
   }
   

Modified: lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java?rev=1642007&r1=1642006&r2=1642007&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java (original)
+++ lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java Thu Nov 27 02:13:40 2014
@@ -900,7 +900,7 @@ public class TestRandomChains extends Ba
   }
   
   public void testRandomChains() throws Throwable {
-    int numIterations = atLeast(20);
+    int numIterations = TEST_NIGHTLY ? atLeast(20) : 3;
     Random random = random();
     for (int i = 0; i < numIterations; i++) {
       MockRandomAnalyzer a = new MockRandomAnalyzer(random.nextLong());
@@ -919,7 +919,7 @@ public class TestRandomChains extends Ba
   
   // we might regret this decision...
   public void testRandomChainsWithLargeStrings() throws Throwable {
-    int numIterations = atLeast(20);
+    int numIterations = TEST_NIGHTLY ? atLeast(20) : 3;
     Random random = random();
     for (int i = 0; i < numIterations; i++) {
       MockRandomAnalyzer a = new MockRandomAnalyzer(random.nextLong());

Modified: lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestWordDelimiterFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestWordDelimiterFilter.java?rev=1642007&r1=1642006&r2=1642007&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestWordDelimiterFilter.java (original)
+++ lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestWordDelimiterFilter.java Thu Nov 27 02:13:40 2014
@@ -355,7 +355,7 @@ public class TestWordDelimiterFilter ext
         }
       };
       // TODO: properly support positionLengthAttribute
-      checkRandomData(random(), a, 1000*RANDOM_MULTIPLIER, 20, false, false);
+      checkRandomData(random(), a, 200*RANDOM_MULTIPLIER, 20, false, false);
     }
   }
   
@@ -380,7 +380,7 @@ public class TestWordDelimiterFilter ext
         }
       };
       // TODO: properly support positionLengthAttribute
-      checkRandomData(random(), a, 100*RANDOM_MULTIPLIER, 8192, false, false);
+      checkRandomData(random(), a, 20*RANDOM_MULTIPLIER, 8192, false, false);
     }
   }
   

Modified: lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenizerTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenizerTest.java?rev=1642007&r1=1642006&r2=1642007&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenizerTest.java (original)
+++ lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenizerTest.java Thu Nov 27 02:13:40 2014
@@ -99,7 +99,8 @@ public class EdgeNGramTokenizerTest exte
   
   /** blast some random strings through the analyzer */
   public void testRandomStrings() throws Exception {
-    for (int i = 0; i < 10; i++) {
+    int numIters = TEST_NIGHTLY ? 10 : 1;
+    for (int i = 0; i < numIters; i++) {
       final int min = TestUtil.nextInt(random(), 2, 10);
       final int max = TestUtil.nextInt(random(), min, 20);
       

Modified: lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenizerTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenizerTest.java?rev=1642007&r1=1642006&r2=1642007&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenizerTest.java (original)
+++ lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenizerTest.java Thu Nov 27 02:13:40 2014
@@ -110,7 +110,8 @@ public class NGramTokenizerTest extends 
   
   /** blast some random strings through the analyzer */
   public void testRandomStrings() throws Exception {
-    for (int i = 0; i < 10; i++) {
+    int numIters = TEST_NIGHTLY ? 10 : 1;
+    for (int i = 0; i < numIters; i++) {
       final int min = TestUtil.nextInt(random(), 2, 10);
       final int max = TestUtil.nextInt(random(), min, 20);
       Analyzer a = new Analyzer() {

Modified: lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/snowball/TestSnowball.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/snowball/TestSnowball.java?rev=1642007&r1=1642006&r2=1642007&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/snowball/TestSnowball.java (original)
+++ lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/snowball/TestSnowball.java Thu Nov 27 02:13:40 2014
@@ -130,6 +130,6 @@ public class TestSnowball extends BaseTo
         return new TokenStreamComponents(t, new SnowballFilter(t, snowballLanguage));
       }  
     };
-    checkRandomData(random(), a, 1000*RANDOM_MULTIPLIER);
+    checkRandomData(random(), a, 100*RANDOM_MULTIPLIER);
   }
 }
\ No newline at end of file

Modified: lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/snowball/TestSnowballVocab.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/snowball/TestSnowballVocab.java?rev=1642007&r1=1642006&r2=1642007&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/snowball/TestSnowballVocab.java (original)
+++ lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/snowball/TestSnowballVocab.java Thu Nov 27 02:13:40 2014
@@ -24,12 +24,14 @@ import org.apache.lucene.analysis.Analyz
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.core.KeywordTokenizer;
 import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.LuceneTestCase.Slow;
 
 import static org.apache.lucene.analysis.VocabularyAssert.*;
 
 /**
  * Test the snowball filters against the snowball data tests
  */
+@Slow
 public class TestSnowballVocab extends LuceneTestCase {
   /**
    * Run all languages against their snowball vocabulary tests.

Modified: lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/TestStandardAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/TestStandardAnalyzer.java?rev=1642007&r1=1642006&r2=1642007&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/TestStandardAnalyzer.java (original)
+++ lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/TestStandardAnalyzer.java Thu Nov 27 02:13:40 2014
@@ -35,6 +35,7 @@ import org.apache.lucene.util.Version;
 public class TestStandardAnalyzer extends BaseTokenStreamTestCase {
 
   // LUCENE-5897: slow tokenization of strings of the form (\p{WB:ExtendNumLet}[\p{WB:Format}\p{WB:Extend}]*)+
+  @Slow
   public void testLargePartiallyMatchingToken() throws Exception {
     // TODO: get these lists of chars matching a property from ICU4J
     // http://www.unicode.org/Public/6.3.0/ucd/auxiliary/WordBreakProperty.txt

Modified: lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/TestUAX29URLEmailTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/TestUAX29URLEmailTokenizer.java?rev=1642007&r1=1642006&r2=1642007&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/TestUAX29URLEmailTokenizer.java (original)
+++ lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/TestUAX29URLEmailTokenizer.java Thu Nov 27 02:13:40 2014
@@ -39,6 +39,7 @@ import java.util.Random;
 public class TestUAX29URLEmailTokenizer extends BaseTokenStreamTestCase {
 
   // LUCENE-5440: extremely slow tokenization of text matching email <local-part> (before the '@')
+  @Slow
   public void testLongEMAILatomText() throws Exception {
     // EMAILatomText = [A-Za-z0-9!#$%&'*+-/=?\^_`{|}~]
     char[] emailAtomChars

Modified: lucene/dev/branches/branch_5x/lucene/test-framework/src/java/org/apache/lucene/util/TestUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/test-framework/src/java/org/apache/lucene/util/TestUtil.java?rev=1642007&r1=1642006&r2=1642007&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/test-framework/src/java/org/apache/lucene/util/TestUtil.java (original)
+++ lucene/dev/branches/branch_5x/lucene/test-framework/src/java/org/apache/lucene/util/TestUtil.java Thu Nov 27 02:13:40 2014
@@ -17,6 +17,7 @@ package org.apache.lucene.util;
  * limitations under the License.
  */
 
+import java.io.BufferedInputStream;
 import java.io.ByteArrayOutputStream;
 import java.io.IOException;
 import java.io.InputStream;
@@ -94,6 +95,7 @@ import org.apache.lucene.store.FSDirecto
 import org.apache.lucene.store.FilterDirectory;
 import org.apache.lucene.store.NoLockFactory;
 import org.junit.Assert;
+
 import com.carrotsearch.randomizedtesting.generators.RandomInts;
 import com.carrotsearch.randomizedtesting.generators.RandomPicks;
 
@@ -111,6 +113,7 @@ public final class TestUtil {
    * Closes the given InputStream after extracting! 
    */
   public static void unzip(InputStream in, Path destDir) throws IOException {
+    in = new BufferedInputStream(in);
     IOUtils.rm(destDir);
     Files.createDirectory(destDir);