Posted to dev@lucene.apache.org by Robert Muir <rc...@gmail.com> on 2012/06/19 23:54:25 UTC

Re: svn commit: r1351829 - in /lucene/dev/trunk/lucene: analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseTokenizer.java test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java

Can we use the SuppressCodecs annotation instead, please?

Doing it with assumes in this way is really bad for development: you
run tests and they pass, but not because they are correct -- just
because they happen to trigger some assume. Really, we should be
avoiding such assumes at all costs.
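
For reference, the annotation-based approach being requested is the one
the diff below removes from TestJapaneseTokenizer. A minimal sketch of
restoring it would look like this (extending the list to SimpleText is an
assumption here, mirroring the size check the commit adds):

    import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;

    // LuceneTestCase will never randomly select these postings formats for
    // this class, so the test always runs instead of being assumed away.
    @SuppressCodecs({ "Memory", "SimpleText" })
    public class TestJapaneseTokenizer extends BaseTokenStreamTestCase {
      // ... test methods unchanged ...
    }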

On Tue, Jun 19, 2012 at 4:08 PM,  <mi...@apache.org> wrote:
> Author: mikemccand
> Date: Tue Jun 19 20:08:50 2012
> New Revision: 1351829
>
> URL: http://svn.apache.org/viewvc?rev=1351829&view=rev
> Log:
> don't use Memory/SimpleText postings format when indexing too many tokens from random data
>
> Modified:
>    lucene/dev/trunk/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseTokenizer.java
>    lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
>
> Modified: lucene/dev/trunk/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseTokenizer.java
> URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseTokenizer.java?rev=1351829&r1=1351828&r2=1351829&view=diff
> ==============================================================================
> --- lucene/dev/trunk/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseTokenizer.java (original)
> +++ lucene/dev/trunk/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseTokenizer.java Tue Jun 19 20:08:50 2012
> @@ -36,11 +36,9 @@ import org.apache.lucene.analysis.ja.dic
>  import org.apache.lucene.analysis.ja.tokenattributes.*;
>  import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
>  import org.apache.lucene.util.IOUtils;
> -import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
>  import org.apache.lucene.util.UnicodeUtil;
>  import org.apache.lucene.util._TestUtil;
>
> -@SuppressCodecs({ "Memory" })
>  public class TestJapaneseTokenizer extends BaseTokenStreamTestCase {
>
>   public static UserDictionary readDict() {
>
> Modified: lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
> URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java?rev=1351829&r1=1351828&r2=1351829&view=diff
> ==============================================================================
> --- lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java (original)
> +++ lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java Tue Jun 19 20:08:50 2012
> @@ -26,11 +26,11 @@ import java.io.StringReader;
>  import java.io.StringWriter;
>  import java.io.Writer;
>  import java.util.ArrayList;
> +import java.util.HashMap;
>  import java.util.HashSet;
>  import java.util.List;
> -import java.util.Random;
>  import java.util.Map;
> -import java.util.HashMap;
> +import java.util.Random;
>  import java.util.Set;
>
>  import org.apache.lucene.analysis.tokenattributes.*;
> @@ -45,10 +45,11 @@ import org.apache.lucene.store.Directory
>  import org.apache.lucene.util.Attribute;
>  import org.apache.lucene.util.AttributeImpl;
>  import org.apache.lucene.util.IOUtils;
> -import org.apache.lucene.util.LuceneTestCase;
>  import org.apache.lucene.util.LineFileDocs;
> -import org.apache.lucene.util._TestUtil;
> +import org.apache.lucene.util.LuceneTestCase;
>  import org.apache.lucene.util.Rethrow;
> +import org.apache.lucene.util._TestUtil;
> +import org.junit.Assume;
>
>  /**
>  * Base class for all Lucene unit tests that use TokenStreams.
> @@ -438,6 +439,10 @@ public abstract class BaseTokenStreamTes
>     Directory dir = null;
>     RandomIndexWriter iw = null;
>     if (rarely(random)) {
> +      final String postingsFormat =  _TestUtil.getPostingsFormat("dummy");
> +      Assume.assumeTrue(iterations * maxWordLength < 100000 ||
> +                        !(postingsFormat.equals("Memory") ||
> +                          postingsFormat.equals("SimpleText")));
>       dir = newFSDirectory(_TestUtil.getTempDir("bttc"));
>       iw = new RandomIndexWriter(new Random(seed), dir, a);
>     }
>
>



-- 
lucidimagination.com



Re: svn commit: r1351829 - in /lucene/dev/trunk/lucene: analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseTokenizer.java test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java

Posted by Dawid Weiss <da...@cs.put.poznan.pl>.
> And I agree we should avoid Assume: it's dangerous.

It decreases test coverage because certain runs may be ignored, but it
certainly isn't silent about it -- the result is an assumption-ignored
test, which is reported properly in Ant and IntelliJ; Eclipse has a
long-standing issue with these being marked as successful:

https://bugs.eclipse.org/bugs/show_bug.cgi?id=359944

They don't consider it their problem (incorrectly).
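
A minimal, self-contained JUnit 4 sketch of that reporting behavior (the
class name and the skip condition are made up purely for illustration):

    import org.junit.Assume;
    import org.junit.Test;

    public class AssumeReportingExample {
      @Test
      public void skippedWhenConditionFails() {
        // A failed assumption throws AssumptionViolatedException: Ant and
        // IntelliJ report the test as ignored/skipped, while Eclipse (per
        // the bug above) marks it as successful.
        Assume.assumeTrue(Boolean.getBoolean("run.expensive.checks"));
        // ... assertions are only reached when the assumption holds ...
      }
    }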

Dawid



Re: svn commit: r1351829 - in /lucene/dev/trunk/lucene: analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseTokenizer.java test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java

Posted by Michael McCandless <lu...@mikemccandless.com>.
On Tue, Jun 19, 2012 at 6:18 PM, Robert Muir <rc...@gmail.com> wrote:
> On Tue, Jun 19, 2012 at 6:13 PM, Michael McCandless
> <lu...@mikemccandless.com> wrote:
>> Ahhh good point: using the annotation, LuceneTestCase will simply pick
>> a codec that's allowed, and the test runs.
>>
>> But using an Assume somewhere inside means ... LuceneTestCase can pick
>> the "wrong" codec and then silently the test doesn't run.
>>
>> And, I hadn't realized I can add the annot to BaseTokenStreamTestCase
>> and all tests inheriting from it will respect that.  Though I guess
>> that means any test inheriting from BTSTC and not using
>> checkRandomData with biggish args loses some test coverage ... but
>> that seems the lesser evil here.
>
> Looking at your commit, I fixed it with a different approach: if your
> logic doesn't match, we just don't index anything (but still test the
> actual analyzer).

Ahh I like that solution!

And I agree we should avoid Assume: it's dangerous.

Mike McCandless

http://blog.mikemccandless.com



Re: svn commit: r1351829 - in /lucene/dev/trunk/lucene: analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseTokenizer.java test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java

Posted by Robert Muir <rc...@gmail.com>.
On Tue, Jun 19, 2012 at 6:13 PM, Michael McCandless
<lu...@mikemccandless.com> wrote:
> Ahhh good point: using the annotation, LuceneTestCase will simply pick
> a codec that's allowed, and the test runs.
>
> But using an Assume somewhere inside means ... LuceneTestCase can pick
> the "wrong" codec and then silently the test doesn't run.
>
> And, I hadn't realized I can add the annot to BaseTokenStreamTestCase
> and all tests inheriting from it will respect that.  Though I guess
> that means any test inheriting from BTSTC and not using
> checkRandomData with biggish args loses some test coverage ... but
> that seems the lesser evil here.
>

Looking at your commit, I fixed it with a different approach: if your
logic doesn't match, we just don't index anything (but still test the
actual analyzer).
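
In checkRandomData terms that alternative is roughly the following sketch
(variable names are taken from the diff above; this is not the actual
committed change):

    Directory dir = null;
    RandomIndexWriter iw = null;
    if (rarely(random)) {
      final String postingsFormat = _TestUtil.getPostingsFormat("dummy");
      final boolean tooMuchData = iterations * maxWordLength >= 100000 &&
          (postingsFormat.equals("Memory") || postingsFormat.equals("SimpleText"));
      if (!tooMuchData) {
        // Only index when the chosen postings format can cope with the
        // volume; otherwise leave iw null so the analyzer itself is still
        // exercised but nothing is indexed -- no Assume, no skipped test.
        dir = newFSDirectory(_TestUtil.getTempDir("bttc"));
        iw = new RandomIndexWriter(new Random(seed), dir, a);
      }
    }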

-- 
lucidimagination.com



Re: svn commit: r1351829 - in /lucene/dev/trunk/lucene: analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseTokenizer.java test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java

Posted by Michael McCandless <lu...@mikemccandless.com>.
Ahhh good point: using the annotation, LuceneTestCase will simply pick
a codec that's allowed, and the test runs.

But using an Assume somewhere inside means ... LuceneTestCase can pick
the "wrong" codec and then silently the test doesn't run.

And, I hadn't realized I can add the annot to BaseTokenStreamTestCase
and all tests inheriting from it will respect that.  Though I guess
that means any test inheriting from BTSTC and not using
checkRandomData with biggish args loses some test coverage ... but
that seems the lesser evil here.

OK, I'll cut over to the annotation on BTSTC instead.
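
A hedged sketch of that cutover, with the annotation on the base class so
every inheriting test respects it (the exact format list is assumed from
the commit message above):

    import org.apache.lucene.util.LuceneTestCase;
    import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;

    @SuppressCodecs({ "Memory", "SimpleText" })
    public abstract class BaseTokenStreamTestCase extends LuceneTestCase {
      // Existing assertTokenStreamContents / checkRandomData helpers stay
      // as they are; subclasses such as TestJapaneseTokenizer inherit the
      // suppression, so the random postings-format picker never chooses
      // Memory or SimpleText and the indexing path always runs.
    }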

Mike McCandless

http://blog.mikemccandless.com

On Tue, Jun 19, 2012 at 5:54 PM, Robert Muir <rc...@gmail.com> wrote:
> Can we use the SuppressCodecs annotation instead, please?
>
> Doing it with assumes in this way is really bad for development: you
> run tests and they pass, but not because they are correct -- just
> because they happen to trigger some assume. Really, we should be
> avoiding such assumes at all costs.
>
> On Tue, Jun 19, 2012 at 4:08 PM,  <mi...@apache.org> wrote:
>> Author: mikemccand
>> Date: Tue Jun 19 20:08:50 2012
>> New Revision: 1351829
>>
>> URL: http://svn.apache.org/viewvc?rev=1351829&view=rev
>> Log:
>> don't use Memory/SimpleText postings format when indexing too many tokens from random data
>>
>> Modified:
>>    lucene/dev/trunk/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseTokenizer.java
>>    lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
>>
>> Modified: lucene/dev/trunk/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseTokenizer.java
>> URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseTokenizer.java?rev=1351829&r1=1351828&r2=1351829&view=diff
>> ==============================================================================
>> --- lucene/dev/trunk/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseTokenizer.java (original)
>> +++ lucene/dev/trunk/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseTokenizer.java Tue Jun 19 20:08:50 2012
>> @@ -36,11 +36,9 @@ import org.apache.lucene.analysis.ja.dic
>>  import org.apache.lucene.analysis.ja.tokenattributes.*;
>>  import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
>>  import org.apache.lucene.util.IOUtils;
>> -import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
>>  import org.apache.lucene.util.UnicodeUtil;
>>  import org.apache.lucene.util._TestUtil;
>>
>> -@SuppressCodecs({ "Memory" })
>>  public class TestJapaneseTokenizer extends BaseTokenStreamTestCase {
>>
>>   public static UserDictionary readDict() {
>>
>> Modified: lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
>> URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java?rev=1351829&r1=1351828&r2=1351829&view=diff
>> ==============================================================================
>> --- lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java (original)
>> +++ lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java Tue Jun 19 20:08:50 2012
>> @@ -26,11 +26,11 @@ import java.io.StringReader;
>>  import java.io.StringWriter;
>>  import java.io.Writer;
>>  import java.util.ArrayList;
>> +import java.util.HashMap;
>>  import java.util.HashSet;
>>  import java.util.List;
>> -import java.util.Random;
>>  import java.util.Map;
>> -import java.util.HashMap;
>> +import java.util.Random;
>>  import java.util.Set;
>>
>>  import org.apache.lucene.analysis.tokenattributes.*;
>> @@ -45,10 +45,11 @@ import org.apache.lucene.store.Directory
>>  import org.apache.lucene.util.Attribute;
>>  import org.apache.lucene.util.AttributeImpl;
>>  import org.apache.lucene.util.IOUtils;
>> -import org.apache.lucene.util.LuceneTestCase;
>>  import org.apache.lucene.util.LineFileDocs;
>> -import org.apache.lucene.util._TestUtil;
>> +import org.apache.lucene.util.LuceneTestCase;
>>  import org.apache.lucene.util.Rethrow;
>> +import org.apache.lucene.util._TestUtil;
>> +import org.junit.Assume;
>>
>>  /**
>>  * Base class for all Lucene unit tests that use TokenStreams.
>> @@ -438,6 +439,10 @@ public abstract class BaseTokenStreamTes
>>     Directory dir = null;
>>     RandomIndexWriter iw = null;
>>     if (rarely(random)) {
>> +      final String postingsFormat =  _TestUtil.getPostingsFormat("dummy");
>> +      Assume.assumeTrue(iterations * maxWordLength < 100000 ||
>> +                        !(postingsFormat.equals("Memory") ||
>> +                          postingsFormat.equals("SimpleText")));
>>       dir = newFSDirectory(_TestUtil.getTempDir("bttc"));
>>       iw = new RandomIndexWriter(new Random(seed), dir, a);
>>     }
>>
>>
>
>
>
> --
> lucidimagination.com
>
