You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2012/03/26 01:40:46 UTC
svn commit: r1305177 [2/2] - in /lucene/dev/trunk/modules/analysis:
common/src/java/org/apache/lucene/analysis/cz/
common/src/test/org/apache/lucene/analysis/ar/
common/src/test/org/apache/lucene/analysis/bg/
common/src/test/org/apache/lucene/analysis/...
Modified: lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseStemFilter.java?rev=1305177&r1=1305176&r2=1305177&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseStemFilter.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseStemFilter.java Sun Mar 25 23:40:44 2012
@@ -26,6 +26,7 @@ import org.apache.lucene.analysis.Analyz
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
@@ -70,4 +71,15 @@ public class TestPortugueseStemFilter ex
public void testRandomStrings() throws Exception {
checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER);
}
+
+ public void testEmptyTerm() throws IOException {
+ Analyzer a = new Analyzer() {
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ Tokenizer tokenizer = new KeywordTokenizer(reader);
+ return new TokenStreamComponents(tokenizer, new PortugueseStemFilter(tokenizer));
+ }
+ };
+ checkOneTermReuse(a, "", "");
+ }
}
Modified: lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/reverse/TestReverseStringFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/reverse/TestReverseStringFilter.java?rev=1305177&r1=1305176&r2=1305177&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/reverse/TestReverseStringFilter.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/reverse/TestReverseStringFilter.java Sun Mar 25 23:40:44 2012
@@ -17,6 +17,7 @@
package org.apache.lucene.analysis.reverse;
+import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
@@ -25,6 +26,7 @@ import org.apache.lucene.analysis.MockTo
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.util.Version;
public class TestReverseStringFilter extends BaseTokenStreamTestCase {
@@ -111,4 +113,15 @@ public class TestReverseStringFilter ext
};
checkRandomData(random, a, 10000*RANDOM_MULTIPLIER);
}
+
+ public void testEmptyTerm() throws IOException {
+ Analyzer a = new Analyzer() {
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ Tokenizer tokenizer = new KeywordTokenizer(reader);
+ return new TokenStreamComponents(tokenizer, new ReverseStringFilter(TEST_VERSION_CURRENT, tokenizer));
+ }
+ };
+ checkOneTermReuse(a, "", "");
+ }
}
Modified: lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianLightStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianLightStemFilter.java?rev=1305177&r1=1305176&r2=1305177&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianLightStemFilter.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianLightStemFilter.java Sun Mar 25 23:40:44 2012
@@ -24,6 +24,7 @@ import org.apache.lucene.analysis.Analyz
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.core.KeywordTokenizer;
import static org.apache.lucene.analysis.VocabularyAssert.*;
@@ -49,4 +50,15 @@ public class TestRussianLightStemFilter
public void testRandomStrings() throws Exception {
checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER);
}
+
+ public void testEmptyTerm() throws IOException {
+ Analyzer a = new Analyzer() {
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ Tokenizer tokenizer = new KeywordTokenizer(reader);
+ return new TokenStreamComponents(tokenizer, new RussianLightStemFilter(tokenizer));
+ }
+ };
+ checkOneTermReuse(a, "", "");
+ }
}
Modified: lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/shingle/ShingleFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/shingle/ShingleFilterTest.java?rev=1305177&r1=1305176&r2=1305177&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/shingle/ShingleFilterTest.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/shingle/ShingleFilterTest.java Sun Mar 25 23:40:44 2012
@@ -27,6 +27,7 @@ import org.apache.lucene.analysis.MockTo
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.tokenattributes.*;
@@ -1156,4 +1157,15 @@ public class ShingleFilterTest extends B
};
checkRandomData(random, a, 200*RANDOM_MULTIPLIER, 8192);
}
+
+ public void testEmptyTerm() throws IOException {
+ Analyzer a = new Analyzer() {
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ Tokenizer tokenizer = new KeywordTokenizer(reader);
+ return new TokenStreamComponents(tokenizer, new ShingleFilter(tokenizer));
+ }
+ };
+ checkOneTermReuse(a, "", "");
+ }
}
Modified: lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/snowball/TestSnowball.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/snowball/TestSnowball.java?rev=1305177&r1=1305176&r2=1305177&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/snowball/TestSnowball.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/snowball/TestSnowball.java Sun Mar 25 23:40:44 2012
@@ -17,10 +17,15 @@ package org.apache.lucene.analysis.snowb
* limitations under the License.
*/
+import java.io.IOException;
+import java.io.Reader;
+
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.index.Payload;
import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
@@ -136,4 +141,23 @@ public class TestSnowball extends BaseTo
return true;
}
}
+
+ public void testEmptyTerm() throws IOException {
+ String langs[] = {
+ "Armenian", "Basque", "Catalan", "Danish", "Dutch", "English",
+ "Finnish", "French", "German2", "German", "Hungarian", "Irish",
+ "Italian", "Kp", "Lovins", "Norwegian", "Porter", "Portuguese",
+ "Romanian", "Russian", "Spanish", "Swedish", "Turkish"
+ };
+ for (final String lang : langs) {
+ Analyzer a = new Analyzer() {
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ Tokenizer tokenizer = new KeywordTokenizer(reader);
+ return new TokenStreamComponents(tokenizer, new SnowballFilter(tokenizer, lang));
+ }
+ };
+ checkOneTermReuse(a, "", "");
+ }
+ }
}
\ No newline at end of file
Modified: lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishLightStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishLightStemFilter.java?rev=1305177&r1=1305176&r2=1305177&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishLightStemFilter.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishLightStemFilter.java Sun Mar 25 23:40:44 2012
@@ -24,6 +24,7 @@ import org.apache.lucene.analysis.Analyz
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.core.KeywordTokenizer;
import static org.apache.lucene.analysis.VocabularyAssert.*;
@@ -49,4 +50,15 @@ public class TestSwedishLightStemFilter
public void testRandomStrings() throws Exception {
checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER);
}
+
+ public void testEmptyTerm() throws IOException {
+ Analyzer a = new Analyzer() {
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ Tokenizer tokenizer = new KeywordTokenizer(reader);
+ return new TokenStreamComponents(tokenizer, new SwedishLightStemFilter(tokenizer));
+ }
+ };
+ checkOneTermReuse(a, "", "");
+ }
}
Modified: lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymMapFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymMapFilter.java?rev=1305177&r1=1305176&r2=1305177&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymMapFilter.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymMapFilter.java Sun Mar 25 23:40:44 2012
@@ -17,6 +17,7 @@
package org.apache.lucene.analysis.synonym;
+import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.ArrayList;
@@ -32,6 +33,7 @@ import org.apache.lucene.analysis.BaseTo
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.tokenattributes.*;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util._TestUtil;
@@ -428,6 +430,29 @@ public class TestSynonymMapFilter extend
}
}
+ public void testEmptyTerm() throws IOException {
+ final int numIters = atLeast(10);
+ for (int i = 0; i < numIters; i++) {
+ b = new SynonymMap.Builder(random.nextBoolean());
+ final int numEntries = atLeast(10);
+ for (int j = 0; j < numEntries; j++) {
+ add(randomNonEmptyString(), randomNonEmptyString(), random.nextBoolean());
+ }
+ final SynonymMap map = b.build();
+ final boolean ignoreCase = random.nextBoolean();
+
+ final Analyzer analyzer = new Analyzer() {
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ Tokenizer tokenizer = new KeywordTokenizer(reader);
+ return new TokenStreamComponents(tokenizer, new SynonymFilter(tokenizer, map, ignoreCase));
+ }
+ };
+
+ checkAnalysisConsistency(random, analyzer, random.nextBoolean(), "");
+ }
+ }
+
/** simple random test like testRandom2, but for large docs
*/
public void testRandomHuge() throws Exception {
Modified: lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java?rev=1305177&r1=1305176&r2=1305177&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java Sun Mar 25 23:40:44 2012
@@ -17,10 +17,15 @@ package org.apache.lucene.analysis.th;
* limitations under the License.
*/
+import java.io.IOException;
+import java.io.Reader;
import java.io.StringReader;
+import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.core.StopAnalyzer;
import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
import org.apache.lucene.analysis.util.CharArraySet;
@@ -183,4 +188,15 @@ public class TestThaiAnalyzer extends Ba
ts.addAttribute(FlagsAttribute.class);
assertTokenStreamContents(ts, new String[] { "ภาษา", "ไทย" });
}
+
+ public void testEmptyTerm() throws IOException {
+ Analyzer a = new Analyzer() {
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ Tokenizer tokenizer = new KeywordTokenizer(reader);
+ return new TokenStreamComponents(tokenizer, new ThaiWordFilter(TEST_VERSION_CURRENT, tokenizer));
+ }
+ };
+ checkOneTermReuse(a, "", "");
+ }
}
Modified: lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/tr/TestTurkishLowerCaseFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/tr/TestTurkishLowerCaseFilter.java?rev=1305177&r1=1305176&r2=1305177&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/tr/TestTurkishLowerCaseFilter.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/tr/TestTurkishLowerCaseFilter.java Sun Mar 25 23:40:44 2012
@@ -17,11 +17,16 @@ package org.apache.lucene.analysis.tr;
* limitations under the License.
*/
+import java.io.IOException;
+import java.io.Reader;
import java.io.StringReader;
+import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.core.KeywordTokenizer;
/**
* Test the Turkish lowercase filter.
@@ -62,4 +67,15 @@ public class TestTurkishLowerCaseFilter
assertTokenStreamContents(filter, new String[] {"i\u0316stanbul", "izmir",
"\u0131\u0316sparta",});
}
+
+ public void testEmptyTerm() throws IOException {
+ Analyzer a = new Analyzer() {
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ Tokenizer tokenizer = new KeywordTokenizer(reader);
+ return new TokenStreamComponents(tokenizer, new TurkishLowerCaseFilter(tokenizer));
+ }
+ };
+ checkOneTermReuse(a, "", "");
+ }
}
Modified: lucene/dev/trunk/modules/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUFoldingFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUFoldingFilter.java?rev=1305177&r1=1305176&r2=1305177&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUFoldingFilter.java (original)
+++ lucene/dev/trunk/modules/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUFoldingFilter.java Sun Mar 25 23:40:44 2012
@@ -21,6 +21,7 @@ import java.io.IOException;
import java.io.Reader;
import org.apache.lucene.analysis.*;
+import org.apache.lucene.analysis.core.KeywordTokenizer;
/**
* Tests ICUFoldingFilter
@@ -77,4 +78,15 @@ public class TestICUFoldingFilter extend
public void testRandomStrings() throws Exception {
checkRandomData(random, a, 10000*RANDOM_MULTIPLIER);
}
+
+ public void testEmptyTerm() throws IOException {
+ Analyzer a = new Analyzer() {
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ Tokenizer tokenizer = new KeywordTokenizer(reader);
+ return new TokenStreamComponents(tokenizer, new ICUFoldingFilter(tokenizer));
+ }
+ };
+ checkOneTermReuse(a, "", "");
+ }
}
Modified: lucene/dev/trunk/modules/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUNormalizer2Filter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUNormalizer2Filter.java?rev=1305177&r1=1305176&r2=1305177&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUNormalizer2Filter.java (original)
+++ lucene/dev/trunk/modules/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUNormalizer2Filter.java Sun Mar 25 23:40:44 2012
@@ -21,6 +21,7 @@ import java.io.IOException;
import java.io.Reader;
import org.apache.lucene.analysis.*;
+import org.apache.lucene.analysis.core.KeywordTokenizer;
import com.ibm.icu.text.Normalizer2;
@@ -77,4 +78,15 @@ public class TestICUNormalizer2Filter ex
public void testRandomStrings() throws Exception {
checkRandomData(random, a, 10000*RANDOM_MULTIPLIER);
}
+
+ public void testEmptyTerm() throws IOException {
+ Analyzer a = new Analyzer() {
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ Tokenizer tokenizer = new KeywordTokenizer(reader);
+ return new TokenStreamComponents(tokenizer, new ICUNormalizer2Filter(tokenizer));
+ }
+ };
+ checkOneTermReuse(a, "", "");
+ }
}
Modified: lucene/dev/trunk/modules/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUTransformFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUTransformFilter.java?rev=1305177&r1=1305176&r2=1305177&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUTransformFilter.java (original)
+++ lucene/dev/trunk/modules/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUTransformFilter.java Sun Mar 25 23:40:44 2012
@@ -100,4 +100,15 @@ public class TestICUTransformFilter exte
};
checkRandomData(random, a, 1000*RANDOM_MULTIPLIER);
}
+
+ public void testEmptyTerm() throws IOException {
+ Analyzer a = new Analyzer() {
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ Tokenizer tokenizer = new KeywordTokenizer(reader);
+ return new TokenStreamComponents(tokenizer, new ICUTransformFilter(tokenizer, Transliterator.getInstance("Any-Latin")));
+ }
+ };
+ checkOneTermReuse(a, "", "");
+ }
}
Modified: lucene/dev/trunk/modules/analysis/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/TestKuromojiBaseFormFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/TestKuromojiBaseFormFilter.java?rev=1305177&r1=1305176&r2=1305177&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/TestKuromojiBaseFormFilter.java (original)
+++ lucene/dev/trunk/modules/analysis/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/TestKuromojiBaseFormFilter.java Sun Mar 25 23:40:44 2012
@@ -23,6 +23,7 @@ import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.core.KeywordTokenizer;
public class TestKuromojiBaseFormFilter extends BaseTokenStreamTestCase {
private Analyzer analyzer = new Analyzer() {
@@ -47,4 +48,15 @@ public class TestKuromojiBaseFormFilter
public void testRandomStrings() throws IOException {
checkRandomData(random, analyzer, atLeast(10000));
}
+
+ public void testEmptyTerm() throws IOException {
+ Analyzer a = new Analyzer() {
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ Tokenizer tokenizer = new KeywordTokenizer(reader);
+ return new TokenStreamComponents(tokenizer, new KuromojiBaseFormFilter(tokenizer));
+ }
+ };
+ checkOneTermReuse(a, "", "");
+ }
}
Modified: lucene/dev/trunk/modules/analysis/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/TestKuromojiKatakanaStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/TestKuromojiKatakanaStemFilter.java?rev=1305177&r1=1305176&r2=1305177&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/TestKuromojiKatakanaStemFilter.java (original)
+++ lucene/dev/trunk/modules/analysis/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/TestKuromojiKatakanaStemFilter.java Sun Mar 25 23:40:44 2012
@@ -21,6 +21,7 @@ import org.apache.lucene.analysis.Analyz
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.core.KeywordTokenizer;
import java.io.IOException;
import java.io.Reader;
@@ -68,4 +69,15 @@ public class TestKuromojiKatakanaStemFil
public void testRandomData() throws IOException {
checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER);
}
+
+ public void testEmptyTerm() throws IOException {
+ Analyzer a = new Analyzer() {
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ Tokenizer tokenizer = new KeywordTokenizer(reader);
+ return new TokenStreamComponents(tokenizer, new KuromojiKatakanaStemFilter(tokenizer));
+ }
+ };
+ checkOneTermReuse(a, "", "");
+ }
}
Modified: lucene/dev/trunk/modules/analysis/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/TestKuromojiReadingFormFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/TestKuromojiReadingFormFilter.java?rev=1305177&r1=1305176&r2=1305177&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/TestKuromojiReadingFormFilter.java (original)
+++ lucene/dev/trunk/modules/analysis/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/TestKuromojiReadingFormFilter.java Sun Mar 25 23:40:44 2012
@@ -20,6 +20,7 @@ package org.apache.lucene.analysis.kurom
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.core.KeywordTokenizer;
import java.io.IOException;
import java.io.Reader;
@@ -61,4 +62,15 @@ public class TestKuromojiReadingFormFilt
checkRandomData(random, katakanaAnalyzer, 1000*RANDOM_MULTIPLIER);
checkRandomData(random, romajiAnalyzer, 1000*RANDOM_MULTIPLIER);
}
+
+ public void testEmptyTerm() throws IOException {
+ Analyzer a = new Analyzer() {
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ Tokenizer tokenizer = new KeywordTokenizer(reader);
+ return new TokenStreamComponents(tokenizer, new KuromojiReadingFormFilter(tokenizer));
+ }
+ };
+ checkOneTermReuse(a, "", "");
+ }
}
Modified: lucene/dev/trunk/modules/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/DoubleMetaphoneFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/DoubleMetaphoneFilterTest.java?rev=1305177&r1=1305176&r2=1305177&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/DoubleMetaphoneFilterTest.java (original)
+++ lucene/dev/trunk/modules/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/DoubleMetaphoneFilterTest.java Sun Mar 25 23:40:44 2012
@@ -16,6 +16,7 @@
*/
package org.apache.lucene.analysis.phonetic;
+import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
@@ -24,7 +25,7 @@ import org.apache.lucene.analysis.BaseTo
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.Analyzer.TokenStreamComponents;
+import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.util._TestUtil;
@@ -95,4 +96,15 @@ public class DoubleMetaphoneFilterTest e
};
checkRandomData(random, b, 1000 * RANDOM_MULTIPLIER);
}
+
+ public void testEmptyTerm() throws IOException {
+ Analyzer a = new Analyzer() {
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ Tokenizer tokenizer = new KeywordTokenizer(reader);
+ return new TokenStreamComponents(tokenizer, new DoubleMetaphoneFilter(tokenizer, 8, random.nextBoolean()));
+ }
+ };
+ checkOneTermReuse(a, "", "");
+ }
}
Modified: lucene/dev/trunk/modules/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestBeiderMorseFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestBeiderMorseFilter.java?rev=1305177&r1=1305176&r2=1305177&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestBeiderMorseFilter.java (original)
+++ lucene/dev/trunk/modules/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestBeiderMorseFilter.java Sun Mar 25 23:40:44 2012
@@ -17,6 +17,7 @@ package org.apache.lucene.analysis.phone
* limitations under the License.
*/
+import java.io.IOException;
import java.io.Reader;
import java.util.HashSet;
@@ -28,6 +29,7 @@ import org.apache.lucene.analysis.Analyz
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.junit.Ignore;
/** Tests {@link BeiderMorseFilter} */
@@ -91,4 +93,15 @@ public class TestBeiderMorseFilter exten
public void testRandom() throws Exception {
checkRandomData(random, analyzer, 1000 * RANDOM_MULTIPLIER);
}
+
+ public void testEmptyTerm() throws IOException {
+ Analyzer a = new Analyzer() {
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ Tokenizer tokenizer = new KeywordTokenizer(reader);
+ return new TokenStreamComponents(tokenizer, new BeiderMorseFilter(tokenizer, new PhoneticEngine(NameType.GENERIC, RuleType.EXACT, true)));
+ }
+ };
+ checkOneTermReuse(a, "", "");
+ }
}
Modified: lucene/dev/trunk/modules/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestPhoneticFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestPhoneticFilter.java?rev=1305177&r1=1305176&r2=1305177&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestPhoneticFilter.java (original)
+++ lucene/dev/trunk/modules/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestPhoneticFilter.java Sun Mar 25 23:40:44 2012
@@ -31,6 +31,7 @@ import org.apache.lucene.analysis.Analyz
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
/**
@@ -103,4 +104,20 @@ public class TestPhoneticFilter extends
checkRandomData(random, b, 1000*RANDOM_MULTIPLIER);
}
}
+
+ public void testEmptyTerm() throws IOException {
+ Encoder encoders[] = new Encoder[] {
+ new Metaphone(), new DoubleMetaphone(), new Soundex(), new RefinedSoundex(), new Caverphone()
+ };
+ for (final Encoder e : encoders) {
+ Analyzer a = new Analyzer() {
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ Tokenizer tokenizer = new KeywordTokenizer(reader);
+ return new TokenStreamComponents(tokenizer, new PhoneticFilter(tokenizer, e, random.nextBoolean()));
+ }
+ };
+ checkOneTermReuse(a, "", "");
+ }
+ }
}
Modified: lucene/dev/trunk/modules/analysis/smartcn/src/test/org/apache/lucene/analysis/cn/smart/TestSmartChineseAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/smartcn/src/test/org/apache/lucene/analysis/cn/smart/TestSmartChineseAnalyzer.java?rev=1305177&r1=1305176&r2=1305177&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/smartcn/src/test/org/apache/lucene/analysis/cn/smart/TestSmartChineseAnalyzer.java (original)
+++ lucene/dev/trunk/modules/analysis/smartcn/src/test/org/apache/lucene/analysis/cn/smart/TestSmartChineseAnalyzer.java Sun Mar 25 23:40:44 2012
@@ -17,6 +17,7 @@
package org.apache.lucene.analysis.cn.smart;
+import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
@@ -26,6 +27,7 @@ import org.apache.lucene.analysis.MockTo
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilter;
import org.apache.lucene.util.Version;
@@ -228,4 +230,15 @@ public class TestSmartChineseAnalyzer ex
public void testRandomHugeStrings() throws Exception {
checkRandomData(random, new SmartChineseAnalyzer(TEST_VERSION_CURRENT), 200*RANDOM_MULTIPLIER, 8192);
}
+
+ public void testEmptyTerm() throws IOException {
+ Analyzer a = new Analyzer() {
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ Tokenizer tokenizer = new KeywordTokenizer(reader);
+ return new TokenStreamComponents(tokenizer, new WordTokenFilter(tokenizer));
+ }
+ };
+ checkAnalysisConsistency(random, a, random.nextBoolean(), "");
+ }
}