You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2012/03/26 03:01:23 UTC
svn commit: r1305186 [2/2] - in /lucene/dev/branches/branch_3x: ./ lucene/
lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cz/
lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ar/
lucene/contrib/analyzers/common/s...
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymMapFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymMapFilter.java?rev=1305186&r1=1305185&r2=1305186&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymMapFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymMapFilter.java Mon Mar 26 01:01:21 2012
@@ -17,6 +17,7 @@
package org.apache.lucene.analysis.synonym;
+import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.ArrayList;
@@ -32,6 +33,7 @@ import org.apache.lucene.analysis.BaseTo
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.KeywordTokenizer;
import org.apache.lucene.analysis.tokenattributes.*;
import org.apache.lucene.analysis.ReusableAnalyzerBase;
import org.apache.lucene.util.CharsRef;
@@ -429,6 +431,29 @@ public class TestSynonymMapFilter extend
}
}
+ public void testEmptyTerm() throws IOException {
+ final int numIters = atLeast(10);
+ for (int i = 0; i < numIters; i++) {
+ b = new SynonymMap.Builder(random.nextBoolean());
+ final int numEntries = atLeast(10);
+ for (int j = 0; j < numEntries; j++) {
+ add(randomNonEmptyString(), randomNonEmptyString(), random.nextBoolean());
+ }
+ final SynonymMap map = b.build();
+ final boolean ignoreCase = random.nextBoolean();
+
+ final Analyzer analyzer = new ReusableAnalyzerBase() {
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ Tokenizer tokenizer = new KeywordTokenizer(reader);
+ return new TokenStreamComponents(tokenizer, new SynonymFilter(tokenizer, map, ignoreCase));
+ }
+ };
+
+ checkAnalysisConsistency(random, analyzer, random.nextBoolean(), "");
+ }
+ }
+
/** simple random test like testRandom2, but for large docs
*/
public void testRandomHuge() throws Exception {
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java?rev=1305186&r1=1305185&r2=1305186&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java Mon Mar 26 01:01:21 2012
@@ -17,11 +17,17 @@ package org.apache.lucene.analysis.th;
* limitations under the License.
*/
+import java.io.IOException;
+import java.io.Reader;
import java.io.StringReader;
+import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.ReusableAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.StopAnalyzer;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.KeywordTokenizer;
import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.util.Version;
@@ -184,4 +190,15 @@ public class TestThaiAnalyzer extends Ba
ts.addAttribute(FlagsAttribute.class);
assertTokenStreamContents(ts, new String[] { "ภาษา", "ไทย" });
}
+
+ public void testEmptyTerm() throws IOException {
+ Analyzer a = new ReusableAnalyzerBase() {
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ Tokenizer tokenizer = new KeywordTokenizer(reader);
+ return new TokenStreamComponents(tokenizer, new ThaiWordFilter(TEST_VERSION_CURRENT, tokenizer));
+ }
+ };
+ checkOneTermReuse(a, "", "");
+ }
}
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/tr/TestTurkishLowerCaseFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/tr/TestTurkishLowerCaseFilter.java?rev=1305186&r1=1305185&r2=1305186&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/tr/TestTurkishLowerCaseFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/tr/TestTurkishLowerCaseFilter.java Mon Mar 26 01:01:21 2012
@@ -17,11 +17,17 @@ package org.apache.lucene.analysis.tr;
* limitations under the License.
*/
+import java.io.IOException;
+import java.io.Reader;
import java.io.StringReader;
+import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.ReusableAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.KeywordTokenizer;
/**
* Test the Turkish lowercase filter.
@@ -62,4 +68,15 @@ public class TestTurkishLowerCaseFilter
assertTokenStreamContents(filter, new String[] {"i\u0316stanbul", "izmir",
"\u0131\u0316sparta",});
}
+
+ public void testEmptyTerm() throws IOException {
+ Analyzer a = new ReusableAnalyzerBase() {
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ Tokenizer tokenizer = new KeywordTokenizer(reader);
+ return new TokenStreamComponents(tokenizer, new TurkishLowerCaseFilter(tokenizer));
+ }
+ };
+ checkOneTermReuse(a, "", "");
+ }
}
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/TestKuromojiBaseFormFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/TestKuromojiBaseFormFilter.java?rev=1305186&r1=1305185&r2=1305186&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/TestKuromojiBaseFormFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/TestKuromojiBaseFormFilter.java Mon Mar 26 01:01:21 2012
@@ -24,6 +24,7 @@ import org.apache.lucene.analysis.Analyz
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.ReusableAnalyzerBase;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.KeywordTokenizer;
public class TestKuromojiBaseFormFilter extends BaseTokenStreamTestCase {
private Analyzer analyzer = new ReusableAnalyzerBase() {
@@ -48,4 +49,15 @@ public class TestKuromojiBaseFormFilter
public void testRandomStrings() throws IOException {
checkRandomData(random, analyzer, atLeast(10000));
}
+
+ public void testEmptyTerm() throws IOException {
+ Analyzer a = new ReusableAnalyzerBase() {
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ Tokenizer tokenizer = new KeywordTokenizer(reader);
+ return new TokenStreamComponents(tokenizer, new KuromojiBaseFormFilter(tokenizer));
+ }
+ };
+ checkOneTermReuse(a, "", "");
+ }
}
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/TestKuromojiKatakanaStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/TestKuromojiKatakanaStemFilter.java?rev=1305186&r1=1305185&r2=1305186&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/TestKuromojiKatakanaStemFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/TestKuromojiKatakanaStemFilter.java Mon Mar 26 01:01:21 2012
@@ -22,6 +22,7 @@ import org.apache.lucene.analysis.BaseTo
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.ReusableAnalyzerBase;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.KeywordTokenizer;
import java.io.IOException;
import java.io.Reader;
@@ -69,4 +70,15 @@ public class TestKuromojiKatakanaStemFil
public void testRandomData() throws IOException {
checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER);
}
+
+ public void testEmptyTerm() throws IOException {
+ Analyzer a = new ReusableAnalyzerBase() {
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ Tokenizer tokenizer = new KeywordTokenizer(reader);
+ return new TokenStreamComponents(tokenizer, new KuromojiKatakanaStemFilter(tokenizer));
+ }
+ };
+ checkOneTermReuse(a, "", "");
+ }
}
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/TestKuromojiReadingFormFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/TestKuromojiReadingFormFilter.java?rev=1305186&r1=1305185&r2=1305186&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/TestKuromojiReadingFormFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/TestKuromojiReadingFormFilter.java Mon Mar 26 01:01:21 2012
@@ -21,6 +21,7 @@ import org.apache.lucene.analysis.Analyz
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.ReusableAnalyzerBase;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.KeywordTokenizer;
import java.io.IOException;
import java.io.Reader;
@@ -62,4 +63,15 @@ public class TestKuromojiReadingFormFilt
checkRandomData(random, katakanaAnalyzer, 1000*RANDOM_MULTIPLIER);
checkRandomData(random, romajiAnalyzer, 1000*RANDOM_MULTIPLIER);
}
+
+ public void testEmptyTerm() throws IOException {
+ Analyzer a = new ReusableAnalyzerBase() {
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ Tokenizer tokenizer = new KeywordTokenizer(reader);
+ return new TokenStreamComponents(tokenizer, new KuromojiReadingFormFilter(tokenizer));
+ }
+ };
+ checkOneTermReuse(a, "", "");
+ }
}
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/phonetic/src/test/org/apache/lucene/analysis/phonetic/DoubleMetaphoneFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/phonetic/src/test/org/apache/lucene/analysis/phonetic/DoubleMetaphoneFilterTest.java?rev=1305186&r1=1305185&r2=1305186&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/phonetic/src/test/org/apache/lucene/analysis/phonetic/DoubleMetaphoneFilterTest.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/phonetic/src/test/org/apache/lucene/analysis/phonetic/DoubleMetaphoneFilterTest.java Mon Mar 26 01:01:21 2012
@@ -16,6 +16,7 @@
*/
package org.apache.lucene.analysis.phonetic;
+import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
@@ -26,6 +27,7 @@ import org.apache.lucene.analysis.Reusab
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.KeywordTokenizer;
import org.apache.lucene.util._TestUtil;
public class DoubleMetaphoneFilterTest extends BaseTokenStreamTestCase {
@@ -95,4 +97,15 @@ public class DoubleMetaphoneFilterTest e
};
checkRandomData(random, b, 1000 * RANDOM_MULTIPLIER);
}
+
+ public void testEmptyTerm() throws IOException {
+ Analyzer a = new ReusableAnalyzerBase() {
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ Tokenizer tokenizer = new KeywordTokenizer(reader);
+ return new TokenStreamComponents(tokenizer, new DoubleMetaphoneFilter(tokenizer, 8, random.nextBoolean()));
+ }
+ };
+ checkOneTermReuse(a, "", "");
+ }
}
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestBeiderMorseFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestBeiderMorseFilter.java?rev=1305186&r1=1305185&r2=1305186&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestBeiderMorseFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestBeiderMorseFilter.java Mon Mar 26 01:01:21 2012
@@ -17,6 +17,7 @@ package org.apache.lucene.analysis.phone
* limitations under the License.
*/
+import java.io.IOException;
import java.io.Reader;
import java.util.HashSet;
@@ -29,6 +30,7 @@ import org.apache.lucene.analysis.BaseTo
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.ReusableAnalyzerBase;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.KeywordTokenizer;
import org.junit.Ignore;
/** Tests {@link BeiderMorseFilter} */
@@ -92,4 +94,15 @@ public class TestBeiderMorseFilter exten
public void testRandom() throws Exception {
checkRandomData(random, analyzer, 1000 * RANDOM_MULTIPLIER);
}
+
+ public void testEmptyTerm() throws IOException {
+ Analyzer a = new ReusableAnalyzerBase() {
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ Tokenizer tokenizer = new KeywordTokenizer(reader);
+ return new TokenStreamComponents(tokenizer, new BeiderMorseFilter(tokenizer, new PhoneticEngine(NameType.GENERIC, RuleType.EXACT, true)));
+ }
+ };
+ checkOneTermReuse(a, "", "");
+ }
}
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestPhoneticFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestPhoneticFilter.java?rev=1305186&r1=1305185&r2=1305186&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestPhoneticFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestPhoneticFilter.java Mon Mar 26 01:01:21 2012
@@ -33,6 +33,7 @@ import org.apache.lucene.analysis.MockTo
import org.apache.lucene.analysis.ReusableAnalyzerBase;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.WhitespaceTokenizer;
+import org.apache.lucene.analysis.KeywordTokenizer;
/**
* Tests {@link PhoneticFilter}
@@ -104,4 +105,20 @@ public class TestPhoneticFilter extends
checkRandomData(random, b, 1000*RANDOM_MULTIPLIER);
}
}
+
+ public void testEmptyTerm() throws IOException {
+ Encoder encoders[] = new Encoder[] {
+ new Metaphone(), new DoubleMetaphone(), new Soundex(), new RefinedSoundex(), new Caverphone()
+ };
+ for (final Encoder e : encoders) {
+ Analyzer a = new ReusableAnalyzerBase() {
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ Tokenizer tokenizer = new KeywordTokenizer(reader);
+ return new TokenStreamComponents(tokenizer, new PhoneticFilter(tokenizer, e, random.nextBoolean()));
+ }
+ };
+ checkOneTermReuse(a, "", "");
+ }
+ }
}
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/smartcn/src/test/org/apache/lucene/analysis/cn/smart/TestSmartChineseAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/smartcn/src/test/org/apache/lucene/analysis/cn/smart/TestSmartChineseAnalyzer.java?rev=1305186&r1=1305185&r2=1305186&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/smartcn/src/test/org/apache/lucene/analysis/cn/smart/TestSmartChineseAnalyzer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/smartcn/src/test/org/apache/lucene/analysis/cn/smart/TestSmartChineseAnalyzer.java Mon Mar 26 01:01:21 2012
@@ -17,6 +17,7 @@
package org.apache.lucene.analysis.cn.smart;
+import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
@@ -28,6 +29,7 @@ import org.apache.lucene.analysis.TokenF
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.ASCIIFoldingFilter;
+import org.apache.lucene.analysis.KeywordTokenizer;
import org.apache.lucene.util.Version;
public class TestSmartChineseAnalyzer extends BaseTokenStreamTestCase {
@@ -229,4 +231,15 @@ public class TestSmartChineseAnalyzer ex
public void testRandomHugeStrings() throws Exception {
checkRandomData(random, new SmartChineseAnalyzer(TEST_VERSION_CURRENT), 200*RANDOM_MULTIPLIER, 8192);
}
+
+ public void testEmptyTerm() throws IOException {
+ Analyzer a = new ReusableAnalyzerBase() {
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ Tokenizer tokenizer = new KeywordTokenizer(reader);
+ return new TokenStreamComponents(tokenizer, new WordTokenFilter(tokenizer));
+ }
+ };
+ checkAnalysisConsistency(random, a, random.nextBoolean(), "");
+ }
}
Modified: lucene/dev/branches/branch_3x/lucene/contrib/icu/src/test/org/apache/lucene/analysis/icu/TestICUFoldingFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/icu/src/test/org/apache/lucene/analysis/icu/TestICUFoldingFilter.java?rev=1305186&r1=1305185&r2=1305186&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/icu/src/test/org/apache/lucene/analysis/icu/TestICUFoldingFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/icu/src/test/org/apache/lucene/analysis/icu/TestICUFoldingFilter.java Mon Mar 26 01:01:21 2012
@@ -77,4 +77,15 @@ public class TestICUFoldingFilter extend
public void testRandomStrings() throws Exception {
checkRandomData(random, a, 10000*RANDOM_MULTIPLIER);
}
+
+ public void testEmptyTerm() throws IOException {
+ Analyzer a = new ReusableAnalyzerBase() {
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ Tokenizer tokenizer = new KeywordTokenizer(reader);
+ return new TokenStreamComponents(tokenizer, new ICUFoldingFilter(tokenizer));
+ }
+ };
+ checkOneTermReuse(a, "", "");
+ }
}
Modified: lucene/dev/branches/branch_3x/lucene/contrib/icu/src/test/org/apache/lucene/analysis/icu/TestICUNormalizer2Filter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/icu/src/test/org/apache/lucene/analysis/icu/TestICUNormalizer2Filter.java?rev=1305186&r1=1305185&r2=1305186&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/icu/src/test/org/apache/lucene/analysis/icu/TestICUNormalizer2Filter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/icu/src/test/org/apache/lucene/analysis/icu/TestICUNormalizer2Filter.java Mon Mar 26 01:01:21 2012
@@ -21,6 +21,7 @@ import java.io.IOException;
import java.io.Reader;
import org.apache.lucene.analysis.*;
+import org.apache.lucene.analysis.KeywordTokenizer;
import com.ibm.icu.text.Normalizer2;
@@ -77,4 +78,15 @@ public class TestICUNormalizer2Filter ex
public void testRandomStrings() throws Exception {
checkRandomData(random, a, 10000*RANDOM_MULTIPLIER);
}
+
+ public void testEmptyTerm() throws IOException {
+ Analyzer a = new ReusableAnalyzerBase() {
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ Tokenizer tokenizer = new KeywordTokenizer(reader);
+ return new TokenStreamComponents(tokenizer, new ICUNormalizer2Filter(tokenizer));
+ }
+ };
+ checkOneTermReuse(a, "", "");
+ }
}
Modified: lucene/dev/branches/branch_3x/lucene/contrib/icu/src/test/org/apache/lucene/analysis/icu/TestICUTransformFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/icu/src/test/org/apache/lucene/analysis/icu/TestICUTransformFilter.java?rev=1305186&r1=1305185&r2=1305186&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/icu/src/test/org/apache/lucene/analysis/icu/TestICUTransformFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/icu/src/test/org/apache/lucene/analysis/icu/TestICUTransformFilter.java Mon Mar 26 01:01:21 2012
@@ -101,4 +101,15 @@ public class TestICUTransformFilter exte
};
checkRandomData(random, a, 1000*RANDOM_MULTIPLIER);
}
+
+ public void testEmptyTerm() throws IOException {
+ Analyzer a = new ReusableAnalyzerBase() {
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ Tokenizer tokenizer = new KeywordTokenizer(reader);
+ return new TokenStreamComponents(tokenizer, new ICUTransformFilter(tokenizer, Transliterator.getInstance("Any-Latin")));
+ }
+ };
+ checkOneTermReuse(a, "", "");
+ }
}
Modified: lucene/dev/branches/branch_3x/lucene/core/src/test/org/apache/lucene/analysis/TestASCIIFoldingFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/core/src/test/org/apache/lucene/analysis/TestASCIIFoldingFilter.java?rev=1305186&r1=1305185&r2=1305186&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/core/src/test/org/apache/lucene/analysis/TestASCIIFoldingFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/core/src/test/org/apache/lucene/analysis/TestASCIIFoldingFilter.java Mon Mar 26 01:01:21 2012
@@ -22,8 +22,10 @@ import org.apache.lucene.analysis.BaseTo
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.KeywordTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.List;
@@ -1923,4 +1925,15 @@ public class TestASCIIFoldingFilter exte
};
checkRandomData(random, a, 10000*RANDOM_MULTIPLIER);
}
+
+ public void testEmptyTerm() throws IOException {
+ Analyzer a = new ReusableAnalyzerBase() {
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ Tokenizer tokenizer = new KeywordTokenizer(reader);
+ return new TokenStreamComponents(tokenizer, new ASCIIFoldingFilter(tokenizer));
+ }
+ };
+ checkOneTermReuse(a, "", "");
+ }
}
Modified: lucene/dev/branches/branch_3x/lucene/core/src/test/org/apache/lucene/analysis/TestLengthFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/core/src/test/org/apache/lucene/analysis/TestLengthFilter.java?rev=1305186&r1=1305185&r2=1305186&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/core/src/test/org/apache/lucene/analysis/TestLengthFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/core/src/test/org/apache/lucene/analysis/TestLengthFilter.java Mon Mar 26 01:01:21 2012
@@ -17,6 +17,8 @@ package org.apache.lucene.analysis;
* limitations under the License.
*/
+import java.io.IOException;
+import java.io.Reader;
import java.io.StringReader;
public class TestLengthFilter extends BaseTokenStreamTestCase {
@@ -40,5 +42,16 @@ public class TestLengthFilter extends Ba
new int[]{1, 4, 2}
);
}
+
+ public void testEmptyTerm() throws IOException {
+ Analyzer a = new ReusableAnalyzerBase() {
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ Tokenizer tokenizer = new KeywordTokenizer(reader);
+ return new TokenStreamComponents(tokenizer, new LengthFilter(true, tokenizer, 0, 5));
+ }
+ };
+ checkOneTermReuse(a, "", "");
+ }
}
Modified: lucene/dev/branches/branch_3x/lucene/core/src/test/org/apache/lucene/analysis/TestPorterStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/core/src/test/org/apache/lucene/analysis/TestPorterStemFilter.java?rev=1305186&r1=1305185&r2=1305186&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/core/src/test/org/apache/lucene/analysis/TestPorterStemFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/core/src/test/org/apache/lucene/analysis/TestPorterStemFilter.java Mon Mar 26 01:01:21 2012
@@ -62,4 +62,15 @@ public class TestPorterStemFilter extend
public void testRandomStrings() throws Exception {
checkRandomData(random, a, 10000*RANDOM_MULTIPLIER);
}
+
+ public void testEmptyTerm() throws IOException {
+ Analyzer a = new ReusableAnalyzerBase() {
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ Tokenizer tokenizer = new KeywordTokenizer(reader);
+ return new TokenStreamComponents(tokenizer, new PorterStemFilter(tokenizer));
+ }
+ };
+ checkOneTermReuse(a, "", "");
+ }
}
Modified: lucene/dev/branches/branch_3x/solr/core/src/test/org/apache/solr/analysis/TestCapitalizationFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/core/src/test/org/apache/solr/analysis/TestCapitalizationFilter.java?rev=1305186&r1=1305185&r2=1305186&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/core/src/test/org/apache/solr/analysis/TestCapitalizationFilter.java (original)
+++ lucene/dev/branches/branch_3x/solr/core/src/test/org/apache/solr/analysis/TestCapitalizationFilter.java Mon Mar 26 01:01:21 2012
@@ -17,6 +17,7 @@
package org.apache.solr.analysis;
+import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.HashMap;
@@ -30,6 +31,7 @@ import org.apache.lucene.analysis.BaseTo
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.WhitespaceTokenizer;
+import org.apache.lucene.analysis.KeywordTokenizer;
/**
@@ -233,4 +235,19 @@ public class TestCapitalizationFilter ex
checkRandomData(random, a, 10000*RANDOM_MULTIPLIER);
}
+
+ public void testEmptyTerm() throws IOException {
+ Analyzer a = new ReusableAnalyzerBase() {
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ Tokenizer tokenizer = new KeywordTokenizer(reader);
+ Map<String,String> args = new HashMap<String,String>(DEFAULT_VERSION_PARAM);
+ CapitalizationFilterFactory factory = new CapitalizationFilterFactory();
+ factory.init(args);
+ TokenStream filter = factory.create(tokenizer);
+ return new TokenStreamComponents(tokenizer, filter);
+ }
+ };
+ checkOneTermReuse(a, "", "");
+ }
}
Modified: lucene/dev/branches/branch_3x/solr/core/src/test/org/apache/solr/analysis/TestHyphenatedWordsFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/core/src/test/org/apache/solr/analysis/TestHyphenatedWordsFilter.java?rev=1305186&r1=1305185&r2=1305186&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/core/src/test/org/apache/solr/analysis/TestHyphenatedWordsFilter.java (original)
+++ lucene/dev/branches/branch_3x/solr/core/src/test/org/apache/solr/analysis/TestHyphenatedWordsFilter.java Mon Mar 26 01:01:21 2012
@@ -17,6 +17,7 @@
package org.apache.solr.analysis;
+import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
@@ -25,6 +26,7 @@ import org.apache.lucene.analysis.MockTo
import org.apache.lucene.analysis.ReusableAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.KeywordTokenizer;
/**
* HyphenatedWordsFilter test
@@ -76,4 +78,15 @@ public class TestHyphenatedWordsFilter e
checkRandomData(random, a, 10000*RANDOM_MULTIPLIER);
}
+
+ public void testEmptyTerm() throws IOException {
+ Analyzer a = new ReusableAnalyzerBase() {
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ Tokenizer tokenizer = new KeywordTokenizer(reader);
+ return new TokenStreamComponents(tokenizer, new HyphenatedWordsFilter(tokenizer));
+ }
+ };
+ checkOneTermReuse(a, "", "");
+ }
}
Modified: lucene/dev/branches/branch_3x/solr/core/src/test/org/apache/solr/analysis/TestPatternReplaceFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/core/src/test/org/apache/solr/analysis/TestPatternReplaceFilter.java?rev=1305186&r1=1305185&r2=1305186&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/core/src/test/org/apache/solr/analysis/TestPatternReplaceFilter.java (original)
+++ lucene/dev/branches/branch_3x/solr/core/src/test/org/apache/solr/analysis/TestPatternReplaceFilter.java Mon Mar 26 01:01:21 2012
@@ -23,7 +23,9 @@ import org.apache.lucene.analysis.MockTo
import org.apache.lucene.analysis.ReusableAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.KeywordTokenizer;
+import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.regex.Pattern;
@@ -105,5 +107,16 @@ public class TestPatternReplaceFilter ex
};
checkRandomData(random, b, 10000*RANDOM_MULTIPLIER);
}
+
+ public void testEmptyTerm() throws IOException {
+ Analyzer a = new ReusableAnalyzerBase() {
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ Tokenizer tokenizer = new KeywordTokenizer(reader);
+ return new TokenStreamComponents(tokenizer, new PatternReplaceFilter(tokenizer, Pattern.compile("a"), "b", true));
+ }
+ };
+ checkOneTermReuse(a, "", "");
+ }
}
Modified: lucene/dev/branches/branch_3x/solr/core/src/test/org/apache/solr/analysis/TestRemoveDuplicatesTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/core/src/test/org/apache/solr/analysis/TestRemoveDuplicatesTokenFilter.java?rev=1305186&r1=1305185&r2=1305186&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/core/src/test/org/apache/solr/analysis/TestRemoveDuplicatesTokenFilter.java (original)
+++ lucene/dev/branches/branch_3x/solr/core/src/test/org/apache/solr/analysis/TestRemoveDuplicatesTokenFilter.java Mon Mar 26 01:01:21 2012
@@ -24,6 +24,7 @@ import org.apache.lucene.analysis.Reusab
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.KeywordTokenizer;
import org.apache.lucene.analysis.synonym.SynonymFilter;
import org.apache.lucene.analysis.synonym.SynonymMap;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
@@ -32,6 +33,7 @@ import org.apache.lucene.analysis.tokena
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util._TestUtil;
+import java.io.IOException;
import java.io.Reader;
import java.util.Iterator;
import java.util.Arrays;
@@ -166,5 +168,16 @@ public class TestRemoveDuplicatesTokenFi
checkRandomData(random, analyzer, 1000*RANDOM_MULTIPLIER);
}
}
+
+ public void testEmptyTerm() throws IOException { // exercises the filter on an empty term: checkOneTermReuse gets "" for both string arguments
+ checkOneTermReuse(new ReusableAnalyzerBase() {
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ final Tokenizer source = new KeywordTokenizer(reader);
+ final TokenStream deduplicated = new RemoveDuplicatesTokenFilter(source);
+ return new TokenStreamComponents(source, deduplicated);
+ }
+ }, "", "");
+ }
}
Modified: lucene/dev/branches/branch_3x/solr/core/src/test/org/apache/solr/analysis/TestTrimFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/core/src/test/org/apache/solr/analysis/TestTrimFilter.java?rev=1305186&r1=1305185&r2=1305186&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/core/src/test/org/apache/solr/analysis/TestTrimFilter.java (original)
+++ lucene/dev/branches/branch_3x/solr/core/src/test/org/apache/solr/analysis/TestTrimFilter.java Mon Mar 26 01:01:21 2012
@@ -36,6 +36,7 @@ import org.apache.lucene.analysis.tokena
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.KeywordTokenizer;
import org.apache.lucene.analysis.tokenattributes.*;
/**
@@ -145,4 +146,15 @@ public class TestTrimFilter extends Base
};
checkRandomData(random, b, 10000*RANDOM_MULTIPLIER);
}
+
+ public void testEmptyTerm() throws IOException { // exercises the filter on an empty term: checkOneTermReuse gets "" for both string arguments
+ checkOneTermReuse(new ReusableAnalyzerBase() {
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ final Tokenizer source = new KeywordTokenizer(reader);
+ final boolean randomizedOption = random.nextBoolean(); // drawn each time components are built
+ return new TokenStreamComponents(source, new TrimFilter(source, randomizedOption));
+ }
+ }, "", "");
+ }
}
Modified: lucene/dev/branches/branch_3x/solr/core/src/test/org/apache/solr/analysis/TestWordDelimiterFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/core/src/test/org/apache/solr/analysis/TestWordDelimiterFilter.java?rev=1305186&r1=1305185&r2=1305186&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/core/src/test/org/apache/solr/analysis/TestWordDelimiterFilter.java (original)
+++ lucene/dev/branches/branch_3x/solr/core/src/test/org/apache/solr/analysis/TestWordDelimiterFilter.java Mon Mar 26 01:01:21 2012
@@ -29,6 +29,8 @@ import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.miscellaneous.SingleTokenTokenStream;
+import org.apache.lucene.analysis.KeywordTokenizer;
+import org.apache.lucene.analysis.cz.CzechStemFilter;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
@@ -524,4 +526,26 @@ public class TestWordDelimiterFilter ext
checkRandomData(random, a, 10000*RANDOM_MULTIPLIER);
}
}
+
+ public void testEmptyTerm() throws IOException {
+ // NOTE(review): 512 presumably spans every combination of the filter's flag bits -- confirm
+ for (int combo = 0; combo < 512; combo++) {
+ final int configFlags = combo;
+ // coin flip: run with a small protected-word set, or with none at all
+ final CharArraySet protWords = random.nextBoolean()
+ ? new CharArraySet(TEST_VERSION_CURRENT, new HashSet<String>(Arrays.asList("a", "b", "cd")), false)
+ : null;
+
+ final Analyzer analyzer = new ReusableAnalyzerBase() {
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ final Tokenizer source = new KeywordTokenizer(reader);
+ return new TokenStreamComponents(source, new WordDelimiterFilter(source, configFlags, protWords));
+ }
+ };
+ // the options decide whether the empty term is preserved, so no fixed output is
+ // asserted; checkAnalysisConsistency is used instead of an exact-term check
+ checkAnalysisConsistency(random, analyzer, random.nextBoolean(), "");
+ }
+ }
}