You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2012/02/09 21:29:26 UTC
svn commit: r1242509 - in /lucene/dev/branches/branch_3x:
lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/nl/
lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ar/
lucene/contrib/analyzers/common/src/test/org/apach...
Author: rmuir
Date: Thu Feb 9 20:29:25 2012
New Revision: 1242509
URL: http://svn.apache.org/viewvc?rev=1242509&view=rev
Log:
LUCENE-3765: Trappy behavior with StopFilter/ignoreCase
Modified:
lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java
lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ar/TestArabicAnalyzer.java
lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/bg/TestBulgarianAnalyzer.java
lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/br/TestBrazilianStemmer.java
lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ca/TestCatalanAnalyzer.java
lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/da/TestDanishAnalyzer.java
lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/de/TestGermanAnalyzer.java
lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/en/TestEnglishAnalyzer.java
lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/es/TestSpanishAnalyzer.java
lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/eu/TestBasqueAnalyzer.java
lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/fa/TestPersianAnalyzer.java
lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/fi/TestFinnishAnalyzer.java
lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/fr/TestElision.java
lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/gl/TestGalicianAnalyzer.java
lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/hi/TestHindiAnalyzer.java
lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/hu/TestHungarianAnalyzer.java
lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/hy/TestArmenianAnalyzer.java
lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/id/TestIndonesianAnalyzer.java
lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/it/TestItalianAnalyzer.java
lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/lv/TestLatvianAnalyzer.java
lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/miscellaneous/TestStemmerOverrideFilter.java
lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/nl/TestDutchStemmer.java
lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/no/TestNorwegianAnalyzer.java
lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseAnalyzer.java
lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ro/TestRomanianAnalyzer.java
lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/sv/TestSwedishAnalyzer.java
lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/tr/TestTurkishAnalyzer.java
lucene/dev/branches/branch_3x/lucene/contrib/analyzers/stempel/src/test/org/apache/lucene/analysis/pl/TestPolishAnalyzer.java
lucene/dev/branches/branch_3x/lucene/core/src/java/org/apache/lucene/analysis/StopFilter.java
lucene/dev/branches/branch_3x/lucene/core/src/java/org/apache/lucene/analysis/standard/ClassicAnalyzer.java
lucene/dev/branches/branch_3x/lucene/core/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java
lucene/dev/branches/branch_3x/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java
lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/CommonGramsFilter.java
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java?rev=1242509&r1=1242508&r2=1242509&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java Thu Feb 9 20:29:25 2012
@@ -59,6 +59,9 @@ import java.util.Map;
* <p>You must specify the required {@link Version}
* compatibility when creating DutchAnalyzer:
* <ul>
+ * <li> As of 3.6, {@link #DutchAnalyzer(Version, Set)} and
+ * {@link #DutchAnalyzer(Version, Set, Set)} also populate
+ * the default entries for the stem override dictionary
* <li> As of 3.1, Snowball stemming is done with SnowballFilter,
* LowerCaseFilter is used prior to StopFilter, and Snowball
* stopwords are used by default.
@@ -99,6 +102,7 @@ public final class DutchAnalyzer extends
private static class DefaultSetHolder {
static final Set<?> DEFAULT_STOP_SET;
+ static final CharArrayMap<String> DEFAULT_STEM_DICT;
static {
try {
@@ -109,6 +113,12 @@ public final class DutchAnalyzer extends
// distribution (JAR)
throw new RuntimeException("Unable to load default stopword set");
}
+
+ DEFAULT_STEM_DICT = new CharArrayMap<String>(Version.LUCENE_CURRENT, 4, false);
+ DEFAULT_STEM_DICT.put("fiets", "fiets"); //otherwise fiet
+ DEFAULT_STEM_DICT.put("bromfiets", "bromfiets"); //otherwise bromfiet
+ DEFAULT_STEM_DICT.put("ei", "eier");
+ DEFAULT_STEM_DICT.put("kind", "kinder");
}
}
@@ -124,7 +134,7 @@ public final class DutchAnalyzer extends
*/
private Set<?> excltable = Collections.emptySet();
- private Map<Object,String> stemdict = CharArrayMap.emptyMap();
+ private CharArrayMap<String> stemdict = CharArrayMap.emptyMap();
private final Version matchVersion;
/**
@@ -133,22 +143,33 @@ public final class DutchAnalyzer extends
*
*/
public DutchAnalyzer(Version matchVersion) {
- this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
- stemdict = new CharArrayMap<String>(matchVersion, 16, false);
- stemdict.put("fiets", "fiets"); //otherwise fiet
- stemdict.put("bromfiets", "bromfiets"); //otherwise bromfiet
- stemdict.put("ei", "eier");
- stemdict.put("kind", "kinder");
+ // historically, only this ctor populated the stem dict!!!!!
+ this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET, CharArraySet.EMPTY_SET, DefaultSetHolder.DEFAULT_STEM_DICT);
}
public DutchAnalyzer(Version matchVersion, Set<?> stopwords){
- this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
+ // historically, this ctor never the stem dict!!!!!
+ // so we populate it only for >= 3.6
+ this(matchVersion, stopwords, CharArraySet.EMPTY_SET,
+ matchVersion.onOrAfter(Version.LUCENE_36)
+ ? DefaultSetHolder.DEFAULT_STEM_DICT
+ : CharArrayMap.<String>emptyMap());
}
public DutchAnalyzer(Version matchVersion, Set<?> stopwords, Set<?> stemExclusionTable){
- stoptable = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stopwords));
- excltable = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclusionTable));
+ // historically, this ctor never the stem dict!!!!!
+ // so we populate it only for >= 3.6
+ this(matchVersion, stopwords, stemExclusionTable,
+ matchVersion.onOrAfter(Version.LUCENE_36)
+ ? DefaultSetHolder.DEFAULT_STEM_DICT
+ : CharArrayMap.<String>emptyMap());
+ }
+
+ public DutchAnalyzer(Version matchVersion, Set<?> stopwords, Set<?> stemExclusionTable, CharArrayMap<String> stemOverrideDict) {
this.matchVersion = matchVersion;
+ this.stoptable = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stopwords));
+ this.excltable = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclusionTable));
+ this.stemdict = CharArrayMap.unmodifiableMap(CharArrayMap.copy(matchVersion, stemOverrideDict));
}
/**
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ar/TestArabicAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ar/TestArabicAnalyzer.java?rev=1242509&r1=1242508&r2=1242509&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ar/TestArabicAnalyzer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ar/TestArabicAnalyzer.java Thu Feb 9 20:29:25 2012
@@ -18,9 +18,6 @@ package org.apache.lucene.analysis.ar;
*/
import java.io.IOException;
-import java.util.Collections;
-import java.util.HashSet;
-import java.util.Set;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.CharArraySet;
@@ -79,16 +76,14 @@ public class TestArabicAnalyzer extends
* Test that custom stopwords work, and are not case-sensitive.
*/
public void testCustomStopwords() throws Exception {
- Set<String> set = new HashSet<String>();
- Collections.addAll(set, "the", "and", "a");
+ CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, asSet("the", "and", "a"), false);
ArabicAnalyzer a = new ArabicAnalyzer(TEST_VERSION_CURRENT, set);
assertAnalyzesTo(a, "The quick brown fox.", new String[] { "quick",
"brown", "fox" });
}
public void testWithStemExclusionSet() throws IOException {
- Set<String> set = new HashSet<String>();
- set.add("ساهدهات");
+ CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, asSet("ساهدهات"), false);
ArabicAnalyzer a = new ArabicAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, set);
assertAnalyzesTo(a, "كبيرة the quick ساهدهات", new String[] { "كبير","the", "quick", "ساهدهات" });
assertAnalyzesToReuse(a, "كبيرة the quick ساهدهات", new String[] { "كبير","the", "quick", "ساهدهات" });
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/bg/TestBulgarianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/bg/TestBulgarianAnalyzer.java?rev=1242509&r1=1242508&r2=1242509&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/bg/TestBulgarianAnalyzer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/bg/TestBulgarianAnalyzer.java Thu Feb 9 20:29:25 2012
@@ -18,7 +18,6 @@ package org.apache.lucene.analysis.bg;
*/
import java.io.IOException;
-import java.util.Collections;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
@@ -43,8 +42,7 @@ public class TestBulgarianAnalyzer exten
}
public void testCustomStopwords() throws IOException {
- Analyzer a = new BulgarianAnalyzer(TEST_VERSION_CURRENT, Collections
- .emptySet());
+ Analyzer a = new BulgarianAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET);
assertAnalyzesTo(a, "Как се казваш?",
new String[] {"как", "се", "казваш"});
}
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/br/TestBrazilianStemmer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/br/TestBrazilianStemmer.java?rev=1242509&r1=1242508&r2=1242509&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/br/TestBrazilianStemmer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/br/TestBrazilianStemmer.java Thu Feb 9 20:29:25 2012
@@ -135,8 +135,8 @@ public class TestBrazilianStemmer extend
}
public void testStemExclusionTable() throws Exception {
- BrazilianAnalyzer a = new BrazilianAnalyzer(TEST_VERSION_CURRENT);
- a.setStemExclusionTable("quintessência");
+ BrazilianAnalyzer a = new BrazilianAnalyzer(TEST_VERSION_CURRENT,
+ CharArraySet.EMPTY_SET, new CharArraySet(TEST_VERSION_CURRENT, asSet("quintessência"), false));
checkReuse(a, "quintessência", "quintessência"); // excluded words will be completely unchanged.
}
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ca/TestCatalanAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ca/TestCatalanAnalyzer.java?rev=1242509&r1=1242508&r2=1242509&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ca/TestCatalanAnalyzer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ca/TestCatalanAnalyzer.java Thu Feb 9 20:29:25 2012
@@ -18,11 +18,10 @@ package org.apache.lucene.analysis.ca;
*/
import java.io.IOException;
-import java.util.HashSet;
-import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
public class TestCatalanAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the
@@ -50,8 +49,7 @@ public class TestCatalanAnalyzer extends
/** test use of exclusion set */
public void testExclude() throws IOException {
- Set<String> exclusionSet = new HashSet<String>();
- exclusionSet.add("llengües");
+ CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("llengües"), false);
Analyzer a = new CatalanAnalyzer(TEST_VERSION_CURRENT,
CatalanAnalyzer.getDefaultStopSet(), exclusionSet);
checkOneTermReuse(a, "llengües", "llengües");
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/da/TestDanishAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/da/TestDanishAnalyzer.java?rev=1242509&r1=1242508&r2=1242509&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/da/TestDanishAnalyzer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/da/TestDanishAnalyzer.java Thu Feb 9 20:29:25 2012
@@ -18,11 +18,10 @@ package org.apache.lucene.analysis.da;
*/
import java.io.IOException;
-import java.util.HashSet;
-import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
public class TestDanishAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the
@@ -43,8 +42,7 @@ public class TestDanishAnalyzer extends
/** test use of exclusion set */
public void testExclude() throws IOException {
- Set<String> exclusionSet = new HashSet<String>();
- exclusionSet.add("undersøgelse");
+ CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("undersøgelse"), false);
Analyzer a = new DanishAnalyzer(TEST_VERSION_CURRENT,
DanishAnalyzer.getDefaultStopSet(), exclusionSet);
checkOneTermReuse(a, "undersøgelse", "undersøgelse");
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/de/TestGermanAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/de/TestGermanAnalyzer.java?rev=1242509&r1=1242508&r2=1242509&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/de/TestGermanAnalyzer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/de/TestGermanAnalyzer.java Thu Feb 9 20:29:25 2012
@@ -77,6 +77,12 @@ public class TestGermanAnalyzer extends
checkOneTermReuse(a, "tischen", "tischen");
}
+ public void testStemExclusionTable() throws Exception {
+ GermanAnalyzer a = new GermanAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET,
+ new CharArraySet(TEST_VERSION_CURRENT, asSet("tischen"), false));
+ checkOneTermReuse(a, "tischen", "tischen");
+ }
+
/** test some features of the new snowball filter
* these only pass with LUCENE_CURRENT, not if you use o.a.l.a.de.GermanStemmer
*/
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/en/TestEnglishAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/en/TestEnglishAnalyzer.java?rev=1242509&r1=1242508&r2=1242509&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/en/TestEnglishAnalyzer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/en/TestEnglishAnalyzer.java Thu Feb 9 20:29:25 2012
@@ -18,11 +18,10 @@ package org.apache.lucene.analysis.en;
*/
import java.io.IOException;
-import java.util.HashSet;
-import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
public class TestEnglishAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the
@@ -45,8 +44,7 @@ public class TestEnglishAnalyzer extends
/** test use of exclusion set */
public void testExclude() throws IOException {
- Set<String> exclusionSet = new HashSet<String>();
- exclusionSet.add("books");
+ CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("books"), false);
Analyzer a = new EnglishAnalyzer(TEST_VERSION_CURRENT,
EnglishAnalyzer.getDefaultStopSet(), exclusionSet);
checkOneTermReuse(a, "books", "books");
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/es/TestSpanishAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/es/TestSpanishAnalyzer.java?rev=1242509&r1=1242508&r2=1242509&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/es/TestSpanishAnalyzer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/es/TestSpanishAnalyzer.java Thu Feb 9 20:29:25 2012
@@ -18,11 +18,10 @@ package org.apache.lucene.analysis.es;
*/
import java.io.IOException;
-import java.util.HashSet;
-import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
public class TestSpanishAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the
@@ -43,8 +42,7 @@ public class TestSpanishAnalyzer extends
/** test use of exclusion set */
public void testExclude() throws IOException {
- Set<String> exclusionSet = new HashSet<String>();
- exclusionSet.add("chicano");
+ CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("chicano"), false);
Analyzer a = new SpanishAnalyzer(TEST_VERSION_CURRENT,
SpanishAnalyzer.getDefaultStopSet(), exclusionSet);
checkOneTermReuse(a, "chicana", "chican");
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/eu/TestBasqueAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/eu/TestBasqueAnalyzer.java?rev=1242509&r1=1242508&r2=1242509&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/eu/TestBasqueAnalyzer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/eu/TestBasqueAnalyzer.java Thu Feb 9 20:29:25 2012
@@ -18,11 +18,10 @@ package org.apache.lucene.analysis.eu;
*/
import java.io.IOException;
-import java.util.HashSet;
-import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
public class TestBasqueAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the
@@ -43,8 +42,7 @@ public class TestBasqueAnalyzer extends
/** test use of exclusion set */
public void testExclude() throws IOException {
- Set<String> exclusionSet = new HashSet<String>();
- exclusionSet.add("zaldiak");
+ CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("zaldiak"), false);
Analyzer a = new BasqueAnalyzer(TEST_VERSION_CURRENT,
BasqueAnalyzer.getDefaultStopSet(), exclusionSet);
checkOneTermReuse(a, "zaldiak", "zaldiak");
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/fa/TestPersianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/fa/TestPersianAnalyzer.java?rev=1242509&r1=1242508&r2=1242509&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/fa/TestPersianAnalyzer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/fa/TestPersianAnalyzer.java Thu Feb 9 20:29:25 2012
@@ -19,6 +19,7 @@ package org.apache.lucene.analysis.fa;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.CharArraySet;
/**
* Test the Persian Analyzer
@@ -215,7 +216,8 @@ public class TestPersianAnalyzer extends
* Test that custom stopwords work, and are not case-sensitive.
*/
public void testCustomStopwords() throws Exception {
- PersianAnalyzer a = new PersianAnalyzer(TEST_VERSION_CURRENT, "the", "and", "a");
+ PersianAnalyzer a = new PersianAnalyzer(TEST_VERSION_CURRENT,
+ new CharArraySet(TEST_VERSION_CURRENT, asSet("the", "and", "a"), false));
assertAnalyzesTo(a, "The quick brown fox.", new String[] { "quick",
"brown", "fox" });
}
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/fi/TestFinnishAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/fi/TestFinnishAnalyzer.java?rev=1242509&r1=1242508&r2=1242509&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/fi/TestFinnishAnalyzer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/fi/TestFinnishAnalyzer.java Thu Feb 9 20:29:25 2012
@@ -18,11 +18,10 @@ package org.apache.lucene.analysis.fi;
*/
import java.io.IOException;
-import java.util.HashSet;
-import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
public class TestFinnishAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the
@@ -43,8 +42,7 @@ public class TestFinnishAnalyzer extends
/** test use of exclusion set */
public void testExclude() throws IOException {
- Set<String> exclusionSet = new HashSet<String>();
- exclusionSet.add("edeltäjistään");
+ CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("edeltäjistään"), false);
Analyzer a = new FinnishAnalyzer(TEST_VERSION_CURRENT,
FinnishAnalyzer.getDefaultStopSet(), exclusionSet);
checkOneTermReuse(a, "edeltäjiinsä", "edeltäj");
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/fr/TestElision.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/fr/TestElision.java?rev=1242509&r1=1242508&r2=1242509&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/fr/TestElision.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/fr/TestElision.java Thu Feb 9 20:29:25 2012
@@ -20,15 +20,14 @@ package org.apache.lucene.analysis.fr;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
-import java.util.HashSet;
import java.util.List;
-import java.util.Set;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.CharArraySet;
/**
*
@@ -38,9 +37,7 @@ public class TestElision extends BaseTok
public void testElision() throws Exception {
String test = "Plop, juste pour voir l'embrouille avec O'brian. M'enfin.";
Tokenizer tokenizer = new StandardTokenizer(TEST_VERSION_CURRENT, new StringReader(test));
- Set<String> articles = new HashSet<String>();
- articles.add("l");
- articles.add("M");
+ CharArraySet articles = new CharArraySet(TEST_VERSION_CURRENT, asSet("l", "M"), false);
TokenFilter filter = new ElisionFilter(TEST_VERSION_CURRENT, tokenizer, articles);
List<String> tas = filter(filter);
assertEquals("embrouille", tas.get(4));
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/gl/TestGalicianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/gl/TestGalicianAnalyzer.java?rev=1242509&r1=1242508&r2=1242509&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/gl/TestGalicianAnalyzer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/gl/TestGalicianAnalyzer.java Thu Feb 9 20:29:25 2012
@@ -18,11 +18,10 @@ package org.apache.lucene.analysis.gl;
*/
import java.io.IOException;
-import java.util.HashSet;
-import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
public class TestGalicianAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the
@@ -43,8 +42,7 @@ public class TestGalicianAnalyzer extend
/** test use of exclusion set */
public void testExclude() throws IOException {
- Set<String> exclusionSet = new HashSet<String>();
- exclusionSet.add("correspondente");
+ CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("correspondente"), false);
Analyzer a = new GalicianAnalyzer(TEST_VERSION_CURRENT,
GalicianAnalyzer.getDefaultStopSet(), exclusionSet);
checkOneTermReuse(a, "correspondente", "correspondente");
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/hi/TestHindiAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/hi/TestHindiAnalyzer.java?rev=1242509&r1=1242508&r2=1242509&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/hi/TestHindiAnalyzer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/hi/TestHindiAnalyzer.java Thu Feb 9 20:29:25 2012
@@ -1,10 +1,8 @@
package org.apache.lucene.analysis.hi;
-import java.util.HashSet;
-import java.util.Set;
-
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
@@ -41,8 +39,7 @@ public class TestHindiAnalyzer extends B
}
public void testExclusionSet() throws Exception {
- Set<String> exclusionSet = new HashSet<String>();
- exclusionSet.add("हिंदी");
+ CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("हिंदी"), false);
Analyzer a = new HindiAnalyzer(TEST_VERSION_CURRENT,
HindiAnalyzer.getDefaultStopSet(), exclusionSet);
checkOneTermReuse(a, "हिंदी", "हिंदी");
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/hu/TestHungarianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/hu/TestHungarianAnalyzer.java?rev=1242509&r1=1242508&r2=1242509&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/hu/TestHungarianAnalyzer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/hu/TestHungarianAnalyzer.java Thu Feb 9 20:29:25 2012
@@ -18,11 +18,10 @@ package org.apache.lucene.analysis.hu;
*/
import java.io.IOException;
-import java.util.HashSet;
-import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
public class TestHungarianAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the
@@ -43,8 +42,7 @@ public class TestHungarianAnalyzer exten
/** test use of exclusion set */
public void testExclude() throws IOException {
- Set<String> exclusionSet = new HashSet<String>();
- exclusionSet.add("babakocsi");
+ CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("babakocsi"), false);
Analyzer a = new HungarianAnalyzer(TEST_VERSION_CURRENT,
HungarianAnalyzer.getDefaultStopSet(), exclusionSet);
checkOneTermReuse(a, "babakocsi", "babakocsi");
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/hy/TestArmenianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/hy/TestArmenianAnalyzer.java?rev=1242509&r1=1242508&r2=1242509&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/hy/TestArmenianAnalyzer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/hy/TestArmenianAnalyzer.java Thu Feb 9 20:29:25 2012
@@ -18,11 +18,10 @@ package org.apache.lucene.analysis.hy;
*/
import java.io.IOException;
-import java.util.HashSet;
-import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
public class TestArmenianAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the
@@ -43,8 +42,7 @@ public class TestArmenianAnalyzer extend
/** test use of exclusion set */
public void testExclude() throws IOException {
- Set<String> exclusionSet = new HashSet<String>();
- exclusionSet.add("արծիվներ");
+ CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("արծիվներ"), false);
Analyzer a = new ArmenianAnalyzer(TEST_VERSION_CURRENT,
ArmenianAnalyzer.getDefaultStopSet(), exclusionSet);
checkOneTermReuse(a, "արծիվներ", "արծիվներ");
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/id/TestIndonesianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/id/TestIndonesianAnalyzer.java?rev=1242509&r1=1242508&r2=1242509&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/id/TestIndonesianAnalyzer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/id/TestIndonesianAnalyzer.java Thu Feb 9 20:29:25 2012
@@ -18,11 +18,10 @@ package org.apache.lucene.analysis.id;
*/
import java.io.IOException;
-import java.util.HashSet;
-import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
public class TestIndonesianAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the
@@ -43,8 +42,7 @@ public class TestIndonesianAnalyzer exte
/** test use of exclusion set */
public void testExclude() throws IOException {
- Set<String> exclusionSet = new HashSet<String>();
- exclusionSet.add("peledakan");
+ CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("peledakan"), false);
Analyzer a = new IndonesianAnalyzer(TEST_VERSION_CURRENT,
IndonesianAnalyzer.getDefaultStopSet(), exclusionSet);
checkOneTermReuse(a, "peledakan", "peledakan");
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/it/TestItalianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/it/TestItalianAnalyzer.java?rev=1242509&r1=1242508&r2=1242509&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/it/TestItalianAnalyzer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/it/TestItalianAnalyzer.java Thu Feb 9 20:29:25 2012
@@ -18,11 +18,10 @@ package org.apache.lucene.analysis.it;
*/
import java.io.IOException;
-import java.util.HashSet;
-import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.util.Version;
public class TestItalianAnalyzer extends BaseTokenStreamTestCase {
@@ -44,8 +43,7 @@ public class TestItalianAnalyzer extends
/** test use of exclusion set */
public void testExclude() throws IOException {
- Set<String> exclusionSet = new HashSet<String>();
- exclusionSet.add("abbandonata");
+ CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("abbandonata"), false);
Analyzer a = new ItalianAnalyzer(TEST_VERSION_CURRENT,
ItalianAnalyzer.getDefaultStopSet(), exclusionSet);
checkOneTermReuse(a, "abbandonata", "abbandonata");
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/lv/TestLatvianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/lv/TestLatvianAnalyzer.java?rev=1242509&r1=1242508&r2=1242509&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/lv/TestLatvianAnalyzer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/lv/TestLatvianAnalyzer.java Thu Feb 9 20:29:25 2012
@@ -18,11 +18,10 @@ package org.apache.lucene.analysis.lv;
*/
import java.io.IOException;
-import java.util.HashSet;
-import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
public class TestLatvianAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the
@@ -43,8 +42,7 @@ public class TestLatvianAnalyzer extends
/** test use of exclusion set */
public void testExclude() throws IOException {
- Set<String> exclusionSet = new HashSet<String>();
- exclusionSet.add("tirgiem");
+ CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("tirgiem"), false);
Analyzer a = new LatvianAnalyzer(TEST_VERSION_CURRENT,
LatvianAnalyzer.getDefaultStopSet(), exclusionSet);
checkOneTermReuse(a, "tirgiem", "tirgiem");
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/miscellaneous/TestStemmerOverrideFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/miscellaneous/TestStemmerOverrideFilter.java?rev=1242509&r1=1242508&r2=1242509&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/miscellaneous/TestStemmerOverrideFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/miscellaneous/TestStemmerOverrideFilter.java Thu Feb 9 20:29:25 2012
@@ -2,12 +2,11 @@ package org.apache.lucene.analysis.misce
import java.io.IOException;
import java.io.StringReader;
-import java.util.HashMap;
-import java.util.Map;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.KeywordTokenizer;
import org.apache.lucene.analysis.PorterStemFilter;
+import org.apache.lucene.analysis.CharArrayMap;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
@@ -33,7 +32,7 @@ public class TestStemmerOverrideFilter e
// lets make booked stem to books
// the override filter will convert "booked" to "books",
// but also mark it with KeywordAttribute so Porter will not change it.
- Map<String,String> dictionary = new HashMap<String,String>();
+ CharArrayMap<String> dictionary = new CharArrayMap<String>(TEST_VERSION_CURRENT, 1, false);
dictionary.put("booked", "books");
Tokenizer tokenizer = new KeywordTokenizer(new StringReader("booked"));
TokenStream stream = new PorterStemFilter(
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/nl/TestDutchStemmer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/nl/TestDutchStemmer.java?rev=1242509&r1=1242508&r2=1242509&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/nl/TestDutchStemmer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/nl/TestDutchStemmer.java Thu Feb 9 20:29:25 2012
@@ -175,6 +175,26 @@ public class TestDutchStemmer extends Ba
checkOneTermReuse(a, "lichamelijk", "somethingentirelydifferent");
}
+ /**
+ * check that the default stem overrides are used
+ * even if you use a non-default ctor.
+ */
+ public void testStemOverrides() throws IOException {
+ DutchAnalyzer a = new DutchAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET);
+ checkOneTerm(a, "fiets", "fiets");
+ }
+
+ /**
+ * prior to 3.6, this confusingly did not happen if
+ * you specified your own stoplist!!!!
+ * @deprecated (3.6) Remove this test in Lucene 5.0
+ */
+ @Deprecated
+ public void testBuggyStemOverrides() throws IOException {
+ DutchAnalyzer a = new DutchAnalyzer(Version.LUCENE_35, CharArraySet.EMPTY_SET);
+ checkOneTerm(a, "fiets", "fiet");
+ }
+
/**
* Prior to 3.1, this analyzer had no lowercase filter.
* stopwords were case sensitive. Preserve this for back compat.
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/no/TestNorwegianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/no/TestNorwegianAnalyzer.java?rev=1242509&r1=1242508&r2=1242509&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/no/TestNorwegianAnalyzer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/no/TestNorwegianAnalyzer.java Thu Feb 9 20:29:25 2012
@@ -18,11 +18,10 @@ package org.apache.lucene.analysis.no;
*/
import java.io.IOException;
-import java.util.HashSet;
-import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
public class TestNorwegianAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the
@@ -43,8 +42,7 @@ public class TestNorwegianAnalyzer exten
/** test use of exclusion set */
public void testExclude() throws IOException {
- Set<String> exclusionSet = new HashSet<String>();
- exclusionSet.add("havnedistriktene");
+ CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("havnedistriktene"), false);
Analyzer a = new NorwegianAnalyzer(TEST_VERSION_CURRENT,
NorwegianAnalyzer.getDefaultStopSet(), exclusionSet);
checkOneTermReuse(a, "havnedistriktene", "havnedistriktene");
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseAnalyzer.java?rev=1242509&r1=1242508&r2=1242509&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseAnalyzer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseAnalyzer.java Thu Feb 9 20:29:25 2012
@@ -18,11 +18,10 @@ package org.apache.lucene.analysis.pt;
*/
import java.io.IOException;
-import java.util.HashSet;
-import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
public class TestPortugueseAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the
@@ -43,8 +42,7 @@ public class TestPortugueseAnalyzer exte
/** test use of exclusion set */
public void testExclude() throws IOException {
- Set<String> exclusionSet = new HashSet<String>();
- exclusionSet.add("quilométricas");
+ CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("quilométricas"), false);
Analyzer a = new PortugueseAnalyzer(TEST_VERSION_CURRENT,
PortugueseAnalyzer.getDefaultStopSet(), exclusionSet);
checkOneTermReuse(a, "quilométricas", "quilométricas");
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ro/TestRomanianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ro/TestRomanianAnalyzer.java?rev=1242509&r1=1242508&r2=1242509&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ro/TestRomanianAnalyzer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ro/TestRomanianAnalyzer.java Thu Feb 9 20:29:25 2012
@@ -18,11 +18,10 @@ package org.apache.lucene.analysis.ro;
*/
import java.io.IOException;
-import java.util.HashSet;
-import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
public class TestRomanianAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the
@@ -43,8 +42,7 @@ public class TestRomanianAnalyzer extend
/** test use of exclusion set */
public void testExclude() throws IOException {
- Set<String> exclusionSet = new HashSet<String>();
- exclusionSet.add("absenţa");
+ CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("absenţa"), false);
Analyzer a = new RomanianAnalyzer(TEST_VERSION_CURRENT,
RomanianAnalyzer.getDefaultStopSet(), exclusionSet);
checkOneTermReuse(a, "absenţa", "absenţa");
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/sv/TestSwedishAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/sv/TestSwedishAnalyzer.java?rev=1242509&r1=1242508&r2=1242509&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/sv/TestSwedishAnalyzer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/sv/TestSwedishAnalyzer.java Thu Feb 9 20:29:25 2012
@@ -18,12 +18,10 @@ package org.apache.lucene.analysis.sv;
*/
import java.io.IOException;
-import java.util.HashSet;
-import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.hu.HungarianAnalyzer;
+import org.apache.lucene.analysis.CharArraySet;
public class TestSwedishAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the
@@ -44,8 +42,7 @@ public class TestSwedishAnalyzer extends
/** test use of exclusion set */
public void testExclude() throws IOException {
- Set<String> exclusionSet = new HashSet<String>();
- exclusionSet.add("jaktkarlarne");
+ CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("jaktkarlarne"), false);
Analyzer a = new SwedishAnalyzer(TEST_VERSION_CURRENT,
SwedishAnalyzer.getDefaultStopSet(), exclusionSet);
checkOneTermReuse(a, "jaktkarlarne", "jaktkarlarne");
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/tr/TestTurkishAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/tr/TestTurkishAnalyzer.java?rev=1242509&r1=1242508&r2=1242509&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/tr/TestTurkishAnalyzer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/tr/TestTurkishAnalyzer.java Thu Feb 9 20:29:25 2012
@@ -18,11 +18,10 @@ package org.apache.lucene.analysis.tr;
*/
import java.io.IOException;
-import java.util.HashSet;
-import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
public class TestTurkishAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the
@@ -43,8 +42,7 @@ public class TestTurkishAnalyzer extends
/** test use of exclusion set */
public void testExclude() throws IOException {
- Set<String> exclusionSet = new HashSet<String>();
- exclusionSet.add("ağacı");
+ CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("ağacı"), false);
Analyzer a = new TurkishAnalyzer(TEST_VERSION_CURRENT,
TurkishAnalyzer.getDefaultStopSet(), exclusionSet);
checkOneTermReuse(a, "ağacı", "ağacı");
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/stempel/src/test/org/apache/lucene/analysis/pl/TestPolishAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/stempel/src/test/org/apache/lucene/analysis/pl/TestPolishAnalyzer.java?rev=1242509&r1=1242508&r2=1242509&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/stempel/src/test/org/apache/lucene/analysis/pl/TestPolishAnalyzer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/stempel/src/test/org/apache/lucene/analysis/pl/TestPolishAnalyzer.java Thu Feb 9 20:29:25 2012
@@ -18,11 +18,10 @@ package org.apache.lucene.analysis.pl;
*/
import java.io.IOException;
-import java.util.HashSet;
-import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
public class TestPolishAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the
@@ -43,8 +42,7 @@ public class TestPolishAnalyzer extends
/** test use of exclusion set */
public void testExclude() throws IOException {
- Set<String> exclusionSet = new HashSet<String>();
- exclusionSet.add("studenta");
+ CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("studenta"), false);;
Analyzer a = new PolishAnalyzer(TEST_VERSION_CURRENT,
PolishAnalyzer.getDefaultStopSet(), exclusionSet);
checkOneTermReuse(a, "studenta", "studenta");
Modified: lucene/dev/branches/branch_3x/lucene/core/src/java/org/apache/lucene/analysis/StopFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/core/src/java/org/apache/lucene/analysis/StopFilter.java?rev=1242509&r1=1242508&r2=1242509&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/core/src/java/org/apache/lucene/analysis/StopFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/core/src/java/org/apache/lucene/analysis/StopFilter.java Thu Feb 9 20:29:25 2012
@@ -58,7 +58,7 @@ public final class StopFilter extends Fi
* @param input Input TokenStream
* @param stopWords A Set of Strings or char[] or any other toString()-able set representing the stopwords
* @param ignoreCase if true, all words are lower cased first
- * @deprecated use {@link #StopFilter(Version, TokenStream, Set, boolean)} instead
+ * @deprecated Use {@link #StopFilter(Version, TokenStream, Set)} instead
*/
@Deprecated
public StopFilter(boolean enablePositionIncrements, TokenStream input, Set<?> stopWords, boolean ignoreCase)
@@ -87,7 +87,9 @@ public final class StopFilter extends Fi
* representing the stopwords
* @param ignoreCase
* if true, all words are lower cased first
+ * @deprecated Use {@link #StopFilter(Version, TokenStream, Set)} instead
*/
+ @Deprecated
public StopFilter(Version matchVersion, TokenStream input, Set<?> stopWords, boolean ignoreCase)
{
this(matchVersion, matchVersion.onOrAfter(Version.LUCENE_29), input, stopWords, ignoreCase);
Modified: lucene/dev/branches/branch_3x/lucene/core/src/java/org/apache/lucene/analysis/standard/ClassicAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/core/src/java/org/apache/lucene/analysis/standard/ClassicAnalyzer.java?rev=1242509&r1=1242508&r2=1242509&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/core/src/java/org/apache/lucene/analysis/standard/ClassicAnalyzer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/core/src/java/org/apache/lucene/analysis/standard/ClassicAnalyzer.java Thu Feb 9 20:29:25 2012
@@ -91,7 +91,10 @@ public final class ClassicAnalyzer exten
* @see WordlistLoader#getWordSet(Reader, Version)
* @param matchVersion Lucene version to match See {@link
* <a href="#version">above</a>}
- * @param stopwords File to read stop words from */
+ * @param stopwords File to read stop words from
+ * @deprecated Use {@link #ClassicAnalyzer(Version, Reader)} instead.
+ */
+ @Deprecated
public ClassicAnalyzer(Version matchVersion, File stopwords) throws IOException {
this(matchVersion, WordlistLoader.getWordSet(IOUtils.getDecodingReader(stopwords,
IOUtils.CHARSET_UTF_8), matchVersion));
Modified: lucene/dev/branches/branch_3x/lucene/core/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/core/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java?rev=1242509&r1=1242508&r2=1242509&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/core/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/core/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java Thu Feb 9 20:29:25 2012
@@ -87,7 +87,10 @@ public final class StandardAnalyzer exte
* @see WordlistLoader#getWordSet(Reader, Version)
* @param matchVersion Lucene version to match See {@link
* <a href="#version">above</a>}
- * @param stopwords File to read stop words from */
+ * @param stopwords File to read stop words from
+ * @deprecated Use {@link #StandardAnalyzer(Version, Reader)} instead.
+ */
+ @Deprecated
public StandardAnalyzer(Version matchVersion, File stopwords) throws IOException {
this(matchVersion, WordlistLoader.getWordSet(IOUtils.getDecodingReader(stopwords,
IOUtils.CHARSET_UTF_8), matchVersion));
Modified: lucene/dev/branches/branch_3x/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java?rev=1242509&r1=1242508&r2=1242509&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java (original)
+++ lucene/dev/branches/branch_3x/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java Thu Feb 9 20:29:25 2012
@@ -805,7 +805,11 @@ public abstract class LuceneTestCase ext
public static void assumeNoException(String msg, Exception e) {
Assume.assumeNoException(e == null ? null : new _TestIgnoredException(msg, e));
}
-
+
+ public static <T> Set<T> asSet(T... args) {
+ return new HashSet<T>(Arrays.asList(args));
+ }
+
/**
* Convenience method for logging an iterator.
*
Modified: lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/CommonGramsFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/CommonGramsFilter.java?rev=1242509&r1=1242508&r2=1242509&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/CommonGramsFilter.java (original)
+++ lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/CommonGramsFilter.java Thu Feb 9 20:29:25 2012
@@ -67,7 +67,7 @@ public final class CommonGramsFilter ext
this(Version.LUCENE_29, input, commonWords);
}
- /** @deprecated Use {@link #CommonGramsFilter(Version, TokenStream, Set, boolean)} instead */
+ /** @deprecated Use {@link #CommonGramsFilter(Version, TokenStream, Set)} instead */
@Deprecated
public CommonGramsFilter(TokenStream input, Set<?> commonWords, boolean ignoreCase) {
this(Version.LUCENE_29, input, commonWords, ignoreCase);
@@ -102,7 +102,9 @@ public final class CommonGramsFilter ext
* @param input TokenStream input in filter chain.
* @param commonWords The set of common words.
* @param ignoreCase -Ignore case when constructing bigrams for common words.
+ * @deprecated Use {@link #CommonGramsFilter(Version, TokenStream, Set)} instead
*/
+ @Deprecated
public CommonGramsFilter(Version matchVersion, TokenStream input, Set<?> commonWords, boolean ignoreCase) {
super(input);
if (commonWords instanceof CharArraySet) {