You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2012/03/22 22:41:38 UTC
svn commit: r1304082 - in /lucene/dev/branches/branch_3x: ./ solr/
solr/contrib/analysis-extras/src/java/org/apache/solr/analysis/
solr/contrib/analysis-extras/src/test-files/analysis-extras/solr/conf/
solr/contrib/analysis-extras/src/test/org/apache/s...
Author: rmuir
Date: Thu Mar 22 21:41:38 2012
New Revision: 1304082
URL: http://svn.apache.org/viewvc?rev=1304082&view=rev
Log:
SOLR-2921: enable multitermqueries for these filters too
Modified:
lucene/dev/branches/branch_3x/ (props changed)
lucene/dev/branches/branch_3x/solr/ (props changed)
lucene/dev/branches/branch_3x/solr/contrib/analysis-extras/src/java/org/apache/solr/analysis/ICUTransformFilterFactory.java
lucene/dev/branches/branch_3x/solr/contrib/analysis-extras/src/test-files/analysis-extras/solr/conf/schema-folding-extra.xml
lucene/dev/branches/branch_3x/solr/contrib/analysis-extras/src/test/org/apache/solr/analysis/TestFoldingMultitermExtrasQuery.java
lucene/dev/branches/branch_3x/solr/core/ (props changed)
lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/ArabicNormalizationFilterFactory.java
lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/CJKWidthFilterFactory.java
lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/GermanNormalizationFilterFactory.java
lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/HindiNormalizationFilterFactory.java
lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/IndicNormalizationFilterFactory.java
lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/PersianNormalizationFilterFactory.java
lucene/dev/branches/branch_3x/solr/core/src/test-files/solr/conf/schema-folding.xml
lucene/dev/branches/branch_3x/solr/core/src/test/org/apache/solr/search/TestFoldingMultitermQuery.java
Modified: lucene/dev/branches/branch_3x/solr/contrib/analysis-extras/src/java/org/apache/solr/analysis/ICUTransformFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/contrib/analysis-extras/src/java/org/apache/solr/analysis/ICUTransformFilterFactory.java?rev=1304082&r1=1304081&r2=1304082&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/contrib/analysis-extras/src/java/org/apache/solr/analysis/ICUTransformFilterFactory.java (original)
+++ lucene/dev/branches/branch_3x/solr/contrib/analysis-extras/src/java/org/apache/solr/analysis/ICUTransformFilterFactory.java Thu Mar 22 21:41:38 2012
@@ -37,7 +37,7 @@ import com.ibm.icu.text.Transliterator;
* </ul>
* @see Transliterator
*/
-public class ICUTransformFilterFactory extends BaseTokenFilterFactory {
+public class ICUTransformFilterFactory extends BaseTokenFilterFactory implements MultiTermAwareComponent {
private Transliterator transliterator;
// TODO: add support for custom rules
@@ -64,4 +64,9 @@ public class ICUTransformFilterFactory e
public TokenStream create(TokenStream input) {
return new ICUTransformFilter(input, transliterator);
}
+
+ @Override
+ public Object getMultiTermComponent() {
+ return this;
+ }
}
Modified: lucene/dev/branches/branch_3x/solr/contrib/analysis-extras/src/test-files/analysis-extras/solr/conf/schema-folding-extra.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/contrib/analysis-extras/src/test-files/analysis-extras/solr/conf/schema-folding-extra.xml?rev=1304082&r1=1304081&r2=1304082&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/contrib/analysis-extras/src/test-files/analysis-extras/solr/conf/schema-folding-extra.xml (original)
+++ lucene/dev/branches/branch_3x/solr/contrib/analysis-extras/src/test-files/analysis-extras/solr/conf/schema-folding-extra.xml Thu Mar 22 21:41:38 2012
@@ -33,6 +33,13 @@
<filter class="solr.ICUNormalizer2FilterFactory" name="nfkc_cf" mode="compose"/>
</analyzer>
</fieldType>
+
+ <fieldType name="text_icutransform" class="solr.TextField">
+ <analyzer>
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <filter class="solr.ICUTransformFilterFactory" id="Cyrillic-Latin"/>
+ </analyzer>
+ </fieldType>
</types>
@@ -40,6 +47,7 @@
<field name="id" type="string" indexed="true" stored="true" required="true"/>
<field name="content_icufolding" type="text_icufolding" indexed="true" stored="true"/>
<field name="content_icunormalizer2" type="text_icunormalizer2" indexed="true" stored="true"/>
+ <field name="content_icutransform" type="text_icutransform" indexed="true" stored="true"/>
</fields>
Modified: lucene/dev/branches/branch_3x/solr/contrib/analysis-extras/src/test/org/apache/solr/analysis/TestFoldingMultitermExtrasQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/contrib/analysis-extras/src/test/org/apache/solr/analysis/TestFoldingMultitermExtrasQuery.java?rev=1304082&r1=1304081&r2=1304082&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/contrib/analysis-extras/src/test/org/apache/solr/analysis/TestFoldingMultitermExtrasQuery.java (original)
+++ lucene/dev/branches/branch_3x/solr/contrib/analysis-extras/src/test/org/apache/solr/analysis/TestFoldingMultitermExtrasQuery.java Thu Mar 22 21:41:38 2012
@@ -17,7 +17,6 @@ package org.apache.solr.analysis;
* limitations under the License.
*/
-import org.apache.lucene.index.IndexWriter;
import org.apache.solr.SolrTestCaseJ4;
import org.junit.BeforeClass;
import org.junit.Test;
@@ -30,7 +29,6 @@ public class TestFoldingMultitermExtrasQ
@BeforeClass
public static void beforeTests() throws Exception {
initCore("solrconfig-icucollate.xml","schema-folding-extra.xml", "analysis-extras/solr");
- IndexWriter iw;
int idx = 1;
// ICUFoldingFilterFactory
@@ -54,7 +52,10 @@ public class TestFoldingMultitermExtrasQ
assertU(adoc("id", Integer.toString(idx++), "content_icunormalizer2", "ELİF"));
assertU(adoc("id", Integer.toString(idx++), "content_icunormalizer2", "eli\u0307f"));
- assertU(optimize());
+ // ICUTransformFilterFactory
+ assertU(adoc("id", Integer.toString(idx++), "content_icutransform", "Российская"));
+
+ assertU(commit());
}
@Test
@@ -73,4 +74,8 @@ public class TestFoldingMultitermExtrasQ
assertQ(req("q", "content_icunormalizer2:re\u0301Su*"), "//result[@numFound='2']");
assertQ(req("q", "content_icunormalizer2:eL*"), "//result[@numFound='2']");
}
+
+ public void testICUTransform() {
+ assertQ(req("q", "content_icutransform:Росс*"), "//result[@numFound='1']");
+ }
}
Modified: lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/ArabicNormalizationFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/ArabicNormalizationFilterFactory.java?rev=1304082&r1=1304081&r2=1304082&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/ArabicNormalizationFilterFactory.java (original)
+++ lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/ArabicNormalizationFilterFactory.java Thu Mar 22 21:41:38 2012
@@ -31,9 +31,14 @@ import org.apache.lucene.analysis.ar.Ara
* </fieldType></pre>
* @version $Id$
*/
-public class ArabicNormalizationFilterFactory extends BaseTokenFilterFactory{
+public class ArabicNormalizationFilterFactory extends BaseTokenFilterFactory implements MultiTermAwareComponent {
public ArabicNormalizationFilter create(TokenStream input) {
return new ArabicNormalizationFilter(input);
}
+
+ @Override
+ public Object getMultiTermComponent() {
+ return this;
+ }
}
Modified: lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/CJKWidthFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/CJKWidthFilterFactory.java?rev=1304082&r1=1304081&r2=1304082&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/CJKWidthFilterFactory.java (original)
+++ lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/CJKWidthFilterFactory.java Thu Mar 22 21:41:38 2012
@@ -33,9 +33,14 @@ import org.apache.lucene.analysis.cjk.CJ
* </fieldType></pre>
*/
-public class CJKWidthFilterFactory extends BaseTokenFilterFactory {
+public class CJKWidthFilterFactory extends BaseTokenFilterFactory implements MultiTermAwareComponent {
public TokenStream create(TokenStream input) {
return new CJKWidthFilter(input);
}
+
+ @Override
+ public Object getMultiTermComponent() {
+ return this;
+ }
}
Modified: lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/GermanNormalizationFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/GermanNormalizationFilterFactory.java?rev=1304082&r1=1304081&r2=1304082&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/GermanNormalizationFilterFactory.java (original)
+++ lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/GermanNormalizationFilterFactory.java Thu Mar 22 21:41:38 2012
@@ -31,9 +31,14 @@ import org.apache.lucene.analysis.de.Ger
* </analyzer>
* </fieldType></pre>
*/
-public class GermanNormalizationFilterFactory extends BaseTokenFilterFactory {
+public class GermanNormalizationFilterFactory extends BaseTokenFilterFactory implements MultiTermAwareComponent {
public TokenStream create(TokenStream input) {
return new GermanNormalizationFilter(input);
}
+
+ @Override
+ public Object getMultiTermComponent() {
+ return this;
+ }
}
Modified: lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/HindiNormalizationFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/HindiNormalizationFilterFactory.java?rev=1304082&r1=1304081&r2=1304082&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/HindiNormalizationFilterFactory.java (original)
+++ lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/HindiNormalizationFilterFactory.java Thu Mar 22 21:41:38 2012
@@ -31,8 +31,13 @@ import org.apache.lucene.analysis.hi.Hin
* </fieldType></pre>
* @version $Id$
*/
-public class HindiNormalizationFilterFactory extends BaseTokenFilterFactory {
+public class HindiNormalizationFilterFactory extends BaseTokenFilterFactory implements MultiTermAwareComponent {
public TokenStream create(TokenStream input) {
return new HindiNormalizationFilter(input);
}
+
+ @Override
+ public Object getMultiTermComponent() {
+ return this;
+ }
}
Modified: lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/IndicNormalizationFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/IndicNormalizationFilterFactory.java?rev=1304082&r1=1304081&r2=1304082&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/IndicNormalizationFilterFactory.java (original)
+++ lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/IndicNormalizationFilterFactory.java Thu Mar 22 21:41:38 2012
@@ -31,8 +31,13 @@ import org.apache.lucene.analysis.in.Ind
* </fieldType></pre>
* @version $Id$
*/
-public class IndicNormalizationFilterFactory extends BaseTokenFilterFactory {
+public class IndicNormalizationFilterFactory extends BaseTokenFilterFactory implements MultiTermAwareComponent {
public TokenStream create(TokenStream input) {
return new IndicNormalizationFilter(input);
}
+
+ @Override
+ public Object getMultiTermComponent() {
+ return this;
+ }
}
Modified: lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/PersianNormalizationFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/PersianNormalizationFilterFactory.java?rev=1304082&r1=1304081&r2=1304082&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/PersianNormalizationFilterFactory.java (original)
+++ lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/PersianNormalizationFilterFactory.java Thu Mar 22 21:41:38 2012
@@ -34,9 +34,14 @@ import org.apache.lucene.analysis.TokenS
* </fieldType></pre>
* @version $Id$
*/
-public class PersianNormalizationFilterFactory extends BaseTokenFilterFactory {
+public class PersianNormalizationFilterFactory extends BaseTokenFilterFactory implements MultiTermAwareComponent {
public PersianNormalizationFilter create(TokenStream input) {
return new PersianNormalizationFilter(input);
}
+
+ @Override
+ public Object getMultiTermComponent() {
+ return this;
+ }
}
Modified: lucene/dev/branches/branch_3x/solr/core/src/test-files/solr/conf/schema-folding.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/core/src/test-files/solr/conf/schema-folding.xml?rev=1304082&r1=1304081&r2=1304082&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/core/src/test-files/solr/conf/schema-folding.xml (original)
+++ lucene/dev/branches/branch_3x/solr/core/src/test-files/solr/conf/schema-folding.xml Thu Mar 22 21:41:38 2012
@@ -169,6 +169,42 @@
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldType>
+
+ <fieldType name="text_persian" class="solr.TextField">
+ <analyzer>
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <filter class="solr.PersianNormalizationFilterFactory"/>
+ </analyzer>
+ </fieldType>
+
+ <fieldType name="text_arabic" class="solr.TextField">
+ <analyzer>
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <filter class="solr.ArabicNormalizationFilterFactory"/>
+ </analyzer>
+ </fieldType>
+
+ <fieldType name="text_hindi" class="solr.TextField">
+ <analyzer>
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <filter class="solr.IndicNormalizationFilterFactory"/>
+ <filter class="solr.HindiNormalizationFilterFactory"/>
+ </analyzer>
+ </fieldType>
+
+ <fieldType name="text_german" class="solr.TextField">
+ <analyzer>
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <filter class="solr.GermanNormalizationFilterFactory"/>
+ </analyzer>
+ </fieldType>
+
+ <fieldType name="text_width" class="solr.TextField">
+ <analyzer>
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <filter class="solr.CJKWidthFilterFactory"/>
+ </analyzer>
+ </fieldType>
<fieldType name="text_isolatin" class="solr.TextField">
<analyzer>
@@ -211,6 +247,11 @@
<field name="content_turkish" type="text_turkish" indexed="true" stored="true"/>
<field name="content_russian" type="text_russian" indexed="true" stored="true"/>
<field name="content_isolatin" type="text_isolatin" indexed="true" stored="true"/>
+ <field name="content_persian" type="text_persian" indexed="true" stored="true"/>
+ <field name="content_arabic" type="text_arabic" indexed="true" stored="true"/>
+ <field name="content_hindi" type="text_hindi" indexed="true" stored="true"/>
+ <field name="content_german" type="text_german" indexed="true" stored="true"/>
+ <field name="content_width" type="text_width" indexed="true" stored="true"/>
<dynamicField name="*_straight" type="text_straight" indexed="true" stored="true"/>
<dynamicField name="*_lower" type="text_lower" indexed="true" stored="true"/>
Modified: lucene/dev/branches/branch_3x/solr/core/src/test/org/apache/solr/search/TestFoldingMultitermQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/core/src/test/org/apache/solr/search/TestFoldingMultitermQuery.java?rev=1304082&r1=1304081&r2=1304082&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/core/src/test/org/apache/solr/search/TestFoldingMultitermQuery.java (original)
+++ lucene/dev/branches/branch_3x/solr/core/src/test/org/apache/solr/search/TestFoldingMultitermQuery.java Thu Mar 22 21:41:38 2012
@@ -17,7 +17,6 @@ package org.apache.solr.search;
* limitations under the License.
*/
-import org.apache.lucene.index.IndexWriter;
import org.apache.solr.SolrTestCaseJ4;
import org.junit.BeforeClass;
import org.junit.Test;
@@ -31,7 +30,6 @@ public class TestFoldingMultitermQuery e
@BeforeClass
public static void beforeTests() throws Exception {
initCore("solrconfig.xml", "schema-folding.xml");
- IndexWriter iw;
String docs[] = {
"abcdefg1 finger",
@@ -91,7 +89,22 @@ public class TestFoldingMultitermQuery e
assertU(adoc("id", Integer.toString(idx++), "content_isolatin", "YppÃÃÃ all is well"));
assertU(adoc("id", Integer.toString(idx++), "content_isolatin", "that's äll"));
- assertU(optimize());
+ // persian normalization
+ assertU(adoc("id", Integer.toString(idx++), "content_persian", "هاي"));
+
+ // arabic normalization
+ assertU(adoc("id", Integer.toString(idx++), "content_arabic", "روبرت"));
+
+ // hindi normalization
+ assertU(adoc("id", Integer.toString(idx++), "content_hindi", "हिंदी"));
+ assertU(adoc("id", Integer.toString(idx++), "content_hindi", "अाअा"));
+
+ // german normalization
+ assertU(adoc("id", Integer.toString(idx++), "content_german", "weissbier"));
+
+ // cjk width normalization
+ assertU(adoc("id", Integer.toString(idx++), "content_width", "ｳﾞｨｯﾂ"));
+ assertU(commit());
}
@Test
@@ -299,4 +312,25 @@ public class TestFoldingMultitermQuery e
assertQ(req("q", "content_isolatin:äl*"), "//result[@numFound='2']");
assertQ(req("q", "content_isolatin:ál*"), "//result[@numFound='2']");
}
+
+ public void testPersian() {
+ assertQ(req("q", "content_persian:های*"), "//result[@numFound='1']");
+ }
+
+ public void testArabic() {
+ assertQ(req("q", "content_arabic:رÙبرÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙت*"), "//result[@numFound='1']");
+ }
+
+ public void testHindi() {
+ assertQ(req("q", "content_hindi:हिन्दी*"), "//result[@numFound='1']");
+ assertQ(req("q", "content_hindi:आआ*"), "//result[@numFound='1']");
+ }
+
+ public void testGerman() {
+ assertQ(req("q", "content_german:weiß*"), "//result[@numFound='1']");
+ }
+
+ public void testCJKWidth() {
+ assertQ(req("q", "content_width:ヴィ*"), "//result[@numFound='1']");
+ }
}
\ No newline at end of file