You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by er...@apache.org on 2012/03/22 19:03:22 UTC
svn commit: r1303939 - in /lucene/dev/trunk/solr:
contrib/analysis-extras/src/java/org/apache/solr/analysis/
contrib/analysis-extras/src/test-files/analysis-extras/solr/conf/
contrib/analysis-extras/src/test/org/apache/solr/analysis/
core/src/java/org/...
Author: erick
Date: Thu Mar 22 18:03:21 2012
New Revision: 1303939
URL: http://svn.apache.org/viewvc?rev=1303939&view=rev
Log:
Fixes for SOLR-2921 (making more components MultiTermAware)
Added:
lucene/dev/trunk/solr/contrib/analysis-extras/src/test-files/analysis-extras/solr/conf/schema-folding-extra.xml
lucene/dev/trunk/solr/contrib/analysis-extras/src/test/org/apache/solr/analysis/TestFoldingMultitermExtrasQuery.java
Modified:
lucene/dev/trunk/solr/contrib/analysis-extras/src/java/org/apache/solr/analysis/ICUFoldingFilterFactory.java
lucene/dev/trunk/solr/contrib/analysis-extras/src/java/org/apache/solr/analysis/ICUNormalizer2FilterFactory.java
lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/GreekLowerCaseFilterFactory.java
lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/TurkishLowerCaseFilterFactory.java
lucene/dev/trunk/solr/core/src/test-files/solr/conf/schema-folding.xml
lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/TestFoldingMultitermQuery.java
Modified: lucene/dev/trunk/solr/contrib/analysis-extras/src/java/org/apache/solr/analysis/ICUFoldingFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/analysis-extras/src/java/org/apache/solr/analysis/ICUFoldingFilterFactory.java?rev=1303939&r1=1303938&r2=1303939&view=diff
==============================================================================
--- lucene/dev/trunk/solr/contrib/analysis-extras/src/java/org/apache/solr/analysis/ICUFoldingFilterFactory.java (original)
+++ lucene/dev/trunk/solr/contrib/analysis-extras/src/java/org/apache/solr/analysis/ICUFoldingFilterFactory.java Thu Mar 22 18:03:21 2012
@@ -21,10 +21,14 @@ import org.apache.lucene.analysis.icu.IC
*/
/** Factory for {@link ICUFoldingFilter} */
-public class ICUFoldingFilterFactory extends BaseTokenFilterFactory {
+public class ICUFoldingFilterFactory extends BaseTokenFilterFactory implements MultiTermAwareComponent {
@Override
public TokenStream create(TokenStream input) {
return new ICUFoldingFilter(input);
}
+
+ public Object getMultiTermComponent() {
+ return this;
+ }
}
Modified: lucene/dev/trunk/solr/contrib/analysis-extras/src/java/org/apache/solr/analysis/ICUNormalizer2FilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/analysis-extras/src/java/org/apache/solr/analysis/ICUNormalizer2FilterFactory.java?rev=1303939&r1=1303938&r2=1303939&view=diff
==============================================================================
--- lucene/dev/trunk/solr/contrib/analysis-extras/src/java/org/apache/solr/analysis/ICUNormalizer2FilterFactory.java (original)
+++ lucene/dev/trunk/solr/contrib/analysis-extras/src/java/org/apache/solr/analysis/ICUNormalizer2FilterFactory.java Thu Mar 22 18:03:21 2012
@@ -44,7 +44,7 @@ import com.ibm.icu.text.UnicodeSet;
* @see Normalizer2
* @see FilteredNormalizer2
*/
-public class ICUNormalizer2FilterFactory extends BaseTokenFilterFactory {
+public class ICUNormalizer2FilterFactory extends BaseTokenFilterFactory implements MultiTermAwareComponent {
private Normalizer2 normalizer;
// TODO: support custom normalization
@@ -78,4 +78,8 @@ public class ICUNormalizer2FilterFactory
public TokenStream create(TokenStream input) {
return new ICUNormalizer2Filter(input, normalizer);
}
+
+ public Object getMultiTermComponent() {
+ return this;
+ }
}
Added: lucene/dev/trunk/solr/contrib/analysis-extras/src/test-files/analysis-extras/solr/conf/schema-folding-extra.xml
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/analysis-extras/src/test-files/analysis-extras/solr/conf/schema-folding-extra.xml?rev=1303939&view=auto
==============================================================================
--- lucene/dev/trunk/solr/contrib/analysis-extras/src/test-files/analysis-extras/solr/conf/schema-folding-extra.xml (added)
+++ lucene/dev/trunk/solr/contrib/analysis-extras/src/test-files/analysis-extras/solr/conf/schema-folding-extra.xml Thu Mar 22 18:03:21 2012
@@ -0,0 +1,49 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ -->
+
+<schema name="test" version="1.0">
+ <types>
+ <fieldtype name="string" class="solr.StrField" sortMissingLast="true" multiValued="false"/>
+
+
+ <fieldType name="text_icufolding" class="solr.TextField">
+ <analyzer>
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <filter class="solr.ICUFoldingFilterFactory"/>
+ </analyzer>
+ </fieldType>
+
+ <fieldType name="text_icunormalizer2" class="solr.TextField">
+ <analyzer>
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <filter class="solr.ICUNormalizer2FilterFactory" name="nfkc_cf" mode="compose"/>
+ </analyzer>
+ </fieldType>
+
+ </types>
+
+ <fields>
+ <field name="id" type="string" indexed="true" stored="true" required="true"/>
+ <field name="content_icufolding" type="text_icufolding" indexed="true" stored="true"/>
+ <field name="content_icunormalizer2" type="text_icunormalizer2" indexed="true" stored="true"/>
+
+ </fields>
+
+ <defaultSearchField>id</defaultSearchField>
+ <uniqueKey>id</uniqueKey>
+
+</schema>
Added: lucene/dev/trunk/solr/contrib/analysis-extras/src/test/org/apache/solr/analysis/TestFoldingMultitermExtrasQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/analysis-extras/src/test/org/apache/solr/analysis/TestFoldingMultitermExtrasQuery.java?rev=1303939&view=auto
==============================================================================
--- lucene/dev/trunk/solr/contrib/analysis-extras/src/test/org/apache/solr/analysis/TestFoldingMultitermExtrasQuery.java (added)
+++ lucene/dev/trunk/solr/contrib/analysis-extras/src/test/org/apache/solr/analysis/TestFoldingMultitermExtrasQuery.java Thu Mar 22 18:03:21 2012
@@ -0,0 +1,77 @@
+package org.apache.solr.analysis;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.index.IndexWriter;
+import org.apache.solr.SolrTestCaseJ4;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+public class TestFoldingMultitermExtrasQuery extends SolrTestCaseJ4 {
+
+ public String getCoreName() {
+ return "basic";
+ }
+
+ @BeforeClass
+ public static void beforeTests() throws Exception {
+ initCore("solrconfig-icucollate.xml","schema-folding-extra.xml", "analysis-extras/solr");
+ IndexWriter iw;
+
+ int idx = 1;
+ // ICUFoldingFilterFactory
+ assertU(adoc("id", Integer.toString(idx++), "content_icufolding", "BadMagicICUFolding"));
+ assertU(adoc("id", Integer.toString(idx++), "content_icufolding", "Ruß"));
+ assertU(adoc("id", Integer.toString(idx++), "content_icufolding", "ΜΆΪΟΣ"));
+ assertU(adoc("id", Integer.toString(idx++), "content_icufolding", "Μάϊος"));
+ assertU(adoc("id", Integer.toString(idx++), "content_icufolding", "résumé"));
+ assertU(adoc("id", Integer.toString(idx++), "content_icufolding", "re\u0301sume\u0301"));
+ assertU(adoc("id", Integer.toString(idx++), "content_icufolding", "ELİF"));
+ assertU(adoc("id", Integer.toString(idx++), "content_icufolding", "eli\u0307f"));
+
+ // ICUNormalizer2FilterFactory
+
+ assertU(adoc("id", Integer.toString(idx++), "content_icunormalizer2", "BadMagicICUFolding"));
+ assertU(adoc("id", Integer.toString(idx++), "content_icunormalizer2", "Ruß"));
+ assertU(adoc("id", Integer.toString(idx++), "content_icunormalizer2", "ΜΆΪΟΣ"));
+ assertU(adoc("id", Integer.toString(idx++), "content_icunormalizer2", "Μάϊος"));
+ assertU(adoc("id", Integer.toString(idx++), "content_icunormalizer2", "résumé"));
+ assertU(adoc("id", Integer.toString(idx++), "content_icunormalizer2", "re\u0301sume\u0301"));
+ assertU(adoc("id", Integer.toString(idx++), "content_icunormalizer2", "ELİF"));
+ assertU(adoc("id", Integer.toString(idx++), "content_icunormalizer2", "eli\u0307f"));
+
+ assertU(optimize());
+ }
+
+ @Test
+ public void testICUFolding() {
+ assertQ(req("q", "content_icufolding:BadMagicicuFold*"), "//result[@numFound='1']");
+ assertQ(req("q", "content_icufolding:rU*"), "//result[@numFound='1']");
+ assertQ(req("q", "content_icufolding:Re*Me"), "//result[@numFound='2']");
+ assertQ(req("q", "content_icufolding:RE\u0301su*"), "//result[@numFound='2']");
+ assertQ(req("q", "content_icufolding:El*"), "//result[@numFound='2']");
+ }
+ @Test
+ public void testICUNormalizer2() {
+ assertQ(req("q", "content_icunormalizer2:BadMagicicuFold*"), "//result[@numFound='1']");
+ assertQ(req("q", "content_icunormalizer2:RU*"), "//result[@numFound='1']");
+ assertQ(req("q", "content_icunormalizer2:Μάϊ*"), "//result[@numFound='2']");
+ assertQ(req("q", "content_icunormalizer2:re\u0301Su*"), "//result[@numFound='2']");
+ assertQ(req("q", "content_icunormalizer2:eL*"), "//result[@numFound='2']");
+ }
+}
Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/GreekLowerCaseFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/GreekLowerCaseFilterFactory.java?rev=1303939&r1=1303938&r2=1303939&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/GreekLowerCaseFilterFactory.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/GreekLowerCaseFilterFactory.java Thu Mar 22 18:03:21 2012
@@ -1,4 +1,3 @@
-
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -37,7 +36,7 @@ import org.apache.solr.common.SolrExcept
* </fieldType></pre>
*
*/
-public class GreekLowerCaseFilterFactory extends BaseTokenFilterFactory
+public class GreekLowerCaseFilterFactory extends BaseTokenFilterFactory implements MultiTermAwareComponent
{
@Override
@@ -53,5 +52,9 @@ public class GreekLowerCaseFilterFactory
public GreekLowerCaseFilter create(TokenStream in) {
return new GreekLowerCaseFilter(luceneMatchVersion, in);
}
+
+ public Object getMultiTermComponent() {
+ return this;
+ }
}
Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/TurkishLowerCaseFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/TurkishLowerCaseFilterFactory.java?rev=1303939&r1=1303938&r2=1303939&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/TurkishLowerCaseFilterFactory.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/TurkishLowerCaseFilterFactory.java Thu Mar 22 18:03:21 2012
@@ -31,8 +31,13 @@ import org.apache.lucene.analysis.tr.Tur
* </fieldType></pre>
*
*/
-public class TurkishLowerCaseFilterFactory extends BaseTokenFilterFactory {
+public class TurkishLowerCaseFilterFactory extends BaseTokenFilterFactory implements MultiTermAwareComponent {
public TokenStream create(TokenStream input) {
return new TurkishLowerCaseFilter(input);
}
+
+ @Override
+ public Object getMultiTermComponent() {
+ return this;
+ }
}
Modified: lucene/dev/trunk/solr/core/src/test-files/solr/conf/schema-folding.xml
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/test-files/solr/conf/schema-folding.xml?rev=1303939&r1=1303938&r2=1303939&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/test-files/solr/conf/schema-folding.xml (original)
+++ lucene/dev/trunk/solr/core/src/test-files/solr/conf/schema-folding.xml Thu Mar 22 18:03:21 2012
@@ -149,6 +149,28 @@
</fieldType>
+ <fieldType name="text_greek" class="solr.TextField">
+ <analyzer>
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <filter class="solr.GreekLowerCaseFilterFactory"/>
+ </analyzer>
+ </fieldType>
+
+ <fieldType name="text_turkish" class="solr.TextField">
+ <analyzer>
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <filter class="solr.TurkishLowerCaseFilterFactory"/>
+ </analyzer>
+ </fieldType>
+
+ <fieldType name="text_russian" class="solr.TextField">
+ <analyzer>
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <filter class="solr.LowerCaseFilterFactory"/>
+ </analyzer>
+ </fieldType>
+
+
<fieldType name="int" class="solr.TrieIntField" precisionStep="4" omitNorms="true" positionIncrementGap="0"/>
<fieldType name="float" class="solr.TrieFloatField" precisionStep="4" omitNorms="true" positionIncrementGap="0"/>
<fieldType name="long" class="solr.TrieLongField" precisionStep="4" omitNorms="true" positionIncrementGap="0"/>
@@ -178,6 +200,9 @@
<field name="content_oldstyle" type="text_oldstyle" indexed="true" stored="true"/>
<field name="content_charfilter" type="text_charfilter" indexed="true" stored="true"/>
<field name="content_multi_bad" type="text_multi_bad" indexed="true" stored="true"/>
+ <field name="content_greek" type="text_greek" indexed="true" stored="true"/>
+ <field name="content_turkish" type="text_turkish" indexed="true" stored="true"/>
+ <field name="content_russian" type="text_russian" indexed="true" stored="true"/>
<dynamicField name="*_straight" type="text_straight" indexed="true" stored="true"/>
<dynamicField name="*_lower" type="text_lower" indexed="true" stored="true"/>
Modified: lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/TestFoldingMultitermQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/TestFoldingMultitermQuery.java?rev=1303939&r1=1303938&r2=1303939&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/TestFoldingMultitermQuery.java (original)
+++ lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/TestFoldingMultitermQuery.java Thu Mar 22 18:03:21 2012
@@ -67,6 +67,25 @@ public class TestFoldingMultitermQuery e
"content_keyword", docs[i]
));
}
+ // Mixing and matching amongst various languages is probably a bad thing, so add some tests for various
+ // special filters
+ int idx = docs.length;
+ // Greek
+ assertU(adoc("id", Integer.toString(idx++), "content_greek", "Μάϊος"));
+ assertU(adoc("id", Integer.toString(idx++), "content_greek", "ΜΆΪΟΣ"));
+
+ // Turkish
+
+ assertU(adoc("id", Integer.toString(idx++), "content_turkish", "\u0130STANBUL"));
+ assertU(adoc("id", Integer.toString(idx++), "content_turkish", "ISPARTA"));
+ assertU(adoc("id", Integer.toString(idx++), "content_turkish", "izmir"));
+
+
+ // Russian normalization
+ assertU(adoc("id", Integer.toString(idx++), "content_russian", "электромагнитной"));
+ assertU(adoc("id", Integer.toString(idx++), "content_russian", "Вместе"));
+ assertU(adoc("id", Integer.toString(idx++), "content_russian", "силе"));
+
assertU(optimize());
}
@@ -272,4 +291,17 @@ public class TestFoldingMultitermQuery e
resetExceptionIgnores();
}
}
+ @Test
+ public void testGreek() {
+ assertQ(req("q", "content_greek:μαιο*"), "//result[@numFound='2']");
+ assertQ(req("q", "content_greek:ΜΆΪΟ*"), "//result[@numFound='2']");
+ assertQ(req("q", "content_greek:Μάϊο*"), "//result[@numFound='2']");
+ }
+ @Test
+ public void testRussian() {
+ assertQ(req("q", "content_russian:элЕктРомагн*тной"), "//result[@numFound='1']");
+ assertQ(req("q", "content_russian:Вме*те"), "//result[@numFound='1']");
+ assertQ(req("q", "content_russian:Си*е"), "//result[@numFound='1']");
+ assertQ(req("q", "content_russian:эЛектромагнИт*"), "//result[@numFound='1']");
+ }
}
\ No newline at end of file