You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by er...@apache.org on 2012/03/22 19:03:22 UTC

svn commit: r1303939 - in /lucene/dev/trunk/solr: contrib/analysis-extras/src/java/org/apache/solr/analysis/ contrib/analysis-extras/src/test-files/analysis-extras/solr/conf/ contrib/analysis-extras/src/test/org/apache/solr/analysis/ core/src/java/org/...

Author: erick
Date: Thu Mar 22 18:03:21 2012
New Revision: 1303939

URL: http://svn.apache.org/viewvc?rev=1303939&view=rev
Log:
Fixes for SOLR-2921 (making more components MultiTermAware)

Added:
    lucene/dev/trunk/solr/contrib/analysis-extras/src/test-files/analysis-extras/solr/conf/schema-folding-extra.xml
    lucene/dev/trunk/solr/contrib/analysis-extras/src/test/org/apache/solr/analysis/TestFoldingMultitermExtrasQuery.java
Modified:
    lucene/dev/trunk/solr/contrib/analysis-extras/src/java/org/apache/solr/analysis/ICUFoldingFilterFactory.java
    lucene/dev/trunk/solr/contrib/analysis-extras/src/java/org/apache/solr/analysis/ICUNormalizer2FilterFactory.java
    lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/GreekLowerCaseFilterFactory.java
    lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/TurkishLowerCaseFilterFactory.java
    lucene/dev/trunk/solr/core/src/test-files/solr/conf/schema-folding.xml
    lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/TestFoldingMultitermQuery.java

Modified: lucene/dev/trunk/solr/contrib/analysis-extras/src/java/org/apache/solr/analysis/ICUFoldingFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/analysis-extras/src/java/org/apache/solr/analysis/ICUFoldingFilterFactory.java?rev=1303939&r1=1303938&r2=1303939&view=diff
==============================================================================
--- lucene/dev/trunk/solr/contrib/analysis-extras/src/java/org/apache/solr/analysis/ICUFoldingFilterFactory.java (original)
+++ lucene/dev/trunk/solr/contrib/analysis-extras/src/java/org/apache/solr/analysis/ICUFoldingFilterFactory.java Thu Mar 22 18:03:21 2012
@@ -21,10 +21,14 @@ import org.apache.lucene.analysis.icu.IC
  */
 
 /** Factory for {@link ICUFoldingFilter} */
-public class ICUFoldingFilterFactory extends BaseTokenFilterFactory {
+public class ICUFoldingFilterFactory extends BaseTokenFilterFactory implements MultiTermAwareComponent {
 
   @Override
   public TokenStream create(TokenStream input) {
     return new ICUFoldingFilter(input);
   }
+
+  public Object getMultiTermComponent() {
+    return this;
+  }
 }

Modified: lucene/dev/trunk/solr/contrib/analysis-extras/src/java/org/apache/solr/analysis/ICUNormalizer2FilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/analysis-extras/src/java/org/apache/solr/analysis/ICUNormalizer2FilterFactory.java?rev=1303939&r1=1303938&r2=1303939&view=diff
==============================================================================
--- lucene/dev/trunk/solr/contrib/analysis-extras/src/java/org/apache/solr/analysis/ICUNormalizer2FilterFactory.java (original)
+++ lucene/dev/trunk/solr/contrib/analysis-extras/src/java/org/apache/solr/analysis/ICUNormalizer2FilterFactory.java Thu Mar 22 18:03:21 2012
@@ -44,7 +44,7 @@ import com.ibm.icu.text.UnicodeSet;
  * @see Normalizer2
  * @see FilteredNormalizer2
  */
-public class ICUNormalizer2FilterFactory extends BaseTokenFilterFactory {
+public class ICUNormalizer2FilterFactory extends BaseTokenFilterFactory implements MultiTermAwareComponent {
   private Normalizer2 normalizer;
 
   // TODO: support custom normalization
@@ -78,4 +78,8 @@ public class ICUNormalizer2FilterFactory
   public TokenStream create(TokenStream input) {
     return new ICUNormalizer2Filter(input, normalizer);
   }
+
+  public Object getMultiTermComponent() {
+    return this;
+  }
 }

Added: lucene/dev/trunk/solr/contrib/analysis-extras/src/test-files/analysis-extras/solr/conf/schema-folding-extra.xml
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/analysis-extras/src/test-files/analysis-extras/solr/conf/schema-folding-extra.xml?rev=1303939&view=auto
==============================================================================
--- lucene/dev/trunk/solr/contrib/analysis-extras/src/test-files/analysis-extras/solr/conf/schema-folding-extra.xml (added)
+++ lucene/dev/trunk/solr/contrib/analysis-extras/src/test-files/analysis-extras/solr/conf/schema-folding-extra.xml Thu Mar 22 18:03:21 2012
@@ -0,0 +1,49 @@
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+  -->
+
+<schema name="test" version="1.0">
+  <types>
+    <fieldtype name="string" class="solr.StrField" sortMissingLast="true" multiValued="false"/>
+
+
+    <fieldType name="text_icufolding" class="solr.TextField">
+      <analyzer>
+        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <filter class="solr.ICUFoldingFilterFactory"/>
+      </analyzer>
+    </fieldType>
+
+    <fieldType name="text_icunormalizer2" class="solr.TextField">
+      <analyzer>
+        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <filter class="solr.ICUNormalizer2FilterFactory" name="nfkc_cf" mode="compose"/>
+      </analyzer>
+    </fieldType>
+
+   </types>
+
+  <fields>
+    <field name="id" type="string" indexed="true" stored="true" required="true"/>
+    <field name="content_icufolding" type="text_icufolding" indexed="true" stored="true"/>
+    <field name="content_icunormalizer2" type="text_icunormalizer2" indexed="true" stored="true"/>
+
+  </fields>
+
+  <defaultSearchField>id</defaultSearchField>
+  <uniqueKey>id</uniqueKey>
+
+</schema>

Added: lucene/dev/trunk/solr/contrib/analysis-extras/src/test/org/apache/solr/analysis/TestFoldingMultitermExtrasQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/analysis-extras/src/test/org/apache/solr/analysis/TestFoldingMultitermExtrasQuery.java?rev=1303939&view=auto
==============================================================================
--- lucene/dev/trunk/solr/contrib/analysis-extras/src/test/org/apache/solr/analysis/TestFoldingMultitermExtrasQuery.java (added)
+++ lucene/dev/trunk/solr/contrib/analysis-extras/src/test/org/apache/solr/analysis/TestFoldingMultitermExtrasQuery.java Thu Mar 22 18:03:21 2012
@@ -0,0 +1,77 @@
+package org.apache.solr.analysis;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.index.IndexWriter;
+import org.apache.solr.SolrTestCaseJ4;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+public class TestFoldingMultitermExtrasQuery extends SolrTestCaseJ4 {
+
+  public String getCoreName() {
+    return "basic";
+  }
+
+  @BeforeClass
+  public static void beforeTests() throws Exception {
+    initCore("solrconfig-icucollate.xml","schema-folding-extra.xml", "analysis-extras/solr");
+    IndexWriter iw;
+
+    int idx = 1;
+    // ICUFoldingFilterFactory
+    assertU(adoc("id", Integer.toString(idx++), "content_icufolding", "BadMagicICUFolding"));
+    assertU(adoc("id", Integer.toString(idx++), "content_icufolding", "Ruß"));
+    assertU(adoc("id", Integer.toString(idx++), "content_icufolding", "ΜΆΪΟΣ"));
+    assertU(adoc("id", Integer.toString(idx++), "content_icufolding", "Μάϊος"));
+    assertU(adoc("id", Integer.toString(idx++), "content_icufolding", "résumé"));
+    assertU(adoc("id", Integer.toString(idx++), "content_icufolding", "re\u0301sume\u0301"));
+    assertU(adoc("id", Integer.toString(idx++), "content_icufolding", "EL\u0130F"));
+    assertU(adoc("id", Integer.toString(idx++), "content_icufolding", "eli\u0307f"));
+
+    // ICUNormalizer2FilterFactory
+
+    assertU(adoc("id", Integer.toString(idx++), "content_icunormalizer2", "BadMagicICUFolding"));
+    assertU(adoc("id", Integer.toString(idx++), "content_icunormalizer2", "Ruß"));
+    assertU(adoc("id", Integer.toString(idx++), "content_icunormalizer2", "ΜΆΪΟΣ"));
+    assertU(adoc("id", Integer.toString(idx++), "content_icunormalizer2", "Μάϊος"));
+    assertU(adoc("id", Integer.toString(idx++), "content_icunormalizer2", "résumé"));
+    assertU(adoc("id", Integer.toString(idx++), "content_icunormalizer2", "re\u0301sume\u0301"));
+    assertU(adoc("id", Integer.toString(idx++), "content_icunormalizer2", "EL\u0130F"));
+    assertU(adoc("id", Integer.toString(idx++), "content_icunormalizer2", "eli\u0307f"));
+
+    assertU(optimize());
+  }
+
+  @Test
+  public void testICUFolding() {
+    assertQ(req("q", "content_icufolding:BadMagicicuFold*"), "//result[@numFound='1']");
+    assertQ(req("q", "content_icufolding:rU*"), "//result[@numFound='1']");
+    assertQ(req("q", "content_icufolding:Re*Me"), "//result[@numFound='2']");
+    assertQ(req("q", "content_icufolding:RE\u0301su*"), "//result[@numFound='2']");
+    assertQ(req("q", "content_icufolding:El*"), "//result[@numFound='2']");
+  }
+  @Test
+  public void testICUNormalizer2() {
+    assertQ(req("q", "content_icunormalizer2:BadMagicicuFold*"), "//result[@numFound='1']");
+    assertQ(req("q", "content_icunormalizer2:RU*"), "//result[@numFound='1']");
+    assertQ(req("q", "content_icunormalizer2:Μάϊ*"), "//result[@numFound='2']");
+    assertQ(req("q", "content_icunormalizer2:re\u0301Su*"), "//result[@numFound='2']");
+    assertQ(req("q", "content_icunormalizer2:eL*"), "//result[@numFound='2']");
+  }
+}

Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/GreekLowerCaseFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/GreekLowerCaseFilterFactory.java?rev=1303939&r1=1303938&r2=1303939&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/GreekLowerCaseFilterFactory.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/GreekLowerCaseFilterFactory.java Thu Mar 22 18:03:21 2012
@@ -1,4 +1,3 @@
-
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -37,7 +36,7 @@ import org.apache.solr.common.SolrExcept
  * &lt;/fieldType&gt;</pre> 
  *
  */
-public class GreekLowerCaseFilterFactory extends BaseTokenFilterFactory 
+public class GreekLowerCaseFilterFactory extends BaseTokenFilterFactory implements MultiTermAwareComponent
 {
  
   @Override
@@ -53,5 +52,9 @@ public class GreekLowerCaseFilterFactory
   public GreekLowerCaseFilter create(TokenStream in) {
     return new GreekLowerCaseFilter(luceneMatchVersion, in);
   }
+
+  public Object getMultiTermComponent() {
+    return this;
+  }
 }
 

Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/TurkishLowerCaseFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/TurkishLowerCaseFilterFactory.java?rev=1303939&r1=1303938&r2=1303939&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/TurkishLowerCaseFilterFactory.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/TurkishLowerCaseFilterFactory.java Thu Mar 22 18:03:21 2012
@@ -31,8 +31,13 @@ import org.apache.lucene.analysis.tr.Tur
  * &lt;/fieldType&gt;</pre> 
  *
  */
-public class TurkishLowerCaseFilterFactory extends BaseTokenFilterFactory {
+public class TurkishLowerCaseFilterFactory extends BaseTokenFilterFactory  implements MultiTermAwareComponent {
   public TokenStream create(TokenStream input) {
     return new TurkishLowerCaseFilter(input);
   }
+
+  @Override
+  public Object getMultiTermComponent() {
+    return this;
+  }
 }

Modified: lucene/dev/trunk/solr/core/src/test-files/solr/conf/schema-folding.xml
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/test-files/solr/conf/schema-folding.xml?rev=1303939&r1=1303938&r2=1303939&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/test-files/solr/conf/schema-folding.xml (original)
+++ lucene/dev/trunk/solr/core/src/test-files/solr/conf/schema-folding.xml Thu Mar 22 18:03:21 2012
@@ -149,6 +149,28 @@
     </fieldType>
 
 
+    <fieldType name="text_greek" class="solr.TextField">
+      <analyzer>
+        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <filter class="solr.GreekLowerCaseFilterFactory"/>
+      </analyzer>
+    </fieldType>
+
+    <fieldType name="text_turkish" class="solr.TextField">
+      <analyzer>
+        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <filter class="solr.TurkishLowerCaseFilterFactory"/>
+      </analyzer>
+    </fieldType>
+
+    <fieldType name="text_russian" class="solr.TextField">
+      <analyzer>
+        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+      </analyzer>
+    </fieldType>
+
+
     <fieldType name="int" class="solr.TrieIntField" precisionStep="4" omitNorms="true" positionIncrementGap="0"/>
     <fieldType name="float" class="solr.TrieFloatField" precisionStep="4" omitNorms="true" positionIncrementGap="0"/>
     <fieldType name="long" class="solr.TrieLongField" precisionStep="4" omitNorms="true" positionIncrementGap="0"/>
@@ -178,6 +200,9 @@
     <field name="content_oldstyle" type="text_oldstyle" indexed="true" stored="true"/>
     <field name="content_charfilter" type="text_charfilter" indexed="true" stored="true"/>
     <field name="content_multi_bad" type="text_multi_bad" indexed="true" stored="true"/>
+    <field name="content_greek" type="text_greek" indexed="true" stored="true"/>
+    <field name="content_turkish" type="text_turkish" indexed="true" stored="true"/>
+    <field name="content_russian" type="text_russian" indexed="true" stored="true"/>
 
     <dynamicField name="*_straight" type="text_straight" indexed="true" stored="true"/>
     <dynamicField name="*_lower" type="text_lower" indexed="true" stored="true"/>

Modified: lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/TestFoldingMultitermQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/TestFoldingMultitermQuery.java?rev=1303939&r1=1303938&r2=1303939&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/TestFoldingMultitermQuery.java (original)
+++ lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/TestFoldingMultitermQuery.java Thu Mar 22 18:03:21 2012
@@ -67,6 +67,25 @@ public class TestFoldingMultitermQuery e
           "content_keyword", docs[i]
       ));
     }
+    // Mixing and matching amongst various languages is probably a bad thing, so add some tests for various
+    // special filters
+    int idx = docs.length;
+    // Greek
+    assertU(adoc("id", Integer.toString(idx++), "content_greek", "Μάϊος"));
+    assertU(adoc("id", Integer.toString(idx++), "content_greek", "ΜΆΪΟΣ"));
+
+    // Turkish
+
+    assertU(adoc("id", Integer.toString(idx++), "content_turkish", "\u0130STANBUL"));
+    assertU(adoc("id", Integer.toString(idx++), "content_turkish", "ISPARTA"));
+    assertU(adoc("id", Integer.toString(idx++), "content_turkish", "izmir"));
+
+
+    // Russian normalization
+    assertU(adoc("id", Integer.toString(idx++), "content_russian", "электромагнитной"));
+    assertU(adoc("id", Integer.toString(idx++), "content_russian", "Вместе"));
+    assertU(adoc("id", Integer.toString(idx++), "content_russian", "силе"));
+
     assertU(optimize());
   }
 
@@ -272,4 +291,17 @@ public class TestFoldingMultitermQuery e
       resetExceptionIgnores();
     }
   }
+  @Test
+  public void testGreek() {
+    assertQ(req("q", "content_greek:μαιο*"), "//result[@numFound='2']");
+    assertQ(req("q", "content_greek:ΜΆΪΟ*"), "//result[@numFound='2']");
+    assertQ(req("q", "content_greek:Μάϊο*"), "//result[@numFound='2']");
+  }
+  @Test
+  public void testRussian() {
+    assertQ(req("q", "content_russian:элЕктРомагн*тной"), "//result[@numFound='1']");
+    assertQ(req("q", "content_russian:Вме*те"), "//result[@numFound='1']");
+    assertQ(req("q", "content_russian:Си*е"), "//result[@numFound='1']");
+    assertQ(req("q", "content_russian:эЛектромагнИт*"), "//result[@numFound='1']");
+  }
 }
\ No newline at end of file