You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2012/03/22 22:41:38 UTC

svn commit: r1304082 - in /lucene/dev/branches/branch_3x: ./ solr/ solr/contrib/analysis-extras/src/java/org/apache/solr/analysis/ solr/contrib/analysis-extras/src/test-files/analysis-extras/solr/conf/ solr/contrib/analysis-extras/src/test/org/apache/s...

Author: rmuir
Date: Thu Mar 22 21:41:38 2012
New Revision: 1304082

URL: http://svn.apache.org/viewvc?rev=1304082&view=rev
Log:
SOLR-2921: enable multitermqueries for these filters too

Modified:
    lucene/dev/branches/branch_3x/   (props changed)
    lucene/dev/branches/branch_3x/solr/   (props changed)
    lucene/dev/branches/branch_3x/solr/contrib/analysis-extras/src/java/org/apache/solr/analysis/ICUTransformFilterFactory.java
    lucene/dev/branches/branch_3x/solr/contrib/analysis-extras/src/test-files/analysis-extras/solr/conf/schema-folding-extra.xml
    lucene/dev/branches/branch_3x/solr/contrib/analysis-extras/src/test/org/apache/solr/analysis/TestFoldingMultitermExtrasQuery.java
    lucene/dev/branches/branch_3x/solr/core/   (props changed)
    lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/ArabicNormalizationFilterFactory.java
    lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/CJKWidthFilterFactory.java
    lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/GermanNormalizationFilterFactory.java
    lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/HindiNormalizationFilterFactory.java
    lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/IndicNormalizationFilterFactory.java
    lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/PersianNormalizationFilterFactory.java
    lucene/dev/branches/branch_3x/solr/core/src/test-files/solr/conf/schema-folding.xml
    lucene/dev/branches/branch_3x/solr/core/src/test/org/apache/solr/search/TestFoldingMultitermQuery.java

Modified: lucene/dev/branches/branch_3x/solr/contrib/analysis-extras/src/java/org/apache/solr/analysis/ICUTransformFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/contrib/analysis-extras/src/java/org/apache/solr/analysis/ICUTransformFilterFactory.java?rev=1304082&r1=1304081&r2=1304082&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/contrib/analysis-extras/src/java/org/apache/solr/analysis/ICUTransformFilterFactory.java (original)
+++ lucene/dev/branches/branch_3x/solr/contrib/analysis-extras/src/java/org/apache/solr/analysis/ICUTransformFilterFactory.java Thu Mar 22 21:41:38 2012
@@ -37,7 +37,7 @@ import com.ibm.icu.text.Transliterator;
  * </ul>
  * @see Transliterator
  */
-public class ICUTransformFilterFactory extends BaseTokenFilterFactory {
+public class ICUTransformFilterFactory extends BaseTokenFilterFactory implements MultiTermAwareComponent {
   private Transliterator transliterator;
   
   // TODO: add support for custom rules
@@ -64,4 +64,9 @@ public class ICUTransformFilterFactory e
   public TokenStream create(TokenStream input) {
     return new ICUTransformFilter(input, transliterator);
   }
+  
+  @Override
+  public Object getMultiTermComponent() {
+    return this;
+  }
 }

Modified: lucene/dev/branches/branch_3x/solr/contrib/analysis-extras/src/test-files/analysis-extras/solr/conf/schema-folding-extra.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/contrib/analysis-extras/src/test-files/analysis-extras/solr/conf/schema-folding-extra.xml?rev=1304082&r1=1304081&r2=1304082&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/contrib/analysis-extras/src/test-files/analysis-extras/solr/conf/schema-folding-extra.xml (original)
+++ lucene/dev/branches/branch_3x/solr/contrib/analysis-extras/src/test-files/analysis-extras/solr/conf/schema-folding-extra.xml Thu Mar 22 21:41:38 2012
@@ -33,6 +33,13 @@
         <filter class="solr.ICUNormalizer2FilterFactory" name="nfkc_cf" mode="compose"/>
       </analyzer>
     </fieldType>
+    
+    <fieldType name="text_icutransform" class="solr.TextField">
+      <analyzer>
+        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <filter class="solr.ICUTransformFilterFactory" id="Cyrillic-Latin"/>
+      </analyzer>
+    </fieldType>
 
    </types>
 
@@ -40,6 +47,7 @@
     <field name="id" type="string" indexed="true" stored="true" required="true"/>
     <field name="content_icufolding" type="text_icufolding" indexed="true" stored="true"/>
     <field name="content_icunormalizer2" type="text_icunormalizer2" indexed="true" stored="true"/>
+    <field name="content_icutransform" type="text_icutransform" indexed="true" stored="true"/>
 
   </fields>
 

Modified: lucene/dev/branches/branch_3x/solr/contrib/analysis-extras/src/test/org/apache/solr/analysis/TestFoldingMultitermExtrasQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/contrib/analysis-extras/src/test/org/apache/solr/analysis/TestFoldingMultitermExtrasQuery.java?rev=1304082&r1=1304081&r2=1304082&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/contrib/analysis-extras/src/test/org/apache/solr/analysis/TestFoldingMultitermExtrasQuery.java (original)
+++ lucene/dev/branches/branch_3x/solr/contrib/analysis-extras/src/test/org/apache/solr/analysis/TestFoldingMultitermExtrasQuery.java Thu Mar 22 21:41:38 2012
@@ -17,7 +17,6 @@ package org.apache.solr.analysis;
  * limitations under the License.
  */
 
-import org.apache.lucene.index.IndexWriter;
 import org.apache.solr.SolrTestCaseJ4;
 import org.junit.BeforeClass;
 import org.junit.Test;
@@ -30,7 +29,6 @@ public class TestFoldingMultitermExtrasQ
   @BeforeClass
   public static void beforeTests() throws Exception {
     initCore("solrconfig-icucollate.xml","schema-folding-extra.xml", "analysis-extras/solr");
-    IndexWriter iw;
 
     int idx = 1;
     // ICUFoldingFilterFactory
@@ -54,7 +52,10 @@ public class TestFoldingMultitermExtrasQ
     assertU(adoc("id", Integer.toString(idx++), "content_icunormalizer2", "ELÄ°F"));
     assertU(adoc("id", Integer.toString(idx++), "content_icunormalizer2", "eli\u0307f"));
 
-    assertU(optimize());
+    // ICUTransformFilterFactory
+    assertU(adoc("id", Integer.toString(idx++), "content_icutransform", "Российская"));
+
+    assertU(commit());
   }
 
   @Test
@@ -73,4 +74,8 @@ public class TestFoldingMultitermExtrasQ
     assertQ(req("q", "content_icunormalizer2:re\u0301Su*"), "//result[@numFound='2']");
     assertQ(req("q", "content_icunormalizer2:eL*"), "//result[@numFound='2']");
   }
+  
+  public void testICUTransform() {
+    assertQ(req("q", "content_icutransform:Росс*"), "//result[@numFound='1']");
+  }
 }

Modified: lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/ArabicNormalizationFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/ArabicNormalizationFilterFactory.java?rev=1304082&r1=1304081&r2=1304082&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/ArabicNormalizationFilterFactory.java (original)
+++ lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/ArabicNormalizationFilterFactory.java Thu Mar 22 21:41:38 2012
@@ -31,9 +31,14 @@ import org.apache.lucene.analysis.ar.Ara
  * &lt;/fieldType&gt;</pre>
  * @version $Id$
  */
-public class ArabicNormalizationFilterFactory extends BaseTokenFilterFactory{
+public class ArabicNormalizationFilterFactory extends BaseTokenFilterFactory implements MultiTermAwareComponent {
 
   public ArabicNormalizationFilter create(TokenStream input) {
     return new ArabicNormalizationFilter(input);
   }
+
+  @Override
+  public Object getMultiTermComponent() {
+    return this;
+  }
 }

Modified: lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/CJKWidthFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/CJKWidthFilterFactory.java?rev=1304082&r1=1304081&r2=1304082&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/CJKWidthFilterFactory.java (original)
+++ lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/CJKWidthFilterFactory.java Thu Mar 22 21:41:38 2012
@@ -33,9 +33,14 @@ import org.apache.lucene.analysis.cjk.CJ
  * &lt;/fieldType&gt;</pre>
  */
 
-public class CJKWidthFilterFactory extends BaseTokenFilterFactory {
+public class CJKWidthFilterFactory extends BaseTokenFilterFactory implements MultiTermAwareComponent {
   
   public TokenStream create(TokenStream input) {
     return new CJKWidthFilter(input);
   }
+  
+  @Override
+  public Object getMultiTermComponent() {
+    return this;
+  }
 }

Modified: lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/GermanNormalizationFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/GermanNormalizationFilterFactory.java?rev=1304082&r1=1304081&r2=1304082&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/GermanNormalizationFilterFactory.java (original)
+++ lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/GermanNormalizationFilterFactory.java Thu Mar 22 21:41:38 2012
@@ -31,9 +31,14 @@ import org.apache.lucene.analysis.de.Ger
  *   &lt;/analyzer&gt;
  * &lt;/fieldType&gt;</pre> 
  */
-public class GermanNormalizationFilterFactory extends BaseTokenFilterFactory {
+public class GermanNormalizationFilterFactory extends BaseTokenFilterFactory implements MultiTermAwareComponent {
 
   public TokenStream create(TokenStream input) {
     return new GermanNormalizationFilter(input);
   }
+  
+  @Override
+  public Object getMultiTermComponent() {
+    return this;
+  }
 }

Modified: lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/HindiNormalizationFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/HindiNormalizationFilterFactory.java?rev=1304082&r1=1304081&r2=1304082&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/HindiNormalizationFilterFactory.java (original)
+++ lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/HindiNormalizationFilterFactory.java Thu Mar 22 21:41:38 2012
@@ -31,8 +31,13 @@ import org.apache.lucene.analysis.hi.Hin
  * &lt;/fieldType&gt;</pre>
  * @version $Id$  
  */
-public class HindiNormalizationFilterFactory extends BaseTokenFilterFactory {
+public class HindiNormalizationFilterFactory extends BaseTokenFilterFactory implements MultiTermAwareComponent {
   public TokenStream create(TokenStream input) {
     return new HindiNormalizationFilter(input);
   }
+  
+  @Override
+  public Object getMultiTermComponent() {
+    return this;
+  }
 }

Modified: lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/IndicNormalizationFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/IndicNormalizationFilterFactory.java?rev=1304082&r1=1304081&r2=1304082&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/IndicNormalizationFilterFactory.java (original)
+++ lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/IndicNormalizationFilterFactory.java Thu Mar 22 21:41:38 2012
@@ -31,8 +31,13 @@ import org.apache.lucene.analysis.in.Ind
  * &lt;/fieldType&gt;</pre>
  * @version $Id$   
  */
-public class IndicNormalizationFilterFactory extends BaseTokenFilterFactory {
+public class IndicNormalizationFilterFactory extends BaseTokenFilterFactory implements MultiTermAwareComponent {
   public TokenStream create(TokenStream input) {
     return new IndicNormalizationFilter(input);
   }
+  
+  @Override
+  public Object getMultiTermComponent() {
+    return this;
+  }
 }

Modified: lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/PersianNormalizationFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/PersianNormalizationFilterFactory.java?rev=1304082&r1=1304081&r2=1304082&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/PersianNormalizationFilterFactory.java (original)
+++ lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/PersianNormalizationFilterFactory.java Thu Mar 22 21:41:38 2012
@@ -34,9 +34,14 @@ import org.apache.lucene.analysis.TokenS
  * &lt;/fieldType&gt;</pre>
  * @version $Id$
  */
-public class PersianNormalizationFilterFactory extends BaseTokenFilterFactory {
+public class PersianNormalizationFilterFactory extends BaseTokenFilterFactory implements MultiTermAwareComponent {
   public PersianNormalizationFilter create(TokenStream input) {
     return new PersianNormalizationFilter(input);
   }
+  
+  @Override
+  public Object getMultiTermComponent() {
+    return this;
+  }
 }
 

Modified: lucene/dev/branches/branch_3x/solr/core/src/test-files/solr/conf/schema-folding.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/core/src/test-files/solr/conf/schema-folding.xml?rev=1304082&r1=1304081&r2=1304082&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/core/src/test-files/solr/conf/schema-folding.xml (original)
+++ lucene/dev/branches/branch_3x/solr/core/src/test-files/solr/conf/schema-folding.xml Thu Mar 22 21:41:38 2012
@@ -169,6 +169,42 @@
         <filter class="solr.LowerCaseFilterFactory"/>
       </analyzer>
     </fieldType>
+    
+    <fieldType name="text_persian" class="solr.TextField">
+      <analyzer>
+        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <filter class="solr.PersianNormalizationFilterFactory"/>
+      </analyzer>
+    </fieldType>
+    
+    <fieldType name="text_arabic" class="solr.TextField">
+      <analyzer>
+        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <filter class="solr.ArabicNormalizationFilterFactory"/>
+      </analyzer>
+    </fieldType>
+    
+    <fieldType name="text_hindi" class="solr.TextField">
+      <analyzer>
+        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <filter class="solr.IndicNormalizationFilterFactory"/>
+        <filter class="solr.HindiNormalizationFilterFactory"/>
+      </analyzer>
+    </fieldType>
+    
+    <fieldType name="text_german" class="solr.TextField">
+      <analyzer>
+        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <filter class="solr.GermanNormalizationFilterFactory"/>
+      </analyzer>
+    </fieldType>
+    
+    <fieldType name="text_width" class="solr.TextField">
+      <analyzer>
+        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <filter class="solr.CJKWidthFilterFactory"/>
+      </analyzer>
+    </fieldType>
 
     <fieldType name="text_isolatin" class="solr.TextField">
       <analyzer>
@@ -211,6 +247,11 @@
     <field name="content_turkish" type="text_turkish" indexed="true" stored="true"/>
     <field name="content_russian" type="text_russian" indexed="true" stored="true"/>
     <field name="content_isolatin" type="text_isolatin" indexed="true" stored="true"/>
+    <field name="content_persian" type="text_persian" indexed="true" stored="true"/>
+    <field name="content_arabic" type="text_arabic" indexed="true" stored="true"/>
+    <field name="content_hindi" type="text_hindi" indexed="true" stored="true"/>
+    <field name="content_german" type="text_german" indexed="true" stored="true"/>
+    <field name="content_width" type="text_width" indexed="true" stored="true"/>
 
     <dynamicField name="*_straight" type="text_straight" indexed="true" stored="true"/>
     <dynamicField name="*_lower" type="text_lower" indexed="true" stored="true"/>

Modified: lucene/dev/branches/branch_3x/solr/core/src/test/org/apache/solr/search/TestFoldingMultitermQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/core/src/test/org/apache/solr/search/TestFoldingMultitermQuery.java?rev=1304082&r1=1304081&r2=1304082&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/core/src/test/org/apache/solr/search/TestFoldingMultitermQuery.java (original)
+++ lucene/dev/branches/branch_3x/solr/core/src/test/org/apache/solr/search/TestFoldingMultitermQuery.java Thu Mar 22 21:41:38 2012
@@ -17,7 +17,6 @@ package org.apache.solr.search;
  * limitations under the License.
  */
 
-import org.apache.lucene.index.IndexWriter;
 import org.apache.solr.SolrTestCaseJ4;
 import org.junit.BeforeClass;
 import org.junit.Test;
@@ -31,7 +30,6 @@ public class TestFoldingMultitermQuery e
   @BeforeClass
   public static void beforeTests() throws Exception {
     initCore("solrconfig.xml", "schema-folding.xml");
-    IndexWriter iw;
 
     String docs[] = {
         "abcdefg1 finger",
@@ -91,7 +89,22 @@ public class TestFoldingMultitermQuery e
     assertU(adoc("id", Integer.toString(idx++), "content_isolatin", "YppÉÉÉ all is well"));
     assertU(adoc("id", Integer.toString(idx++), "content_isolatin", "that's äll"));
 
-    assertU(optimize());
+    // persian normalization
+    assertU(adoc("id", Integer.toString(idx++), "content_persian", "هاي"));
+    
+    // arabic normalization
+    assertU(adoc("id", Integer.toString(idx++), "content_arabic", "روبرت"));
+
+    // hindi normalization
+    assertU(adoc("id", Integer.toString(idx++), "content_hindi", "हिंदी"));
+    assertU(adoc("id", Integer.toString(idx++), "content_hindi", "अाअा"));
+    
+    // german normalization
+    assertU(adoc("id", Integer.toString(idx++), "content_german", "weissbier"));
+    
+    // cjk width normalization
+    assertU(adoc("id", Integer.toString(idx++), "content_width", "ヴィッツ"));
+    assertU(commit());
   }
 
   @Test
@@ -299,4 +312,25 @@ public class TestFoldingMultitermQuery e
     assertQ(req("q", "content_isolatin:äl*"), "//result[@numFound='2']");
     assertQ(req("q", "content_isolatin:ál*"), "//result[@numFound='2']");
   }
+  
+  public void testPersian() {
+    assertQ(req("q", "content_persian:های*"), "//result[@numFound='1']");
+  }
+  
+  public void testArabic() {
+    assertQ(req("q", "content_arabic:روبرـــــــــــــــــــــــــــــــــت*"), "//result[@numFound='1']");
+  }
+  
+  public void testHindi() {
+    assertQ(req("q", "content_hindi:हिन्दी*"), "//result[@numFound='1']");
+    assertQ(req("q", "content_hindi:आआ*"), "//result[@numFound='1']");
+  }
+  
+  public void testGerman() {
+    assertQ(req("q", "content_german:weiß*"), "//result[@numFound='1']");
+  }
+  
+  public void testCJKWidth() {
+    assertQ(req("q", "content_width:ヴィ*"), "//result[@numFound='1']");
+  }
 }
\ No newline at end of file