You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ho...@apache.org on 2012/06/26 03:59:17 UTC

svn commit: r1353801 - in /lucene/dev/trunk/solr: core/src/java/org/apache/solr/schema/ core/src/java/org/apache/solr/search/similarities/ core/src/test-files/solr/conf/ core/src/test/org/apache/solr/schema/ core/src/test/org/apache/solr/search/similar...

Author: hossman
Date: Tue Jun 26 01:59:15 2012
New Revision: 1353801

URL: http://svn.apache.org/viewvc?rev=1353801&view=rev
Log:
SOLR-3577: better javadocs and error messages related to global/fieldtype SimilarityFactory usage (ie: SOLR-2338)

Added:
    lucene/dev/trunk/solr/core/src/test-files/solr/conf/bad-schema-sim-global-vs-ft-mismatch.xml   (with props)
Modified:
    lucene/dev/trunk/solr/core/src/java/org/apache/solr/schema/IndexSchema.java
    lucene/dev/trunk/solr/core/src/java/org/apache/solr/schema/SimilarityFactory.java
    lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/similarities/SchemaSimilarityFactory.java
    lucene/dev/trunk/solr/core/src/test-files/solr/conf/schema-tfidf.xml
    lucene/dev/trunk/solr/core/src/test-files/solr/conf/schema.xml
    lucene/dev/trunk/solr/core/src/test/org/apache/solr/schema/BadIndexSchemaTest.java
    lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/similarities/TestDefaultSimilarityFactory.java
    lucene/dev/trunk/solr/example/solr/conf/schema.xml

Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/schema/IndexSchema.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/schema/IndexSchema.java?rev=1353801&r1=1353800&r2=1353801&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/schema/IndexSchema.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/schema/IndexSchema.java Tue Jun 26 01:59:15 2012
@@ -435,6 +435,16 @@ public final class IndexSchema {
     }
     if (simFactory instanceof SchemaAware) {
       ((SchemaAware)simFactory).inform(this);
+    } else {
+      // if the sim factory isn't schema aware, then we are responsible for
+      // erroring if a field type is trying to specify a sim.
+      for (FieldType ft : fieldTypes.values()) {
+        if (null != ft.getSimilarity()) {
+          String msg = "FieldType '" + ft.getTypeName() + "' is configured with a similarity, but the global similarity does not support it: " + simFactory.getClass();
+          log.error(msg);
+          throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, msg);
+        }
+      }
     }
     similarity = simFactory.getSimilarity();
 

Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/schema/SimilarityFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/schema/SimilarityFactory.java?rev=1353801&r1=1353800&r2=1353801&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/schema/SimilarityFactory.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/schema/SimilarityFactory.java Tue Jun 26 01:59:15 2012
@@ -17,8 +17,26 @@ package org.apache.solr.schema;
  */
 
 import org.apache.lucene.search.similarities.Similarity;
+import org.apache.solr.schema.SchemaAware; // javadocs
+import org.apache.solr.schema.FieldType; // javadocs
 import org.apache.solr.common.params.SolrParams;
 
+
+/**
+ * A factory interface for configuring a {@link Similarity} in the Solr 
+ * schema.xml.  
+ * 
+ * <p>
+ * Subclasses of <code>SimilarityFactory</code> which are {@link SchemaAware} 
+ * must take responsibility for either consulting the similarities configured 
+ * on individual field types, or generating appropriate error/warning messages 
+ * if field type specific similarities exist but are being ignored.  The 
+ * <code>IndexSchema</code> will provide such error checking if a 
+ * non-<code>SchemaAware</code> instance of <code>SimilarityFactory</code> 
+ * is used.
+ * 
+ * @see FieldType#getSimilarity
+ */
 public abstract class SimilarityFactory {
   protected SolrParams params;
 

Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/similarities/SchemaSimilarityFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/similarities/SchemaSimilarityFactory.java?rev=1353801&r1=1353800&r2=1353801&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/similarities/SchemaSimilarityFactory.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/similarities/SchemaSimilarityFactory.java Tue Jun 26 01:59:15 2012
@@ -26,9 +26,21 @@ import org.apache.solr.schema.SchemaAwar
 import org.apache.solr.schema.SimilarityFactory;
 
 /**
- * SimilarityFactory that returns a PerFieldSimilarityWrapper
- * that delegates to the fieldtype, if its configured, otherwise
+ * SimilarityFactory that returns a {@link PerFieldSimilarityWrapper}
+ * that delegates to the field type, if it's configured, otherwise
  * {@link DefaultSimilarity}.
+ *
+ * <p>
+ * <b>NOTE:</b> Users should be aware that in addition to supporting 
+ * <code>Similarity</code> configurations specified on individual 
+ * field types, this factory also differs in behavior from 
+ * {@link DefaultSimilarityFactory} because of other differences in the 
+ * implementations of <code>PerFieldSimilarityWrapper</code> and 
+ * <code>DefaultSimilarity</code> - notably in methods such as 
+ * {@link Similarity#coord} and {@link Similarity#queryNorm}.  
+ * </p>
+ *
+ * @see FieldType#getSimilarity
  */
 public class SchemaSimilarityFactory extends SimilarityFactory implements SchemaAware {
   private Similarity similarity;

Added: lucene/dev/trunk/solr/core/src/test-files/solr/conf/bad-schema-sim-global-vs-ft-mismatch.xml
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/test-files/solr/conf/bad-schema-sim-global-vs-ft-mismatch.xml?rev=1353801&view=auto
==============================================================================
--- lucene/dev/trunk/solr/core/src/test-files/solr/conf/bad-schema-sim-global-vs-ft-mismatch.xml (added)
+++ lucene/dev/trunk/solr/core/src/test-files/solr/conf/bad-schema-sim-global-vs-ft-mismatch.xml Tue Jun 26 01:59:15 2012
@@ -0,0 +1,37 @@
+<?xml version="1.0" ?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<schema name="bad-schema-sim-global-vs-ft-mismatch" version="1.0">
+  <types>
+    <fieldType name="sim1" class="solr.TextField">
+      <analyzer>
+        <tokenizer class="solr.MockTokenizerFactory"/>
+      </analyzer>
+      <!-- BAD: similarity here but no global sim that allows it -->
+      <similarity class="org.apache.lucene.misc.SweetSpotSimilarity"/>
+    </fieldType>
+ </types>
+
+ <fields>
+   <field name="sim1text" type="sim1" indexed="true" stored="true"/>
+   <dynamicField name="*" type="sim1" />
+ </fields>
+
+ <defaultSearchField>sim1text</defaultSearchField>
+
+</schema>

Modified: lucene/dev/trunk/solr/core/src/test-files/solr/conf/schema-tfidf.xml
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/test-files/solr/conf/schema-tfidf.xml?rev=1353801&r1=1353800&r2=1353801&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/test-files/solr/conf/schema-tfidf.xml (original)
+++ lucene/dev/trunk/solr/core/src/test-files/solr/conf/schema-tfidf.xml Tue Jun 26 01:59:15 2012
@@ -27,14 +27,24 @@
       <analyzer class="org.apache.lucene.analysis.standard.StandardAnalyzer"/>
       <similarity class="solr.DefaultSimilarityFactory"/>
     </fieldType>
+
+    <!-- TF/IDF, with discountOverlaps explicitly disabled -->
+    <fieldType name="text_overlap" class="solr.TextField">
+      <analyzer class="org.apache.lucene.analysis.standard.StandardAnalyzer"/>
+      <similarity class="solr.DefaultSimilarityFactory">
+        <bool name="discountOverlaps">false</bool>
+      </similarity>
+    </fieldType>
    
   </types>
 
   <fields>
     <field name="id" type="int" indexed="true" stored="true" multiValued="false" required="false"/>
     <field name="text" type="text" indexed="true" stored="false"/>
+    <field name="text_overlap" type="text_overlap" indexed="true" stored="false"/>
   </fields>
 
   <defaultSearchField>text</defaultSearchField>
   <uniqueKey>id</uniqueKey>
+  <similarity class="solr.SchemaSimilarityFactory"/>
 </schema>

Modified: lucene/dev/trunk/solr/core/src/test-files/solr/conf/schema.xml
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/test-files/solr/conf/schema.xml?rev=1353801&r1=1353800&r2=1353801&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/test-files/solr/conf/schema.xml (original)
+++ lucene/dev/trunk/solr/core/src/test-files/solr/conf/schema.xml Tue Jun 26 01:59:15 2012
@@ -401,33 +401,6 @@
              providerClass="solr.OpenExchangeRatesOrgProvider"
              ratesFileLocation="open-exchange-rates.json" />
 
-  <!--  some per-field similarity examples -->
-  
-  <!--  specify a Similarity classname directly -->
-  <fieldType name="sim1" class="solr.TextField">
-    <analyzer>
-      <tokenizer class="solr.MockTokenizerFactory"/>
-    </analyzer>
-    <similarity class="org.apache.lucene.misc.SweetSpotSimilarity"/>
-  </fieldType>
-
-  <!--  specify a Similarity factory -->  
-  <fieldType name="sim2" class="solr.TextField">
-    <analyzer>
-      <tokenizer class="solr.MockTokenizerFactory"/>
-    </analyzer>
-    <similarity class="solr.CustomSimilarityFactory">
-      <str name="echo">is there an echo?</str>
-    </similarity>
-  </fieldType>
-  
-  <!-- don't specify any sim at all: get the default  -->
-  <fieldType name="sim3" class="solr.TextField">
-    <analyzer>
-      <tokenizer class="solr.MockTokenizerFactory"/>
-    </analyzer>
-  </fieldType>
-  
   <!-- omitPositions example -->
   <fieldType name="nopositions" class="solr.TextField" omitPositions="true">
     <analyzer>
@@ -570,10 +543,6 @@
    <field name="multiDefault" type="string" indexed="true" stored="true" default="muLti-Default" multiValued="true"/>
    <field name="intDefault" type="int" indexed="true" stored="true" default="42" multiValued="false"/>
 
-   <field name="sim1text" type="sim1" indexed="true" stored="true"/>
-   <field name="sim2text" type="sim2" indexed="true" stored="true"/>
-   <field name="sim3text" type="sim3" indexed="true" stored="true"/>
-   
    <field name="nopositionstext" type="nopositions" indexed="true" stored="true"/>
 
    <field name="tlong" type="tlong" indexed="true" stored="true" />
@@ -665,11 +634,6 @@
 
    <dynamicField name="*_mfacet" type="string" indexed="true" stored="false" multiValued="true" />
 
-   <!-- make sure custom sims work with dynamic fields -->
-   <dynamicField name="*_sim1" type="sim1" indexed="true" stored="true"/>
-   <dynamicField name="*_sim2" type="sim2" indexed="true" stored="true"/>
-   <dynamicField name="*_sim3" type="sim3" indexed="true" stored="true"/>
-   
    <!-- Type used to index the lat and lon components for the "location" FieldType -->
    <dynamicField name="*_coordinate"  type="tdouble" indexed="true"  stored="false" omitNorms="true" />
  </fields>

Modified: lucene/dev/trunk/solr/core/src/test/org/apache/solr/schema/BadIndexSchemaTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/test/org/apache/solr/schema/BadIndexSchemaTest.java?rev=1353801&r1=1353800&r2=1353801&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/test/org/apache/solr/schema/BadIndexSchemaTest.java (original)
+++ lucene/dev/trunk/solr/core/src/test/org/apache/solr/schema/BadIndexSchemaTest.java Tue Jun 26 01:59:15 2012
@@ -86,4 +86,9 @@ public class BadIndexSchemaTest extends 
            "can not be configured with a default value");
   }
 
+  public void testPerFieldtypeSimButNoSchemaSimFactory() throws Exception {
+    doTest("bad-schema-sim-global-vs-ft-mismatch.xml", "global similarity does not support it");
+  }
+
+
 }

Modified: lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/similarities/TestDefaultSimilarityFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/similarities/TestDefaultSimilarityFactory.java?rev=1353801&r1=1353800&r2=1353801&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/similarities/TestDefaultSimilarityFactory.java (original)
+++ lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/similarities/TestDefaultSimilarityFactory.java Tue Jun 26 01:59:15 2012
@@ -18,6 +18,7 @@ package org.apache.solr.search.similarit
  */
 
 import org.apache.lucene.search.similarities.DefaultSimilarity;
+import org.apache.lucene.search.similarities.Similarity;
 import org.junit.BeforeClass;
 
 /**
@@ -30,7 +31,16 @@ public class TestDefaultSimilarityFactor
   }
   
   /** default parameters */
-  public void test() throws Exception {
-    assertEquals(DefaultSimilarity.class, getSimilarity("text").getClass());
+  public void testDefaults() throws Exception {
+    Similarity sim = getSimilarity("text");
+    assertEquals(DefaultSimilarity.class, sim.getClass());
+    assertEquals(true, ((DefaultSimilarity)sim).getDiscountOverlaps());
   }
+  /** explicit params */
+  public void testParams() throws Exception {
+    Similarity sim = getSimilarity("text_overlap");
+    assertEquals(DefaultSimilarity.class, sim.getClass());
+    assertEquals(false, ((DefaultSimilarity)sim).getDiscountOverlaps());
+  }
+
 }

Modified: lucene/dev/trunk/solr/example/solr/conf/schema.xml
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/example/solr/conf/schema.xml?rev=1353801&r1=1353800&r2=1353801&view=diff
==============================================================================
--- lucene/dev/trunk/solr/example/solr/conf/schema.xml (original)
+++ lucene/dev/trunk/solr/example/solr/conf/schema.xml Tue Jun 26 01:59:15 2012
@@ -244,23 +244,6 @@
    <!-- copy name to alphaNameSort, a field designed for sorting by name -->
    <!-- <copyField source="name" dest="alphaNameSort"/> -->
  
-
- <!-- Similarity is the scoring routine for each document vs. a query.
-      A custom similarity may be specified here, but the default is fine
-      for most applications.  -->
- <!-- <similarity class="org.apache.lucene.search.similarities.DefaultSimilarity"/> -->
- <!-- ... OR ...
-      Specify a SimilarityFactory class name implementation
-      allowing parameters to be used.
- -->
- <!--
- <similarity class="com.example.solr.CustomSimilarityFactory">
-   <str name="paramkey">param value</str>
- </similarity>
- -->
-
-
-
   <types>
     <!-- field type definitions. The "name" attribute is
        just a label to be used by field definitions.  The "class"
@@ -1057,7 +1040,16 @@
       </analyzer>
     </fieldType>
  </types>
-
-
+  
+  <!-- Similarity is the scoring routine for each document vs. a query.
+       A custom Similarity or SimilarityFactory may be specified here, but 
+       the default is fine for most applications.  
+       For more info: http://wiki.apache.org/solr/SchemaXml#Similarity
+    -->
+  <!--
+     <similarity class="com.example.solr.CustomSimilarityFactory">
+       <str name="paramkey">param value</str>
+     </similarity>
+    -->
 
 </schema>