You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ho...@apache.org on 2012/06/26 04:09:23 UTC
svn commit: r1353803 - in /lucene/dev/branches/branch_4x/solr: ./ cloud-dev/
contrib/ core/ core/src/java/org/apache/solr/schema/
core/src/java/org/apache/solr/search/similarities/
core/src/test-files/solr/conf/ core/src/test/org/apache/solr/schema/ co...
Author: hossman
Date: Tue Jun 26 02:09:20 2012
New Revision: 1353803
URL: http://svn.apache.org/viewvc?rev=1353803&view=rev
Log:
SOLR-3577: better javadocs and error messages related to global/fieldtype SimilarityFactory usage (ie: SOLR-2338) (merge r1353801)
Added:
lucene/dev/branches/branch_4x/solr/core/src/test-files/solr/conf/bad-schema-sim-global-vs-ft-mismatch.xml
- copied unchanged from r1353801, lucene/dev/trunk/solr/core/src/test-files/solr/conf/bad-schema-sim-global-vs-ft-mismatch.xml
Modified:
lucene/dev/branches/branch_4x/solr/ (props changed)
lucene/dev/branches/branch_4x/solr/CHANGES.txt (props changed)
lucene/dev/branches/branch_4x/solr/LICENSE.txt (props changed)
lucene/dev/branches/branch_4x/solr/NOTICE.txt (props changed)
lucene/dev/branches/branch_4x/solr/README.txt (props changed)
lucene/dev/branches/branch_4x/solr/build.xml (props changed)
lucene/dev/branches/branch_4x/solr/cloud-dev/ (props changed)
lucene/dev/branches/branch_4x/solr/common-build.xml (props changed)
lucene/dev/branches/branch_4x/solr/contrib/ (props changed)
lucene/dev/branches/branch_4x/solr/core/ (props changed)
lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/schema/IndexSchema.java
lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/schema/SimilarityFactory.java
lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/search/similarities/SchemaSimilarityFactory.java
lucene/dev/branches/branch_4x/solr/core/src/test-files/solr/conf/schema-tfidf.xml
lucene/dev/branches/branch_4x/solr/core/src/test-files/solr/conf/schema.xml
lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/schema/BadIndexSchemaTest.java
lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/search/similarities/TestDefaultSimilarityFactory.java
lucene/dev/branches/branch_4x/solr/dev-tools/ (props changed)
lucene/dev/branches/branch_4x/solr/example/ (props changed)
lucene/dev/branches/branch_4x/solr/example/solr/conf/schema.xml
lucene/dev/branches/branch_4x/solr/lib/ (props changed)
lucene/dev/branches/branch_4x/solr/lib/httpclient-LICENSE-ASL.txt (props changed)
lucene/dev/branches/branch_4x/solr/lib/httpclient-NOTICE.txt (props changed)
lucene/dev/branches/branch_4x/solr/lib/httpcore-LICENSE-ASL.txt (props changed)
lucene/dev/branches/branch_4x/solr/lib/httpcore-NOTICE.txt (props changed)
lucene/dev/branches/branch_4x/solr/lib/httpmime-LICENSE-ASL.txt (props changed)
lucene/dev/branches/branch_4x/solr/lib/httpmime-NOTICE.txt (props changed)
lucene/dev/branches/branch_4x/solr/scripts/ (props changed)
lucene/dev/branches/branch_4x/solr/solrj/ (props changed)
lucene/dev/branches/branch_4x/solr/test-framework/ (props changed)
lucene/dev/branches/branch_4x/solr/testlogging.properties (props changed)
lucene/dev/branches/branch_4x/solr/webapp/ (props changed)
Modified: lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/schema/IndexSchema.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/schema/IndexSchema.java?rev=1353803&r1=1353802&r2=1353803&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/schema/IndexSchema.java (original)
+++ lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/schema/IndexSchema.java Tue Jun 26 02:09:20 2012
@@ -435,6 +435,16 @@ public final class IndexSchema {
}
if (simFactory instanceof SchemaAware) {
((SchemaAware)simFactory).inform(this);
+ } else {
+ // if the sim factory isn't schema aware, then we are responsible for
+ // erroring if a field type is trying to specify a sim.
+ for (FieldType ft : fieldTypes.values()) {
+ if (null != ft.getSimilarity()) {
+ String msg = "FieldType '" + ft.getTypeName() + "' is configured with a similarity, but the global similarity does not support it: " + simFactory.getClass();
+ log.error(msg);
+ throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, msg);
+ }
+ }
}
similarity = simFactory.getSimilarity();
Modified: lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/schema/SimilarityFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/schema/SimilarityFactory.java?rev=1353803&r1=1353802&r2=1353803&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/schema/SimilarityFactory.java (original)
+++ lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/schema/SimilarityFactory.java Tue Jun 26 02:09:20 2012
@@ -17,8 +17,26 @@ package org.apache.solr.schema;
*/
import org.apache.lucene.search.similarities.Similarity;
+import org.apache.solr.schema.SchemaAware; // javadocs
+import org.apache.solr.schema.FieldType; // javadocs
import org.apache.solr.common.params.SolrParams;
+
+/**
+ * A factory interface for configuring a {@link Similarity} in the Solr
+ * schema.xml.
+ *
+ * <p>
+ * Subclasses of <code>SimilarityFactory</code> which are {@link SchemaAware}
+ * must take responsibility for either consulting the similarities configured
+ * on individual field types, or generating appropriate error/warning messages
+ * if field type specific similarities exist but are being ignored. The
+ * <code>IndexSchema</code> will provide such error checking if a
+ * non-<code>SchemaAware</code> instance of <code>SimilarityFactory</code>
+ * is used.
+ *
+ * @see FieldType#getSimilarity
+ */
public abstract class SimilarityFactory {
protected SolrParams params;
Modified: lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/search/similarities/SchemaSimilarityFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/search/similarities/SchemaSimilarityFactory.java?rev=1353803&r1=1353802&r2=1353803&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/search/similarities/SchemaSimilarityFactory.java (original)
+++ lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/search/similarities/SchemaSimilarityFactory.java Tue Jun 26 02:09:20 2012
@@ -26,9 +26,21 @@ import org.apache.solr.schema.SchemaAwar
import org.apache.solr.schema.SimilarityFactory;
/**
- * SimilarityFactory that returns a PerFieldSimilarityWrapper
- * that delegates to the fieldtype, if its configured, otherwise
+ * SimilarityFactory that returns a {@link PerFieldSimilarityWrapper}
+ * that delegates to the field type, if it's configured, otherwise
* {@link DefaultSimilarity}.
+ *
+ * <p>
+ * <b>NOTE:</b> Users should be aware that in addition to supporting
+ * <code>Similarity</code> configurations specified on individual
+ * field types, this factory also differs in behavior from
+ * {@link DefaultSimilarityFactory} because of other differences in the
+ * implementations of <code>PerFieldSimilarityWrapper</code> and
+ * <code>DefaultSimilarity</code> - notably in methods such as
+ * {@link Similarity#coord} and {@link Similarity#queryNorm}.
+ * </p>
+ *
+ * @see FieldType#getSimilarity
*/
public class SchemaSimilarityFactory extends SimilarityFactory implements SchemaAware {
private Similarity similarity;
Modified: lucene/dev/branches/branch_4x/solr/core/src/test-files/solr/conf/schema-tfidf.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/core/src/test-files/solr/conf/schema-tfidf.xml?rev=1353803&r1=1353802&r2=1353803&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/core/src/test-files/solr/conf/schema-tfidf.xml (original)
+++ lucene/dev/branches/branch_4x/solr/core/src/test-files/solr/conf/schema-tfidf.xml Tue Jun 26 02:09:20 2012
@@ -27,14 +27,24 @@
<analyzer class="org.apache.lucene.analysis.standard.StandardAnalyzer"/>
<similarity class="solr.DefaultSimilarityFactory"/>
</fieldType>
+
+ <!-- TF/IDF with discountOverlaps explicitly disabled -->
+ <fieldType name="text_overlap" class="solr.TextField">
+ <analyzer class="org.apache.lucene.analysis.standard.StandardAnalyzer"/>
+ <similarity class="solr.DefaultSimilarityFactory">
+ <bool name="discountOverlaps">false</bool>
+ </similarity>
+ </fieldType>
</types>
<fields>
<field name="id" type="int" indexed="true" stored="true" multiValued="false" required="false"/>
<field name="text" type="text" indexed="true" stored="false"/>
+ <field name="text_overlap" type="text_overlap" indexed="true" stored="false"/>
</fields>
<defaultSearchField>text</defaultSearchField>
<uniqueKey>id</uniqueKey>
+ <similarity class="solr.SchemaSimilarityFactory"/>
</schema>
Modified: lucene/dev/branches/branch_4x/solr/core/src/test-files/solr/conf/schema.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/core/src/test-files/solr/conf/schema.xml?rev=1353803&r1=1353802&r2=1353803&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/core/src/test-files/solr/conf/schema.xml (original)
+++ lucene/dev/branches/branch_4x/solr/core/src/test-files/solr/conf/schema.xml Tue Jun 26 02:09:20 2012
@@ -401,33 +401,6 @@
providerClass="solr.OpenExchangeRatesOrgProvider"
ratesFileLocation="open-exchange-rates.json" />
- <!-- some per-field similarity examples -->
-
- <!-- specify a Similarity classname directly -->
- <fieldType name="sim1" class="solr.TextField">
- <analyzer>
- <tokenizer class="solr.MockTokenizerFactory"/>
- </analyzer>
- <similarity class="org.apache.lucene.misc.SweetSpotSimilarity"/>
- </fieldType>
-
- <!-- specify a Similarity factory -->
- <fieldType name="sim2" class="solr.TextField">
- <analyzer>
- <tokenizer class="solr.MockTokenizerFactory"/>
- </analyzer>
- <similarity class="solr.CustomSimilarityFactory">
- <str name="echo">is there an echo?</str>
- </similarity>
- </fieldType>
-
- <!-- don't specify any sim at all: get the default -->
- <fieldType name="sim3" class="solr.TextField">
- <analyzer>
- <tokenizer class="solr.MockTokenizerFactory"/>
- </analyzer>
- </fieldType>
-
<!-- omitPositions example -->
<fieldType name="nopositions" class="solr.TextField" omitPositions="true">
<analyzer>
@@ -570,10 +543,6 @@
<field name="multiDefault" type="string" indexed="true" stored="true" default="muLti-Default" multiValued="true"/>
<field name="intDefault" type="int" indexed="true" stored="true" default="42" multiValued="false"/>
- <field name="sim1text" type="sim1" indexed="true" stored="true"/>
- <field name="sim2text" type="sim2" indexed="true" stored="true"/>
- <field name="sim3text" type="sim3" indexed="true" stored="true"/>
-
<field name="nopositionstext" type="nopositions" indexed="true" stored="true"/>
<field name="tlong" type="tlong" indexed="true" stored="true" />
@@ -665,11 +634,6 @@
<dynamicField name="*_mfacet" type="string" indexed="true" stored="false" multiValued="true" />
- <!-- make sure custom sims work with dynamic fields -->
- <dynamicField name="*_sim1" type="sim1" indexed="true" stored="true"/>
- <dynamicField name="*_sim2" type="sim2" indexed="true" stored="true"/>
- <dynamicField name="*_sim3" type="sim3" indexed="true" stored="true"/>
-
<!-- Type used to index the lat and lon components for the "location" FieldType -->
<dynamicField name="*_coordinate" type="tdouble" indexed="true" stored="false" omitNorms="true" />
</fields>
Modified: lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/schema/BadIndexSchemaTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/schema/BadIndexSchemaTest.java?rev=1353803&r1=1353802&r2=1353803&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/schema/BadIndexSchemaTest.java (original)
+++ lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/schema/BadIndexSchemaTest.java Tue Jun 26 02:09:20 2012
@@ -86,4 +86,9 @@ public class BadIndexSchemaTest extends
"can not be configured with a default value");
}
+ public void testPerFieldtypeSimButNoSchemaSimFactory() throws Exception {
+ doTest("bad-schema-sim-global-vs-ft-mismatch.xml", "global similarity does not support it");
+ }
+
+
}
Modified: lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/search/similarities/TestDefaultSimilarityFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/search/similarities/TestDefaultSimilarityFactory.java?rev=1353803&r1=1353802&r2=1353803&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/search/similarities/TestDefaultSimilarityFactory.java (original)
+++ lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/search/similarities/TestDefaultSimilarityFactory.java Tue Jun 26 02:09:20 2012
@@ -18,6 +18,7 @@ package org.apache.solr.search.similarit
*/
import org.apache.lucene.search.similarities.DefaultSimilarity;
+import org.apache.lucene.search.similarities.Similarity;
import org.junit.BeforeClass;
/**
@@ -30,7 +31,16 @@ public class TestDefaultSimilarityFactor
}
/** default parameters */
- public void test() throws Exception {
- assertEquals(DefaultSimilarity.class, getSimilarity("text").getClass());
+ public void testDefaults() throws Exception {
+ Similarity sim = getSimilarity("text");
+ assertEquals(DefaultSimilarity.class, sim.getClass());
+ assertEquals(true, ((DefaultSimilarity)sim).getDiscountOverlaps());
}
+ /** explicit params */
+ public void testParams() throws Exception {
+ Similarity sim = getSimilarity("text_overlap");
+ assertEquals(DefaultSimilarity.class, sim.getClass());
+ assertEquals(false, ((DefaultSimilarity)sim).getDiscountOverlaps());
+ }
+
}
Modified: lucene/dev/branches/branch_4x/solr/example/solr/conf/schema.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/example/solr/conf/schema.xml?rev=1353803&r1=1353802&r2=1353803&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/example/solr/conf/schema.xml (original)
+++ lucene/dev/branches/branch_4x/solr/example/solr/conf/schema.xml Tue Jun 26 02:09:20 2012
@@ -244,23 +244,6 @@
<!-- copy name to alphaNameSort, a field designed for sorting by name -->
<!-- <copyField source="name" dest="alphaNameSort"/> -->
-
- <!-- Similarity is the scoring routine for each document vs. a query.
- A custom similarity may be specified here, but the default is fine
- for most applications. -->
- <!-- <similarity class="org.apache.lucene.search.similarities.DefaultSimilarity"/> -->
- <!-- ... OR ...
- Specify a SimilarityFactory class name implementation
- allowing parameters to be used.
- -->
- <!--
- <similarity class="com.example.solr.CustomSimilarityFactory">
- <str name="paramkey">param value</str>
- </similarity>
- -->
-
-
-
<types>
<!-- field type definitions. The "name" attribute is
just a label to be used by field definitions. The "class"
@@ -1057,7 +1040,16 @@
</analyzer>
</fieldType>
</types>
-
-
+
+ <!-- Similarity is the scoring routine for each document vs. a query.
+ A custom Similarity or SimilarityFactory may be specified here, but
+ the default is fine for most applications.
+ For more info: http://wiki.apache.org/solr/SchemaXml#Similarity
+ -->
+ <!--
+ <similarity class="com.example.solr.CustomSimilarityFactory">
+ <str name="paramkey">param value</str>
+ </similarity>
+ -->
</schema>