You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2015/09/09 05:16:16 UTC
svn commit: r1701895 - in /lucene/dev/trunk: lucene/
lucene/classification/src/test/org/apache/lucene/classification/
lucene/core/src/java/org/apache/lucene/search/similarities/
lucene/core/src/test/org/apache/lucene/search/ lucene/core/src/test/org/ap...
Author: rmuir
Date: Wed Sep 9 03:16:15 2015
New Revision: 1701895
URL: http://svn.apache.org/r1701895
Log:
LUCENE-6758: don't let queries over nonexistent fields screw up querynorm
Removed:
lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/spans/TestSpansAdvanced.java
lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/spans/TestSpansAdvanced2.java
Modified:
lucene/dev/trunk/lucene/CHANGES.txt
lucene/dev/trunk/lucene/classification/src/test/org/apache/lucene/classification/KNearestNeighborClassifierTest.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/similarities/DefaultSimilarity.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/similarities/TFIDFSimilarity.java
lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestPhraseQuery.java
lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestTermScorer.java
lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/similarities/TestDefaultSimilarity.java
lucene/dev/trunk/lucene/queries/src/test/org/apache/lucene/queries/function/TestValueSources.java
lucene/dev/trunk/solr/core/src/test/org/apache/solr/DisMaxRequestHandlerTest.java
lucene/dev/trunk/solr/core/src/test/org/apache/solr/handler/component/StatsComponentTest.java
lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/stats/TestDefaultStatsCache.java
Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1701895&r1=1701894&r2=1701895&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Wed Sep 9 03:16:15 2015
@@ -28,6 +28,9 @@ New Features
length computations, to avoid skew from documents that don't have the field.
(Ahmet Arslan via Robert Muir)
+* LUCENE-6758: Use docCount+1 for DefaultSimilarity's IDF, so that queries
+ containing nonexistent fields won't screw up querynorm. (Terry Smith, Robert Muir)
+
* SOLR-7876: The QueryTimeout interface now has a isTimeoutEnabled method
that can return false to exit from ExitableDirectoryReader wrapping at
the point fields() is called. (yonik)
Modified: lucene/dev/trunk/lucene/classification/src/test/org/apache/lucene/classification/KNearestNeighborClassifierTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/classification/src/test/org/apache/lucene/classification/KNearestNeighborClassifierTest.java?rev=1701895&r1=1701894&r2=1701895&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/classification/src/test/org/apache/lucene/classification/KNearestNeighborClassifierTest.java (original)
+++ lucene/dev/trunk/lucene/classification/src/test/org/apache/lucene/classification/KNearestNeighborClassifierTest.java Wed Sep 9 03:16:15 2015
@@ -25,6 +25,7 @@ import org.apache.lucene.classification.
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.similarities.BM25Similarity;
import org.apache.lucene.search.similarities.LMDirichletSimilarity;
import org.apache.lucene.util.BytesRef;
import org.junit.Test;
@@ -42,7 +43,7 @@ public class KNearestNeighborClassifierT
leafReader = getSampleIndex(analyzer);
checkCorrectClassification(new KNearestNeighborClassifier(leafReader, null, analyzer, null, 1, 0, 0, categoryFieldName, textFieldName), TECHNOLOGY_INPUT, TECHNOLOGY_RESULT);
checkCorrectClassification(new KNearestNeighborClassifier(leafReader, new LMDirichletSimilarity(), analyzer, null, 1, 0, 0, categoryFieldName, textFieldName), TECHNOLOGY_INPUT, TECHNOLOGY_RESULT);
- ClassificationResult<BytesRef> resultDS = checkCorrectClassification(new KNearestNeighborClassifier(leafReader, null, analyzer, null, 3, 2, 1, categoryFieldName, textFieldName), TECHNOLOGY_INPUT, TECHNOLOGY_RESULT);
+ ClassificationResult<BytesRef> resultDS = checkCorrectClassification(new KNearestNeighborClassifier(leafReader, new BM25Similarity(), analyzer, null, 3, 2, 1, categoryFieldName, textFieldName), TECHNOLOGY_INPUT, TECHNOLOGY_RESULT);
ClassificationResult<BytesRef> resultLMS = checkCorrectClassification(new KNearestNeighborClassifier(leafReader, new LMDirichletSimilarity(), analyzer, null, 3, 2, 1, categoryFieldName, textFieldName), TECHNOLOGY_INPUT, TECHNOLOGY_RESULT);
assertTrue(resultDS.getScore() != resultLMS.getScore());
} finally {
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/similarities/DefaultSimilarity.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/similarities/DefaultSimilarity.java?rev=1701895&r1=1701894&r2=1701895&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/similarities/DefaultSimilarity.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/similarities/DefaultSimilarity.java Wed Sep 9 03:16:15 2015
@@ -133,10 +133,10 @@ public class DefaultSimilarity extends T
return 1;
}
- /** Implemented as <code>log(docCount/(docFreq+1)) + 1</code>. */
+ /** Implemented as <code>log((docCount+1)/(docFreq+1)) + 1</code>. */
@Override
public float idf(long docFreq, long docCount) {
- return (float)(Math.log(docCount/(double)(docFreq+1)) + 1.0);
+ return (float)(Math.log((docCount+1)/(double)(docFreq+1)) + 1.0);
}
/**
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/similarities/TFIDFSimilarity.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/similarities/TFIDFSimilarity.java?rev=1701895&r1=1701894&r2=1701895&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/similarities/TFIDFSimilarity.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/similarities/TFIDFSimilarity.java Wed Sep 9 03:16:15 2015
@@ -346,7 +346,7 @@ import org.apache.lucene.util.BytesRef;
* </td>
* <td valign="middle" align="center">
* <table summary="inverse document frequency computation">
- * <tr><td align="center" style="text-align: center"><small>docCount</small></td></tr>
+ * <tr><td align="center" style="text-align: center"><small>docCount+1</small></td></tr>
* <tr><td align="center" style="text-align: center">–––––––––</td></tr>
* <tr><td align="center" style="text-align: center"><small>docFreq+1</small></td></tr>
* </table>
Modified: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestPhraseQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestPhraseQuery.java?rev=1701895&r1=1701894&r2=1701895&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestPhraseQuery.java (original)
+++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestPhraseQuery.java Wed Sep 9 03:16:15 2015
@@ -330,11 +330,11 @@ public class TestPhraseQuery extends Luc
assertEquals(3, hits.length);
// Make sure that those matches where the terms appear closer to
// each other get a higher score:
- assertEquals(0.71, hits[0].score, 0.01);
+ assertEquals(1.0, hits[0].score, 0.01);
assertEquals(0, hits[0].doc);
- assertEquals(0.44, hits[1].score, 0.01);
+ assertEquals(0.62, hits[1].score, 0.01);
assertEquals(1, hits[1].doc);
- assertEquals(0.31, hits[2].score, 0.01);
+ assertEquals(0.43, hits[2].score, 0.01);
assertEquals(2, hits[2].doc);
QueryUtils.check(random(), query,searcher);
reader.close();
Modified: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestTermScorer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestTermScorer.java?rev=1701895&r1=1701894&r2=1701895&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestTermScorer.java (original)
+++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestTermScorer.java Wed Sep 9 03:16:15 2015
@@ -121,17 +121,6 @@ public class TestTermScorer extends Luce
// The scores should be the same
assertTrue(doc0.score + " does not equal: " + doc5.score,
doc0.score == doc5.score);
- /*
- * Score should be (based on Default Sim.: All floats are approximate tf = 1
- * numDocs = 6 docFreq(all) = 2 idf = ln(6/3) + 1 = 1.693147 idf ^ 2 =
- * 2.8667 boost = 1 lengthNorm = 1 //there is 1 term in every document coord
- * = 1 sumOfSquaredWeights = (idf * boost) ^ 2 = 1.693147 ^ 2 = 2.8667
- * queryNorm = 1 / (sumOfSquaredWeights)^0.5 = 1 /(1.693147) = 0.590
- *
- * score = 1 * 2.8667 * 1 * 1 * 0.590 = 1.69
- */
- assertTrue(doc0.score + " does not equal: " + 1.6931472f,
- doc0.score == 1.6931472f);
}
public void testNext() throws Exception {
@@ -145,10 +134,8 @@ public class TestTermScorer extends Luce
Scorer ts = weight.scorer(context);
assertTrue("next did not return a doc",
ts.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
- assertTrue("score is not correct", ts.score() == 1.6931472f);
assertTrue("next did not return a doc",
ts.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
- assertTrue("score is not correct", ts.score() == 1.6931472f);
assertTrue("next returned a doc and it should not have",
ts.nextDoc() == DocIdSetIterator.NO_MORE_DOCS);
}
Modified: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/similarities/TestDefaultSimilarity.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/similarities/TestDefaultSimilarity.java?rev=1701895&r1=1701894&r2=1701895&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/similarities/TestDefaultSimilarity.java (original)
+++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/similarities/TestDefaultSimilarity.java Wed Sep 9 03:16:15 2015
@@ -17,14 +17,145 @@ package org.apache.lucene.search.similar
* limitations under the License.
*/
+import java.io.IOException;
+import java.util.Arrays;
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.StringField;
+import org.apache.lucene.document.Field.Store;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.DisjunctionMaxQuery;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.search.BooleanClause.Occur;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LuceneTestCase;
public class TestDefaultSimilarity extends LuceneTestCase {
+ private Directory directory;
+ private IndexReader indexReader;
+ private IndexSearcher indexSearcher;
+
+ @Override
+ public void setUp() throws Exception {
+ super.setUp();
+ directory = newDirectory();
+ try (IndexWriter indexWriter = new IndexWriter(directory, newIndexWriterConfig())) {
+ Document document = new Document();
+ document.add(new StringField("test", "hit", Store.NO));
+ indexWriter.addDocument(document);
+ indexWriter.commit();
+ }
+ indexReader = DirectoryReader.open(directory);
+ indexSearcher = newSearcher(indexReader);
+ indexSearcher.setSimilarity(new DefaultSimilarity());
+ }
+ @Override
+ public void tearDown() throws Exception {
+ IOUtils.close(indexReader, directory);
+ super.tearDown();
+ }
+
// Javadocs give this as an example so we test to make sure it's correct:
public void testPrecisionLoss() throws Exception {
DefaultSimilarity sim = new DefaultSimilarity();
float v = sim.decodeNormValue(sim.encodeNormValue(.89f));
assertEquals(0.875f, v, 0.0001f);
}
+
+
+ public void testHit() throws IOException {
+ Query query = new TermQuery(new Term("test", "hit"));
+ TopDocs topDocs = indexSearcher.search(query, 1);
+ assertEquals(1, topDocs.totalHits);
+ assertEquals(1, topDocs.scoreDocs.length);
+ assertTrue(topDocs.scoreDocs[0].score != 0);
+ }
+
+ public void testMiss() throws IOException {
+ Query query = new TermQuery(new Term("test", "miss"));
+ TopDocs topDocs = indexSearcher.search(query, 1);
+ assertEquals(0, topDocs.totalHits);
+ }
+
+ public void testEmpty() throws IOException {
+ Query query = new TermQuery(new Term("empty", "miss"));
+ TopDocs topDocs = indexSearcher.search(query, 1);
+ assertEquals(0, topDocs.totalHits);
+ }
+
+ public void testBQHit() throws IOException {
+ Query query = new BooleanQuery.Builder()
+ .add(new TermQuery(new Term("test", "hit")), Occur.SHOULD)
+ .build();
+ TopDocs topDocs = indexSearcher.search(query, 1);
+ assertEquals(1, topDocs.totalHits);
+ assertEquals(1, topDocs.scoreDocs.length);
+ assertTrue(topDocs.scoreDocs[0].score != 0);
+ }
+
+ public void testBQHitOrMiss() throws IOException {
+ Query query = new BooleanQuery.Builder()
+ .add(new TermQuery(new Term("test", "hit")), Occur.SHOULD)
+ .add(new TermQuery(new Term("test", "miss")), Occur.SHOULD)
+ .build();
+ TopDocs topDocs = indexSearcher.search(query, 1);
+ assertEquals(1, topDocs.totalHits);
+ assertEquals(1, topDocs.scoreDocs.length);
+ assertTrue(topDocs.scoreDocs[0].score != 0);
+ }
+
+ public void testBQHitOrEmpty() throws IOException {
+ Query query = new BooleanQuery.Builder()
+ .add(new TermQuery(new Term("test", "hit")), Occur.SHOULD)
+ .add(new TermQuery(new Term("empty", "miss")), Occur.SHOULD)
+ .build();
+ TopDocs topDocs = indexSearcher.search(query, 1);
+ assertEquals(1, topDocs.totalHits);
+ assertEquals(1, topDocs.scoreDocs.length);
+ assertTrue(topDocs.scoreDocs[0].score != 0);
+ }
+
+ public void testDMQHit() throws IOException {
+ Query query = new DisjunctionMaxQuery(
+ Arrays.asList(
+ new TermQuery(new Term("test", "hit"))),
+ 0);
+ TopDocs topDocs = indexSearcher.search(query, 1);
+ assertEquals(1, topDocs.totalHits);
+ assertEquals(1, topDocs.scoreDocs.length);
+ assertTrue(topDocs.scoreDocs[0].score != 0);
+ }
+
+ public void testDMQHitOrMiss() throws IOException {
+ Query query = new DisjunctionMaxQuery(
+ Arrays.asList(
+ new TermQuery(new Term("test", "hit")),
+ new TermQuery(new Term("test", "miss"))),
+ 0);
+ TopDocs topDocs = indexSearcher.search(query, 1);
+ assertEquals(1, topDocs.totalHits);
+ assertEquals(1, topDocs.scoreDocs.length);
+ assertTrue(topDocs.scoreDocs[0].score != 0);
+ }
+
+ public void testDMQHitOrEmpty() throws IOException {
+ Query query = new DisjunctionMaxQuery(
+ Arrays.asList(
+ new TermQuery(new Term("test", "hit")),
+ new TermQuery(new Term("empty", "miss"))),
+ 0);
+ TopDocs topDocs = indexSearcher.search(query, 1);
+ assertEquals(1, topDocs.totalHits);
+ assertEquals(1, topDocs.scoreDocs.length);
+ assertTrue(topDocs.scoreDocs[0].score != 0);
+ }
}
Modified: lucene/dev/trunk/lucene/queries/src/test/org/apache/lucene/queries/function/TestValueSources.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/queries/src/test/org/apache/lucene/queries/function/TestValueSources.java?rev=1701895&r1=1701894&r2=1701895&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/queries/src/test/org/apache/lucene/queries/function/TestValueSources.java (original)
+++ lucene/dev/trunk/lucene/queries/src/test/org/apache/lucene/queries/function/TestValueSources.java Wed Sep 9 03:16:15 2015
@@ -226,7 +226,7 @@ public class TestValueSources extends Lu
try {
searcher.setSimilarity(new DefaultSimilarity());
ValueSource vs = new IDFValueSource("bogus", "bogus", "text", new BytesRef("test"));
- assertHits(new FunctionQuery(vs), new float[] { 0.5945349f, 0.5945349f });
+ assertHits(new FunctionQuery(vs), new float[] { 1.0f, 1.0f });
assertAllExist(vs);
} finally {
searcher.setSimilarity(saved);
@@ -398,7 +398,7 @@ public class TestValueSources extends Lu
searcher.setSimilarity(new DefaultSimilarity());
ValueSource vs = new QueryValueSource(new TermQuery(new Term("string","bar")), 42F);
- assertHits(new FunctionQuery(vs), new float[] { 42F, 1F });
+ assertHits(new FunctionQuery(vs), new float[] { 42F, 1.4054651F });
// valuesource should exist only for things matching the term query
// sanity check via quick & dirty wrapper arround tf
Modified: lucene/dev/trunk/solr/core/src/test/org/apache/solr/DisMaxRequestHandlerTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/test/org/apache/solr/DisMaxRequestHandlerTest.java?rev=1701895&r1=1701894&r2=1701895&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/test/org/apache/solr/DisMaxRequestHandlerTest.java (original)
+++ lucene/dev/trunk/solr/core/src/test/org/apache/solr/DisMaxRequestHandlerTest.java Wed Sep 9 03:16:15 2015
@@ -87,8 +87,8 @@ public class DisMaxRequestHandlerTest ex
req("cool stuff")
,"//*[@numFound='3']"
,"//result/doc[1]/int[@name='id'][.='42']"
- ,"//result/doc[2]/int[@name='id'][.='8675309']"
- ,"//result/doc[3]/int[@name='id'][.='666']"
+ ,"//result/doc[2]/int[@name='id'][.='666']"
+ ,"//result/doc[3]/int[@name='id'][.='8675309']"
);
assertQ("multi qf",
Modified: lucene/dev/trunk/solr/core/src/test/org/apache/solr/handler/component/StatsComponentTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/test/org/apache/solr/handler/component/StatsComponentTest.java?rev=1701895&r1=1701894&r2=1701895&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/test/org/apache/solr/handler/component/StatsComponentTest.java (original)
+++ lucene/dev/trunk/solr/core/src/test/org/apache/solr/handler/component/StatsComponentTest.java Wed Sep 9 03:16:15 2015
@@ -1014,15 +1014,15 @@ public class StatsComponentTest extends
assertQ("functions over a query",
req("q","*:*", "stats", "true",
"stats.field", "{!lucene key=k}foo_t:cow")
- // scores are: 1.0, 0.625, 0.5, & "missing"
- , kpre + "double[@name='min'][.='0.5']"
- , kpre + "double[@name='max'][.='1.0']"
- , kpre + "double[@name='sum'][.='2.125']"
+ // TODO: change to not rely on exact scores
+ , kpre + "double[@name='min'][.='0.6115717887878418']"
+ , kpre + "double[@name='max'][.='1.2231435775756836']"
+ , kpre + "double[@name='sum'][.='2.5991801023483276']"
, kpre + "long[@name='count'][.='3']"
, kpre + "long[@name='missing'][.='1']"
- , kpre + "double[@name='sumOfSquares'][.='1.640625']"
- , kpre + "double[@name='mean'][.='0.7083333333333334']"
- , kpre + "double[@name='stddev'][.='0.2602082499332666']"
+ , kpre + "double[@name='sumOfSquares'][.='2.4545065967701163']"
+ , kpre + "double[@name='mean'][.='0.8663933674494425']"
+ , kpre + "double[@name='stddev'][.='0.3182720497380833']"
);
}
Modified: lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/stats/TestDefaultStatsCache.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/stats/TestDefaultStatsCache.java?rev=1701895&r1=1701894&r2=1701895&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/stats/TestDefaultStatsCache.java (original)
+++ lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/stats/TestDefaultStatsCache.java Wed Sep 9 03:16:15 2015
@@ -79,10 +79,6 @@ public class TestDefaultStatsCache exten
if (clients.size() == 1) {
// only one shard
assertEquals(controlScore, shardScore);
- } else {
- assertTrue("control:" + controlScore.floatValue() + " shard:"
- + shardScore.floatValue(),
- controlScore.floatValue() > shardScore.floatValue());
}
}