You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2017/12/29 09:06:52 UTC
[1/4] lucene-solr:master: LUCENE-8111: IndexOrDocValuesQuery Javadoc
references outdated method name.
Repository: lucene-solr
Updated Branches:
refs/heads/branch_7x e2a26a42e -> 5b59707dc
refs/heads/master 8e439a0a5 -> bc9836fd5
LUCENE-8111: IndexOrDocValuesQuery Javadoc references outdated method name.
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/bc9836fd
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/bc9836fd
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/bc9836fd
Branch: refs/heads/master
Commit: bc9836fd50319e5b0bdcf09e8b53954cee5d6cad
Parents: c95dc6d
Author: Adrien Grand <jp...@gmail.com>
Authored: Fri Dec 29 10:04:32 2017 +0100
Committer: Adrien Grand <jp...@gmail.com>
Committed: Fri Dec 29 10:06:00 2017 +0100
----------------------------------------------------------------------
lucene/CHANGES.txt | 5 +++++
.../java/org/apache/lucene/search/IndexOrDocValuesQuery.java | 2 +-
2 files changed, 6 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/bc9836fd/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index ef7e005..a78770a 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -107,6 +107,11 @@ Bug Fixes
* LUCENE-8077: Fixed bug in how CheckIndex verifies doc-value iterators.
(Xiaoshan Sun via Adrien Grand)
+Other
+
+* LUCENE-8111: IndexOrDocValuesQuery Javadoc references outdated method name.
+ (Kai Chan via Adrien Grand)
+
======================= Lucene 7.2.0 =======================
API Changes
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/bc9836fd/lucene/core/src/java/org/apache/lucene/search/IndexOrDocValuesQuery.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/IndexOrDocValuesQuery.java b/lucene/core/src/java/org/apache/lucene/search/IndexOrDocValuesQuery.java
index e679e81..f89924d 100644
--- a/lucene/core/src/java/org/apache/lucene/search/IndexOrDocValuesQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/IndexOrDocValuesQuery.java
@@ -37,7 +37,7 @@ import org.apache.lucene.index.Term;
* String field;
* long minValue, maxValue;
* Query pointQuery = LongPoint.newRangeQuery(field, minValue, maxValue);
- * Query dvQuery = SortedNumericDocValuesField.newRangeQuery(field, minValue, maxValue);
+ * Query dvQuery = SortedNumericDocValuesField.newSlowRangeQuery(field, minValue, maxValue);
* Query query = new IndexOrDocValuesQuery(pointQuery, dvQuery);
* </pre>
* The above query will be efficient as it will use points in the case that they
[2/4] lucene-solr:master: LUCENE-7993: Faster phrases if total hit
counts are not required.
Posted by jp...@apache.org.
LUCENE-7993: Faster phrases if total hit counts are not required.
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/c95dc6d9
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/c95dc6d9
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/c95dc6d9
Branch: refs/heads/master
Commit: c95dc6d95743f4a9a1ffe9baa04c3a9d1e3acdf9
Parents: b2f2481
Author: Adrien Grand <jp...@gmail.com>
Authored: Fri Dec 29 09:14:32 2017 +0100
Committer: Adrien Grand <jp...@gmail.com>
Committed: Fri Dec 29 10:06:00 2017 +0100
----------------------------------------------------------------------
lucene/CHANGES.txt | 3 ++
.../apache/lucene/search/ExactPhraseScorer.java | 23 ++++++++--
.../apache/lucene/search/MultiPhraseQuery.java | 14 +++---
.../org/apache/lucene/search/PhraseQuery.java | 14 +++---
.../apache/lucene/search/TestPhraseQuery.java | 47 ++++++++++++++++++++
5 files changed, 84 insertions(+), 17 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c95dc6d9/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 0da2cfe..ef7e005 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -61,6 +61,9 @@ Optimizations
* LUCENE-4100: Disjunctions now support faster collection of top hits when the
total hit count is not required. (Stefan Pohl, Adrien Grand, Robert Muir)
+* LUCENE-7993: Phrase queries are now faster if total hit counts are not
+ required. (Adrien Grand)
+
======================= Lucene 7.3.0 =======================
API Changes
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c95dc6d9/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java b/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java
index 85a242e..f4a7ca7 100644
--- a/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java
+++ b/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java
@@ -43,15 +43,17 @@ final class ExactPhraseScorer extends Scorer {
private int freq;
private final Similarity.SimScorer docScorer;
- private final boolean needsScores;
+ private final boolean needsScores, needsTotalHitCount;
private float matchCost;
+ private float minCompetitiveScore;
ExactPhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings,
- Similarity.SimScorer docScorer, boolean needsScores,
+ Similarity.SimScorer docScorer, ScoreMode scoreMode,
float matchCost) throws IOException {
super(weight);
this.docScorer = docScorer;
- this.needsScores = needsScores;
+ this.needsScores = scoreMode.needsScores();
+ this.needsTotalHitCount = scoreMode != ScoreMode.TOP_SCORES;
List<DocIdSetIterator> iterators = new ArrayList<>();
List<PostingsAndPosition> postingsAndPositions = new ArrayList<>();
@@ -66,10 +68,25 @@ final class ExactPhraseScorer extends Scorer {
}
@Override
+ public void setMinCompetitiveScore(float minScore) {
+ minCompetitiveScore = minScore;
+ }
+
+ @Override
public TwoPhaseIterator twoPhaseIterator() {
return new TwoPhaseIterator(conjunction) {
@Override
public boolean matches() throws IOException {
+ if (needsTotalHitCount == false && minCompetitiveScore > 0) {
+ int minFreq = postings[0].postings.freq();
+ for (int i = 1; i < postings.length; ++i) {
+ minFreq = Math.min(postings[i].postings.freq(), minFreq);
+ }
+ if (docScorer.score(docID(), minFreq) < minCompetitiveScore) {
+ // The maximum score we could get is less than the min competitive score
+ return false;
+ }
+ }
return phraseFreq() > 0;
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c95dc6d9/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java b/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java
index 44a5ad0..34361a7 100644
--- a/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java
@@ -185,13 +185,13 @@ public class MultiPhraseQuery extends Query {
private final Similarity similarity;
private final Similarity.SimWeight stats;
private final Map<Term,TermContext> termContexts = new HashMap<>();
- private final boolean needsScores;
+ private final ScoreMode scoreMode;
- public MultiPhraseWeight(IndexSearcher searcher, boolean needsScores, float boost)
+ public MultiPhraseWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost)
throws IOException {
super(MultiPhraseQuery.this);
- this.needsScores = needsScores;
- this.similarity = searcher.getSimilarity(needsScores);
+ this.scoreMode = scoreMode;
+ this.similarity = searcher.getSimilarity(scoreMode.needsScores());
final IndexReaderContext context = searcher.getTopReaderContext();
// compute idf
@@ -283,11 +283,11 @@ public class MultiPhraseQuery extends Query {
if (slop == 0) {
return new ExactPhraseScorer(this, postingsFreqs,
similarity.simScorer(stats, context),
- needsScores, totalMatchCost);
+ scoreMode, totalMatchCost);
} else {
return new SloppyPhraseScorer(this, postingsFreqs, slop,
similarity.simScorer(stats, context),
- needsScores, totalMatchCost);
+ scoreMode.needsScores(), totalMatchCost);
}
}
@@ -335,7 +335,7 @@ public class MultiPhraseQuery extends Query {
@Override
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
- return new MultiPhraseWeight(searcher, scoreMode.needsScores(), boost);
+ return new MultiPhraseWeight(searcher, scoreMode, boost);
}
/** Prints a user-readable version of this query. */
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c95dc6d9/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java b/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java
index e0b60be..3d359b4 100644
--- a/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java
@@ -353,10 +353,10 @@ public class PhraseQuery extends Query {
private class PhraseWeight extends Weight {
private final Similarity similarity;
private final Similarity.SimWeight stats;
- private final boolean needsScores;
+ private final ScoreMode scoreMode;
private transient TermContext states[];
- public PhraseWeight(IndexSearcher searcher, boolean needsScores, float boost)
+ public PhraseWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost)
throws IOException {
super(PhraseQuery.this);
final int[] positions = PhraseQuery.this.getPositions();
@@ -365,8 +365,8 @@ public class PhraseQuery extends Query {
} else if (positions[0] != 0) {
throw new IllegalStateException("PhraseWeight requires that the first position is 0, call rewrite first");
}
- this.needsScores = needsScores;
- this.similarity = searcher.getSimilarity(needsScores);
+ this.scoreMode = scoreMode;
+ this.similarity = searcher.getSimilarity(scoreMode.needsScores());
final IndexReaderContext context = searcher.getTopReaderContext();
states = new TermContext[terms.length];
TermStatistics termStats[] = new TermStatistics[terms.length];
@@ -434,11 +434,11 @@ public class PhraseQuery extends Query {
if (slop == 0) { // optimize exact case
return new ExactPhraseScorer(this, postingsFreqs,
similarity.simScorer(stats, context),
- needsScores, totalMatchCost);
+ scoreMode, totalMatchCost);
} else {
return new SloppyPhraseScorer(this, postingsFreqs, slop,
similarity.simScorer(stats, context),
- needsScores, totalMatchCost);
+ scoreMode.needsScores(), totalMatchCost);
}
}
@@ -510,7 +510,7 @@ public class PhraseQuery extends Query {
@Override
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
- return new PhraseWeight(searcher, scoreMode.needsScores(), boost);
+ return new PhraseWeight(searcher, scoreMode, boost);
}
/** Prints a user-readable version of this query. */
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c95dc6d9/lucene/core/src/test/org/apache/lucene/search/TestPhraseQuery.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestPhraseQuery.java b/lucene/core/src/test/org/apache/lucene/search/TestPhraseQuery.java
index 2bbd0dd..eb31128 100644
--- a/lucene/core/src/test/org/apache/lucene/search/TestPhraseQuery.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestPhraseQuery.java
@@ -19,6 +19,8 @@ package org.apache.lucene.search;
import java.io.IOException;
import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
import java.util.List;
import java.util.Random;
@@ -33,7 +35,10 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
+import org.apache.lucene.document.Field.Store;
+import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
@@ -713,4 +718,46 @@ public class TestPhraseQuery extends LuceneTestCase {
builder.add(new Term("field", "three"), 4);
});
}
+
+ static String[] DOCS = new String[] {
+ "a b c d e f g h",
+ "b c b",
+ "c d d d e f g b",
+ "c b a b c",
+ "a a b b c c d d",
+ "a b c d a b c d a b c d"
+ };
+
+ public void testTopPhrases() throws IOException {
+ Directory dir = newDirectory();
+ IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
+ String[] docs = Arrays.copyOf(DOCS, DOCS.length);
+ Collections.shuffle(Arrays.asList(docs), random());
+ for (String value : DOCS) {
+ Document doc = new Document();
+ doc.add(new TextField("f", value, Store.NO));
+ w.addDocument(doc);
+ }
+ IndexReader r = DirectoryReader.open(w);
+ w.close();
+ IndexSearcher searcher = newSearcher(r);
+ for (Query query : Arrays.asList(
+ new PhraseQuery("f", "b", "c"), // common phrase
+ new PhraseQuery("f", "e", "f"), // always appear next to each other
+ new PhraseQuery("f", "d", "d") // repeated term
+ )) {
+ for (int topN = 1; topN <= 2; ++topN) {
+ TopScoreDocCollector collector1 = TopScoreDocCollector.create(topN, null, true);
+ searcher.search(query, collector1);
+ ScoreDoc[] hits1 = collector1.topDocs().scoreDocs;
+ TopScoreDocCollector collector2 = TopScoreDocCollector.create(topN, null, false);
+ searcher.search(query, collector2);
+ ScoreDoc[] hits2 = collector2.topDocs().scoreDocs;
+ assertTrue("" + query, hits1.length > 0);
+ CheckHits.checkEqual(query, hits1, hits2);
+ }
+ }
+ r.close();
+ dir.close();
+ }
}
[4/4] lucene-solr:branch_7x: LUCENE-8111: IndexOrDocValuesQuery
Javadoc references outdated method name.
Posted by jp...@apache.org.
LUCENE-8111: IndexOrDocValuesQuery Javadoc references outdated method name.
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/5b59707d
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/5b59707d
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/5b59707d
Branch: refs/heads/branch_7x
Commit: 5b59707dcd54ef0167ee617cedf4b9f8ab2756ae
Parents: e2a26a4
Author: Adrien Grand <jp...@gmail.com>
Authored: Fri Dec 29 10:04:32 2017 +0100
Committer: Adrien Grand <jp...@gmail.com>
Committed: Fri Dec 29 10:06:26 2017 +0100
----------------------------------------------------------------------
lucene/CHANGES.txt | 5 +++++
.../java/org/apache/lucene/search/IndexOrDocValuesQuery.java | 2 +-
2 files changed, 6 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/5b59707d/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index a6fb3e7..dc3c992 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -46,6 +46,11 @@ Bug Fixes
* LUCENE-8077: Fixed bug in how CheckIndex verifies doc-value iterators.
(Xiaoshan Sun via Adrien Grand)
+Other
+
+* LUCENE-8111: IndexOrDocValuesQuery Javadoc references outdated method name.
+ (Kai Chan via Adrien Grand)
+
======================= Lucene 7.2.0 =======================
API Changes
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/5b59707d/lucene/core/src/java/org/apache/lucene/search/IndexOrDocValuesQuery.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/IndexOrDocValuesQuery.java b/lucene/core/src/java/org/apache/lucene/search/IndexOrDocValuesQuery.java
index 8df5675..06fb9c2 100644
--- a/lucene/core/src/java/org/apache/lucene/search/IndexOrDocValuesQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/IndexOrDocValuesQuery.java
@@ -37,7 +37,7 @@ import org.apache.lucene.index.Term;
* String field;
* long minValue, maxValue;
* Query pointQuery = LongPoint.newRangeQuery(field, minValue, maxValue);
- * Query dvQuery = SortedNumericDocValuesField.newRangeQuery(field, minValue, maxValue);
+ * Query dvQuery = SortedNumericDocValuesField.newSlowRangeQuery(field, minValue, maxValue);
* Query query = new IndexOrDocValuesQuery(pointQuery, dvQuery);
* </pre>
* The above query will be efficient as it will use points in the case that they
[3/4] lucene-solr:master: LUCENE-8010: Fix similarities so that they
pass tests.
Posted by jp...@apache.org.
LUCENE-8010: Fix similarities so that they pass tests.
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/b2f24816
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/b2f24816
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/b2f24816
Branch: refs/heads/master
Commit: b2f248164c1a3ddf213a56778d55c9252a022f18
Parents: 8e439a0
Author: Adrien Grand <jp...@gmail.com>
Authored: Thu Dec 28 17:44:01 2017 +0100
Committer: Adrien Grand <jp...@gmail.com>
Committed: Fri Dec 29 10:06:00 2017 +0100
----------------------------------------------------------------------
.../lucene/search/similarities/Axiomatic.java | 28 +++++++++++++++++++-
.../search/similarities/AxiomaticF1EXP.java | 2 +-
.../search/similarities/AxiomaticF1LOG.java | 2 +-
.../search/similarities/AxiomaticF3EXP.java | 2 +-
.../search/similarities/AxiomaticF3LOG.java | 2 +-
.../search/similarities/DistributionSPL.java | 26 +++++++++++++++---
.../lucene/search/similarities/LambdaDF.java | 7 ++++-
.../lucene/search/similarities/LambdaTTF.java | 7 ++++-
.../search/similarities/TestAxiomaticF1EXP.java | 4 ---
.../search/similarities/TestAxiomaticF1LOG.java | 4 ---
.../search/similarities/TestAxiomaticF3EXP.java | 7 +----
.../search/similarities/TestAxiomaticF3LOG.java | 7 +----
.../similarities/TestDistributionSPL.java | 4 ---
.../search/similarities/TestSimilarity2.java | 7 +++++
.../search/similarities/RandomSimilarity.java | 11 +++++---
15 files changed, 81 insertions(+), 39 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b2f24816/lucene/core/src/java/org/apache/lucene/search/similarities/Axiomatic.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/Axiomatic.java b/lucene/core/src/java/org/apache/lucene/search/similarities/Axiomatic.java
index 403773e..553fd42 100644
--- a/lucene/core/src/java/org/apache/lucene/search/similarities/Axiomatic.java
+++ b/lucene/core/src/java/org/apache/lucene/search/similarities/Axiomatic.java
@@ -17,6 +17,7 @@
package org.apache.lucene.search.similarities;
+import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.search.Explanation;
@@ -101,11 +102,13 @@ public abstract class Axiomatic extends SimilarityBase {
@Override
public double score(BasicStats stats, double freq, double docLen) {
- return tf(stats, freq, docLen)
+ double score = tf(stats, freq, docLen)
* ln(stats, freq, docLen)
* tfln(stats, freq, docLen)
* idf(stats, freq, docLen)
- gamma(stats, freq, docLen);
+ // AxiomaticF3 similarities might produce negative scores due to their gamma component
+ return Math.max(0, score);
}
@Override
@@ -115,6 +118,29 @@ public abstract class Axiomatic extends SimilarityBase {
}
@Override
+ protected Explanation explain(
+ BasicStats stats, int doc, Explanation freq, double docLen) {
+ List<Explanation> subs = new ArrayList<>();
+ explain(subs, stats, doc, freq.getValue(), docLen);
+
+ double score = tf(stats, freq.getValue(), docLen)
+ * ln(stats, freq.getValue(), docLen)
+ * tfln(stats, freq.getValue(), docLen)
+ * idf(stats, freq.getValue(), docLen)
+ - gamma(stats, freq.getValue(), docLen);
+
+ Explanation explanation = Explanation.match((float) score,
+ "score(" + getClass().getSimpleName() + ", doc=" + doc + ", freq=" + freq.getValue() +"), computed from:",
+ subs);
+ if (score < 0) {
+ explanation = Explanation.match(0, "max of:",
+ Explanation.match(0, "Minimum legal score"),
+ explanation);
+ }
+ return explanation;
+ }
+
+ @Override
protected void explain(List<Explanation> subs, BasicStats stats, int doc,
double freq, double docLen) {
if (stats.getBoost() != 1.0d) {
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b2f24816/lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF1EXP.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF1EXP.java b/lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF1EXP.java
index c026feb..ca5c42b 100644
--- a/lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF1EXP.java
+++ b/lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF1EXP.java
@@ -60,7 +60,7 @@ public class AxiomaticF1EXP extends Axiomatic {
*/
@Override
protected double tf(BasicStats stats, double freq, double docLen) {
- if (freq <= 0.0) return 0.0;
+ freq += 1; // otherwise gives negative scores for freqs < 1
return 1 + Math.log(1 + Math.log(freq));
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b2f24816/lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF1LOG.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF1LOG.java b/lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF1LOG.java
index 2e19255..6ef3587 100644
--- a/lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF1LOG.java
+++ b/lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF1LOG.java
@@ -52,7 +52,7 @@ public class AxiomaticF1LOG extends Axiomatic {
*/
@Override
protected double tf(BasicStats stats, double freq, double docLen) {
- if (freq <= 0.0) return 0.0;
+ freq += 1; // otherwise gives negative scores for freqs < 1
return 1 + Math.log(1 + Math.log(freq));
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b2f24816/lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF3EXP.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF3EXP.java b/lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF3EXP.java
index 635dc68..a54c754 100644
--- a/lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF3EXP.java
+++ b/lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF3EXP.java
@@ -58,7 +58,7 @@ public class AxiomaticF3EXP extends Axiomatic {
*/
@Override
protected double tf(BasicStats stats, double freq, double docLen) {
- if (freq <= 0.0) return 0.0;
+ freq += 1; // otherwise gives negative scores for freqs < 1
return 1 + Math.log(1 + Math.log(freq));
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b2f24816/lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF3LOG.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF3LOG.java b/lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF3LOG.java
index 4753e4e..194b70a 100644
--- a/lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF3LOG.java
+++ b/lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF3LOG.java
@@ -47,7 +47,7 @@ public class AxiomaticF3LOG extends Axiomatic {
*/
@Override
protected double tf(BasicStats stats, double freq, double docLen) {
- if (freq <= 0.0) return 0.0;
+ freq += 1; // otherwise gives negative scores for freqs < 1
return 1 + Math.log(1 + Math.log(freq));
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b2f24816/lucene/core/src/java/org/apache/lucene/search/similarities/DistributionSPL.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/DistributionSPL.java b/lucene/core/src/java/org/apache/lucene/search/similarities/DistributionSPL.java
index fc05d72..2ab44df 100644
--- a/lucene/core/src/java/org/apache/lucene/search/similarities/DistributionSPL.java
+++ b/lucene/core/src/java/org/apache/lucene/search/similarities/DistributionSPL.java
@@ -34,11 +34,29 @@ public class DistributionSPL extends Distribution {
@Override
public final double score(BasicStats stats, double tfn, double lambda) {
- if (lambda == 1d) {
- lambda = 0.99d;
+ assert lambda != 1;
+
+ // tfn/(tfn+1) -> 1 - 1/(tfn+1), guaranteed to be non decreasing when tfn increases
+ double q = 1 - 1 / (tfn + 1);
+ if (q == 1) {
+ q = Math.nextDown(1.0);
+ }
+
+ double pow = Math.pow(lambda, q);
+ if (pow == lambda) {
+ // this can happen because of floating-point rounding
+ // but then we return infinity when taking the log, so we enforce
+ // that pow is different from lambda
+ if (lambda < 1) {
+ // x^y > x when x < 1 and y < 1
+ pow = Math.nextUp(lambda);
+ } else {
+ // x^y < x when x > 1 and y < 1
+ pow = Math.nextDown(lambda);
+ }
}
- return -Math.log(
- (Math.pow(lambda, (tfn / (tfn + 1))) - lambda) / (1 - lambda));
+
+ return -Math.log((pow - lambda) / (1 - lambda));
}
@Override
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b2f24816/lucene/core/src/java/org/apache/lucene/search/similarities/LambdaDF.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/LambdaDF.java b/lucene/core/src/java/org/apache/lucene/search/similarities/LambdaDF.java
index 7dc320d..6b7dbb2 100644
--- a/lucene/core/src/java/org/apache/lucene/search/similarities/LambdaDF.java
+++ b/lucene/core/src/java/org/apache/lucene/search/similarities/LambdaDF.java
@@ -30,7 +30,12 @@ public class LambdaDF extends Lambda {
@Override
public final float lambda(BasicStats stats) {
- return (stats.getDocFreq()+1F) / (stats.getNumberOfDocuments()+1F);
+ float lambda = (float) ((stats.getDocFreq() + 1.0) / (stats.getNumberOfDocuments() + 1.0));
+ if (lambda == 1) {
+ // Distribution SPL cannot work with values of lambda that are equal to 1
+ lambda = Math.nextDown(lambda);
+ }
+ return lambda;
}
@Override
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b2f24816/lucene/core/src/java/org/apache/lucene/search/similarities/LambdaTTF.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/LambdaTTF.java b/lucene/core/src/java/org/apache/lucene/search/similarities/LambdaTTF.java
index 6dc54a3..72eae4c 100644
--- a/lucene/core/src/java/org/apache/lucene/search/similarities/LambdaTTF.java
+++ b/lucene/core/src/java/org/apache/lucene/search/similarities/LambdaTTF.java
@@ -30,7 +30,12 @@ public class LambdaTTF extends Lambda {
@Override
public final float lambda(BasicStats stats) {
- return (stats.getTotalTermFreq()+1F) / (stats.getNumberOfDocuments()+1F);
+ float lambda = (float) ((stats.getTotalTermFreq() + 1.0) / (stats.getNumberOfDocuments() + 1.0));
+ if (lambda == 1) {
+ // Distribution SPL cannot work with values of lambda that are equal to 1
+ lambda = Math.nextUp(lambda);
+ }
+ return lambda;
}
public final Explanation explain(BasicStats stats) {
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b2f24816/lucene/core/src/test/org/apache/lucene/search/similarities/TestAxiomaticF1EXP.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/search/similarities/TestAxiomaticF1EXP.java b/lucene/core/src/test/org/apache/lucene/search/similarities/TestAxiomaticF1EXP.java
index 16da903..9e2edf1 100644
--- a/lucene/core/src/test/org/apache/lucene/search/similarities/TestAxiomaticF1EXP.java
+++ b/lucene/core/src/test/org/apache/lucene/search/similarities/TestAxiomaticF1EXP.java
@@ -16,10 +16,6 @@
*/
package org.apache.lucene.search.similarities;
-import org.apache.lucene.util.LuceneTestCase.AwaitsFix;
-
-// returns NaN scores for sloppy freqs < 1 (due to log without floor)
-@AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/LUCENE-8010")
public class TestAxiomaticF1EXP extends AxiomaticTestCase {
@Override
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b2f24816/lucene/core/src/test/org/apache/lucene/search/similarities/TestAxiomaticF1LOG.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/search/similarities/TestAxiomaticF1LOG.java b/lucene/core/src/test/org/apache/lucene/search/similarities/TestAxiomaticF1LOG.java
index 88ad18e..0d6ba48 100644
--- a/lucene/core/src/test/org/apache/lucene/search/similarities/TestAxiomaticF1LOG.java
+++ b/lucene/core/src/test/org/apache/lucene/search/similarities/TestAxiomaticF1LOG.java
@@ -16,10 +16,6 @@
*/
package org.apache.lucene.search.similarities;
-import org.apache.lucene.util.LuceneTestCase.AwaitsFix;
-
-// returns NaN scores for sloppy freqs < 1 (due to log without floor)
-@AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/LUCENE-8010")
public class TestAxiomaticF1LOG extends AxiomaticTestCase {
@Override
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b2f24816/lucene/core/src/test/org/apache/lucene/search/similarities/TestAxiomaticF3EXP.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/search/similarities/TestAxiomaticF3EXP.java b/lucene/core/src/test/org/apache/lucene/search/similarities/TestAxiomaticF3EXP.java
index 69ab719..63ad87a 100644
--- a/lucene/core/src/test/org/apache/lucene/search/similarities/TestAxiomaticF3EXP.java
+++ b/lucene/core/src/test/org/apache/lucene/search/similarities/TestAxiomaticF3EXP.java
@@ -16,16 +16,11 @@
*/
package org.apache.lucene.search.similarities;
-import org.apache.lucene.util.LuceneTestCase.AwaitsFix;
-
-// returns negative scores at least, but it (now) warns it has problems
-@AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/LUCENE-8010")
public class TestAxiomaticF3EXP extends AxiomaticTestCase {
@Override
protected final Similarity getAxiomaticModel(float s, int queryLen, float k) {
- // TODO: use the randomized parameters and not these hardcoded ones
- return new AxiomaticF3EXP(0.25f, 1);
+ return new AxiomaticF3EXP(s, queryLen);
}
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b2f24816/lucene/core/src/test/org/apache/lucene/search/similarities/TestAxiomaticF3LOG.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/search/similarities/TestAxiomaticF3LOG.java b/lucene/core/src/test/org/apache/lucene/search/similarities/TestAxiomaticF3LOG.java
index 6863277..cb92984 100644
--- a/lucene/core/src/test/org/apache/lucene/search/similarities/TestAxiomaticF3LOG.java
+++ b/lucene/core/src/test/org/apache/lucene/search/similarities/TestAxiomaticF3LOG.java
@@ -16,16 +16,11 @@
*/
package org.apache.lucene.search.similarities;
-import org.apache.lucene.util.LuceneTestCase.AwaitsFix;
-
-// returns negative scores at least, but it (now) warns it has problems
-@AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/LUCENE-8010")
public class TestAxiomaticF3LOG extends AxiomaticTestCase {
@Override
protected final Similarity getAxiomaticModel(float s, int queryLen, float k) {
- // TODO: use the randomized parameters and not these hardcoded ones
- return new AxiomaticF3LOG(0.25f, 1);
+ return new AxiomaticF3LOG(s, queryLen);
}
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b2f24816/lucene/core/src/test/org/apache/lucene/search/similarities/TestDistributionSPL.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/search/similarities/TestDistributionSPL.java b/lucene/core/src/test/org/apache/lucene/search/similarities/TestDistributionSPL.java
index 984915a..20a2b32 100644
--- a/lucene/core/src/test/org/apache/lucene/search/similarities/TestDistributionSPL.java
+++ b/lucene/core/src/test/org/apache/lucene/search/similarities/TestDistributionSPL.java
@@ -16,10 +16,6 @@
*/
package org.apache.lucene.search.similarities;
-import org.apache.lucene.util.LuceneTestCase.AwaitsFix;
-
-// scores go infinite, but it warns it has problems
-@AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/LUCENE-8010")
public class TestDistributionSPL extends DistributionTestCase {
@Override
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b2f24816/lucene/core/src/test/org/apache/lucene/search/similarities/TestSimilarity2.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/search/similarities/TestSimilarity2.java b/lucene/core/src/test/org/apache/lucene/search/similarities/TestSimilarity2.java
index 6fd38bd..49dc154 100644
--- a/lucene/core/src/test/org/apache/lucene/search/similarities/TestSimilarity2.java
+++ b/lucene/core/src/test/org/apache/lucene/search/similarities/TestSimilarity2.java
@@ -54,6 +54,13 @@ public class TestSimilarity2 extends LuceneTestCase {
sims = new ArrayList<>();
sims.add(new ClassicSimilarity());
sims.add(new BM25Similarity());
+ sims.add(new BooleanSimilarity());
+ sims.add(new AxiomaticF1EXP());
+ sims.add(new AxiomaticF1LOG());
+ sims.add(new AxiomaticF2EXP());
+ sims.add(new AxiomaticF2LOG());
+ sims.add(new AxiomaticF3EXP(0.25f, 3));
+ sims.add(new AxiomaticF3LOG(0.25f, 3));
// TODO: not great that we dup this all with TestSimilarityBase
for (BasicModel basicModel : TestSimilarityBase.BASIC_MODELS) {
for (AfterEffect afterEffect : TestSimilarityBase.AFTER_EFFECTS) {
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b2f24816/lucene/test-framework/src/java/org/apache/lucene/search/similarities/RandomSimilarity.java
----------------------------------------------------------------------
diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/similarities/RandomSimilarity.java b/lucene/test-framework/src/java/org/apache/lucene/search/similarities/RandomSimilarity.java
index 444e8ef..0925aee 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/search/similarities/RandomSimilarity.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/search/similarities/RandomSimilarity.java
@@ -89,8 +89,12 @@ public class RandomSimilarity extends PerFieldSimilarityWrapper {
allSims = new ArrayList<>();
allSims.add(new ClassicSimilarity());
allSims.add(new BM25Similarity());
- // We cannot do this, because this similarity behaves in "non-traditional" ways:
- // allSims.add(new BooleanSimilarity());
+ allSims.add(new AxiomaticF1EXP());
+ allSims.add(new AxiomaticF1LOG());
+ allSims.add(new AxiomaticF2EXP());
+ allSims.add(new AxiomaticF2LOG());
+
+ allSims.add(new BooleanSimilarity());
for (BasicModel basicModel : BASIC_MODELS) {
for (AfterEffect afterEffect : AFTER_EFFECTS) {
for (Normalization normalization : NORMALIZATIONS) {
@@ -105,8 +109,7 @@ public class RandomSimilarity extends PerFieldSimilarityWrapper {
}
}
}
- /* TODO: enable Dirichlet
- allSims.add(new LMDirichletSimilarity()); */
+ allSims.add(new LMDirichletSimilarity());
allSims.add(new LMJelinekMercerSimilarity(0.1f));
allSims.add(new LMJelinekMercerSimilarity(0.7f));
for (Independence independence : INDEPENDENCE_MEASURES) {