You are viewing a plain-text version of this content; the canonical link to the original was not preserved in this copy.
Posted to commits@lucene.apache.org by jp...@apache.org on 2015/04/16 16:54:58 UTC
svn commit: r1674085 - in /lucene/dev/branches/branch_5x: ./ lucene/
lucene/core/ lucene/core/src/java/org/apache/lucene/index/
lucene/core/src/java/org/apache/lucene/search/ lucene/sandbox/
lucene/sandbox/src/java/org/apache/lucene/sandbox/queries/
Author: jpountz
Date: Thu Apr 16 14:54:58 2015
New Revision: 1674085
URL: http://svn.apache.org/r1674085
Log:
LUCENE-6429: Removed the TermQuery(Term,int) constructor.
Modified:
lucene/dev/branches/branch_5x/ (props changed)
lucene/dev/branches/branch_5x/lucene/ (props changed)
lucene/dev/branches/branch_5x/lucene/CHANGES.txt (contents, props changed)
lucene/dev/branches/branch_5x/lucene/core/ (props changed)
lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/TermContext.java
lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/TermQuery.java
lucene/dev/branches/branch_5x/lucene/sandbox/ (props changed)
lucene/dev/branches/branch_5x/lucene/sandbox/src/java/org/apache/lucene/sandbox/queries/FuzzyLikeThisQuery.java
Modified: lucene/dev/branches/branch_5x/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/CHANGES.txt?rev=1674085&r1=1674084&r2=1674085&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/branch_5x/lucene/CHANGES.txt Thu Apr 16 14:54:58 2015
@@ -171,6 +171,9 @@ Bug Fixes
* LUCENE-6395: Seeking by term ordinal was failing to set the term's
bytes in MemoryIndex (Mike McCandless)
+* LUCENE-6429: Removed the TermQuery(Term,int) constructor which could lead to
+ inconsistent term statistics. (Adrien Grand, Robert Muir)
+
Optimizations
* LUCENE-6183, LUCENE-5647: Avoid recompressing stored fields
Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/TermContext.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/TermContext.java?rev=1674085&r1=1674084&r2=1674085&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/TermContext.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/TermContext.java Thu Apr 16 14:54:58 2015
@@ -53,6 +53,7 @@ public final class TermContext {
assert context != null && context.isTopLevel;
topReaderContext = context;
docFreq = 0;
+ totalTermFreq = 0;
final int len;
if (context.leaves() == null) {
len = 1;
@@ -107,6 +108,7 @@ public final class TermContext {
*/
public void clear() {
docFreq = 0;
+ totalTermFreq = 0;
Arrays.fill(states, null);
}
@@ -160,12 +162,6 @@ public final class TermContext {
public long totalTermFreq() {
return totalTermFreq;
}
-
- /** expert: only available for queries that want to lie about docfreq
- * @lucene.internal */
- public void setDocFreq(int docFreq) {
- this.docFreq = docFreq;
- }
/** Returns true if all terms stored here are real (e.g., not auto-prefix terms).
*
Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/TermQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/TermQuery.java?rev=1674085&r1=1674084&r2=1674085&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/TermQuery.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/TermQuery.java Thu Apr 16 14:54:58 2015
@@ -18,6 +18,7 @@ package org.apache.lucene.search;
*/
import java.io.IOException;
+import java.util.Objects;
import java.util.Set;
import org.apache.lucene.index.IndexReaderContext;
@@ -40,7 +41,6 @@ import org.apache.lucene.util.ToStringUt
*/
public class TermQuery extends Query {
private final Term term;
- private final int docFreq;
private final TermContext perReaderTermState;
final class TermWeight extends Weight {
@@ -138,16 +138,7 @@ public class TermQuery extends Query {
/** Constructs a query for the term <code>t</code>. */
public TermQuery(Term t) {
- this(t, -1);
- }
-
- /**
- * Expert: constructs a TermQuery that will use the provided docFreq instead
- * of looking up the docFreq against the searcher.
- */
- public TermQuery(Term t, int docFreq) {
- term = t;
- this.docFreq = docFreq;
+ term = Objects.requireNonNull(t);
perReaderTermState = null;
}
@@ -157,9 +148,8 @@ public class TermQuery extends Query {
*/
public TermQuery(Term t, TermContext states) {
assert states != null;
- term = t;
- docFreq = states.docFreq();
- perReaderTermState = states;
+ term = Objects.requireNonNull(t);
+ perReaderTermState = Objects.requireNonNull(states);
}
/** Returns the term of this query. */
@@ -181,9 +171,6 @@ public class TermQuery extends Query {
termState = this.perReaderTermState;
}
- // we must not ignore the given docFreq - if set use the given value (lie)
- if (docFreq != -1) termState.setDocFreq(docFreq);
-
return new TermWeight(searcher, needsScores, termState);
}
Modified: lucene/dev/branches/branch_5x/lucene/sandbox/src/java/org/apache/lucene/sandbox/queries/FuzzyLikeThisQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/sandbox/src/java/org/apache/lucene/sandbox/queries/FuzzyLikeThisQuery.java?rev=1674085&r1=1674084&r2=1674085&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/sandbox/src/java/org/apache/lucene/sandbox/queries/FuzzyLikeThisQuery.java (original)
+++ lucene/dev/branches/branch_5x/lucene/sandbox/src/java/org/apache/lucene/sandbox/queries/FuzzyLikeThisQuery.java Thu Apr 16 14:54:58 2015
@@ -27,12 +27,21 @@ import org.apache.lucene.analysis.Analyz
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermContext;
import org.apache.lucene.index.Terms;
-import org.apache.lucene.search.*;
-import org.apache.lucene.search.similarities.TFIDFSimilarity;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.BoostAttribute;
+import org.apache.lucene.search.ConstantScoreQuery;
+import org.apache.lucene.search.MaxNonCompetitiveBoostAttribute;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.similarities.DefaultSimilarity;
+import org.apache.lucene.search.similarities.TFIDFSimilarity;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.PriorityQueue;
@@ -256,6 +265,27 @@ public class FuzzyLikeThisQuery extends
}
}
+ private Query newTermQuery(IndexReader reader, Term term) throws IOException {
+ if (ignoreTF) {
+ return new ConstantScoreQuery(new TermQuery(term));
+ } else {
+ // we build an artificial TermContext that will give an overall df and ttf
+ // equal to 1
+ TermContext context = new TermContext(reader.getContext());
+ for (LeafReaderContext leafContext : reader.leaves()) {
+ Terms terms = leafContext.reader().terms(term.field());
+ if (terms != null) {
+ TermsEnum termsEnum = terms.iterator();
+ if (termsEnum.seekExact(term.bytes())) {
+ int freq = 1 - context.docFreq(); // we want the total df and ttf to be 1
+ context.register(termsEnum.termState(), leafContext.ord, freq, freq);
+ }
+ }
+ }
+ return new TermQuery(term, context);
+ }
+ }
+
@Override
public Query rewrite(IndexReader reader) throws IOException
{
@@ -298,7 +328,7 @@ public class FuzzyLikeThisQuery extends
{
//optimize where only one selected variant
ScoreTerm st= variants.get(0);
- Query tq = ignoreTF ? new ConstantScoreQuery(new TermQuery(st.term)) : new TermQuery(st.term, 1);
+ Query tq = newTermQuery(reader, st.term);
tq.setBoost(st.score); // set the boost to a mix of IDF and score
bq.add(tq, BooleanClause.Occur.SHOULD);
}
@@ -310,7 +340,7 @@ public class FuzzyLikeThisQuery extends
{
ScoreTerm st = iterator2.next();
// found a match
- Query tq = ignoreTF ? new ConstantScoreQuery(new TermQuery(st.term)) : new TermQuery(st.term, 1);
+ Query tq = newTermQuery(reader, st.term);
tq.setBoost(st.score); // set the boost using the ScoreTerm's score
termVariants.add(tq, BooleanClause.Occur.SHOULD); // add to query
}