You are viewing a plain text version of this content. The canonical link for it was not preserved in this plain-text copy.
Posted to commits@lucene.apache.org by jp...@apache.org on 2015/04/16 16:53:09 UTC

svn commit: r1674084 - in /lucene/dev/trunk/lucene: CHANGES.txt core/src/java/org/apache/lucene/index/TermContext.java core/src/java/org/apache/lucene/search/TermQuery.java sandbox/src/java/org/apache/lucene/sandbox/queries/FuzzyLikeThisQuery.java

Author: jpountz
Date: Thu Apr 16 14:53:09 2015
New Revision: 1674084

URL: http://svn.apache.org/r1674084
Log:
LUCENE-6429: Removed the TermQuery(Term,int) constructor.

Modified:
    lucene/dev/trunk/lucene/CHANGES.txt
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/TermContext.java
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/TermQuery.java
    lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/sandbox/queries/FuzzyLikeThisQuery.java

Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1674084&r1=1674083&r2=1674084&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Thu Apr 16 14:53:09 2015
@@ -200,6 +200,9 @@ Bug Fixes
 * LUCENE-6395: Seeking by term ordinal was failing to set the term's
   bytes in MemoryIndex (Mike McCandless)
 
+* LUCENE-6429: Removed the TermQuery(Term,int) constructor which could lead to
+  inconsistent term statistics. (Adrien Grand, Robert Muir)
+
 Optimizations
 
 * LUCENE-6183, LUCENE-5647: Avoid recompressing stored fields

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/TermContext.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/TermContext.java?rev=1674084&r1=1674083&r2=1674084&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/TermContext.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/TermContext.java Thu Apr 16 14:53:09 2015
@@ -53,6 +53,7 @@ public final class TermContext {
     assert context != null && context.isTopLevel;
     topReaderContext = context;
     docFreq = 0;
+    totalTermFreq = 0;
     final int len;
     if (context.leaves() == null) {
       len = 1;
@@ -107,6 +108,7 @@ public final class TermContext {
    */
   public void clear() {
     docFreq = 0;
+    totalTermFreq = 0;
     Arrays.fill(states, null);
   }
 
@@ -160,12 +162,6 @@ public final class TermContext {
   public long totalTermFreq() {
     return totalTermFreq;
   }
-  
-  /** expert: only available for queries that want to lie about docfreq
-   * @lucene.internal */
-  public void setDocFreq(int docFreq) {
-    this.docFreq = docFreq;
-  }
 
   /** Returns true if all terms stored here are real (e.g., not auto-prefix terms).
    *

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/TermQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/TermQuery.java?rev=1674084&r1=1674083&r2=1674084&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/TermQuery.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/TermQuery.java Thu Apr 16 14:53:09 2015
@@ -18,6 +18,7 @@ package org.apache.lucene.search;
  */
 
 import java.io.IOException;
+import java.util.Objects;
 import java.util.Set;
 
 import org.apache.lucene.index.IndexReaderContext;
@@ -40,7 +41,6 @@ import org.apache.lucene.util.ToStringUt
  */
 public class TermQuery extends Query {
   private final Term term;
-  private final int docFreq;
   private final TermContext perReaderTermState;
   
   final class TermWeight extends Weight {
@@ -138,16 +138,7 @@ public class TermQuery extends Query {
   
   /** Constructs a query for the term <code>t</code>. */
   public TermQuery(Term t) {
-    this(t, -1);
-  }
-  
-  /**
-   * Expert: constructs a TermQuery that will use the provided docFreq instead
-   * of looking up the docFreq against the searcher.
-   */
-  public TermQuery(Term t, int docFreq) {
-    term = t;
-    this.docFreq = docFreq;
+    term = Objects.requireNonNull(t);
     perReaderTermState = null;
   }
   
@@ -157,9 +148,8 @@ public class TermQuery extends Query {
    */
   public TermQuery(Term t, TermContext states) {
     assert states != null;
-    term = t;
-    docFreq = states.docFreq();
-    perReaderTermState = states;
+    term = Objects.requireNonNull(t);
+    perReaderTermState = Objects.requireNonNull(states);
   }
   
   /** Returns the term of this query. */
@@ -181,9 +171,6 @@ public class TermQuery extends Query {
       termState = this.perReaderTermState;
     }
     
-    // we must not ignore the given docFreq - if set use the given value (lie)
-    if (docFreq != -1) termState.setDocFreq(docFreq);
-    
     return new TermWeight(searcher, needsScores, termState);
   }
   

Modified: lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/sandbox/queries/FuzzyLikeThisQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/sandbox/queries/FuzzyLikeThisQuery.java?rev=1674084&r1=1674083&r2=1674084&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/sandbox/queries/FuzzyLikeThisQuery.java (original)
+++ lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/sandbox/queries/FuzzyLikeThisQuery.java Thu Apr 16 14:53:09 2015
@@ -27,12 +27,21 @@ import org.apache.lucene.analysis.Analyz
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.MultiFields;
 import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermContext;
 import org.apache.lucene.index.Terms;
-import org.apache.lucene.search.*;
-import org.apache.lucene.search.similarities.TFIDFSimilarity;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.BoostAttribute;
+import org.apache.lucene.search.ConstantScoreQuery;
+import org.apache.lucene.search.MaxNonCompetitiveBoostAttribute;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.search.similarities.DefaultSimilarity;
+import org.apache.lucene.search.similarities.TFIDFSimilarity;
 import org.apache.lucene.util.AttributeSource;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.PriorityQueue;
@@ -256,6 +265,27 @@ public class FuzzyLikeThisQuery extends
     }
   }
 
+  private Query newTermQuery(IndexReader reader, Term term) throws IOException {
+    if (ignoreTF) {
+      return new ConstantScoreQuery(new TermQuery(term));
+    } else {
+      // we build an artificial TermContext that will give an overall df and ttf
+      // equal to 1
+      TermContext context = new TermContext(reader.getContext());
+      for (LeafReaderContext leafContext : reader.leaves()) {
+        Terms terms = leafContext.reader().terms(term.field());
+        if (terms != null) {
+          TermsEnum termsEnum = terms.iterator();
+          if (termsEnum.seekExact(term.bytes())) {
+            int freq = 1 - context.docFreq(); // we want the total df and ttf to be 1
+            context.register(termsEnum.termState(), leafContext.ord, freq, freq);
+          }
+        }
+      }
+      return new TermQuery(term, context);
+    }
+  }
+
   @Override
     public Query rewrite(IndexReader reader) throws IOException
     {
@@ -298,7 +328,7 @@ public class FuzzyLikeThisQuery extends
             {
                 //optimize where only one selected variant
                 ScoreTerm st= variants.get(0);
-                Query tq = ignoreTF ? new ConstantScoreQuery(new TermQuery(st.term)) : new TermQuery(st.term, 1);
+                Query tq = newTermQuery(reader, st.term);
                 tq.setBoost(st.score); // set the boost to a mix of IDF and score
                 bq.add(tq, BooleanClause.Occur.SHOULD); 
             }
@@ -310,7 +340,7 @@ public class FuzzyLikeThisQuery extends
                 {
                     ScoreTerm st = iterator2.next();
                     // found a match
-                    Query tq = ignoreTF ? new ConstantScoreQuery(new TermQuery(st.term)) : new TermQuery(st.term, 1);                    
+                    Query tq = newTermQuery(reader, st.term);
                     tq.setBoost(st.score); // set the boost using the ScoreTerm's score
                     termVariants.add(tq, BooleanClause.Occur.SHOULD);          // add to query                    
                 }