You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ja...@apache.org on 2015/08/03 00:31:19 UTC

svn commit: r1693833 - in /lucene/dev/trunk/solr: ./ core/src/java/org/apache/solr/search/ core/src/java/org/apache/solr/util/ core/src/test/org/apache/solr/util/ solrj/src/java/org/apache/solr/common/params/

Author: janhoy
Date: Sun Aug  2 22:31:19 2015
New Revision: 1693833

URL: http://svn.apache.org/r1693833
Log:
SOLR-3085: New edismax param mm.autoRelax which helps in certain cases of the stopwords/zero-hits issue

Modified:
    lucene/dev/trunk/solr/CHANGES.txt
    lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/DisMaxQParser.java
    lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParser.java
    lucene/dev/trunk/solr/core/src/java/org/apache/solr/util/SolrPluginUtils.java
    lucene/dev/trunk/solr/core/src/test/org/apache/solr/util/SolrPluginUtilsTest.java
    lucene/dev/trunk/solr/solrj/src/java/org/apache/solr/common/params/DisMaxParams.java

Modified: lucene/dev/trunk/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/CHANGES.txt?rev=1693833&r1=1693832&r2=1693833&view=diff
==============================================================================
--- lucene/dev/trunk/solr/CHANGES.txt (original)
+++ lucene/dev/trunk/solr/CHANGES.txt Sun Aug  2 22:31:19 2015
@@ -50,6 +50,11 @@ Upgrading from Solr 5.x
 Detailed Change List
 ----------------------
 
+New Features
+----------------------
+
+* SOLR-3085: New edismax param mm.autoRelax which helps in certain cases of the stopwords/zero-hits issue (janhoy)
+
 Other Changes
 ----------------------
 

Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/DisMaxQParser.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/DisMaxQParser.java?rev=1693833&r1=1693832&r2=1693833&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/DisMaxQParser.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/DisMaxQParser.java Sun Aug  2 22:31:19 2015
@@ -243,7 +243,8 @@ public class DisMaxQParser extends QPars
     if (dis instanceof BooleanQuery) {
       BooleanQuery.Builder t = new BooleanQuery.Builder();
       SolrPluginUtils.flattenBooleanQuery(t, (BooleanQuery) dis);
-      SolrPluginUtils.setMinShouldMatch(t, minShouldMatch);
+      boolean mmAutoRelax = params.getBool(DisMaxParams.MM_AUTORELAX, false);
+      SolrPluginUtils.setMinShouldMatch(t, minShouldMatch, mmAutoRelax);
       query = t.build();
     }
     return query;

Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParser.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParser.java?rev=1693833&r1=1693832&r2=1693833&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParser.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParser.java Sun Aug  2 22:31:19 2015
@@ -299,7 +299,7 @@ public class ExtendedDismaxQParser exten
     if (query instanceof BooleanQuery) {
       BooleanQuery.Builder t = new BooleanQuery.Builder();
       SolrPluginUtils.flattenBooleanQuery(t, (BooleanQuery)query);
-      SolrPluginUtils.setMinShouldMatch(t, config.minShouldMatch);
+      SolrPluginUtils.setMinShouldMatch(t, config.minShouldMatch, config.mmAutoRelax);
       query = t.build();
     }
     return query;
@@ -341,7 +341,7 @@ public class ExtendedDismaxQParser exten
     // were explicit operators (except for AND).
     boolean doMinMatched = doMinMatched(clauses, config.lowercaseOperators);
     if (doMinMatched && query instanceof BooleanQuery) {
-      query = SolrPluginUtils.setMinShouldMatch((BooleanQuery)query, config.minShouldMatch);
+      query = SolrPluginUtils.setMinShouldMatch((BooleanQuery)query, config.minShouldMatch, config.mmAutoRelax);
     }
     return query;
   }
@@ -1238,7 +1238,7 @@ public class ExtendedDismaxQParser exten
             if (query instanceof BooleanQuery) {
               BooleanQuery bq = (BooleanQuery) query;
               if (!bq.isCoordDisabled()) {
-                query = SolrPluginUtils.setMinShouldMatch(bq, minShouldMatch);
+                query = SolrPluginUtils.setMinShouldMatch(bq, minShouldMatch, false);
               }
             }
             if (query instanceof PhraseQuery) {
@@ -1490,6 +1490,8 @@ public class ExtendedDismaxQParser exten
     protected int qslop;
     
     protected boolean stopwords;
+
+    protected boolean mmAutoRelax;
     
     protected String altQ;
     
@@ -1527,6 +1529,8 @@ public class ExtendedDismaxQParser exten
       qslop = solrParams.getInt(DisMaxParams.QS, 0);
       
       stopwords = solrParams.getBool(DMP.STOPWORDS, true);
+
+      mmAutoRelax = solrParams.getBool(DMP.MM_AUTORELAX, false);
       
       altQ = solrParams.get( DisMaxParams.ALTQ );
       

Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/util/SolrPluginUtils.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/util/SolrPluginUtils.java?rev=1693833&r1=1693832&r2=1693833&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/util/SolrPluginUtils.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/util/SolrPluginUtils.java Sun Aug  2 22:31:19 2015
@@ -560,11 +560,14 @@ public class SolrPluginUtils {
     return out;
   }
 
-
   /**
    * Checks the number of optional clauses in the query, and compares it
    * with the specification string to determine the proper value to use.
-   *
+   * <p>
+   * If mmAutoRelax=true, we'll perform auto relaxation of mm if tokens
+   * are removed from some but not all DisMax clauses, as can happen when
+   * stopwords or punctuation tokens are removed in analysis.
+   * </p>
    * <p>
    * Details about the specification format can be found
    * <a href="doc-files/min-should-match.html">here</a>
@@ -589,29 +592,54 @@ public class SolrPluginUtils {
    * <p>:TODO: should optimize the case where number is same
    * as clauses to just make them all "required"
    * </p>
+   *
+   * @param q The query as a BooleanQuery.Builder
+   * @param spec The mm spec
+   * @param mmAutoRelax whether to perform auto relaxation of mm if tokens are removed from some but not all DisMax clauses
    */
-  public static void setMinShouldMatch(BooleanQuery.Builder q, String spec) {
+  public static void setMinShouldMatch(BooleanQuery.Builder q, String spec, boolean mmAutoRelax) {
 
     int optionalClauses = 0;
+    int maxDisjunctsSize = 0;
+    int optionalDismaxClauses = 0;
     for (BooleanClause c : q.build().clauses()) {
       if (c.getOccur() == Occur.SHOULD) {
-        optionalClauses++;
+        if (mmAutoRelax && c.getQuery() instanceof DisjunctionMaxQuery) {
+          int numDisjuncts = ((DisjunctionMaxQuery)c.getQuery()).getDisjuncts().size();
+          if (numDisjuncts>maxDisjunctsSize) {
+            maxDisjunctsSize = numDisjuncts;
+            optionalDismaxClauses = 1;
+          }
+          else if (numDisjuncts == maxDisjunctsSize) {
+            optionalDismaxClauses++;
+          }
+        } else {
+          optionalClauses++;
+        }
       }
     }
 
-    int msm = calculateMinShouldMatch(optionalClauses, spec);
+    int msm = calculateMinShouldMatch(optionalClauses + optionalDismaxClauses, spec);
     if (0 < msm) {
       q.setMinimumNumberShouldMatch(msm);
     }
   }
 
+  public static void setMinShouldMatch(BooleanQuery.Builder q, String spec) {
+    setMinShouldMatch(q, spec, false);
+  }
+
   public static BooleanQuery setMinShouldMatch(BooleanQuery q, String spec) {
+    return setMinShouldMatch(q, spec, false);
+  }
+
+  public static BooleanQuery setMinShouldMatch(BooleanQuery q, String spec, boolean mmAutoRelax) {
     BooleanQuery.Builder builder = new BooleanQuery.Builder();
     builder.setDisableCoord(q.isCoordDisabled());
     for (BooleanClause clause : q) {
       builder.add(clause);
     }
-    setMinShouldMatch(builder, spec);
+    setMinShouldMatch(builder, spec, mmAutoRelax);
     return builder.build();
   }
 

Modified: lucene/dev/trunk/solr/core/src/test/org/apache/solr/util/SolrPluginUtilsTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/test/org/apache/solr/util/SolrPluginUtilsTest.java?rev=1693833&r1=1693832&r2=1693833&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/test/org/apache/solr/util/SolrPluginUtilsTest.java (original)
+++ lucene/dev/trunk/solr/core/src/test/org/apache/solr/util/SolrPluginUtilsTest.java Sun Aug  2 22:31:19 2015
@@ -390,6 +390,66 @@ public class SolrPluginUtilsTest extends
         
   }
 
+  @Test
+  public void testMinShouldMatchAutoRelax() {
+    /* The basics should not be affected by autoRelax */
+    BooleanQuery.Builder q = new BooleanQuery.Builder();
+    q.add(new TermQuery(new Term("a","b")), Occur.SHOULD);
+    q.add(new TermQuery(new Term("a","c")), Occur.SHOULD);
+    q.add(new TermQuery(new Term("a","d")), Occur.SHOULD);
+    q.add(new TermQuery(new Term("a","d")), Occur.SHOULD);
+
+    SolrPluginUtils.setMinShouldMatch(q, "0", true);
+    assertEquals(0, q.build().getMinimumNumberShouldMatch());
+
+    SolrPluginUtils.setMinShouldMatch(q, "1", true);
+    assertEquals(1, q.build().getMinimumNumberShouldMatch());
+
+    SolrPluginUtils.setMinShouldMatch(q, "50%", true);
+    assertEquals(2, q.build().getMinimumNumberShouldMatch());
+
+    SolrPluginUtils.setMinShouldMatch(q, "99", true);
+    assertEquals(4, q.build().getMinimumNumberShouldMatch());
+
+    q.add(new TermQuery(new Term("a","e")), Occur.MUST);
+    q.add(new TermQuery(new Term("a","f")), Occur.MUST);
+
+    SolrPluginUtils.setMinShouldMatch(q, "50%", true);
+    assertEquals(2, q.build().getMinimumNumberShouldMatch());
+
+    /* Simulate stopwords through uneven disjuncts */
+    q = new BooleanQuery.Builder();
+    DisjunctionMaxQuery dmq = new DisjunctionMaxQuery(0.0f);
+    dmq.add(new TermQuery(new Term("a","foo")));
+    q.add(dmq, Occur.SHOULD);
+    dmq = new DisjunctionMaxQuery(0.0f);
+    dmq.add(new TermQuery(new Term("a","foo")));
+    dmq.add(new TermQuery(new Term("b","foo")));
+    q.add(dmq, Occur.SHOULD);
+    dmq = new DisjunctionMaxQuery(0.0f);
+    dmq.add(new TermQuery(new Term("a","bar")));
+    dmq.add(new TermQuery(new Term("b","bar")));
+    q.add(dmq, Occur.SHOULD);
+
+    // Without relax
+    SolrPluginUtils.setMinShouldMatch(q, "100%", false);
+    assertEquals(3, q.build().getMinimumNumberShouldMatch());
+
+    // With relax
+    SolrPluginUtils.setMinShouldMatch(q, "100%", true);
+    assertEquals(2, q.build().getMinimumNumberShouldMatch());
+
+    // Still same result with a MUST clause extra
+    q.add(new TermQuery(new Term("a","must")), Occur.MUST);
+    SolrPluginUtils.setMinShouldMatch(q, "100%", true);
+    assertEquals(2, q.build().getMinimumNumberShouldMatch());
+
+    // Combination of dismax and non-dismax SHOULD clauses
+    q.add(new TermQuery(new Term("b","should")), Occur.SHOULD);
+    SolrPluginUtils.setMinShouldMatch(q, "100%", true);
+    assertEquals(3, q.build().getMinimumNumberShouldMatch());
+  }
+
   /** macro */
   public String pe(CharSequence s) {
     return SolrPluginUtils.partialEscape(s).toString();

Modified: lucene/dev/trunk/solr/solrj/src/java/org/apache/solr/common/params/DisMaxParams.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/solrj/src/java/org/apache/solr/common/params/DisMaxParams.java?rev=1693833&r1=1693832&r2=1693833&view=diff
==============================================================================
--- lucene/dev/trunk/solr/solrj/src/java/org/apache/solr/common/params/DisMaxParams.java (original)
+++ lucene/dev/trunk/solr/solrj/src/java/org/apache/solr/common/params/DisMaxParams.java Sun Aug  2 22:31:19 2015
@@ -42,7 +42,12 @@ public interface DisMaxParams {
   
   /** query and init param for MinShouldMatch specification */
   public static String MM = "mm";
-  
+
+  /**
+   * If set to true, will try to reduce MM if tokens are removed from some clauses but not all
+   */
+  public static String MM_AUTORELAX = "mm.autoRelax";
+
   /**
    * query and init param for Phrase Slop value in phrase
    * boost query (in pf fields)