You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ho...@apache.org on 2017/12/11 17:39:13 UTC

[1/2] lucene-solr:master: SOLR-11711: Fixed distributed processing of facet.field/facet.pivot sub requests to prevent requesting unneccessary and excessive '0' count terms from each shard

Repository: lucene-solr
Updated Branches:
  refs/heads/branch_7x 42fadfc03 -> 41113ecbb
  refs/heads/master 9ad84fea8 -> efc2f32ea


SOLR-11711: Fixed distributed processing of facet.field/facet.pivot sub requests to prevent requesting unneccessary and excessive '0' count terms from each shard


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/efc2f32e
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/efc2f32e
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/efc2f32e

Branch: refs/heads/master
Commit: efc2f32ea05029edff9144f163d0619d091d1ba3
Parents: 9ad84fe
Author: Chris Hostetter <ho...@apache.org>
Authored: Mon Dec 11 10:26:55 2017 -0700
Committer: Chris Hostetter <ho...@apache.org>
Committed: Mon Dec 11 10:26:55 2017 -0700

----------------------------------------------------------------------
 solr/CHANGES.txt                                |  5 +++-
 .../solr/handler/component/FacetComponent.java  | 27 +++-----------------
 .../apache/solr/cloud/TestCloudPivotFacet.java  |  9 -------
 .../apache/solr/common/params/FacetParams.java  |  6 ++++-
 4 files changed, 13 insertions(+), 34 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/efc2f32e/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 1218171..a189103 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -45,8 +45,11 @@ Apache UIMA 2.3.1
 Apache ZooKeeper 3.4.10
 Jetty 9.3.20.v20170531
 
+Optimizations
+----------------------
 
-(No Changes)
+* SOLR-11711: Fixed distributed processing of facet.field/facet.pivot sub requests to prevent requesting
+  unneccessary and excessive '0' count terms from each shard (Houston Putman via hossman)
 
 
 ==================  7.2.0 ==================

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/efc2f32e/solr/core/src/java/org/apache/solr/handler/component/FacetComponent.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/handler/component/FacetComponent.java b/solr/core/src/java/org/apache/solr/handler/component/FacetComponent.java
index e1a6bc4..f0f5109 100644
--- a/solr/core/src/java/org/apache/solr/handler/component/FacetComponent.java
+++ b/solr/core/src/java/org/apache/solr/handler/component/FacetComponent.java
@@ -571,21 +571,8 @@ public class FacetComponent extends SearchComponent {
           // set the initial limit higher to increase accuracy
           dff.initialLimit = doOverRequestMath(dff.initialLimit, dff.overrequestRatio, 
                                                dff.overrequestCount);
-          
-          // If option FACET_DISTRIB_MCO is turned on then we will use 1 as the initial 
-          // minCount (unless the user explicitly set it to something less than 1). If 
-          // option FACET_DISTRIB_MCO is turned off then we will use 0 as the initial 
-          // minCount regardless of what the user might have provided (prior to the
-          // addition of the FACET_DISTRIB_MCO option the default logic was to use 0).
-          // As described in issues SOLR-8559 and SOLR-8988 the use of 1 provides a 
-          // significant performance boost.
-          dff.initialMincount = dff.mco ? Math.min(dff.minCount, 1) : 0;
-                                   
-        } else {
-          // if limit==-1, then no need to artificially lower mincount to 0 if
-          // it's 1
-          dff.initialMincount = Math.min(dff.minCount, 1);
         }
+        dff.initialMincount = Math.min(dff.minCount, 1);
       } else {
         // we're sorting by index order.
         // if minCount==0, we should always be able to get accurate results w/o
@@ -682,10 +669,8 @@ public class FacetComponent extends SearchComponent {
     } else if ( FacetParams.FACET_SORT_COUNT.equals(sort) ) {
       if ( 0 < requestedLimit ) {
         shardLimit = doOverRequestMath(shardLimit, overRequestRatio, overRequestCount);
-        shardMinCount = 0; 
-      } else {
-        shardMinCount = Math.min(requestedMinCount, 1);
       }
+      shardMinCount = Math.min(requestedMinCount, 1);
     } 
     sreq.params.set(paramStart + FacetParams.FACET_LIMIT, shardLimit);
     sreq.params.set(paramStart + FacetParams.FACET_PIVOT_MINCOUNT, shardMinCount);
@@ -1437,7 +1422,6 @@ public class FacetComponent extends SearchComponent {
     
     public int initialLimit; // how many terms requested in first phase
     public int initialMincount; // mincount param sent to each shard
-    public boolean mco;
     public double overrequestRatio;
     public int overrequestCount;
     public boolean needRefinements;
@@ -1456,9 +1440,6 @@ public class FacetComponent extends SearchComponent {
         = params.getFieldDouble(field, FacetParams.FACET_OVERREQUEST_RATIO, 1.5);
       this.overrequestCount 
         = params.getFieldInt(field, FacetParams.FACET_OVERREQUEST_COUNT, 10);
-      
-      this.mco 
-      = params.getFieldBool(field, FacetParams.FACET_DISTRIB_MCO, false);
     }
     
     void add(int shardNum, NamedList shardCounts, int numRequested) {
@@ -1496,10 +1477,10 @@ public class FacetComponent extends SearchComponent {
         }
       }
       
-      // the largest possible missing term is initialMincount if we received
+      // the largest possible missing term is (initialMincount - 1) if we received
       // less than the number requested.
       if (numRequested < 0 || numRequested != 0 && numReceived < numRequested) {
-        last = initialMincount;
+        last = Math.max(0, initialMincount - 1);
       }
       
       missingMaxPossible += last;

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/efc2f32e/solr/core/src/test/org/apache/solr/cloud/TestCloudPivotFacet.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/TestCloudPivotFacet.java b/solr/core/src/test/org/apache/solr/cloud/TestCloudPivotFacet.java
index fb9c86a..fa977da 100644
--- a/solr/core/src/test/org/apache/solr/cloud/TestCloudPivotFacet.java
+++ b/solr/core/src/test/org/apache/solr/cloud/TestCloudPivotFacet.java
@@ -53,7 +53,6 @@ import static org.apache.solr.common.params.FacetParams.FACET_OVERREQUEST_RATIO;
 import static org.apache.solr.common.params.FacetParams.FACET_PIVOT;
 import static org.apache.solr.common.params.FacetParams.FACET_PIVOT_MINCOUNT;
 import static org.apache.solr.common.params.FacetParams.FACET_SORT;
-import static org.apache.solr.common.params.FacetParams.FACET_DISTRIB_MCO;
 
 /**
  * <p>
@@ -85,8 +84,6 @@ public class TestCloudPivotFacet extends AbstractFullDistribZkTestBase {
   // param used by test purely for tracing & validation
   private static String TRACE_MIN = "_test_min";
   // param used by test purely for tracing & validation
-  private static String TRACE_DISTRIB_MIN = "_test_distrib_min";
-  // param used by test purely for tracing & validation
   private static String TRACE_MISS = "_test_miss";
   // param used by test purely for tracing & validation
   private static String TRACE_SORT = "_test_sort";
@@ -200,12 +197,6 @@ public class TestCloudPivotFacet extends AbstractFullDistribZkTestBase {
       }
       
       if (random().nextBoolean()) {
-        pivotP.add(FACET_DISTRIB_MCO, "true");
-        // trace param for validation
-        baseP.add(TRACE_DISTRIB_MIN, "true");
-      }
-
-      if (random().nextBoolean()) {
         String missing = ""+random().nextBoolean();
         pivotP.add(FACET_MISSING, missing);
         // trace param for validation

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/efc2f32e/solr/solrj/src/java/org/apache/solr/common/params/FacetParams.java
----------------------------------------------------------------------
diff --git a/solr/solrj/src/java/org/apache/solr/common/params/FacetParams.java b/solr/solrj/src/java/org/apache/solr/common/params/FacetParams.java
index 52e5f4a..b61df07 100644
--- a/solr/solrj/src/java/org/apache/solr/common/params/FacetParams.java
+++ b/solr/solrj/src/java/org/apache/solr/common/params/FacetParams.java
@@ -135,8 +135,12 @@ public interface FacetParams {
    * In SOLR-8599 and SOLR-8988, significant performance increase has been seen when enabling this optimization.
    *
    * Note: enabling this flag has no effect when the conditions above are not met. For those other cases the default behavior is sufficient.
+   *
+   * @deprecated
+   * This option is no longer used nor will if affect any queries as the fix has been built in. (SOLR-11711)
+   * This will be removed entirely in 8.0.0
    */
-
+  @Deprecated
   public static final String FACET_DISTRIB_MCO = FACET_DISTRIB + ".mco";
   
   /**


[2/2] lucene-solr:branch_7x: SOLR-11711: Fixed distributed processing of facet.field/facet.pivot sub requests to prevent requesting unneccessary and excessive '0' count terms from each shard

Posted by ho...@apache.org.
SOLR-11711: Fixed distributed processing of facet.field/facet.pivot sub requests to prevent requesting unneccessary and excessive '0' count terms from each shard

(cherry picked from commit efc2f32ea05029edff9144f163d0619d091d1ba3)


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/41113ecb
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/41113ecb
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/41113ecb

Branch: refs/heads/branch_7x
Commit: 41113ecbb62694e7e07bd236ecd0b5169bd62547
Parents: 42fadfc
Author: Chris Hostetter <ho...@apache.org>
Authored: Mon Dec 11 10:26:55 2017 -0700
Committer: Chris Hostetter <ho...@apache.org>
Committed: Mon Dec 11 10:27:08 2017 -0700

----------------------------------------------------------------------
 solr/CHANGES.txt                                |  5 +++-
 .../solr/handler/component/FacetComponent.java  | 27 +++-----------------
 .../apache/solr/cloud/TestCloudPivotFacet.java  |  9 -------
 .../apache/solr/common/params/FacetParams.java  |  6 ++++-
 4 files changed, 13 insertions(+), 34 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/41113ecb/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 28f962a..9f71c72 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -29,8 +29,11 @@ Apache UIMA 2.3.1
 Apache ZooKeeper 3.4.10
 Jetty 9.3.20.v20170531
 
+Optimizations
+----------------------
 
-(No Changes)
+* SOLR-11711: Fixed distributed processing of facet.field/facet.pivot sub requests to prevent requesting
+  unneccessary and excessive '0' count terms from each shard (Houston Putman via hossman)
 
 
 ==================  7.2.0 ==================

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/41113ecb/solr/core/src/java/org/apache/solr/handler/component/FacetComponent.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/handler/component/FacetComponent.java b/solr/core/src/java/org/apache/solr/handler/component/FacetComponent.java
index e1a6bc4..f0f5109 100644
--- a/solr/core/src/java/org/apache/solr/handler/component/FacetComponent.java
+++ b/solr/core/src/java/org/apache/solr/handler/component/FacetComponent.java
@@ -571,21 +571,8 @@ public class FacetComponent extends SearchComponent {
           // set the initial limit higher to increase accuracy
           dff.initialLimit = doOverRequestMath(dff.initialLimit, dff.overrequestRatio, 
                                                dff.overrequestCount);
-          
-          // If option FACET_DISTRIB_MCO is turned on then we will use 1 as the initial 
-          // minCount (unless the user explicitly set it to something less than 1). If 
-          // option FACET_DISTRIB_MCO is turned off then we will use 0 as the initial 
-          // minCount regardless of what the user might have provided (prior to the
-          // addition of the FACET_DISTRIB_MCO option the default logic was to use 0).
-          // As described in issues SOLR-8559 and SOLR-8988 the use of 1 provides a 
-          // significant performance boost.
-          dff.initialMincount = dff.mco ? Math.min(dff.minCount, 1) : 0;
-                                   
-        } else {
-          // if limit==-1, then no need to artificially lower mincount to 0 if
-          // it's 1
-          dff.initialMincount = Math.min(dff.minCount, 1);
         }
+        dff.initialMincount = Math.min(dff.minCount, 1);
       } else {
         // we're sorting by index order.
         // if minCount==0, we should always be able to get accurate results w/o
@@ -682,10 +669,8 @@ public class FacetComponent extends SearchComponent {
     } else if ( FacetParams.FACET_SORT_COUNT.equals(sort) ) {
       if ( 0 < requestedLimit ) {
         shardLimit = doOverRequestMath(shardLimit, overRequestRatio, overRequestCount);
-        shardMinCount = 0; 
-      } else {
-        shardMinCount = Math.min(requestedMinCount, 1);
       }
+      shardMinCount = Math.min(requestedMinCount, 1);
     } 
     sreq.params.set(paramStart + FacetParams.FACET_LIMIT, shardLimit);
     sreq.params.set(paramStart + FacetParams.FACET_PIVOT_MINCOUNT, shardMinCount);
@@ -1437,7 +1422,6 @@ public class FacetComponent extends SearchComponent {
     
     public int initialLimit; // how many terms requested in first phase
     public int initialMincount; // mincount param sent to each shard
-    public boolean mco;
     public double overrequestRatio;
     public int overrequestCount;
     public boolean needRefinements;
@@ -1456,9 +1440,6 @@ public class FacetComponent extends SearchComponent {
         = params.getFieldDouble(field, FacetParams.FACET_OVERREQUEST_RATIO, 1.5);
       this.overrequestCount 
         = params.getFieldInt(field, FacetParams.FACET_OVERREQUEST_COUNT, 10);
-      
-      this.mco 
-      = params.getFieldBool(field, FacetParams.FACET_DISTRIB_MCO, false);
     }
     
     void add(int shardNum, NamedList shardCounts, int numRequested) {
@@ -1496,10 +1477,10 @@ public class FacetComponent extends SearchComponent {
         }
       }
       
-      // the largest possible missing term is initialMincount if we received
+      // the largest possible missing term is (initialMincount - 1) if we received
       // less than the number requested.
       if (numRequested < 0 || numRequested != 0 && numReceived < numRequested) {
-        last = initialMincount;
+        last = Math.max(0, initialMincount - 1);
       }
       
       missingMaxPossible += last;

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/41113ecb/solr/core/src/test/org/apache/solr/cloud/TestCloudPivotFacet.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/TestCloudPivotFacet.java b/solr/core/src/test/org/apache/solr/cloud/TestCloudPivotFacet.java
index fb9c86a..fa977da 100644
--- a/solr/core/src/test/org/apache/solr/cloud/TestCloudPivotFacet.java
+++ b/solr/core/src/test/org/apache/solr/cloud/TestCloudPivotFacet.java
@@ -53,7 +53,6 @@ import static org.apache.solr.common.params.FacetParams.FACET_OVERREQUEST_RATIO;
 import static org.apache.solr.common.params.FacetParams.FACET_PIVOT;
 import static org.apache.solr.common.params.FacetParams.FACET_PIVOT_MINCOUNT;
 import static org.apache.solr.common.params.FacetParams.FACET_SORT;
-import static org.apache.solr.common.params.FacetParams.FACET_DISTRIB_MCO;
 
 /**
  * <p>
@@ -85,8 +84,6 @@ public class TestCloudPivotFacet extends AbstractFullDistribZkTestBase {
   // param used by test purely for tracing & validation
   private static String TRACE_MIN = "_test_min";
   // param used by test purely for tracing & validation
-  private static String TRACE_DISTRIB_MIN = "_test_distrib_min";
-  // param used by test purely for tracing & validation
   private static String TRACE_MISS = "_test_miss";
   // param used by test purely for tracing & validation
   private static String TRACE_SORT = "_test_sort";
@@ -200,12 +197,6 @@ public class TestCloudPivotFacet extends AbstractFullDistribZkTestBase {
       }
       
       if (random().nextBoolean()) {
-        pivotP.add(FACET_DISTRIB_MCO, "true");
-        // trace param for validation
-        baseP.add(TRACE_DISTRIB_MIN, "true");
-      }
-
-      if (random().nextBoolean()) {
         String missing = ""+random().nextBoolean();
         pivotP.add(FACET_MISSING, missing);
         // trace param for validation

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/41113ecb/solr/solrj/src/java/org/apache/solr/common/params/FacetParams.java
----------------------------------------------------------------------
diff --git a/solr/solrj/src/java/org/apache/solr/common/params/FacetParams.java b/solr/solrj/src/java/org/apache/solr/common/params/FacetParams.java
index 52e5f4a..b61df07 100644
--- a/solr/solrj/src/java/org/apache/solr/common/params/FacetParams.java
+++ b/solr/solrj/src/java/org/apache/solr/common/params/FacetParams.java
@@ -135,8 +135,12 @@ public interface FacetParams {
    * In SOLR-8599 and SOLR-8988, significant performance increase has been seen when enabling this optimization.
    *
    * Note: enabling this flag has no effect when the conditions above are not met. For those other cases the default behavior is sufficient.
+   *
+   * @deprecated
+   * This option is no longer used nor will if affect any queries as the fix has been built in. (SOLR-11711)
+   * This will be removed entirely in 8.0.0
    */
-
+  @Deprecated
   public static final String FACET_DISTRIB_MCO = FACET_DISTRIB + ".mco";
   
   /**