You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by an...@apache.org on 2016/04/21 23:49:24 UTC

[3/7] lucene-solr:branch_5x: SOLR-8420: Fix long overflow in sumOfSquares for Date statistics

SOLR-8420: Fix long overflow in sumOfSquares for Date statistics

Casted operations to double. Changed the test to support a percentage error given the FUZZY flag in doubles


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/2beccf46
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/2beccf46
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/2beccf46

Branch: refs/heads/branch_5x
Commit: 2beccf469f9e07eb5a05fef9ec3f869d6da4008a
Parents: 20e2cab
Author: Tomas Fernandez Lobbe <tf...@apache.org>
Authored: Wed Feb 24 19:02:17 2016 -0800
Committer: anshum <an...@apache.org>
Committed: Thu Apr 21 14:00:11 2016 -0700

----------------------------------------------------------------------
 solr/CHANGES.txt                                |  3 ++
 .../handler/component/StatsValuesFactory.java   |  4 +-
 .../org/apache/solr/TestDistributedSearch.java  | 11 ++++-
 .../handler/component/StatsComponentTest.java   | 42 +++++++++++++++++++-
 .../solr/BaseDistributedSearchTestCase.java     | 32 +++++++++++++++
 5 files changed, 88 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/2beccf46/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 07efaa3..0904b6f 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -98,6 +98,9 @@ Bug Fixes
 * SOLR-8599: After a failed connection during construction of SolrZkClient attempt to retry until a connection
   can be made. (Keith Laban, Dennis Gove)
 
+* SOLR-8420: Fix long overflow in sumOfSquares for Date statistics. (Tom Hill, Christine Poerschke,
+  Tomás Fernández Löbbe)
+
 ======================= 5.5.0 =======================
 
 Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/2beccf46/solr/core/src/java/org/apache/solr/handler/component/StatsValuesFactory.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/handler/component/StatsValuesFactory.java b/solr/core/src/java/org/apache/solr/handler/component/StatsValuesFactory.java
index 6565b6e..5003709 100644
--- a/solr/core/src/java/org/apache/solr/handler/component/StatsValuesFactory.java
+++ b/solr/core/src/java/org/apache/solr/handler/component/StatsValuesFactory.java
@@ -751,7 +751,7 @@ class DateStatsValues extends AbstractStatsValues<Date> {
   public void updateTypeSpecificStats(Date v, int count) {
     long value = v.getTime();
     if (computeSumOfSquares) {
-      sumOfSquares += (value * value * count); // for std deviation
+      sumOfSquares += ((double)value * value * count); // for std deviation
     }
     if (computeSum) {
       sum += value * count;
@@ -807,7 +807,7 @@ class DateStatsValues extends AbstractStatsValues<Date> {
     if (count <= 1) {
       return 0.0D;
     }
-    return Math.sqrt(((count * sumOfSquares) - (sum * sum))
+    return Math.sqrt(((count * sumOfSquares) - (sum * (double)sum))
         / (count * (count - 1.0D)));
   }
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/2beccf46/solr/core/src/test/org/apache/solr/TestDistributedSearch.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/TestDistributedSearch.java b/solr/core/src/test/org/apache/solr/TestDistributedSearch.java
index b6750c2..4b83801 100644
--- a/solr/core/src/test/org/apache/solr/TestDistributedSearch.java
+++ b/solr/core/src/test/org/apache/solr/TestDistributedSearch.java
@@ -425,8 +425,13 @@ public class TestDistributedSearch extends BaseDistributedSearchTestCase {
     
     query("q","*:*", "sort",i1+" desc", "stats", "true", "stats.field", "stats_dt");
     query("q","*:*", "sort",i1+" desc", "stats", "true", "stats.field", i1);
+
+    handle.put("stddev", FUZZY);
+    handle.put("sumOfSquares", FUZZY);
     query("q","*:*", "sort",i1+" desc", "stats", "true", "stats.field", tdate_a);
     query("q","*:*", "sort",i1+" desc", "stats", "true", "stats.field", tdate_b);
+    handle.remove("stddev");
+    handle.remove("sumOfSquares");
 
 
     rsp = query("q", "*:*", "sort", i1 + " desc", "stats", "true", 
@@ -522,6 +527,8 @@ public class TestDistributedSearch extends BaseDistributedSearchTestCase {
           "stats.field", "{!key=special_key}stats_dt",
           "stats.field", "{!ex=xxx}stats_dt");
 
+    handle.put("stddev", FUZZY);
+    handle.put("sumOfSquares", FUZZY);
     query("q","*:*", "sort",i1+" desc", "stats", "true",
           // do a really simple query so distributed IDF doesn't cause problems
           // when comparing with control collection
@@ -838,7 +845,7 @@ public class TestDistributedSearch extends BaseDistributedSearchTestCase {
             
             paras.append("}").append(field);
             numTotalStatQueries++;
-            rsp = query("q","*:*", "rows", "0", "stats", "true",
+            rsp = query("q", q, "rows", "0", "stats", "true",
                         "stats.field", paras.toString());
             // simple assert, mostly relying on comparison with single shard
             FieldStatsInfo s = rsp.getFieldStatsInfo().get("k");
@@ -850,6 +857,8 @@ public class TestDistributedSearch extends BaseDistributedSearchTestCase {
         }
       }
     }
+    handle.remove("stddev");
+    handle.remove("sumOfSquares");
     assertEquals("Sanity check failed: either test broke, or test changed, or you adjusted Stat enum" + 
                  " (adjust constant accordingly if intentional)",
                  5082, numTotalStatQueries);

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/2beccf46/solr/core/src/test/org/apache/solr/handler/component/StatsComponentTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/handler/component/StatsComponentTest.java b/solr/core/src/test/org/apache/solr/handler/component/StatsComponentTest.java
index 33dd13b..db6d137 100644
--- a/solr/core/src/test/org/apache/solr/handler/component/StatsComponentTest.java
+++ b/solr/core/src/test/org/apache/solr/handler/component/StatsComponentTest.java
@@ -54,7 +54,6 @@ import org.apache.solr.util.AbstractSolrTestCase;
 
 import org.apache.commons.math3.util.Combinations;
 import com.tdunning.math.stats.AVLTreeDigest;
-import com.google.common.hash.Hashing; 
 import com.google.common.hash.HashFunction;
 import org.apache.solr.util.hll.HLL;
 
@@ -465,6 +464,47 @@ public class StatsComponentTest extends AbstractSolrTestCase {
 
   }
 
+  // Check for overflow of sumOfSquares
+  public void testFieldStatisticsResultsDateFieldOverflow() throws Exception {
+    SolrCore core = h.getCore();
+
+    assertU(adoc("id", "1", "active_dt", "2015-12-14T09:00:00Z"));
+    assertU(commit());
+
+    Map<String, String> args = new HashMap<>();
+    args.put(CommonParams.Q, "*:*");
+    args.put(StatsParams.STATS, "true");
+    args.put(StatsParams.STATS_FIELD, "active_dt");
+    args.put("indent", "true");
+    SolrQueryRequest req = new LocalSolrQueryRequest(core, new MapSolrParams(args));
+
+    assertQ("test date statistics values", req,
+            "//long[@name='count'][.='1']",
+            "//date[@name='min'][.='2015-12-14T09:00:00Z']",
+            "//date[@name='max'][.='2015-12-14T09:00:00Z']",
+            "//date[@name='sum'][.='2015-12-14T09:00:00Z']",
+            "//date[@name='mean'][.='2015-12-14T09:00:00Z']",
+            "//double[@name='sumOfSquares'][.='" + Double.toString(2102742446988960000000000.0)+"']"
+            );
+
+    assertU(adoc("id", "2", "active_dt", "2115-12-14T09:00:00Z"));
+    assertU(adoc("id", "3", "active_dt", "2215-12-14T09:00:00Z"));
+    assertU(commit());
+
+    assertQ("test date statistics values", req,
+        "//long[@name='count'][.='3']",
+        "//date[@name='min'][.='2015-12-14T09:00:00Z']",
+        "//date[@name='max'][.='2215-12-14T09:00:00Z']",
+        "//date[@name='sum'][.='2407-11-08T03:00:00Z']",
+        "//date[@name='mean'][.='2115-12-14T09:00:00Z']",
+        "//double[@name='sumOfSquares'][.='" + Double.toString(83555549895529430000000000.0)+"']",
+        // The following number matches the number returned by the current solr
+        // implementation of standard deviation. Should be 3155673600000.
+        // That number is not precise, and the implementation should be fixed.
+        "//double[@name='stddev'][.='" + Double.toString(3155673599999.999)+"']"
+        );
+  }
+
 
   public void doTestFieldStatisticsMissingResult(String f, SolrParams[] baseParamsSet) throws Exception {
     assertU(adoc("id", "1", f, "-10"));

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/2beccf46/solr/test-framework/src/java/org/apache/solr/BaseDistributedSearchTestCase.java
----------------------------------------------------------------------
diff --git a/solr/test-framework/src/java/org/apache/solr/BaseDistributedSearchTestCase.java b/solr/test-framework/src/java/org/apache/solr/BaseDistributedSearchTestCase.java
index ad006a4..812663e 100644
--- a/solr/test-framework/src/java/org/apache/solr/BaseDistributedSearchTestCase.java
+++ b/solr/test-framework/src/java/org/apache/solr/BaseDistributedSearchTestCase.java
@@ -240,6 +240,13 @@ public abstract class BaseDistributedSearchTestCase extends SolrTestCaseJ4 {
   public static int SKIP = 2;
   public static int SKIPVAL = 4;
   public static int UNORDERED = 8;
+  
+  /**
+   * When this flag is set, Double values will be allowed a difference ratio of 1E-8
+   * between the non-distributed and the distributed returned values
+   */
+  public static int FUZZY = 16;
+  private static final double DOUBLE_RATIO_LIMIT = 1E-8;
 
   protected int flags;
   protected Map<String, Integer> handle = new HashMap<>();
@@ -876,6 +883,31 @@ public abstract class BaseDistributedSearchTestCase extends SolrTestCaseJ4 {
 
     }
 
+    if ((flags & FUZZY) != 0) {
+      if ((a instanceof Double && b instanceof Double)) {
+        double aaa = ((Double) a).doubleValue();
+        double bbb = ((Double) b).doubleValue();
+        if (aaa == bbb || ((Double) a).isNaN() && ((Double) b).isNaN()) {
+          return null;
+        }
+        if ((aaa == 0.0) || (bbb == 0.0)) {
+            return ":" + a + "!=" + b;
+        }
+
+        double diff = Math.abs(aaa - bbb);
+        // When stats computations are done on multiple shards, there may
+        // be small differences in the results. Allow a small difference
+        // between the result of the computations.
+
+        double ratio = Math.max(Math.abs(diff / aaa), Math.abs(diff / bbb));
+        if (ratio > DOUBLE_RATIO_LIMIT) {
+          return ":" + a + "!=" + b;
+        } else {
+          return null;// close enough.
+        }
+      }
+    }
+
     if (!(a.equals(b))) {
       return ":" + a + "!=" + b;
     }