You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by tf...@apache.org on 2016/02/25 04:04:49 UTC
lucene-solr git commit: SOLR-8420: Fix long overflow in sumOfSquares
for Date statistics
Repository: lucene-solr
Updated Branches:
refs/heads/master dc95d5248 -> 730d10f14
SOLR-8420: Fix long overflow in sumOfSquares for Date statistics
Casted operations to double. Changed the test to support a percentage error given the FUZZY flag in doubles
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/730d10f1
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/730d10f1
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/730d10f1
Branch: refs/heads/master
Commit: 730d10f145378b164a93d63b82a02dcf7f2fdf14
Parents: dc95d52
Author: Tomas Fernandez Lobbe <tf...@apache.org>
Authored: Wed Feb 24 19:02:17 2016 -0800
Committer: Tomas Fernandez Lobbe <tf...@apache.org>
Committed: Wed Feb 24 19:02:17 2016 -0800
----------------------------------------------------------------------
solr/CHANGES.txt | 3 ++
.../handler/component/StatsValuesFactory.java | 4 +-
.../org/apache/solr/TestDistributedSearch.java | 11 ++++-
.../handler/component/StatsComponentTest.java | 42 +++++++++++++++++++-
.../solr/BaseDistributedSearchTestCase.java | 32 +++++++++++++++
5 files changed, 88 insertions(+), 4 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/730d10f1/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 87300bc..e1f9e826 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -218,6 +218,9 @@ Bug Fixes
* SOLR-8696: Start the Overseer before actions that need the overseer on init and when reconnecting after
zk expiration and improve init logic. (Scott Blum, Mark Miller)
+* SOLR-8420: Fix long overflow in sumOfSquares for Date statistics. (Tom Hill, Christine Poerschke,
+ Tomás Fernández Löbbe)
+
Optimizations
----------------------
* SOLR-7876: Speed up queries and operations that use many terms when timeAllowed has not been
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/730d10f1/solr/core/src/java/org/apache/solr/handler/component/StatsValuesFactory.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/handler/component/StatsValuesFactory.java b/solr/core/src/java/org/apache/solr/handler/component/StatsValuesFactory.java
index 47f3851..ec61153 100644
--- a/solr/core/src/java/org/apache/solr/handler/component/StatsValuesFactory.java
+++ b/solr/core/src/java/org/apache/solr/handler/component/StatsValuesFactory.java
@@ -751,7 +751,7 @@ class DateStatsValues extends AbstractStatsValues<Date> {
public void updateTypeSpecificStats(Date v, int count) {
long value = v.getTime();
if (computeSumOfSquares) {
- sumOfSquares += (value * value * count); // for std deviation
+ sumOfSquares += ((double)value * value * count); // for std deviation
}
if (computeSum) {
sum += value * count;
@@ -807,7 +807,7 @@ class DateStatsValues extends AbstractStatsValues<Date> {
if (count <= 1) {
return 0.0D;
}
- return Math.sqrt(((count * sumOfSquares) - (sum * sum))
+ return Math.sqrt(((count * sumOfSquares) - (sum * (double)sum))
/ (count * (count - 1.0D)));
}
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/730d10f1/solr/core/src/test/org/apache/solr/TestDistributedSearch.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/TestDistributedSearch.java b/solr/core/src/test/org/apache/solr/TestDistributedSearch.java
index b6750c2..4b83801 100644
--- a/solr/core/src/test/org/apache/solr/TestDistributedSearch.java
+++ b/solr/core/src/test/org/apache/solr/TestDistributedSearch.java
@@ -425,8 +425,13 @@ public class TestDistributedSearch extends BaseDistributedSearchTestCase {
query("q","*:*", "sort",i1+" desc", "stats", "true", "stats.field", "stats_dt");
query("q","*:*", "sort",i1+" desc", "stats", "true", "stats.field", i1);
+
+ handle.put("stddev", FUZZY);
+ handle.put("sumOfSquares", FUZZY);
query("q","*:*", "sort",i1+" desc", "stats", "true", "stats.field", tdate_a);
query("q","*:*", "sort",i1+" desc", "stats", "true", "stats.field", tdate_b);
+ handle.remove("stddev");
+ handle.remove("sumOfSquares");
rsp = query("q", "*:*", "sort", i1 + " desc", "stats", "true",
@@ -522,6 +527,8 @@ public class TestDistributedSearch extends BaseDistributedSearchTestCase {
"stats.field", "{!key=special_key}stats_dt",
"stats.field", "{!ex=xxx}stats_dt");
+ handle.put("stddev", FUZZY);
+ handle.put("sumOfSquares", FUZZY);
query("q","*:*", "sort",i1+" desc", "stats", "true",
// do a really simple query so distributed IDF doesn't cause problems
// when comparing with control collection
@@ -838,7 +845,7 @@ public class TestDistributedSearch extends BaseDistributedSearchTestCase {
paras.append("}").append(field);
numTotalStatQueries++;
- rsp = query("q","*:*", "rows", "0", "stats", "true",
+ rsp = query("q", q, "rows", "0", "stats", "true",
"stats.field", paras.toString());
// simple assert, mostly relying on comparison with single shard
FieldStatsInfo s = rsp.getFieldStatsInfo().get("k");
@@ -850,6 +857,8 @@ public class TestDistributedSearch extends BaseDistributedSearchTestCase {
}
}
}
+ handle.remove("stddev");
+ handle.remove("sumOfSquares");
assertEquals("Sanity check failed: either test broke, or test changed, or you adjusted Stat enum" +
" (adjust constant accordingly if intentional)",
5082, numTotalStatQueries);
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/730d10f1/solr/core/src/test/org/apache/solr/handler/component/StatsComponentTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/handler/component/StatsComponentTest.java b/solr/core/src/test/org/apache/solr/handler/component/StatsComponentTest.java
index 36c7a51..7cfb656 100644
--- a/solr/core/src/test/org/apache/solr/handler/component/StatsComponentTest.java
+++ b/solr/core/src/test/org/apache/solr/handler/component/StatsComponentTest.java
@@ -54,7 +54,6 @@ import org.apache.solr.util.AbstractSolrTestCase;
import org.apache.commons.math3.util.Combinations;
import com.tdunning.math.stats.AVLTreeDigest;
-import com.google.common.hash.Hashing;
import com.google.common.hash.HashFunction;
import org.apache.solr.util.hll.HLL;
@@ -465,6 +464,47 @@ public class StatsComponentTest extends AbstractSolrTestCase {
}
+ // Check for overflow of sumOfSquares
+ public void testFieldStatisticsResultsDateFieldOverflow() throws Exception {
+ SolrCore core = h.getCore();
+
+ assertU(adoc("id", "1", "active_dt", "2015-12-14T09:00:00Z"));
+ assertU(commit());
+
+ Map<String, String> args = new HashMap<>();
+ args.put(CommonParams.Q, "*:*");
+ args.put(StatsParams.STATS, "true");
+ args.put(StatsParams.STATS_FIELD, "active_dt");
+ args.put("indent", "true");
+ SolrQueryRequest req = new LocalSolrQueryRequest(core, new MapSolrParams(args));
+
+ assertQ("test date statistics values", req,
+ "//long[@name='count'][.='1']",
+ "//date[@name='min'][.='2015-12-14T09:00:00Z']",
+ "//date[@name='max'][.='2015-12-14T09:00:00Z']",
+ "//date[@name='sum'][.='2015-12-14T09:00:00Z']",
+ "//date[@name='mean'][.='2015-12-14T09:00:00Z']",
+ "//double[@name='sumOfSquares'][.='" + Double.toString(2102742446988960000000000.0)+"']"
+ );
+
+ assertU(adoc("id", "2", "active_dt", "2115-12-14T09:00:00Z"));
+ assertU(adoc("id", "3", "active_dt", "2215-12-14T09:00:00Z"));
+ assertU(commit());
+
+ assertQ("test date statistics values", req,
+ "//long[@name='count'][.='3']",
+ "//date[@name='min'][.='2015-12-14T09:00:00Z']",
+ "//date[@name='max'][.='2215-12-14T09:00:00Z']",
+ "//date[@name='sum'][.='2407-11-08T03:00:00Z']",
+ "//date[@name='mean'][.='2115-12-14T09:00:00Z']",
+ "//double[@name='sumOfSquares'][.='" + Double.toString(83555549895529430000000000.0)+"']",
+ // The following number matches the number returned by the current solr
+ // implementation of standard deviation. Should be 3155673600000.
+ // That number is not precise, and the implementation should be fixed.
+ "//double[@name='stddev'][.='" + Double.toString(3155673599999.999)+"']"
+ );
+ }
+
public void doTestFieldStatisticsMissingResult(String f, SolrParams[] baseParamsSet) throws Exception {
assertU(adoc("id", "1", f, "-10"));
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/730d10f1/solr/test-framework/src/java/org/apache/solr/BaseDistributedSearchTestCase.java
----------------------------------------------------------------------
diff --git a/solr/test-framework/src/java/org/apache/solr/BaseDistributedSearchTestCase.java b/solr/test-framework/src/java/org/apache/solr/BaseDistributedSearchTestCase.java
index ad006a4..812663e 100644
--- a/solr/test-framework/src/java/org/apache/solr/BaseDistributedSearchTestCase.java
+++ b/solr/test-framework/src/java/org/apache/solr/BaseDistributedSearchTestCase.java
@@ -240,6 +240,13 @@ public abstract class BaseDistributedSearchTestCase extends SolrTestCaseJ4 {
public static int SKIP = 2;
public static int SKIPVAL = 4;
public static int UNORDERED = 8;
+
+ /**
+ * When this flag is set, Double values will be allowed a difference ratio of 1E-8
+ * between the non-distributed and the distributed returned values
+ */
+ public static int FUZZY = 16;
+ private static final double DOUBLE_RATIO_LIMIT = 1E-8;
protected int flags;
protected Map<String, Integer> handle = new HashMap<>();
@@ -876,6 +883,31 @@ public abstract class BaseDistributedSearchTestCase extends SolrTestCaseJ4 {
}
+ if ((flags & FUZZY) != 0) {
+ if ((a instanceof Double && b instanceof Double)) {
+ double aaa = ((Double) a).doubleValue();
+ double bbb = ((Double) b).doubleValue();
+ if (aaa == bbb || ((Double) a).isNaN() && ((Double) b).isNaN()) {
+ return null;
+ }
+ if ((aaa == 0.0) || (bbb == 0.0)) {
+ return ":" + a + "!=" + b;
+ }
+
+ double diff = Math.abs(aaa - bbb);
+ // When stats computations are done on multiple shards, there may
+ // be small differences in the results. Allow a small difference
+ // between the result of the computations.
+
+ double ratio = Math.max(Math.abs(diff / aaa), Math.abs(diff / bbb));
+ if (ratio > DOUBLE_RATIO_LIMIT) {
+ return ":" + a + "!=" + b;
+ } else {
+ return null;// close enough.
+ }
+ }
+ }
+
if (!(a.equals(b))) {
return ":" + a + "!=" + b;
}