You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by sn...@apache.org on 2022/12/08 23:13:01 UTC

[pinot] 01/01: Fix the flaky test for StatisticalQueriesTest

This is an automated email from the ASF dual-hosted git repository.

snlee pushed a commit to branch fixing-statistical-test
in repository https://gitbox.apache.org/repos/asf/pinot.git

commit a402db15d2c7c91ca2be7a44f0ce2c4bd2200ca2
Author: Seunghyun Lee <se...@startree.ai>
AuthorDate: Thu Dec 8 15:11:09 2022 -0800

    Fix the flaky test for StatisticalQueriesTest
    
    VarianceTuple did not correctly handle merging empty variance
    tuple. This PR fixes the issue.
---
 .../pinot/queries/StatisticalQueriesTest.java      | 43 ++++++++++++++++++++--
 .../segment/local/customobject/VarianceTuple.java  |  6 ++-
 2 files changed, 44 insertions(+), 5 deletions(-)

diff --git a/pinot-core/src/test/java/org/apache/pinot/queries/StatisticalQueriesTest.java b/pinot-core/src/test/java/org/apache/pinot/queries/StatisticalQueriesTest.java
index d61dcaa18f..5eaff1d923 100644
--- a/pinot-core/src/test/java/org/apache/pinot/queries/StatisticalQueriesTest.java
+++ b/pinot-core/src/test/java/org/apache/pinot/queries/StatisticalQueriesTest.java
@@ -37,6 +37,7 @@ import org.apache.pinot.core.operator.blocks.results.AggregationResultsBlock;
 import org.apache.pinot.core.operator.blocks.results.GroupByResultsBlock;
 import org.apache.pinot.core.operator.query.AggregationOperator;
 import org.apache.pinot.core.operator.query.GroupByOperator;
+import org.apache.pinot.core.query.aggregation.function.VarianceAggregationFunction;
 import org.apache.pinot.core.query.aggregation.groupby.AggregationGroupByResult;
 import org.apache.pinot.segment.local.customobject.CovarianceTuple;
 import org.apache.pinot.segment.local.customobject.VarianceTuple;
@@ -430,6 +431,41 @@ public class StatisticalQueriesTest extends BaseQueriesTest {
     checkGroupByResultsForCovariance(brokerResponse, _expectedFinalResultVer2);
   }
 
+  @Test
+  public void testVarianceTuple() {
+    // Start from empty variance and try to merge non-empty variance
+    VarianceTuple varianceTuple = new VarianceTuple(0, 0.0, 0.0);
+    Variance variance = new Variance(false);
+    assertEquals(computeVariancePop(varianceTuple), variance.getResult());
+    assertEquals(variance.getResult(), Double.NaN);
+
+    VarianceTuple firstValueVarianceTuple = new VarianceTuple(1, 1.0, 0.0);
+    varianceTuple.apply(firstValueVarianceTuple);
+    variance.increment(1.0);
+    assertTrue(Precision.equalsWithRelativeTolerance(computeVariancePop(varianceTuple), variance.getResult(),
+        RELATIVE_EPSILON));
+
+    VarianceTuple secondValueVarianceTuple = new VarianceTuple(1, 3.0, 0.0);
+    varianceTuple.apply(secondValueVarianceTuple);
+    variance.increment(3.0);
+    assertTrue(Precision.equalsWithRelativeTolerance(computeVariancePop(varianceTuple), variance.getResult(),
+        RELATIVE_EPSILON));
+
+
+    // For this time, start from non-empty variance and try to merge empty variance
+    varianceTuple = new VarianceTuple(0, 0.0, 0.0);
+    varianceTuple.apply(new VarianceTuple(1, 1.0, 0.0));
+    varianceTuple.apply(new VarianceTuple(1, 2.0, 0.0));
+    variance = new Variance(false);
+    variance.increment(1.0);
+    variance.increment(2.0);
+
+    varianceTuple.apply(new VarianceTuple(0, 0.0, 0.0));
+    assertTrue(Precision.equalsWithRelativeTolerance(computeVariancePop(varianceTuple), variance.getResult(),
+        RELATIVE_EPSILON));
+  }
+
+
   @Test
   public void testVarianceAggregationOnly() {
     // Compute the expected values
@@ -506,9 +542,6 @@ public class StatisticalQueriesTest extends BaseQueriesTest {
     assertTrue(
         Precision.equalsWithRelativeTolerance((double) results[7], expectedVariances[7].getResult(), RELATIVE_EPSILON));
 
-    VarianceTuple test = ((VarianceTuple) aggregationResult.get(0));
-    test.apply((new VarianceTuple(0, 0, 0.0d)));
-    System.out.println(test.getM2());
     // Validate the response for a query with a filter
     query = "SELECT VAR_POP(intColumnX) from testTable" + getFilter();
     brokerResponse = getBrokerResponse(query);
@@ -735,6 +768,10 @@ public class StatisticalQueriesTest extends BaseQueriesTest {
     }
   }
 
+  private double computeVariancePop(VarianceTuple varianceTuple) {
+    return varianceTuple.getM2() / varianceTuple.getCount();
+  }
+
   @AfterClass
   public void tearDown()
       throws IOException {
diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/customobject/VarianceTuple.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/customobject/VarianceTuple.java
index 09594364b4..d1159002a1 100644
--- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/customobject/VarianceTuple.java
+++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/customobject/VarianceTuple.java
@@ -36,7 +36,8 @@ public class VarianceTuple implements Comparable<VarianceTuple> {
     if (count == 0) {
       return;
     }
-    double delta = (sum / count) - (_sum / _count);
+    double currAvg = (_count == 0) ? 0 : _sum / _count;
+    double delta = (sum / count) - currAvg;
     _m2 += m2 + delta * delta * count * _count / (count + _count);
     _count += count;
     _sum += sum;
@@ -46,7 +47,8 @@ public class VarianceTuple implements Comparable<VarianceTuple> {
     if (varianceTuple._count == 0) {
       return;
     }
-    double delta = (varianceTuple._sum / varianceTuple._count) - (_sum / _count);
+    double currAvg = (_count == 0) ? 0 : _sum / _count;
+    double delta = (varianceTuple._sum / varianceTuple._count) - currAvg;
     _m2 += varianceTuple._m2 + delta * delta * varianceTuple._count * _count / (varianceTuple._count + _count);
     _count += varianceTuple._count;
     _sum += varianceTuple._sum;


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org