You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by le...@apache.org on 2019/11/07 04:44:14 UTC

[incubator-datasketches-java] 01/01: Tweak EngagementTest to make it more correct and understandable.

This is an automated email from the ASF dual-hosted git repository.

leerho pushed a commit to branch TweakEngagementTest
in repository https://gitbox.apache.org/repos/asf/incubator-datasketches-java.git

commit 48d55ded3a1019f85f8ad512b2534e7302bd0a9c
Author: Lee Rhodes <le...@users.noreply.github.com>
AuthorDate: Wed Nov 6 20:43:55 2019 -0800

    Tweak EngagementTest to make it more correct and understandable.
---
 .../tuple/aninteger/EngagementTest.java            | 34 ++++++++++++++--------
 1 file changed, 22 insertions(+), 12 deletions(-)

diff --git a/src/test/java/org/apache/datasketches/tuple/aninteger/EngagementTest.java b/src/test/java/org/apache/datasketches/tuple/aninteger/EngagementTest.java
index 6286896..cd91284 100644
--- a/src/test/java/org/apache/datasketches/tuple/aninteger/EngagementTest.java
+++ b/src/test/java/org/apache/datasketches/tuple/aninteger/EngagementTest.java
@@ -84,7 +84,7 @@ public class EngagementTest {
     CompactSketch<IntegerSummary> result = union.getResult();
     SketchIterator<IntegerSummary> itr = result.iterator();
 
-    int[] numDaysArr = new int[len + 1]; //zero index ignored
+    int[] numDaysArr = new int[len + 1]; //zero index is ignored
 
     while (itr.next()) {
       //For each unique visitor from the result sketch, get the # days visited
@@ -93,28 +93,38 @@ public class EngagementTest {
       numDaysArr[numDaysVisited]++; //values range from 1 to 30
     }
 
-    println("Engagement Histogram:");
+    println("\nEngagement Histogram:");
     println("Number of Unique Visitors by Number of Days Visited");
     printf("%12s%12s%12s%12s\n","Days Visited", "Estimate", "LB", "UB");
     int sumVisits = 0;
+    double theta = result.getTheta();
     for (int i = 0; i < numDaysArr.length; i++) {
       int visitorsAtDaysVisited = numDaysArr[i];
       if (visitorsAtDaysVisited == 0) { continue; }
-      int lbVisitorsAtDaysVisited = (int) round(result.getLowerBound(numStdDev, visitorsAtDaysVisited));
-      int ubVisitorsAtDaysVisited = (int) round(result.getUpperBound(numStdDev, visitorsAtDaysVisited));
       sumVisits += visitorsAtDaysVisited * i;
-      printf("%12d%12d%12d%12d\n",
-          i, visitorsAtDaysVisited, lbVisitorsAtDaysVisited, ubVisitorsAtDaysVisited);
+
+      double estVisitorsAtDaysVisited = visitorsAtDaysVisited / theta;
+      double lbVisitorsAtDaysVisited = result.getLowerBound(numStdDev, visitorsAtDaysVisited);
+      double ubVisitorsAtDaysVisited = result.getUpperBound(numStdDev, visitorsAtDaysVisited);
+
+      printf("%12d%12.0f%12.0f%12.0f\n",
+          i, estVisitorsAtDaysVisited, lbVisitorsAtDaysVisited, ubVisitorsAtDaysVisited);
     }
+
+    //The estimate and bounds of the total number of visitors come directly from the sketch.
     double visitors = result.getEstimate();
     double lbVisitors = result.getLowerBound(numStdDev);
     double ubVisitors = result.getUpperBound(numStdDev);
-    int lbVisits = (int) round((sumVisits * lbVisitors)/visitors);
-    int ubVisits = (int) round((sumVisits * ubVisitors)/visitors);
     printf("\n%12s%12s%12s%12s\n","Totals", "Estimate", "LB", "UB");
-    printf("%12s%12d%12d%12d\n", "Visitors",
-        (int)round(visitors), (int)round(lbVisitors), (int)round(ubVisitors));
-    printf("%12s%12d%12d%12d\n", "Visits", sumVisits, lbVisits, ubVisits);
+    printf("%12s%12.0f%12.0f%12.0f\n", "Visitors", visitors, lbVisitors, ubVisitors);
+
+    //The total number of visits, however, is a scaled metric and takes advantage of the fact that
+    //the retained entries in the sketch are a uniform random sample of all unique visitors, and
+    //the rest of the unique users will likely behave in the same way.
+    double estVisits = sumVisits / theta;
+    double lbVisits = (estVisits * lbVisitors) / visitors;
+    double ubVisits = (estVisits * ubVisitors) / visitors;
+    printf("%12s%12.0f%12.0f%12.0f\n\n", "Visits", estVisits, lbVisits, ubVisits);
   }
 
   /**
@@ -129,6 +139,6 @@ public class EngagementTest {
    * @param args arguments
    */
   private static void printf(String fmt, Object ... args) {
-    //System.out.printf(fmt, args); //Enable/Disable printing here
+    System.out.printf(fmt, args); //Enable/Disable printing here
   }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org