You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by le...@apache.org on 2019/11/07 04:44:13 UTC

[incubator-datasketches-java] branch TweakEngagementTest created (now 48d55de)

This is an automated email from the ASF dual-hosted git repository.

leerho pushed a change to branch TweakEngagementTest
in repository https://gitbox.apache.org/repos/asf/incubator-datasketches-java.git.


      at 48d55de  Tweak EngagementTest to make it more correct and understandable.

This branch includes the following new commits:

     new 48d55de  Tweak EngagementTest to make it more correct and understandable.

The 1 revision listed above as "new" is entirely new to this
repository and will be described in a separate email.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org


[incubator-datasketches-java] 01/01: Tweak EngagementTest to make it more correct and understandable.

Posted by le...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

leerho pushed a commit to branch TweakEngagementTest
in repository https://gitbox.apache.org/repos/asf/incubator-datasketches-java.git

commit 48d55ded3a1019f85f8ad512b2534e7302bd0a9c
Author: Lee Rhodes <le...@users.noreply.github.com>
AuthorDate: Wed Nov 6 20:43:55 2019 -0800

    Tweak EngagementTest to make it more correct and understandable.
---
 .../tuple/aninteger/EngagementTest.java            | 34 ++++++++++++++--------
 1 file changed, 22 insertions(+), 12 deletions(-)

diff --git a/src/test/java/org/apache/datasketches/tuple/aninteger/EngagementTest.java b/src/test/java/org/apache/datasketches/tuple/aninteger/EngagementTest.java
index 6286896..cd91284 100644
--- a/src/test/java/org/apache/datasketches/tuple/aninteger/EngagementTest.java
+++ b/src/test/java/org/apache/datasketches/tuple/aninteger/EngagementTest.java
@@ -84,7 +84,7 @@ public class EngagementTest {
     CompactSketch<IntegerSummary> result = union.getResult();
     SketchIterator<IntegerSummary> itr = result.iterator();
 
-    int[] numDaysArr = new int[len + 1]; //zero index ignored
+    int[] numDaysArr = new int[len + 1]; //zero index is ignored
 
     while (itr.next()) {
       //For each unique visitor from the result sketch, get the # days visited
@@ -93,28 +93,38 @@ public class EngagementTest {
       numDaysArr[numDaysVisited]++; //values range from 1 to 30
     }
 
-    println("Engagement Histogram:");
+    println("\nEngagement Histogram:");
     println("Number of Unique Visitors by Number of Days Visited");
     printf("%12s%12s%12s%12s\n","Days Visited", "Estimate", "LB", "UB");
     int sumVisits = 0;
+    double theta = result.getTheta();
     for (int i = 0; i < numDaysArr.length; i++) {
       int visitorsAtDaysVisited = numDaysArr[i];
       if (visitorsAtDaysVisited == 0) { continue; }
-      int lbVisitorsAtDaysVisited = (int) round(result.getLowerBound(numStdDev, visitorsAtDaysVisited));
-      int ubVisitorsAtDaysVisited = (int) round(result.getUpperBound(numStdDev, visitorsAtDaysVisited));
       sumVisits += visitorsAtDaysVisited * i;
-      printf("%12d%12d%12d%12d\n",
-          i, visitorsAtDaysVisited, lbVisitorsAtDaysVisited, ubVisitorsAtDaysVisited);
+
+      double estVisitorsAtDaysVisited = visitorsAtDaysVisited / theta;
+      double lbVisitorsAtDaysVisited = result.getLowerBound(numStdDev, visitorsAtDaysVisited);
+      double ubVisitorsAtDaysVisited = result.getUpperBound(numStdDev, visitorsAtDaysVisited);
+
+      printf("%12d%12.0f%12.0f%12.0f\n",
+          i, estVisitorsAtDaysVisited, lbVisitorsAtDaysVisited, ubVisitorsAtDaysVisited);
     }
+
+    //The estimate and bounds of the total number of visitors come directly from the sketch.
     double visitors = result.getEstimate();
     double lbVisitors = result.getLowerBound(numStdDev);
     double ubVisitors = result.getUpperBound(numStdDev);
-    int lbVisits = (int) round((sumVisits * lbVisitors)/visitors);
-    int ubVisits = (int) round((sumVisits * ubVisitors)/visitors);
     printf("\n%12s%12s%12s%12s\n","Totals", "Estimate", "LB", "UB");
-    printf("%12s%12d%12d%12d\n", "Visitors",
-        (int)round(visitors), (int)round(lbVisitors), (int)round(ubVisitors));
-    printf("%12s%12d%12d%12d\n", "Visits", sumVisits, lbVisits, ubVisits);
+    printf("%12s%12.0f%12.0f%12.0f\n", "Visitors", visitors, lbVisitors, ubVisitors);
+
+    //The total number of visits, however, is a scaled metric and takes advantage of the fact that
+    //the retained entries in the sketch are a uniform random sample of all unique visitors, and
+    //the rest of the unique users will likely behave in the same way.
+    double estVisits = sumVisits / theta;
+    double lbVisits = (estVisits * lbVisitors) / visitors;
+    double ubVisits = (estVisits * ubVisitors) / visitors;
+    printf("%12s%12.0f%12.0f%12.0f\n\n", "Visits", estVisits, lbVisits, ubVisits);
   }
 
   /**
@@ -129,6 +139,6 @@ public class EngagementTest {
    * @param args arguments
    */
   private static void printf(String fmt, Object ... args) {
-    //System.out.printf(fmt, args); //Enable/Disable printing here
+    System.out.printf(fmt, args); //Enable/Disable printing here
   }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org