You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by ch...@apache.org on 2022/08/10 15:52:24 UTC

[datasketches-cpp] branch engagement-test updated: Removed comments and added basic docstrings

This is an automated email from the ASF dual-hosted git repository.

charlie pushed a commit to branch engagement-test
in repository https://gitbox.apache.org/repos/asf/datasketches-cpp.git


The following commit(s) were added to refs/heads/engagement-test by this push:
     new 9b55b1a  Removed comments and added basic docstrings
9b55b1a is described below

commit 9b55b1a2eb80616c8121ceb0df4cbbb0affc8e1d
Author: cdickens <di...@gmail.com>
AuthorDate: Wed Aug 10 16:52:05 2022 +0100

    Removed comments and added basic docstrings
---
 tuple/test/engagement_test.cpp | 54 ++++++++++++++++--------------------------
 1 file changed, 20 insertions(+), 34 deletions(-)

diff --git a/tuple/test/engagement_test.cpp b/tuple/test/engagement_test.cpp
index f837c4d..3fab6c7 100644
--- a/tuple/test/engagement_test.cpp
+++ b/tuple/test/engagement_test.cpp
@@ -80,9 +80,10 @@ class EngagementTest{
 public:
     int num_std_dev = 2 ;
     void test_always_one_update(){
-        std::cout << "########## Testing ALWAYS ONE policy ##########" << std::endl ;
+        /*
+         * Tests that updates into an update_tuple_sketch keep only a 1 in the column for stored values.
+         */
         int lgK = 8 ;
-        // Here is where the IntegerSketch should go
         std::vector<datasketches::update_tuple_sketch<int, int, always_one_policy<int>>> sketch_array ;
 
         auto always_one_sketch = always_one_tuple_sketch::builder(always_one_policy<int>()).set_lg_k(lgK).build() ;
@@ -104,7 +105,9 @@ public:
     }
 
     void test_sum_update_policy(){
-        std::cout << "########## Testing SUM policy on UPDATE SKETCH ##########" << std::endl ;
+        /*
+        * Tests that updates into a sum_update_tuple_sketch sum the stored values on updates.
+        */
         int lgK = 8 ;
         auto sum_sketch = sum_update_tuple_sketch::builder().set_lg_k(lgK).build() ;
 
@@ -124,21 +127,20 @@ public:
     }
 
     void test_sum_union_policy(){
-        // Union two update sketches using the sum policy
+        /*
+         * Tests that when two sum_update_tuple_sketch sketches are unioned,
+         * the stored values for keys present in both sketches are summed.
+         */
         auto sketch1 = sum_update_tuple_sketch::builder().build() ;
         auto sketch2 = sum_update_tuple_sketch::builder().build() ;
 
         sketch1.update(1, 1);
         sketch1.update(2, 1);
         sketch1.update(3, 3);
-        // std::cout << "********** sketch 1 UPDATE SUMMARY **********" << std::endl ;
-        // std::cout << sketch1.to_string(true) << std::endl ;
 
         sketch2.update(1, 2);
         sketch2.update(2, 1);
         sketch2.update(3, 7);
-        // std::cout << "********** sketch 2 UPDATE SUMMARY **********" << std::endl ;
-        // std::cout << sketch2.to_string(true) << std::endl ;
 
         auto union_sketch = sum_union_tuple_sketch::builder().build() ;
         union_sketch.update(sketch1) ;
@@ -153,16 +155,15 @@ public:
         }
         REQUIRE(num_retained == 3);
         REQUIRE(sum == 15); // 1:(1+2) + 2:(1+1) + 3:(3+7) = 15
-
-        // std::cout << "********** UNION SUMMARY **********" << std::endl ;
-        // std::cout << union_result.to_string(true) << std::endl ;
-
     }
 
     void compute_engagement_histogram(){
-        std::cout << "########## Testing ENGAGEMENT ##########" << std::endl ;
+        /*
+         * Computes the estimated engagement histogram from the synthetic data.
+         * On inspection one can verify this agrees with the Java EngagementTest:
+         * https://github.com/apache/datasketches-java/blob/master/src/test/java/org/apache/datasketches/tuple/aninteger/EngagementTest.java
+         */
         int lgK = 8 ;
-        //int K = 1 << lgK ;
         const int days = 30 ;
         int v = 0 ;
         std::set<int> set_array[days];
@@ -170,30 +171,22 @@ public:
 
 
         for(int i=0; i<days ; i++){
-
             auto builder = always_one_tuple_sketch::builder(always_one_policy<int>()) ;
             builder.set_lg_k(lgK) ;
             auto sketch = builder.build() ;
             sketch_array.push_back(sketch);
         }
         REQUIRE(sketch_array.size() == days) ;
-        std::cout << "Size of vector: " << sketch_array.size() << std::endl ;
-
 
         for(int i=0; i<=days; i++){
             int32_t num_ids = get_num_ids(days, i) ;
             int32_t num_days = get_num_days(days, i) ;
 
-            //std::cout << i << "\t" << num_ids << "\t" << num_days << std::endl ;
-            // TO DO: Continue from here and figure out what to do with the tuple sketches.
             int my_v = v++ ;
             for(int d=0 ; d<num_days; d++){
                 for(int id = 0; id < num_ids; id++){
-                    //std::cout << id << " " << my_v << " " << my_v + id << " " << num_ids << std::endl ;
                     set_array[d].insert(my_v + id) ;
                     sketch_array[d].update(my_v + id, 1) ;
-                    //std::cout << "d: " << d << " id: " << id << " my_v + id: " << my_v + id << std::endl ;
-                    // sk_arr[d].update(my_v + id, 1) ; // update the day d sketch with the key my_v + id
                 }
             }
             v += num_ids ;
@@ -216,45 +209,38 @@ private:
         return int(round(exp( (d-i) * log(d) / d ))) ;
     }
 
-    int8_t round_double_to_int(double x){
+    int32_t round_double_to_int(double x){
         return int(std::round(x)) ;
     }
 
-
-    //void union_ops(int lgk, std::vector<datasketches::update_tuple_sketch<int>> sketches){
     void union_ops(int lgk, std::vector<datasketches::update_tuple_sketch<int, int, always_one_policy<int>>> sketches){
         int num_sketches = sketches.size() ;
-        //auto u =  datasketches::tuple_union<int>::builder().set_lg_k(lgk).build() ;
         auto u = sum_union_tuple_sketch::builder().set_lg_k(lgk).build() ;
 
-
         for(auto sk:sketches){
             u.update(sk) ;
         }
         auto union_result = u.get_result() ;
-        std::cout << "Union type: " << typeid(union_result).name() << std::endl ;
-        // std::cout << "The estimate is: " << res.get_estimate() << std::endl ; // agrees with 271.9156532434 from java.
+        // union_result.get_estimate() should be 271.9156532434 (agrees with the Java implementation).
         std::vector<uint64_t> num_days_arr(num_sketches+1) ;
 
         int num_retained = 0 ;
         int total_sum = 0 ;
 
         for (const auto& entry: union_result) {
-            std::cout << "First: " << entry.first << "\tSecond: " << entry.second << std::endl ;
+//            std::cout << "First: " << entry.first << "\tSecond: " << entry.second << std::endl ;
             int num_days_visited = entry.second ;
             num_retained++ ;
             total_sum += entry.second ;
             num_days_arr[num_days_visited]++;
         }
-        std::cout << "Num retained items: " << num_retained << std::endl ; // This agrees with Java.
-        std::cout << "Sum(retained items): " << total_sum << std::endl ; // This agrees with Java.
+        std::cout << "Num retained items: " << num_retained << std::endl ;
+        std::cout << "Sum(retained items): " << total_sum << std::endl ;
 
         for(int i = 1; i<num_sketches+1; i++){
             std::cout<< "i = " << i << "\tnum_days_arr[i] = " << num_days_arr[i] << std::endl ;
         }
 
-        // *********************** WE AGREE UP TO HERE ***********************
-        // For pretty printing
         int sum_visits = 0;
         double theta = union_result.get_theta();
         std::cout <<"\t\tEngagement Histogram.\t\t\t\n" ;


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org