You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by wz...@apache.org on 2021/12/16 15:05:31 UTC

[impala] 04/04: IMPALA-11005 (part 3): Replace random number generator with mt19937_64

This is an automated email from the ASF dual-hosted git repository.

wzhou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 1ed48a542beeadc544193fbd4e74c2d1ac68daf5
Author: wzhou-code <wz...@cloudera.com>
AuthorDate: Thu Nov 11 15:12:53 2021 -0800

    IMPALA-11005 (part 3): Replace random number generator with mt19937_64
    
    The previous patch upgraded the boost library. This patch changes the
    64-bit random number generator from ranlux64_3 to mt19937_64, since
    mt19937_64 has better performance according to the boost benchmark:
    https://www.boost.org/doc/libs/1_74_0/doc/html/boost_random/performance.html
    Also fixes a unit test that is affected by the change of random number
    generator: the expected allocation-failure size in alloc-fail-init.test
    changes from 248 to 2536 bytes.
    
    Testing:
     - Passed exhaustive tests.
    
    Change-Id: Iade226fc17442f4d7b9b14e4a9e80a30a3856226
    Reviewed-on: http://gerrit.cloudera.org:8080/18022
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 be/src/exprs/aggregate-functions-ir.cc                           | 9 +++++----
 .../functional-query/queries/QueryTest/alloc-fail-init.test      | 4 ++--
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/be/src/exprs/aggregate-functions-ir.cc b/be/src/exprs/aggregate-functions-ir.cc
index a658e39..fe5b324 100644
--- a/be/src/exprs/aggregate-functions-ir.cc
+++ b/be/src/exprs/aggregate-functions-ir.cc
@@ -23,7 +23,7 @@
 #include <utility>
 #include <cmath>
 
-#include <boost/random/ranlux.hpp>
+#include <boost/random/mersenne_twister.hpp>
 #include <boost/random/uniform_int.hpp>
 
 #include "codegen/impala-ir.h"
@@ -53,7 +53,7 @@
 #include "common/names.h"
 
 using boost::uniform_int;
-using boost::ranlux64_3;
+using boost::mt19937_64;
 using std::make_pair;
 using std::map;
 using std::min_element;
@@ -1277,8 +1277,9 @@ class ReservoirSampleState {
   int64_t source_size_;
 
   // Random number generator for generating 64-bit integers
-  // TODO: Replace with mt19937_64 when upgrading boost
-  ranlux64_3 rng_;
+  // Replace ranlux64_3 with mt19937_64 for better performance. See boost benchmark at
+  // https://www.boost.org/doc/libs/1_74_0/doc/html/boost_random/performance.html
+  mt19937_64 rng_;
 
   // True if the array of samples is in the same memory allocation as this object. If
   // false, this object is responsible for freeing the memory.
diff --git a/testdata/workloads/functional-query/queries/QueryTest/alloc-fail-init.test b/testdata/workloads/functional-query/queries/QueryTest/alloc-fail-init.test
index c6d17b5..c6d77d6 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/alloc-fail-init.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/alloc-fail-init.test
@@ -13,7 +13,7 @@ FunctionContext::Allocate() failed to allocate 1 bytes.
 ---- QUERY
 select sample(timestamp_col) from functional.alltypes
 ---- CATCH
-FunctionContext::Allocate() failed to allocate 248 bytes.
+FunctionContext::Allocate() failed to allocate 2536 bytes.
 ====
 ---- QUERY
 select group_concat(string_col) from functional.alltypes
@@ -58,7 +58,7 @@ FunctionContextImpl::AllocateForResults() failed to allocate 120 bytes.
 ---- QUERY
 select appx_median(int_col) from functional.alltypes
 ---- CATCH
-FunctionContext::Allocate() failed to allocate 248 bytes.
+FunctionContext::Allocate() failed to allocate 2536 bytes.
 ====
 ---- QUERY
 select to_date(now())