You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@madlib.apache.org by do...@apache.org on 2021/02/18 00:31:16 UTC

[madlib] 01/02: sample: Use bernoulli_distribution from boost::, not stl:: or boost::TR1

This is an automated email from the ASF dual-hosted git repository.

domino pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/madlib.git

commit bd54e013f8c2cba95bcfc5cc05fb73cdbfa91cbe
Author: Domino Valdano <dv...@vmware.com>
AuthorDate: Fri Jan 22 12:28:34 2021 -0800

    sample: Use bernoulli_distribution from boost::, not stl:: or boost::TR1
    
    The TR1 sub namespace no longer exists in recent versions of boost.
    Previously, this was fixed by using the C++11 STL version if it's
    available, otherwise falling back on TR1.  But that didn't work
    due to a slight incompatibility between the C++11 and the boost
    version.  The boost version still exists, just not under TR1--and
    we already use it elsewhere in the codebase, so this should be
    the cleanest fix.
    
    Also fix memory-corruption crash in std::domain_error(), by copying formatted
      string to memory allocated safely via postgres with palloc()
    
    Also improve some non-critical C++ issues:
    
    - Convert ptr_fun() to function() to get rid of C++11 deprecation warning
    
    - src/dbal/Reference_proto.hpp:  Fix an error in the template definition
      of class Ref.  The default value for IsMutable should be the value true
      or false, not the type bool.  The only reason this wasn't generating
      compiler errors is that IsMutable happens to be passed explicitly in
      every place in the code where the template is instantiated--so the
      default was ignored.
    
    - Hide global boost symbols from external libraries
      We were already doing this for all STL symbols--for the same reasons,
      doing it for boost as well should make MADlib more robust when
      integrating with 3rd party libraries.
---
 src/dbal/BoostIntegration/MathToolkit_impl.hpp     | 26 ++++++++++++++--------
 src/dbal/Reference_proto.hpp                       |  2 +-
 src/library.ver                                    |  2 +-
 src/modules/lda/lda.cpp                            |  6 ++---
 src/modules/recursive_partitioning/DT_impl.hpp     |  2 +-
 .../recursive_partitioning/feature_encoding.cpp    |  2 +-
 src/modules/sample/WeightedSample_impl.hpp         | 14 ++----------
 src/modules/stats/wilcoxon_signed_rank_test.cpp    | 10 ++++++---
 .../NativeRandomNumberGenerator_proto.hpp          |  3 +--
 9 files changed, 34 insertions(+), 33 deletions(-)

diff --git a/src/dbal/BoostIntegration/MathToolkit_impl.hpp b/src/dbal/BoostIntegration/MathToolkit_impl.hpp
index a83b421..2b93c8c 100644
--- a/src/dbal/BoostIntegration/MathToolkit_impl.hpp
+++ b/src/dbal/BoostIntegration/MathToolkit_impl.hpp
@@ -49,22 +49,30 @@ user_domain_error(const char*, const char* inMessage, const T& inVal) {
     if (std::isnan(inVal))
         return std::numeric_limits<double>::quiet_NaN();
 
-    // The following line is taken from
-    // http://www.boost.org/doc/libs/1_49_0/libs/math/doc/sf_and_dist/html/math_toolkit/policy/pol_tutorial/user_def_err_pol.html
+#if _GLIBCXX_USE_CXX11_ABI
+    int prec = std::numeric_limits<T>::max_digits10;
+#else
+    // For older C++ standards, max_digits10 was not available so we have to convert manually
+    //  http://www.boost.org/doc/libs/1_49_0/libs/math/doc/sf_and_dist/html/math_toolkit/policy/pol_tutorial/user_def_err_pol.html
     int prec = 2 + (std::numeric_limits<T>::digits * 30103UL) / 100000UL;
+#endif // _GLIBCXX_USE_CXX11_ABI
 
-    std::string msg = (boost::format(inMessage)
-            % boost::io::group(std::setprecision(prec), inVal)
-        ).str();
+    throw std::domain_error(inMessage);
 
+     std:: string *msg = new std::string(
+        (boost::format(inMessage)
+            % boost::io::group(std::setprecision(prec), inVal)
+        ).str()
+     );
+ 
     // Some Boost error messages contain a space before the punctuation mark,
     // which we will remove.
-    std::string::iterator lastChar = msg.end() - 1;
-    std::string::iterator secondLastChar = msg.end() - 2;
+    std::string::iterator lastChar = msg->end() - 1;
+    std::string::iterator secondLastChar = msg->end() - 2;
     if (std::ispunct(*lastChar) && std::isspace(*secondLastChar))
-        msg.erase(secondLastChar, lastChar);
+        msg->erase(secondLastChar, lastChar);
 
-    throw std::domain_error(msg);
+    throw std::domain_error(*msg);
 }
 
 } // namespace policies
diff --git a/src/dbal/Reference_proto.hpp b/src/dbal/Reference_proto.hpp
index be3ffaa..550b7db 100644
--- a/src/dbal/Reference_proto.hpp
+++ b/src/dbal/Reference_proto.hpp
@@ -18,7 +18,7 @@ namespace dbal {
  * @tparam IsMutable Whether the target value is mutable. Note that \c IsMutable
  *     overrides any const-qualifier that \c T may contain.
  */
-template <typename T, bool IsMutable = boost::is_const<T>::value_type>
+template <typename T, bool IsMutable = boost::is_const<T>::value>
 class Ref {
 public:
     typedef const T val_type;
diff --git a/src/library.ver b/src/library.ver
index 81021d1..680928d 100644
--- a/src/library.ver
+++ b/src/library.ver
@@ -36,6 +36,6 @@ VERS_1.0 {
 #	hiding those in namespace std.
 #
             std::*;
-
+            boost::*;
         };
 };
diff --git a/src/modules/lda/lda.cpp b/src/modules/lda/lda.cpp
index 6d61b43..2cd7c45 100644
--- a/src/modules/lda/lda.cpp
+++ b/src/modules/lda/lda.cpp
@@ -210,9 +210,9 @@ AnyType lda_gibbs_sample::run(AnyType & args)
     if (!args.getUserFuncContext()) {
         ArrayHandle<int64_t> model64 = args[3].getAs<ArrayHandle<int64_t> >();
         if (model64.size() != model64_size) {
-            std::stringstream ss;
-            ss << "invalid dimension: model64.size() = " << model64.size();
-            throw std::invalid_argument(ss.str());
+            std::stringstream err_msg;
+            err_msg << "invalid dimension: model64.size() = " << model64.size();
+            throw std::invalid_argument(err_msg.str());
         }
         if (__min(model64) < 0) {
             throw std::invalid_argument("invalid topic counts in model");
diff --git a/src/modules/recursive_partitioning/DT_impl.hpp b/src/modules/recursive_partitioning/DT_impl.hpp
index 8f36173..e86d476 100644
--- a/src/modules/recursive_partitioning/DT_impl.hpp
+++ b/src/modules/recursive_partitioning/DT_impl.hpp
@@ -385,7 +385,7 @@ DecisionTree<Container>::impurity(const ColumnVector &stats) const {
         if (impurity_type == GINI){
             return 1 - proportions.cwiseProduct(proportions).sum();
         } else if (impurity_type == ENTROPY){
-            return proportions.unaryExpr(std::ptr_fun(computeEntropy)).sum();
+            return proportions.unaryExpr(std::function<double(const double&)>(computeEntropy)).sum();
         } else if (impurity_type == MISCLASS){
             return 1 - proportions.maxCoeff();
         } else
diff --git a/src/modules/recursive_partitioning/feature_encoding.cpp b/src/modules/recursive_partitioning/feature_encoding.cpp
index a5f131b..7d0083a 100644
--- a/src/modules/recursive_partitioning/feature_encoding.cpp
+++ b/src/modules/recursive_partitioning/feature_encoding.cpp
@@ -164,7 +164,7 @@ dst_compute_entropy_final::run(AnyType &args){
     ColumnVector probs = state.cast<double>() / sum_of_dep_counts;
     // usage of unaryExpr with functor:
     // http://eigen.tuxfamily.org/dox/classEigen_1_1MatrixBase.html#a23fc4bf97168dee2516f85edcfd4cfe7
-    return -(probs.unaryExpr(std::ptr_fun(p_log2_p))).sum();
+    return -(probs.unaryExpr(std::function<double(const double&)>(p_log2_p))).sum();
 }
 // ------------------------------------------------------------
 
diff --git a/src/modules/sample/WeightedSample_impl.hpp b/src/modules/sample/WeightedSample_impl.hpp
index 7137b5b..0f6ae87 100644
--- a/src/modules/sample/WeightedSample_impl.hpp
+++ b/src/modules/sample/WeightedSample_impl.hpp
@@ -7,17 +7,7 @@
 #ifndef MADLIB_MODULES_SAMPLE_WEIGHTED_SAMPLE_IMPL_HPP
 #define MADLIB_MODULES_SAMPLE_WEIGHTED_SAMPLE_IMPL_HPP
 
-#if _GLIBCXX_USE_CXX11_ABI
-#include <random>
-#else
-#include <boost/tr1/random.hpp>
-
-// Import TR1 names (currently used from boost). This can go away once we make
-// the switch to C++11.
-namespace std {
-    using tr1::bernoulli_distribution;
-}
-#endif // _GNUCXX_USE_CXX11_ABI
+#include <boost/random/bernoulli_distribution.hpp>
 
 namespace madlib {
 
@@ -113,7 +103,7 @@ WeightedSampleAccumulator<Container, T>::operator<<(
     // weight
     if (weight > 0.) {
         weight_sum += weight;
-        std::bernoulli_distribution success(weight / weight_sum);
+        boost::bernoulli_distribution<double> success(weight / weight_sum);
         // Note that a NativeRandomNumberGenerator object is stateless, so it
         // is not a problem to instantiate an object for each RN generation...
         NativeRandomNumberGenerator generator;
diff --git a/src/modules/stats/wilcoxon_signed_rank_test.cpp b/src/modules/stats/wilcoxon_signed_rank_test.cpp
index 40f7fa0..4fcd6b5 100644
--- a/src/modules/stats/wilcoxon_signed_rank_test.cpp
+++ b/src/modules/stats/wilcoxon_signed_rank_test.cpp
@@ -68,9 +68,13 @@ wsr_test_transition::run(AnyType &args) {
         ? args[2].getAs<double>()
         : -1;
 
-    if (!std::isfinite(precision))
-        throw std::invalid_argument((boost::format(
-            "Precision must be finite, but got %1%.") % precision).str());
+
+    if (!std::isfinite(precision)) {
+        std::stringstream ss;
+        ss << "Precision must be finite, but got " << precision;
+        std::string *msg = new std::string(ss.str());
+        throw std::invalid_argument( *msg );
+    }
     else if (precision < 0)
         precision = value * std::numeric_limits<double>::epsilon();
 
diff --git a/src/ports/postgres/dbconnector/NativeRandomNumberGenerator_proto.hpp b/src/ports/postgres/dbconnector/NativeRandomNumberGenerator_proto.hpp
index f7f5555..52f8e73 100644
--- a/src/ports/postgres/dbconnector/NativeRandomNumberGenerator_proto.hpp
+++ b/src/ports/postgres/dbconnector/NativeRandomNumberGenerator_proto.hpp
@@ -22,12 +22,11 @@ namespace postgres {
  */
 class NativeRandomNumberGenerator {
 public:
+    typedef double result_type;
 
 #if _GLIBCXX_USE_CXX11_ABI
-    typedef long long result_type;
 #define CONST_EXPR constexpr
 #else
-    typedef double result_type;
 #define CONST_EXPR
 #endif