You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2017/11/06 17:55:55 UTC
[arrow] branch master updated: ARROW-1750: [C++] Remove the need
for arrow/util/random.h
This is an automated email from the ASF dual-hosted git repository.
wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 0106f53 ARROW-1750: [C++] Remove the need for arrow/util/random.h
0106f53 is described below
commit 0106f531c04477b1c8bd088d097624ff43b44658
Author: Phillip Cloud <cp...@gmail.com>
AuthorDate: Mon Nov 6 12:55:49 2017 -0500
ARROW-1750: [C++] Remove the need for arrow/util/random.h
Author: Phillip Cloud <cp...@gmail.com>
Closes #1283 from cpcloud/ARROW-1750 and squashes the following commits:
3f6af737 [Phillip Cloud] ARROW-1750: [C++] Remove the need for arrow/util/random.h
---
cpp/src/arrow/array-test.cc | 93 ++++++++++------
cpp/src/arrow/ipc/ipc-json-test.cc | 4 +-
cpp/src/arrow/test-util.h | 214 ++++++++++++++++++++++++++++---------
cpp/src/arrow/util/CMakeLists.txt | 1 -
cpp/src/arrow/util/decimal.cc | 11 +-
cpp/src/arrow/util/decimal.h | 5 +-
cpp/src/arrow/util/random.h | 126 ----------------------
7 files changed, 236 insertions(+), 218 deletions(-)
diff --git a/cpp/src/arrow/array-test.cc b/cpp/src/arrow/array-test.cc
index 168ef10..9f248cd 100644
--- a/cpp/src/arrow/array-test.cc
+++ b/cpp/src/arrow/array-test.cc
@@ -263,6 +263,8 @@ class TestPrimitiveBuilder : public TestBuilder {
ASSERT_TRUE(result->Equals(*expected));
}
+ int64_t FlipValue(int64_t value) const { return ~value; }
+
protected:
std::shared_ptr<DataType> type_;
std::unique_ptr<BuilderType> builder_;
@@ -272,44 +274,64 @@ class TestPrimitiveBuilder : public TestBuilder {
vector<uint8_t> valid_bytes_;
};
-#define PTYPE_DECL(CapType, c_type) \
- typedef CapType##Array ArrayType; \
- typedef CapType##Builder BuilderType; \
- typedef CapType##Type Type; \
- typedef c_type T; \
- \
- static std::shared_ptr<DataType> type() { \
- return std::shared_ptr<DataType>(new Type()); \
- }
+/// \brief uint8_t and int8_t aren't valid template parameters to
+/// std::uniform_int_distribution, so UniformIntSampleType picks the integer type to sample.
+template <typename T,
+ typename = typename std::enable_if<std::is_integral<T>::value, T>::type>
+struct UniformIntSampleType {
+ using type = T;
+};
+
+template <>
+struct UniformIntSampleType<uint8_t> {
+ using type = uint16_t;
+};
+
+template <>
+struct UniformIntSampleType<int8_t> {
+ using type = int16_t;
+};
-#define PINT_DECL(CapType, c_type, LOWER, UPPER) \
+#define PTYPE_DECL(CapType, c_type) \
+ typedef CapType##Array ArrayType; \
+ typedef CapType##Builder BuilderType; \
+ typedef CapType##Type Type; \
+ typedef c_type T; \
+ \
+ static std::shared_ptr<DataType> type() { return std::make_shared<Type>(); }
+
+#define PINT_DECL(CapType, c_type) \
+ struct P##CapType { \
+ PTYPE_DECL(CapType, c_type) \
+ static void draw(int64_t N, vector<T>* draws) { \
+ using sample_type = typename UniformIntSampleType<c_type>::type; \
+ const T lower = std::numeric_limits<T>::min(); \
+ const T upper = std::numeric_limits<T>::max(); \
+ test::randint(N, static_cast<sample_type>(lower), static_cast<sample_type>(upper), \
+ draws); \
+ } \
+ }
+
+#define PFLOAT_DECL(CapType, c_type, LOWER, UPPER) \
struct P##CapType { \
PTYPE_DECL(CapType, c_type) \
static void draw(int64_t N, vector<T>* draws) { \
- test::randint<T>(N, LOWER, UPPER, draws); \
+ test::random_real(N, 0, LOWER, UPPER, draws); \
} \
}
-#define PFLOAT_DECL(CapType, c_type, LOWER, UPPER) \
- struct P##CapType { \
- PTYPE_DECL(CapType, c_type) \
- static void draw(int64_t N, vector<T>* draws) { \
- test::random_real<T>(N, 0, LOWER, UPPER, draws); \
- } \
- }
-
-PINT_DECL(UInt8, uint8_t, 0, UINT8_MAX);
-PINT_DECL(UInt16, uint16_t, 0, UINT16_MAX);
-PINT_DECL(UInt32, uint32_t, 0, UINT32_MAX);
-PINT_DECL(UInt64, uint64_t, 0, UINT64_MAX);
+PINT_DECL(UInt8, uint8_t);
+PINT_DECL(UInt16, uint16_t);
+PINT_DECL(UInt32, uint32_t);
+PINT_DECL(UInt64, uint64_t);
-PINT_DECL(Int8, int8_t, INT8_MIN, INT8_MAX);
-PINT_DECL(Int16, int16_t, INT16_MIN, INT16_MAX);
-PINT_DECL(Int32, int32_t, INT32_MIN, INT32_MAX);
-PINT_DECL(Int64, int64_t, INT64_MIN, INT64_MAX);
+PINT_DECL(Int8, int8_t);
+PINT_DECL(Int16, int16_t);
+PINT_DECL(Int32, int32_t);
+PINT_DECL(Int64, int64_t);
-PFLOAT_DECL(Float, float, -1000, 1000);
-PFLOAT_DECL(Double, double, -1000, 1000);
+PFLOAT_DECL(Float, float, -1000.0f, 1000.0f);
+PFLOAT_DECL(Double, double, -1000.0, 1000.0);
struct PBoolean {
PTYPE_DECL(Boolean, uint8_t)
@@ -325,6 +347,11 @@ void TestPrimitiveBuilder<PBoolean>::RandomData(int64_t N, double pct_null) {
}
template <>
+int64_t TestPrimitiveBuilder<PBoolean>::FlipValue(int64_t value) const {
+ return !value;
+}
+
+template <>
void TestPrimitiveBuilder<PBoolean>::Check(const std::unique_ptr<BooleanBuilder>& builder,
bool nullable) {
int64_t size = builder->length();
@@ -454,8 +481,8 @@ TYPED_TEST(TestPrimitiveBuilder, Equality) {
const int64_t first_valid_idx = std::distance(valid_bytes.begin(), first_valid);
// This should be true with a very high probability, but might introduce flakiness
ASSERT_LT(first_valid_idx, size - 1);
- draws[first_valid_idx] =
- static_cast<T>(~*reinterpret_cast<int64_t*>(&draws[first_valid_idx]));
+ draws[first_valid_idx] = static_cast<T>(
+ this->FlipValue(*reinterpret_cast<int64_t*>(&draws[first_valid_idx])));
ASSERT_OK(MakeArray(valid_bytes, draws, size, builder, &unequal_array));
// test normal equality
@@ -724,8 +751,8 @@ void CheckSliceApproxEquals() {
vector<T> draws2;
const uint32_t kSeed = 0;
- test::random_real<T>(kSize, kSeed, 0, 100, &draws1);
- test::random_real<T>(kSize, kSeed + 1, 0, 100, &draws2);
+ test::random_real(kSize, kSeed, 0.0, 100.0, &draws1);
+ test::random_real(kSize, kSeed + 1, 0.0, 100.0, &draws2);
// Make the draws equal in the sliced segment, but unequal elsewhere (to
// catch not using the slice offset)
diff --git a/cpp/src/arrow/ipc/ipc-json-test.cc b/cpp/src/arrow/ipc/ipc-json-test.cc
index f2dd9e7..a560f09 100644
--- a/cpp/src/arrow/ipc/ipc-json-test.cc
+++ b/cpp/src/arrow/ipc/ipc-json-test.cc
@@ -222,8 +222,8 @@ void MakeBatchArrays(const std::shared_ptr<Schema>& schema, const int num_rows,
std::vector<int8_t> v1_values;
std::vector<int32_t> v2_values;
- test::randint<int8_t>(num_rows, 0, 100, &v1_values);
- test::randint<int32_t>(num_rows, 0, 100, &v2_values);
+ test::randint(num_rows, 0, 100, &v1_values);
+ test::randint(num_rows, 0, 100, &v2_values);
std::shared_ptr<Array> v1;
ArrayFromVector<Int8Type, int8_t>(is_valid, v1_values, &v1);
diff --git a/cpp/src/arrow/test-util.h b/cpp/src/arrow/test-util.h
index 044fb94..7306f57 100644
--- a/cpp/src/arrow/test-util.h
+++ b/cpp/src/arrow/test-util.h
@@ -18,6 +18,7 @@
#ifndef ARROW_TEST_UTIL_H_
#define ARROW_TEST_UTIL_H_
+#include <algorithm>
#include <cstdint>
#include <limits>
#include <memory>
@@ -38,8 +39,8 @@
#include "arrow/type.h"
#include "arrow/type_traits.h"
#include "arrow/util/bit-util.h"
+#include "arrow/util/decimal.h"
#include "arrow/util/logging.h"
-#include "arrow/util/random.h"
#define ASSERT_RAISES(ENUM, expr) \
do { \
@@ -47,7 +48,7 @@
if (!s.Is##ENUM()) { \
FAIL() << s.ToString(); \
} \
- } while (0)
+ } while (false)
#define ASSERT_OK(expr) \
do { \
@@ -55,7 +56,7 @@
if (!s.ok()) { \
FAIL() << s.ToString(); \
} \
- } while (0)
+ } while (false)
#define ASSERT_OK_NO_THROW(expr) ASSERT_NO_THROW(ASSERT_OK(expr))
@@ -63,15 +64,15 @@
do { \
::arrow::Status s = (expr); \
EXPECT_TRUE(s.ok()); \
- } while (0)
+ } while (false)
#define ABORT_NOT_OK(s) \
do { \
::arrow::Status _s = (s); \
if (ARROW_PREDICT_FALSE(!_s.ok())) { \
- exit(-1); \
+ exit(EXIT_FAILURE); \
} \
- } while (0);
+ } while (false);
namespace arrow {
@@ -79,27 +80,22 @@ using ArrayVector = std::vector<std::shared_ptr<Array>>;
namespace test {
-template <typename T>
-void randint(int64_t N, T lower, T upper, std::vector<T>* out) {
- Random rng(random_seed());
- uint64_t draw;
- uint64_t span = upper - lower;
- T val;
- for (int64_t i = 0; i < N; ++i) {
- draw = rng.Uniform64(span);
- val = static_cast<T>(draw + lower);
- out->push_back(val);
- }
+template <typename T, typename U>
+void randint(int64_t N, T lower, T upper, std::vector<U>* out) {
+ const int random_seed = 0;
+ std::mt19937 gen(random_seed);
+ std::uniform_int_distribution<T> d(lower, upper);
+ out->resize(N, static_cast<T>(0));
+ std::generate(out->begin(), out->end(), [&d, &gen] { return static_cast<U>(d(gen)); });
}
-template <typename T>
+template <typename T, typename U>
void random_real(int64_t n, uint32_t seed, T min_value, T max_value,
- std::vector<T>* out) {
+ std::vector<U>* out) {
std::mt19937 gen(seed);
std::uniform_real_distribution<T> d(min_value, max_value);
- for (int64_t i = 0; i < n; ++i) {
- out->push_back(d(gen));
- }
+ out->resize(n, static_cast<T>(0));
+ std::generate(out->begin(), out->end(), [&d, &gen] { return static_cast<U>(d(gen)); });
}
template <typename T>
@@ -115,7 +111,8 @@ inline Status CopyBufferFromVector(const std::vector<T>& values, MemoryPool* poo
auto buffer = std::make_shared<PoolBuffer>(pool);
RETURN_NOT_OK(buffer->Resize(nbytes));
- memcpy(buffer->mutable_data(), values.data(), nbytes);
+ auto immutable_data = reinterpret_cast<const uint8_t*>(values.data());
+ std::copy(immutable_data, immutable_data + nbytes, buffer->mutable_data());
*result = buffer;
return Status::OK();
@@ -143,56 +140,173 @@ static inline Status GetBitmapFromVector(const std::vector<T>& is_valid,
// Sets approximately pct_null of the first n bytes in null_bytes to zero
// and the rest to non-zero (true) values.
static inline void random_null_bytes(int64_t n, double pct_null, uint8_t* null_bytes) {
- Random rng(random_seed());
- for (int64_t i = 0; i < n; ++i) {
- null_bytes[i] = rng.NextDoubleFraction() > pct_null;
- }
+ const int random_seed = 0;
+ std::mt19937 gen(random_seed);
+ std::uniform_real_distribution<double> d(0.0, 1.0);
+ std::generate(null_bytes, null_bytes + n,
+ [&d, &gen, &pct_null] { return d(gen) > pct_null; });
}
static inline void random_is_valid(int64_t n, double pct_null,
std::vector<bool>* is_valid) {
- Random rng(random_seed());
- for (int64_t i = 0; i < n; ++i) {
- is_valid->push_back(rng.NextDoubleFraction() > pct_null);
- }
+ const int random_seed = 0;
+ std::mt19937 gen(random_seed);
+ std::uniform_real_distribution<double> d(0.0, 1.0);
+ is_valid->resize(n, false);
+ std::generate(is_valid->begin(), is_valid->end(),
+ [&d, &gen, &pct_null] { return d(gen) > pct_null; });
}
static inline void random_bytes(int64_t n, uint32_t seed, uint8_t* out) {
std::mt19937 gen(seed);
- std::uniform_int_distribution<int> d(0, 255);
+ std::uniform_int_distribution<int> d(0, std::numeric_limits<uint8_t>::max());
+ std::generate(out, out + n, [&d, &gen] { return static_cast<uint8_t>(d(gen) & 0xFF); });
+}
- for (int64_t i = 0; i < n; ++i) {
- out[i] = static_cast<uint8_t>(d(gen) & 0xFF);
+static void DecimalRange(int32_t precision, Decimal128* min_decimal,
+ Decimal128* max_decimal) {
+ DCHECK_GE(precision, 1) << "decimal precision must be greater than or equal to 1, got "
+ << precision;
+ DCHECK_LE(precision, 38) << "decimal precision must be less than or equal to 38, got "
+ << precision;
+
+ switch (precision) {
+ case 1:
+ case 2:
+ *max_decimal = std::numeric_limits<int8_t>::max();
+ break;
+ case 3:
+ case 4:
+ *max_decimal = std::numeric_limits<int16_t>::max();
+ break;
+ case 5:
+ case 6:
+ *max_decimal = 8388607;
+ break;
+ case 7:
+ case 8:
+ case 9:
+ *max_decimal = std::numeric_limits<int32_t>::max();
+ break;
+ case 10:
+ case 11:
+ *max_decimal = 549755813887;
+ break;
+ case 12:
+ case 13:
+ case 14:
+ *max_decimal = 140737488355327;
+ break;
+ case 15:
+ case 16:
+ *max_decimal = 36028797018963967;
+ break;
+ case 17:
+ case 18:
+ *max_decimal = std::numeric_limits<int64_t>::max();
+ break;
+ case 19:
+ case 20:
+ case 21:
+ *max_decimal = Decimal128("2361183241434822606847");
+ break;
+ case 22:
+ case 23:
+ *max_decimal = Decimal128("604462909807314587353087");
+ break;
+ case 24:
+ case 25:
+ case 26:
+ *max_decimal = Decimal128("154742504910672534362390527");
+ break;
+ case 27:
+ case 28:
+ *max_decimal = Decimal128("39614081257132168796771975167");
+ break;
+ case 29:
+ case 30:
+ case 31:
+ *max_decimal = Decimal128("10141204801825835211973625643007");
+ break;
+ case 32:
+ case 33:
+ *max_decimal = Decimal128("2596148429267413814265248164610047");
+ break;
+ case 34:
+ case 35:
+ *max_decimal = Decimal128("664613997892457936451903530140172287");
+ break;
+ case 36:
+ case 37:
+ case 38:
+ *max_decimal = Decimal128("170141183460469231731687303715884105727");
+ break;
+ default:
+ DCHECK(false);
+ break;
}
+
+ *min_decimal = ~(*max_decimal);
}
-static inline void random_ascii(int64_t n, uint32_t seed, uint8_t* out) {
+class UniformDecimalDistribution {
+ public:
+ explicit UniformDecimalDistribution(int32_t precision) {
+ Decimal128 max_decimal;
+ Decimal128 min_decimal;
+ DecimalRange(precision, &min_decimal, &max_decimal);
+
+ const auto min_low = static_cast<int64_t>(min_decimal.low_bits());
+ const auto max_low = static_cast<int64_t>(max_decimal.low_bits());
+
+ const int64_t min_high = min_decimal.high_bits();
+ const int64_t max_high = max_decimal.high_bits();
+
+ using param_type = std::uniform_int_distribution<int64_t>::param_type;
+
+ lower_dist_.param(param_type(min_low, max_low));
+ upper_dist_.param(param_type(min_high, max_high));
+ }
+
+ template <typename Generator>
+ Decimal128 operator()(Generator& gen) {
+ return Decimal128(upper_dist_(gen), static_cast<uint64_t>(lower_dist_(gen)));
+ }
+
+ private:
+ // The lower bits distribution is intentionally int64_t.
+ // If it were uint64_t then the size of the interval [min_low, max_low] would be 0
+ // because min_low > max_low due to 2's complement.
+ // So, we generate the same range of bits using int64_t and then cast to uint64_t.
+ std::uniform_int_distribution<int64_t> lower_dist_;
+ std::uniform_int_distribution<int64_t> upper_dist_;
+};
+
+static inline void random_decimals(int64_t n, uint32_t seed, int32_t precision,
+ uint8_t* out) {
std::mt19937 gen(seed);
- std::uniform_int_distribution<int> d(65, 122);
+ UniformDecimalDistribution dist(precision);
- for (int64_t i = 0; i < n; ++i) {
- out[i] = static_cast<uint8_t>(d(gen) & 0xFF);
+ for (int64_t i = 0; i < n; ++i, out += 16) {
+ const Decimal128 value(dist(gen));
+ value.ToBytes(out);
}
}
-template <typename T>
-void rand_uniform_int(int64_t n, uint32_t seed, T min_value, T max_value, T* out) {
+template <typename T, typename U>
+void rand_uniform_int(int64_t n, uint32_t seed, T min_value, T max_value, U* out) {
DCHECK(out || (n == 0));
std::mt19937 gen(seed);
std::uniform_int_distribution<T> d(min_value, max_value);
- for (int64_t i = 0; i < n; ++i) {
- out[i] = static_cast<T>(d(gen));
- }
+ std::generate(out, out + n, [&d, &gen] { return static_cast<U>(d(gen)); });
+}
+
+static inline void random_ascii(int64_t n, uint32_t seed, uint8_t* out) {
+ rand_uniform_int(n, seed, static_cast<int32_t>('A'), static_cast<int32_t>('z'), out);
}
static inline int64_t null_count(const std::vector<uint8_t>& valid_bytes) {
- int64_t result = 0;
- for (size_t i = 0; i < valid_bytes.size(); ++i) {
- if (valid_bytes[i] == 0) {
- ++result;
- }
- }
- return result;
+ return static_cast<int64_t>(std::count(valid_bytes.cbegin(), valid_bytes.cend(), '\0'));
}
Status MakeRandomInt32PoolBuffer(int64_t length, MemoryPool* pool,
diff --git a/cpp/src/arrow/util/CMakeLists.txt b/cpp/src/arrow/util/CMakeLists.txt
index 5df5e74..7810a3b 100644
--- a/cpp/src/arrow/util/CMakeLists.txt
+++ b/cpp/src/arrow/util/CMakeLists.txt
@@ -38,7 +38,6 @@ install(FILES
logging.h
macros.h
parallel.h
- random.h
rle-encoding.h
sse-util.h
stl.h
diff --git a/cpp/src/arrow/util/decimal.cc b/cpp/src/arrow/util/decimal.cc
index 9d94bef..cc18025 100644
--- a/cpp/src/arrow/util/decimal.cc
+++ b/cpp/src/arrow/util/decimal.cc
@@ -43,14 +43,17 @@ Decimal128::Decimal128(const uint8_t* bytes)
}
std::array<uint8_t, 16> Decimal128::ToBytes() const {
- const uint64_t raw[] = {BitUtil::ToLittleEndian(low_bits_),
- BitUtil::ToLittleEndian(static_cast<uint64_t>(high_bits_))};
- const auto* raw_data = reinterpret_cast<const uint8_t*>(raw);
std::array<uint8_t, 16> out{{0}};
- std::copy(raw_data, raw_data + out.size(), out.begin());
+ ToBytes(out.data());
return out;
}
+void Decimal128::ToBytes(uint8_t* out) const {
+ DCHECK_NE(out, NULLPTR);
+ reinterpret_cast<uint64_t*>(out)[0] = BitUtil::ToLittleEndian(low_bits_);
+ reinterpret_cast<int64_t*>(out)[1] = BitUtil::ToLittleEndian(high_bits_);
+}
+
static constexpr Decimal128 kTenTo36(static_cast<int64_t>(0xC097CE7BC90715),
0xB34B9F1000000000);
static constexpr Decimal128 kTenTo18(0xDE0B6B3A7640000);
diff --git a/cpp/src/arrow/util/decimal.h b/cpp/src/arrow/util/decimal.h
index 487f222..a0423e9 100644
--- a/cpp/src/arrow/util/decimal.h
+++ b/cpp/src/arrow/util/decimal.h
@@ -102,13 +102,14 @@ class ARROW_EXPORT Decimal128 {
Decimal128& operator>>=(uint32_t bits);
/// \brief Get the high bits of the two's complement representation of the number.
- int64_t high_bits() const { return high_bits_; }
+ inline int64_t high_bits() const { return high_bits_; }
/// \brief Get the low bits of the two's complement representation of the number.
- uint64_t low_bits() const { return low_bits_; }
+ inline uint64_t low_bits() const { return low_bits_; }
/// \brief Return the raw bytes of the value in little-endian byte order.
std::array<uint8_t, 16> ToBytes() const;
+ void ToBytes(uint8_t* out) const;
/// \brief Convert the Decimal128 value to a base 10 decimal string with the given
/// scale.
diff --git a/cpp/src/arrow/util/random.h b/cpp/src/arrow/util/random.h
deleted file mode 100644
index 2e05a73..0000000
--- a/cpp/src/arrow/util/random.h
+++ /dev/null
@@ -1,126 +0,0 @@
-// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file. See the AUTHORS file for names of contributors.
-
-// Moved from Kudu http://github.com/cloudera/kudu
-
-#ifndef ARROW_UTIL_RANDOM_H_
-#define ARROW_UTIL_RANDOM_H_
-
-#include <stdint.h>
-
-#include <cmath>
-
-namespace arrow {
-namespace internal {
-namespace random {
-
-static const uint32_t M = 2147483647L; // 2^31-1
-const double kTwoPi = 6.283185307179586476925286;
-
-} // namespace random
-} // namespace internal
-
-// A very simple random number generator. Not especially good at
-// generating truly random bits, but good enough for our needs in this
-// package. This implementation is not thread-safe.
-class Random {
- public:
- explicit Random(uint32_t s) : seed_(s & 0x7fffffffu) {
- // Avoid bad seeds.
- if (seed_ == 0 || seed_ == internal::random::M) {
- seed_ = 1;
- }
- }
-
- // Next pseudo-random 32-bit unsigned integer.
- // FIXME: This currently only generates 31 bits of randomness.
- // The MSB will always be zero.
- uint32_t Next() {
- static const uint64_t A = 16807; // bits 14, 8, 7, 5, 2, 1, 0
- // We are computing
- // seed_ = (seed_ * A) % M, where M = 2^31-1
- //
- // seed_ must not be zero or M, or else all subsequent computed values
- // will be zero or M respectively. For all other values, seed_ will end
- // up cycling through every number in [1,M-1]
- uint64_t product = seed_ * A;
-
- // Compute (product % M) using the fact that ((x << 31) % M) == x.
- seed_ = static_cast<uint32_t>((product >> 31) + (product & internal::random::M));
- // The first reduction may overflow by 1 bit, so we may need to
- // repeat. mod == M is not possible; using > allows the faster
- // sign-bit-based test.
- if (seed_ > internal::random::M) {
- seed_ -= internal::random::M;
- }
- return seed_;
- }
-
- // Alias for consistency with Next64
- uint32_t Next32() { return Next(); }
-
- // Next pseudo-random 64-bit unsigned integer.
- // FIXME: This currently only generates 62 bits of randomness due to Next()
- // only giving 31 bits of randomness. The 2 most significant bits will always
- // be zero.
- uint64_t Next64() {
- uint64_t large = Next();
- // Only shift by 31 bits so we end up with zeros in MSB and not scattered
- // throughout the 64-bit word. This is due to the weakness in Next() noted
- // above.
- large <<= 31;
- large |= Next();
- return large;
- }
-
- // Returns a uniformly distributed value in the range [0..n-1]
- // REQUIRES: n > 0
- uint32_t Uniform(uint32_t n) { return Next() % n; }
-
- // Alias for consistency with Uniform64
- uint32_t Uniform32(uint32_t n) { return Uniform(n); }
-
- // Returns a uniformly distributed 64-bit value in the range [0..n-1]
- // REQUIRES: n > 0
- uint64_t Uniform64(uint64_t n) { return Next64() % n; }
-
- // Randomly returns true ~"1/n" of the time, and false otherwise.
- // REQUIRES: n > 0
- bool OneIn(int n) { return (Next() % n) == 0; }
-
- // Skewed: pick "base" uniformly from range [0,max_log] and then
- // return "base" random bits. The effect is to pick a number in the
- // range [0,2^max_log-1] with exponential bias towards smaller numbers.
- uint32_t Skewed(int max_log) { return Uniform(1 << Uniform(max_log + 1)); }
-
- // Creates a normal distribution variable using the
- // Box-Muller transform. See:
- // http://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform
- // Adapted from WebRTC source code at:
- // webrtc/trunk/modules/video_coding/main/test/test_util.cc
- double Normal(double mean, double std_dev) {
- double uniform1 = (Next() + 1.0) / (internal::random::M + 1.0);
- double uniform2 = (Next() + 1.0) / (internal::random::M + 1.0);
- return (mean +
- std_dev * sqrt(-2 * ::log(uniform1)) *
- cos(internal::random::kTwoPi * uniform2));
- }
-
- // Return a random number between 0.0 and 1.0 inclusive.
- double NextDoubleFraction() {
- return Next() / static_cast<double>(internal::random::M + 1.0);
- }
-
- private:
- uint32_t seed_;
-};
-
-uint32_t random_seed() {
- // TODO(wesm): use system time to get a reasonably random seed
- return 0;
-}
-
-} // namespace arrow
-
-#endif // ARROW_UTIL_RANDOM_H_
--
To stop receiving notification emails like this one, please contact
['"commits@arrow.apache.org" <co...@arrow.apache.org>'].