You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ap...@apache.org on 2021/12/30 11:49:13 UTC
[arrow] branch master updated: ARROW-12404: [C++] Implement "random" nullary function that generates uniform random between 0 and 1
This is an automated email from the ASF dual-hosted git repository.
apitrou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new ceaed97 ARROW-12404: [C++] Implement "random" nullary function that generates uniform random between 0 and 1
ceaed97 is described below
commit ceaed97f010b4e1c67a34f73b683b1ca3df16930
Author: Alex Şuhan <al...@gmail.com>
AuthorDate: Thu Dec 30 12:46:57 2021 +0100
ARROW-12404: [C++] Implement "random" nullary function that generates uniform random between 0 and 1
Closes #11864 from asuhan/asuhan/random_nullary
Lead-authored-by: Alex Şuhan <al...@gmail.com>
Co-authored-by: Antoine Pitrou <an...@python.org>
Co-authored-by: Yibo Cai <yi...@arm.com>
Signed-off-by: Antoine Pitrou <an...@python.org>
---
cpp/src/arrow/CMakeLists.txt | 1 +
cpp/src/arrow/compute/api_scalar.cc | 30 +++++
cpp/src/arrow/compute/api_scalar.h | 24 ++++
cpp/src/arrow/compute/kernels/CMakeLists.txt | 4 +-
.../arrow/compute/kernels/scalar_compare_test.cc | 13 ++-
.../arrow/compute/kernels/scalar_if_else_test.cc | 10 +-
.../arrow/compute/kernels/scalar_nested_test.cc | 35 +++---
cpp/src/arrow/compute/kernels/scalar_random.cc | 105 ++++++++++++++++++
.../compute/kernels/scalar_random_benchmark.cc | 56 ++++++++++
.../arrow/compute/kernels/scalar_random_test.cc | 123 +++++++++++++++++++++
cpp/src/arrow/compute/registry.cc | 1 +
cpp/src/arrow/compute/registry_internal.h | 1 +
docs/source/cpp/compute.rst | 17 ++-
python/pyarrow/tests/test_compute.py | 4 +-
14 files changed, 392 insertions(+), 32 deletions(-)
diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt
index cc979a2..12d5f41 100644
--- a/cpp/src/arrow/CMakeLists.txt
+++ b/cpp/src/arrow/CMakeLists.txt
@@ -420,6 +420,7 @@ if(ARROW_COMPUTE)
compute/kernels/scalar_compare.cc
compute/kernels/scalar_if_else.cc
compute/kernels/scalar_nested.cc
+ compute/kernels/scalar_random.cc
compute/kernels/scalar_set_lookup.cc
compute/kernels/scalar_string.cc
compute/kernels/scalar_temporal_binary.cc
diff --git a/cpp/src/arrow/compute/api_scalar.cc b/cpp/src/arrow/compute/api_scalar.cc
index 24a5a1e..021499a 100644
--- a/cpp/src/arrow/compute/api_scalar.cc
+++ b/cpp/src/arrow/compute/api_scalar.cc
@@ -197,6 +197,23 @@ struct EnumTraits<compute::Utf8NormalizeOptions::Form>
}
};
+template <>  // enum traits for RandomOptions::Initializer; its two values are listed below
+struct EnumTraits<compute::RandomOptions::Initializer>
+ : BasicEnumTraits<compute::RandomOptions::Initializer,
+ compute::RandomOptions::Initializer::SystemRandom,
+ compute::RandomOptions::Initializer::Seed> {
+ static std::string name() { return "RandomOptions::Initializer"; }  // name of the enum type
+ static std::string value_name(compute::RandomOptions::Initializer value) {  // name of one value
+ switch (value) {
+ case compute::RandomOptions::Initializer::SystemRandom:
+ return "SystemRandom";
+ case compute::RandomOptions::Initializer::Seed:
+ return "Seed";
+ }
+ return "<INVALID>";  // fallback when `value` matches neither case above
+ }
+};
+
} // namespace internal
namespace compute {
@@ -280,6 +297,10 @@ static auto kWeekOptionsType = GetFunctionOptionsType<WeekOptions>(
DataMember("week_starts_monday", &WeekOptions::week_starts_monday),
DataMember("count_from_zero", &WeekOptions::count_from_zero),
DataMember("first_week_is_fully_in_year", &WeekOptions::first_week_is_fully_in_year));
+static auto kRandomOptionsType = GetFunctionOptionsType<RandomOptions>(
+ DataMember("length", &RandomOptions::length),
+ DataMember("initializer", &RandomOptions::initializer),
+ DataMember("seed", &RandomOptions::seed));
} // namespace
} // namespace internal
@@ -467,6 +488,14 @@ WeekOptions::WeekOptions(bool week_starts_monday, bool count_from_zero,
first_week_is_fully_in_year(first_week_is_fully_in_year) {}
constexpr char WeekOptions::kTypeName[];
+RandomOptions::RandomOptions(int64_t length, Initializer initializer, uint64_t seed)
+ : FunctionOptions(internal::kRandomOptionsType),
+ length(length),
+ initializer(initializer),
+ seed(seed) {}
+RandomOptions::RandomOptions() : RandomOptions(0, SystemRandom, 0) {}
+constexpr char RandomOptions::kTypeName[];
+
namespace internal {
void RegisterScalarOptions(FunctionRegistry* registry) {
DCHECK_OK(registry->AddFunctionOptionsType(kArithmeticOptionsType));
@@ -493,6 +522,7 @@ void RegisterScalarOptions(FunctionRegistry* registry) {
DCHECK_OK(registry->AddFunctionOptionsType(kTrimOptionsType));
DCHECK_OK(registry->AddFunctionOptionsType(kUtf8NormalizeOptionsType));
DCHECK_OK(registry->AddFunctionOptionsType(kWeekOptionsType));
+ DCHECK_OK(registry->AddFunctionOptionsType(kRandomOptionsType));
}
} // namespace internal
diff --git a/cpp/src/arrow/compute/api_scalar.h b/cpp/src/arrow/compute/api_scalar.h
index 6e1c1ac..3d92215 100644
--- a/cpp/src/arrow/compute/api_scalar.h
+++ b/cpp/src/arrow/compute/api_scalar.h
@@ -420,6 +420,30 @@ struct ARROW_EXPORT Utf8NormalizeOptions : public FunctionOptions {
Form form;
};
+class ARROW_EXPORT RandomOptions : public FunctionOptions {  // options read by the "random" kernel
+ public:
+ enum Initializer { SystemRandom, Seed };  // how the generator gets its seed
+
+ static RandomOptions FromSystemRandom(int64_t length) {  // `seed` is set to 0 here
+ return RandomOptions{length, SystemRandom, 0};
+ }
+ static RandomOptions FromSeed(int64_t length, uint64_t seed) {  // caller-provided seed
+ return RandomOptions{length, Seed, seed};
+ }
+
+ RandomOptions(int64_t length, Initializer initializer, uint64_t seed);
+ RandomOptions();  // api_scalar.cc delegates this to RandomOptions(0, SystemRandom, 0)
+ constexpr static char const kTypeName[] = "RandomOptions";
+ static RandomOptions Defaults() { return RandomOptions(); }
+
+ /// The length of the array returned. A negative length makes the kernel return Invalid.
+ int64_t length;
+ /// The type of initialization for random number generation - system or provided seed.
+ Initializer initializer;
+ /// The seed value for random number generation; the kernel reads it only for Seed.
+ uint64_t seed;
+};
+
/// @}
/// \brief Get the absolute value of a value.
diff --git a/cpp/src/arrow/compute/kernels/CMakeLists.txt b/cpp/src/arrow/compute/kernels/CMakeLists.txt
index 28686a9..93a02cd 100644
--- a/cpp/src/arrow/compute/kernels/CMakeLists.txt
+++ b/cpp/src/arrow/compute/kernels/CMakeLists.txt
@@ -24,12 +24,13 @@ add_arrow_compute_test(scalar_test
scalar_boolean_test.cc
scalar_cast_test.cc
scalar_compare_test.cc
+ scalar_if_else_test.cc
scalar_nested_test.cc
+ scalar_random_test.cc
scalar_set_lookup_test.cc
scalar_string_test.cc
scalar_temporal_test.cc
scalar_validity_test.cc
- scalar_if_else_test.cc
test_util.cc)
add_arrow_benchmark(scalar_arithmetic_benchmark PREFIX "arrow-compute")
@@ -37,6 +38,7 @@ add_arrow_benchmark(scalar_boolean_benchmark PREFIX "arrow-compute")
add_arrow_benchmark(scalar_cast_benchmark PREFIX "arrow-compute")
add_arrow_benchmark(scalar_compare_benchmark PREFIX "arrow-compute")
add_arrow_benchmark(scalar_if_else_benchmark PREFIX "arrow-compute")
+add_arrow_benchmark(scalar_random_benchmark PREFIX "arrow-compute")
add_arrow_benchmark(scalar_set_lookup_benchmark PREFIX "arrow-compute")
add_arrow_benchmark(scalar_string_benchmark PREFIX "arrow-compute")
diff --git a/cpp/src/arrow/compute/kernels/scalar_compare_test.cc b/cpp/src/arrow/compute/kernels/scalar_compare_test.cc
index 64abb9f..0fa97e1 100644
--- a/cpp/src/arrow/compute/kernels/scalar_compare_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_compare_test.cc
@@ -1211,18 +1211,19 @@ class TestVarArgsCompareParametricTemporal : public TestVarArgsCompare<T> {
Datum array(const std::string& value) { return ArrayFromJSON(type_singleton(), value); }
};
-using NumericBasedTypes =
+using CompareNumericBasedTypes =
::testing::Types<UInt8Type, UInt16Type, UInt32Type, UInt64Type, Int8Type, Int16Type,
Int32Type, Int64Type, FloatType, DoubleType, Date32Type, Date64Type>;
-using ParametricTemporalTypes = ::testing::Types<TimestampType, Time32Type, Time64Type>;
-using FixedSizeBinaryTypes = ::testing::Types<FixedSizeBinaryType>;
+using CompareParametricTemporalTypes =
+ ::testing::Types<TimestampType, Time32Type, Time64Type>;
+using CompareFixedSizeBinaryTypes = ::testing::Types<FixedSizeBinaryType>;
-TYPED_TEST_SUITE(TestVarArgsCompareNumeric, NumericBasedTypes);
+TYPED_TEST_SUITE(TestVarArgsCompareNumeric, CompareNumericBasedTypes);
TYPED_TEST_SUITE(TestVarArgsCompareDecimal, DecimalArrowTypes);
TYPED_TEST_SUITE(TestVarArgsCompareFloating, RealArrowTypes);
-TYPED_TEST_SUITE(TestVarArgsCompareParametricTemporal, ParametricTemporalTypes);
+TYPED_TEST_SUITE(TestVarArgsCompareParametricTemporal, CompareParametricTemporalTypes);
TYPED_TEST_SUITE(TestVarArgsCompareBinary, BaseBinaryArrowTypes);
-TYPED_TEST_SUITE(TestVarArgsCompareFixedSizeBinary, FixedSizeBinaryTypes);
+TYPED_TEST_SUITE(TestVarArgsCompareFixedSizeBinary, CompareFixedSizeBinaryTypes);
TYPED_TEST(TestVarArgsCompareNumeric, MinElementWise) {
this->AssertNullScalar(MinElementWise, {});
diff --git a/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc b/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc
index 6f219af..711b318 100644
--- a/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc
@@ -61,12 +61,12 @@ class TestIfElseKernel : public ::testing::Test {};
template <typename Type>
class TestIfElsePrimitive : public ::testing::Test {};
-using NumericBasedTypes =
+using IfElseNumericBasedTypes =
::testing::Types<UInt8Type, UInt16Type, UInt32Type, UInt64Type, Int8Type, Int16Type,
Int32Type, Int64Type, FloatType, DoubleType, Date32Type, Date64Type,
Time32Type, Time64Type, TimestampType, MonthIntervalType>;
-TYPED_TEST_SUITE(TestIfElsePrimitive, NumericBasedTypes);
+TYPED_TEST_SUITE(TestIfElsePrimitive, IfElseNumericBasedTypes);
TYPED_TEST(TestIfElsePrimitive, IfElseFixedSizeRand) {
using ArrayType = typename TypeTraits<TypeParam>::ArrayType;
@@ -960,7 +960,7 @@ TYPED_TEST(TestIfElseDict, DifferentDictionaries) {
template <typename Type>
class TestCaseWhenNumeric : public ::testing::Test {};
-TYPED_TEST_SUITE(TestCaseWhenNumeric, NumericBasedTypes);
+TYPED_TEST_SUITE(TestCaseWhenNumeric, IfElseNumericBasedTypes);
Datum MakeStruct(const std::vector<Datum>& conds) {
EXPECT_OK_AND_ASSIGN(auto result, CallFunction("make_struct", conds));
@@ -2176,7 +2176,7 @@ class TestCoalesceBinary : public ::testing::Test {};
template <typename Type>
class TestCoalesceList : public ::testing::Test {};
-TYPED_TEST_SUITE(TestCoalesceNumeric, NumericBasedTypes);
+TYPED_TEST_SUITE(TestCoalesceNumeric, IfElseNumericBasedTypes);
TYPED_TEST_SUITE(TestCoalesceBinary, BaseBinaryArrowTypes);
TYPED_TEST_SUITE(TestCoalesceList, ListArrowTypes);
@@ -2929,7 +2929,7 @@ class TestChooseNumeric : public ::testing::Test {};
template <typename Type>
class TestChooseBinary : public ::testing::Test {};
-TYPED_TEST_SUITE(TestChooseNumeric, NumericBasedTypes);
+TYPED_TEST_SUITE(TestChooseNumeric, IfElseNumericBasedTypes);
TYPED_TEST_SUITE(TestChooseBinary, BaseBinaryArrowTypes);
TYPED_TEST(TestChooseNumeric, FixedSize) {
diff --git a/cpp/src/arrow/compute/kernels/scalar_nested_test.cc b/cpp/src/arrow/compute/kernels/scalar_nested_test.cc
index 1f78fd6..4640e1e 100644
--- a/cpp/src/arrow/compute/kernels/scalar_nested_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_nested_test.cc
@@ -236,25 +236,25 @@ struct {
MakeStructOptions opts{field_names, options...};
return CallFunction("make_struct", args, &opts);
}
-} MakeStruct;
+} MakeStructor;
TEST(MakeStruct, Scalar) {
auto i32 = MakeScalar(1);
auto f64 = MakeScalar(2.5);
auto str = MakeScalar("yo");
- EXPECT_THAT(MakeStruct({i32, f64, str}, {"i", "f", "s"}),
+ EXPECT_THAT(MakeStructor({i32, f64, str}, {"i", "f", "s"}),
ResultWith(Datum(*StructScalar::Make({i32, f64, str}, {"i", "f", "s"}))));
// Names default to field_index
- EXPECT_THAT(MakeStruct({i32, f64, str}),
+ EXPECT_THAT(MakeStructor({i32, f64, str}),
ResultWith(Datum(*StructScalar::Make({i32, f64, str}, {"0", "1", "2"}))));
// No field names or input values is fine
- EXPECT_THAT(MakeStruct({}), ResultWith(Datum(*StructScalar::Make({}, {}))));
+ EXPECT_THAT(MakeStructor({}), ResultWith(Datum(*StructScalar::Make({}, {}))));
// Three field names but one input value
- EXPECT_THAT(MakeStruct({str}, {"i", "f", "s"}), Raises(StatusCode::Invalid));
+ EXPECT_THAT(MakeStructor({str}, {"i", "f", "s"}), Raises(StatusCode::Invalid));
}
TEST(MakeStruct, Array) {
@@ -263,15 +263,16 @@ TEST(MakeStruct, Array) {
auto i32 = ArrayFromJSON(int32(), "[42, 13, 7]");
auto str = ArrayFromJSON(utf8(), R"(["aa", "aa", "aa"])");
- EXPECT_THAT(MakeStruct({i32, str}, {"i", "s"}),
+ EXPECT_THAT(MakeStructor({i32, str}, {"i", "s"}),
ResultWith(Datum(*StructArray::Make({i32, str}, field_names))));
// Scalars are broadcast to the length of the arrays
- EXPECT_THAT(MakeStruct({i32, MakeScalar("aa")}, {"i", "s"}),
+ EXPECT_THAT(MakeStructor({i32, MakeScalar("aa")}, {"i", "s"}),
ResultWith(Datum(*StructArray::Make({i32, str}, field_names))));
// Array length mismatch
- EXPECT_THAT(MakeStruct({i32->Slice(1), str}, field_names), Raises(StatusCode::Invalid));
+ EXPECT_THAT(MakeStructor({i32->Slice(1), str}, field_names),
+ Raises(StatusCode::Invalid));
}
TEST(MakeStruct, NullableMetadataPassedThru) {
@@ -284,7 +285,7 @@ TEST(MakeStruct, NullableMetadataPassedThru) {
key_value_metadata({"a", "b"}, {"ALPHA", "BRAVO"}), nullptr};
ASSERT_OK_AND_ASSIGN(auto proj,
- MakeStruct({i32, str}, field_names, nullability, metadata));
+ MakeStructor({i32, str}, field_names, nullability, metadata));
AssertTypeEqual(*proj.type(), StructType({
field("i", int32(), /*nullable=*/true, metadata[0]),
@@ -292,8 +293,8 @@ TEST(MakeStruct, NullableMetadataPassedThru) {
}));
// error: projecting an array containing nulls with nullable=false
- EXPECT_THAT(MakeStruct({i32, ArrayFromJSON(utf8(), R"(["aa", null, "aa"])")},
- field_names, nullability, metadata),
+ EXPECT_THAT(MakeStructor({i32, ArrayFromJSON(utf8(), R"(["aa", null, "aa"])")},
+ field_names, nullability, metadata),
Raises(StatusCode::Invalid));
}
@@ -317,13 +318,13 @@ TEST(MakeStruct, ChunkedArray) {
ASSERT_OK_AND_ASSIGN(Datum expected,
ChunkedArray::Make({expected_0, expected_1, expected_2}));
- ASSERT_OK_AND_EQ(expected, MakeStruct({i32, str}, field_names));
+ ASSERT_OK_AND_EQ(expected, MakeStructor({i32, str}, field_names));
// Scalars are broadcast to the length of the arrays
- ASSERT_OK_AND_EQ(expected, MakeStruct({i32, MakeScalar("aa")}, field_names));
+ ASSERT_OK_AND_EQ(expected, MakeStructor({i32, MakeScalar("aa")}, field_names));
// Array length mismatch
- ASSERT_RAISES(Invalid, MakeStruct({i32->Slice(1), str}, field_names));
+ ASSERT_RAISES(Invalid, MakeStructor({i32->Slice(1), str}, field_names));
}
TEST(MakeStruct, ChunkedArrayDifferentChunking) {
@@ -354,13 +355,13 @@ TEST(MakeStruct, ChunkedArrayDifferentChunking) {
ASSERT_OK_AND_ASSIGN(Datum expected, ChunkedArray::Make(expected_chunks));
- ASSERT_OK_AND_EQ(expected, MakeStruct({i32, str}, field_names));
+ ASSERT_OK_AND_EQ(expected, MakeStructor({i32, str}, field_names));
// Scalars are broadcast to the length of the arrays
- ASSERT_OK_AND_EQ(expected, MakeStruct({i32, MakeScalar("aa")}, field_names));
+ ASSERT_OK_AND_EQ(expected, MakeStructor({i32, MakeScalar("aa")}, field_names));
// Array length mismatch
- ASSERT_RAISES(Invalid, MakeStruct({i32->Slice(1), str}, field_names));
+ ASSERT_RAISES(Invalid, MakeStructor({i32->Slice(1), str}, field_names));
}
} // namespace compute
diff --git a/cpp/src/arrow/compute/kernels/scalar_random.cc b/cpp/src/arrow/compute/kernels/scalar_random.cc
new file mode 100644
index 0000000..f4f026f
--- /dev/null
+++ b/cpp/src/arrow/compute/kernels/scalar_random.cc
@@ -0,0 +1,105 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <memory>
+#include <mutex>
+#include <random>
+
+#include "arrow/compute/api_scalar.h"
+#include "arrow/compute/kernel.h"
+#include "arrow/compute/kernels/common.h"
+#include "arrow/compute/registry.h"
+#include "arrow/util/pcg_random.h"
+
+namespace arrow {
+namespace compute {
+namespace internal {
+
+namespace {
+
+// Draws one value from `rng` and maps it to a double in the range [0, 1).
+double generate_uniform(random::pcg64_fast* rng) {
+ // This equation is copied from numpy. It keeps the top 53 bits of the 64-bit
+ // output and scales them by 2^-53, so the result is strictly less than 1.
+ static_assert(random::pcg64_fast::min() == 0ULL, "");  // output must span the full
+ static_assert(random::pcg64_fast::max() == ~0ULL, "");  // 64-bit range for the shift
+ return ((*rng)() >> 11) * (1.0 / 9007199254740992.0);  // 9007199254740992 == 2^53
+}
+
+using RandomState = OptionsWrapper<RandomOptions>;
+
+random::pcg64_fast MakeSeedGenerator() {  // builds the generator that hands out seeds
+ arrow_vendored::pcg_extras::seed_seq_from<std::random_device> seed_source;  // seed source
+ random::pcg64_fast seed_gen(seed_source);  // generator initialized from that source
+ return seed_gen;
+}
+
+Status ExecRandom(KernelContext* ctx, const ExecBatch& batch, Datum* out) {  // `batch` is not read
+ static random::pcg64_fast seed_gen = MakeSeedGenerator();  // shared by all calls
+ static std::mutex seed_gen_mutex;  // guards seed_gen
+
+ random::pcg64_fast gen;  // per-call generator, seeded further down
+ const RandomOptions& options = RandomState::Get(ctx);
+ if (options.length < 0) {  // reject a negative length before allocating anything
+ return Status::Invalid("Negative number of elements");
+ }
+
+ auto out_data = ArrayData::Make(float64(), options.length, 0);
+ out_data->buffers.resize(2, nullptr);  // slot 0 stays null; slot 1 is filled below
+
+ ARROW_ASSIGN_OR_RAISE(out_data->buffers[1],
+ ctx->Allocate(options.length * sizeof(double)));
+ double* out_buffer = out_data->template GetMutableValues<double>(1);  // view of slot 1
+
+ if (options.initializer == RandomOptions::Seed) {  // use the seed from the options
+ gen.seed(options.seed);
+ } else {  // otherwise draw a seed from the shared generator
+ std::lock_guard<std::mutex> seed_gen_lock(seed_gen_mutex);  // held only while drawing
+ gen.seed(seed_gen());
+ }
+ for (int64_t i = 0; i < options.length; ++i) {  // one generated value per output slot
+ out_buffer[i] = generate_uniform(&gen);
+ }
+ *out = std::move(out_data);
+ return Status::OK();
+}
+
+const FunctionDoc random_doc{
+ "Generate numbers in the range [0, 1)",
+ ("Generated values are uniformly-distributed, double-precision in range [0, 1).\n"
+ "Length of generated data, algorithm and seed can be changed via RandomOptions."),
+ {},
+ "RandomOptions"};
+
+} // namespace
+
+void RegisterScalarRandom(FunctionRegistry* registry) {  // adds "random" to `registry`
+ static auto random_options = RandomOptions::Defaults();  // static: passed by pointer below
+
+ auto random_func = std::make_shared<ScalarFunction>("random", Arity::Nullary(),
+ &random_doc, &random_options);
+ ScalarKernel kernel{
+ {}, ValueDescr(float64(), ValueDescr::Shape::ARRAY), ExecRandom, RandomState::Init};
+ kernel.null_handling = NullHandling::OUTPUT_NOT_NULL;
+ kernel.mem_allocation = MemAllocation::NO_PREALLOCATE;  // ExecRandom allocates its own buffer
+ DCHECK_OK(random_func->AddKernel(kernel));
+ DCHECK_OK(registry->AddFunction(std::move(random_func)));
+}
+
+} // namespace internal
+} // namespace compute
+} // namespace arrow
diff --git a/cpp/src/arrow/compute/kernels/scalar_random_benchmark.cc b/cpp/src/arrow/compute/kernels/scalar_random_benchmark.cc
new file mode 100644
index 0000000..51dbd08
--- /dev/null
+++ b/cpp/src/arrow/compute/kernels/scalar_random_benchmark.cc
@@ -0,0 +1,56 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "benchmark/benchmark.h"
+
+#include "arrow/compute/api_scalar.h"
+#include "arrow/compute/kernels/test_util.h"
+#include "arrow/testing/gtest_util.h"
+#include "arrow/util/benchmark_util.h"
+
+namespace arrow {
+namespace compute {
+
+static void RandomKernel(benchmark::State& state, bool is_seed) {  // shared benchmark body
+ const int64_t length = state.range(0);  // output length comes from the benchmark argument
+ const auto options = is_seed ? RandomOptions::FromSeed(length, 42)
+ : RandomOptions::FromSystemRandom(length);
+ for (auto _ : state) {  // each iteration is one full "random" call
+ ABORT_NOT_OK(CallFunction("random", {}, &options).status());
+ }
+ state.SetItemsProcessed(state.iterations() * length);  // items = generated values
+}
+
+static void RandomKernelSystem(benchmark::State& state) {
+ RandomKernel(state, /*is_seed=*/false);
+}
+
+static void RandomKernelSeed(benchmark::State& state) {
+ RandomKernel(state, /*is_seed=*/true);
+}
+
+void SetArgs(benchmark::internal::Benchmark* bench) {  // registers the lengths to benchmark
+ for (int64_t length : {1, 64, 1024, 65536}) {
+ bench->Arg(length);  // read back in RandomKernel via state.range(0)
+ }
+}
+
+BENCHMARK(RandomKernelSystem)->Apply(SetArgs);
+BENCHMARK(RandomKernelSeed)->Apply(SetArgs);
+
+} // namespace compute
+} // namespace arrow
diff --git a/cpp/src/arrow/compute/kernels/scalar_random_test.cc b/cpp/src/arrow/compute/kernels/scalar_random_test.cc
new file mode 100644
index 0000000..b4003fc
--- /dev/null
+++ b/cpp/src/arrow/compute/kernels/scalar_random_test.cc
@@ -0,0 +1,123 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest.h>
+
+#include "arrow/compute/api.h"
+#include "arrow/compute/kernels/test_util.h"
+#include "arrow/testing/gtest_util.h"
+#include "arrow/util/thread_pool.h"
+
+namespace arrow {
+
+using internal::ThreadPool;
+
+namespace compute {
+
+namespace {
+
+void TestRandomWithOptions(const RandomOptions& random_options) {  // shared output checks
+ ASSERT_OK_AND_ASSIGN(Datum result, CallFunction("random", {}, &random_options));
+ const auto result_array = result.make_array();
+ ValidateOutput(*result_array);
+ ASSERT_EQ(result_array->length(), random_options.length);
+ ASSERT_EQ(result_array->null_count(), 0);
+ AssertTypeEqual(result_array->type(), float64());
+
+ if (random_options.length > 0) {  // the averages below would divide by zero otherwise
+ // Verify that the sample means of X and X^2 are within 2% of their theoretical values.
+ double sum = 0, square_sum = 0;
+ const double* values = result_array->data()->GetValues<double>(1);
+ for (int64_t i = 0; i < random_options.length; ++i) {
+ const double value = values[i];
+ ASSERT_GE(value, 0);  // every value must lie in [0, 1)
+ ASSERT_LT(value, 1);
+ sum += value;
+ square_sum += value * value;
+ }
+ const double E_X = 0.5;  // mean of a uniform distribution on [0, 1)
+ const double E_X2 = 1.0 / 12 + E_X * E_X;  // E(X^2) = Var(X) + E(X)^2, with Var(X) = 1/12
+ ASSERT_NEAR(sum / random_options.length, E_X, E_X * 0.02);
+ ASSERT_NEAR(square_sum / random_options.length, E_X2, E_X2 * 0.02);
+ }
+}
+
+} // namespace
+
+TEST(TestRandom, Seed) {
+ const int kCount = 100000;
+ auto random_options = RandomOptions::FromSeed(/*length=*/kCount, /*seed=*/0);
+ TestRandomWithOptions(random_options);
+}
+
+TEST(TestRandom, SystemRandom) {
+ const int kCount = 100000;
+ auto random_options = RandomOptions::FromSystemRandom(/*length=*/kCount);
+ TestRandomWithOptions(random_options);
+}
+
+TEST(TestRandom, SeedIsDeterministic) {
+ const int kCount = 100;
+ auto random_options = RandomOptions::FromSeed(/*length=*/kCount, /*seed=*/0);
+ ASSERT_OK_AND_ASSIGN(Datum first_call, CallFunction("random", {}, &random_options));
+ ASSERT_OK_AND_ASSIGN(Datum second_call, CallFunction("random", {}, &random_options));
+ AssertDatumsEqual(first_call, second_call);
+}
+
+TEST(TestRandom, SystemRandomDifferentResultsSingleThreaded) {
+ const int kCount = 100;
+ auto random_options = RandomOptions::FromSystemRandom(/*length=*/kCount);
+ ASSERT_OK_AND_ASSIGN(Datum first_datum, CallFunction("random", {}, &random_options));
+ ASSERT_OK_AND_ASSIGN(Datum second_datum, CallFunction("random", {}, &random_options));
+ ASSERT_FALSE(first_datum.Equals(second_datum));
+}
+
+// Checks that concurrent system-seeded calls to "random" never return equal arrays.
+TEST(TestRandom, SystemRandomDifferentResultsMultiThreaded) {
+ const int kCount = 100;
+ const int kThreadCount = 8;
+ const int kCallCount = 200;
+
+ ASSERT_OK_AND_ASSIGN(auto pool, ThreadPool::Make(kThreadCount));
+
+ auto random_options = RandomOptions::FromSystemRandom(/*length=*/kCount);
+ std::vector<Future<Datum>> futures;
+
+ // Submit more calls than there are worker threads so that calls overlap.
+ for (int i = 0; i < kCallCount; ++i) {
+ futures.push_back(DeferNotOk(
+ pool->Submit([&]() { return CallFunction("random", {}, &random_options); })));
+ }
+ // Collect every result; this also waits for all submitted calls to finish.
+ std::vector<Datum> call_results(kCallCount);
+ for (int i = 0; i < kCallCount; ++i) {
+ ASSERT_OK_AND_ASSIGN(call_results[i], futures[i].result());
+ }
+ // Compare every pair of results. The bounds must be kCallCount: bounding the
+ // loops by kThreadCount would leave all but the first few results unchecked.
+ for (int i = 0; i < kCallCount - 1; ++i) {
+ for (int j = i + 1; j < kCallCount; ++j) {
+ ASSERT_FALSE(call_results[i].Equals(call_results[j]));
+ }
+ }
+}
+
+TEST(TestRandom, Length) {
+ auto random_options = RandomOptions::FromSystemRandom(/*length=*/0);
+ TestRandomWithOptions(random_options);
+
+ random_options = RandomOptions::FromSystemRandom(/*length=*/-1);
+ ASSERT_RAISES(Invalid, CallFunction("random", {}, &random_options));
+}
+
+} // namespace compute
+} // namespace arrow
diff --git a/cpp/src/arrow/compute/registry.cc b/cpp/src/arrow/compute/registry.cc
index c645587..600251c 100644
--- a/cpp/src/arrow/compute/registry.cc
+++ b/cpp/src/arrow/compute/registry.cc
@@ -162,6 +162,7 @@ static std::unique_ptr<FunctionRegistry> CreateBuiltInRegistry() {
RegisterScalarComparison(registry.get());
RegisterScalarIfElse(registry.get());
RegisterScalarNested(registry.get());
+ RegisterScalarRandom(registry.get()); // Nullary
RegisterScalarSetLookup(registry.get());
RegisterScalarStringAscii(registry.get());
RegisterScalarTemporalBinary(registry.get());
diff --git a/cpp/src/arrow/compute/registry_internal.h b/cpp/src/arrow/compute/registry_internal.h
index 98f6118..3a70ff9 100644
--- a/cpp/src/arrow/compute/registry_internal.h
+++ b/cpp/src/arrow/compute/registry_internal.h
@@ -31,6 +31,7 @@ void RegisterScalarCast(FunctionRegistry* registry);
void RegisterScalarComparison(FunctionRegistry* registry);
void RegisterScalarIfElse(FunctionRegistry* registry);
void RegisterScalarNested(FunctionRegistry* registry);
+void RegisterScalarRandom(FunctionRegistry* registry); // Nullary
void RegisterScalarSetLookup(FunctionRegistry* registry);
void RegisterScalarStringAscii(FunctionRegistry* registry);
void RegisterScalarTemporalBinary(FunctionRegistry* registry);
diff --git a/docs/source/cpp/compute.rst b/docs/source/cpp/compute.rst
index 8699f91..e88bd4a 100644
--- a/docs/source/cpp/compute.rst
+++ b/docs/source/cpp/compute.rst
@@ -1388,7 +1388,7 @@ For timestamps inputs with non-empty timezone, localized timestamp components wi
+--------------------+------------+-------------------+---------------+----------------------------+-------+
| second | Unary | Timestamp, Time | Int64 | | |
+--------------------+------------+-------------------+---------------+----------------------------+-------+
-| subsecond | Unary | Timestamp, Time | Double | | |
+| subsecond | Unary | Timestamp, Time | Float64 | | |
+--------------------+------------+-------------------+---------------+----------------------------+-------+
| us_week | Unary | Temporal | Int64 | | \(4) |
+--------------------+------------+-------------------+---------------+----------------------------+-------+
@@ -1489,6 +1489,21 @@ An error is returned if the timestamps already have the timezone metadata set.
allows choosing the behaviour when a timestamp is ambiguous or nonexistent
in the given timezone (because of DST shifts).
+Random number generation
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+This function generates an array of uniformly-distributed double-precision numbers
+in range [0, 1). The options provide the length of the output and the algorithm for
+generating the random numbers, using either a seed or a system-provided, platform-specific
+random generator.
+
++--------------------+------------+---------------+-------------------------+
+| Function name | Arity | Output type | Options class |
++====================+============+===============+=========================+
+| random | Nullary | Float64 | :struct:`RandomOptions` |
++--------------------+------------+---------------+-------------------------+
+
+
Array-wise ("vector") functions
-------------------------------
diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py
index cacc40d..9e26208 100644
--- a/python/pyarrow/tests/test_compute.py
+++ b/python/pyarrow/tests/test_compute.py
@@ -91,6 +91,8 @@ def test_exported_functions():
# message if we don't pass an options instance.
continue
arity = desc['arity']
+ if arity == 0:
+ continue
if arity is Ellipsis:
args = [object()] * 3
else:
@@ -274,8 +276,6 @@ def test_function_attributes():
kernels = func.kernels
assert func.num_kernels == len(kernels)
assert all(isinstance(ker, pc.Kernel) for ker in kernels)
- if func.arity is not Ellipsis:
- assert func.arity >= 1
repr(func)
for ker in kernels:
repr(ker)