You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ap...@apache.org on 2021/12/30 11:49:13 UTC

[arrow] branch master updated: ARROW-12404: [C++] Implement "random" nullary function that generates uniform random between 0 and 1

This is an automated email from the ASF dual-hosted git repository.

apitrou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new ceaed97  ARROW-12404: [C++] Implement "random" nullary function that generates uniform random between 0 and 1
ceaed97 is described below

commit ceaed97f010b4e1c67a34f73b683b1ca3df16930
Author: Alex Şuhan <al...@gmail.com>
AuthorDate: Thu Dec 30 12:46:57 2021 +0100

    ARROW-12404: [C++] Implement "random" nullary function that generates uniform random between 0 and 1
    
    Closes #11864 from asuhan/asuhan/random_nullary
    
    Lead-authored-by: Alex Şuhan <al...@gmail.com>
    Co-authored-by: Antoine Pitrou <an...@python.org>
    Co-authored-by: Yibo Cai <yi...@arm.com>
    Signed-off-by: Antoine Pitrou <an...@python.org>
---
 cpp/src/arrow/CMakeLists.txt                       |   1 +
 cpp/src/arrow/compute/api_scalar.cc                |  30 +++++
 cpp/src/arrow/compute/api_scalar.h                 |  24 ++++
 cpp/src/arrow/compute/kernels/CMakeLists.txt       |   4 +-
 .../arrow/compute/kernels/scalar_compare_test.cc   |  13 ++-
 .../arrow/compute/kernels/scalar_if_else_test.cc   |  10 +-
 .../arrow/compute/kernels/scalar_nested_test.cc    |  35 +++---
 cpp/src/arrow/compute/kernels/scalar_random.cc     | 105 ++++++++++++++++++
 .../compute/kernels/scalar_random_benchmark.cc     |  56 ++++++++++
 .../arrow/compute/kernels/scalar_random_test.cc    | 123 +++++++++++++++++++++
 cpp/src/arrow/compute/registry.cc                  |   1 +
 cpp/src/arrow/compute/registry_internal.h          |   1 +
 docs/source/cpp/compute.rst                        |  17 ++-
 python/pyarrow/tests/test_compute.py               |   4 +-
 14 files changed, 392 insertions(+), 32 deletions(-)

diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt
index cc979a2..12d5f41 100644
--- a/cpp/src/arrow/CMakeLists.txt
+++ b/cpp/src/arrow/CMakeLists.txt
@@ -420,6 +420,7 @@ if(ARROW_COMPUTE)
        compute/kernels/scalar_compare.cc
        compute/kernels/scalar_if_else.cc
        compute/kernels/scalar_nested.cc
+       compute/kernels/scalar_random.cc
        compute/kernels/scalar_set_lookup.cc
        compute/kernels/scalar_string.cc
        compute/kernels/scalar_temporal_binary.cc
diff --git a/cpp/src/arrow/compute/api_scalar.cc b/cpp/src/arrow/compute/api_scalar.cc
index 24a5a1e..021499a 100644
--- a/cpp/src/arrow/compute/api_scalar.cc
+++ b/cpp/src/arrow/compute/api_scalar.cc
@@ -197,6 +197,23 @@ struct EnumTraits<compute::Utf8NormalizeOptions::Form>
   }
 };
 
+template <>  // Enum traits (type name and per-value names) for RandomOptions::Initializer.
+struct EnumTraits<compute::RandomOptions::Initializer>
+    : BasicEnumTraits<compute::RandomOptions::Initializer,
+                      compute::RandomOptions::Initializer::SystemRandom,
+                      compute::RandomOptions::Initializer::Seed> {
+  static std::string name() { return "RandomOptions::Initializer"; }
+  static std::string value_name(compute::RandomOptions::Initializer value) {
+    switch (value) {  // One case per enumerator; each returns the enumerator's spelling.
+      case compute::RandomOptions::Initializer::SystemRandom:
+        return "SystemRandom";
+      case compute::RandomOptions::Initializer::Seed:
+        return "Seed";
+    }
+    return "<INVALID>";  // Only reached when `value` is neither enumerator.
+  }
+};
+
 }  // namespace internal
 
 namespace compute {
@@ -280,6 +297,10 @@ static auto kWeekOptionsType = GetFunctionOptionsType<WeekOptions>(
     DataMember("week_starts_monday", &WeekOptions::week_starts_monday),
     DataMember("count_from_zero", &WeekOptions::count_from_zero),
     DataMember("first_week_is_fully_in_year", &WeekOptions::first_week_is_fully_in_year));
+static auto kRandomOptionsType = GetFunctionOptionsType<RandomOptions>(
+    DataMember("length", &RandomOptions::length),
+    DataMember("initializer", &RandomOptions::initializer),
+    DataMember("seed", &RandomOptions::seed));  // One DataMember per RandomOptions field.
 }  // namespace
 }  // namespace internal
 
@@ -467,6 +488,14 @@ WeekOptions::WeekOptions(bool week_starts_monday, bool count_from_zero,
       first_week_is_fully_in_year(first_week_is_fully_in_year) {}
 constexpr char WeekOptions::kTypeName[];
 
+RandomOptions::RandomOptions(int64_t length, Initializer initializer, uint64_t seed)
+    : FunctionOptions(internal::kRandomOptionsType),
+      length(length),
+      initializer(initializer),
+      seed(seed) {}  // Stores the three arguments as given; no validation happens here.
+RandomOptions::RandomOptions() : RandomOptions(0, SystemRandom, 0) {}  // length 0
+constexpr char RandomOptions::kTypeName[];  // Out-of-class definition of the static member.
+
 namespace internal {
 void RegisterScalarOptions(FunctionRegistry* registry) {
   DCHECK_OK(registry->AddFunctionOptionsType(kArithmeticOptionsType));
@@ -493,6 +522,7 @@ void RegisterScalarOptions(FunctionRegistry* registry) {
   DCHECK_OK(registry->AddFunctionOptionsType(kTrimOptionsType));
   DCHECK_OK(registry->AddFunctionOptionsType(kUtf8NormalizeOptionsType));
   DCHECK_OK(registry->AddFunctionOptionsType(kWeekOptionsType));
+  DCHECK_OK(registry->AddFunctionOptionsType(kRandomOptionsType));
 }
 }  // namespace internal
 
diff --git a/cpp/src/arrow/compute/api_scalar.h b/cpp/src/arrow/compute/api_scalar.h
index 6e1c1ac..3d92215 100644
--- a/cpp/src/arrow/compute/api_scalar.h
+++ b/cpp/src/arrow/compute/api_scalar.h
@@ -420,6 +420,30 @@ struct ARROW_EXPORT Utf8NormalizeOptions : public FunctionOptions {
   Form form;
 };
 
+class ARROW_EXPORT RandomOptions : public FunctionOptions {  // Options of "random".
+ public:
+  enum Initializer { SystemRandom, Seed };  // Where the generator's seed comes from.
+
+  static RandomOptions FromSystemRandom(int64_t length) {
+    return RandomOptions{length, SystemRandom, 0};  // `seed` member is set to 0.
+  }
+  static RandomOptions FromSeed(int64_t length, uint64_t seed) {
+    return RandomOptions{length, Seed, seed};
+  }
+  /// Construct from explicit field values; the default constructor takes no arguments.
+  RandomOptions(int64_t length, Initializer initializer, uint64_t seed);
+  RandomOptions();
+  constexpr static char const kTypeName[] = "RandomOptions";
+  static RandomOptions Defaults() { return RandomOptions(); }
+
+  /// The length of the array returned. Negative is invalid.
+  int64_t length;
+  /// The type of initialization for random number generation - system or provided seed.
+  Initializer initializer;
+  /// The seed value used when `initializer` is Seed; it is not read for SystemRandom.
+  uint64_t seed;
+};
+
 /// @}
 
 /// \brief Get the absolute value of a value.
diff --git a/cpp/src/arrow/compute/kernels/CMakeLists.txt b/cpp/src/arrow/compute/kernels/CMakeLists.txt
index 28686a9..93a02cd 100644
--- a/cpp/src/arrow/compute/kernels/CMakeLists.txt
+++ b/cpp/src/arrow/compute/kernels/CMakeLists.txt
@@ -24,12 +24,13 @@ add_arrow_compute_test(scalar_test
                        scalar_boolean_test.cc
                        scalar_cast_test.cc
                        scalar_compare_test.cc
+                       scalar_if_else_test.cc
                        scalar_nested_test.cc
+                       scalar_random_test.cc
                        scalar_set_lookup_test.cc
                        scalar_string_test.cc
                        scalar_temporal_test.cc
                        scalar_validity_test.cc
-                       scalar_if_else_test.cc
                        test_util.cc)
 
 add_arrow_benchmark(scalar_arithmetic_benchmark PREFIX "arrow-compute")
@@ -37,6 +38,7 @@ add_arrow_benchmark(scalar_boolean_benchmark PREFIX "arrow-compute")
 add_arrow_benchmark(scalar_cast_benchmark PREFIX "arrow-compute")
 add_arrow_benchmark(scalar_compare_benchmark PREFIX "arrow-compute")
 add_arrow_benchmark(scalar_if_else_benchmark PREFIX "arrow-compute")
+add_arrow_benchmark(scalar_random_benchmark PREFIX "arrow-compute")
 add_arrow_benchmark(scalar_set_lookup_benchmark PREFIX "arrow-compute")
 add_arrow_benchmark(scalar_string_benchmark PREFIX "arrow-compute")
 
diff --git a/cpp/src/arrow/compute/kernels/scalar_compare_test.cc b/cpp/src/arrow/compute/kernels/scalar_compare_test.cc
index 64abb9f..0fa97e1 100644
--- a/cpp/src/arrow/compute/kernels/scalar_compare_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_compare_test.cc
@@ -1211,18 +1211,19 @@ class TestVarArgsCompareParametricTemporal : public TestVarArgsCompare<T> {
   Datum array(const std::string& value) { return ArrayFromJSON(type_singleton(), value); }
 };
 
-using NumericBasedTypes =
+using CompareNumericBasedTypes =
     ::testing::Types<UInt8Type, UInt16Type, UInt32Type, UInt64Type, Int8Type, Int16Type,
                      Int32Type, Int64Type, FloatType, DoubleType, Date32Type, Date64Type>;
-using ParametricTemporalTypes = ::testing::Types<TimestampType, Time32Type, Time64Type>;
-using FixedSizeBinaryTypes = ::testing::Types<FixedSizeBinaryType>;
+using CompareParametricTemporalTypes =
+    ::testing::Types<TimestampType, Time32Type, Time64Type>;
+using CompareFixedSizeBinaryTypes = ::testing::Types<FixedSizeBinaryType>;
 
-TYPED_TEST_SUITE(TestVarArgsCompareNumeric, NumericBasedTypes);
+TYPED_TEST_SUITE(TestVarArgsCompareNumeric, CompareNumericBasedTypes);
 TYPED_TEST_SUITE(TestVarArgsCompareDecimal, DecimalArrowTypes);
 TYPED_TEST_SUITE(TestVarArgsCompareFloating, RealArrowTypes);
-TYPED_TEST_SUITE(TestVarArgsCompareParametricTemporal, ParametricTemporalTypes);
+TYPED_TEST_SUITE(TestVarArgsCompareParametricTemporal, CompareParametricTemporalTypes);
 TYPED_TEST_SUITE(TestVarArgsCompareBinary, BaseBinaryArrowTypes);
-TYPED_TEST_SUITE(TestVarArgsCompareFixedSizeBinary, FixedSizeBinaryTypes);
+TYPED_TEST_SUITE(TestVarArgsCompareFixedSizeBinary, CompareFixedSizeBinaryTypes);
 
 TYPED_TEST(TestVarArgsCompareNumeric, MinElementWise) {
   this->AssertNullScalar(MinElementWise, {});
diff --git a/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc b/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc
index 6f219af..711b318 100644
--- a/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc
@@ -61,12 +61,12 @@ class TestIfElseKernel : public ::testing::Test {};
 template <typename Type>
 class TestIfElsePrimitive : public ::testing::Test {};
 
-using NumericBasedTypes =
+using IfElseNumericBasedTypes =
     ::testing::Types<UInt8Type, UInt16Type, UInt32Type, UInt64Type, Int8Type, Int16Type,
                      Int32Type, Int64Type, FloatType, DoubleType, Date32Type, Date64Type,
                      Time32Type, Time64Type, TimestampType, MonthIntervalType>;
 
-TYPED_TEST_SUITE(TestIfElsePrimitive, NumericBasedTypes);
+TYPED_TEST_SUITE(TestIfElsePrimitive, IfElseNumericBasedTypes);
 
 TYPED_TEST(TestIfElsePrimitive, IfElseFixedSizeRand) {
   using ArrayType = typename TypeTraits<TypeParam>::ArrayType;
@@ -960,7 +960,7 @@ TYPED_TEST(TestIfElseDict, DifferentDictionaries) {
 template <typename Type>
 class TestCaseWhenNumeric : public ::testing::Test {};
 
-TYPED_TEST_SUITE(TestCaseWhenNumeric, NumericBasedTypes);
+TYPED_TEST_SUITE(TestCaseWhenNumeric, IfElseNumericBasedTypes);
 
 Datum MakeStruct(const std::vector<Datum>& conds) {
   EXPECT_OK_AND_ASSIGN(auto result, CallFunction("make_struct", conds));
@@ -2176,7 +2176,7 @@ class TestCoalesceBinary : public ::testing::Test {};
 template <typename Type>
 class TestCoalesceList : public ::testing::Test {};
 
-TYPED_TEST_SUITE(TestCoalesceNumeric, NumericBasedTypes);
+TYPED_TEST_SUITE(TestCoalesceNumeric, IfElseNumericBasedTypes);
 TYPED_TEST_SUITE(TestCoalesceBinary, BaseBinaryArrowTypes);
 TYPED_TEST_SUITE(TestCoalesceList, ListArrowTypes);
 
@@ -2929,7 +2929,7 @@ class TestChooseNumeric : public ::testing::Test {};
 template <typename Type>
 class TestChooseBinary : public ::testing::Test {};
 
-TYPED_TEST_SUITE(TestChooseNumeric, NumericBasedTypes);
+TYPED_TEST_SUITE(TestChooseNumeric, IfElseNumericBasedTypes);
 TYPED_TEST_SUITE(TestChooseBinary, BaseBinaryArrowTypes);
 
 TYPED_TEST(TestChooseNumeric, FixedSize) {
diff --git a/cpp/src/arrow/compute/kernels/scalar_nested_test.cc b/cpp/src/arrow/compute/kernels/scalar_nested_test.cc
index 1f78fd6..4640e1e 100644
--- a/cpp/src/arrow/compute/kernels/scalar_nested_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_nested_test.cc
@@ -236,25 +236,25 @@ struct {
     MakeStructOptions opts{field_names, options...};
     return CallFunction("make_struct", args, &opts);
   }
-} MakeStruct;
+} MakeStructor;
 
 TEST(MakeStruct, Scalar) {
   auto i32 = MakeScalar(1);
   auto f64 = MakeScalar(2.5);
   auto str = MakeScalar("yo");
 
-  EXPECT_THAT(MakeStruct({i32, f64, str}, {"i", "f", "s"}),
+  EXPECT_THAT(MakeStructor({i32, f64, str}, {"i", "f", "s"}),
               ResultWith(Datum(*StructScalar::Make({i32, f64, str}, {"i", "f", "s"}))));
 
   // Names default to field_index
-  EXPECT_THAT(MakeStruct({i32, f64, str}),
+  EXPECT_THAT(MakeStructor({i32, f64, str}),
               ResultWith(Datum(*StructScalar::Make({i32, f64, str}, {"0", "1", "2"}))));
 
   // No field names or input values is fine
-  EXPECT_THAT(MakeStruct({}), ResultWith(Datum(*StructScalar::Make({}, {}))));
+  EXPECT_THAT(MakeStructor({}), ResultWith(Datum(*StructScalar::Make({}, {}))));
 
   // Three field names but one input value
-  EXPECT_THAT(MakeStruct({str}, {"i", "f", "s"}), Raises(StatusCode::Invalid));
+  EXPECT_THAT(MakeStructor({str}, {"i", "f", "s"}), Raises(StatusCode::Invalid));
 }
 
 TEST(MakeStruct, Array) {
@@ -263,15 +263,16 @@ TEST(MakeStruct, Array) {
   auto i32 = ArrayFromJSON(int32(), "[42, 13, 7]");
   auto str = ArrayFromJSON(utf8(), R"(["aa", "aa", "aa"])");
 
-  EXPECT_THAT(MakeStruct({i32, str}, {"i", "s"}),
+  EXPECT_THAT(MakeStructor({i32, str}, {"i", "s"}),
               ResultWith(Datum(*StructArray::Make({i32, str}, field_names))));
 
   // Scalars are broadcast to the length of the arrays
-  EXPECT_THAT(MakeStruct({i32, MakeScalar("aa")}, {"i", "s"}),
+  EXPECT_THAT(MakeStructor({i32, MakeScalar("aa")}, {"i", "s"}),
               ResultWith(Datum(*StructArray::Make({i32, str}, field_names))));
 
   // Array length mismatch
-  EXPECT_THAT(MakeStruct({i32->Slice(1), str}, field_names), Raises(StatusCode::Invalid));
+  EXPECT_THAT(MakeStructor({i32->Slice(1), str}, field_names),
+              Raises(StatusCode::Invalid));
 }
 
 TEST(MakeStruct, NullableMetadataPassedThru) {
@@ -284,7 +285,7 @@ TEST(MakeStruct, NullableMetadataPassedThru) {
       key_value_metadata({"a", "b"}, {"ALPHA", "BRAVO"}), nullptr};
 
   ASSERT_OK_AND_ASSIGN(auto proj,
-                       MakeStruct({i32, str}, field_names, nullability, metadata));
+                       MakeStructor({i32, str}, field_names, nullability, metadata));
 
   AssertTypeEqual(*proj.type(), StructType({
                                     field("i", int32(), /*nullable=*/true, metadata[0]),
@@ -292,8 +293,8 @@ TEST(MakeStruct, NullableMetadataPassedThru) {
                                 }));
 
   // error: projecting an array containing nulls with nullable=false
-  EXPECT_THAT(MakeStruct({i32, ArrayFromJSON(utf8(), R"(["aa", null, "aa"])")},
-                         field_names, nullability, metadata),
+  EXPECT_THAT(MakeStructor({i32, ArrayFromJSON(utf8(), R"(["aa", null, "aa"])")},
+                           field_names, nullability, metadata),
               Raises(StatusCode::Invalid));
 }
 
@@ -317,13 +318,13 @@ TEST(MakeStruct, ChunkedArray) {
   ASSERT_OK_AND_ASSIGN(Datum expected,
                        ChunkedArray::Make({expected_0, expected_1, expected_2}));
 
-  ASSERT_OK_AND_EQ(expected, MakeStruct({i32, str}, field_names));
+  ASSERT_OK_AND_EQ(expected, MakeStructor({i32, str}, field_names));
 
   // Scalars are broadcast to the length of the arrays
-  ASSERT_OK_AND_EQ(expected, MakeStruct({i32, MakeScalar("aa")}, field_names));
+  ASSERT_OK_AND_EQ(expected, MakeStructor({i32, MakeScalar("aa")}, field_names));
 
   // Array length mismatch
-  ASSERT_RAISES(Invalid, MakeStruct({i32->Slice(1), str}, field_names));
+  ASSERT_RAISES(Invalid, MakeStructor({i32->Slice(1), str}, field_names));
 }
 
 TEST(MakeStruct, ChunkedArrayDifferentChunking) {
@@ -354,13 +355,13 @@ TEST(MakeStruct, ChunkedArrayDifferentChunking) {
 
   ASSERT_OK_AND_ASSIGN(Datum expected, ChunkedArray::Make(expected_chunks));
 
-  ASSERT_OK_AND_EQ(expected, MakeStruct({i32, str}, field_names));
+  ASSERT_OK_AND_EQ(expected, MakeStructor({i32, str}, field_names));
 
   // Scalars are broadcast to the length of the arrays
-  ASSERT_OK_AND_EQ(expected, MakeStruct({i32, MakeScalar("aa")}, field_names));
+  ASSERT_OK_AND_EQ(expected, MakeStructor({i32, MakeScalar("aa")}, field_names));
 
   // Array length mismatch
-  ASSERT_RAISES(Invalid, MakeStruct({i32->Slice(1), str}, field_names));
+  ASSERT_RAISES(Invalid, MakeStructor({i32->Slice(1), str}, field_names));
 }
 
 }  // namespace compute
diff --git a/cpp/src/arrow/compute/kernels/scalar_random.cc b/cpp/src/arrow/compute/kernels/scalar_random.cc
new file mode 100644
index 0000000..f4f026f
--- /dev/null
+++ b/cpp/src/arrow/compute/kernels/scalar_random.cc
@@ -0,0 +1,105 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <memory>
+#include <mutex>
+#include <random>
+
+#include "arrow/compute/api_scalar.h"
+#include "arrow/compute/kernel.h"
+#include "arrow/compute/kernels/common.h"
+#include "arrow/compute/registry.h"
+#include "arrow/util/pcg_random.h"
+
+namespace arrow {
+namespace compute {
+namespace internal {
+
+namespace {
+
+// Generates a random floating point number in range [0, 1).
+double generate_uniform(random::pcg64_fast* rng) {
+  // This equation is copied from numpy. It shifts out the low 11 bits of the draw and
+  // scales by 2^-53 (9007199254740992 == 2^53), so the result is strictly less than 1.
+  static_assert(random::pcg64_fast::min() == 0ULL, "");
+  static_assert(random::pcg64_fast::max() == ~0ULL, "");
+  return ((*rng)() >> 11) * (1.0 / 9007199254740992.0);
+}
+
+using RandomState = OptionsWrapper<RandomOptions>;  // ExecRandom reads options via Get(ctx).
+
+random::pcg64_fast MakeSeedGenerator() {  // Returns a generator seeded via std::random_device.
+  arrow_vendored::pcg_extras::seed_seq_from<std::random_device> seed_source;
+  random::pcg64_fast seed_gen(seed_source);
+  return seed_gen;
+}
+
+Status ExecRandom(KernelContext* ctx, const ExecBatch& batch, Datum* out) {  // `batch` unused
+  static random::pcg64_fast seed_gen = MakeSeedGenerator();  // Shared by all calls.
+  static std::mutex seed_gen_mutex;  // Guards seed_gen.
+  // Per-call generator: seeded below, then used to fill the whole output buffer.
+  random::pcg64_fast gen;
+  const RandomOptions& options = RandomState::Get(ctx);
+  if (options.length < 0) {
+    return Status::Invalid("Negative number of elements");
+  }
+
+  auto out_data = ArrayData::Make(float64(), options.length, 0);
+  out_data->buffers.resize(2, nullptr);  // [0] is left null; [1] receives the values.
+
+  ARROW_ASSIGN_OR_RAISE(out_data->buffers[1],
+                        ctx->Allocate(options.length * sizeof(double)));
+  double* out_buffer = out_data->template GetMutableValues<double>(1);
+
+  if (options.initializer == RandomOptions::Seed) {
+    gen.seed(options.seed);  // Caller-provided seed.
+  } else {
+    std::lock_guard<std::mutex> seed_gen_lock(seed_gen_mutex);
+    gen.seed(seed_gen());  // Draw this call's seed from the shared generator, under lock.
+  }
+  for (int64_t i = 0; i < options.length; ++i) {
+    out_buffer[i] = generate_uniform(&gen);
+  }
+  *out = std::move(out_data);
+  return Status::OK();
+}
+
+const FunctionDoc random_doc{  // Doc object passed to the "random" ScalarFunction below.
+    "Generate numbers in the range [0, 1)",
+    ("Generated values are uniformly-distributed, double-precision in range [0, 1).\n"
+     "Length of generated data, algorithm and seed can be changed via RandomOptions."),
+    {},  // NOTE(review): presumably the (empty) argument-name list; the function is nullary.
+    "RandomOptions"};  // Same spelling as RandomOptions::kTypeName.
+
+}  // namespace
+
+void RegisterScalarRandom(FunctionRegistry* registry) {  // Adds "random" to `registry`.
+  static auto random_options = RandomOptions::Defaults();
+  // random_options is static, so the address passed below outlives this call.
+  auto random_func = std::make_shared<ScalarFunction>("random", Arity::Nullary(),
+                                                      &random_doc, &random_options);
+  ScalarKernel kernel{  // Empty input list; output described as a float64 array.
+      {}, ValueDescr(float64(), ValueDescr::Shape::ARRAY), ExecRandom, RandomState::Init};
+  kernel.null_handling = NullHandling::OUTPUT_NOT_NULL;
+  kernel.mem_allocation = MemAllocation::NO_PREALLOCATE;  // ExecRandom allocates itself.
+  DCHECK_OK(random_func->AddKernel(kernel));
+  DCHECK_OK(registry->AddFunction(std::move(random_func)));
+}
+
+}  // namespace internal
+}  // namespace compute
+}  // namespace arrow
diff --git a/cpp/src/arrow/compute/kernels/scalar_random_benchmark.cc b/cpp/src/arrow/compute/kernels/scalar_random_benchmark.cc
new file mode 100644
index 0000000..51dbd08
--- /dev/null
+++ b/cpp/src/arrow/compute/kernels/scalar_random_benchmark.cc
@@ -0,0 +1,56 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "benchmark/benchmark.h"
+
+#include "arrow/compute/api_scalar.h"
+#include "arrow/compute/kernels/test_util.h"
+#include "arrow/testing/gtest_util.h"
+#include "arrow/util/benchmark_util.h"
+
+namespace arrow {
+namespace compute {
+
+static void RandomKernel(benchmark::State& state, bool is_seed) {  // Times "random" calls.
+  const int64_t length = state.range(0);  // Number of values requested per call.
+  const auto options = is_seed ? RandomOptions::FromSeed(length, 42)
+                               : RandomOptions::FromSystemRandom(length);
+  for (auto _ : state) {
+    ABORT_NOT_OK(CallFunction("random", {}, &options).status());
+  }
+  state.SetItemsProcessed(state.iterations() * length);  // One item per generated value.
+}
+
+static void RandomKernelSystem(benchmark::State& state) {
+  RandomKernel(state, /*is_seed=*/false);  // Uses RandomOptions::FromSystemRandom.
+}
+
+static void RandomKernelSeed(benchmark::State& state) {
+  RandomKernel(state, /*is_seed=*/true);  // Uses RandomOptions::FromSeed with seed 42.
+}
+
+void SetArgs(benchmark::internal::Benchmark* bench) {  // Adds one Arg per output length.
+  for (int64_t length : {1, 64, 1024, 65536}) {
+    bench->Arg(length);  // Read back by RandomKernel through state.range(0).
+  }
+}
+
+BENCHMARK(RandomKernelSystem)->Apply(SetArgs);
+BENCHMARK(RandomKernelSeed)->Apply(SetArgs);
+
+}  // namespace compute
+}  // namespace arrow
diff --git a/cpp/src/arrow/compute/kernels/scalar_random_test.cc b/cpp/src/arrow/compute/kernels/scalar_random_test.cc
new file mode 100644
index 0000000..b4003fc
--- /dev/null
+++ b/cpp/src/arrow/compute/kernels/scalar_random_test.cc
@@ -0,0 +1,123 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest.h>
+
+#include "arrow/compute/api.h"
+#include "arrow/compute/kernels/test_util.h"
+#include "arrow/testing/gtest_util.h"
+#include "arrow/util/thread_pool.h"
+
+namespace arrow {
+
+using internal::ThreadPool;
+
+namespace compute {
+
+namespace {
+
+void TestRandomWithOptions(const RandomOptions& random_options) {  // Shared output checks.
+  ASSERT_OK_AND_ASSIGN(Datum result, CallFunction("random", {}, &random_options));
+  const auto result_array = result.make_array();
+  ValidateOutput(*result_array);
+  ASSERT_EQ(result_array->length(), random_options.length);
+  ASSERT_EQ(result_array->null_count(), 0);
+  AssertTypeEqual(result_array->type(), float64());
+
+  if (random_options.length > 0) {
+    // Verify each value is in [0, 1) and that the sample E(X), E(X^2) are near theory.
+    double sum = 0, square_sum = 0;
+    const double* values = result_array->data()->GetValues<double>(1);
+    for (int64_t i = 0; i < random_options.length; ++i) {
+      const double value = values[i];
+      ASSERT_GE(value, 0);
+      ASSERT_LT(value, 1);
+      sum += value;
+      square_sum += value * value;
+    }
+    const double E_X = 0.5;
+    const double E_X2 = 1.0 / 12 + E_X * E_X;  // E(X^2) = Var(X) + E(X)^2, Var(X) = 1/12.
+    ASSERT_NEAR(sum / random_options.length, E_X, E_X * 0.02);  // 2% relative tolerance.
+    ASSERT_NEAR(square_sum / random_options.length, E_X2, E_X2 * 0.02);
+  }
+}
+
+}  // namespace
+
+TEST(TestRandom, Seed) {  // Output checks for a generator seeded with 0.
+  const int kCount = 100000;
+  auto random_options = RandomOptions::FromSeed(/*length=*/kCount, /*seed=*/0);
+  TestRandomWithOptions(random_options);
+}
+
+TEST(TestRandom, SystemRandom) {  // Same output checks with system-provided seeding.
+  const int kCount = 100000;
+  auto random_options = RandomOptions::FromSystemRandom(/*length=*/kCount);
+  TestRandomWithOptions(random_options);
+}
+
+TEST(TestRandom, SeedIsDeterministic) {  // Two calls with the same seed must match.
+  const int kCount = 100;
+  auto random_options = RandomOptions::FromSeed(/*length=*/kCount, /*seed=*/0);
+  ASSERT_OK_AND_ASSIGN(Datum first_call, CallFunction("random", {}, &random_options));
+  ASSERT_OK_AND_ASSIGN(Datum second_call, CallFunction("random", {}, &random_options));
+  AssertDatumsEqual(first_call, second_call);
+}
+
+TEST(TestRandom, SystemRandomDifferentResultsSingleThreaded) {  // Two calls must differ.
+  const int kCount = 100;
+  auto random_options = RandomOptions::FromSystemRandom(/*length=*/kCount);
+  ASSERT_OK_AND_ASSIGN(Datum first_datum, CallFunction("random", {}, &random_options));
+  ASSERT_OK_AND_ASSIGN(Datum second_datum, CallFunction("random", {}, &random_options));
+  ASSERT_FALSE(first_datum.Equals(second_datum));
+}
+
+TEST(TestRandom, SystemRandomDifferentResultsMultiThreaded) {
+  const int kCount = 100;
+  const int kThreadCount = 8;
+  const int kCallCount = 200;
+  // Run kCallCount system-seeded calls on a kThreadCount-thread pool; all must differ.
+  ASSERT_OK_AND_ASSIGN(auto pool, ThreadPool::Make(kThreadCount));
+
+  auto random_options = RandomOptions::FromSystemRandom(/*length=*/kCount);
+  std::vector<Future<Datum>> futures;
+
+  for (int i = 0; i < kCallCount; ++i) {
+    futures.push_back(DeferNotOk(
+        pool->Submit([&]() { return CallFunction("random", {}, &random_options); })));
+  }
+  std::vector<Datum> call_results(kCallCount);
+  for (int i = 0; i < kCallCount; ++i) {
+    ASSERT_OK_AND_ASSIGN(call_results[i], futures[i].result());
+  }
+  for (int i = 0; i < kCallCount - 1; ++i) {  // Compare every pair of call results.
+    for (int j = i + 1; j < kCallCount; ++j) {
+      ASSERT_FALSE(call_results[i].Equals(call_results[j]));
+    }
+  }
+}
+
+TEST(TestRandom, Length) {
+  auto random_options = RandomOptions::FromSystemRandom(/*length=*/0);  // Empty output.
+  TestRandomWithOptions(random_options);
+  // A negative length must make the call fail with Invalid.
+  random_options = RandomOptions::FromSystemRandom(/*length=*/-1);
+  ASSERT_RAISES(Invalid, CallFunction("random", {}, &random_options));
+}
+
+}  // namespace compute
+}  // namespace arrow
diff --git a/cpp/src/arrow/compute/registry.cc b/cpp/src/arrow/compute/registry.cc
index c645587..600251c 100644
--- a/cpp/src/arrow/compute/registry.cc
+++ b/cpp/src/arrow/compute/registry.cc
@@ -162,6 +162,7 @@ static std::unique_ptr<FunctionRegistry> CreateBuiltInRegistry() {
   RegisterScalarComparison(registry.get());
   RegisterScalarIfElse(registry.get());
   RegisterScalarNested(registry.get());
+  RegisterScalarRandom(registry.get());  // Nullary
   RegisterScalarSetLookup(registry.get());
   RegisterScalarStringAscii(registry.get());
   RegisterScalarTemporalBinary(registry.get());
diff --git a/cpp/src/arrow/compute/registry_internal.h b/cpp/src/arrow/compute/registry_internal.h
index 98f6118..3a70ff9 100644
--- a/cpp/src/arrow/compute/registry_internal.h
+++ b/cpp/src/arrow/compute/registry_internal.h
@@ -31,6 +31,7 @@ void RegisterScalarCast(FunctionRegistry* registry);
 void RegisterScalarComparison(FunctionRegistry* registry);
 void RegisterScalarIfElse(FunctionRegistry* registry);
 void RegisterScalarNested(FunctionRegistry* registry);
+void RegisterScalarRandom(FunctionRegistry* registry);  // Nullary
 void RegisterScalarSetLookup(FunctionRegistry* registry);
 void RegisterScalarStringAscii(FunctionRegistry* registry);
 void RegisterScalarTemporalBinary(FunctionRegistry* registry);
diff --git a/docs/source/cpp/compute.rst b/docs/source/cpp/compute.rst
index 8699f91..e88bd4a 100644
--- a/docs/source/cpp/compute.rst
+++ b/docs/source/cpp/compute.rst
@@ -1388,7 +1388,7 @@ For timestamps inputs with non-empty timezone, localized timestamp components wi
 +--------------------+------------+-------------------+---------------+----------------------------+-------+
 | second             | Unary      | Timestamp, Time   | Int64         |                            |       |
 +--------------------+------------+-------------------+---------------+----------------------------+-------+
-| subsecond          | Unary      | Timestamp, Time   | Double        |                            |       |
+| subsecond          | Unary      | Timestamp, Time   | Float64       |                            |       |
 +--------------------+------------+-------------------+---------------+----------------------------+-------+
 | us_week            | Unary      | Temporal          | Int64         |                            | \(4)  |
 +--------------------+------------+-------------------+---------------+----------------------------+-------+
@@ -1489,6 +1489,21 @@ An error is returned if the timestamps already have the timezone metadata set.
   allows choosing the behaviour when a timestamp is ambiguous or nonexistent
   in the given timezone (because of DST shifts).
 
+Random number generation
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+This function generates an array of uniformly-distributed double-precision numbers
+in range [0, 1). The options provide the length of the output and how the random
+generator is initialized: either from a user-provided seed or from a system-provided,
+platform-specific source of randomness.
+
++--------------------+------------+---------------+-------------------------+
+| Function name      | Arity      | Output type   | Options class           |
++====================+============+===============+=========================+
+| random             | Nullary    | Float64       | :struct:`RandomOptions` |
++--------------------+------------+---------------+-------------------------+
+
+
 Array-wise ("vector") functions
 -------------------------------
 
diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py
index cacc40d..9e26208 100644
--- a/python/pyarrow/tests/test_compute.py
+++ b/python/pyarrow/tests/test_compute.py
@@ -91,6 +91,8 @@ def test_exported_functions():
             # message if we don't pass an options instance.
             continue
         arity = desc['arity']
+        if arity == 0:
+            continue
         if arity is Ellipsis:
             args = [object()] * 3
         else:
@@ -274,8 +276,6 @@ def test_function_attributes():
         kernels = func.kernels
         assert func.num_kernels == len(kernels)
         assert all(isinstance(ker, pc.Kernel) for ker in kernels)
-        if func.arity is not Ellipsis:
-            assert func.arity >= 1
         repr(func)
         for ker in kernels:
             repr(ker)