You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by bk...@apache.org on 2023/06/22 16:03:23 UTC
[arrow] branch main updated: GH-32190: [C++][Compute] Implement cumulative prod, max and min functions (#36020)
This is an automated email from the ASF dual-hosted git repository.
bkietz pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new e3eb5898e7 GH-32190: [C++][Compute] Implement cumulative prod, max and min functions (#36020)
e3eb5898e7 is described below
commit e3eb5898e75a0b901724f771a7e2de069993a33c
Author: Jin Shang <sh...@gmail.com>
AuthorDate: Fri Jun 23 00:03:16 2023 +0800
GH-32190: [C++][Compute] Implement cumulative prod, max and min functions (#36020)
### Rationale for this change
Implement cumulative prod, max and min compute functions
### What changes are included in this PR?
1. Add implementations, docs and tests for the three functions.
2. Refactor `CumulativeSumOptions` to `CumulativeOptions` for reusability.
3. Fix a bug where `GenericFromScalar(GenericToScalar(std::nullopt)) != std::nullopt`.
4. Remove an unnecessary Cast with the default start value.
5. Add tests to check behavior with `NaN`.
I'll explain some of the changes in comments.
### Are these changes tested?
Yes, in vector_accumulative_ops_test.cc and test_compute.py
### Are there any user-facing changes?
No. The data members of `CumulativeSumOptions` have changed (`start` is now a `std::optional<std::shared_ptr<Scalar>>`), but the member functions behave as before, and `std::optional<T>` can be constructed directly from a `T`, so users should not notice any difference.
* Closes: #32190
Lead-authored-by: Jin Shang <sh...@gmail.com>
Co-authored-by: Benjamin Kietzman <be...@gmail.com>
Signed-off-by: Benjamin Kietzman <be...@gmail.com>
---
cpp/src/arrow/compute/api_vector.cc | 39 +-
cpp/src/arrow/compute/api_vector.h | 61 +-
cpp/src/arrow/compute/function_internal.h | 54 +-
.../compute/kernels/base_arithmetic_internal.h | 81 +++
cpp/src/arrow/compute/kernels/codegen_internal.h | 3 +
.../arrow/compute/kernels/vector_cumulative_ops.cc | 97 ++-
.../compute/kernels/vector_cumulative_ops_test.cc | 659 ++++++++++++++++++---
cpp/src/arrow/scalar.h | 3 +
docs/source/cpp/compute.rst | 49 +-
docs/source/python/api/compute.rst | 12 +-
python/pyarrow/_compute.pyx | 31 +-
python/pyarrow/compute.py | 3 +-
python/pyarrow/includes/libarrow.pxd | 9 +-
python/pyarrow/tests/test_compute.py | 180 +++++-
14 files changed, 1096 insertions(+), 185 deletions(-)
diff --git a/cpp/src/arrow/compute/api_vector.cc b/cpp/src/arrow/compute/api_vector.cc
index 5044d4f256..b33e3feb72 100644
--- a/cpp/src/arrow/compute/api_vector.cc
+++ b/cpp/src/arrow/compute/api_vector.cc
@@ -26,6 +26,7 @@
#include "arrow/array/array_nested.h"
#include "arrow/array/builder_primitive.h"
#include "arrow/compute/exec.h"
+#include "arrow/compute/function.h"
#include "arrow/compute/function_internal.h"
#include "arrow/compute/kernels/vector_sort_internal.h"
#include "arrow/compute/registry.h"
@@ -142,9 +143,9 @@ static auto kPartitionNthOptionsType = GetFunctionOptionsType<PartitionNthOption
static auto kSelectKOptionsType = GetFunctionOptionsType<SelectKOptions>(
DataMember("k", &SelectKOptions::k),
DataMember("sort_keys", &SelectKOptions::sort_keys));
-static auto kCumulativeSumOptionsType = GetFunctionOptionsType<CumulativeSumOptions>(
- DataMember("start", &CumulativeSumOptions::start),
- DataMember("skip_nulls", &CumulativeSumOptions::skip_nulls));
+static auto kCumulativeOptionsType = GetFunctionOptionsType<CumulativeOptions>(
+ DataMember("start", &CumulativeOptions::start),
+ DataMember("skip_nulls", &CumulativeOptions::skip_nulls));
static auto kRankOptionsType = GetFunctionOptionsType<RankOptions>(
DataMember("sort_keys", &RankOptions::sort_keys),
DataMember("null_placement", &RankOptions::null_placement),
@@ -198,13 +199,15 @@ SelectKOptions::SelectKOptions(int64_t k, std::vector<SortKey> sort_keys)
sort_keys(std::move(sort_keys)) {}
constexpr char SelectKOptions::kTypeName[];
-CumulativeSumOptions::CumulativeSumOptions(double start, bool skip_nulls)
- : CumulativeSumOptions(std::make_shared<DoubleScalar>(start), skip_nulls) {}
-CumulativeSumOptions::CumulativeSumOptions(std::shared_ptr<Scalar> start, bool skip_nulls)
- : FunctionOptions(internal::kCumulativeSumOptionsType),
+CumulativeOptions::CumulativeOptions(bool skip_nulls)
+ : FunctionOptions(internal::kCumulativeOptionsType), skip_nulls(skip_nulls) {}
+CumulativeOptions::CumulativeOptions(double start, bool skip_nulls)
+ : CumulativeOptions(std::make_shared<DoubleScalar>(start), skip_nulls) {}
+CumulativeOptions::CumulativeOptions(std::shared_ptr<Scalar> start, bool skip_nulls)
+ : FunctionOptions(internal::kCumulativeOptionsType),
start(std::move(start)),
skip_nulls(skip_nulls) {}
-constexpr char CumulativeSumOptions::kTypeName[];
+constexpr char CumulativeOptions::kTypeName[];
RankOptions::RankOptions(std::vector<SortKey> sort_keys, NullPlacement null_placement,
RankOptions::Tiebreaker tiebreaker)
@@ -224,7 +227,7 @@ void RegisterVectorOptions(FunctionRegistry* registry) {
DCHECK_OK(registry->AddFunctionOptionsType(kSortOptionsType));
DCHECK_OK(registry->AddFunctionOptionsType(kPartitionNthOptionsType));
DCHECK_OK(registry->AddFunctionOptionsType(kSelectKOptionsType));
- DCHECK_OK(registry->AddFunctionOptionsType(kCumulativeSumOptionsType));
+ DCHECK_OK(registry->AddFunctionOptionsType(kCumulativeOptionsType));
DCHECK_OK(registry->AddFunctionOptionsType(kRankOptionsType));
}
} // namespace internal
@@ -375,12 +378,28 @@ Result<std::shared_ptr<Array>> DropNull(const Array& values, ExecContext* ctx) {
// ----------------------------------------------------------------------
// Cumulative functions
-Result<Datum> CumulativeSum(const Datum& values, const CumulativeSumOptions& options,
+Result<Datum> CumulativeSum(const Datum& values, const CumulativeOptions& options,
bool check_overflow, ExecContext* ctx) {
auto func_name = check_overflow ? "cumulative_sum_checked" : "cumulative_sum";
return CallFunction(func_name, {Datum(values)}, &options, ctx);
}
+Result<Datum> CumulativeProd(const Datum& values, const CumulativeOptions& options,
+ bool check_overflow, ExecContext* ctx) {
+ auto func_name = check_overflow ? "cumulative_prod_checked" : "cumulative_prod";
+ return CallFunction(func_name, {Datum(values)}, &options, ctx);
+}
+
+Result<Datum> CumulativeMax(const Datum& values, const CumulativeOptions& options,
+ ExecContext* ctx) {
+ return CallFunction("cumulative_max", {Datum(values)}, &options, ctx);
+}
+
+Result<Datum> CumulativeMin(const Datum& values, const CumulativeOptions& options,
+ ExecContext* ctx) {
+ return CallFunction("cumulative_min", {Datum(values)}, &options, ctx);
+}
+
// ----------------------------------------------------------------------
// Deprecated functions
diff --git a/cpp/src/arrow/compute/api_vector.h b/cpp/src/arrow/compute/api_vector.h
index 400d69efc7..56bccb38c2 100644
--- a/cpp/src/arrow/compute/api_vector.h
+++ b/cpp/src/arrow/compute/api_vector.h
@@ -210,21 +210,29 @@ class ARROW_EXPORT PartitionNthOptions : public FunctionOptions {
NullPlacement null_placement;
};
-/// \brief Options for cumulative sum function
-class ARROW_EXPORT CumulativeSumOptions : public FunctionOptions {
+/// \brief Options for cumulative functions
+/// \note Also aliased as CumulativeSumOptions for backward compatibility
+class ARROW_EXPORT CumulativeOptions : public FunctionOptions {
public:
- explicit CumulativeSumOptions(double start = 0, bool skip_nulls = false);
- explicit CumulativeSumOptions(std::shared_ptr<Scalar> start, bool skip_nulls = false);
- static constexpr char const kTypeName[] = "CumulativeSumOptions";
- static CumulativeSumOptions Defaults() { return CumulativeSumOptions(); }
-
- /// Optional starting value for cumulative operation computation
- std::shared_ptr<Scalar> start;
+ explicit CumulativeOptions(bool skip_nulls = false);
+ explicit CumulativeOptions(double start, bool skip_nulls = false);
+ explicit CumulativeOptions(std::shared_ptr<Scalar> start, bool skip_nulls = false);
+ static constexpr char const kTypeName[] = "CumulativeOptions";
+ static CumulativeOptions Defaults() { return CumulativeOptions(); }
+
+ /// Optional starting value for cumulative operation computation, default depends on the
+ /// operation and input type.
+ /// - sum: 0
+ /// - prod: 1
+ /// - min: maximum of the input type
+ /// - max: minimum of the input type
+ std::optional<std::shared_ptr<Scalar>> start;
/// If true, nulls in the input are ignored and produce a corresponding null output.
/// When false, the first null encountered is propagated through the remaining output.
bool skip_nulls = false;
};
+using CumulativeSumOptions = CumulativeOptions; // For backward compatibility
/// @}
@@ -607,10 +615,41 @@ Result<Datum> RunEndDecode(const Datum& value, ExecContext* ctx = NULLPTR);
/// \param[in] ctx the function execution context, optional
ARROW_EXPORT
Result<Datum> CumulativeSum(
- const Datum& values,
- const CumulativeSumOptions& options = CumulativeSumOptions::Defaults(),
+ const Datum& values, const CumulativeOptions& options = CumulativeOptions::Defaults(),
bool check_overflow = false, ExecContext* ctx = NULLPTR);
+/// \brief Compute the cumulative product of an array-like object
+///
+/// \param[in] values array-like input
+/// \param[in] options configures cumulative prod behavior
+/// \param[in] check_overflow whether to check for overflow, if true, return Invalid
+/// status on overflow, otherwise wrap around on overflow
+/// \param[in] ctx the function execution context, optional
+ARROW_EXPORT
+Result<Datum> CumulativeProd(
+ const Datum& values, const CumulativeOptions& options = CumulativeOptions::Defaults(),
+ bool check_overflow = false, ExecContext* ctx = NULLPTR);
+
+/// \brief Compute the cumulative max of an array-like object
+///
+/// \param[in] values array-like input
+/// \param[in] options configures cumulative max behavior
+/// \param[in] ctx the function execution context, optional
+ARROW_EXPORT
+Result<Datum> CumulativeMax(
+ const Datum& values, const CumulativeOptions& options = CumulativeOptions::Defaults(),
+ ExecContext* ctx = NULLPTR);
+
+/// \brief Compute the cumulative min of an array-like object
+///
+/// \param[in] values array-like input
+/// \param[in] options configures cumulative min behavior
+/// \param[in] ctx the function execution context, optional
+ARROW_EXPORT
+Result<Datum> CumulativeMin(
+ const Datum& values, const CumulativeOptions& options = CumulativeOptions::Defaults(),
+ ExecContext* ctx = NULLPTR);
+
// ----------------------------------------------------------------------
// Deprecated functions
diff --git a/cpp/src/arrow/compute/function_internal.h b/cpp/src/arrow/compute/function_internal.h
index c0dbaac100..653273ef0f 100644
--- a/cpp/src/arrow/compute/function_internal.h
+++ b/cpp/src/arrow/compute/function_internal.h
@@ -28,6 +28,7 @@
#include "arrow/compute/function.h"
#include "arrow/compute/type_fwd.h"
#include "arrow/result.h"
+#include "arrow/scalar.h"
#include "arrow/status.h"
#include "arrow/util/checked_cast.h"
#include "arrow/util/key_value_metadata.h"
@@ -283,12 +284,6 @@ static inline Result<decltype(MakeScalar(std::declval<T>()))> GenericToScalar(
return MakeScalar(value);
}
-template <typename T>
-static inline Result<decltype(MakeScalar(std::declval<T>()))> GenericToScalar(
- const std::optional<T>& value) {
- return value.has_value() ? MakeScalar(value.value()) : MakeScalar("");
-}
-
// For Clang/libc++: when iterating through vector<bool>, we can't
// pass it by reference so the overload above doesn't apply
static inline Result<std::shared_ptr<Scalar>> GenericToScalar(bool value) {
@@ -382,6 +377,16 @@ static inline Result<std::shared_ptr<Scalar>> GenericToScalar(const Datum& value
}
}
+static inline Result<std::shared_ptr<Scalar>> GenericToScalar(std::nullopt_t) {
+ return std::make_shared<NullScalar>();
+}
+
+template <typename T>
+static inline auto GenericToScalar(const std::optional<T>& value)
+ -> Result<decltype(MakeScalar(value.value()))> {
+ return value.has_value() ? MakeScalar(value.value()) : std::make_shared<NullScalar>();
+}
+
template <typename T>
static inline enable_if_primitive_ctype<typename CTypeTraits<T>::ArrowType, Result<T>>
GenericFromScalar(const std::shared_ptr<Scalar>& value) {
@@ -404,26 +409,6 @@ GenericFromScalar(const std::shared_ptr<Scalar>& value) {
return ValidateEnumValue<T>(raw_val);
}
-template <typename>
-constexpr bool is_optional_impl = false;
-template <typename T>
-constexpr bool is_optional_impl<std::optional<T>> = true;
-
-template <typename T>
-using is_optional =
- std::integral_constant<bool, is_optional_impl<std::decay_t<T>> ||
- std::is_same<T, std::nullopt_t>::value>;
-
-template <typename T, typename R = void>
-using enable_if_optional = enable_if_t<is_optional<T>::value, Result<T>>;
-
-template <typename T>
-static inline enable_if_optional<T> GenericFromScalar(
- const std::shared_ptr<Scalar>& value) {
- using value_type = typename T::value_type;
- return GenericFromScalar<value_type>(value);
-}
-
template <typename T, typename U>
using enable_if_same_result = enable_if_same<T, U, Result<T>>;
@@ -510,6 +495,23 @@ static inline enable_if_same_result<T, Datum> GenericFromScalar(
return Status::Invalid("Cannot deserialize Datum from ", value->ToString());
}
+template <typename>
+constexpr inline bool is_optional_v = false;
+template <typename T>
+constexpr inline bool is_optional_v<std::optional<T>> = true;
+template <>
+constexpr inline bool is_optional_v<std::nullopt_t> = true;
+
+template <typename T>
+static inline std::enable_if_t<is_optional_v<T>, Result<T>> GenericFromScalar(
+ const std::shared_ptr<Scalar>& value) {
+ using value_type = typename T::value_type;
+ if (value->type->id() == Type::NA) {
+ return std::nullopt;
+ }
+ return GenericFromScalar<value_type>(value);
+}
+
template <typename T>
static enable_if_same<typename CTypeTraits<T>::ArrowType, ListType, Result<T>>
GenericFromScalar(const std::shared_ptr<Scalar>& value) {
diff --git a/cpp/src/arrow/compute/kernels/base_arithmetic_internal.h b/cpp/src/arrow/compute/kernels/base_arithmetic_internal.h
index 65329e10db..7798c61577 100644
--- a/cpp/src/arrow/compute/kernels/base_arithmetic_internal.h
+++ b/cpp/src/arrow/compute/kernels/base_arithmetic_internal.h
@@ -17,6 +17,7 @@
#pragma once
+#include <limits>
#include "arrow/compute/api_scalar.h"
#include "arrow/compute/kernels/common_internal.h"
#include "arrow/compute/kernels/util_internal.h"
@@ -605,6 +606,86 @@ struct Sign {
}
};
+struct Max {
+ template <typename T, typename Arg0, typename Arg1>
+ static constexpr enable_if_not_floating_value<T> Call(KernelContext*, Arg0 arg0,
+ Arg1 arg1, Status*) {
+ static_assert(std::is_same<T, Arg0>::value && std::is_same<Arg0, Arg1>::value);
+ return std::max(arg0, arg1);
+ }
+
+ template <typename T, typename Arg0, typename Arg1>
+ static constexpr enable_if_floating_value<T> Call(KernelContext*, Arg0 left, Arg1 right,
+ Status*) {
+ static_assert(std::is_same<T, Arg0>::value && std::is_same<Arg0, Arg1>::value);
+ if (std::isnan(left)) {
+ return right;
+ } else if (std::isnan(right)) {
+ return left;
+ } else {
+ return std::max(left, right);
+ }
+ }
+};
+
+struct Min {
+ template <typename T, typename Arg0, typename Arg1>
+ static constexpr enable_if_not_floating_value<T> Call(KernelContext*, Arg0 arg0,
+ Arg1 arg1, Status*) {
+ static_assert(std::is_same<T, Arg0>::value && std::is_same<Arg0, Arg1>::value);
+ return std::min(arg0, arg1);
+ }
+
+ template <typename T, typename Arg0, typename Arg1>
+ static constexpr enable_if_floating_value<T> Call(KernelContext*, Arg0 left, Arg1 right,
+ Status*) {
+ static_assert(std::is_same<T, Arg0>::value && std::is_same<Arg0, Arg1>::value);
+ if (std::isnan(left)) {
+ return right;
+ } else if (std::isnan(right)) {
+ return left;
+ } else {
+ return std::min(left, right);
+ }
+ }
+};
+
+/// The term identity comes from the mathematical notion of a monoid.
+/// For any associative binary operation, identity is defined as:
+/// Op(identity, x) = x for all x.
+template <typename Op>
+struct Identity;
+
+template <>
+struct Identity<Add> {
+ template <typename Value>
+ static constexpr Value value{0};
+};
+
+template <>
+struct Identity<AddChecked> : Identity<Add> {};
+
+template <>
+struct Identity<Multiply> {
+ template <typename Value>
+ static constexpr Value value{1};
+};
+
+template <>
+struct Identity<MultiplyChecked> : Identity<Multiply> {};
+
+template <>
+struct Identity<Max> {
+ template <typename Value>
+ static constexpr Value value{std::numeric_limits<Value>::min()};
+};
+
+template <>
+struct Identity<Min> {
+ template <typename Value>
+ static constexpr Value value{std::numeric_limits<Value>::max()};
+};
+
} // namespace internal
} // namespace compute
} // namespace arrow
diff --git a/cpp/src/arrow/compute/kernels/codegen_internal.h b/cpp/src/arrow/compute/kernels/codegen_internal.h
index 3dd1f2b811..6224a9fc2a 100644
--- a/cpp/src/arrow/compute/kernels/codegen_internal.h
+++ b/cpp/src/arrow/compute/kernels/codegen_internal.h
@@ -213,6 +213,9 @@ using enable_if_integer_value =
template <typename T, typename R = T>
using enable_if_floating_value = enable_if_t<std::is_floating_point<T>::value, R>;
+template <typename T, typename R = T>
+using enable_if_not_floating_value = enable_if_t<!std::is_floating_point<T>::value, R>;
+
template <typename T, typename R = T>
using enable_if_decimal_value =
enable_if_t<std::is_same<Decimal128, T>::value || std::is_same<Decimal256, T>::value,
diff --git a/cpp/src/arrow/compute/kernels/vector_cumulative_ops.cc b/cpp/src/arrow/compute/kernels/vector_cumulative_ops.cc
index 91d78f690b..82caa3bff5 100644
--- a/cpp/src/arrow/compute/kernels/vector_cumulative_ops.cc
+++ b/cpp/src/arrow/compute/kernels/vector_cumulative_ops.cc
@@ -15,6 +15,7 @@
// specific language governing permissions and limitations
// under the License.
+#include <type_traits>
#include "arrow/array/array_base.h"
#include "arrow/array/builder_primitive.h"
#include "arrow/compute/api_scalar.h"
@@ -49,15 +50,12 @@ struct CumulativeOptionsWrapper : public OptionsWrapper<OptionsType> {
}
const auto& start = options->start;
- if (!start || !start->is_valid) {
- return Status::Invalid("Cumulative `start` option must be non-null and valid");
- }
- // Ensure `start` option matches input type
- if (!start->type->Equals(*args.inputs[0])) {
- ARROW_ASSIGN_OR_RAISE(
- auto casted_start,
- Cast(Datum(start), args.inputs[0], CastOptions::Safe(), ctx->exec_context()));
+ // Ensure `start` option, if given, matches input type
+ if (start.has_value() && !start.value()->type->Equals(*args.inputs[0])) {
+ ARROW_ASSIGN_OR_RAISE(auto casted_start,
+ Cast(Datum(start.value()), args.inputs[0],
+ CastOptions::Safe(), ctx->exec_context()));
auto new_options = OptionsType(casted_start.scalar(), options->skip_nulls);
return std::make_unique<State>(new_options);
}
@@ -66,10 +64,11 @@ struct CumulativeOptionsWrapper : public OptionsWrapper<OptionsType> {
};
// The driver kernel for all cumulative compute functions. Op is a compute kernel
-// representing any binary associative operation (add, product, min, max, etc.) and
-// OptionsType the options type corresponding to Op. ArgType and OutType are the input
-// and output types, which will normally be the same (e.g. the cumulative sum of an array
-// of Int64Type will result in an array of Int64Type).
+// representing any binary associative operation with an identity element (add, product,
+// min, max, etc.), i.e. ones that form a monoid, and OptionsType the options type
+// corresponding to Op. ArgType and OutType are the input and output types, which will
+// normally be the same (e.g. the cumulative sum of an array of Int64Type will result in
+// an array of Int64Type).
template <typename OutType, typename ArgType, typename Op, typename OptionsType>
struct Accumulator {
using OutValue = typename GetOutputType<OutType>::T;
@@ -118,10 +117,15 @@ struct Accumulator {
template <typename OutType, typename ArgType, typename Op, typename OptionsType>
struct CumulativeKernel {
+ using OutValue = typename GetOutputType<OutType>::T;
static Status Exec(KernelContext* ctx, const ExecSpan& batch, ExecResult* out) {
const auto& options = CumulativeOptionsWrapper<OptionsType>::Get(ctx);
Accumulator<OutType, ArgType, Op, OptionsType> accumulator(ctx);
- accumulator.current_value = UnboxScalar<OutType>::Unbox(*(options.start));
+ if (options.start.has_value()) {
+ accumulator.current_value = UnboxScalar<OutType>::Unbox(*(options.start.value()));
+ } else {
+ accumulator.current_value = Identity<Op>::template value<OutValue>;
+ }
accumulator.skip_nulls = options.skip_nulls;
RETURN_NOT_OK(accumulator.builder.Reserve(batch.length));
@@ -136,10 +140,15 @@ struct CumulativeKernel {
template <typename OutType, typename ArgType, typename Op, typename OptionsType>
struct CumulativeKernelChunked {
+ using OutValue = typename GetOutputType<OutType>::T;
static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
const auto& options = CumulativeOptionsWrapper<OptionsType>::Get(ctx);
Accumulator<OutType, ArgType, Op, OptionsType> accumulator(ctx);
- accumulator.current_value = UnboxScalar<OutType>::Unbox(*(options.start));
+ if (options.start.has_value()) {
+ accumulator.current_value = UnboxScalar<OutType>::Unbox(*(options.start.value()));
+ } else {
+ accumulator.current_value = Identity<Op>::template value<OutValue>;
+ }
accumulator.skip_nulls = options.skip_nulls;
const ChunkedArray& chunked_input = *batch[0].chunked_array();
@@ -160,18 +169,54 @@ const FunctionDoc cumulative_sum_doc{
("`values` must be numeric. Return an array/chunked array which is the\n"
"cumulative sum computed over `values`. Results will wrap around on\n"
"integer overflow. Use function \"cumulative_sum_checked\" if you want\n"
- "overflow to return an error."),
+ "overflow to return an error. The default start is 0."),
{"values"},
- "CumulativeSumOptions"};
+ "CumulativeOptions"};
const FunctionDoc cumulative_sum_checked_doc{
"Compute the cumulative sum over a numeric input",
("`values` must be numeric. Return an array/chunked array which is the\n"
"cumulative sum computed over `values`. This function returns an error\n"
"on overflow. For a variant that doesn't fail on overflow, use\n"
- "function \"cumulative_sum\"."),
+ "function \"cumulative_sum\". The default start is 0."),
+ {"values"},
+ "CumulativeOptions"};
+
+const FunctionDoc cumulative_prod_doc{
+ "Compute the cumulative product over a numeric input",
+ ("`values` must be numeric. Return an array/chunked array which is the\n"
+ "cumulative product computed over `values`. Results will wrap around on\n"
+ "integer overflow. Use function \"cumulative_prod_checked\" if you want\n"
+ "overflow to return an error. The default start is 1."),
+ {"values"},
+ "CumulativeOptions"};
+
+const FunctionDoc cumulative_prod_checked_doc{
+ "Compute the cumulative product over a numeric input",
+ ("`values` must be numeric. Return an array/chunked array which is the\n"
+ "cumulative product computed over `values`. This function returns an error\n"
+ "on overflow. For a variant that doesn't fail on overflow, use\n"
+ "function \"cumulative_prod\". The default start is 1."),
+ {"values"},
+ "CumulativeOptions"};
+
+const FunctionDoc cumulative_max_doc{
+ "Compute the cumulative max over a numeric input",
+ ("`values` must be numeric. Return an array/chunked array which is the\n"
+ "cumulative max computed over `values`. The default start is the minimum\n"
+ "value of input type (so that any other value will replace the\n"
+ "start as the new maximum)."),
{"values"},
- "CumulativeSumOptions"};
+ "CumulativeOptions"};
+
+const FunctionDoc cumulative_min_doc{
+ "Compute the cumulative min over a numeric input",
+ ("`values` must be numeric. Return an array/chunked array which is the\n"
+ "cumulative min computed over `values`. The default start is the maximum\n"
+ "value of input type (so that any other value will replace the\n"
+ "start as the new minimum)."),
+ {"values"},
+ "CumulativeOptions"};
} // namespace
template <typename Op, typename OptionsType>
@@ -203,10 +248,20 @@ void MakeVectorCumulativeFunction(FunctionRegistry* registry, const std::string
}
void RegisterVectorCumulativeSum(FunctionRegistry* registry) {
- MakeVectorCumulativeFunction<Add, CumulativeSumOptions>(registry, "cumulative_sum",
- cumulative_sum_doc);
- MakeVectorCumulativeFunction<AddChecked, CumulativeSumOptions>(
+ MakeVectorCumulativeFunction<Add, CumulativeOptions>(registry, "cumulative_sum",
+ cumulative_sum_doc);
+ MakeVectorCumulativeFunction<AddChecked, CumulativeOptions>(
registry, "cumulative_sum_checked", cumulative_sum_checked_doc);
+
+ MakeVectorCumulativeFunction<Multiply, CumulativeOptions>(registry, "cumulative_prod",
+ cumulative_prod_doc);
+ MakeVectorCumulativeFunction<MultiplyChecked, CumulativeOptions>(
+ registry, "cumulative_prod_checked", cumulative_prod_checked_doc);
+
+ MakeVectorCumulativeFunction<Min, CumulativeOptions>(registry, "cumulative_min",
+ cumulative_min_doc);
+ MakeVectorCumulativeFunction<Max, CumulativeOptions>(registry, "cumulative_max",
+ cumulative_max_doc);
}
} // namespace internal
diff --git a/cpp/src/arrow/compute/kernels/vector_cumulative_ops_test.cc b/cpp/src/arrow/compute/kernels/vector_cumulative_ops_test.cc
index 3c6bb3c1d1..4ff46eb4ac 100644
--- a/cpp/src/arrow/compute/kernels/vector_cumulative_ops_test.cc
+++ b/cpp/src/arrow/compute/kernels/vector_cumulative_ops_test.cc
@@ -24,6 +24,7 @@
#include "arrow/array.h"
#include "arrow/chunked_array.h"
#include "arrow/compute/api_vector.h"
+#include "arrow/scalar.h"
#include "arrow/testing/gtest_util.h"
#include "arrow/testing/util.h"
#include "arrow/type.h"
@@ -36,43 +37,44 @@
namespace arrow {
namespace compute {
-TEST(TestCumulativeSum, Empty) {
- CumulativeSumOptions options;
- for (auto ty : NumericTypes()) {
- auto empty_arr = ArrayFromJSON(ty, "[]");
- auto empty_chunked = ChunkedArrayFromJSON(ty, {"[]"});
- CheckVectorUnary("cumulative_sum", empty_arr, empty_arr, &options);
- CheckVectorUnary("cumulative_sum_checked", empty_arr, empty_arr, &options);
+constexpr static std::array<const char*, 6> kCumulativeFunctionNames{
+ "cumulative_sum", "cumulative_sum_checked", "cumulative_prod",
+ "cumulative_prod_checked", "cumulative_min", "cumulative_max"};
+
+TEST(TestCumulative, Empty) {
+ for (auto function : kCumulativeFunctionNames) {
+ CumulativeOptions options;
+ for (auto ty : NumericTypes()) {
+ auto empty_arr = ArrayFromJSON(ty, "[]");
+ auto empty_chunked = ChunkedArrayFromJSON(ty, {"[]"});
+ CheckVectorUnary(function, empty_arr, empty_arr, &options);
- CheckVectorUnary("cumulative_sum", empty_chunked, empty_chunked, &options);
- CheckVectorUnary("cumulative_sum_checked", empty_chunked, empty_chunked, &options);
+ CheckVectorUnary(function, empty_chunked, empty_chunked, &options);
+ }
}
}
-TEST(TestCumulativeSum, AllNulls) {
- CumulativeSumOptions options;
- for (auto ty : NumericTypes()) {
- auto nulls_arr = ArrayFromJSON(ty, "[null, null, null]");
- auto nulls_one_chunk = ChunkedArrayFromJSON(ty, {"[null, null, null]"});
- auto nulls_three_chunks = ChunkedArrayFromJSON(ty, {"[null]", "[null]", "[null]"});
- CheckVectorUnary("cumulative_sum", nulls_arr, nulls_arr, &options);
- CheckVectorUnary("cumulative_sum_checked", nulls_arr, nulls_arr, &options);
-
- CheckVectorUnary("cumulative_sum", nulls_one_chunk, nulls_one_chunk, &options);
- CheckVectorUnary("cumulative_sum_checked", nulls_one_chunk, nulls_one_chunk,
- &options);
+TEST(TestCumulative, AllNulls) {
+ for (auto function : kCumulativeFunctionNames) {
+ CumulativeOptions options;
+ for (auto ty : NumericTypes()) {
+ auto nulls_arr = ArrayFromJSON(ty, "[null, null, null]");
+ auto nulls_one_chunk = ChunkedArrayFromJSON(ty, {"[null, null, null]"});
+ auto nulls_three_chunks = ChunkedArrayFromJSON(ty, {"[null]", "[null]", "[null]"});
+ CheckVectorUnary(function, nulls_arr, nulls_arr, &options);
- CheckVectorUnary("cumulative_sum", nulls_three_chunks, nulls_one_chunk, &options);
- CheckVectorUnary("cumulative_sum_checked", nulls_three_chunks, nulls_one_chunk,
- &options);
+ CheckVectorUnary(function, nulls_one_chunk, nulls_one_chunk, &options);
+
+ CheckVectorUnary(function, nulls_three_chunks, nulls_one_chunk, &options);
+ }
}
}
TEST(TestCumulativeSum, ScalarInput) {
- CumulativeSumOptions no_start_no_skip;
- CumulativeSumOptions no_start_do_skip(0, true);
- CumulativeSumOptions has_start_no_skip(10);
- CumulativeSumOptions has_start_do_skip(10, true);
+ CumulativeOptions no_start_no_skip;
+ CumulativeOptions no_start_do_skip(0, true);
+ CumulativeOptions has_start_no_skip(10.0);
+ CumulativeOptions has_start_do_skip(10, true);
for (auto ty : NumericTypes()) {
CheckVectorUnary("cumulative_sum", ScalarFromJSON(ty, "10"),
@@ -105,6 +107,43 @@ TEST(TestCumulativeSum, ScalarInput) {
}
}
+TEST(TestCumulativeProd, ScalarInput) {
+ CumulativeOptions no_start_no_skip;
+ CumulativeOptions no_start_do_skip(1, true);
+ CumulativeOptions has_start_no_skip(10.0);
+ CumulativeOptions has_start_do_skip(10, true);
+
+ for (auto ty : NumericTypes()) {
+ CheckVectorUnary("cumulative_prod", ScalarFromJSON(ty, "10"),
+ ArrayFromJSON(ty, "[10]"), &no_start_no_skip);
+ CheckVectorUnary("cumulative_prod_checked", ScalarFromJSON(ty, "10"),
+ ArrayFromJSON(ty, "[10]"), &no_start_no_skip);
+
+ CheckVectorUnary("cumulative_prod", ScalarFromJSON(ty, "10"),
+ ArrayFromJSON(ty, "[100]"), &has_start_no_skip);
+ CheckVectorUnary("cumulative_prod_checked", ScalarFromJSON(ty, "10"),
+ ArrayFromJSON(ty, "[100]"), &has_start_no_skip);
+
+ CheckVectorUnary("cumulative_prod", ScalarFromJSON(ty, "null"),
+ ArrayFromJSON(ty, "[null]"), &no_start_no_skip);
+ CheckVectorUnary("cumulative_prod_checked", ScalarFromJSON(ty, "null"),
+ ArrayFromJSON(ty, "[null]"), &no_start_no_skip);
+ CheckVectorUnary("cumulative_prod", ScalarFromJSON(ty, "null"),
+ ArrayFromJSON(ty, "[null]"), &has_start_no_skip);
+ CheckVectorUnary("cumulative_prod_checked", ScalarFromJSON(ty, "null"),
+ ArrayFromJSON(ty, "[null]"), &has_start_no_skip);
+
+ CheckVectorUnary("cumulative_prod", ScalarFromJSON(ty, "null"),
+ ArrayFromJSON(ty, "[null]"), &no_start_do_skip);
+ CheckVectorUnary("cumulative_prod_checked", ScalarFromJSON(ty, "null"),
+ ArrayFromJSON(ty, "[null]"), &no_start_do_skip);
+ CheckVectorUnary("cumulative_prod", ScalarFromJSON(ty, "null"),
+ ArrayFromJSON(ty, "[null]"), &has_start_do_skip);
+ CheckVectorUnary("cumulative_prod_checked", ScalarFromJSON(ty, "null"),
+ ArrayFromJSON(ty, "[null]"), &has_start_do_skip);
+ }
+}
+
using testing::HasSubstr;
template <typename ArrowType>
@@ -112,7 +151,7 @@ void CheckCumulativeSumUnsignedOverflow() {
using CType = typename TypeTraits<ArrowType>::CType;
using BuilderType = typename TypeTraits<ArrowType>::BuilderType;
- CumulativeSumOptions pos_overflow(1);
+ CumulativeOptions pos_overflow(1.0);
auto max = std::numeric_limits<CType>::max();
auto min = std::numeric_limits<CType>::lowest();
@@ -138,7 +177,7 @@ void CheckCumulativeSumSignedOverflow() {
CheckCumulativeSumUnsignedOverflow<ArrowType>();
- CumulativeSumOptions neg_overflow(-1);
+ CumulativeOptions neg_overflow(-1.0);
auto max = std::numeric_limits<CType>::max();
auto min = std::numeric_limits<CType>::lowest();
@@ -167,8 +206,64 @@ TEST(TestCumulativeSum, IntegerOverflow) {
CheckCumulativeSumSignedOverflow<Int64Type>();
}
+template <typename ArrowType>
+void CheckCumulativeProdUnsignedOverflow() {
+ using CType = typename TypeTraits<ArrowType>::CType;
+ using BuilderType = typename TypeTraits<ArrowType>::BuilderType;
+
+ CumulativeOptions pos_overflow(2.0);
+ auto max = std::numeric_limits<CType>::max();
+ auto min = std::numeric_limits<CType>::lowest();
+
+ BuilderType builder;
+ std::shared_ptr<Array> half_max_arr;
+ std::shared_ptr<Array> min_arr;
+ ASSERT_OK(builder.Append(max / 2 + 1)); // 2 * (max / 2 + 1) overflows to min
+ ASSERT_OK(builder.Finish(&half_max_arr));
+ builder.Reset();
+ ASSERT_OK(builder.Append(min));
+ ASSERT_OK(builder.Finish(&min_arr));
+
+ EXPECT_RAISES_WITH_MESSAGE_THAT(
+ Invalid, HasSubstr("overflow"),
+ CallFunction("cumulative_prod_checked", {half_max_arr}, &pos_overflow));
+ CheckVectorUnary("cumulative_prod", half_max_arr, min_arr, &pos_overflow);
+}
+
+template <typename ArrowType>
+void CheckCumulativeProdSignedOverflow() {
+ using CType = typename TypeTraits<ArrowType>::CType;
+ using BuilderType = typename TypeTraits<ArrowType>::BuilderType;
+
+ CheckCumulativeProdUnsignedOverflow<ArrowType>(); // shared positive-overflow case
+
+ CumulativeOptions neg_overflow(-1.0); // min * -1 overflows to min
+ auto min = std::numeric_limits<CType>::lowest();
+
+ BuilderType builder;
+ std::shared_ptr<Array> min_arr;
+ builder.Reset();
+ ASSERT_OK(builder.Append(min));
+ ASSERT_OK(builder.Finish(&min_arr));
+ EXPECT_RAISES_WITH_MESSAGE_THAT(
+ Invalid, HasSubstr("overflow"),
+ CallFunction("cumulative_prod_checked", {min_arr}, &neg_overflow));
+ CheckVectorUnary("cumulative_prod", min_arr, min_arr, &neg_overflow); // unchecked wraps
+}
+
+TEST(TestCumulativeProd, IntegerOverflow) {
+ CheckCumulativeProdUnsignedOverflow<UInt8Type>();
+ CheckCumulativeProdUnsignedOverflow<UInt16Type>();
+ CheckCumulativeProdUnsignedOverflow<UInt32Type>();
+ CheckCumulativeProdUnsignedOverflow<UInt64Type>();
+ CheckCumulativeProdSignedOverflow<Int8Type>();
+ CheckCumulativeProdSignedOverflow<Int16Type>();
+ CheckCumulativeProdSignedOverflow<Int32Type>();
+ CheckCumulativeProdSignedOverflow<Int64Type>();
+}
+
TEST(TestCumulativeSum, NoStartNoSkip) {
- CumulativeSumOptions options;
+ CumulativeOptions options;
for (auto ty : NumericTypes()) {
CheckVectorUnary("cumulative_sum", ArrayFromJSON(ty, "[1, 2, 3, 4, 5, 6]"),
ArrayFromJSON(ty, "[1, 3, 6, 10, 15, 21]"), &options);
@@ -212,52 +307,53 @@ TEST(TestCumulativeSum, NoStartNoSkip) {
}
}
-TEST(TestCumulativeSum, NoStartDoSkip) {
- CumulativeSumOptions options(0, true);
+TEST(TestCumulativeSum, HasStartNoSkip) {
+ CumulativeOptions options(10.0);
for (auto ty : NumericTypes()) {
CheckVectorUnary("cumulative_sum", ArrayFromJSON(ty, "[1, 2, 3, 4, 5, 6]"),
- ArrayFromJSON(ty, "[1, 3, 6, 10, 15, 21]"), &options);
+ ArrayFromJSON(ty, "[11, 13, 16, 20, 25, 31]"), &options);
CheckVectorUnary("cumulative_sum_checked", ArrayFromJSON(ty, "[1, 2, 3, 4, 5, 6]"),
- ArrayFromJSON(ty, "[1, 3, 6, 10, 15, 21]"), &options);
+ ArrayFromJSON(ty, "[11, 13, 16, 20, 25, 31]"), &options);
CheckVectorUnary("cumulative_sum", ArrayFromJSON(ty, "[1, 2, null, 4, null, 6]"),
- ArrayFromJSON(ty, "[1, 3, null, 7, null, 13]"), &options);
+ ArrayFromJSON(ty, "[11, 13, null, null, null, null]"), &options);
CheckVectorUnary("cumulative_sum_checked",
ArrayFromJSON(ty, "[1, 2, null, 4, null, 6]"),
- ArrayFromJSON(ty, "[1, 3, null, 7, null, 13]"), &options);
+ ArrayFromJSON(ty, "[11, 13, null, null, null, null]"), &options);
CheckVectorUnary("cumulative_sum", ArrayFromJSON(ty, "[null, 2, null, 4, null, 6]"),
- ArrayFromJSON(ty, "[null, 2, null, 6, null, 12]"), &options);
+ ArrayFromJSON(ty, "[null, null, null, null, null, null]"), &options);
CheckVectorUnary("cumulative_sum_checked",
ArrayFromJSON(ty, "[null, 2, null, 4, null, 6]"),
- ArrayFromJSON(ty, "[null, 2, null, 6, null, 12]"), &options);
+ ArrayFromJSON(ty, "[null, null, null, null, null, null]"), &options);
CheckVectorUnary("cumulative_sum",
ChunkedArrayFromJSON(ty, {"[1, 2, 3]", "[4, 5, 6]"}),
- ChunkedArrayFromJSON(ty, {"[1, 3, 6, 10, 15, 21]"}), &options);
+ ChunkedArrayFromJSON(ty, {"[11, 13, 16, 20, 25, 31]"}), &options);
CheckVectorUnary("cumulative_sum_checked",
ChunkedArrayFromJSON(ty, {"[1, 2, 3]", "[4, 5, 6]"}),
- ChunkedArrayFromJSON(ty, {"[1, 3, 6, 10, 15, 21]"}), &options);
+ ChunkedArrayFromJSON(ty, {"[11, 13, 16, 20, 25, 31]"}), &options);
- CheckVectorUnary("cumulative_sum",
- ChunkedArrayFromJSON(ty, {"[1, 2, null]", "[4, null, 6]"}),
- ChunkedArrayFromJSON(ty, {"[1, 3, null, 7, null, 13]"}), &options);
+ CheckVectorUnary(
+ "cumulative_sum", ChunkedArrayFromJSON(ty, {"[1, 2, null]", "[4, null, 6]"}),
+ ChunkedArrayFromJSON(ty, {"[11, 13, null, null, null, null]"}), &options);
CheckVectorUnary("cumulative_sum_checked",
ChunkedArrayFromJSON(ty, {"[1, 2, null]", "[4, null, 6]"}),
- ChunkedArrayFromJSON(ty, {"[1, 3, null, 7, null, 13]"}), &options);
+ ChunkedArrayFromJSON(ty, {"[11, 13, null, null, null, null]"}),
+ &options);
CheckVectorUnary(
"cumulative_sum", ChunkedArrayFromJSON(ty, {"[null, 2, null]", "[4, null, 6]"}),
- ChunkedArrayFromJSON(ty, {"[null, 2, null, 6, null, 12]"}), &options);
+ ChunkedArrayFromJSON(ty, {"[null, null, null, null, null, null]"}), &options);
CheckVectorUnary("cumulative_sum_checked",
ChunkedArrayFromJSON(ty, {"[null, 2, null]", "[4, null, 6]"}),
- ChunkedArrayFromJSON(ty, {"[null, 2, null, 6, null, 12]"}),
+ ChunkedArrayFromJSON(ty, {"[null, null, null, null, null, null]"}),
&options);
}
}
-TEST(TestCumulativeSum, HasStartNoSkip) {
- CumulativeSumOptions options(10);
+TEST(TestCumulativeSum, HasStartDoSkip) {
+ CumulativeOptions options(10, true);
for (auto ty : NumericTypes()) {
CheckVectorUnary("cumulative_sum", ArrayFromJSON(ty, "[1, 2, 3, 4, 5, 6]"),
ArrayFromJSON(ty, "[11, 13, 16, 20, 25, 31]"), &options);
@@ -265,16 +361,16 @@ TEST(TestCumulativeSum, HasStartNoSkip) {
ArrayFromJSON(ty, "[11, 13, 16, 20, 25, 31]"), &options);
CheckVectorUnary("cumulative_sum", ArrayFromJSON(ty, "[1, 2, null, 4, null, 6]"),
- ArrayFromJSON(ty, "[11, 13, null, null, null, null]"), &options);
+ ArrayFromJSON(ty, "[11, 13, null, 17, null, 23]"), &options);
CheckVectorUnary("cumulative_sum_checked",
ArrayFromJSON(ty, "[1, 2, null, 4, null, 6]"),
- ArrayFromJSON(ty, "[11, 13, null, null, null, null]"), &options);
+ ArrayFromJSON(ty, "[11, 13, null, 17, null, 23]"), &options);
CheckVectorUnary("cumulative_sum", ArrayFromJSON(ty, "[null, 2, null, 4, null, 6]"),
- ArrayFromJSON(ty, "[null, null, null, null, null, null]"), &options);
+ ArrayFromJSON(ty, "[null, 12, null, 16, null, 22]"), &options);
CheckVectorUnary("cumulative_sum_checked",
ArrayFromJSON(ty, "[null, 2, null, 4, null, 6]"),
- ArrayFromJSON(ty, "[null, null, null, null, null, null]"), &options);
+ ArrayFromJSON(ty, "[null, 12, null, 16, null, 22]"), &options);
CheckVectorUnary("cumulative_sum",
ChunkedArrayFromJSON(ty, {"[1, 2, 3]", "[4, 5, 6]"}),
@@ -285,76 +381,487 @@ TEST(TestCumulativeSum, HasStartNoSkip) {
CheckVectorUnary(
"cumulative_sum", ChunkedArrayFromJSON(ty, {"[1, 2, null]", "[4, null, 6]"}),
- ChunkedArrayFromJSON(ty, {"[11, 13, null, null, null, null]"}), &options);
+ ChunkedArrayFromJSON(ty, {"[11, 13, null, 17, null, 23]"}), &options);
CheckVectorUnary("cumulative_sum_checked",
ChunkedArrayFromJSON(ty, {"[1, 2, null]", "[4, null, 6]"}),
- ChunkedArrayFromJSON(ty, {"[11, 13, null, null, null, null]"}),
+ ChunkedArrayFromJSON(ty, {"[11, 13, null, 17, null, 23]"}),
&options);
CheckVectorUnary(
"cumulative_sum", ChunkedArrayFromJSON(ty, {"[null, 2, null]", "[4, null, 6]"}),
- ChunkedArrayFromJSON(ty, {"[null, null, null, null, null, null]"}), &options);
+ ChunkedArrayFromJSON(ty, {"[null, 12, null, 16, null, 22]"}), &options);
CheckVectorUnary("cumulative_sum_checked",
ChunkedArrayFromJSON(ty, {"[null, 2, null]", "[4, null, 6]"}),
- ChunkedArrayFromJSON(ty, {"[null, null, null, null, null, null]"}),
+ ChunkedArrayFromJSON(ty, {"[null, 12, null, 16, null, 22]"}),
&options);
}
}
-TEST(TestCumulativeSum, HasStartDoSkip) {
- CumulativeSumOptions options(10, true);
+TEST(TestCumulativeSum, NoStartDoSkip) {
+ CumulativeOptions options(0, true);
for (auto ty : NumericTypes()) {
CheckVectorUnary("cumulative_sum", ArrayFromJSON(ty, "[1, 2, 3, 4, 5, 6]"),
- ArrayFromJSON(ty, "[11, 13, 16, 20, 25, 31]"), &options);
+ ArrayFromJSON(ty, "[1, 3, 6, 10, 15, 21]"), &options);
CheckVectorUnary("cumulative_sum_checked", ArrayFromJSON(ty, "[1, 2, 3, 4, 5, 6]"),
- ArrayFromJSON(ty, "[11, 13, 16, 20, 25, 31]"), &options);
+ ArrayFromJSON(ty, "[1, 3, 6, 10, 15, 21]"), &options);
CheckVectorUnary("cumulative_sum", ArrayFromJSON(ty, "[1, 2, null, 4, null, 6]"),
- ArrayFromJSON(ty, "[11, 13, null, 17, null, 23]"), &options);
+ ArrayFromJSON(ty, "[1, 3, null, 7, null, 13]"), &options);
CheckVectorUnary("cumulative_sum_checked",
ArrayFromJSON(ty, "[1, 2, null, 4, null, 6]"),
- ArrayFromJSON(ty, "[11, 13, null, 17, null, 23]"), &options);
+ ArrayFromJSON(ty, "[1, 3, null, 7, null, 13]"), &options);
CheckVectorUnary("cumulative_sum", ArrayFromJSON(ty, "[null, 2, null, 4, null, 6]"),
- ArrayFromJSON(ty, "[null, 12, null, 16, null, 22]"), &options);
+ ArrayFromJSON(ty, "[null, 2, null, 6, null, 12]"), &options);
CheckVectorUnary("cumulative_sum_checked",
ArrayFromJSON(ty, "[null, 2, null, 4, null, 6]"),
- ArrayFromJSON(ty, "[null, 12, null, 16, null, 22]"), &options);
+ ArrayFromJSON(ty, "[null, 2, null, 6, null, 12]"), &options);
CheckVectorUnary("cumulative_sum",
ChunkedArrayFromJSON(ty, {"[1, 2, 3]", "[4, 5, 6]"}),
- ChunkedArrayFromJSON(ty, {"[11, 13, 16, 20, 25, 31]"}), &options);
+ ChunkedArrayFromJSON(ty, {"[1, 3, 6, 10, 15, 21]"}), &options);
CheckVectorUnary("cumulative_sum_checked",
ChunkedArrayFromJSON(ty, {"[1, 2, 3]", "[4, 5, 6]"}),
- ChunkedArrayFromJSON(ty, {"[11, 13, 16, 20, 25, 31]"}), &options);
+ ChunkedArrayFromJSON(ty, {"[1, 3, 6, 10, 15, 21]"}), &options);
- CheckVectorUnary(
- "cumulative_sum", ChunkedArrayFromJSON(ty, {"[1, 2, null]", "[4, null, 6]"}),
- ChunkedArrayFromJSON(ty, {"[11, 13, null, 17, null, 23]"}), &options);
+ CheckVectorUnary("cumulative_sum",
+ ChunkedArrayFromJSON(ty, {"[1, 2, null]", "[4, null, 6]"}),
+ ChunkedArrayFromJSON(ty, {"[1, 3, null, 7, null, 13]"}), &options);
CheckVectorUnary("cumulative_sum_checked",
ChunkedArrayFromJSON(ty, {"[1, 2, null]", "[4, null, 6]"}),
- ChunkedArrayFromJSON(ty, {"[11, 13, null, 17, null, 23]"}),
- &options);
+ ChunkedArrayFromJSON(ty, {"[1, 3, null, 7, null, 13]"}), &options);
CheckVectorUnary(
"cumulative_sum", ChunkedArrayFromJSON(ty, {"[null, 2, null]", "[4, null, 6]"}),
- ChunkedArrayFromJSON(ty, {"[null, 12, null, 16, null, 22]"}), &options);
+ ChunkedArrayFromJSON(ty, {"[null, 2, null, 6, null, 12]"}), &options);
CheckVectorUnary("cumulative_sum_checked",
ChunkedArrayFromJSON(ty, {"[null, 2, null]", "[4, null, 6]"}),
- ChunkedArrayFromJSON(ty, {"[null, 12, null, 16, null, 22]"}),
+ ChunkedArrayFromJSON(ty, {"[null, 2, null, 6, null, 12]"}),
+ &options);
+ }
+}
+
+TEST(TestCumulativeProd, NoStartNoSkip) {
+ CumulativeOptions options;
+ for (auto ty : NumericTypes()) {
+ CheckVectorUnary("cumulative_prod", ArrayFromJSON(ty, "[1, 2, 3, 4, 5]"),
+ ArrayFromJSON(ty, "[1, 2, 6, 24, 120]"), &options);
+ CheckVectorUnary("cumulative_prod_checked", ArrayFromJSON(ty, "[1, 2, 3, 4, 5]"),
+ ArrayFromJSON(ty, "[1, 2, 6, 24, 120]"), &options);
+
+ CheckVectorUnary("cumulative_prod", ArrayFromJSON(ty, "[1, 2, null, 4, null, 6]"),
+ ArrayFromJSON(ty, "[1, 2, null, null, null, null]"), &options);
+ CheckVectorUnary("cumulative_prod_checked",
+ ArrayFromJSON(ty, "[1, 2, null, 4, null, 6]"),
+ ArrayFromJSON(ty, "[1, 2, null, null, null, null]"), &options);
+
+ CheckVectorUnary("cumulative_prod", ArrayFromJSON(ty, "[null, 2, null, 4, null, 6]"),
+ ArrayFromJSON(ty, "[null, null, null, null, null, null]"), &options);
+ CheckVectorUnary("cumulative_prod_checked",
+ ArrayFromJSON(ty, "[null, 2, null, 4, null, 6]"),
+ ArrayFromJSON(ty, "[null, null, null, null, null, null]"), &options);
+
+ CheckVectorUnary("cumulative_prod", ChunkedArrayFromJSON(ty, {"[1, 2, 3]", "[4, 5]"}),
+ ChunkedArrayFromJSON(ty, {"[1, 2, 6, 24, 120]"}), &options);
+ CheckVectorUnary("cumulative_prod_checked",
+ ChunkedArrayFromJSON(ty, {"[1, 2, 3]", "[4, 5]"}),
+ ChunkedArrayFromJSON(ty, {"[1, 2, 6, 24, 120]"}), &options);
+
+ CheckVectorUnary(
+ "cumulative_prod", ChunkedArrayFromJSON(ty, {"[1, 2, null]", "[4, null, 6]"}),
+ ChunkedArrayFromJSON(ty, {"[1, 2, null, null, null, null]"}), &options);
+ CheckVectorUnary("cumulative_prod_checked",
+ ChunkedArrayFromJSON(ty, {"[1, 2, null]", "[4, null, 6]"}),
+ ChunkedArrayFromJSON(ty, {"[1, 2, null, null, null, null]"}),
&options);
+
+ CheckVectorUnary(
+ "cumulative_prod", ChunkedArrayFromJSON(ty, {"[null, 2, null]", "[4, null, 6]"}),
+ ChunkedArrayFromJSON(ty, {"[null, null, null, null, null, null]"}), &options);
+ CheckVectorUnary("cumulative_prod_checked",
+ ChunkedArrayFromJSON(ty, {"[null, 2, null]", "[4, null, 6]"}),
+ ChunkedArrayFromJSON(ty, {"[null, null, null, null, null, null]"}),
+ &options);
+ }
+}
+
+TEST(TestCumulativeProd, HasStartNoSkip) {
+ CumulativeOptions options(2.0);
+ for (auto ty : NumericTypes()) {
+ CheckVectorUnary("cumulative_prod", ArrayFromJSON(ty, "[1, 2, 3, 4]"),
+ ArrayFromJSON(ty, "[2, 4, 12, 48]"), &options);
+ CheckVectorUnary("cumulative_prod_checked", ArrayFromJSON(ty, "[1, 2, 3, 4]"),
+ ArrayFromJSON(ty, "[2, 4, 12, 48]"), &options);
+
+ CheckVectorUnary("cumulative_prod", ArrayFromJSON(ty, "[1, 2, null, 4]"),
+ ArrayFromJSON(ty, "[2, 4, null, null]"), &options);
+ CheckVectorUnary("cumulative_prod_checked", ArrayFromJSON(ty, "[1, 2, null, 4]"),
+ ArrayFromJSON(ty, "[2, 4, null, null]"), &options);
+
+ CheckVectorUnary("cumulative_prod", ArrayFromJSON(ty, "[null, 2, null, 4]"),
+ ArrayFromJSON(ty, "[null, null, null, null]"), &options);
+ CheckVectorUnary("cumulative_prod_checked", ArrayFromJSON(ty, "[null, 2, null, 4]"),
+ ArrayFromJSON(ty, "[null, null, null, null]"), &options);
+
+ CheckVectorUnary("cumulative_prod", ChunkedArrayFromJSON(ty, {"[1, 2]", "[3, 4]"}),
+ ChunkedArrayFromJSON(ty, {"[2, 4, 12, 48]"}), &options);
+ CheckVectorUnary("cumulative_prod_checked",
+ ChunkedArrayFromJSON(ty, {"[1, 2]", "[3, 4]"}),
+ ChunkedArrayFromJSON(ty, {"[2, 4, 12, 48]"}), &options);
+
+ CheckVectorUnary("cumulative_prod", ChunkedArrayFromJSON(ty, {"[1, 2]", "[null, 4]"}),
+ ChunkedArrayFromJSON(ty, {"[2, 4, null, null]"}), &options);
+ CheckVectorUnary("cumulative_prod_checked",
+ ChunkedArrayFromJSON(ty, {"[1, 2]", "[null, 4]"}),
+ ChunkedArrayFromJSON(ty, {"[2, 4, null, null]"}), &options);
+
+ CheckVectorUnary("cumulative_prod",
+ ChunkedArrayFromJSON(ty, {"[null, 2]", "[null, 4]"}),
+ ChunkedArrayFromJSON(ty, {"[null, null, null, null]"}), &options);
+ CheckVectorUnary("cumulative_prod_checked",
+ ChunkedArrayFromJSON(ty, {"[null, 2]", "[null, 4]"}),
+ ChunkedArrayFromJSON(ty, {"[null, null, null, null]"}), &options);
+ }
+}
+
+TEST(TestCumulativeProd, HasStartDoSkip) {
+ CumulativeOptions options(2.0, true);
+ for (auto ty : NumericTypes()) {
+ CheckVectorUnary("cumulative_prod", ArrayFromJSON(ty, "[1, 2, 3, 4]"),
+ ArrayFromJSON(ty, "[2, 4, 12, 48]"), &options);
+ CheckVectorUnary("cumulative_prod_checked", ArrayFromJSON(ty, "[1, 2, 3, 4]"),
+ ArrayFromJSON(ty, "[2, 4, 12, 48]"), &options);
+
+ CheckVectorUnary("cumulative_prod", ArrayFromJSON(ty, "[1, 2, null, 4]"),
+ ArrayFromJSON(ty, "[2, 4, null, 16]"), &options);
+ CheckVectorUnary("cumulative_prod_checked", ArrayFromJSON(ty, "[1, 2, null, 4]"),
+ ArrayFromJSON(ty, "[2, 4, null, 16]"), &options);
+
+ CheckVectorUnary("cumulative_prod", ArrayFromJSON(ty, "[null, 2, null, 4]"),
+ ArrayFromJSON(ty, "[null, 4, null, 16]"), &options);
+ CheckVectorUnary("cumulative_prod_checked", ArrayFromJSON(ty, "[null, 2, null, 4]"),
+ ArrayFromJSON(ty, "[null, 4, null, 16]"), &options);
+
+ CheckVectorUnary("cumulative_prod", ChunkedArrayFromJSON(ty, {"[1, 2]", "[3, 4]"}),
+ ChunkedArrayFromJSON(ty, {"[2, 4, 12, 48]"}), &options);
+ CheckVectorUnary("cumulative_prod_checked",
+ ChunkedArrayFromJSON(ty, {"[1, 2]", "[3, 4]"}),
+ ChunkedArrayFromJSON(ty, {"[2, 4, 12, 48]"}), &options);
+
+ CheckVectorUnary("cumulative_prod", ChunkedArrayFromJSON(ty, {"[1, 2]", "[null, 4]"}),
+ ChunkedArrayFromJSON(ty, {"[2, 4, null, 16]"}), &options);
+ CheckVectorUnary("cumulative_prod_checked",
+ ChunkedArrayFromJSON(ty, {"[1, 2]", "[null, 4]"}),
+ ChunkedArrayFromJSON(ty, {"[2, 4, null, 16]"}), &options);
+
+ CheckVectorUnary("cumulative_prod",
+ ChunkedArrayFromJSON(ty, {"[null, 2]", "[null, 4]"}),
+ ChunkedArrayFromJSON(ty, {"[null, 4, null, 16]"}), &options);
+ CheckVectorUnary("cumulative_prod_checked",
+ ChunkedArrayFromJSON(ty, {"[null, 2]", "[null, 4]"}),
+ ChunkedArrayFromJSON(ty, {"[null, 4, null, 16]"}), &options);
+ }
+}
+
+TEST(TestCumulativeProd, NoStartDoSkip) {
+ CumulativeOptions options(true);
+ for (auto ty : NumericTypes()) {
+ CheckVectorUnary("cumulative_prod", ArrayFromJSON(ty, "[1, 2, 3, 4]"),
+ ArrayFromJSON(ty, "[1, 2, 6, 24]"), &options);
+ CheckVectorUnary("cumulative_prod_checked", ArrayFromJSON(ty, "[1, 2, 3, 4]"),
+ ArrayFromJSON(ty, "[1, 2, 6, 24]"), &options);
+
+ CheckVectorUnary("cumulative_prod", ArrayFromJSON(ty, "[1, 2, null, 4]"),
+ ArrayFromJSON(ty, "[1, 2, null, 8]"), &options);
+ CheckVectorUnary("cumulative_prod_checked", ArrayFromJSON(ty, "[1, 2, null, 4]"),
+ ArrayFromJSON(ty, "[1, 2, null, 8]"), &options);
+
+ CheckVectorUnary("cumulative_prod", ArrayFromJSON(ty, "[null, 2, null, 4]"),
+ ArrayFromJSON(ty, "[null, 2, null, 8]"), &options);
+ CheckVectorUnary("cumulative_prod_checked", ArrayFromJSON(ty, "[null, 2, null, 4]"),
+ ArrayFromJSON(ty, "[null, 2, null, 8]"), &options);
+
+ CheckVectorUnary("cumulative_prod", ChunkedArrayFromJSON(ty, {"[1, 2]", "[3, 4]"}),
+ ChunkedArrayFromJSON(ty, {"[1, 2, 6, 24]"}), &options);
+ CheckVectorUnary("cumulative_prod_checked",
+ ChunkedArrayFromJSON(ty, {"[1, 2]", "[3, 4]"}),
+ ChunkedArrayFromJSON(ty, {"[1, 2, 6, 24]"}), &options);
+
+ CheckVectorUnary("cumulative_prod", ChunkedArrayFromJSON(ty, {"[1, 2]", "[null, 4]"}),
+ ChunkedArrayFromJSON(ty, {"[1, 2, null, 8]"}), &options);
+ CheckVectorUnary("cumulative_prod_checked",
+ ChunkedArrayFromJSON(ty, {"[1, 2]", "[null, 4]"}),
+ ChunkedArrayFromJSON(ty, {"[1, 2, null, 8]"}), &options);
+
+ CheckVectorUnary("cumulative_prod",
+ ChunkedArrayFromJSON(ty, {"[null, 2]", "[null, 4]"}),
+ ChunkedArrayFromJSON(ty, {"[null, 2, null, 8]"}), &options);
+ CheckVectorUnary("cumulative_prod_checked",
+ ChunkedArrayFromJSON(ty, {"[null, 2]", "[null, 4]"}),
+ ChunkedArrayFromJSON(ty, {"[null, 2, null, 8]"}), &options);
+ }
+}
+
+TEST(TestCumulativeMax, NoStartNoSkip) {
+ CumulativeOptions options;
+ for (auto ty : NumericTypes()) {
+ CheckVectorUnary("cumulative_max", ArrayFromJSON(ty, "[2, 1, 3, 5, 4, 6]"),
+ ArrayFromJSON(ty, "[2, 2, 3, 5, 5, 6]"), &options);
+
+ CheckVectorUnary("cumulative_max", ArrayFromJSON(ty, "[2, 1, null, 5, null, 6]"),
+ ArrayFromJSON(ty, "[2, 2, null, null, null, null]"), &options);
+
+ CheckVectorUnary("cumulative_max", ArrayFromJSON(ty, "[null, 1, null, 5, null, 6]"),
+ ArrayFromJSON(ty, "[null, null, null, null, null, null]"), &options);
+
+ CheckVectorUnary("cumulative_max",
+ ChunkedArrayFromJSON(ty, {"[2, 1, 3]", "[5, 4, 6]"}),
+ ChunkedArrayFromJSON(ty, {"[2, 2, 3, 5, 5, 6]"}), &options);
+
+ CheckVectorUnary(
+ "cumulative_max", ChunkedArrayFromJSON(ty, {"[2, 1, null]", "[5, null, 6]"}),
+ ChunkedArrayFromJSON(ty, {"[2, 2, null, null, null, null]"}), &options);
+
+ CheckVectorUnary(
+ "cumulative_max", ChunkedArrayFromJSON(ty, {"[null, 1, null]", "[5, null, 6]"}),
+ ChunkedArrayFromJSON(ty, {"[null, null, null, null, null, null]"}), &options);
+ }
+}
+
+TEST(TestCumulativeMax, HasStartNoSkip) {
+ CumulativeOptions options(3.0);
+ for (auto ty : NumericTypes()) {
+ CheckVectorUnary("cumulative_max", ArrayFromJSON(ty, "[2, 1, 3, 5, 4, 6]"),
+ ArrayFromJSON(ty, "[3, 3, 3, 5, 5, 6]"), &options);
+
+ CheckVectorUnary("cumulative_max", ArrayFromJSON(ty, "[2, 1, null, 5, null, 6]"),
+ ArrayFromJSON(ty, "[3, 3, null, null, null, null]"), &options);
+
+ CheckVectorUnary("cumulative_max", ArrayFromJSON(ty, "[null, 1, null, 5, null, 6]"),
+ ArrayFromJSON(ty, "[null, null, null, null, null, null]"), &options);
+
+ CheckVectorUnary("cumulative_max",
+ ChunkedArrayFromJSON(ty, {"[2, 1, 3]", "[5, 4, 6]"}),
+ ChunkedArrayFromJSON(ty, {"[3, 3, 3, 5, 5, 6]"}), &options);
+
+ CheckVectorUnary(
+ "cumulative_max", ChunkedArrayFromJSON(ty, {"[2, 1, null]", "[5, null, 6]"}),
+ ChunkedArrayFromJSON(ty, {"[3, 3, null, null, null, null]"}), &options);
+
+ CheckVectorUnary(
+ "cumulative_max", ChunkedArrayFromJSON(ty, {"[null, 1, null]", "[5, null, 6]"}),
+ ChunkedArrayFromJSON(ty, {"[null, null, null, null, null, null]"}), &options);
+ }
+}
+
+TEST(TestCumulativeMax, HasStartDoSkip) {
+ CumulativeOptions options(3.0, true);
+ for (auto ty : NumericTypes()) {
+ CheckVectorUnary("cumulative_max", ArrayFromJSON(ty, "[2, 1, 3, 5, 4, 6]"),
+ ArrayFromJSON(ty, "[3, 3, 3, 5, 5, 6]"), &options);
+
+ CheckVectorUnary("cumulative_max", ArrayFromJSON(ty, "[2, 1, null, 5, null, 6]"),
+ ArrayFromJSON(ty, "[3, 3, null, 5, null, 6]"), &options);
+
+ CheckVectorUnary("cumulative_max", ArrayFromJSON(ty, "[null, 1, null, 5, null, 6]"),
+ ArrayFromJSON(ty, "[null, 3, null, 5, null, 6]"), &options);
+
+ CheckVectorUnary("cumulative_max",
+ ChunkedArrayFromJSON(ty, {"[2, 1, 3]", "[5, 4, 6]"}),
+ ChunkedArrayFromJSON(ty, {"[3, 3, 3, 5, 5, 6]"}), &options);
+
+ CheckVectorUnary("cumulative_max",
+ ChunkedArrayFromJSON(ty, {"[2, 1, null]", "[5, null, 6]"}),
+ ChunkedArrayFromJSON(ty, {"[3, 3, null, 5, null, 6]"}), &options);
+
+ CheckVectorUnary("cumulative_max",
+ ChunkedArrayFromJSON(ty, {"[null, 1, null]", "[5, null, 6]"}),
+ ChunkedArrayFromJSON(ty, {"[null, 3, null, 5, null, 6]"}), &options);
+ }
+}
+
+TEST(TestCumulativeMax, NoStartDoSkip) {
+ CumulativeOptions options(true);
+ for (auto ty : NumericTypes()) {
+ CheckVectorUnary("cumulative_max", ArrayFromJSON(ty, "[2, 1, 3, 5, 4, 6]"),
+ ArrayFromJSON(ty, "[2, 2, 3, 5, 5, 6]"), &options);
+
+ CheckVectorUnary("cumulative_max", ArrayFromJSON(ty, "[2, 1, null, 5, null, 6]"),
+ ArrayFromJSON(ty, "[2, 2, null, 5, null, 6]"), &options);
+
+ CheckVectorUnary("cumulative_max", ArrayFromJSON(ty, "[null, 1, null, 5, null, 6]"),
+ ArrayFromJSON(ty, "[null, 1, null, 5, null, 6]"), &options);
+
+ CheckVectorUnary("cumulative_max",
+ ChunkedArrayFromJSON(ty, {"[2, 1, 3]", "[5, 4, 6]"}),
+ ChunkedArrayFromJSON(ty, {"[2, 2, 3, 5, 5, 6]"}), &options);
+
+ CheckVectorUnary("cumulative_max",
+ ChunkedArrayFromJSON(ty, {"[2, 1, null]", "[5, null, 6]"}),
+ ChunkedArrayFromJSON(ty, {"[2, 2, null, 5, null, 6]"}), &options);
+
+ CheckVectorUnary("cumulative_max",
+ ChunkedArrayFromJSON(ty, {"[null, 1, null]", "[5, null, 6]"}),
+ ChunkedArrayFromJSON(ty, {"[null, 1, null, 5, null, 6]"}), &options);
+ }
+}
+
+TEST(TestCumulativeMin, NoStartNoSkip) {
+ CumulativeOptions options;
+ for (auto ty : NumericTypes()) {
+ CheckVectorUnary("cumulative_min", ArrayFromJSON(ty, "[5, 6, 4, 2, 3, 1]"),
+ ArrayFromJSON(ty, "[5, 5, 4, 2, 2, 1]"), &options);
+
+ CheckVectorUnary("cumulative_min", ArrayFromJSON(ty, "[5, 6, null, 2, null, 1]"),
+ ArrayFromJSON(ty, "[5, 5, null, null, null, null]"), &options);
+
+ CheckVectorUnary("cumulative_min", ArrayFromJSON(ty, "[null, 6, null, 2, null, 1]"),
+ ArrayFromJSON(ty, "[null, null, null, null, null, null]"), &options);
+
+ CheckVectorUnary("cumulative_min",
+ ChunkedArrayFromJSON(ty, {"[5, 6, 4]", "[2, 3, 1]"}),
+ ChunkedArrayFromJSON(ty, {"[5, 5, 4, 2, 2, 1]"}), &options);
+
+ CheckVectorUnary(
+ "cumulative_min", ChunkedArrayFromJSON(ty, {"[5, 6, null]", "[2, null, 1]"}),
+ ChunkedArrayFromJSON(ty, {"[5, 5, null, null, null, null]"}), &options);
+
+ CheckVectorUnary(
+ "cumulative_min", ChunkedArrayFromJSON(ty, {"[null, 6, null]", "[2, null, 1]"}),
+ ChunkedArrayFromJSON(ty, {"[null, null, null, null, null, null]"}), &options);
+ }
+}
+
+TEST(TestCumulativeMin, HasStartNoSkip) {
+ CumulativeOptions options(3.0);
+ for (auto ty : NumericTypes()) {
+ CheckVectorUnary("cumulative_min", ArrayFromJSON(ty, "[5, 6, 4, 2, 3, 1]"),
+ ArrayFromJSON(ty, "[3, 3, 3, 2, 2, 1]"), &options);
+
+ CheckVectorUnary("cumulative_min", ArrayFromJSON(ty, "[5, 6, null, 2, null, 1]"),
+ ArrayFromJSON(ty, "[3, 3, null, null, null, null]"), &options);
+
+ CheckVectorUnary("cumulative_min", ArrayFromJSON(ty, "[null, 6, null, 2, null, 1]"),
+ ArrayFromJSON(ty, "[null, null, null, null, null, null]"), &options);
+
+ CheckVectorUnary("cumulative_min",
+ ChunkedArrayFromJSON(ty, {"[5, 6, 4]", "[2, 3, 1]"}),
+ ChunkedArrayFromJSON(ty, {"[3, 3, 3, 2, 2, 1]"}), &options);
+
+ CheckVectorUnary(
+ "cumulative_min", ChunkedArrayFromJSON(ty, {"[5, 6, null]", "[2, null, 1]"}),
+ ChunkedArrayFromJSON(ty, {"[3, 3, null, null, null, null]"}), &options);
+
+ CheckVectorUnary(
+ "cumulative_min", ChunkedArrayFromJSON(ty, {"[null, 6, null]", "[2, null, 1]"}),
+ ChunkedArrayFromJSON(ty, {"[null, null, null, null, null, null]"}), &options);
+ }
+}
+
+TEST(TestCumulativeMin, HasStartDoSkip) {
+ CumulativeOptions options(3.0, true);
+ for (auto ty : NumericTypes()) {
+ CheckVectorUnary("cumulative_min", ArrayFromJSON(ty, "[5, 6, 4, 2, 3, 1]"),
+ ArrayFromJSON(ty, "[3, 3, 3, 2, 2, 1]"), &options);
+
+ CheckVectorUnary("cumulative_min", ArrayFromJSON(ty, "[5, 6, null, 2, null, 1]"),
+ ArrayFromJSON(ty, "[3, 3, null, 2, null, 1]"), &options);
+
+ CheckVectorUnary("cumulative_min", ArrayFromJSON(ty, "[null, 6, null, 2, null, 1]"),
+ ArrayFromJSON(ty, "[null, 3, null, 2, null, 1]"), &options);
+
+ CheckVectorUnary("cumulative_min",
+ ChunkedArrayFromJSON(ty, {"[5, 6, 4]", "[2, 3, 1]"}),
+ ChunkedArrayFromJSON(ty, {"[3, 3, 3, 2, 2, 1]"}), &options);
+
+ CheckVectorUnary("cumulative_min",
+ ChunkedArrayFromJSON(ty, {"[5, 6, null]", "[2, null, 1]"}),
+ ChunkedArrayFromJSON(ty, {"[3, 3, null, 2, null, 1]"}), &options);
+
+ CheckVectorUnary("cumulative_min",
+ ChunkedArrayFromJSON(ty, {"[null, 6, null]", "[2, null, 1]"}),
+ ChunkedArrayFromJSON(ty, {"[null, 3, null, 2, null, 1]"}), &options);
+ }
+}
+
+TEST(TestCumulativeMin, NoStartDoSkip) {
+ CumulativeOptions options(true);
+ for (auto ty : NumericTypes()) {
+ CheckVectorUnary("cumulative_min", ArrayFromJSON(ty, "[5, 6, 4, 2, 3, 1]"),
+ ArrayFromJSON(ty, "[5, 5, 4, 2, 2, 1]"), &options);
+
+ CheckVectorUnary("cumulative_min", ArrayFromJSON(ty, "[5, 6, null, 2, null, 1]"),
+ ArrayFromJSON(ty, "[5, 5, null, 2, null, 1]"), &options);
+
+ CheckVectorUnary("cumulative_min", ArrayFromJSON(ty, "[null, 6, null, 2, null, 1]"),
+ ArrayFromJSON(ty, "[null, 6, null, 2, null, 1]"), &options);
+
+ CheckVectorUnary("cumulative_min",
+ ChunkedArrayFromJSON(ty, {"[5, 6, 4]", "[2, 3, 1]"}),
+ ChunkedArrayFromJSON(ty, {"[5, 5, 4, 2, 2, 1]"}), &options);
+
+ CheckVectorUnary("cumulative_min",
+ ChunkedArrayFromJSON(ty, {"[5, 6, null]", "[2, null, 1]"}),
+ ChunkedArrayFromJSON(ty, {"[5, 5, null, 2, null, 1]"}), &options);
+
+ CheckVectorUnary("cumulative_min",
+ ChunkedArrayFromJSON(ty, {"[null, 6, null]", "[2, null, 1]"}),
+ ChunkedArrayFromJSON(ty, {"[null, 6, null, 2, null, 1]"}), &options);
}
}
TEST(TestCumulativeSum, ConvenienceFunctionCheckOverflow) {
ASSERT_ARRAYS_EQUAL(*CumulativeSum(ArrayFromJSON(int8(), "[127, 1]"),
- CumulativeSumOptions::Defaults(), false)
+ CumulativeOptions::Defaults(), false)
->make_array(),
*ArrayFromJSON(int8(), "[127, -128]"));
EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid, HasSubstr("overflow"),
CumulativeSum(ArrayFromJSON(int8(), "[127, 1]"),
- CumulativeSumOptions::Defaults(), true));
+ CumulativeOptions::Defaults(), true));
+}
+
+TEST(TestCumulativeProd, ConvenienceFunctionCheckOverflow) {
+ ASSERT_ARRAYS_EQUAL(*CumulativeProd(ArrayFromJSON(int8(), "[-128, -1]"),
+ CumulativeOptions::Defaults(), false)
+ ->make_array(),
+ *ArrayFromJSON(int8(), "[-128, -128]")); // unchecked: -128 * -1 wraps to -128
+
+ EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid, HasSubstr("overflow"),
+ CumulativeProd(ArrayFromJSON(int8(), "[-128, -1]"),
+ CumulativeOptions::Defaults(), true));
+}
+
+TEST(TestCumulativeMax, ConvenienceFunction) {
+ ASSERT_ARRAYS_EQUAL(
+ *CumulativeMax(ArrayFromJSON(int8(), "[1, 2, 3]"), CumulativeOptions::Defaults())
+ ->make_array(),
+ *ArrayFromJSON(int8(), "[1, 2, 3]"));
+}
+
+TEST(TestCumulativeMin, ConvenienceFunction) {
+ ASSERT_ARRAYS_EQUAL(
+ *CumulativeMin(ArrayFromJSON(int8(), "[-1, -2, -3]"), CumulativeOptions::Defaults())
+ ->make_array(),
+ *ArrayFromJSON(int8(), "[-1, -2, -3]"));
+}
+
+TEST(TestCumulative, NaN) {
+ // addition with NaN is always NaN
+ CheckVectorUnary("cumulative_sum", ArrayFromJSON(float64(), "[1, 2, NaN, 4, 5]"),
+ ArrayFromJSON(float64(), "[1, 3, NaN, NaN, NaN]"));
+
+ // multiplication with NaN is always NaN
+ CheckVectorUnary("cumulative_prod", ArrayFromJSON(float64(), "[1, 2, NaN, 4, 5]"),
+ ArrayFromJSON(float64(), "[1, 2, NaN, NaN, NaN]"));
+
+ // max with NaN is always ignored because NaN > a always returns false
+ CheckVectorUnary("cumulative_max", ArrayFromJSON(float64(), "[1, 2, NaN, 4, 5]"),
+ ArrayFromJSON(float64(), "[1, 2, 2, 4, 5]"));
+
+ // min with NaN is always ignored because NaN < a always returns false
+ CheckVectorUnary("cumulative_min", ArrayFromJSON(float64(), "[5, 4, NaN, 2, 1]"),
+ ArrayFromJSON(float64(), "[5, 4, 4, 2, 1]"));
 }
} // namespace compute
} // namespace arrow
diff --git a/cpp/src/arrow/scalar.h b/cpp/src/arrow/scalar.h
index d23b33e28f..0797306a67 100644
--- a/cpp/src/arrow/scalar.h
+++ b/cpp/src/arrow/scalar.h
@@ -693,6 +693,9 @@ inline std::shared_ptr<Scalar> MakeScalar(std::string value) {
return std::make_shared<StringScalar>(std::move(value));
}
+inline std::shared_ptr<Scalar> MakeScalar(const std::shared_ptr<Scalar>& scalar) {
+ return scalar;
+}
/// @}
template <typename ValueRef>
diff --git a/docs/source/cpp/compute.rst b/docs/source/cpp/compute.rst
index 7a8aa67a0d..70c17ae2b9 100644
--- a/docs/source/cpp/compute.rst
+++ b/docs/source/cpp/compute.rst
@@ -1613,28 +1613,39 @@ Array-wise ("vector") functions
Cumulative Functions
~~~~~~~~~~~~~~~~~~~~
-Cumulative functions are vector functions that perform a running total on their
-input using a given binary associative operation and output an array containing
-the corresponding intermediate running values. The input is expected to be of
-numeric type. By default these functions do not detect overflow. They are also
-available in an overflow-checking variant, suffixed ``_checked``, which returns
-an ``Invalid`` :class:`Status` when overflow is detected.
+Cumulative functions are vector functions that perform a running accumulation on
+their input using a given binary associative operation with an identity element
+(a monoid) and output an array containing the corresponding intermediate running
+values. The input is expected to be of numeric type. By default these functions
+do not detect overflow. They are also available in an overflow-checking variant,
+suffixed ``_checked``, which returns an ``Invalid`` :class:`Status` when
+overflow is detected.
+------------------------+-------+-------------+-------------+--------------------------------+-------+
-| Function name | Arity | Input types | Output type | Options class | Notes |
-+========================+=======+=============+=============+================================+=======+
-| cumulative_sum | Unary | Numeric | Numeric | :struct:`CumulativeSumOptions` | \(1) |
-+------------------------+-------+-------------+-------------+--------------------------------+-------+
-| cumulative_sum_checked | Unary | Numeric | Numeric | :struct:`CumulativeSumOptions` | \(1) |
-+------------------------+-------+-------------+-------------+--------------------------------+-------+
-
-* \(1) CumulativeSumOptions has two optional parameters. The first parameter
- :member:`CumulativeSumOptions::start` is a starting value for the running
- sum. It has a default value of 0. Specified values of ``start`` must have the
- same type as the input. The second parameter
- :member:`CumulativeSumOptions::skip_nulls` is a boolean. When set to
+| Function name | Arity | Input types | Output type | Options class | Notes |
++=========================+=======+=============+=============+================================+=======+
+| cumulative_sum | Unary | Numeric | Numeric | :struct:`CumulativeOptions` | \(1) |
++-------------------------+-------+-------------+-------------+--------------------------------+-------+
+| cumulative_sum_checked | Unary | Numeric | Numeric | :struct:`CumulativeOptions` | \(1) |
++-------------------------+-------+-------------+-------------+--------------------------------+-------+
+| cumulative_prod | Unary | Numeric | Numeric | :struct:`CumulativeOptions` | \(1) |
++-------------------------+-------+-------------+-------------+--------------------------------+-------+
+| cumulative_prod_checked | Unary | Numeric | Numeric | :struct:`CumulativeOptions` | \(1) |
++-------------------------+-------+-------------+-------------+--------------------------------+-------+
+| cumulative_max | Unary | Numeric | Numeric | :struct:`CumulativeOptions` | \(1) |
++-------------------------+-------+-------------+-------------+--------------------------------+-------+
+| cumulative_min | Unary | Numeric | Numeric | :struct:`CumulativeOptions` | \(1) |
++-------------------------+-------+-------------+-------------+--------------------------------+-------+
+
+* \(1) CumulativeOptions has two optional parameters. The first parameter
+ :member:`CumulativeOptions::start` is a starting value for the running
+ accumulation. It has a default value of 0 for ``sum``, 1 for ``prod``, min of
+ input type for ``max``, and max of input type for ``min``. Specified values of
+ ``start`` must be castable to the input type. The second parameter
+ :member:`CumulativeOptions::skip_nulls` is a boolean. When set to
false (the default), the first encountered null is propagated. When set to
- true, each null in the input produces a corresponding null in the output.
+ true, each null in the input produces a corresponding null in the output and
+ doesn't affect the accumulation forward.
Associative transforms
~~~~~~~~~~~~~~~~~~~~~~
diff --git a/docs/source/python/api/compute.rst b/docs/source/python/api/compute.rst
index c04652e79c..43deedd653 100644
--- a/docs/source/python/api/compute.rst
+++ b/docs/source/python/api/compute.rst
@@ -52,9 +52,11 @@ Aggregations
Cumulative Functions
--------------------
-Cumulative functions are vector functions that perform a running total on their
-input and output an array containing the corresponding intermediate running values.
-By default these functions do not detect overflow. They are also
+Cumulative functions are vector functions that perform a running accumulation on
+their input using a given binary associative operation with an identity element
+(a monoid) and output an array containing the corresponding intermediate running
+values. The input is expected to be of numeric type. By default these functions
+do not detect overflow. They are also
available in an overflow-checking variant, suffixed ``_checked``, which
throws an ``ArrowInvalid`` exception when overflow is detected.
@@ -63,6 +65,10 @@ throws an ``ArrowInvalid`` exception when overflow is detected.
cumulative_sum
cumulative_sum_checked
+ cumulative_prod
+ cumulative_prod_checked
+ cumulative_max
+ cumulative_min
Arithmetic Functions
--------------------
diff --git a/python/pyarrow/_compute.pyx b/python/pyarrow/_compute.pyx
index eaf9d1dfb6..d1aded326d 100644
--- a/python/pyarrow/_compute.pyx
+++ b/python/pyarrow/_compute.pyx
@@ -1928,31 +1928,44 @@ class PartitionNthOptions(_PartitionNthOptions):
self._set_options(pivot, null_placement)
-cdef class _CumulativeSumOptions(FunctionOptions):
+cdef class _CumulativeOptions(FunctionOptions):
def _set_options(self, start, skip_nulls):
- if not isinstance(start, Scalar):
+ # No explicit start: build the options from skip_nulls alone.
+ if start is None:
+ self.wrapped.reset(new CCumulativeOptions(skip_nulls))
+ elif isinstance(start, Scalar):
+ self.wrapped.reset(new CCumulativeOptions(
+ pyarrow_unwrap_scalar(start), skip_nulls))
+ else:
+ # Not a Scalar yet: wrap it with lib.scalar; any failure is
+ # reported as an invalid `start` option (TypeError).
try:
start = lib.scalar(start)
+ self.wrapped.reset(new CCumulativeOptions(
+ pyarrow_unwrap_scalar(start), skip_nulls))
except Exception:
_raise_invalid_function_option(
- start, "`start` type for CumulativeSumOptions", TypeError)
+ start, "`start` type for CumulativeOptions", TypeError)
- self.wrapped.reset(new CCumulativeSumOptions((<Scalar> start).unwrap(), skip_nulls))
-
-class CumulativeSumOptions(_CumulativeSumOptions):
+class CumulativeOptions(_CumulativeOptions):
"""
- Options for `cumulative_sum` function.
+ Options for `cumulative_*` functions.
+
+ - cumulative_sum
+ - cumulative_sum_checked
+ - cumulative_prod
+ - cumulative_prod_checked
+ - cumulative_max
+ - cumulative_min
Parameters
----------
- start : Scalar, default 0.0
- Starting value for sum computation
+ start : Scalar, default None
+ Starting value for the cumulative operation. If none is given,
+ a default value depending on the operation and input type is used.
skip_nulls : bool, default False
When false, the first encountered null is propagated.
"""
- def __init__(self, start=0.0, *, skip_nulls=False):
+ def __init__(self, start=None, *, skip_nulls=False):
self._set_options(start, skip_nulls)
diff --git a/python/pyarrow/compute.py b/python/pyarrow/compute.py
index e92f093547..3d428758a4 100644
--- a/python/pyarrow/compute.py
+++ b/python/pyarrow/compute.py
@@ -33,7 +33,8 @@ from pyarrow._compute import ( # noqa
AssumeTimezoneOptions,
CastOptions,
CountOptions,
- CumulativeSumOptions,
+ CumulativeOptions,
+ CumulativeOptions as CumulativeSumOptions,
DayOfWeekOptions,
DictionaryEncodeOptions,
RunEndEncodeOptions,
diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd
index 86f21f4b52..37a261c833 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -2400,10 +2400,11 @@ cdef extern from "arrow/compute/api.h" namespace "arrow::compute" nogil:
int64_t pivot
CNullPlacement null_placement
- cdef cppclass CCumulativeSumOptions \
- "arrow::compute::CumulativeSumOptions"(CFunctionOptions):
- CCumulativeSumOptions(shared_ptr[CScalar] start, c_bool skip_nulls)
- shared_ptr[CScalar] start
+ cdef cppclass CCumulativeOptions \
+ "arrow::compute::CumulativeOptions"(CFunctionOptions):
+ CCumulativeOptions(c_bool skip_nulls)
+ CCumulativeOptions(shared_ptr[CScalar] start, c_bool skip_nulls)
+ optional[shared_ptr[CScalar]] start
c_bool skip_nulls
cdef cppclass CArraySortOptions \
diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py
index 3e457259c7..38bdeb1263 100644
--- a/python/pyarrow/tests/test_compute.py
+++ b/python/pyarrow/tests/test_compute.py
@@ -156,7 +156,7 @@ def test_option_class_equality():
pc.NullOptions(),
pc.PadOptions(5),
pc.PartitionNthOptions(1, null_placement="at_start"),
- pc.CumulativeSumOptions(start=0, skip_nulls=False),
+ pc.CumulativeOptions(start=None, skip_nulls=False),
pc.QuantileOptions(),
pc.RandomOptions(),
pc.RankOptions(sort_keys="ascending",
@@ -2847,7 +2847,7 @@ def test_min_max_element_wise():
def test_cumulative_sum(start, skip_nulls):
# Exact tests (e.g., integral types)
start_int = int(start)
- starts = [start_int, pa.scalar(start_int, type=pa.int8()),
+ starts = [None, start_int, pa.scalar(start_int, type=pa.int8()),
pa.scalar(start_int, type=pa.int64())]
for strt in starts:
arrays = [
@@ -2865,10 +2865,11 @@ def test_cumulative_sum(start, skip_nulls):
for i, arr in enumerate(arrays):
result = pc.cumulative_sum(arr, start=strt, skip_nulls=skip_nulls)
# Add `start` offset to expected array before comparing
- expected = pc.add(expected_arrays[i], strt)
+ expected = pc.add(expected_arrays[i], strt if strt is not None
+ else 0)
assert result.equals(expected)
- starts = [start, pa.scalar(start, type=pa.float32()),
+ starts = [None, start, pa.scalar(start, type=pa.float32()),
pa.scalar(start, type=pa.float64())]
for strt in starts:
arrays = [
@@ -2885,7 +2886,8 @@ def test_cumulative_sum(start, skip_nulls):
for i, arr in enumerate(arrays):
result = pc.cumulative_sum(arr, start=strt, skip_nulls=skip_nulls)
# Add `start` offset to expected array before comparing
- expected = pc.add(expected_arrays[i], strt)
+ expected = pc.add(expected_arrays[i], strt if strt is not None
+ else 0)
np.testing.assert_array_almost_equal(result.to_numpy(
zero_copy_only=False), expected.to_numpy(zero_copy_only=False))
@@ -2894,6 +2896,174 @@ def test_cumulative_sum(start, skip_nulls):
pc.cumulative_sum([1, 2, 3], start=strt)
+@pytest.mark.parametrize('start', (1.25, 10.5, -10.5))
+@pytest.mark.parametrize('skip_nulls', (True, False))
+def test_cumulative_prod(start, skip_nulls):
+ # Exact tests (e.g., integral types)
+ start_int = int(start)
+ starts = [None, start_int, pa.scalar(start_int, type=pa.int8()),
+ pa.scalar(start_int, type=pa.int64())]
+ for strt in starts:
+ arrays = [
+ pa.array([1, 2, 3]),
+ pa.array([1, None, 20, 5]),
+ pa.chunked_array([[1, None], [20, 5]])
+ ]
+ expected_arrays = [
+ pa.array([1, 2, 6]),
+ pa.array([1, None, 20, 100])
+ if skip_nulls else pa.array([1, None, None, None]),
+ pa.chunked_array([[1, None, 20, 100]])
+ if skip_nulls else pa.chunked_array([[1, None, None, None]])
+ ]
+ for i, arr in enumerate(arrays):
+ result = pc.cumulative_prod(arr, start=strt, skip_nulls=skip_nulls)
+ # Multiply `start` offset to expected array before comparing
+ expected = pc.multiply(expected_arrays[i], strt if strt is not None
+ else 1)
+ assert result.equals(expected)
+
+ starts = [None, start, pa.scalar(start, type=pa.float32()),
+ pa.scalar(start, type=pa.float64())]
+ for strt in starts:
+ arrays = [
+ pa.array([1.5, 2.5, 3.5]),
+ pa.array([1, np.nan, 2, -3, 4, 5]),
+ pa.array([1, np.nan, None, 3, None, 5])
+ ]
+ expected_arrays = [
+ np.array([1.5, 3.75, 13.125]),
+ np.array([1, np.nan, np.nan, np.nan, np.nan, np.nan]),
+ np.array([1, np.nan, None, np.nan, None, np.nan])
+ if skip_nulls else np.array([1, np.nan, None, None, None, None])
+ ]
+ for i, arr in enumerate(arrays):
+ result = pc.cumulative_prod(arr, start=strt, skip_nulls=skip_nulls)
+ # Multiply `start` offset to expected array before comparing
+ expected = pc.multiply(expected_arrays[i], strt if strt is not None
+ else 1)
+ np.testing.assert_array_almost_equal(result.to_numpy(
+ zero_copy_only=False), expected.to_numpy(zero_copy_only=False))
+
+ for strt in ['a', pa.scalar('arrow'), 1.1]:
+ with pytest.raises(pa.ArrowInvalid):
+ pc.cumulative_prod([1, 2, 3], start=strt)
+
+
+@pytest.mark.parametrize('start', (0.5, 3.5, 6.5))
+@pytest.mark.parametrize('skip_nulls', (True, False))
+def test_cumulative_max(start, skip_nulls):
+ # Exact tests (e.g., integral types)
+ start_int = int(start)
+ starts = [None, start_int, pa.scalar(start_int, type=pa.int8()),
+ pa.scalar(start_int, type=pa.int64())]
+ for strt in starts:
+ arrays = [
+ pa.array([2, 1, 3, 5, 4, 6]),
+ pa.array([2, 1, None, 5, 4, None]),
+ pa.chunked_array([[2, 1, None], [5, 4, None]])
+ ]
+ expected_arrays = [
+ pa.array([2, 2, 3, 5, 5, 6]),
+ pa.array([2, 2, None, 5, 5, None])
+ if skip_nulls else pa.array([2, 2, None, None, None, None]),
+ pa.chunked_array([[2, 2, None, 5, 5, None]])
+ if skip_nulls else
+ pa.chunked_array([[2, 2, None, None, None, None]])
+ ]
+ for i, arr in enumerate(arrays):
+ result = pc.cumulative_max(arr, start=strt, skip_nulls=skip_nulls)
+ # Max `start` offset with expected array before comparing
+ expected = pc.max_element_wise(
+ expected_arrays[i], strt if strt is not None else int(-1e9),
+ skip_nulls=False)
+ assert result.equals(expected)
+
+ starts = [None, start, pa.scalar(start, type=pa.float32()),
+ pa.scalar(start, type=pa.float64())]
+ for strt in starts:
+ arrays = [
+ pa.array([2.5, 1.3, 3.7, 5.1, 4.9, 6.2]),
+ pa.array([2.5, 1.3, 3.7, np.nan, 4.9, 6.2]),
+ pa.array([2.5, 1.3, None, np.nan, 4.9, None])
+ ]
+ expected_arrays = [
+ np.array([2.5, 2.5, 3.7, 5.1, 5.1, 6.2]),
+ np.array([2.5, 2.5, 3.7, 3.7, 4.9, 6.2]),
+ np.array([2.5, 2.5, None, 2.5, 4.9, None])
+ if skip_nulls else np.array([2.5, 2.5, None, None, None, None])
+ ]
+ for i, arr in enumerate(arrays):
+ result = pc.cumulative_max(arr, start=strt, skip_nulls=skip_nulls)
+ # Max `start` offset with expected array before comparing
+ expected = pc.max_element_wise(
+ expected_arrays[i], strt if strt is not None else -1e9,
+ skip_nulls=False)
+ np.testing.assert_array_almost_equal(result.to_numpy(
+ zero_copy_only=False), expected.to_numpy(zero_copy_only=False))
+
+ for strt in ['a', pa.scalar('arrow'), 1.1]:
+ with pytest.raises(pa.ArrowInvalid):
+ pc.cumulative_max([1, 2, 3], start=strt)
+
+
+@pytest.mark.parametrize('start', (0.5, 3.5, 6.5))
+@pytest.mark.parametrize('skip_nulls', (True, False))
+def test_cumulative_min(start, skip_nulls):
+ # Exact tests (e.g., integral types)
+ start_int = int(start)
+ starts = [None, start_int, pa.scalar(start_int, type=pa.int8()),
+ pa.scalar(start_int, type=pa.int64())]
+ for strt in starts:
+ arrays = [
+ pa.array([5, 6, 4, 2, 3, 1]),
+ pa.array([5, 6, None, 2, 3, None]),
+ pa.chunked_array([[5, 6, None], [2, 3, None]])
+ ]
+ expected_arrays = [
+ pa.array([5, 5, 4, 2, 2, 1]),
+ pa.array([5, 5, None, 2, 2, None])
+ if skip_nulls else pa.array([5, 5, None, None, None, None]),
+ pa.chunked_array([[5, 5, None, 2, 2, None]])
+ if skip_nulls else
+ pa.chunked_array([[5, 5, None, None, None, None]])
+ ]
+ for i, arr in enumerate(arrays):
+ result = pc.cumulative_min(arr, start=strt, skip_nulls=skip_nulls)
+ # Fold `start` into the expected values (element-wise min) before
+ # comparing; with no start, 1e9 exceeds every test value
+ expected = pc.min_element_wise(
+ expected_arrays[i], strt if strt is not None else int(1e9),
+ skip_nulls=False)
+ assert result.equals(expected)
+
+ starts = [None, start, pa.scalar(start, type=pa.float32()),
+ pa.scalar(start, type=pa.float64())]
+ for strt in starts:
+ arrays = [
+ pa.array([5.5, 6.3, 4.7, 2.1, 3.9, 1.2]),
+ pa.array([5.5, 6.3, 4.7, np.nan, 3.9, 1.2]),
+ pa.array([5.5, 6.3, None, np.nan, 3.9, None])
+ ]
+ expected_arrays = [
+ np.array([5.5, 5.5, 4.7, 2.1, 2.1, 1.2]),
+ np.array([5.5, 5.5, 4.7, 4.7, 3.9, 1.2]),
+ np.array([5.5, 5.5, None, 5.5, 3.9, None])
+ if skip_nulls else np.array([5.5, 5.5, None, None, None, None])
+ ]
+ for i, arr in enumerate(arrays):
+ result = pc.cumulative_min(arr, start=strt, skip_nulls=skip_nulls)
+ # Fold `start` into the expected values (element-wise min) before
+ # comparing; with no start, 1e9 exceeds every test value
+ expected = pc.min_element_wise(
+ expected_arrays[i], strt if strt is not None else 1e9,
+ skip_nulls=False)
+ np.testing.assert_array_almost_equal(result.to_numpy(
+ zero_copy_only=False), expected.to_numpy(zero_copy_only=False))
+
+ # Invalid `start` values must be rejected by cumulative_min itself
+ for strt in ['a', pa.scalar('arrow'), 1.1]:
+ with pytest.raises(pa.ArrowInvalid):
+ pc.cumulative_min([1, 2, 3], start=strt)
+
+
def test_make_struct():
assert pc.make_struct(1, 'a').as_py() == {'0': 1, '1': 'a'}