You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2022/04/02 00:58:46 UTC

[GitHub] [arrow] edponce commented on a change in pull request #12460: ARROW-13530: [C++] Implement cumulative sum compute function

edponce commented on a change in pull request #12460:
URL: https://github.com/apache/arrow/pull/12460#discussion_r840985029



##########
File path: cpp/src/arrow/compute/kernels/base_arithmetic_internal.h
##########
@@ -0,0 +1,119 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include "arrow/compute/api_scalar.h"
+#include "arrow/compute/kernels/common.h"
+#include "arrow/compute/kernels/util_internal.h"
+#include "arrow/type.h"
+#include "arrow/type_traits.h"
+#include "arrow/util/decimal.h"
+#include "arrow/util/int_util_internal.h"
+#include "arrow/util/macros.h"
+
+namespace arrow {
+
+using internal::AddWithOverflow;
+
+namespace compute {
+namespace internal {
+
+struct Add {
+  template <typename T, typename Arg0, typename Arg1>
+  static constexpr enable_if_floating_value<T> Call(KernelContext*, Arg0 left, Arg1 right,
+                                                    Status*) {
+    return left + right;
+  }
+
+  template <typename T, typename Arg0, typename Arg1>
+  static constexpr enable_if_unsigned_integer_value<T> Call(KernelContext*, Arg0 left,
+                                                            Arg1 right, Status*) {
+    return left + right;
+  }
+
+  template <typename T, typename Arg0, typename Arg1>
+  static constexpr enable_if_signed_integer_value<T> Call(KernelContext*, Arg0 left,
+                                                          Arg1 right, Status*) {
+    return arrow::internal::SafeSignedAdd(left, right);
+  }
+
+  template <typename T, typename Arg0, typename Arg1>
+  static enable_if_decimal_value<T> Call(KernelContext*, Arg0 left, Arg1 right, Status*) {
+    return left + right;
+  }
+};
+
+struct AddChecked {
+  template <typename T, typename Arg0, typename Arg1>
+  static enable_if_integer_value<T> Call(KernelContext*, Arg0 left, Arg1 right,
+                                         Status* st) {
+    static_assert(std::is_same<T, Arg0>::value && std::is_same<T, Arg1>::value, "");
+    T result = 0;
+    if (ARROW_PREDICT_FALSE(AddWithOverflow(left, right, &result))) {
+      *st = Status::Invalid("overflow");
+    }
+    return result;
+  }
+
+  template <typename T, typename Arg0, typename Arg1>
+  static enable_if_floating_value<T> Call(KernelContext*, Arg0 left, Arg1 right,
+                                          Status*) {
+    static_assert(std::is_same<T, Arg0>::value && std::is_same<T, Arg1>::value, "");
+    return left + right;
+  }
+
+  template <typename T, typename Arg0, typename Arg1>
+  static enable_if_decimal_value<T> Call(KernelContext*, Arg0 left, Arg1 right, Status*) {
+    return left + right;
+  }
+};
+
+template <int64_t multiple>
+struct AddTimeDuration {
+  template <typename T, typename Arg0, typename Arg1>
+  static T Call(KernelContext*, Arg0 left, Arg1 right, Status* st) {
+    T result =
+        arrow::internal::SafeSignedAdd(static_cast<T>(left), static_cast<T>(right));
+    if (result < 0 || multiple <= result) {
+      *st = Status::Invalid(result, " is not within the acceptable range of ", "[0, ",
+                            multiple, ") s");
+    }
+    return result;
+  }
+};
+
+template <int64_t multiple>
+struct AddTimeDurationChecked {
+  template <typename T, typename Arg0, typename Arg1>
+  static T Call(KernelContext*, Arg0 left, Arg1 right, Status* st) {
+    T result = 0;
+    if (ARROW_PREDICT_FALSE(
+            AddWithOverflow(static_cast<T>(left), static_cast<T>(right), &result))) {
+      *st = Status::Invalid("overflow");
+    }
+    if (result < 0 || multiple <= result) {

Review comment:
       Why not move all of the basic arithmetic operations into this header, rather than only the addition kernels? We need to follow a consistent convention.

##########
File path: cpp/src/arrow/compute/api_vector.cc
##########
@@ -176,6 +180,22 @@ SelectKOptions::SelectKOptions(int64_t k, std::vector<SortKey> sort_keys)
       sort_keys(std::move(sort_keys)) {}
 constexpr char SelectKOptions::kTypeName[];
 
+// CumulativeGenericOptions::CumulativeGenericOptions(uint64_t start, bool skip_nulls)
+//     : CumulativeGenericOptions(std::make_shared<UInt64Scalar>(start), skip_nulls) {}
+//
+// CumulativeGenericOptions::CumulativeGenericOptions(int64_t start, bool skip_nulls)
+//     : CumulativeGenericOptions(std::make_shared<Int64Scalar>(start), skip_nulls) {}
+//
+// CumulativeGenericOptions::CumulativeGenericOptions(double start, bool skip_nulls)
+//     : CumulativeGenericOptions(std::make_shared<DoubleScalar>(start), skip_nulls) {}
+
+CumulativeGenericOptions::CumulativeGenericOptions(std::shared_ptr<Scalar> start,
+                                                   bool skip_nulls)
+    : FunctionOptions(internal::kCumulativeGenericOptionsType),
+      start(std::move(start)),

Review comment:
       Not sure we should `std::move` here. Why take ownership of the value away from client code?

##########
File path: cpp/src/arrow/compute/kernels/vector_cumulative_ops_test.cc
##########
@@ -0,0 +1,210 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <algorithm>
+#include <cstdint>
+#include <cstdio>
+#include <functional>
+#include <locale>
+#include <memory>
+#include <stdexcept>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include <gtest/gtest.h>
+
+#include "arrow/array.h"
+#include "arrow/array/builder_decimal.h"
+#include "arrow/buffer.h"
+#include "arrow/testing/gtest_util.h"  // IntegralArrowTypes
+#include "arrow/testing/util.h"
+#include "arrow/type.h"
+#include "arrow/type_fwd.h"
+
+#include "arrow/compute/api.h"
+#include "arrow/compute/kernels/test_util.h"
+
+#include "arrow/ipc/json_simple.h"
+
+namespace arrow {
+namespace compute {
+
+using CumulativeTypes =
+    testing::Types<UInt8Type, UInt16Type, UInt32Type, UInt64Type, Int8Type, Int16Type,
+                   Int32Type, Int64Type, FloatType, DoubleType>;
+
+template <typename T>
+class TestCumulativeOp : public ::testing::Test {
+ public:
+  using ArrowType = T;
+  using ArrowScalar = typename TypeTraits<T>::ScalarType;
+  using CType = typename TypeTraits<T>::CType;
+
+ protected:
+  std::shared_ptr<DataType> type_singleton() { return default_type_instance<T>(); }
+
+  std::shared_ptr<Array> array(const std::string& value) {
+    return ArrayFromJSON(type_singleton(), value);
+  }
+
+  template <typename V = T>
+  enable_if_t<!is_floating_type<V>::value, void> Assert(
+      const std::string func, const std::shared_ptr<Array>& input,
+      const std::shared_ptr<Array>& expected, const CumulativeGenericOptions& options) {
+    ASSERT_OK_AND_ASSIGN(auto result,
+                         CallFunction(func, {Datum(input)}, &options, nullptr));
+
+    AssertArraysEqual(*expected, *result.make_array(), false, EqualOptions::Defaults());
+  }
+
+  template <typename V = T>
+  enable_if_floating_point<V> Assert(const std::string func,
+                                     const std::shared_ptr<Array>& input,
+                                     const std::shared_ptr<Array>& expected,
+                                     const CumulativeGenericOptions& options) {
+    ASSERT_OK_AND_ASSIGN(auto result,
+                         CallFunction(func, {Datum(input)}, &options, nullptr));
+
+    AssertArraysApproxEqual(*expected, *result.make_array(), false,
+                            EqualOptions::Defaults());
+  }
+};
+
+template <typename T>
+class TestCumulativeSum : public TestCumulativeOp<T> {
+ public:
+  using ArrowType = typename TestCumulativeOp<T>::ArrowType;
+  using ArrowScalar = typename TestCumulativeOp<T>::ArrowScalar;
+  using CType = typename TestCumulativeOp<T>::CType;
+
+ protected:
+  template <typename U = T>
+  enable_if_parameter_free<U, CumulativeGenericOptions> generate_options(
+      CType start = 0, bool skip_nulls = false) {
+    return CumulativeGenericOptions(std::make_shared<ArrowScalar>(start), skip_nulls);
+  }
+
+  template <typename U = T>
+  enable_if_t<is_time_type<U>::value || is_timestamp_type<U>::value,
+              CumulativeGenericOptions>
+  generate_options(CType start = 0, bool skip_nulls = false) {
+    TimeUnit::type unit;
+    switch (ArrowType::type_id) {
+      case Type::TIME64:
+        unit = TimeUnit::NANO;
+        break;
+      default:
+        unit = TimeUnit::SECOND;
+        break;
+    }
+    return CumulativeGenericOptions(std::make_shared<ArrowScalar>(start, unit),
+                                    skip_nulls);
+  }
+
+  void Assert(const std::string& values, const std::string& expected,
+              const CumulativeGenericOptions& options) {
+    auto values_arr = TestCumulativeOp<T>::array(values);
+    auto expected_arr = TestCumulativeOp<T>::array(expected);
+    TestCumulativeOp<T>::Assert("cumulative_sum", values_arr, expected_arr, options);
+  }
+};
+
+TYPED_TEST_SUITE(TestCumulativeSum, CumulativeTypes);
+
+TYPED_TEST(TestCumulativeSum, NoStartNoSkipNoNulls) {
+  CumulativeGenericOptions options = this->generate_options();
+  auto empty = "[]";
+  auto values = "[1, 2, 3, 4, 5, 6]";
+  auto expected = "[1, 3, 6, 10, 15, 21]";
+  this->Assert(empty, empty, options);

Review comment:
       We also need to add tests with `ChunkedArray` inputs.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org