You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2020/06/28 14:43:47 UTC

[arrow] branch master updated: ARROW-9250: [C++] Instantiate fewer templates in IsIn, Match kernel implementations

This is an automated email from the ASF dual-hosted git repository.

wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 22994b79 ARROW-9250: [C++] Instantiate fewer templates in IsIn, Match kernel implementations
22994b79 is described below

commit 22994b7927868c01d68bc0e3b727c49352e794d5
Author: Wes McKinney <we...@apache.org>
AuthorDate: Sun Jun 28 09:43:28 2020 -0500

    ARROW-9250: [C++] Instantiate fewer templates in IsIn, Match kernel implementations
    
    This yields a 150KB reduction in code for me on Linux.
    
    Since this may become a common pattern (using e.g. a single `uint32_t`-based function to process both int32/uint32), some of this may be factored out to make writing such kernel implementations simpler in the future.
    
    Closes #7558 from wesm/ARROW-9250
    
    Authored-by: Wes McKinney <we...@apache.org>
    Signed-off-by: Wes McKinney <we...@apache.org>
---
 cpp/src/arrow/compute/kernels/scalar_set_lookup.cc | 97 +++++++++++++++++-----
 cpp/src/arrow/python/python_to_arrow.cc            |  3 +-
 cpp/src/arrow/type.h                               |  6 +-
 3 files changed, 83 insertions(+), 23 deletions(-)

diff --git a/cpp/src/arrow/compute/kernels/scalar_set_lookup.cc b/cpp/src/arrow/compute/kernels/scalar_set_lookup.cc
index 1eb810a..1d8c787 100644
--- a/cpp/src/arrow/compute/kernels/scalar_set_lookup.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_set_lookup.cc
@@ -34,12 +34,6 @@ namespace compute {
 namespace internal {
 namespace {
 
-template <typename T, typename R = void>
-using enable_if_supports_set_lookup =
-    enable_if_t<has_c_type<T>::value || is_base_binary_type<T>::value ||
-                    is_fixed_size_binary_type<T>::value || is_decimal_type<T>::value,
-                R>;
-
 template <typename Type>
 struct SetLookupState : public KernelState {
   explicit SetLookupState(MemoryPool* pool)
@@ -91,6 +85,30 @@ struct SetLookupState<NullType> : public KernelState {
   int64_t lookup_null_count;
 };
 
+// TODO: Put this concept somewhere reusable
+template <int width>
+struct UnsignedIntType;
+
+template <>
+struct UnsignedIntType<1> {
+  using Type = UInt8Type;
+};
+
+template <>
+struct UnsignedIntType<2> {
+  using Type = UInt16Type;
+};
+
+template <>
+struct UnsignedIntType<4> {
+  using Type = UInt32Type;
+};
+
+template <>
+struct UnsignedIntType<8> {
+  using Type = UInt64Type;
+};
+
 // Constructing the type requires a type parameter
 struct InitStateVisitor {
   KernelContext* ctx;
@@ -114,15 +132,24 @@ struct InitStateVisitor {
   Status Visit(const DataType&) { return Init<NullType>(); }
 
   template <typename Type>
-  enable_if_supports_set_lookup<Type, Status> Visit(const Type&) {
-    return Init<Type>();
+  enable_if_boolean<Type, Status> Visit(const Type&) {
+    return Init<BooleanType>();
   }
 
-  // Handle Decimal128 as a physical string, not a number
-  Status Visit(const Decimal128Type& type) {
-    return Visit(checked_cast<const FixedSizeBinaryType&>(type));
+  template <typename Type>
+  enable_if_t<has_c_type<Type>::value && !is_boolean_type<Type>::value, Status> Visit(
+      const Type&) {
+    return Init<typename UnsignedIntType<sizeof(typename Type::c_type)>::Type>();
   }
 
+  template <typename Type>
+  enable_if_base_binary<Type, Status> Visit(const Type&) {
+    return Init<typename Type::PhysicalType>();
+  }
+
+  // Handle Decimal128Type, FixedSizeBinaryType
+  Status Visit(const FixedSizeBinaryType& type) { return Init<FixedSizeBinaryType>(); }
+
   Status GetResult(std::unique_ptr<KernelState>* out) {
     RETURN_NOT_OK(VisitTypeInline(*options->value_set.type(), this));
     *out = std::move(result);
@@ -163,7 +190,7 @@ struct MatchVisitor {
   }
 
   template <typename Type>
-  enable_if_supports_set_lookup<Type, Status> Visit(const Type&) {
+  Status ProcessMatch() {
     using T = typename GetViewType<Type>::T;
 
     const auto& state = checked_cast<const SetLookupState<Type>&>(*ctx->state());
@@ -194,9 +221,25 @@ struct MatchVisitor {
     return Status::OK();
   }
 
-  // Handle Decimal128 as a physical string, not a number
-  Status Visit(const Decimal128Type& type) {
-    return Visit(checked_cast<const FixedSizeBinaryType&>(type));
+  template <typename Type>
+  enable_if_boolean<Type, Status> Visit(const Type&) {
+    return ProcessMatch<BooleanType>();
+  }
+
+  template <typename Type>
+  enable_if_t<has_c_type<Type>::value && !is_boolean_type<Type>::value, Status> Visit(
+      const Type&) {
+    return ProcessMatch<typename UnsignedIntType<sizeof(typename Type::c_type)>::Type>();
+  }
+
+  template <typename Type>
+  enable_if_base_binary<Type, Status> Visit(const Type&) {
+    return ProcessMatch<typename Type::PhysicalType>();
+  }
+
+  // Handle Decimal128Type, FixedSizeBinaryType
+  Status Visit(const FixedSizeBinaryType& type) {
+    return ProcessMatch<FixedSizeBinaryType>();
   }
 
   Status Execute() {
@@ -243,7 +286,7 @@ struct IsInVisitor {
   }
 
   template <typename Type>
-  enable_if_supports_set_lookup<Type, Status> Visit(const Type&) {
+  Status ProcessIsIn() {
     using T = typename GetViewType<Type>::T;
     const auto& state = checked_cast<const SetLookupState<Type>&>(*ctx->state());
     ArrayData* output = out->mutable_array();
@@ -275,9 +318,25 @@ struct IsInVisitor {
     return Status::OK();
   }
 
-  // Handle Decimal128 as a physical string, not a number
-  Status Visit(const Decimal128Type& type) {
-    return Visit(checked_cast<const FixedSizeBinaryType&>(type));
+  template <typename Type>
+  enable_if_boolean<Type, Status> Visit(const Type&) {
+    return ProcessIsIn<BooleanType>();
+  }
+
+  template <typename Type>
+  enable_if_t<has_c_type<Type>::value && !is_boolean_type<Type>::value, Status> Visit(
+      const Type&) {
+    return ProcessIsIn<typename UnsignedIntType<sizeof(typename Type::c_type)>::Type>();
+  }
+
+  template <typename Type>
+  enable_if_base_binary<Type, Status> Visit(const Type&) {
+    return ProcessIsIn<typename Type::PhysicalType>();
+  }
+
+  // Handle Decimal128Type, FixedSizeBinaryType
+  Status Visit(const FixedSizeBinaryType& type) {
+    return ProcessIsIn<FixedSizeBinaryType>();
   }
 
   Status Execute() { return VisitTypeInline(*data.type, this); }
diff --git a/cpp/src/arrow/python/python_to_arrow.cc b/cpp/src/arrow/python/python_to_arrow.cc
index bdaae4c..2805e5d 100644
--- a/cpp/src/arrow/python/python_to_arrow.cc
+++ b/cpp/src/arrow/python/python_to_arrow.cc
@@ -642,8 +642,7 @@ class StringConverter
       // We should have bailed out earlier
       DCHECK(!STRICT);
 
-      auto binary_type =
-          TypeTraits<typename TypeClass::EquivalentBinaryType>::type_singleton();
+      auto binary_type = TypeTraits<typename TypeClass::PhysicalType>::type_singleton();
       return (*out)->View(binary_type).Value(out);
     }
     return Status::OK();
diff --git a/cpp/src/arrow/type.h b/cpp/src/arrow/type.h
index 4801076..c343a98 100644
--- a/cpp/src/arrow/type.h
+++ b/cpp/src/arrow/type.h
@@ -829,6 +829,7 @@ class ARROW_EXPORT BinaryType : public BaseBinaryType {
   static constexpr Type::type type_id = Type::BINARY;
   static constexpr bool is_utf8 = false;
   using offset_type = int32_t;
+  using PhysicalType = BinaryType;
 
   static constexpr const char* type_name() { return "binary"; }
 
@@ -856,6 +857,7 @@ class ARROW_EXPORT LargeBinaryType : public BaseBinaryType {
   static constexpr Type::type type_id = Type::LARGE_BINARY;
   static constexpr bool is_utf8 = false;
   using offset_type = int64_t;
+  using PhysicalType = LargeBinaryType;
 
   static constexpr const char* type_name() { return "large_binary"; }
 
@@ -882,7 +884,7 @@ class ARROW_EXPORT StringType : public BinaryType {
  public:
   static constexpr Type::type type_id = Type::STRING;
   static constexpr bool is_utf8 = true;
-  using EquivalentBinaryType = BinaryType;
+  using PhysicalType = BinaryType;
 
   static constexpr const char* type_name() { return "utf8"; }
 
@@ -900,7 +902,7 @@ class ARROW_EXPORT LargeStringType : public LargeBinaryType {
  public:
   static constexpr Type::type type_id = Type::LARGE_STRING;
   static constexpr bool is_utf8 = true;
-  using EquivalentBinaryType = LargeBinaryType;
+  using PhysicalType = LargeBinaryType;
 
   static constexpr const char* type_name() { return "large_utf8"; }