You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2020/06/28 14:43:47 UTC
[arrow] branch master updated: ARROW-9250: [C++] Instantiate fewer
templates in IsIn, Match kernel implementations
This is an automated email from the ASF dual-hosted git repository.
wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 22994b79 ARROW-9250: [C++] Instantiate fewer templates in IsIn, Match kernel implementations
22994b79 is described below
commit 22994b7927868c01d68bc0e3b727c49352e794d5
Author: Wes McKinney <we...@apache.org>
AuthorDate: Sun Jun 28 09:43:28 2020 -0500
ARROW-9250: [C++] Instantiate fewer templates in IsIn, Match kernel implementations
This yields a 150KB reduction in code for me on Linux.
Since this may become a common pattern (using e.g. a single `uint32_t`-based function to process both int32/uint32), some of this may be factored out to make writing such kernel implementations simpler in the future.
Closes #7558 from wesm/ARROW-9250
Authored-by: Wes McKinney <we...@apache.org>
Signed-off-by: Wes McKinney <we...@apache.org>
---
cpp/src/arrow/compute/kernels/scalar_set_lookup.cc | 97 +++++++++++++++++-----
cpp/src/arrow/python/python_to_arrow.cc | 3 +-
cpp/src/arrow/type.h | 6 +-
3 files changed, 83 insertions(+), 23 deletions(-)
diff --git a/cpp/src/arrow/compute/kernels/scalar_set_lookup.cc b/cpp/src/arrow/compute/kernels/scalar_set_lookup.cc
index 1eb810a..1d8c787 100644
--- a/cpp/src/arrow/compute/kernels/scalar_set_lookup.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_set_lookup.cc
@@ -34,12 +34,6 @@ namespace compute {
namespace internal {
namespace {
-template <typename T, typename R = void>
-using enable_if_supports_set_lookup =
- enable_if_t<has_c_type<T>::value || is_base_binary_type<T>::value ||
- is_fixed_size_binary_type<T>::value || is_decimal_type<T>::value,
- R>;
-
template <typename Type>
struct SetLookupState : public KernelState {
explicit SetLookupState(MemoryPool* pool)
@@ -91,6 +85,30 @@ struct SetLookupState<NullType> : public KernelState {
int64_t lookup_null_count;
};
+// TODO: Put this concept somewhere reusable
+template <int width>
+struct UnsignedIntType;
+
+template <>
+struct UnsignedIntType<1> {
+ using Type = UInt8Type;
+};
+
+template <>
+struct UnsignedIntType<2> {
+ using Type = UInt16Type;
+};
+
+template <>
+struct UnsignedIntType<4> {
+ using Type = UInt32Type;
+};
+
+template <>
+struct UnsignedIntType<8> {
+ using Type = UInt64Type;
+};
+
// Constructing the type requires a type parameter
struct InitStateVisitor {
KernelContext* ctx;
@@ -114,15 +132,24 @@ struct InitStateVisitor {
Status Visit(const DataType&) { return Init<NullType>(); }
template <typename Type>
- enable_if_supports_set_lookup<Type, Status> Visit(const Type&) {
- return Init<Type>();
+ enable_if_boolean<Type, Status> Visit(const Type&) {
+ return Init<BooleanType>();
}
- // Handle Decimal128 as a physical string, not a number
- Status Visit(const Decimal128Type& type) {
- return Visit(checked_cast<const FixedSizeBinaryType&>(type));
+ template <typename Type>
+ enable_if_t<has_c_type<Type>::value && !is_boolean_type<Type>::value, Status> Visit(
+ const Type&) {
+ return Init<typename UnsignedIntType<sizeof(typename Type::c_type)>::Type>();
}
+ template <typename Type>
+ enable_if_base_binary<Type, Status> Visit(const Type&) {
+ return Init<typename Type::PhysicalType>();
+ }
+
+ // Handle Decimal128Type, FixedSizeBinaryType
+ Status Visit(const FixedSizeBinaryType& type) { return Init<FixedSizeBinaryType>(); }
+
Status GetResult(std::unique_ptr<KernelState>* out) {
RETURN_NOT_OK(VisitTypeInline(*options->value_set.type(), this));
*out = std::move(result);
@@ -163,7 +190,7 @@ struct MatchVisitor {
}
template <typename Type>
- enable_if_supports_set_lookup<Type, Status> Visit(const Type&) {
+ Status ProcessMatch() {
using T = typename GetViewType<Type>::T;
const auto& state = checked_cast<const SetLookupState<Type>&>(*ctx->state());
@@ -194,9 +221,25 @@ struct MatchVisitor {
return Status::OK();
}
- // Handle Decimal128 as a physical string, not a number
- Status Visit(const Decimal128Type& type) {
- return Visit(checked_cast<const FixedSizeBinaryType&>(type));
+ template <typename Type>
+ enable_if_boolean<Type, Status> Visit(const Type&) {
+ return ProcessMatch<BooleanType>();
+ }
+
+ template <typename Type>
+ enable_if_t<has_c_type<Type>::value && !is_boolean_type<Type>::value, Status> Visit(
+ const Type&) {
+ return ProcessMatch<typename UnsignedIntType<sizeof(typename Type::c_type)>::Type>();
+ }
+
+ template <typename Type>
+ enable_if_base_binary<Type, Status> Visit(const Type&) {
+ return ProcessMatch<typename Type::PhysicalType>();
+ }
+
+ // Handle Decimal128Type, FixedSizeBinaryType
+ Status Visit(const FixedSizeBinaryType& type) {
+ return ProcessMatch<FixedSizeBinaryType>();
}
Status Execute() {
@@ -243,7 +286,7 @@ struct IsInVisitor {
}
template <typename Type>
- enable_if_supports_set_lookup<Type, Status> Visit(const Type&) {
+ Status ProcessIsIn() {
using T = typename GetViewType<Type>::T;
const auto& state = checked_cast<const SetLookupState<Type>&>(*ctx->state());
ArrayData* output = out->mutable_array();
@@ -275,9 +318,25 @@ struct IsInVisitor {
return Status::OK();
}
- // Handle Decimal128 as a physical string, not a number
- Status Visit(const Decimal128Type& type) {
- return Visit(checked_cast<const FixedSizeBinaryType&>(type));
+ template <typename Type>
+ enable_if_boolean<Type, Status> Visit(const Type&) {
+ return ProcessIsIn<BooleanType>();
+ }
+
+ template <typename Type>
+ enable_if_t<has_c_type<Type>::value && !is_boolean_type<Type>::value, Status> Visit(
+ const Type&) {
+ return ProcessIsIn<typename UnsignedIntType<sizeof(typename Type::c_type)>::Type>();
+ }
+
+ template <typename Type>
+ enable_if_base_binary<Type, Status> Visit(const Type&) {
+ return ProcessIsIn<typename Type::PhysicalType>();
+ }
+
+ // Handle Decimal128Type, FixedSizeBinaryType
+ Status Visit(const FixedSizeBinaryType& type) {
+ return ProcessIsIn<FixedSizeBinaryType>();
}
Status Execute() { return VisitTypeInline(*data.type, this); }
diff --git a/cpp/src/arrow/python/python_to_arrow.cc b/cpp/src/arrow/python/python_to_arrow.cc
index bdaae4c..2805e5d 100644
--- a/cpp/src/arrow/python/python_to_arrow.cc
+++ b/cpp/src/arrow/python/python_to_arrow.cc
@@ -642,8 +642,7 @@ class StringConverter
// We should have bailed out earlier
DCHECK(!STRICT);
- auto binary_type =
- TypeTraits<typename TypeClass::EquivalentBinaryType>::type_singleton();
+ auto binary_type = TypeTraits<typename TypeClass::PhysicalType>::type_singleton();
return (*out)->View(binary_type).Value(out);
}
return Status::OK();
diff --git a/cpp/src/arrow/type.h b/cpp/src/arrow/type.h
index 4801076..c343a98 100644
--- a/cpp/src/arrow/type.h
+++ b/cpp/src/arrow/type.h
@@ -829,6 +829,7 @@ class ARROW_EXPORT BinaryType : public BaseBinaryType {
static constexpr Type::type type_id = Type::BINARY;
static constexpr bool is_utf8 = false;
using offset_type = int32_t;
+ using PhysicalType = BinaryType;
static constexpr const char* type_name() { return "binary"; }
@@ -856,6 +857,7 @@ class ARROW_EXPORT LargeBinaryType : public BaseBinaryType {
static constexpr Type::type type_id = Type::LARGE_BINARY;
static constexpr bool is_utf8 = false;
using offset_type = int64_t;
+ using PhysicalType = LargeBinaryType;
static constexpr const char* type_name() { return "large_binary"; }
@@ -882,7 +884,7 @@ class ARROW_EXPORT StringType : public BinaryType {
public:
static constexpr Type::type type_id = Type::STRING;
static constexpr bool is_utf8 = true;
- using EquivalentBinaryType = BinaryType;
+ using PhysicalType = BinaryType;
static constexpr const char* type_name() { return "utf8"; }
@@ -900,7 +902,7 @@ class ARROW_EXPORT LargeStringType : public LargeBinaryType {
public:
static constexpr Type::type type_id = Type::LARGE_STRING;
static constexpr bool is_utf8 = true;
- using EquivalentBinaryType = LargeBinaryType;
+ using PhysicalType = LargeBinaryType;
static constexpr const char* type_name() { return "large_utf8"; }