You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2022/07/06 18:27:49 UTC

[GitHub] [arrow] lidavidm commented on a diff in pull request #13521: ARROW-16757: [C++] Remove "scalar" output modality for ScalarKernel implementations, remove ValueDescr class

lidavidm commented on code in PR #13521:
URL: https://github.com/apache/arrow/pull/13521#discussion_r914967972


##########
cpp/src/arrow/scalar.h:
##########
@@ -479,37 +481,52 @@ struct ARROW_EXPORT StructScalar : public Scalar {
 
   Result<std::shared_ptr<Scalar>> field(FieldRef ref) const;
 
-  StructScalar(ValueType value, std::shared_ptr<DataType> type)
-      : Scalar(std::move(type), true), value(std::move(value)) {}
+  StructScalar(ValueType value, std::shared_ptr<DataType> type, bool is_valid = true)
+      : Scalar(std::move(type), is_valid), value(std::move(value)) {}
 
   static Result<std::shared_ptr<StructScalar>> Make(ValueType value,
                                                     std::vector<std::string> field_names);
-
-  explicit StructScalar(std::shared_ptr<DataType> type) : Scalar(std::move(type)) {}
 };
 
 struct ARROW_EXPORT UnionScalar : public Scalar {
-  using Scalar::Scalar;
-  using ValueType = std::shared_ptr<Scalar>;
-
-  ValueType value;
   int8_t type_code;
 
-  UnionScalar(int8_t type_code, std::shared_ptr<DataType> type)
-      : Scalar(std::move(type), false), type_code(type_code) {}
-
-  UnionScalar(ValueType value, int8_t type_code, std::shared_ptr<DataType> type)
-      : Scalar(std::move(type), true), value(std::move(value)), type_code(type_code) {}
+ protected:
+  UnionScalar(std::shared_ptr<DataType> type, int8_t type_code, bool is_valid)
+      : Scalar(std::move(type), is_valid), type_code(type_code) {}
 };
 
 struct ARROW_EXPORT SparseUnionScalar : public UnionScalar {
-  using UnionScalar::UnionScalar;
   using TypeClass = SparseUnionType;
+
+  // Even though only one of the union values is relevant for this scalar, we
+  // nonetheless construct a vector of scalars, one per union value, to have
+  // enough data to reconstruct a valid ArraySpan of length 1 from this scalar
+  using ValueType = std::vector<std::shared_ptr<Scalar>>;
+  ValueType value;

Review Comment:
   This works. I suppose if we really wanted, we could keep a static buffer of zeroes and construct the span from slices of that based on type but that'd be quite a bit of work for one particular case.



##########
cpp/src/arrow/type.h:
##########
@@ -191,7 +191,7 @@ class ARROW_EXPORT DataType : public std::enable_shared_from_this<DataType>,
   // \brief EXPERIMENTAL: Enable retrieving shared_ptr<DataType> from a const
   // context. Implementation requires enable_shared_from_this but we may fix
   // this in the future
-  std::shared_ptr<DataType> Copy() const {
+  std::shared_ptr<DataType> GetSharedPtr() const {

Review Comment:
   Should we also remove the "Implementation requires enable_shared_from_this but we may fix this in the future" note?



##########
cpp/src/arrow/compute/function_internal.h:
##########
@@ -430,6 +434,12 @@ static inline enable_if_same_result<T, std::shared_ptr<DataType>> GenericFromSca
   return value->type;
 }
 
+template <typename T>
+static inline enable_if_same_result<T, TypeHolder> GenericFromScalar(
+    const std::shared_ptr<Scalar>& value) {
+  return value->type;
+}
+

Review Comment:
   It is rather complicated but it saves us a good amount of boilerplate to implement serialization/ToString/equality for all the options classes. Not sure if there's a better way (more codegen?)



##########
cpp/src/arrow/scalar_test.cc:
##########
@@ -1370,27 +1373,61 @@ TEST(TestDictionaryScalar, Cast) {
   }
 }
 
+const Scalar& GetUnionValue(const Scalar& value) {

Review Comment:
   Should this just be a getter of (Base)UnionScalar? Though I suppose then you'd have to cast to use it in the tests here



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org