You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by "felipecrv (via GitHub)" <gi...@apache.org> on 2023/06/09 23:14:02 UTC

[GitHub] [arrow] felipecrv commented on a diff in pull request #36018: GH-35581: [C++] Store offsets in scalars

felipecrv commented on code in PR #36018:
URL: https://github.com/apache/arrow/pull/36018#discussion_r1224861844


##########
cpp/CMakePresets.json:
##########
@@ -11,6 +11,7 @@
       "hidden": true,
       "generator": "Ninja",
       "cacheVariables": {
+        "CMAKE_EXPORT_COMPILE_COMMANDS": "ON",

Review Comment:
   I don't think we want to merge this and generate the huge file in CI builds, right?



##########
cpp/src/arrow/array/data.cc:
##########
@@ -265,13 +266,12 @@ int GetNumBuffers(const DataType& type) {
 namespace internal {
 
 void FillZeroLengthArray(const DataType* type, ArraySpan* span) {
-  memset(span->scratch_space, 0x00, sizeof(span->scratch_space));
-
   span->type = type;
   span->length = 0;
   int num_buffers = GetNumBuffers(*type);
   for (int i = 0; i < num_buffers; ++i) {
-    span->buffers[i].data = reinterpret_cast<uint8_t*>(span->scratch_space);
+    static int64_t zero{0};
+    span->buffers[i].data = reinterpret_cast<uint8_t*>(&zero);

Review Comment:
   You're casting a pointer to static data to a non-const pointer. That could be bad if you needed this buffer to always be zero, but you don't really need that, so maybe rename it from `zero` to `scratch_buffer`?



##########
cpp/src/arrow/scalar.h:
##########
@@ -111,6 +111,12 @@ struct ARROW_EXPORT Scalar : public std::enable_shared_from_this<Scalar>,
  protected:
   Scalar(std::shared_ptr<DataType> type, bool is_valid)
       : type(std::move(type)), is_valid(is_valid) {}
+
+  // 16 bytes of scratch space to enable ArraySpan to be a view onto any
+  // Scalar- including binary scalars where we need to create a buffer
+  // that looks like two 32-bit or 64-bit offsets.
+  alignas(int64_t) std::array<uint8_t, sizeof(int64_t) * 2> scratch_space_{0};

Review Comment:
   Why not `alignas(uint64_t) uint8_t scratch_space_[16];`?
   
   I would leave out the 0 initialization as well. Leave it undefined for scalars that don't need the scratch buffers.



##########
cpp/src/arrow/array/data.cc:
##########
@@ -265,13 +266,12 @@ int GetNumBuffers(const DataType& type) {
 namespace internal {
 
 void FillZeroLengthArray(const DataType* type, ArraySpan* span) {
-  memset(span->scratch_space, 0x00, sizeof(span->scratch_space));
-
   span->type = type;
   span->length = 0;
   int num_buffers = GetNumBuffers(*type);
   for (int i = 0; i < num_buffers; ++i) {
-    span->buffers[i].data = reinterpret_cast<uint8_t*>(span->scratch_space);
+    static int64_t zero{0};
+    span->buffers[i].data = reinterpret_cast<uint8_t*>(&zero);

Review Comment:
   This would be new, but it's probably better to set buffer 0 (validity bitmap) to `nullptr` than to set it to a empty buffer.



##########
cpp/src/arrow/array/data.cc:
##########
@@ -228,11 +228,12 @@ void ArraySpan::SetMembers(const ArrayData& data) {
 namespace {
 
 template <typename offset_type>
-void SetOffsetsForScalar(ArraySpan* span, offset_type* buffer, int64_t value_size,
+void SetOffsetsForScalar(ArraySpan* span, uint8_t* buffer, int64_t value_size,

Review Comment:
   `value_size` could be declared as `offset_type` instead of `int64_t`.



##########
cpp/src/arrow/array/data.cc:
##########
@@ -354,6 +353,7 @@ void ArraySpan::FillFromScalar(const Scalar& value) {
     const auto& scalar = checked_cast<const BaseListScalar&>(value);
 
     int64_t value_length = 0;
+    (void)value_length;

Review Comment:
   Leftover?



##########
cpp/src/arrow/scalar.h:
##########
@@ -111,6 +111,12 @@ struct ARROW_EXPORT Scalar : public std::enable_shared_from_this<Scalar>,
  protected:
   Scalar(std::shared_ptr<DataType> type, bool is_valid)
       : type(std::move(type)), is_valid(is_valid) {}
+
+  // 16 bytes of scratch space to enable ArraySpan to be a view onto any
+  // Scalar- including binary scalars where we need to create a buffer
+  // that looks like two 32-bit or 64-bit offsets.
+  alignas(int64_t) std::array<uint8_t, sizeof(int64_t) * 2> scratch_space_{0};

Review Comment:
   This would also remove the need to call `.data()` above which was weird.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org