You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2022/06/14 10:40:16 UTC

[GitHub] [arrow] pitrou commented on a diff in pull request #13364: ARROW-16756: [C++] Introduce non-owning ArraySpan, ExecSpan data structures and refactor ScalarKernels to use them

pitrou commented on code in PR #13364:
URL: https://github.com/apache/arrow/pull/13364#discussion_r896660360


##########
cpp/src/arrow/array/data.cc:
##########
@@ -128,6 +131,141 @@ int64_t ArrayData::GetNullCount() const {
   return precomputed;
 }
 
+// ----------------------------------------------------------------------
+// Methods for ArraySpan
+
+void ArraySpan::SetMembers(const ArrayData& data) {  // Make this span mirror `data`.
+  this->type = data.type.get();  // raw pointer obtained via get() on data.type
+  this->length = data.length;
+  this->null_count = data.null_count.load();
+  this->offset = data.offset;
+  // NOTE(review): assumes data.buffers.size() <= 3 (the second loop's bound) -- TODO confirm
+  for (int i = 0; i < static_cast<int>(data.buffers.size()); ++i) {
+    const std::shared_ptr<Buffer>& buffer = data.buffers[i];
+    // Whoever invokes the kernels is responsible for ensuring that
+    // const buffers are not written to accidentally.
+    if (buffer) {
+      SetBuffer(i, buffer);
+    } else {
+      ClearBuffer(i);  // a null buffer in `data` leaves slot i cleared
+    }
+  }
+
+  // Clear every remaining slot below index 3 so absent buffers are seen as null / non-existent
+  for (int i = static_cast<int>(data.buffers.size()); i < 3; ++i) {
+    ClearBuffer(i);
+  }
+
+  if (this->type->id() == Type::DICTIONARY) {  // the dictionary becomes the only child span
+    this->child_data.resize(1);
+    this->child_data[0].SetMembers(*data.dictionary);  // assumes data.dictionary is non-null
+  } else {  // otherwise mirror each child of `data` recursively
+    this->child_data.resize(data.child_data.size());
+    for (size_t child_index = 0; child_index < data.child_data.size(); ++child_index) {
+      this->child_data[child_index].SetMembers(*data.child_data[child_index]);
+    }
+  }
+}
+
+void ArraySpan::FillFromScalar(const Scalar& value) {  // Describe `value` as a length-1 span.
+  static const uint8_t kValidByte = 0x01;  // validity byte used when value.is_valid
+  static const uint8_t kNullByte = 0x00;   // validity byte used when !value.is_valid
+
+  this->type = value.type.get();
+  this->length = 1;
+  // NOTE(review): offset, buffers[2] and child_data are not assigned here -- confirm the span is fresh/reset
+  // Null count is 0 or 1; the validity buffer points at one of the static bytes above
+  this->null_count = value.is_valid ? 0 : 1;
+  this->buffers[0].data = const_cast<uint8_t*>(value.is_valid ? &kValidByte : &kNullByte);
+  this->buffers[0].size = 1;
+
+  if (is_primitive(value.type->id())) {  // buffers[1] points at the bytes from scalar.view()
+    const auto& scalar =
+        internal::checked_cast<const internal::PrimitiveScalarBase&>(value);
+    const uint8_t* scalar_data = reinterpret_cast<const uint8_t*>(scalar.view().data());
+    this->buffers[1].data = const_cast<uint8_t*>(scalar_data);
+    this->buffers[1].size = scalar.type->byte_width();
+  } else {
+    // TODO(wesm): implement for other types; non-primitive scalars hit the DCHECK below
+    DCHECK(false) << "need to implement for other types";
+  }
+}
+
+int64_t ArraySpan::GetNullCount() const {  // Return the null count, computing it if unknown.
+  int64_t precomputed = this->null_count;
+  if (ARROW_PREDICT_FALSE(precomputed == kUnknownNullCount)) {  // not computed yet
+    if (this->buffers[0].data != nullptr) {  // bitmap present: nulls = length - set bits
+      precomputed =
+          this->length - CountSetBits(this->buffers[0].data, this->offset, this->length);
+    } else {  // no validity bitmap: the count is 0
+      precomputed = 0;
+    }
+    this->null_count = precomputed;  // store the result so later calls skip the computation
+  }
+  return precomputed;
+}
+
+int GetNumBuffers(const DataType& type) {  // Return the buffer count for `type`, keyed on its id.
+  switch (type.id()) {
+    case Type::NA:
+      return 0;
+    case Type::STRUCT:
+    case Type::FIXED_SIZE_LIST:
+      return 1;  // presumably the validity bitmap only -- confirm against the format spec
+    case Type::BINARY:
+    case Type::LARGE_BINARY:
+    case Type::STRING:
+    case Type::LARGE_STRING:
+    case Type::DENSE_UNION:
+      return 3;  // the only ids that use all three buffer slots
+    case Type::EXTENSION:
+      // The number of buffers depends on the storage type, so recurse on it
+      return GetNumBuffers(
+          *internal::checked_cast<const ExtensionType&>(type).storage_type());
+    default:
+      // Everything else has 2 buffers
+      return 2;
+  }
+}
+
+int ArraySpan::num_buffers() const { return GetNumBuffers(*this->type); }  // dereferences type
+
+std::shared_ptr<ArrayData> ArraySpan::ToArrayData() const {
+  auto result = std::make_shared<ArrayData>(this->type->Copy(), this->length,
+                                            kUnknownNullCount, this->offset);

Review Comment:
   For the record, why not pass `this->null_count`?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org