You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by "paleolimbot (via GitHub)" <gi...@apache.org> on 2023/04/28 16:45:47 UTC

[GitHub] [arrow-adbc] paleolimbot commented on a diff in pull request #626: fix(c/driver/postgresql): properly handle NULLs

paleolimbot commented on code in PR #626:
URL: https://github.com/apache/arrow-adbc/pull/626#discussion_r1180614698


##########
c/driver/postgresql/statement.cc:
##########
@@ -496,109 +494,89 @@ int TupleReader::AppendNext(struct ArrowSchemaView* fields, const char* buf, int
     int32_t field_length = LoadNetworkInt32(buf);
     buf += sizeof(int32_t);
 
-    struct ArrowBitmap* bitmap = ArrowArrayValidityBitmap(out->children[col]);
-
     // TODO: set error message here
-    CHECK_NA(ArrowBitmapAppend(bitmap, field_length >= 0, 1));
-
-    switch (fields[col].type) {
-      case NANOARROW_TYPE_BOOL: {
-        // DCHECK_EQ(field_length, 1);
-        struct ArrowBuffer* buffer = ArrowArrayBuffer(out->children[col], 1);
-        uint8_t raw_value = buf[0];
-        buf += 1;
-
-        if (raw_value != 0 && raw_value != 1) {
-          last_error_ = StringBuilder("[libpq] Column #", col + 1, " (\"",
-                                      schema_.children[col]->name,
-                                      "\"): invalid BOOL value ", raw_value);
-          return EINVAL;
-        }
+    if (field_length != kNullFieldLength) {
+      CHECK_NA(AppendValue(fields, buf, col, *row_count, field_length, out));
+    } else {
+      CHECK_NA(ArrowArrayAppendNull(out->children[col], 1));
+    }
+  }
+  (*row_count)++;
+  return 0;
+}
 
-        int64_t bytes_required = _ArrowRoundUpToMultipleOf8(*row_count + 1) / 8;
-        if (bytes_required > buffer->size_bytes) {
-          CHECK_NA(ArrowBufferAppendFill(buffer, 0, bytes_required - buffer->size_bytes));
-        }
-        ArrowBitsSetTo(buffer->data, *row_count, 1, raw_value);
-        break;
-      }
-      case NANOARROW_TYPE_DOUBLE: {
-        // DCHECK_EQ(field_length, 8);
-        static_assert(sizeof(double) == sizeof(uint64_t),
-                      "Float is not same size as uint64_t");
-        struct ArrowBuffer* buffer = ArrowArrayBuffer(out->children[col], 1);
-        uint64_t raw_value = LoadNetworkUInt64(buf);
-        buf += sizeof(uint64_t);
-        double value = 0.0;
-        std::memcpy(&value, &raw_value, sizeof(double));
-        CHECK_NA(ArrowBufferAppendDouble(buffer, value));
-        break;
-      }
-      case NANOARROW_TYPE_FLOAT: {
-        // DCHECK_EQ(field_length, 4);
-        static_assert(sizeof(float) == sizeof(uint32_t),
-                      "Float is not same size as uint32_t");
-        struct ArrowBuffer* buffer = ArrowArrayBuffer(out->children[col], 1);
-        uint32_t raw_value = LoadNetworkUInt32(buf);
-        buf += sizeof(uint32_t);
-        float value = 0.0;
-        std::memcpy(&value, &raw_value, sizeof(float));
-        CHECK_NA(ArrowBufferAppendFloat(buffer, value));
-        break;
-      }
-      case NANOARROW_TYPE_INT16: {
-        // DCHECK_EQ(field_length, 2);
-        struct ArrowBuffer* buffer = ArrowArrayBuffer(out->children[col], 1);
-        int32_t value = LoadNetworkInt16(buf);
-        buf += sizeof(int32_t);
-        CHECK_NA(ArrowBufferAppendInt16(buffer, value));
-        break;
-      }
-      case NANOARROW_TYPE_INT32: {
-        // DCHECK_EQ(field_length, 4);
-        struct ArrowBuffer* buffer = ArrowArrayBuffer(out->children[col], 1);
-        int32_t value = LoadNetworkInt32(buf);
-        buf += sizeof(int32_t);
-        CHECK_NA(ArrowBufferAppendInt32(buffer, value));
-        break;
-      }
-      case NANOARROW_TYPE_INT64: {
-        // DCHECK_EQ(field_length, 8);
-        struct ArrowBuffer* buffer = ArrowArrayBuffer(out->children[col], 1);
-        int64_t value = field_length < 0 ? 0 : LoadNetworkInt64(buf);
-        buf += sizeof(int64_t);
-        CHECK_NA(ArrowBufferAppendInt64(buffer, value));
-        break;
-      }
-      case NANOARROW_TYPE_BINARY: {
-        struct ArrowBuffer* offset = ArrowArrayBuffer(out->children[col], 1);
-        struct ArrowBuffer* data = ArrowArrayBuffer(out->children[col], 2);
-        const int32_t last_offset =
-            reinterpret_cast<const int32_t*>(offset->data)[*row_count];
-        CHECK_NA(ArrowBufferAppendInt32(offset, last_offset + field_length));
-        CHECK_NA(ArrowBufferAppend(data, buf, field_length));
-        buf += field_length;
-        break;
-      }
-      case NANOARROW_TYPE_STRING: {
-        // textsend() in varlena.c
-        struct ArrowBuffer* offset = ArrowArrayBuffer(out->children[col], 1);
-        struct ArrowBuffer* data = ArrowArrayBuffer(out->children[col], 2);
-        const int32_t last_offset =
-            reinterpret_cast<const int32_t*>(offset->data)[*row_count];
-        CHECK_NA(ArrowBufferAppendInt32(offset, last_offset + field_length));
-        CHECK_NA(ArrowBufferAppend(data, buf, field_length));
-        buf += field_length;
-        break;
-      }
-      default:
+int TupleReader::AppendValue(struct ArrowSchemaView* fields, const char* buf, int col,
+                             int64_t row_count, int32_t field_length,
+                             struct ArrowArray* out) {
+  switch (fields[col].type) {
+    case NANOARROW_TYPE_BOOL: {
+      uint8_t raw_value = buf[0];
+      buf += 1;
+
+      if (raw_value != 0 && raw_value != 1) {
         last_error_ = StringBuilder("[libpq] Column #", col + 1, " (\"",
                                     schema_.children[col]->name,
-                                    "\") has unsupported type ", fields[col].type);
-        return ENOTSUP;
+                                    "\"): invalid BOOL value ", raw_value);
+        return EINVAL;
+      }
+      CHECK_NA(ArrowArrayAppendInt(out->children[col], raw_value));
+      break;
+    }
+    case NANOARROW_TYPE_DOUBLE: {
+      static_assert(sizeof(double) == sizeof(uint64_t),
+                    "Float is not same size as uint64_t");
+      uint64_t raw_value = LoadNetworkUInt64(buf);
+      buf += sizeof(uint64_t);
+      double value = 0.0;
+      std::memcpy(&value, &raw_value, sizeof(double));
+      CHECK_NA(ArrowArrayAppendDouble(out->children[col], value));
+      break;
+    }
+    case NANOARROW_TYPE_FLOAT: {
+      static_assert(sizeof(float) == sizeof(uint32_t),
+                    "Float is not same size as uint32_t");
+      uint32_t raw_value = LoadNetworkUInt32(buf);
+      buf += sizeof(uint32_t);
+      float value = 0.0;
+      std::memcpy(&value, &raw_value, sizeof(float));
+      CHECK_NA(ArrowArrayAppendDouble(out->children[col], value));
+      break;
     }
+    case NANOARROW_TYPE_INT16: {
+      int32_t value = LoadNetworkInt16(buf);
+      buf += sizeof(int32_t);
+      CHECK_NA(ArrowArrayAppendInt(out->children[col], value));
+      break;
+    }
+    case NANOARROW_TYPE_INT32: {
+      int32_t value = LoadNetworkInt32(buf);
+      buf += sizeof(int32_t);
+      CHECK_NA(ArrowArrayAppendInt(out->children[col], value));
+      break;
+    }
+    case NANOARROW_TYPE_INT64: {
+      int64_t value = field_length < 0 ? 0 : LoadNetworkInt64(buf);

Review Comment:
   Do you need this check anymore?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org