You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by "lidavidm (via GitHub)" <gi...@apache.org> on 2023/07/11 15:13:35 UTC

[GitHub] [arrow-adbc] lidavidm commented on a diff in pull request #883: feat(c/driver/postgresql): Handle NUMERIC type by converting to string

lidavidm commented on code in PR #883:
URL: https://github.com/apache/arrow-adbc/pull/883#discussion_r1258885026


##########
c/driver/postgresql/postgres_copy_reader.h:
##########
@@ -211,6 +212,172 @@ class PostgresCopyNetworkEndianFieldReader : public PostgresCopyFieldReader {
   }
 };
 
+// Converts COPY resulting from the Postgres NUMERIC type into a string.
+// Rewritten based on the Postgres implementation of NUMERIC cast to string in
+// src/backend/utils/adt/numeric.c : get_str_from_var() (Note that in the initial source,
+// DEC_DIGITS is always 4 and DBASE is always 10000).
+//
+// Briefly, the Postgres representation of "numeric" is an array of int16_t ("digits")
+// from most significant to least significant. Each "digit" is a value between 0000 and
+// 9999. There are weight + 1 digits before the decimal point and dscale digits after the
+// decimal point. Both of those values can be zero or negative. A "sign" component
+// encodes the positive or negativeness of the value and is also used to encode special
+// values (inf, -inf, and nan).
+class PostgresCopyNumericFieldReader : public PostgresCopyFieldReader {
+ public:
+  ArrowErrorCode Read(ArrowBufferView* data, int32_t field_size_bytes, ArrowArray* array,
+                      ArrowError* error) override {
+    // -1 for NULL
+    if (field_size_bytes < 0) {
+      return ArrowArrayAppendNull(array, 1);
+    }
+
+    // Read the input
+    if (data->size_bytes < static_cast<int64_t>(4 * sizeof(int16_t))) {
+      ArrowErrorSet(error,
+                    "Expected at least %d bytes of field data for numeric copy data but "
+                    "only %d bytes of input remain",
+                    static_cast<int>(4 * sizeof(int16_t)),
+                    static_cast<int>(data->size_bytes));  // NOLINT(runtime/int)
+      return EINVAL;
+    }
+
+    int16_t ndigits = ReadUnsafe<int16_t>(data);
+    int16_t weight = ReadUnsafe<int16_t>(data);
+    uint16_t sign = ReadUnsafe<uint16_t>(data);
+    uint16_t dscale = ReadUnsafe<uint16_t>(data);
+
+    if (data->size_bytes < static_cast<int64_t>(ndigits * sizeof(int16_t))) {
+      ArrowErrorSet(error,
+                    "Expected at least %d bytes of field data for numeric digits copy "
+                    "data but only %d bytes of input remain",
+                    static_cast<int>(ndigits * sizeof(int16_t)),
+                    static_cast<int>(data->size_bytes));  // NOLINT(runtime/int)
+      return EINVAL;
+    }
+
+    digits_.clear();
+    for (int16_t i = 0; i < ndigits; i++) {
+      digits_.push_back(ReadUnsafe<int16_t>(data));
+    }
+
+    // Handle special values
+    std::string special_value;
+    switch (sign) {
+      case kNumericNAN:
+        special_value = std::string("nan");
+        break;
+      case kNumericPinf:
+        special_value = std::string("inf");
+        break;
+      case kNumericNinf:
+        special_value = std::string("-inf");
+        break;
+      case kNumericPos:
+      case kNumericNeg:
+        special_value = std::string("");
+        break;
+      default:
+        ArrowErrorSet(error,
+                      "Unexpected value for sign read from Postgres numeric field: %d",
+                      static_cast<int>(sign));
+        return EINVAL;
+    }
+
+    if (special_value.size() > 0) {

Review Comment:
   nit: `!special_value.empty()`



##########
c/driver/postgresql/postgres_copy_reader.h:
##########
@@ -211,6 +212,172 @@ class PostgresCopyNetworkEndianFieldReader : public PostgresCopyFieldReader {
   }
 };
 
+// Converts COPY resulting from the Postgres NUMERIC type into a string.
+// Rewritten based on the Postgres implementation of NUMERIC cast to string in
+// src/backend/utils/adt/numeric.c : get_str_from_var() (Note that in the initial source,
+// DEC_DIGITS is always 4 and DBASE is always 10000).
+//
+// Briefly, the Postgres representation of "numeric" is an array of int16_t ("digits")
+// from most significant to least significant. Each "digit" is a value between 0000 and
+// 9999. There are weight + 1 digits before the decimal point and dscale digits after the
+// decimal point. Both of those values can be zero or negative. A "sign" component
+// encodes the positive or negativeness of the value and is also used to encode special
+// values (inf, -inf, and nan).
+class PostgresCopyNumericFieldReader : public PostgresCopyFieldReader {
+ public:
+  ArrowErrorCode Read(ArrowBufferView* data, int32_t field_size_bytes, ArrowArray* array,
+                      ArrowError* error) override {
+    // -1 for NULL
+    if (field_size_bytes < 0) {
+      return ArrowArrayAppendNull(array, 1);
+    }
+
+    // Read the input
+    if (data->size_bytes < static_cast<int64_t>(4 * sizeof(int16_t))) {
+      ArrowErrorSet(error,
+                    "Expected at least %d bytes of field data for numeric copy data but "
+                    "only %d bytes of input remain",
+                    static_cast<int>(4 * sizeof(int16_t)),
+                    static_cast<int>(data->size_bytes));  // NOLINT(runtime/int)
+      return EINVAL;
+    }
+
+    int16_t ndigits = ReadUnsafe<int16_t>(data);
+    int16_t weight = ReadUnsafe<int16_t>(data);
+    uint16_t sign = ReadUnsafe<uint16_t>(data);
+    uint16_t dscale = ReadUnsafe<uint16_t>(data);
+
+    if (data->size_bytes < static_cast<int64_t>(ndigits * sizeof(int16_t))) {
+      ArrowErrorSet(error,
+                    "Expected at least %d bytes of field data for numeric digits copy "
+                    "data but only %d bytes of input remain",
+                    static_cast<int>(ndigits * sizeof(int16_t)),
+                    static_cast<int>(data->size_bytes));  // NOLINT(runtime/int)
+      return EINVAL;
+    }
+
+    digits_.clear();
+    for (int16_t i = 0; i < ndigits; i++) {
+      digits_.push_back(ReadUnsafe<int16_t>(data));
+    }
+
+    // Handle special values
+    std::string special_value;
+    switch (sign) {
+      case kNumericNAN:
+        special_value = std::string("nan");
+        break;
+      case kNumericPinf:
+        special_value = std::string("inf");
+        break;
+      case kNumericNinf:
+        special_value = std::string("-inf");
+        break;
+      case kNumericPos:
+      case kNumericNeg:
+        special_value = std::string("");
+        break;
+      default:
+        ArrowErrorSet(error,
+                      "Unexpected value for sign read from Postgres numeric field: %d",
+                      static_cast<int>(sign));
+        return EINVAL;
+    }
+
+    if (special_value.size() > 0) {
+      NANOARROW_RETURN_NOT_OK(
+          ArrowBufferAppend(data_, special_value.data(), special_value.size()));
+      NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32(offsets_, data_->size_bytes));
+      return AppendValid(array);
+    }
+
+    // Calculate string space requirement
+    int64_t max_chars_required = std::max<int64_t>(1, weight + 1 * kDecDigits);

Review Comment:
   is the `1 *` necessary?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org