You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by gi...@apache.org on 2023/11/22 01:30:19 UTC

(arrow-nanoarrow) branch main updated: Update dist/ for commit cb2aa71b385147d85eff91ed87a418cefaa3c8bd

This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 79cde01  Update dist/ for commit cb2aa71b385147d85eff91ed87a418cefaa3c8bd
79cde01 is described below

commit 79cde0190768771282652bcbf75ee42697590155
Author: GitHub Actions <ac...@github.com>
AuthorDate: Wed Nov 22 01:30:14 2023 +0000

    Update dist/ for commit cb2aa71b385147d85eff91ed87a418cefaa3c8bd
---
 dist/nanoarrow_testing.hpp | 389 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 389 insertions(+)

diff --git a/dist/nanoarrow_testing.hpp b/dist/nanoarrow_testing.hpp
new file mode 100644
index 0000000..aa6200a
--- /dev/null
+++ b/dist/nanoarrow_testing.hpp
@@ -0,0 +1,389 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <iostream>
+#include <string>
+
+#include "nanoarrow.hpp"
+
+#ifndef NANOARROW_TESTING_HPP_INCLUDED
+#define NANOARROW_TESTING_HPP_INCLUDED
+
+/// \defgroup nanoarrow_testing Nanoarrow Testing Helpers
+///
+/// Utilities for testing nanoarrow structures and functions.
+
+namespace nanoarrow {
+
+namespace testing {
+
+/// \defgroup nanoarrow_testing-json Integration test helpers
+///
+/// See testing format documentation for details of the JSON representation. This
+/// representation is not canonical but can be used to implement integration tests with
+/// other implementations.
+///
+/// @{
+
+/// \brief Writer for the Arrow integration testing JSON format
+class TestingJSONWriter {
+ public:
+  /// \brief Serialize a struct array as a JSON "batch" object
+  ///
+  /// Emits `{"count": <length>, "columns": [...]}`, where the `columns` array
+  /// holds one entry per child of the struct.
+  ///
+  /// \param out Stream receiving the JSON text
+  /// \param schema Schema describing value; its format string must be "+s"
+  /// \param value Array view whose length and children are written
+  /// \return EINVAL (before anything is written) if schema is not a struct,
+  ///   the first error reported while writing the children, or NANOARROW_OK
+  ArrowErrorCode WriteBatch(std::ostream& out, const ArrowSchema* schema,
+                            ArrowArrayView* value) {
+    // Only struct arrays can be represented as a batch
+    const std::string format(schema->format);
+    const bool is_struct = format == "+s";
+    if (!is_struct) {
+      return EINVAL;
+    }
+
+    // Opening brace, row count and the key introducing the column list
+    out << "{" << R"("count": )" << value->length << R"(, "columns": )";
+
+    const ArrowErrorCode children_status = WriteChildren(out, schema, value);
+    if (children_status != NANOARROW_OK) {
+      return children_status;
+    }
+
+    out << "}";
+    return NANOARROW_OK;
+  }
+
+  /// \brief Write a column to out
+  ///
+  /// Produces a JSON object such as
+  /// `{"name": "col", "count": 123, "VALIDITY": [...], ...}`. Members are emitted
+  /// in the order name, count, VALIDITY, TYPE_ID, OFFSET, DATA, children, and
+  /// each one only for the storage types that carry it.
+  ///
+  /// \param out Stream receiving the JSON text
+  /// \param field Schema of the column; a null name is written as JSON null
+  /// \param value Array view whose buffers and children are serialized
+  /// \return NANOARROW_OK, or the first error reported by a nested writer
+  ///
+  /// NOTE(review): the field name is streamed verbatim (no JSON escaping) and the
+  /// validity bitmap is read starting at bit 0 -- presumably callers pass plain
+  /// names and array views with a zero offset; confirm.
+  ArrowErrorCode WriteColumn(std::ostream& out, const ArrowSchema* field,
+                             ArrowArrayView* value) {
+    const auto type = value->storage_type;
+
+    // Classify the storage type once; each JSON member below keys off these
+    const bool is_null_type = type == NANOARROW_TYPE_NA;
+    const bool is_union =
+        type == NANOARROW_TYPE_DENSE_UNION || type == NANOARROW_TYPE_SPARSE_UNION;
+    const bool has_children =
+        is_union || type == NANOARROW_TYPE_STRUCT || type == NANOARROW_TYPE_LIST ||
+        type == NANOARROW_TYPE_LARGE_LIST || type == NANOARROW_TYPE_FIXED_SIZE_LIST;
+    const bool has_int32_offsets =
+        type == NANOARROW_TYPE_BINARY || type == NANOARROW_TYPE_STRING ||
+        type == NANOARROW_TYPE_DENSE_UNION || type == NANOARROW_TYPE_LIST;
+    const bool has_int64_offsets = type == NANOARROW_TYPE_LARGE_LIST ||
+                                   type == NANOARROW_TYPE_LARGE_BINARY ||
+                                   type == NANOARROW_TYPE_LARGE_STRING;
+
+    out << "{";
+
+    // "name": quoted field name, or null when the schema has none
+    if (field->name != nullptr) {
+      out << R"("name": ")" << field->name << R"(")";
+    } else {
+      out << R"("name": null)";
+    }
+
+    // "count": number of elements
+    out << R"(, "count": )" << value->length;
+
+    // "VALIDITY": written for everything except null and union types
+    if (!is_null_type && !is_union) {
+      out << R"(, "VALIDITY": )";
+      WriteBitmap(out, value->buffer_views[0].data.as_uint8, value->length);
+    }
+
+    // "TYPE_ID": unions keep their int8 type ids in the first buffer
+    if (is_union) {
+      out << R"(, "TYPE_ID": )";
+      NANOARROW_RETURN_NOT_OK(WriteOffsetOrTypeID<int8_t>(out, value->buffer_views[0]));
+    }
+
+    // "OFFSET": second buffer, 32-bit or 64-bit depending on the type
+    if (has_int32_offsets) {
+      out << R"(, "OFFSET": )";
+      NANOARROW_RETURN_NOT_OK(
+          WriteOffsetOrTypeID<int32_t>(out, value->buffer_views[1]));
+    } else if (has_int64_offsets) {
+      out << R"(, "OFFSET": )";
+      NANOARROW_RETURN_NOT_OK(
+          WriteOffsetOrTypeID<int64_t>(out, value->buffer_views[1]));
+    }
+
+    // "DATA": every type that is neither null nor a container of children
+    if (!is_null_type && !has_children) {
+      out << R"(, "DATA": )";
+      NANOARROW_RETURN_NOT_OK(WriteData(out, value));
+    }
+
+    // "children": nested columns for struct, list and union types
+    if (has_children) {
+      out << R"(, "children": )";
+      NANOARROW_RETURN_NOT_OK(WriteChildren(out, field, value));
+    }
+
+    out << "}";
+    return NANOARROW_OK;
+  }
+
+ private:
+  /// Write a validity bitmap as a JSON array of 0/1 integers.
+  ///
+  /// \param out Stream receiving the JSON text
+  /// \param bits Packed bitmap, or nullptr, in which case every element is
+  ///   written as 1
+  /// \param length Number of elements to write; 0 yields `[]`
+  void WriteBitmap(std::ostream& out, const uint8_t* bits, int64_t length) {
+    if (length == 0) {
+      out << "[]";
+      return;
+    }
+
+    // A missing bitmap means that no element is null
+    const bool all_valid = bits == nullptr;
+
+    out << "[";
+
+    // The first element is always written once length is non-zero; every
+    // following element is preceded by a separator.
+    int64_t i = 0;
+    do {
+      if (i > 0) {
+        out << ", ";
+      }
+
+      if (all_valid) {
+        out << "1";
+      } else {
+        out << static_cast<int32_t>(ArrowBitGet(bits, i));
+      }
+    } while (++i < length);
+
+    out << "]";
+  }
+
+  /// Write a buffer of integer offsets or union type ids as a JSON array.
+  ///
+  /// Elements as wide as int64_t are written as quoted strings (e.g. "123456");
+  /// narrower elements are written as bare JSON numbers (e.g. 123456).
+  ///
+  /// \tparam T Integer type used to interpret the buffer contents
+  /// \param out Stream receiving the JSON text
+  /// \param content Buffer view to serialize; every complete T it contains is
+  ///   written, and a buffer holding no complete T yields `[]`
+  /// \return NANOARROW_OK
+  template <typename T>
+  ArrowErrorCode WriteOffsetOrTypeID(std::ostream& out, ArrowBufferView content) {
+    // Count whole elements only, so a short (or empty) buffer is never read
+    const int64_t n_values = content.size_bytes / static_cast<int64_t>(sizeof(T));
+    if (n_values <= 0) {
+      out << "[]";
+      return NANOARROW_OK;
+    }
+
+    const T* values = reinterpret_cast<const T*>(content.data.data);
+
+    // Ensure int64s are quoted (i.e., "123456"); smaller ints need no quotes
+    const bool quoted = sizeof(T) == sizeof(int64_t);
+
+    out << "[";
+    for (int64_t i = 0; i < n_values; i++) {
+      if (i > 0) {
+        out << ", ";
+      }
+
+      if (quoted) {
+        out << R"(")";
+      }
+
+      // Widen before streaming: an int8_t streamed directly selects the
+      // character overload of operator<< and would emit a raw byte instead of
+      // the number.
+      out << static_cast<int64_t>(values[i]);
+
+      if (quoted) {
+        out << R"(")";
+      }
+    }
+    out << "]";
+
+    return NANOARROW_OK;
+  }
+
+  /// Write the "DATA" member of a column: a JSON array with one entry per
+  /// element of value.
+  ///
+  /// Booleans and integers up to 32 bits are written as JSON numbers, 64-bit
+  /// integers as quoted strings, floating-point values as numbers with three
+  /// decimal places, strings via WriteString() and binary values via
+  /// WriteBytes().
+  ///
+  /// \param out Stream receiving the JSON text
+  /// \param value Array view to serialize; a length of 0 yields `[]`
+  /// \return NANOARROW_OK, ENOTSUP for a storage type not handled here (the
+  ///   opening bracket has already been written by then), or the first error
+  ///   reported while writing an element
+  ArrowErrorCode WriteData(std::ostream& out, ArrowArrayView* value) {
+    const int64_t n_values = value->length;
+    if (n_values == 0) {
+      out << "[]";
+      return NANOARROW_OK;
+    }
+
+    out << "[";
+
+    // Each branch writes element 0 unconditionally and then the remaining
+    // elements, every one of them preceded by ", ".
+    int64_t i = 0;
+
+    switch (value->storage_type) {
+      case NANOARROW_TYPE_BOOL:
+      case NANOARROW_TYPE_INT8:
+      case NANOARROW_TYPE_UINT8:
+      case NANOARROW_TYPE_INT16:
+      case NANOARROW_TYPE_UINT16:
+      case NANOARROW_TYPE_INT32:
+      case NANOARROW_TYPE_UINT32:
+        // Plain JSON integers (e.g., 123456)
+        do {
+          if (i > 0) {
+            out << ", ";
+          }
+          out << ArrowArrayViewGetIntUnsafe(value, i);
+        } while (++i < n_values);
+        break;
+
+      case NANOARROW_TYPE_INT64:
+        // Quoted so that consumers do not overflow (e.g., "123456")
+        do {
+          if (i > 0) {
+            out << ", ";
+          }
+          out << R"(")" << ArrowArrayViewGetIntUnsafe(value, i) << R"(")";
+        } while (++i < n_values);
+        break;
+
+      case NANOARROW_TYPE_UINT64:
+        // Quoted so that consumers do not overflow (e.g., "123456")
+        do {
+          if (i > 0) {
+            out << ", ";
+          }
+          out << R"(")" << ArrowArrayViewGetUIntUnsafe(value, i) << R"(")";
+        } while (++i < n_values);
+        break;
+
+      case NANOARROW_TYPE_FLOAT:
+      case NANOARROW_TYPE_DOUBLE: {
+        // JSON numbers with 3 decimal places; the guard puts the stream's
+        // previous formatting back when this scope ends
+        LocalizedStream stream_guard(out);
+        stream_guard.SetFixed(3);
+
+        do {
+          if (i > 0) {
+            out << ", ";
+          }
+          out << ArrowArrayViewGetDoubleUnsafe(value, i);
+        } while (++i < n_values);
+        break;
+      }
+
+      case NANOARROW_TYPE_STRING:
+      case NANOARROW_TYPE_LARGE_STRING:
+        do {
+          if (i > 0) {
+            out << ", ";
+          }
+          NANOARROW_RETURN_NOT_OK(
+              WriteString(out, ArrowArrayViewGetStringUnsafe(value, i)));
+        } while (++i < n_values);
+        break;
+
+      case NANOARROW_TYPE_BINARY:
+      case NANOARROW_TYPE_LARGE_BINARY:
+      case NANOARROW_TYPE_FIXED_SIZE_BINARY:
+        do {
+          if (i > 0) {
+            out << ", ";
+          }
+          NANOARROW_RETURN_NOT_OK(
+              WriteBytes(out, ArrowArrayViewGetBytesUnsafe(value, i)));
+        } while (++i < n_values);
+        break;
+
+      default:
+        // No DATA representation implemented for this storage type
+        return ENOTSUP;
+    }
+
+    out << "]";
+    return NANOARROW_OK;
+  }
+
+  /// Write value as a double-quoted JSON string.
+  ///
+  /// The quote and backslash characters are backslash-escaped and control
+  /// characters (code points below 0x20) are written as \uXXXX escapes, as JSON
+  /// requires. All other 7-bit characters are written unchanged.
+  ///
+  /// \param out Stream receiving the JSON text
+  /// \param value String to write; size_bytes bytes are read from data
+  /// \return NANOARROW_OK, or ENOTSUP on the first byte outside the 7-bit range
+  ///   (multibyte unicode is not supported yet); in that case the opening
+  ///   quote and any preceding characters have already been written
+  ArrowErrorCode WriteString(std::ostream& out, ArrowStringView value) {
+    out << R"(")";
+
+    for (int64_t i = 0; i < value.size_bytes; i++) {
+      // Inspect the byte as unsigned so the range checks below do not depend on
+      // whether plain char is signed on the target platform.
+      const unsigned char c = static_cast<unsigned char>(value.data[i]);
+
+      if (c >= 0x80) {
+        // Not supporting multibyte unicode yet
+        return ENOTSUP;
+      }
+
+      if (c == '"') {
+        out << R"(\")";
+      } else if (c == '\\') {
+        out << R"(\\)";
+      } else if (c < 0x20) {
+        // Control characters must be escaped in JSON strings. Data in the
+        // arrow-testing repo has a lot of content that requires escaping in
+        // this way (\uXXXX).
+        char utf16_esc[7];
+        utf16_esc[6] = '\0';
+        snprintf(utf16_esc, sizeof(utf16_esc), R"(\u%04x)",
+                 static_cast<unsigned int>(c));
+        out << utf16_esc;
+      } else {
+        out << static_cast<char>(c);
+      }
+    }
+
+    out << R"(")";
+    return NANOARROW_OK;
+  }
+
+  /// Write value as a double-quoted string of uppercase hexadecimal digits, two
+  /// digits per byte (e.g. the bytes 0x01 0xAB become "01AB").
+  ///
+  /// \param out Stream receiving the JSON text
+  /// \param value Bytes to encode; an empty view yields `""`
+  /// \return NANOARROW_OK
+  ArrowErrorCode WriteBytes(std::ostream& out, ArrowBufferView value) {
+    static const char kHexDigits[] = "0123456789ABCDEF";
+
+    out << R"(")";
+
+    const uint8_t* bytes = value.data.as_uint8;
+    for (int64_t i = 0; i < value.size_bytes; i++) {
+      // High nibble first, then low nibble
+      const uint8_t byte = bytes[i];
+      out << kHexDigits[byte >> 4] << kHexDigits[byte & 0x0F];
+    }
+
+    out << R"(")";
+    return NANOARROW_OK;
+  }
+
+  /// Write the children of a nested column as a JSON array of column objects.
+  ///
+  /// Child i of field is paired with child i of value and written with
+  /// WriteColumn().
+  ///
+  /// \param out Stream receiving the JSON text
+  /// \param field Schema whose children describe the columns
+  /// \param value Array view whose children hold the column data
+  /// \return NANOARROW_OK; EINVAL (before any child is written) if value has
+  ///   fewer children than field; or the first error reported by WriteColumn()
+  ArrowErrorCode WriteChildren(std::ostream& out, const ArrowSchema* field,
+                               ArrowArrayView* value) {
+    const int64_t n_children = field->n_children;
+    if (n_children <= 0) {
+      out << "[]";
+      return NANOARROW_OK;
+    }
+
+    // Every schema child needs a matching array child; without this check a
+    // mismatched schema/array pair would index past the end of value->children.
+    if (value->n_children < n_children) {
+      return EINVAL;
+    }
+
+    out << "[";
+    for (int64_t i = 0; i < n_children; i++) {
+      if (i > 0) {
+        out << ", ";
+      }
+      NANOARROW_RETURN_NOT_OK(WriteColumn(out, field->children[i], value->children[i]));
+    }
+    out << "]";
+
+    return NANOARROW_OK;
+  }
+
+  /// Scope guard that makes a stream format numbers with the classic ("C")
+  /// locale in fixed floating-point notation, and puts the stream's previous
+  /// locale, format flags and precision back when it is destroyed.
+  class LocalizedStream {
+   public:
+    /// Save the current locale, format flags and precision of out, then switch
+    /// it to the classic locale and fixed notation.
+    explicit LocalizedStream(std::ostream& out)
+        : out_(out),
+          previous_locale_(out.imbue(std::locale::classic())),
+          fmt_flags_(out.flags()),
+          previous_precision_(out.precision()) {
+      // Replace the whole floatfield rather than OR-ing in the fixed bit: if the
+      // stream was in scientific mode, having both bits set would select
+      // hexadecimal floating-point output instead of fixed notation.
+      out_.setf(std::ios::fixed, std::ios::floatfield);
+    }
+
+    // A copy would restore the saved state a second time, so copying is
+    // disabled.
+    LocalizedStream(const LocalizedStream&) = delete;
+    LocalizedStream& operator=(const LocalizedStream&) = delete;
+
+    /// Set the number of digits written after the decimal point.
+    void SetFixed(int precision) { out_.precision(precision); }
+
+    /// Restore the format flags, precision and locale saved by the constructor.
+    ~LocalizedStream() {
+      out_.flags(fmt_flags_);
+      out_.precision(previous_precision_);
+      out_.imbue(previous_locale_);
+    }
+
+   private:
+    std::ostream& out_;
+    std::locale previous_locale_;
+    std::ios::fmtflags fmt_flags_;
+    std::streamsize previous_precision_;
+  };
+};
+
+/// @}
+
+}  // namespace testing
+}  // namespace nanoarrow
+
+#endif