You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by gi...@apache.org on 2023/12/02 01:26:39 UTC

(arrow-nanoarrow) branch main updated: Update dist/ for commit 5c3f5d546bdb25625350926bf16f736837733901

This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git


The following commit(s) were added to refs/heads/main by this push:
     new c8009d8  Update dist/ for commit 5c3f5d546bdb25625350926bf16f736837733901
c8009d8 is described below

commit c8009d8119e62dc0f0a3edb16b93c31a99acbcde
Author: GitHub Actions <ac...@github.com>
AuthorDate: Sat Dec 2 01:26:35 2023 +0000

    Update dist/ for commit 5c3f5d546bdb25625350926bf16f736837733901
---
 dist/nanoarrow_testing.hpp | 161 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 161 insertions(+)

diff --git a/dist/nanoarrow_testing.hpp b/dist/nanoarrow_testing.hpp
index 103f22e..15580b6 100644
--- a/dist/nanoarrow_testing.hpp
+++ b/dist/nanoarrow_testing.hpp
@@ -46,6 +46,48 @@ namespace testing {
 /// \brief Writer for the Arrow integration testing JSON format
 class TestingJSONWriter {
  public:
+  /// \brief Write an ArrowArrayStream as a data file JSON object to out
+  ///
+  /// Creates output like `{"schema": {...}, "batches": [...], ...}`.
+  ArrowErrorCode WriteDataFile(std::ostream& out, ArrowArrayStream* stream) {
+    if (stream == nullptr || stream->release == nullptr) {
+      return EINVAL;
+    }
+
+    out << R"({"schema": )";
+
+    nanoarrow::UniqueSchema schema;
+    NANOARROW_RETURN_NOT_OK(stream->get_schema(stream, schema.get()));
+    NANOARROW_RETURN_NOT_OK(WriteSchema(out, schema.get()));
+
+    nanoarrow::UniqueArrayView array_view;
+    NANOARROW_RETURN_NOT_OK(
+        ArrowArrayViewInitFromSchema(array_view.get(), schema.get(), nullptr));
+
+    out << R"(, "batches": [)";
+
+    nanoarrow::UniqueArray array;
+    std::string sep;
+    do {
+      NANOARROW_RETURN_NOT_OK(stream->get_next(stream, array.get()));
+      if (array->release == nullptr) {
+        break;
+      }
+
+      NANOARROW_RETURN_NOT_OK(
+          ArrowArrayViewSetArray(array_view.get(), array.get(), nullptr));
+
+      out << sep;
+      sep = ", ";
+      NANOARROW_RETURN_NOT_OK(WriteBatch(out, schema.get(), array_view.get()));
+      array.reset();
+    } while (true);
+
+    out << "]}";
+
+    return NANOARROW_OK;
+  }
+
   /// \brief Write a schema to out
   ///
   /// Creates output like `{"fields": [...], "metadata": [...]}`.
@@ -622,6 +664,57 @@ class TestingJSONReader {
   using json = nlohmann::json;
 
  public:
+  /// \brief Read JSON representing a data file object
+  ///
+  /// Read a JSON object in the form `{"schema": {...}, "batches": [...], ...}`,
+  /// propagating `out` on success.
+  ArrowErrorCode ReadDataFile(const std::string& data_file_json, ArrowArrayStream* out,
+                              ArrowError* error = nullptr) {
+    try {
+      auto obj = json::parse(data_file_json);
+      NANOARROW_RETURN_NOT_OK(Check(obj.is_object(), error, "data file must be object"));
+      NANOARROW_RETURN_NOT_OK(
+          Check(obj.contains("schema"), error, "data file missing key 'schema'"));
+
+      // Read Schema
+      nanoarrow::UniqueSchema schema;
+      NANOARROW_RETURN_NOT_OK(SetSchema(schema.get(), obj["schema"], error));
+
+      NANOARROW_RETURN_NOT_OK(
+          Check(obj.contains("batches"), error, "data file missing key 'batches'"));
+      const auto& batches = obj["batches"];
+      NANOARROW_RETURN_NOT_OK(
+          Check(batches.is_array(), error, "data file batches must be array"));
+
+      // Populate ArrayView
+      nanoarrow::UniqueArrayView array_view;
+      NANOARROW_RETURN_NOT_OK(
+          ArrowArrayViewInitFromSchema(array_view.get(), schema.get(), error));
+
+      // Initialize ArrayStream with required capacity
+      nanoarrow::UniqueArrayStream stream;
+      NANOARROW_RETURN_NOT_OK_WITH_ERROR(
+          ArrowBasicArrayStreamInit(stream.get(), schema.get(), batches.size()), error);
+
+      // Populate ArrayStream batches
+      for (size_t i = 0; i < batches.size(); i++) {
+        nanoarrow::UniqueArray array;
+        NANOARROW_RETURN_NOT_OK(
+            ArrowArrayInitFromArrayView(array.get(), array_view.get(), error));
+        NANOARROW_RETURN_NOT_OK(
+            SetArrayBatch(batches[i], array_view.get(), array.get(), error));
+        ArrowBasicArrayStreamSetArray(stream.get(), i, array.get());
+      }
+
+      ArrowArrayStreamMove(stream.get(), out);
+      return NANOARROW_OK;
+    } catch (json::exception& e) {
+      ArrowErrorSet(error, "Exception in TestingJSONReader::ReadDataFile(): %s",
+                    e.what());
+      return EINVAL;
+    }
+  }
+
   /// \brief Read JSON representing a Schema
   ///
   /// Reads a JSON object in the form `{"fields": [...], "metadata": [...]}`,
@@ -660,6 +753,34 @@ class TestingJSONReader {
     }
   }
 
+  /// \brief Read JSON representing a RecordBatch
+  ///
+  /// Read a JSON object in the form `{"count": 123, "columns": [...]}`, propagating `out`
+  /// on success.
+  ArrowErrorCode ReadBatch(const std::string& batch_json, const ArrowSchema* schema,
+                           ArrowArray* out, ArrowError* error = nullptr) {
+    try {
+      auto obj = json::parse(batch_json);
+
+      // ArrowArrayView to enable validation
+      nanoarrow::UniqueArrayView array_view;
+      NANOARROW_RETURN_NOT_OK(ArrowArrayViewInitFromSchema(
+          array_view.get(), const_cast<ArrowSchema*>(schema), error));
+
+      // ArrowArray to hold memory
+      nanoarrow::UniqueArray array;
+      NANOARROW_RETURN_NOT_OK(
+          ArrowArrayInitFromSchema(array.get(), const_cast<ArrowSchema*>(schema), error));
+
+      NANOARROW_RETURN_NOT_OK(SetArrayBatch(obj, array_view.get(), array.get(), error));
+      ArrowArrayMove(array.get(), out);
+      return NANOARROW_OK;
+    } catch (json::exception& e) {
+      ArrowErrorSet(error, "Exception in TestingJSONReader::ReadBatch(): %s", e.what());
+      return EINVAL;
+    }
+  }
+
   /// \brief Read JSON representing a Column
   ///
   /// Read a JSON object in the form
@@ -1094,6 +1215,46 @@ class TestingJSONReader {
     return NANOARROW_OK;
   }
 
+  ArrowErrorCode SetArrayBatch(const json& value, ArrowArrayView* array_view,
+                               ArrowArray* array, ArrowError* error) {
+    NANOARROW_RETURN_NOT_OK(
+        Check(value.is_object(), error, "Expected RecordBatch to be a JSON object"));
+
+    NANOARROW_RETURN_NOT_OK(
+        Check(value.contains("count"), error, "RecordBatch missing key 'count'"));
+
+    const auto& count = value["count"];
+    NANOARROW_RETURN_NOT_OK(
+        Check(count.is_number_integer(), error, "RecordBatch count must be integer"));
+    array_view->length = count.get<int64_t>();
+
+    NANOARROW_RETURN_NOT_OK(
+        Check(value.contains("columns"), error, "RecordBatch missing key 'columns'"));
+
+    const auto& columns = value["columns"];
+    NANOARROW_RETURN_NOT_OK(
+        Check(columns.is_array(), error, "RecordBatch columns must be array"));
+    NANOARROW_RETURN_NOT_OK(Check(columns.size() == array_view->n_children, error,
+                                  "RecordBatch children has incorrect size"));
+
+    for (int64_t i = 0; i < array_view->n_children; i++) {
+      NANOARROW_RETURN_NOT_OK(
+          SetArrayColumn(columns[i], array_view->children[i], array->children[i], error));
+    }
+
+    // Validate the array view
+    NANOARROW_RETURN_NOT_OK(PrefixError(
+        ArrowArrayViewValidate(array_view, NANOARROW_VALIDATION_LEVEL_FULL, error), error,
+        "RecordBatch failed to validate: "));
+
+    // Flush length and buffer pointers to the Array
+    array->length = array_view->length;
+    NANOARROW_RETURN_NOT_OK_WITH_ERROR(
+        ArrowArrayFinishBuilding(array, NANOARROW_VALIDATION_LEVEL_NONE, nullptr), error);
+
+    return NANOARROW_OK;
+  }
+
   ArrowErrorCode SetArrayColumn(const json& value, ArrowArrayView* array_view,
                                 ArrowArray* array, ArrowError* error,
                                 const std::string& parent_error_prefix = "") {