You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by pa...@apache.org on 2023/11/27 18:28:42 UTC

(arrow-nanoarrow) branch main updated: feat: Add integration testing JSON reader (#322)

This is an automated email from the ASF dual-hosted git repository.

paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git


The following commit(s) were added to refs/heads/main by this push:
     new d37d2b0  feat: Add integration testing JSON reader (#322)
d37d2b0 is described below

commit d37d2b0dd2d37ee682582d01c391a1aec5e43d1f
Author: Dewey Dunnington <de...@dunnington.ca>
AuthorDate: Mon Nov 27 14:28:35 2023 -0400

    feat: Add integration testing JSON reader (#322)
    
    This PR adds a `TestingJSONReader`. It uses
    https://github.com/nlohmann/json to do the parsing. This doesn't
    implement parsing a record batch or array yet, and there are a few
    missing types (the same missing types as the writer). I'd like to do
    those as a follow-up.
    
    The testing for this is mostly roundtrip testing, which is probably not
    ideal but is likely acceptable because the writer is an independent
    implementation. Once schemas can be compared for equality, that check
    can be added to the roundtrip test utility function as well.
---
 CMakeLists.txt                          |  16 +-
 src/nanoarrow/nanoarrow_testing.hpp     | 450 ++++++++++++++++++++++++++++++++
 src/nanoarrow/nanoarrow_testing_test.cc | 279 ++++++++++++++++++++
 3 files changed, 744 insertions(+), 1 deletion(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index cffa590..a48769f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -203,6 +203,16 @@ if(NANOARROW_BUILD_TESTS)
 
   fetchcontent_makeavailable(googletest)
 
+  # JSON library for integration testing
+  # Also used by some versions of Arrow, so check if this is already available
+  # (i.e. the nlohmann_json::nlohmann_json target already exists) and only
+  # fetch the v3.11.2 archive, verified against URL_HASH, when it is not.
+  if(NOT TARGET nlohmann_json::nlohmann_json)
+    fetchcontent_declare(nlohmann_json
+                         URL https://github.com/nlohmann/json/archive/refs/tags/v3.11.2.zip
+                         URL_HASH SHA256=95651d7d1fcf2e5c3163c3d37df6d6b3e9e5027299e6bd050d157322ceda9ac9
+    )
+    fetchcontent_makeavailable(nlohmann_json)
+  endif()
+
   add_executable(utils_test src/nanoarrow/utils_test.cc)
   add_executable(buffer_test src/nanoarrow/buffer_test.cc)
   add_executable(array_test src/nanoarrow/array_test.cc)
@@ -235,7 +245,11 @@ if(NANOARROW_BUILD_TESTS)
                         coverage_config)
   target_link_libraries(array_stream_test nanoarrow gtest_main coverage_config)
   target_link_libraries(nanoarrow_hpp_test nanoarrow gtest_main coverage_config)
-  target_link_libraries(nanoarrow_testing_test nanoarrow gtest_main coverage_config)
+  # nanoarrow_testing.hpp includes <nlohmann/json.hpp>, so this test also links
+  # the JSON library target
+  target_link_libraries(nanoarrow_testing_test
+                        nanoarrow
+                        gtest_main
+                        nlohmann_json::nlohmann_json
+                        coverage_config)
 
   include(GoogleTest)
   # Some users have reported a timeout with the default value of 5
diff --git a/src/nanoarrow/nanoarrow_testing.hpp b/src/nanoarrow/nanoarrow_testing.hpp
index 0fc590d..454a6da 100644
--- a/src/nanoarrow/nanoarrow_testing.hpp
+++ b/src/nanoarrow/nanoarrow_testing.hpp
@@ -16,8 +16,11 @@
 // under the License.
 
 #include <iostream>
+#include <sstream>
 #include <string>
 
+#include <nlohmann/json.hpp>
+
 #include "nanoarrow.hpp"
 
 #ifndef NANOARROW_TESTING_HPP_INCLUDED
@@ -613,6 +616,453 @@ class TestingJSONWriter {
   };
 };
 
+/// \brief Reader for the Arrow integration testing JSON format
+class TestingJSONReader {
+  using json = nlohmann::json;
+
+ public:
+  /// \brief Parse an integration testing JSON Schema object into an ArrowSchema
+  ///
+  /// \param value JSON text whose top-level value is a Schema object
+  /// \param out Receives the parsed schema (via ArrowSchemaMove()) on success only
+  /// \param error Optional; receives a message describing the failure
+  /// \return NANOARROW_OK on success, EINVAL if a std::exception is thrown while
+  ///   parsing or converting, otherwise the code returned by SetSchema()
+  ArrowErrorCode ReadSchema(const std::string& value, ArrowSchema* out,
+                            ArrowError* error = nullptr) {
+    try {
+      const json obj = json::parse(value);
+      nanoarrow::UniqueSchema schema;
+
+      NANOARROW_RETURN_NOT_OK(SetSchema(schema.get(), obj, error));
+      ArrowSchemaMove(schema.get(), out);
+      return NANOARROW_OK;
+    } catch (const std::exception& e) {
+      // Caught by const reference: the exception is only inspected for its message
+      ArrowErrorSet(error, "Exception in TestingJSONReader::ReadSchema(): %s", e.what());
+      return EINVAL;
+    }
+  }
+
+  /// \brief Parse an integration testing JSON Field object into an ArrowSchema
+  ///
+  /// \param value JSON text whose top-level value is a Field object
+  /// \param out Receives the parsed field (via ArrowSchemaMove()) on success only
+  /// \param error Optional; receives a message describing the failure
+  /// \return NANOARROW_OK on success, EINVAL if a std::exception is thrown while
+  ///   parsing or converting, otherwise the code returned by SetField()
+  ArrowErrorCode ReadField(const std::string& value, ArrowSchema* out,
+                           ArrowError* error = nullptr) {
+    try {
+      const json obj = json::parse(value);
+      nanoarrow::UniqueSchema schema;
+
+      NANOARROW_RETURN_NOT_OK(SetField(schema.get(), obj, error));
+      ArrowSchemaMove(schema.get(), out);
+      return NANOARROW_OK;
+    } catch (const std::exception& e) {
+      // Caught by const reference: the exception is only inspected for its message
+      ArrowErrorSet(error, "Exception in TestingJSONReader::ReadField(): %s", e.what());
+      return EINVAL;
+    }
+  }
+
+ private:
+  // Fills schema (initialized here as a struct) from a JSON Schema object. The
+  // object must carry a "fields" array, whose items are each passed to
+  // SetField() to populate one child, and a "metadata" value that is passed to
+  // SetMetadata(). The result is checked with ArrowSchemaViewInit().
+  ArrowErrorCode SetSchema(ArrowSchema* schema, const json& value, ArrowError* error) {
+    // Shape of the top-level object
+    NANOARROW_RETURN_NOT_OK(
+        Check(value.is_object(), error, "Expected Schema to be a JSON object"));
+    NANOARROW_RETURN_NOT_OK(
+        Check(value.contains("fields"), error, "Schema missing key 'fields'"));
+    NANOARROW_RETURN_NOT_OK(
+        Check(value.contains("metadata"), error, "Schema missing key 'metadata'"));
+
+    NANOARROW_RETURN_NOT_OK_WITH_ERROR(
+        ArrowSchemaInitFromType(schema, NANOARROW_TYPE_STRUCT), error);
+
+    // One child per item of "fields"
+    const json& fields = value["fields"];
+    NANOARROW_RETURN_NOT_OK(
+        Check(fields.is_array(), error, "Schema fields must be array"));
+    NANOARROW_RETURN_NOT_OK_WITH_ERROR(ArrowSchemaAllocateChildren(schema, fields.size()),
+                                       error);
+    for (int64_t child = 0; child < schema->n_children; ++child) {
+      NANOARROW_RETURN_NOT_OK(SetField(schema->children[child], fields[child], error));
+    }
+
+    NANOARROW_RETURN_NOT_OK(SetMetadata(schema, value["metadata"], error));
+
+    // Validate! The result of the check is the result of this function.
+    ArrowSchemaView view;
+    return ArrowSchemaViewInit(&view, schema, error);
+  }
+
+  // Fills schema from a JSON Field object. The keys "name", "nullable", "type",
+  // "children" and "metadata" must all be present. Items of "children" are
+  // processed recursively and the finished field is checked with
+  // ArrowSchemaViewInit().
+  ArrowErrorCode SetField(ArrowSchema* schema, const json& value, ArrowError* error) {
+    // Shape of the field object
+    NANOARROW_RETURN_NOT_OK(
+        Check(value.is_object(), error, "Expected Field to be a JSON object"));
+    NANOARROW_RETURN_NOT_OK(
+        Check(value.contains("name"), error, "Field missing key 'name'"));
+    NANOARROW_RETURN_NOT_OK(
+        Check(value.contains("nullable"), error, "Field missing key 'nullable'"));
+    NANOARROW_RETURN_NOT_OK(
+        Check(value.contains("type"), error, "Field missing key 'type'"));
+    NANOARROW_RETURN_NOT_OK(
+        Check(value.contains("children"), error, "Field missing key 'children'"));
+    NANOARROW_RETURN_NOT_OK(
+        Check(value.contains("metadata"), error, "Field missing key 'metadata'"));
+
+    ArrowSchemaInit(schema);
+
+    // name: a string, or null in which case no name is set
+    const json& json_name = value["name"];
+    NANOARROW_RETURN_NOT_OK(Check(json_name.is_string() || json_name.is_null(), error,
+                                  "Field name must be string or null"));
+    if (!json_name.is_null()) {
+      auto name_str = json_name.get<std::string>();
+      NANOARROW_RETURN_NOT_OK_WITH_ERROR(ArrowSchemaSetName(schema, name_str.c_str()),
+                                         error);
+    }
+
+    // nullable: sets or clears ARROW_FLAG_NULLABLE
+    const json& json_nullable = value["nullable"];
+    NANOARROW_RETURN_NOT_OK(
+        Check(json_nullable.is_boolean(), error, "Field nullable must be boolean"));
+    const bool is_nullable = json_nullable.get<bool>();
+    schema->flags = is_nullable ? (schema->flags | ARROW_FLAG_NULLABLE)
+                                : (schema->flags & ~ARROW_FLAG_NULLABLE);
+
+    NANOARROW_RETURN_NOT_OK(SetType(schema, value["type"], error));
+
+    // children: one nested field per array item
+    const json& children = value["children"];
+    NANOARROW_RETURN_NOT_OK(
+        Check(children.is_array(), error, "Field children must be array"));
+    NANOARROW_RETURN_NOT_OK_WITH_ERROR(
+        ArrowSchemaAllocateChildren(schema, children.size()), error);
+    for (int64_t child = 0; child < schema->n_children; ++child) {
+      NANOARROW_RETURN_NOT_OK(SetField(schema->children[child], children[child], error));
+    }
+
+    NANOARROW_RETURN_NOT_OK(SetMetadata(schema, value["metadata"], error));
+
+    // Validate! The result of the check is the result of this function.
+    ArrowSchemaView view;
+    return ArrowSchemaViewInit(&view, schema, error);
+  }
+
+  // Sets the type of schema from a JSON Type object, selected by its "name"
+  // member. Types that carry extra members are delegated to the matching
+  // SetTypeXXX() helper; an unrecognized name yields ENOTSUP.
+  ArrowErrorCode SetType(ArrowSchema* schema, const json& value, ArrowError* error) {
+    NANOARROW_RETURN_NOT_OK(Check(value.is_object(), error, "Type must be object"));
+    NANOARROW_RETURN_NOT_OK(
+        Check(value.contains("name"), error, "Type missing key 'name'"));
+
+    const json& json_name = value["name"];
+    NANOARROW_RETURN_NOT_OK(
+        Check(json_name.is_string(), error, "Type name must be string"));
+    const std::string type_name = json_name.get<std::string>();
+
+    // Types with extra members: the helper's result is returned directly
+    if (type_name == "int") {
+      return SetTypeInt(schema, value, error);
+    }
+    if (type_name == "floatingpoint") {
+      return SetTypeFloatingPoint(schema, value, error);
+    }
+    if (type_name == "decimal") {
+      return SetTypeDecimal(schema, value, error);
+    }
+    if (type_name == "fixedsizebinary") {
+      return SetTypeFixedSizeBinary(schema, value, error);
+    }
+    if (type_name == "fixedsizelist") {
+      return SetTypeFixedSizeList(schema, value, error);
+    }
+    if (type_name == "map") {
+      return SetTypeMap(schema, value, error);
+    }
+    if (type_name == "union") {
+      return SetTypeUnion(schema, value, error);
+    }
+
+    // Types identified by name alone
+    if (type_name == "null") {
+      NANOARROW_RETURN_NOT_OK_WITH_ERROR(ArrowSchemaSetType(schema, NANOARROW_TYPE_NA),
+                                         error);
+    } else if (type_name == "bool") {
+      NANOARROW_RETURN_NOT_OK_WITH_ERROR(ArrowSchemaSetType(schema, NANOARROW_TYPE_BOOL),
+                                         error);
+    } else if (type_name == "utf8") {
+      NANOARROW_RETURN_NOT_OK_WITH_ERROR(
+          ArrowSchemaSetType(schema, NANOARROW_TYPE_STRING), error);
+    } else if (type_name == "largeutf8") {
+      NANOARROW_RETURN_NOT_OK_WITH_ERROR(
+          ArrowSchemaSetType(schema, NANOARROW_TYPE_LARGE_STRING), error);
+    } else if (type_name == "binary") {
+      NANOARROW_RETURN_NOT_OK_WITH_ERROR(
+          ArrowSchemaSetType(schema, NANOARROW_TYPE_BINARY), error);
+    } else if (type_name == "largebinary") {
+      NANOARROW_RETURN_NOT_OK_WITH_ERROR(
+          ArrowSchemaSetType(schema, NANOARROW_TYPE_LARGE_BINARY), error);
+    } else if (type_name == "list") {
+      NANOARROW_RETURN_NOT_OK_WITH_ERROR(ArrowSchemaSetFormat(schema, "+l"), error);
+    } else if (type_name == "largelist") {
+      NANOARROW_RETURN_NOT_OK_WITH_ERROR(ArrowSchemaSetFormat(schema, "+L"), error);
+    } else if (type_name == "struct") {
+      NANOARROW_RETURN_NOT_OK_WITH_ERROR(ArrowSchemaSetFormat(schema, "+s"), error);
+    } else {
+      ArrowErrorSet(error, "Unsupported Type name: '%s'", type_name.c_str());
+      return ENOTSUP;
+    }
+
+    return NANOARROW_OK;
+  }
+
+  // Sets an integer type from the "bitWidth" (8, 16, 32 or 64) and "isSigned"
+  // members of a Type object whose name is "int".
+  ArrowErrorCode SetTypeInt(ArrowSchema* schema, const json& value, ArrowError* error) {
+    NANOARROW_RETURN_NOT_OK(Check(value.contains("bitWidth"), error,
+                                  "Type[name=='int'] missing key 'bitWidth'"));
+    NANOARROW_RETURN_NOT_OK(Check(value.contains("isSigned"), error,
+                                  "Type[name=='int'] missing key 'isSigned'"));
+
+    const json& json_bit_width = value["bitWidth"];
+    NANOARROW_RETURN_NOT_OK(Check(json_bit_width.is_number_integer(), error,
+                                  "Type[name=='int'] bitWidth must be integer"));
+
+    const json& json_is_signed = value["isSigned"];
+    NANOARROW_RETURN_NOT_OK(Check(json_is_signed.is_boolean(), error,
+                                  "Type[name=='int'] isSigned must be boolean"));
+
+    // A single switch over the width; signedness picks between the two
+    // candidates of each width.
+    const bool is_signed = json_is_signed.get<bool>();
+    ArrowType type = NANOARROW_TYPE_UNINITIALIZED;
+    switch (json_bit_width.get<int>()) {
+      case 8:
+        type = is_signed ? NANOARROW_TYPE_INT8 : NANOARROW_TYPE_UINT8;
+        break;
+      case 16:
+        type = is_signed ? NANOARROW_TYPE_INT16 : NANOARROW_TYPE_UINT16;
+        break;
+      case 32:
+        type = is_signed ? NANOARROW_TYPE_INT32 : NANOARROW_TYPE_UINT32;
+        break;
+      case 64:
+        type = is_signed ? NANOARROW_TYPE_INT64 : NANOARROW_TYPE_UINT64;
+        break;
+      default:
+        ArrowErrorSet(error, "Type[name=='int'] bitWidth must be 8, 16, 32, or 64");
+        return EINVAL;
+    }
+
+    NANOARROW_RETURN_NOT_OK_WITH_ERROR(ArrowSchemaSetType(schema, type), error);
+    return NANOARROW_OK;
+  }
+
+  // Sets a floating point type from the "precision" member ("HALF", "SINGLE"
+  // or "DOUBLE") of a Type object whose name is "floatingpoint". Returns EINVAL
+  // if the member is missing, is not a string, or has any other value.
+  ArrowErrorCode SetTypeFloatingPoint(ArrowSchema* schema, const json& value,
+                                      ArrowError* error) {
+    NANOARROW_RETURN_NOT_OK(Check(value.contains("precision"), error,
+                                  "Type[name=='floatingpoint'] missing key 'precision'"));
+
+    // The message names the member actually being checked ('precision'); it
+    // previously referred to 'bitWidth', which is not a member of this type.
+    const auto& precision = value["precision"];
+    NANOARROW_RETURN_NOT_OK(Check(precision.is_string(), error,
+                                  "Type[name=='floatingpoint'] precision must be string"));
+
+    ArrowType type = NANOARROW_TYPE_UNINITIALIZED;
+    const auto precision_str = precision.get<std::string>();
+    if (precision_str == "HALF") {
+      type = NANOARROW_TYPE_HALF_FLOAT;
+    } else if (precision_str == "SINGLE") {
+      type = NANOARROW_TYPE_FLOAT;
+    } else if (precision_str == "DOUBLE") {
+      type = NANOARROW_TYPE_DOUBLE;
+    } else {
+      ArrowErrorSet(
+          error,
+          "Type[name=='floatingpoint'] precision must be 'HALF', 'SINGLE', or 'DOUBLE'");
+      return EINVAL;
+    }
+
+    NANOARROW_RETURN_NOT_OK_WITH_ERROR(ArrowSchemaSetType(schema, type), error);
+    return NANOARROW_OK;
+  }
+
+  // Sets a fixed-size binary type whose element width is the integer
+  // "byteWidth" member of a Type object whose name is "fixedsizebinary".
+  ArrowErrorCode SetTypeFixedSizeBinary(ArrowSchema* schema, const json& value,
+                                        ArrowError* error) {
+    const bool has_byte_width = value.contains("byteWidth");
+    NANOARROW_RETURN_NOT_OK(Check(
+        has_byte_width, error, "Type[name=='fixedsizebinary'] missing key 'byteWidth'"));
+
+    const json& byteWidth = value["byteWidth"];
+    const bool byte_width_is_integer = byteWidth.is_number_integer();
+    NANOARROW_RETURN_NOT_OK(
+        Check(byte_width_is_integer, error,
+              "Type[name=='fixedsizebinary'] byteWidth must be integer"));
+
+    // The width itself is not range-checked here
+    NANOARROW_RETURN_NOT_OK_WITH_ERROR(
+        ArrowSchemaSetTypeFixedSize(schema, NANOARROW_TYPE_FIXED_SIZE_BINARY,
+                                    byteWidth.get<int>()),
+        error);
+    return NANOARROW_OK;
+  }
+
+  // Sets a decimal type from the integer "bitWidth" (128 or 256), "precision"
+  // and "scale" members of a Type object whose name is "decimal".
+  ArrowErrorCode SetTypeDecimal(ArrowSchema* schema, const json& value,
+                                ArrowError* error) {
+    NANOARROW_RETURN_NOT_OK(Check(value.contains("bitWidth"), error,
+                                  "Type[name=='decimal'] missing key 'bitWidth'"));
+    NANOARROW_RETURN_NOT_OK(Check(value.contains("precision"), error,
+                                  "Type[name=='decimal'] missing key 'precision'"));
+    NANOARROW_RETURN_NOT_OK(Check(value.contains("scale"), error,
+                                  "Type[name=='decimal'] missing key 'scale'"));
+
+    const json& json_bit_width = value["bitWidth"];
+    NANOARROW_RETURN_NOT_OK(Check(json_bit_width.is_number_integer(), error,
+                                  "Type[name=='decimal'] bitWidth must be integer"));
+
+    // The storage width selects the decimal type
+    const int bit_width = json_bit_width.get<int>();
+    ArrowType type;
+    if (bit_width == 128) {
+      type = NANOARROW_TYPE_DECIMAL128;
+    } else if (bit_width == 256) {
+      type = NANOARROW_TYPE_DECIMAL256;
+    } else {
+      ArrowErrorSet(error, "Type[name=='decimal'] bitWidth must be 128 or 256");
+      return EINVAL;
+    }
+
+    const json& precision = value["precision"];
+    NANOARROW_RETURN_NOT_OK(Check(precision.is_number_integer(), error,
+                                  "Type[name=='decimal'] precision must be integer"));
+
+    const json& scale = value["scale"];
+    NANOARROW_RETURN_NOT_OK(Check(scale.is_number_integer(), error,
+                                  "Type[name=='decimal'] scale must be integer"));
+
+    NANOARROW_RETURN_NOT_OK_WITH_ERROR(
+        ArrowSchemaSetTypeDecimal(schema, type, precision.get<int>(), scale.get<int>()),
+        error);
+
+    return NANOARROW_OK;
+  }
+
+  // Sets the map format ("+m") and sets or clears ARROW_FLAG_MAP_KEYS_SORTED
+  // according to the boolean "keysSorted" member of a Type object whose name is
+  // "map".
+  ArrowErrorCode SetTypeMap(ArrowSchema* schema, const json& value, ArrowError* error) {
+    NANOARROW_RETURN_NOT_OK(Check(value.contains("keysSorted"), error,
+                                  "Type[name=='map'] missing key 'keysSorted'"));
+
+    const json& json_keys_sorted = value["keysSorted"];
+    NANOARROW_RETURN_NOT_OK(Check(json_keys_sorted.is_boolean(), error,
+                                  "Type[name=='map'] keysSorted must be boolean"));
+
+    const bool sorted = json_keys_sorted.get<bool>();
+    schema->flags = sorted ? (schema->flags | ARROW_FLAG_MAP_KEYS_SORTED)
+                           : (schema->flags & ~ARROW_FLAG_MAP_KEYS_SORTED);
+
+    NANOARROW_RETURN_NOT_OK_WITH_ERROR(ArrowSchemaSetFormat(schema, "+m"), error);
+    return NANOARROW_OK;
+  }
+
+  // Sets a fixed-size list format ("+w:" followed by the size) from the integer
+  // "listSize" member of a Type object whose name is "fixedsizelist".
+  ArrowErrorCode SetTypeFixedSizeList(ArrowSchema* schema, const json& value,
+                                      ArrowError* error) {
+    NANOARROW_RETURN_NOT_OK(Check(value.contains("listSize"), error,
+                                  "Type[name=='fixedsizelist'] missing key 'listSize'"));
+
+    const json& json_list_size = value["listSize"];
+    NANOARROW_RETURN_NOT_OK(
+        Check(json_list_size.is_number_integer(), error,
+              "Type[name=='fixedsizelist'] listSize must be integer"));
+
+    // The JSON integer is serialized as-is after the format prefix
+    std::stringstream format_stream;
+    format_stream << "+w:";
+    format_stream << json_list_size;
+    std::string format = format_stream.str();
+    NANOARROW_RETURN_NOT_OK_WITH_ERROR(ArrowSchemaSetFormat(schema, format.c_str()),
+                                       error);
+    return NANOARROW_OK;
+  }
+
+  // Sets a union format from the "mode" ("DENSE" or "SPARSE") and "typeIds"
+  // (array of integers) members of a Type object whose name is "union". The
+  // format is "+ud:" or "+us:" followed by the comma-separated type ids.
+  ArrowErrorCode SetTypeUnion(ArrowSchema* schema, const json& value, ArrowError* error) {
+    NANOARROW_RETURN_NOT_OK(
+        Check(value.contains("mode"), error, "Type[name=='union'] missing key 'mode'"));
+    NANOARROW_RETURN_NOT_OK(Check(value.contains("typeIds"), error,
+                                  "Type[name=='union'] missing key 'typeIds'"));
+
+    const json& json_mode = value["mode"];
+    NANOARROW_RETURN_NOT_OK(
+        Check(json_mode.is_string(), error, "Type[name=='union'] mode must be string"));
+
+    const std::string mode_str = json_mode.get<std::string>();
+    const bool is_dense = mode_str == "DENSE";
+    if (!is_dense && mode_str != "SPARSE") {
+      ArrowErrorSet(error, "Type[name=='union'] mode must be 'DENSE' or 'SPARSE'");
+      return EINVAL;
+    }
+
+    std::stringstream type_ids_format;
+    type_ids_format << (is_dense ? "+ud:" : "+us:");
+
+    const json& json_type_ids = value["typeIds"];
+    NANOARROW_RETURN_NOT_OK(Check(json_type_ids.is_array(), error,
+                                  "Type[name=='union'] typeIds must be array"));
+
+    // Write a separator before every id except the first
+    bool needs_separator = false;
+    for (const json& type_id : json_type_ids) {
+      NANOARROW_RETURN_NOT_OK(Check(type_id.is_number_integer(), error,
+                                    "Type[name=='union'] typeIds item must be integer"));
+      if (needs_separator) {
+        type_ids_format << ",";
+      }
+      type_ids_format << type_id;
+      needs_separator = true;
+    }
+
+    std::string type_ids_format_str = type_ids_format.str();
+    NANOARROW_RETURN_NOT_OK_WITH_ERROR(
+        ArrowSchemaSetFormat(schema, type_ids_format_str.c_str()), error);
+
+    return NANOARROW_OK;
+  }
+
+  // Copies JSON metadata into schema. The value must be null, in which case the
+  // schema is left untouched, or an array of objects that each have string
+  // "key" and "value" members. Returns EINVAL if the JSON has any other shape.
+  ArrowErrorCode SetMetadata(ArrowSchema* schema, const json& value, ArrowError* error) {
+    NANOARROW_RETURN_NOT_OK(Check(value.is_null() || value.is_array(), error,
+                                  "Field or Schema metadata must be null or array"));
+    if (value.is_null()) {
+      return NANOARROW_OK;
+    }
+
+    nanoarrow::UniqueBuffer metadata;
+    NANOARROW_RETURN_NOT_OK_WITH_ERROR(ArrowMetadataBuilderInit(metadata.get(), nullptr),
+                                       error);
+    for (const auto& item : value) {
+      NANOARROW_RETURN_NOT_OK(
+          Check(item.is_object(), error, "metadata item must be object"));
+      NANOARROW_RETURN_NOT_OK(
+          Check(item.contains("key"), error, "metadata item missing key 'key'"));
+      NANOARROW_RETURN_NOT_OK(
+          Check(item.contains("value"), error, "metadata item missing key 'value'"));
+
+      // Named item_key/item_value so that the `value` parameter being iterated
+      // is not shadowed inside the loop
+      const auto& item_key = item["key"];
+      const auto& item_value = item["value"];
+      NANOARROW_RETURN_NOT_OK(
+          Check(item_key.is_string(), error, "metadata item key must be string"));
+      NANOARROW_RETURN_NOT_OK(
+          Check(item_value.is_string(), error, "metadata item value must be string"));
+
+      const std::string key_str = item_key.get<std::string>();
+      const std::string value_str = item_value.get<std::string>();
+
+      // Pass explicit lengths: ArrowCharView() only receives a C string, so a
+      // key or value containing an embedded NUL would be cut short.
+      ArrowStringView key_view;
+      key_view.data = key_str.data();
+      key_view.size_bytes = static_cast<int64_t>(key_str.size());
+
+      ArrowStringView value_view;
+      value_view.data = value_str.data();
+      value_view.size_bytes = static_cast<int64_t>(value_str.size());
+
+      NANOARROW_RETURN_NOT_OK_WITH_ERROR(
+          ArrowMetadataBuilderAppend(metadata.get(), key_view, value_view), error);
+    }
+
+    NANOARROW_RETURN_NOT_OK_WITH_ERROR(
+        ArrowSchemaSetMetadata(schema, reinterpret_cast<char*>(metadata->data)), error);
+    return NANOARROW_OK;
+  }
+
+  // Returns NANOARROW_OK when value is true; otherwise copies err into error
+  // and returns EINVAL.
+  ArrowErrorCode Check(bool value, ArrowError* error, const std::string& err) {
+    if (!value) {
+      ArrowErrorSet(error, "%s", err.c_str());
+      return EINVAL;
+    }
+
+    return NANOARROW_OK;
+  }
+};
+
 /// @}
 
 }  // namespace testing
diff --git a/src/nanoarrow/nanoarrow_testing_test.cc b/src/nanoarrow/nanoarrow_testing_test.cc
index d3cf9d7..73c8f79 100644
--- a/src/nanoarrow/nanoarrow_testing_test.cc
+++ b/src/nanoarrow/nanoarrow_testing_test.cc
@@ -23,6 +23,7 @@
 
 #include "nanoarrow/nanoarrow_testing.hpp"
 
+using nanoarrow::testing::TestingJSONReader;
 using nanoarrow::testing::TestingJSONWriter;
 
 ArrowErrorCode WriteBatchJSON(std::ostream& out, const ArrowSchema* schema,
@@ -633,3 +634,281 @@ TEST(NanoarrowTestingTest, NanoarrowTestingTestTypeUnion) {
       [](ArrowArray* array) { return NANOARROW_OK; }, &WriteTypeJSON,
       R"({"name": "union", "mode": "DENSE", "typeIds": [0,1]})");
 }
+
+// ReadSchema() accepts a minimal schema and rejects malformed input
+TEST(NanoarrowTestingTest, NanoarrowTestingTestReadSchema) {
+  nanoarrow::UniqueSchema schema;
+  TestingJSONReader reader;
+
+  // A schema with one nullable child of type null
+  const std::string schema_json =
+      R"({"fields": [)"
+      R"({"name": null, "nullable": true, "type": {"name": "null"}, "children": [], "metadata": null}], )"
+      R"("metadata": null})";
+
+  ASSERT_EQ(reader.ReadSchema(schema_json, schema.get()), NANOARROW_OK);
+  EXPECT_STREQ(schema->format, "+s");
+  ASSERT_EQ(schema->n_children, 1);
+  EXPECT_STREQ(schema->children[0]->format, "n");
+
+  // Check invalid JSON
+  const std::string invalid_json = R"({)";
+  EXPECT_EQ(reader.ReadSchema(invalid_json, schema.get()), EINVAL);
+
+  // Check at least one failed Check()
+  const std::string not_an_object = R"("this is not a JSON object")";
+  EXPECT_EQ(reader.ReadSchema(not_an_object, schema.get()), EINVAL);
+}
+
+// ReadField() handles name/nullable variations and rejects malformed input
+TEST(NanoarrowTestingTest, NanoarrowTestingTestReadFieldBasic) {
+  nanoarrow::UniqueSchema schema;
+  TestingJSONReader reader;
+
+  // Unnamed, nullable field of type null
+  const std::string nullable_json =
+      R"({"name": null, "nullable": true, "type": {"name": "null"}, "children": [], "metadata": null})";
+  ASSERT_EQ(reader.ReadField(nullable_json, schema.get()), NANOARROW_OK);
+  EXPECT_STREQ(schema->format, "n");
+  EXPECT_EQ(schema->name, nullptr);
+  EXPECT_TRUE(schema->flags & ARROW_FLAG_NULLABLE);
+  EXPECT_EQ(schema->n_children, 0);
+  EXPECT_EQ(schema->metadata, nullptr);
+
+  // Check non-nullable
+  const std::string non_nullable_json =
+      R"({"name": null, "nullable": false, "type": {"name": "null"}, "children": [], "metadata": null})";
+  schema.reset();
+  ASSERT_EQ(reader.ReadField(non_nullable_json, schema.get()), NANOARROW_OK);
+  EXPECT_FALSE(schema->flags & ARROW_FLAG_NULLABLE);
+
+  // Check with name
+  const std::string named_json =
+      R"({"name": "colname", "nullable": true, "type": {"name": "null"}, "children": [], "metadata": null})";
+  schema.reset();
+  ASSERT_EQ(reader.ReadField(named_json, schema.get()), NANOARROW_OK);
+  EXPECT_STREQ(schema->name, "colname");
+
+  // Check invalid JSON
+  const std::string invalid_json = R"({)";
+  EXPECT_EQ(reader.ReadField(invalid_json, schema.get()), EINVAL);
+
+  // Check at least one failed Check()
+  const std::string not_an_object = R"("this is not a JSON object")";
+  EXPECT_EQ(reader.ReadField(not_an_object, schema.get()), EINVAL);
+
+  // Check that field is validated
+  const std::string negative_width_json =
+      R"({"name": null, "nullable": true, "type": {"name": "fixedsizebinary", "byteWidth": -1}, "children": [], "metadata": null})";
+  EXPECT_EQ(reader.ReadField(negative_width_json, schema.get()), EINVAL);
+}
+
+// Field metadata is read back as the same key/value pairs, in order
+TEST(NanoarrowTestingTest, NanoarrowTestingTestReadFieldMetadata) {
+  nanoarrow::UniqueSchema schema;
+  TestingJSONReader reader;
+
+  const std::string field_json =
+      R"({"name": null, "nullable": true, "type": {"name": "null"}, "children": [], )"
+      R"("metadata": [{"key": "k1", "value": "v1"}, {"key": "k2", "value": "v2"}]})";
+  ASSERT_EQ(reader.ReadField(field_json, schema.get()), NANOARROW_OK);
+
+  ArrowMetadataReader metadata;
+  ArrowStringView key;
+  ArrowStringView value;
+
+  ASSERT_EQ(ArrowMetadataReaderInit(&metadata, schema->metadata), NANOARROW_OK);
+  ASSERT_EQ(metadata.remaining_keys, 2);
+
+  // Expected {key, value} pairs in the order they appear in the JSON
+  const char* const kExpected[][2] = {{"k1", "v1"}, {"k2", "v2"}};
+  for (const auto& expected : kExpected) {
+    ASSERT_EQ(ArrowMetadataReaderRead(&metadata, &key, &value), NANOARROW_OK);
+    ASSERT_EQ(std::string(key.data, key.size_bytes), std::string(expected[0]));
+    ASSERT_EQ(std::string(value.data, value.size_bytes), std::string(expected[1]));
+  }
+}
+
+// A struct field with one child is read with its child populated
+TEST(NanoarrowTestingTest, NanoarrowTestingTestReadFieldNested) {
+  nanoarrow::UniqueSchema schema;
+  TestingJSONReader reader;
+
+  const std::string struct_json =
+      R"({"name": null, "nullable": true, "type": {"name": "struct"}, "children": [)"
+      R"({"name": null, "nullable": true, "type": {"name": "null"}, "children": [], "metadata": null}], )"
+      R"("metadata": null})";
+
+  ASSERT_EQ(reader.ReadField(struct_json, schema.get()), NANOARROW_OK);
+  EXPECT_STREQ(schema->format, "+s");
+  ASSERT_EQ(schema->n_children, 1);
+  EXPECT_STREQ(schema->children[0]->format, "n");
+}
+
+// Reads field_json with TestingJSONReader, writes the resulting schema back
+// with TestingJSONWriter and expects the output to equal the input text.
+void TestFieldRoundtrip(const std::string& field_json) {
+  nanoarrow::UniqueSchema schema;
+  TestingJSONReader reader;
+  TestingJSONWriter writer;
+  ArrowError error;
+  error.message[0] = '\0';
+
+  const int read_code = reader.ReadField(field_json, schema.get(), &error);
+  ASSERT_EQ(read_code, NANOARROW_OK) << "Error: " << error.message;
+
+  std::stringstream written;
+  const int write_code = writer.WriteField(written, schema.get());
+  ASSERT_EQ(write_code, NANOARROW_OK);
+  EXPECT_EQ(written.str(), field_json);
+}
+
+// Wraps type_json in an unnamed, nullable field without children or metadata
+// and runs TestFieldRoundtrip() on it.
+void TestTypeRoundtrip(const std::string& type_json) {
+  std::string field_json = R"({"name": null, "nullable": true, "type": )";
+  field_json += type_json;
+  field_json += R"(, "children": [], "metadata": null})";
+  TestFieldRoundtrip(field_json);
+}
+
+// Expects ReadField() on field_json to return code (EINVAL unless specified)
+// and to leave exactly msg in the ArrowError.
+void TestFieldError(const std::string& field_json, const std::string& msg,
+                    int code = EINVAL) {
+  nanoarrow::UniqueSchema schema;
+  TestingJSONReader reader;
+  ArrowError error;
+  error.message[0] = '\0';
+
+  const int read_code = reader.ReadField(field_json, schema.get(), &error);
+  EXPECT_EQ(read_code, code);
+
+  const std::string actual_msg(error.message);
+  EXPECT_EQ(actual_msg, msg);
+}
+
+// Wraps type_json in an unnamed, nullable field without children or metadata
+// and runs TestFieldError() on it.
+void TestTypeError(const std::string& type_json, const std::string& msg,
+                   int code = EINVAL) {
+  std::string field_json = R"({"name": null, "nullable": true, "type": )";
+  field_json += type_json;
+  field_json += R"(, "children": [], "metadata": null})";
+  TestFieldError(field_json, msg, code);
+}
+
+// Types described by name alone roundtrip; an unknown name is ENOTSUP
+TEST(NanoarrowTestingTest, NanoarrowTestingTestFieldPrimitive) {
+  const char* const kTypes[] = {
+      R"({"name": "null"})",      R"({"name": "bool"})",
+      R"({"name": "utf8"})",      R"({"name": "largeutf8"})",
+      R"({"name": "binary"})",    R"({"name": "largebinary"})",
+  };
+  for (const char* type_json : kTypes) {
+    TestTypeRoundtrip(type_json);
+  }
+
+  TestTypeError(R"({"name": "an unsupported type"})",
+                "Unsupported Type name: 'an unsupported type'", ENOTSUP);
+}
+
+// Signed integers of every supported width roundtrip; other widths fail
+TEST(NanoarrowTestingTest, NanoarrowTestingTestFieldInt) {
+  const char* const kTypes[] = {
+      R"({"name": "int", "bitWidth": 8, "isSigned": true})",
+      R"({"name": "int", "bitWidth": 16, "isSigned": true})",
+      R"({"name": "int", "bitWidth": 32, "isSigned": true})",
+      R"({"name": "int", "bitWidth": 64, "isSigned": true})",
+  };
+  for (const char* type_json : kTypes) {
+    TestTypeRoundtrip(type_json);
+  }
+
+  TestTypeError(R"({"name": "int", "bitWidth": 1, "isSigned": true})",
+                "Type[name=='int'] bitWidth must be 8, 16, 32, or 64");
+}
+
+// Unsigned integers of every supported width roundtrip; other widths fail
+TEST(NanoarrowTestingTest, NanoarrowTestingTestFieldUInt) {
+  const char* const kTypes[] = {
+      R"({"name": "int", "bitWidth": 8, "isSigned": false})",
+      R"({"name": "int", "bitWidth": 16, "isSigned": false})",
+      R"({"name": "int", "bitWidth": 32, "isSigned": false})",
+      R"({"name": "int", "bitWidth": 64, "isSigned": false})",
+  };
+  for (const char* type_json : kTypes) {
+    TestTypeRoundtrip(type_json);
+  }
+
+  TestTypeError(R"({"name": "int", "bitWidth": 1, "isSigned": false})",
+                "Type[name=='int'] bitWidth must be 8, 16, 32, or 64");
+}
+
+// Each floating point precision roundtrips; an unknown precision fails
+TEST(NanoarrowTestingTest, NanoarrowTestingTestFieldFloatingPoint) {
+  const char* const kTypes[] = {
+      R"({"name": "floatingpoint", "precision": "HALF"})",
+      R"({"name": "floatingpoint", "precision": "SINGLE"})",
+      R"({"name": "floatingpoint", "precision": "DOUBLE"})",
+  };
+  for (const char* type_json : kTypes) {
+    TestTypeRoundtrip(type_json);
+  }
+
+  TestTypeError(
+      R"({"name": "floatingpoint", "precision": "NOT_A_PRECISION"})",
+      "Type[name=='floatingpoint'] precision must be 'HALF', 'SINGLE', or 'DOUBLE'");
+}
+
+// A fixed-size binary type roundtrips with its byte width
+TEST(NanoarrowTestingTest, NanoarrowTestingTestFieldFixedSizeBinary) {
+  const std::string type_json = R"({"name": "fixedsizebinary", "byteWidth": 123})";
+  TestTypeRoundtrip(type_json);
+}
+
+// Both decimal bit widths roundtrip; any other bit width fails
+TEST(NanoarrowTestingTest, NanoarrowTestingTestFieldDecimal) {
+  const char* const kTypes[] = {
+      R"({"name": "decimal", "bitWidth": 128, "precision": 10, "scale": 3})",
+      R"({"name": "decimal", "bitWidth": 256, "precision": 10, "scale": 3})",
+  };
+  for (const char* type_json : kTypes) {
+    TestTypeRoundtrip(type_json);
+  }
+
+  TestTypeError(R"({"name": "decimal", "bitWidth": 123, "precision": 10, "scale": 3})",
+                "Type[name=='decimal'] bitWidth must be 128 or 256");
+}
+
+// Map fields roundtrip with keysSorted both true and false
+TEST(NanoarrowTestingTest, NanoarrowTestingTestFieldMap) {
+  // Sorted keys
+  const std::string sorted_json =
+      R"({"name": null, "nullable": true, "type": {"name": "map", "keysSorted": true}, "children": [)"
+      R"({"name": "entries", "nullable": false, "type": {"name": "struct"}, "children": [)"
+      R"({"name": null, "nullable": false, "type": {"name": "utf8"}, "children": [], "metadata": null}, )"
+      R"({"name": null, "nullable": true, "type": {"name": "bool"}, "children": [], "metadata": null})"
+      R"(], "metadata": null})"
+      R"(], "metadata": null})";
+  TestFieldRoundtrip(sorted_json);
+
+  // Unsorted keys
+  const std::string unsorted_json =
+      R"({"name": null, "nullable": true, "type": {"name": "map", "keysSorted": false}, "children": [)"
+      R"({"name": "entries", "nullable": false, "type": {"name": "struct"}, "children": [)"
+      R"({"name": null, "nullable": false, "type": {"name": "utf8"}, "children": [], "metadata": null}, )"
+      R"({"name": null, "nullable": true, "type": {"name": "bool"}, "children": [], "metadata": null})"
+      R"(], "metadata": null})"
+      R"(], "metadata": null})";
+  TestFieldRoundtrip(unsorted_json);
+}
+
+// Struct fields roundtrip with and without children
+TEST(NanoarrowTestingTest, NanoarrowTestingTestFieldStruct) {
+  // Empty
+  const std::string empty_json =
+      R"({"name": null, "nullable": true, "type": {"name": "struct"}, "children": [)"
+      R"(], "metadata": null})";
+  TestFieldRoundtrip(empty_json);
+
+  // Non-empty
+  const std::string one_child_json =
+      R"({"name": null, "nullable": true, "type": {"name": "struct"}, "children": [)"
+      R"({"name": null, "nullable": true, "type": {"name": "null"}, "children": [], "metadata": null})"
+      R"(], "metadata": null})";
+  TestFieldRoundtrip(one_child_json);
+}
+
+// List and large list fields roundtrip
+TEST(NanoarrowTestingTest, NanoarrowTestingTestFieldList) {
+  const std::string list_json =
+      R"({"name": null, "nullable": true, "type": {"name": "list"}, "children": [)"
+      R"({"name": null, "nullable": true, "type": {"name": "null"}, "children": [], "metadata": null})"
+      R"(], "metadata": null})";
+  TestFieldRoundtrip(list_json);
+
+  const std::string large_list_json =
+      R"({"name": null, "nullable": true, "type": {"name": "largelist"}, "children": [)"
+      R"({"name": null, "nullable": true, "type": {"name": "null"}, "children": [], "metadata": null})"
+      R"(], "metadata": null})";
+  TestFieldRoundtrip(large_list_json);
+}
+
+// A fixed-size list field roundtrips with its list size
+TEST(NanoarrowTestingTest, NanoarrowTestingTestFieldFixedSizeList) {
+  const std::string field_json =
+      R"({"name": null, "nullable": true, "type": {"name": "fixedsizelist", "listSize": 12}, "children": [)"
+      R"({"name": null, "nullable": true, "type": {"name": "null"}, "children": [], "metadata": null})"
+      R"(], "metadata": null})";
+  TestFieldRoundtrip(field_json);
+}
+
+// Dense and sparse union fields roundtrip; an unknown mode fails
+TEST(NanoarrowTestingTest, NanoarrowTestingTestFieldUnion) {
+  const std::string dense_json =
+      R"({"name": null, "nullable": true, "type": {"name": "union", "mode": "DENSE", "typeIds": [10,20]}, "children": [)"
+      R"({"name": null, "nullable": true, "type": {"name": "null"}, "children": [], "metadata": null}, )"
+      R"({"name": null, "nullable": true, "type": {"name": "utf8"}, "children": [], "metadata": null})"
+      R"(], "metadata": null})";
+  TestFieldRoundtrip(dense_json);
+
+  const std::string sparse_json =
+      R"({"name": null, "nullable": true, "type": {"name": "union", "mode": "SPARSE", "typeIds": [10,20]}, "children": [)"
+      R"({"name": null, "nullable": true, "type": {"name": "null"}, "children": [], "metadata": null}, )"
+      R"({"name": null, "nullable": true, "type": {"name": "utf8"}, "children": [], "metadata": null})"
+      R"(], "metadata": null})";
+  TestFieldRoundtrip(sparse_json);
+
+  TestTypeError(R"({"name": "union", "mode": "NOT_A_MODE", "typeIds": []})",
+                "Type[name=='union'] mode must be 'DENSE' or 'SPARSE'");
+}