You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by "pitrou (via GitHub)" <gi...@apache.org> on 2023/06/01 09:16:45 UTC

[GitHub] [arrow] pitrou commented on a diff in pull request #35860: GH-35730: [C++] Add the ability to specify custom schema on a dataset write

pitrou commented on code in PR #35860:
URL: https://github.com/apache/arrow/pull/35860#discussion_r1212851930


##########
cpp/src/arrow/dataset/file_base.cc:
##########
@@ -475,16 +475,38 @@ Result<acero::ExecNode*> MakeWriteNode(acero::ExecPlan* plan,
 
   const WriteNodeOptions write_node_options =
       checked_cast<const WriteNodeOptions&>(options);
-  const std::shared_ptr<const KeyValueMetadata>& custom_metadata =
-      write_node_options.custom_metadata;
+  const std::shared_ptr<Schema>& custom_schema = write_node_options.custom_schema;
   const FileSystemDatasetWriteOptions& write_options = write_node_options.write_options;
 
+  const std::shared_ptr<Schema>& input_schema = inputs[0]->output_schema();
+
+  if (custom_schema != nullptr) {
+    if (custom_schema->num_fields() != input_schema->num_fields()) {
+      return Status::Invalid(

Review Comment:
   Should this return `Status::TypeError` rather than `Status::Invalid` here?



##########
cpp/src/arrow/dataset/file_base.cc:
##########
@@ -475,16 +475,38 @@ Result<acero::ExecNode*> MakeWriteNode(acero::ExecPlan* plan,
 
   const WriteNodeOptions write_node_options =
       checked_cast<const WriteNodeOptions&>(options);
-  const std::shared_ptr<const KeyValueMetadata>& custom_metadata =
-      write_node_options.custom_metadata;
+  const std::shared_ptr<Schema>& custom_schema = write_node_options.custom_schema;
   const FileSystemDatasetWriteOptions& write_options = write_node_options.write_options;
 
+  const std::shared_ptr<Schema>& input_schema = inputs[0]->output_schema();
+
+  if (custom_schema != nullptr) {
+    if (custom_schema->num_fields() != input_schema->num_fields()) {
+      return Status::Invalid(
+          "The provided custom_schema did not have the same number of fields as the "
+          "data.  The custom schema can only be used to add metadata / nullability to "
+          "fields and cannot change the type or number of fields.");
+    }
+    for (int field_idx = 0; field_idx < input_schema->num_fields(); field_idx++) {
+      if (!input_schema->field(field_idx)->type()->Equals(
+              custom_schema->field(field_idx)->type())) {
+        return Status::Invalid("The provided custom_schema specified type ",

Review Comment:
   Same here: should this be `Status::TypeError` rather than `Status::Invalid`?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org