You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by li...@apache.org on 2022/04/08 15:24:17 UTC

[arrow] branch master updated: ARROW-16152: [C++] Fix segfault with unknown functions in Substrait

This is an automated email from the ASF dual-hosted git repository.

lidavidm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new b285350736 ARROW-16152: [C++] Fix segfault with unknown functions in Substrait
b285350736 is described below

commit b2853507363e7aa25abc298f20111704e3e24e2f
Author: Dewey Dunnington <de...@fishandwhistle.net>
AuthorDate: Fri Apr 8 11:23:50 2022 -0400

    ARROW-16152: [C++] Fix segfault with unknown functions in Substrait
    
    Revised from #12828 with proper JIRA and unit test!
    
    Reprex (after the change in this PR...before this change it segfaults):
    
    ``` r
    arrow:::do_exec_plan_substrait('
    {
      "extensionUris": [
        {
          "extensionUriAnchor": 1
        }
      ],
      "extensions": [
        {
          "extensionFunction": {
            "extensionUriReference": 1,
            "functionAnchor": 2,
            "name": "abs_checked"
          }
        }
      ],
      "relations": [
        {
          "rel": {
            "project": {
              "input": {
                "read": {
                  "baseSchema": {
                    "names": [
                      "letter",
                      "number"
                    ],
                    "struct": {
                      "types": [
                        {
                          "string": {
    
                          }
                        },
                        {
                          "i32": {
    
                          }
                        }
                      ]
                    }
                  },
                  "namedTable": {
                    "names": [
                      "named_table_1"
                    ]
                  }
                }
              },
              "expressions": [
                {
                  "scalarFunction": {
                    "functionReference": 2,
                    "args": [
                      {
                        "selection": {
                          "directReference": {
                            "structField": {
                              "field": 1
                            }
                          }
                        }
                      }
                    ],
                    "outputType": {
    
                    }
                  }
                }
              ]
            }
          }
        }
      ]
    }
    ')
    #> Error: Invalid: Uri  was referenced by an extension but was not declared in the ExtensionSet.
    #> /Users/deweydunnington/Desktop/rscratch/arrow/cpp/src/arrow/engine/substrait/extension_set.cc:161  set.impl_->CheckHasUri(function_ids[i].uri)
    #> /Users/deweydunnington/Desktop/rscratch/arrow/cpp/src/arrow/engine/substrait/serde.cc:66  GetExtensionSetFromPlan(plan)
    ```
    
    Closes #12836 from paleolimbot/cpp-minor-typo
    
    Lead-authored-by: Dewey Dunnington <de...@fishandwhistle.net>
    Co-authored-by: David Li <li...@gmail.com>
    Signed-off-by: David Li <li...@gmail.com>
---
 cpp/src/arrow/engine/substrait/extension_set.cc |  2 +-
 cpp/src/arrow/engine/substrait/serde_test.cc    | 26 +++++++++++++++++++++++++
 2 files changed, 27 insertions(+), 1 deletion(-)

diff --git a/cpp/src/arrow/engine/substrait/extension_set.cc b/cpp/src/arrow/engine/substrait/extension_set.cc
index fe43ab2879..80cdf59f49 100644
--- a/cpp/src/arrow/engine/substrait/extension_set.cc
+++ b/cpp/src/arrow/engine/substrait/extension_set.cc
@@ -164,7 +164,7 @@ Result<ExtensionSet> ExtensionSet::Make(std::vector<util::string_view> uris,
       set.functions_[i] = {rec->id, rec->function_name};
       continue;
     }
-    return Status::Invalid("Function ", function_ids[i].uri, "#", type_ids[i].name,
+    return Status::Invalid("Function ", function_ids[i].uri, "#", function_ids[i].name,
                            " not found");
   }
 
diff --git a/cpp/src/arrow/engine/substrait/serde_test.cc b/cpp/src/arrow/engine/substrait/serde_test.cc
index 6af5d71521..300a6c528b 100644
--- a/cpp/src/arrow/engine/substrait/serde_test.cc
+++ b/cpp/src/arrow/engine/substrait/serde_test.cc
@@ -724,5 +724,31 @@ TEST(Substrait, ExtensionSetFromPlan) {
   EXPECT_EQ(decoded_add_func.name, "add");
 }
 
+TEST(Substrait, ExtensionSetFromPlanMissingFunc) {
+  ASSERT_OK_AND_ASSIGN(auto buf, internal::SubstraitFromJSON("Plan", R"({
+    "relations": [],
+    "extension_uris": [
+      {
+        "extension_uri_anchor": 7,
+        "uri": "https://github.com/apache/arrow/blob/master/format/substrait/extension_types.yaml"
+      }
+    ],
+    "extensions": [
+      {"extension_function": {
+        "extension_uri_reference": 7,
+        "function_anchor": 42,
+        "name": "does_not_exist"
+      }}
+    ]
+  })"));
+
+  ExtensionSet ext_set;
+  ASSERT_RAISES(
+      Invalid,
+      DeserializePlan(
+          *buf, [] { return std::shared_ptr<compute::SinkNodeConsumer>{nullptr}; },
+          &ext_set));
+}
+
 }  // namespace engine
 }  // namespace arrow