You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2022/07/28 21:41:06 UTC

[arrow] branch master updated: ARROW-16988: [C++] Introduce Substrait ToProto/FromProto conversion options (#13537)

This is an automated email from the ASF dual-hosted git repository.

westonpace pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 9a00777974 ARROW-16988: [C++] Introduce Substrait ToProto/FromProto conversion options (#13537)
9a00777974 is described below

commit 9a00777974df8c69921b0c08e80b5e6e842f41e9
Author: Jeroen van Straten <je...@gmail.com>
AuthorDate: Thu Jul 28 23:41:01 2022 +0200

    ARROW-16988: [C++] Introduce Substrait ToProto/FromProto conversion options (#13537)
    
    This introduces the conversion options structure described in the associated JIRA. In the interest of keeping this small, no behavior has been modified to make use of the options yet; the newly added options can only be used to relax the conversion semantics, so the current implementation already satisfies all of them. I'll submit follow-up issues for features that can make use of the relaxed semantics (edited to add: most already existed, so I linked those instead).
    
    Authored-by: Jeroen van Straten <je...@gmail.com>
    Signed-off-by: Weston Pace <we...@gmail.com>
---
 .../arrow/engine/substrait/expression_internal.cc  | 118 +++++++++++++--------
 .../arrow/engine/substrait/expression_internal.h   |  13 ++-
 cpp/src/arrow/engine/substrait/options.h           |  65 ++++++++++++
 .../arrow/engine/substrait/relation_internal.cc    |  42 +++++---
 cpp/src/arrow/engine/substrait/relation_internal.h |   4 +-
 cpp/src/arrow/engine/substrait/serde.cc            |  83 +++++++++------
 cpp/src/arrow/engine/substrait/serde.h             |  57 +++++++---
 cpp/src/arrow/engine/substrait/type_internal.cc    |  68 +++++++-----
 cpp/src/arrow/engine/substrait/type_internal.h     |  11 +-
 9 files changed, 313 insertions(+), 148 deletions(-)

diff --git a/cpp/src/arrow/engine/substrait/expression_internal.cc b/cpp/src/arrow/engine/substrait/expression_internal.cc
index 694edd614b..07c222bc4c 100644
--- a/cpp/src/arrow/engine/substrait/expression_internal.cc
+++ b/cpp/src/arrow/engine/substrait/expression_internal.cc
@@ -42,10 +42,12 @@ using ::arrow::internal::make_unique;
 }  // namespace internal
 
 Result<compute::Expression> FromProto(const substrait::Expression& expr,
-                                      const ExtensionSet& ext_set) {
+                                      const ExtensionSet& ext_set,
+                                      const ConversionOptions& conversion_options) {
   switch (expr.rex_type_case()) {
     case substrait::Expression::kLiteral: {
-      ARROW_ASSIGN_OR_RAISE(auto datum, FromProto(expr.literal(), ext_set));
+      ARROW_ASSIGN_OR_RAISE(auto datum,
+                            FromProto(expr.literal(), ext_set, conversion_options));
       return compute::literal(std::move(datum));
     }
 
@@ -54,7 +56,8 @@ Result<compute::Expression> FromProto(const substrait::Expression& expr,
 
       util::optional<compute::Expression> out;
       if (expr.selection().has_expression()) {
-        ARROW_ASSIGN_OR_RAISE(out, FromProto(expr.selection().expression(), ext_set));
+        ARROW_ASSIGN_OR_RAISE(
+            out, FromProto(expr.selection().expression(), ext_set, conversion_options));
       }
 
       const auto* ref = &expr.selection().direct_reference();
@@ -126,9 +129,12 @@ Result<compute::Expression> FromProto(const substrait::Expression& expr,
       if (if_then.ifs_size() == 0) break;
 
       if (if_then.ifs_size() == 1) {
-        ARROW_ASSIGN_OR_RAISE(auto if_, FromProto(if_then.ifs(0).if_(), ext_set));
-        ARROW_ASSIGN_OR_RAISE(auto then, FromProto(if_then.ifs(0).then(), ext_set));
-        ARROW_ASSIGN_OR_RAISE(auto else_, FromProto(if_then.else_(), ext_set));
+        ARROW_ASSIGN_OR_RAISE(
+            auto if_, FromProto(if_then.ifs(0).if_(), ext_set, conversion_options));
+        ARROW_ASSIGN_OR_RAISE(
+            auto then, FromProto(if_then.ifs(0).then(), ext_set, conversion_options));
+        ARROW_ASSIGN_OR_RAISE(auto else_,
+                              FromProto(if_then.else_(), ext_set, conversion_options));
         return compute::call("if_else",
                              {std::move(if_), std::move(then), std::move(else_)});
       }
@@ -141,13 +147,16 @@ Result<compute::Expression> FromProto(const substrait::Expression& expr,
       args.reserve(if_then.ifs_size() + 2);
       args.emplace_back();
       for (const auto& if_ : if_then.ifs()) {
-        ARROW_ASSIGN_OR_RAISE(auto compute_if, FromProto(if_.if_(), ext_set));
-        ARROW_ASSIGN_OR_RAISE(auto compute_then, FromProto(if_.then(), ext_set));
+        ARROW_ASSIGN_OR_RAISE(auto compute_if,
+                              FromProto(if_.if_(), ext_set, conversion_options));
+        ARROW_ASSIGN_OR_RAISE(auto compute_then,
+                              FromProto(if_.then(), ext_set, conversion_options));
         conditions.emplace_back(std::move(compute_if));
         args.emplace_back(std::move(compute_then));
         condition_names.emplace_back("cond" + std::to_string(++name_counter));
       }
-      ARROW_ASSIGN_OR_RAISE(auto compute_else, FromProto(if_then.else_(), ext_set));
+      ARROW_ASSIGN_OR_RAISE(auto compute_else,
+                            FromProto(if_then.else_(), ext_set, conversion_options));
       args.emplace_back(std::move(compute_else));
       args[0] = compute::call("make_struct", std::move(conditions),
                               compute::MakeStructOptions(condition_names));
@@ -165,7 +174,8 @@ Result<compute::Expression> FromProto(const substrait::Expression& expr,
         const auto& argument = scalar_fn.arguments(i);
         switch (argument.arg_type_case()) {
           case substrait::FunctionArgument::kValue: {
-            ARROW_ASSIGN_OR_RAISE(arguments[i], FromProto(argument.value(), ext_set));
+            ARROW_ASSIGN_OR_RAISE(
+                arguments[i], FromProto(argument.value(), ext_set, conversion_options));
             break;
           }
           default:
@@ -178,8 +188,9 @@ Result<compute::Expression> FromProto(const substrait::Expression& expr,
       if (func_name != "cast") {
         return compute::call(func_name, std::move(arguments));
       } else {
-        ARROW_ASSIGN_OR_RAISE(auto output_type_desc,
-                              FromProto(scalar_fn.output_type(), ext_set));
+        ARROW_ASSIGN_OR_RAISE(
+            auto output_type_desc,
+            FromProto(scalar_fn.output_type(), ext_set, conversion_options));
         auto cast_options = compute::CastOptions::Safe(std::move(output_type_desc.first));
         return compute::call(func_name, std::move(arguments), std::move(cast_options));
       }
@@ -195,7 +206,8 @@ Result<compute::Expression> FromProto(const substrait::Expression& expr,
 }
 
 Result<Datum> FromProto(const substrait::Expression::Literal& lit,
-                        const ExtensionSet& ext_set) {
+                        const ExtensionSet& ext_set,
+                        const ConversionOptions& conversion_options) {
   if (lit.nullable()) {
     // FIXME not sure how this field should be interpreted and there's no way to round
     // trip it through arrow
@@ -295,7 +307,8 @@ Result<Datum> FromProto(const substrait::Expression::Literal& lit,
 
       ScalarVector fields(struct_.fields_size());
       for (int i = 0; i < struct_.fields_size(); ++i) {
-        ARROW_ASSIGN_OR_RAISE(auto field, FromProto(struct_.fields(i), ext_set));
+        ARROW_ASSIGN_OR_RAISE(auto field,
+                              FromProto(struct_.fields(i), ext_set, conversion_options));
         DCHECK(field.is_scalar());
         fields[i] = field.scalar();
       }
@@ -321,7 +334,8 @@ Result<Datum> FromProto(const substrait::Expression::Literal& lit,
 
       ScalarVector values(list.values_size());
       for (int i = 0; i < list.values_size(); ++i) {
-        ARROW_ASSIGN_OR_RAISE(auto value, FromProto(list.values(i), ext_set));
+        ARROW_ASSIGN_OR_RAISE(auto value,
+                              FromProto(list.values(i), ext_set, conversion_options));
         DCHECK(value.is_scalar());
         values[i] = value.scalar();
         if (element_type) {
@@ -360,8 +374,9 @@ Result<Datum> FromProto(const substrait::Expression::Literal& lit,
           return Status::Invalid("While converting to MapScalar encountered missing ",
                                  missing, " in ", map.DebugString());
         }
-        ARROW_ASSIGN_OR_RAISE(auto key, FromProto(kv.key(), ext_set));
-        ARROW_ASSIGN_OR_RAISE(auto value, FromProto(kv.value(), ext_set));
+        ARROW_ASSIGN_OR_RAISE(auto key, FromProto(kv.key(), ext_set, conversion_options));
+        ARROW_ASSIGN_OR_RAISE(auto value,
+                              FromProto(kv.value(), ext_set, conversion_options));
 
         DCHECK(key.is_scalar());
         DCHECK(value.is_scalar());
@@ -402,20 +417,22 @@ Result<Datum> FromProto(const substrait::Expression::Literal& lit,
     }
 
     case substrait::Expression::Literal::kEmptyList: {
-      ARROW_ASSIGN_OR_RAISE(auto type_nullable,
-                            FromProto(lit.empty_list().type(), ext_set));
+      ARROW_ASSIGN_OR_RAISE(auto type_nullable, FromProto(lit.empty_list().type(),
+                                                          ext_set, conversion_options));
       ARROW_ASSIGN_OR_RAISE(auto values, MakeEmptyArray(type_nullable.first));
       return ListScalar{std::move(values)};
     }
 
     case substrait::Expression::Literal::kEmptyMap: {
-      ARROW_ASSIGN_OR_RAISE(auto key_type_nullable,
-                            FromProto(lit.empty_map().key(), ext_set));
+      ARROW_ASSIGN_OR_RAISE(
+          auto key_type_nullable,
+          FromProto(lit.empty_map().key(), ext_set, conversion_options));
       ARROW_ASSIGN_OR_RAISE(auto keys,
                             MakeEmptyArray(std::move(key_type_nullable.first)));
 
-      ARROW_ASSIGN_OR_RAISE(auto value_type_nullable,
-                            FromProto(lit.empty_map().value(), ext_set));
+      ARROW_ASSIGN_OR_RAISE(
+          auto value_type_nullable,
+          FromProto(lit.empty_map().value(), ext_set, conversion_options));
       ARROW_ASSIGN_OR_RAISE(auto values,
                             MakeEmptyArray(std::move(value_type_nullable.first)));
 
@@ -430,7 +447,8 @@ Result<Datum> FromProto(const substrait::Expression::Literal& lit,
     }
 
     case substrait::Expression::Literal::kNull: {
-      ARROW_ASSIGN_OR_RAISE(auto type_nullable, FromProto(lit.null(), ext_set));
+      ARROW_ASSIGN_OR_RAISE(auto type_nullable,
+                            FromProto(lit.null(), ext_set, conversion_options));
       if (!type_nullable.second) {
         return Status::Invalid("Substrait null literal ", lit.DebugString(),
                                " is of non-nullable type");
@@ -545,8 +563,8 @@ struct ScalarToProtoImpl {
 
   Status Visit(const ListScalar& s) {
     if (s.value->length() == 0) {
-      ARROW_ASSIGN_OR_RAISE(auto list_type,
-                            ToProto(*s.type, /*nullable=*/true, ext_set_));
+      ARROW_ASSIGN_OR_RAISE(auto list_type, ToProto(*s.type, /*nullable=*/true, ext_set_,
+                                                    conversion_options_));
       lit_->set_allocated_empty_list(list_type->release_list());
       return Status::OK();
     }
@@ -554,16 +572,17 @@ struct ScalarToProtoImpl {
     lit_->set_allocated_list(new Lit::List());
 
     const auto& list_type = checked_cast<const ListType&>(*s.type);
-    ARROW_ASSIGN_OR_RAISE(
-        auto element_type,
-        ToProto(*list_type.value_type(), list_type.value_field()->nullable(), ext_set_));
+    ARROW_ASSIGN_OR_RAISE(auto element_type, ToProto(*list_type.value_type(),
+                                                     list_type.value_field()->nullable(),
+                                                     ext_set_, conversion_options_));
 
     auto values = lit_->mutable_list()->mutable_values();
     values->Reserve(static_cast<int>(s.value->length()));
 
     for (int64_t i = 0; i < s.value->length(); ++i) {
       ARROW_ASSIGN_OR_RAISE(Datum list_element, s.value->GetScalar(i));
-      ARROW_ASSIGN_OR_RAISE(auto lit, ToProto(list_element, ext_set_));
+      ARROW_ASSIGN_OR_RAISE(auto lit,
+                            ToProto(list_element, ext_set_, conversion_options_));
       values->AddAllocated(lit.release());
     }
     return Status::OK();
@@ -576,7 +595,7 @@ struct ScalarToProtoImpl {
     fields->Reserve(static_cast<int>(s.value.size()));
 
     for (Datum field : s.value) {
-      ARROW_ASSIGN_OR_RAISE(auto lit, ToProto(field, ext_set_));
+      ARROW_ASSIGN_OR_RAISE(auto lit, ToProto(field, ext_set_, conversion_options_));
       fields->AddAllocated(lit.release());
     }
     return Status::OK();
@@ -588,7 +607,8 @@ struct ScalarToProtoImpl {
 
   Status Visit(const MapScalar& s) {
     if (s.value->length() == 0) {
-      ARROW_ASSIGN_OR_RAISE(auto map_type, ToProto(*s.type, /*nullable=*/true, ext_set_));
+      ARROW_ASSIGN_OR_RAISE(auto map_type, ToProto(*s.type, /*nullable=*/true, ext_set_,
+                                                   conversion_options_));
       lit_->set_allocated_empty_map(map_type->release_map());
       return Status::OK();
     }
@@ -604,11 +624,12 @@ struct ScalarToProtoImpl {
       auto kv = internal::make_unique<Lit::Map::KeyValue>();
 
       ARROW_ASSIGN_OR_RAISE(Datum key_scalar, kv_arr.field(0)->GetScalar(i));
-      ARROW_ASSIGN_OR_RAISE(auto key, ToProto(key_scalar, ext_set_));
+      ARROW_ASSIGN_OR_RAISE(auto key, ToProto(key_scalar, ext_set_, conversion_options_));
       kv->set_allocated_key(key.release());
 
       ARROW_ASSIGN_OR_RAISE(Datum value_scalar, kv_arr.field(1)->GetScalar(i));
-      ARROW_ASSIGN_OR_RAISE(auto value, ToProto(value_scalar, ext_set_));
+      ARROW_ASSIGN_OR_RAISE(auto value,
+                            ToProto(value_scalar, ext_set_, conversion_options_));
       kv->set_allocated_value(value.release());
 
       key_values->AddAllocated(kv.release());
@@ -680,11 +701,13 @@ struct ScalarToProtoImpl {
 
   substrait::Expression::Literal* lit_;
   ExtensionSet* ext_set_;
+  const ConversionOptions& conversion_options_;
 };
 }  // namespace
 
-Result<std::unique_ptr<substrait::Expression::Literal>> ToProto(const Datum& datum,
-                                                                ExtensionSet* ext_set) {
+Result<std::unique_ptr<substrait::Expression::Literal>> ToProto(
+    const Datum& datum, ExtensionSet* ext_set,
+    const ConversionOptions& conversion_options) {
   if (!datum.is_scalar()) {
     return Status::NotImplemented("representing ", datum.ToString(),
                                   " as a substrait::Expression::Literal");
@@ -693,9 +716,11 @@ Result<std::unique_ptr<substrait::Expression::Literal>> ToProto(const Datum& dat
   auto out = internal::make_unique<substrait::Expression::Literal>();
 
   if (datum.scalar()->is_valid) {
-    RETURN_NOT_OK((ScalarToProtoImpl{out.get(), ext_set})(*datum.scalar()));
+    RETURN_NOT_OK(
+        (ScalarToProtoImpl{out.get(), ext_set, conversion_options})(*datum.scalar()));
   } else {
-    ARROW_ASSIGN_OR_RAISE(auto type, ToProto(*datum.type(), /*nullable=*/true, ext_set));
+    ARROW_ASSIGN_OR_RAISE(auto type, ToProto(*datum.type(), /*nullable=*/true, ext_set,
+                                             conversion_options));
     out->set_allocated_null(type.release());
   }
 
@@ -802,8 +827,9 @@ static Result<std::unique_ptr<substrait::Expression>> MakeListElementReference(
   return MakeDirectReference(std::move(expr), std::move(ref_segment));
 }
 
-Result<std::unique_ptr<substrait::Expression>> ToProto(const compute::Expression& expr,
-                                                       ExtensionSet* ext_set) {
+Result<std::unique_ptr<substrait::Expression>> ToProto(
+    const compute::Expression& expr, ExtensionSet* ext_set,
+    const ConversionOptions& conversion_options) {
   if (!expr.IsBound()) {
     return Status::Invalid("ToProto requires a bound Expression");
   }
@@ -811,7 +837,7 @@ Result<std::unique_ptr<substrait::Expression>> ToProto(const compute::Expression
   auto out = internal::make_unique<substrait::Expression>();
 
   if (auto datum = expr.literal()) {
-    ARROW_ASSIGN_OR_RAISE(auto literal, ToProto(*datum, ext_set));
+    ARROW_ASSIGN_OR_RAISE(auto literal, ToProto(*datum, ext_set, conversion_options));
     out->set_allocated_literal(literal.release());
     return std::move(out);
   }
@@ -840,12 +866,13 @@ Result<std::unique_ptr<substrait::Expression>> ToProto(const compute::Expression
       std::vector<std::unique_ptr<substrait::Expression>> arguments(
           call->arguments.size() - 1);
       for (size_t i = 1; i < call->arguments.size(); ++i) {
-        ARROW_ASSIGN_OR_RAISE(arguments[i - 1], ToProto(call->arguments[i], ext_set));
+        ARROW_ASSIGN_OR_RAISE(arguments[i - 1],
+                              ToProto(call->arguments[i], ext_set, conversion_options));
       }
 
       for (size_t i = 0; i < conditions->arguments.size(); ++i) {
-        ARROW_ASSIGN_OR_RAISE(auto cond_substrait,
-                              ToProto(conditions->arguments[i], ext_set));
+        ARROW_ASSIGN_OR_RAISE(auto cond_substrait, ToProto(conditions->arguments[i],
+                                                           ext_set, conversion_options));
         auto clause = internal::make_unique<substrait::Expression::IfThen::IfClause>();
         clause->set_allocated_if_(cond_substrait.release());
         clause->set_allocated_then(arguments[i].release());
@@ -863,7 +890,8 @@ Result<std::unique_ptr<substrait::Expression>> ToProto(const compute::Expression
   // should be able to convert all its arguments first here
   std::vector<std::unique_ptr<substrait::Expression>> arguments(call->arguments.size());
   for (size_t i = 0; i < arguments.size(); ++i) {
-    ARROW_ASSIGN_OR_RAISE(arguments[i], ToProto(call->arguments[i], ext_set));
+    ARROW_ASSIGN_OR_RAISE(arguments[i],
+                          ToProto(call->arguments[i], ext_set, conversion_options));
   }
 
   if (call->function_name == "struct_field") {
diff --git a/cpp/src/arrow/engine/substrait/expression_internal.h b/cpp/src/arrow/engine/substrait/expression_internal.h
index 4e23dc8f70..2b4dec2a00 100644
--- a/cpp/src/arrow/engine/substrait/expression_internal.h
+++ b/cpp/src/arrow/engine/substrait/expression_internal.h
@@ -23,6 +23,7 @@
 
 #include "arrow/compute/type_fwd.h"
 #include "arrow/engine/substrait/extension_set.h"
+#include "arrow/engine/substrait/options.h"
 #include "arrow/engine/substrait/visibility.h"
 #include "arrow/type_fwd.h"
 
@@ -32,18 +33,22 @@ namespace arrow {
 namespace engine {
 
 ARROW_ENGINE_EXPORT
-Result<compute::Expression> FromProto(const substrait::Expression&, const ExtensionSet&);
+Result<compute::Expression> FromProto(const substrait::Expression&, const ExtensionSet&,
+                                      const ConversionOptions&);
 
 ARROW_ENGINE_EXPORT
 Result<std::unique_ptr<substrait::Expression>> ToProto(const compute::Expression&,
-                                                       ExtensionSet*);
+                                                       ExtensionSet*,
+                                                       const ConversionOptions&);
 
 ARROW_ENGINE_EXPORT
-Result<Datum> FromProto(const substrait::Expression::Literal&, const ExtensionSet&);
+Result<Datum> FromProto(const substrait::Expression::Literal&, const ExtensionSet&,
+                        const ConversionOptions&);
 
 ARROW_ENGINE_EXPORT
 Result<std::unique_ptr<substrait::Expression::Literal>> ToProto(const Datum&,
-                                                                ExtensionSet*);
+                                                                ExtensionSet*,
+                                                                const ConversionOptions&);
 
 }  // namespace engine
 }  // namespace arrow
diff --git a/cpp/src/arrow/engine/substrait/options.h b/cpp/src/arrow/engine/substrait/options.h
new file mode 100644
index 0000000000..dcb2088416
--- /dev/null
+++ b/cpp/src/arrow/engine/substrait/options.h
@@ -0,0 +1,65 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// This API is EXPERIMENTAL.
+
+#pragma once
+
+namespace arrow {
+namespace engine {
+
+/// How strictly to adhere to the input structure when converting between Substrait and
+/// Acero representations of a plan. This allows the user to trade conversion accuracy
+/// for performance and lenience.
+enum class ConversionStrictness {
+  /// When a primitive is used at the input that doesn't have an exact match at the
+  /// output, reject the conversion. This effectively asserts that there is no (known)
+  /// information loss in the conversion, and that plans should either round-trip back and
+  /// forth exactly or not at all. This option is primarily intended for testing and
+  /// debugging.
+  EXACT_ROUNDTRIP,
+
+  /// When a primitive is used at the input that doesn't have an exact match at the
+  /// output, attempt to model it with some collection of primitives at the output. This
+  /// means that even if the incoming plan is completely optimal by some metric, the
+  /// returned plan is fairly likely to not be optimal anymore, and round-trips back and
+  /// forth may make the plan increasingly suboptimal. However, every primitive at the
+  /// output can be (manually) traced back to exactly one primitive at the input, which
+  /// may be useful when debugging.
+  PRESERVE_STRUCTURE,
+
+  /// Behaves like PRESERVE_STRUCTURE, but prefers performance over structural accuracy.
+  /// Basic optimizations *may* be applied, in order to attempt to not regress in terms of
+  /// plan performance: if the incoming plan was already aggressively optimized, the goal
+  /// is for the output plan to not be less performant. In practical use cases, this is
+  /// probably the option you want.
+  ///
+  /// Note that no guarantees are made on top of PRESERVE_STRUCTURE. Past and future
+  /// versions of Arrow may even ignore this option entirely and treat it exactly like
+  /// PRESERVE_STRUCTURE.
+  BEST_EFFORT,
+};
+
+/// Options that control the conversion between Substrait and Acero representations of a
+/// plan.
+struct ConversionOptions {
+  /// \brief How strictly the converter should adhere to the structure of the input.
+  ConversionStrictness strictness = ConversionStrictness::BEST_EFFORT;
+};
+
+}  // namespace engine
+}  // namespace arrow
diff --git a/cpp/src/arrow/engine/substrait/relation_internal.cc b/cpp/src/arrow/engine/substrait/relation_internal.cc
index 8f6cb0ce36..8cc1da4d90 100644
--- a/cpp/src/arrow/engine/substrait/relation_internal.cc
+++ b/cpp/src/arrow/engine/substrait/relation_internal.cc
@@ -52,8 +52,8 @@ Status CheckRelCommon(const RelMessage& rel) {
   return Status::OK();
 }
 
-Result<DeclarationInfo> FromProto(const substrait::Rel& rel,
-                                  const ExtensionSet& ext_set) {
+Result<DeclarationInfo> FromProto(const substrait::Rel& rel, const ExtensionSet& ext_set,
+                                  const ConversionOptions& conversion_options) {
   static bool dataset_init = false;
   if (!dataset_init) {
     dataset_init = true;
@@ -65,13 +65,15 @@ Result<DeclarationInfo> FromProto(const substrait::Rel& rel,
       const auto& read = rel.read();
       RETURN_NOT_OK(CheckRelCommon(read));
 
-      ARROW_ASSIGN_OR_RAISE(auto base_schema, FromProto(read.base_schema(), ext_set));
+      ARROW_ASSIGN_OR_RAISE(auto base_schema,
+                            FromProto(read.base_schema(), ext_set, conversion_options));
 
       auto scan_options = std::make_shared<dataset::ScanOptions>();
       scan_options->use_threads = true;
 
       if (read.has_filter()) {
-        ARROW_ASSIGN_OR_RAISE(scan_options->filter, FromProto(read.filter(), ext_set));
+        ARROW_ASSIGN_OR_RAISE(scan_options->filter,
+                              FromProto(read.filter(), ext_set, conversion_options));
       }
 
       if (read.has_projection()) {
@@ -196,12 +198,14 @@ Result<DeclarationInfo> FromProto(const substrait::Rel& rel,
       if (!filter.has_input()) {
         return Status::Invalid("substrait::FilterRel with no input relation");
       }
-      ARROW_ASSIGN_OR_RAISE(auto input, FromProto(filter.input(), ext_set));
+      ARROW_ASSIGN_OR_RAISE(auto input,
+                            FromProto(filter.input(), ext_set, conversion_options));
 
       if (!filter.has_condition()) {
         return Status::Invalid("substrait::FilterRel with no condition expression");
       }
-      ARROW_ASSIGN_OR_RAISE(auto condition, FromProto(filter.condition(), ext_set));
+      ARROW_ASSIGN_OR_RAISE(auto condition,
+                            FromProto(filter.condition(), ext_set, conversion_options));
 
       return DeclarationInfo{
           compute::Declaration::Sequence({
@@ -218,7 +222,8 @@ Result<DeclarationInfo> FromProto(const substrait::Rel& rel,
       if (!project.has_input()) {
         return Status::Invalid("substrait::ProjectRel with no input relation");
       }
-      ARROW_ASSIGN_OR_RAISE(auto input, FromProto(project.input(), ext_set));
+      ARROW_ASSIGN_OR_RAISE(auto input,
+                            FromProto(project.input(), ext_set, conversion_options));
 
       // NOTE: Substrait ProjectRels *append* columns, while Acero's project node replaces
       // them. Therefore, we need to prefix all the current columns for compatibility.
@@ -229,7 +234,8 @@ Result<DeclarationInfo> FromProto(const substrait::Rel& rel,
       }
       for (const auto& expr : project.expressions()) {
         expressions.emplace_back();
-        ARROW_ASSIGN_OR_RAISE(expressions.back(), FromProto(expr, ext_set));
+        ARROW_ASSIGN_OR_RAISE(expressions.back(),
+                              FromProto(expr, ext_set, conversion_options));
       }
 
       auto num_columns = static_cast<int>(expressions.size());
@@ -279,14 +285,17 @@ Result<DeclarationInfo> FromProto(const substrait::Rel& rel,
           return Status::Invalid("Unsupported join type");
       }
 
-      ARROW_ASSIGN_OR_RAISE(auto left, FromProto(join.left(), ext_set));
-      ARROW_ASSIGN_OR_RAISE(auto right, FromProto(join.right(), ext_set));
+      ARROW_ASSIGN_OR_RAISE(auto left,
+                            FromProto(join.left(), ext_set, conversion_options));
+      ARROW_ASSIGN_OR_RAISE(auto right,
+                            FromProto(join.right(), ext_set, conversion_options));
 
       if (!join.has_expression()) {
         return Status::Invalid("substrait::JoinRel with no expression");
       }
 
-      ARROW_ASSIGN_OR_RAISE(auto expression, FromProto(join.expression(), ext_set));
+      ARROW_ASSIGN_OR_RAISE(auto expression,
+                            FromProto(join.expression(), ext_set, conversion_options));
 
       const auto* callptr = expression.call();
       if (!callptr) {
@@ -331,7 +340,8 @@ Result<DeclarationInfo> FromProto(const substrait::Rel& rel,
         return Status::Invalid("substrait::AggregateRel with no input relation");
       }
 
-      ARROW_ASSIGN_OR_RAISE(auto input, FromProto(aggregate.input(), ext_set));
+      ARROW_ASSIGN_OR_RAISE(auto input,
+                            FromProto(aggregate.input(), ext_set, conversion_options));
 
       if (aggregate.groupings_size() > 1) {
         return Status::NotImplemented(
@@ -342,8 +352,8 @@ Result<DeclarationInfo> FromProto(const substrait::Rel& rel,
       auto group = aggregate.groupings(0);
       keys.reserve(group.grouping_expressions_size());
       for (int exp_id = 0; exp_id < group.grouping_expressions_size(); exp_id++) {
-        ARROW_ASSIGN_OR_RAISE(auto expr,
-                              FromProto(group.grouping_expressions(exp_id), ext_set));
+        ARROW_ASSIGN_OR_RAISE(auto expr, FromProto(group.grouping_expressions(exp_id),
+                                                   ext_set, conversion_options));
         const auto* field_ref = expr.field_ref();
         if (field_ref) {
           keys.emplace_back(std::move(*field_ref));
@@ -372,8 +382,8 @@ Result<DeclarationInfo> FromProto(const substrait::Rel& rel,
           auto func_name = std::string(func_record.id.name);
           // aggregate target
           auto subs_func_args = agg_func.arguments(0);
-          ARROW_ASSIGN_OR_RAISE(auto field_expr,
-                                FromProto(subs_func_args.value(), ext_set));
+          ARROW_ASSIGN_OR_RAISE(auto field_expr, FromProto(subs_func_args.value(),
+                                                           ext_set, conversion_options));
           auto target = field_expr.field_ref();
           if (!target) {
             return Status::Invalid(
diff --git a/cpp/src/arrow/engine/substrait/relation_internal.h b/cpp/src/arrow/engine/substrait/relation_internal.h
index 4a8b6c209c..3699d1f657 100644
--- a/cpp/src/arrow/engine/substrait/relation_internal.h
+++ b/cpp/src/arrow/engine/substrait/relation_internal.h
@@ -21,6 +21,7 @@
 
 #include "arrow/compute/exec/exec_plan.h"
 #include "arrow/engine/substrait/extension_types.h"
+#include "arrow/engine/substrait/options.h"
 #include "arrow/engine/substrait/serde.h"
 #include "arrow/engine/substrait/visibility.h"
 #include "arrow/type_fwd.h"
@@ -40,7 +41,8 @@ struct DeclarationInfo {
 };
 
 ARROW_ENGINE_EXPORT
-Result<DeclarationInfo> FromProto(const substrait::Rel&, const ExtensionSet&);
+Result<DeclarationInfo> FromProto(const substrait::Rel&, const ExtensionSet&,
+                                  const ConversionOptions&);
 
 }  // namespace engine
 }  // namespace arrow
diff --git a/cpp/src/arrow/engine/substrait/serde.cc b/cpp/src/arrow/engine/substrait/serde.cc
index 238008a714..87ad88dccb 100644
--- a/cpp/src/arrow/engine/substrait/serde.cc
+++ b/cpp/src/arrow/engine/substrait/serde.cc
@@ -52,10 +52,11 @@ Result<Message> ParseFromBuffer(const Buffer& buf) {
   return message;
 }
 
-Result<compute::Declaration> DeserializeRelation(const Buffer& buf,
-                                                 const ExtensionSet& ext_set) {
+Result<compute::Declaration> DeserializeRelation(
+    const Buffer& buf, const ExtensionSet& ext_set,
+    const ConversionOptions& conversion_options) {
   ARROW_ASSIGN_OR_RAISE(auto rel, ParseFromBuffer<substrait::Rel>(buf));
-  ARROW_ASSIGN_OR_RAISE(auto decl_info, FromProto(rel, ext_set));
+  ARROW_ASSIGN_OR_RAISE(auto decl_info, FromProto(rel, ext_set, conversion_options));
   return std::move(decl_info.declaration);
 }
 
@@ -114,7 +115,8 @@ DeclarationFactory MakeWriteDeclarationFactory(
 
 Result<std::vector<compute::Declaration>> DeserializePlans(
     const Buffer& buf, DeclarationFactory declaration_factory,
-    const ExtensionIdRegistry* registry, ExtensionSet* ext_set_out) {
+    const ExtensionIdRegistry* registry, ExtensionSet* ext_set_out,
+    const ConversionOptions& conversion_options) {
   ARROW_ASSIGN_OR_RAISE(auto plan, ParseFromBuffer<substrait::Plan>(buf));
 
   ARROW_ASSIGN_OR_RAISE(auto ext_set, GetExtensionSetFromPlan(plan, registry));
@@ -123,8 +125,8 @@ Result<std::vector<compute::Declaration>> DeserializePlans(
   for (const substrait::PlanRel& plan_rel : plan.relations()) {
     ARROW_ASSIGN_OR_RAISE(
         auto decl_info,
-        FromProto(plan_rel.has_root() ? plan_rel.root().input() : plan_rel.rel(),
-                  ext_set));
+        FromProto(plan_rel.has_root() ? plan_rel.root().input() : plan_rel.rel(), ext_set,
+                  conversion_options));
     std::vector<std::string> names;
     if (plan_rel.has_root()) {
       names.assign(plan_rel.root().names().begin(), plan_rel.root().names().end());
@@ -147,16 +149,18 @@ Result<std::vector<compute::Declaration>> DeserializePlans(
 
 Result<std::vector<compute::Declaration>> DeserializePlans(
     const Buffer& buf, const ConsumerFactory& consumer_factory,
-    const ExtensionIdRegistry* registry, ExtensionSet* ext_set_out) {
+    const ExtensionIdRegistry* registry, ExtensionSet* ext_set_out,
+    const ConversionOptions& conversion_options) {
   return DeserializePlans(buf, MakeConsumingSinkDeclarationFactory(consumer_factory),
-                          registry, ext_set_out);
+                          registry, ext_set_out, conversion_options);
 }
 
 Result<std::vector<compute::Declaration>> DeserializePlans(
     const Buffer& buf, const WriteOptionsFactory& write_options_factory,
-    const ExtensionIdRegistry* registry, ExtensionSet* ext_set_out) {
+    const ExtensionIdRegistry* registry, ExtensionSet* ext_set_out,
+    const ConversionOptions& conversion_options) {
   return DeserializePlans(buf, MakeWriteDeclarationFactory(write_options_factory),
-                          registry, ext_set_out);
+                          registry, ext_set_out, conversion_options);
 }
 
 namespace {
@@ -176,7 +180,8 @@ Result<std::shared_ptr<compute::ExecPlan>> MakeSingleDeclarationPlan(
 
 Result<std::shared_ptr<compute::ExecPlan>> DeserializePlan(
     const Buffer& buf, const std::shared_ptr<compute::SinkNodeConsumer>& consumer,
-    const ExtensionIdRegistry* registry, ExtensionSet* ext_set_out) {
+    const ExtensionIdRegistry* registry, ExtensionSet* ext_set_out,
+    const ConversionOptions& conversion_options) {
   bool factory_done = false;
   auto single_consumer = [&factory_done, &consumer] {
     if (factory_done) {
@@ -185,14 +190,16 @@ Result<std::shared_ptr<compute::ExecPlan>> DeserializePlan(
     factory_done = true;
     return consumer;
   };
-  ARROW_ASSIGN_OR_RAISE(auto declarations,
-                        DeserializePlans(buf, single_consumer, registry, ext_set_out));
+  ARROW_ASSIGN_OR_RAISE(
+      auto declarations,
+      DeserializePlans(buf, single_consumer, registry, ext_set_out, conversion_options));
   return MakeSingleDeclarationPlan(declarations);
 }
 
 Result<std::shared_ptr<compute::ExecPlan>> DeserializePlan(
     const Buffer& buf, const std::shared_ptr<dataset::WriteNodeOptions>& write_options,
-    const ExtensionIdRegistry* registry, ExtensionSet* ext_set_out) {
+    const ExtensionIdRegistry* registry, ExtensionSet* ext_set_out,
+    const ConversionOptions& conversion_options) {
   bool factory_done = false;
   auto single_write_options = [&factory_done, &write_options] {
     if (factory_done) {
@@ -201,47 +208,55 @@ Result<std::shared_ptr<compute::ExecPlan>> DeserializePlan(
     factory_done = true;
     return write_options;
   };
-  ARROW_ASSIGN_OR_RAISE(auto declarations, DeserializePlans(buf, single_write_options,
-                                                            registry, ext_set_out));
+  ARROW_ASSIGN_OR_RAISE(auto declarations,
+                        DeserializePlans(buf, single_write_options, registry, ext_set_out,
+                                         conversion_options));
   return MakeSingleDeclarationPlan(declarations);
 }
 
-Result<std::shared_ptr<Schema>> DeserializeSchema(const Buffer& buf,
-                                                  const ExtensionSet& ext_set) {
+Result<std::shared_ptr<Schema>> DeserializeSchema(
+    const Buffer& buf, const ExtensionSet& ext_set,
+    const ConversionOptions& conversion_options) {
   ARROW_ASSIGN_OR_RAISE(auto named_struct, ParseFromBuffer<substrait::NamedStruct>(buf));
-  return FromProto(named_struct, ext_set);
+  return FromProto(named_struct, ext_set, conversion_options);
 }
 
-Result<std::shared_ptr<Buffer>> SerializeSchema(const Schema& schema,
-                                                ExtensionSet* ext_set) {
-  ARROW_ASSIGN_OR_RAISE(auto named_struct, ToProto(schema, ext_set));
+Result<std::shared_ptr<Buffer>> SerializeSchema(
+    const Schema& schema, ExtensionSet* ext_set,
+    const ConversionOptions& conversion_options) {
+  ARROW_ASSIGN_OR_RAISE(auto named_struct, ToProto(schema, ext_set, conversion_options));
   std::string serialized = named_struct->SerializeAsString();
   return Buffer::FromString(std::move(serialized));
 }
 
-Result<std::shared_ptr<DataType>> DeserializeType(const Buffer& buf,
-                                                  const ExtensionSet& ext_set) {
+Result<std::shared_ptr<DataType>> DeserializeType(
+    const Buffer& buf, const ExtensionSet& ext_set,
+    const ConversionOptions& conversion_options) {
   ARROW_ASSIGN_OR_RAISE(auto type, ParseFromBuffer<substrait::Type>(buf));
-  ARROW_ASSIGN_OR_RAISE(auto type_nullable, FromProto(type, ext_set));
+  ARROW_ASSIGN_OR_RAISE(auto type_nullable, FromProto(type, ext_set, conversion_options));
   return std::move(type_nullable.first);
 }
 
-Result<std::shared_ptr<Buffer>> SerializeType(const DataType& type,
-                                              ExtensionSet* ext_set) {
-  ARROW_ASSIGN_OR_RAISE(auto st_type, ToProto(type, /*nullable=*/true, ext_set));
+Result<std::shared_ptr<Buffer>> SerializeType(
+    const DataType& type, ExtensionSet* ext_set,
+    const ConversionOptions& conversion_options) {
+  ARROW_ASSIGN_OR_RAISE(auto st_type,
+                        ToProto(type, /*nullable=*/true, ext_set, conversion_options));
   std::string serialized = st_type->SerializeAsString();
   return Buffer::FromString(std::move(serialized));
 }
 
-Result<compute::Expression> DeserializeExpression(const Buffer& buf,
-                                                  const ExtensionSet& ext_set) {
+Result<compute::Expression> DeserializeExpression(
+    const Buffer& buf, const ExtensionSet& ext_set,
+    const ConversionOptions& conversion_options) {
   ARROW_ASSIGN_OR_RAISE(auto expr, ParseFromBuffer<substrait::Expression>(buf));
-  return FromProto(expr, ext_set);
+  return FromProto(expr, ext_set, conversion_options);
 }
 
-Result<std::shared_ptr<Buffer>> SerializeExpression(const compute::Expression& expr,
-                                                    ExtensionSet* ext_set) {
-  ARROW_ASSIGN_OR_RAISE(auto st_expr, ToProto(expr, ext_set));
+Result<std::shared_ptr<Buffer>> SerializeExpression(
+    const compute::Expression& expr, ExtensionSet* ext_set,
+    const ConversionOptions& conversion_options) {
+  ARROW_ASSIGN_OR_RAISE(auto st_expr, ToProto(expr, ext_set, conversion_options));
   std::string serialized = st_expr->SerializeAsString();
   return Buffer::FromString(std::move(serialized));
 }
diff --git a/cpp/src/arrow/engine/substrait/serde.h b/cpp/src/arrow/engine/substrait/serde.h
index 9005553d30..5214606e1c 100644
--- a/cpp/src/arrow/engine/substrait/serde.h
+++ b/cpp/src/arrow/engine/substrait/serde.h
@@ -28,6 +28,7 @@
 #include "arrow/compute/exec/options.h"
 #include "arrow/dataset/file_base.h"
 #include "arrow/engine/substrait/extension_set.h"
+#include "arrow/engine/substrait/options.h"
 #include "arrow/engine/substrait/visibility.h"
 #include "arrow/result.h"
 #include "arrow/util/string_view.h"
@@ -51,11 +52,13 @@ using ConsumerFactory = std::function<std::shared_ptr<compute::SinkNodeConsumer>
 /// \param[in] registry an extension-id-registry to use, or null for the default one.
 /// \param[out] ext_set_out if non-null, the extension mapping used by the Substrait
 /// Plan is returned here.
+/// \param[in] conversion_options options to control how the conversion is to be done.
 /// \return a vector of ExecNode declarations, one for each toplevel relation in the
 /// Substrait Plan
 ARROW_ENGINE_EXPORT Result<std::vector<compute::Declaration>> DeserializePlans(
     const Buffer& buf, const ConsumerFactory& consumer_factory,
-    const ExtensionIdRegistry* registry = NULLPTR, ExtensionSet* ext_set_out = NULLPTR);
+    const ExtensionIdRegistry* registry = NULLPTR, ExtensionSet* ext_set_out = NULLPTR,
+    const ConversionOptions& conversion_options = {});
 
 /// \brief Deserializes a single-relation Substrait Plan message to an execution plan
 ///
@@ -68,12 +71,14 @@ ARROW_ENGINE_EXPORT Result<std::vector<compute::Declaration>> DeserializePlans(
 /// relation
 /// \param[in] registry an extension-id-registry to use, or null for the default one.
 /// \param[out] ext_set_out if non-null, the extension mapping used by the Substrait
 /// Plan is returned here.
+/// \param[in] conversion_options options to control how the conversion is to be done.
 /// \return an ExecNode corresponding to the single toplevel relation in the Substrait
 /// Plan
 Result<std::shared_ptr<compute::ExecPlan>> DeserializePlan(
     const Buffer& buf, const std::shared_ptr<compute::SinkNodeConsumer>& consumer,
-    const ExtensionIdRegistry* registry = NULLPTR, ExtensionSet* ext_set_out = NULLPTR);
+    const ExtensionIdRegistry* registry = NULLPTR, ExtensionSet* ext_set_out = NULLPTR,
+    const ConversionOptions& conversion_options = {});
 
 /// Factory function type for generating the write options of a node consuming the batches
 /// produced by each toplevel Substrait relation when deserializing a Substrait Plan.
@@ -91,11 +96,13 @@ using WriteOptionsFactory = std::function<std::shared_ptr<dataset::WriteNodeOpti
 /// \param[in] registry an extension-id-registry to use, or null for the default one.
 /// \param[out] ext_set_out if non-null, the extension mapping used by the Substrait
 /// Plan is returned here.
+/// \param[in] conversion_options options to control how the conversion is to be done.
 /// \return a vector of ExecNode declarations, one for each toplevel relation in the
 /// Substrait Plan
 ARROW_ENGINE_EXPORT Result<std::vector<compute::Declaration>> DeserializePlans(
     const Buffer& buf, const WriteOptionsFactory& write_options_factory,
-    const ExtensionIdRegistry* registry = NULLPTR, ExtensionSet* ext_set_out = NULLPTR);
+    const ExtensionIdRegistry* registry = NULLPTR, ExtensionSet* ext_set_out = NULLPTR,
+    const ConversionOptions& conversion_options = {});
 
 /// \brief Deserializes a single-relation Substrait Plan message to an execution plan
 ///
@@ -109,11 +116,13 @@ ARROW_ENGINE_EXPORT Result<std::vector<compute::Declaration>> DeserializePlans(
 /// \param[in] registry an extension-id-registry to use, or null for the default one.
 /// \param[out] ext_set_out if non-null, the extension mapping used by the Substrait
 /// Plan is returned here.
+/// \param[in] conversion_options options to control how the conversion is to be done.
 /// \return a vector of ExecNode declarations, one for each toplevel relation in the
 /// Substrait Plan
 ARROW_ENGINE_EXPORT Result<std::shared_ptr<compute::ExecPlan>> DeserializePlan(
     const Buffer& buf, const std::shared_ptr<dataset::WriteNodeOptions>& write_options,
-    const ExtensionIdRegistry* registry = NULLPTR, ExtensionSet* ext_set_out = NULLPTR);
+    const ExtensionIdRegistry* registry = NULLPTR, ExtensionSet* ext_set_out = NULLPTR,
+    const ConversionOptions& conversion_options = {});
 
 /// \brief Deserializes a Substrait Type message to the corresponding Arrow type
 ///
@@ -121,21 +130,25 @@ ARROW_ENGINE_EXPORT Result<std::shared_ptr<compute::ExecPlan>> DeserializePlan(
 /// message
 /// \param[in] ext_set the extension mapping to use, normally provided by the
 /// surrounding Plan message
+/// \param[in] conversion_options options to control how the conversion is to be done.
 /// \return the corresponding Arrow data type
 ARROW_ENGINE_EXPORT
-Result<std::shared_ptr<DataType>> DeserializeType(const Buffer& buf,
-                                                  const ExtensionSet& ext_set);
+Result<std::shared_ptr<DataType>> DeserializeType(
+    const Buffer& buf, const ExtensionSet& ext_set,
+    const ConversionOptions& conversion_options = {});
 
 /// \brief Serializes an Arrow type to a Substrait Type message
 ///
 /// \param[in] type the Arrow data type to serialize
 /// \param[in,out] ext_set the extension mapping to use; may be updated to add a
 /// mapping for the given type
+/// \param[in] conversion_options options to control how the conversion is to be done.
 /// \return a buffer containing the protobuf serialization of the corresponding Substrait
 /// Type message
 ARROW_ENGINE_EXPORT
-Result<std::shared_ptr<Buffer>> SerializeType(const DataType& type,
-                                              ExtensionSet* ext_set);
+Result<std::shared_ptr<Buffer>> SerializeType(
+    const DataType& type, ExtensionSet* ext_set,
+    const ConversionOptions& conversion_options = {});
 
 /// \brief Deserializes a Substrait NamedStruct message to an Arrow schema
 ///
@@ -143,21 +156,25 @@ Result<std::shared_ptr<Buffer>> SerializeType(const DataType& type,
 /// NamedStruct message
 /// \param[in] ext_set the extension mapping to use, normally provided by the
 /// surrounding Plan message
+/// \param[in] conversion_options options to control how the conversion is to be done.
 /// \return the corresponding Arrow schema
 ARROW_ENGINE_EXPORT
-Result<std::shared_ptr<Schema>> DeserializeSchema(const Buffer& buf,
-                                                  const ExtensionSet& ext_set);
+Result<std::shared_ptr<Schema>> DeserializeSchema(
+    const Buffer& buf, const ExtensionSet& ext_set,
+    const ConversionOptions& conversion_options = {});
 
 /// \brief Serializes an Arrow schema to a Substrait NamedStruct message
 ///
 /// \param[in] schema the Arrow schema to serialize
 /// \param[in,out] ext_set the extension mapping to use; may be updated to add
 /// mappings for the types used in the schema
+/// \param[in] conversion_options options to control how the conversion is to be done.
 /// \return a buffer containing the protobuf serialization of the corresponding Substrait
 /// NamedStruct message
 ARROW_ENGINE_EXPORT
-Result<std::shared_ptr<Buffer>> SerializeSchema(const Schema& schema,
-                                                ExtensionSet* ext_set);
+Result<std::shared_ptr<Buffer>> SerializeSchema(
+    const Schema& schema, ExtensionSet* ext_set,
+    const ConversionOptions& conversion_options = {});
 
 /// \brief Deserializes a Substrait Expression message to a compute expression
 ///
@@ -165,21 +182,25 @@ Result<std::shared_ptr<Buffer>> SerializeSchema(const Schema& schema,
 /// Expression message
 /// \param[in] ext_set the extension mapping to use, normally provided by the
 /// surrounding Plan message
+/// \param[in] conversion_options options to control how the conversion is to be done.
 /// \return the corresponding Arrow compute expression
 ARROW_ENGINE_EXPORT
-Result<compute::Expression> DeserializeExpression(const Buffer& buf,
-                                                  const ExtensionSet& ext_set);
+Result<compute::Expression> DeserializeExpression(
+    const Buffer& buf, const ExtensionSet& ext_set,
+    const ConversionOptions& conversion_options = {});
 
 /// \brief Serializes an Arrow compute expression to a Substrait Expression message
 ///
 /// \param[in] expr the Arrow compute expression to serialize
 /// \param[in,out] ext_set the extension mapping to use; may be updated to add
 /// mappings for the types used in the expression
+/// \param[in] conversion_options options to control how the conversion is to be done.
 /// \return a buffer containing the protobuf serialization of the corresponding Substrait
 /// Expression message
 ARROW_ENGINE_EXPORT
-Result<std::shared_ptr<Buffer>> SerializeExpression(const compute::Expression& expr,
-                                                    ExtensionSet* ext_set);
+Result<std::shared_ptr<Buffer>> SerializeExpression(
+    const compute::Expression& expr, ExtensionSet* ext_set,
+    const ConversionOptions& conversion_options = {});
 
 /// \brief Deserializes a Substrait Rel (relation) message to an ExecNode declaration
 ///
@@ -187,9 +208,11 @@ Result<std::shared_ptr<Buffer>> SerializeExpression(const compute::Expression& e
 /// Rel message
 /// \param[in] ext_set the extension mapping to use, normally provided by the
 /// surrounding Plan message
+/// \param[in] conversion_options options to control how the conversion is to be done.
 /// \return the corresponding ExecNode declaration
 ARROW_ENGINE_EXPORT Result<compute::Declaration> DeserializeRelation(
-    const Buffer& buf, const ExtensionSet& ext_set);
+    const Buffer& buf, const ExtensionSet& ext_set,
+    const ConversionOptions& conversion_options = {});
 
 namespace internal {
 
diff --git a/cpp/src/arrow/engine/substrait/type_internal.cc b/cpp/src/arrow/engine/substrait/type_internal.cc
index 6c65b32e2a..310413a892 100644
--- a/cpp/src/arrow/engine/substrait/type_internal.cc
+++ b/cpp/src/arrow/engine/substrait/type_internal.cc
@@ -62,7 +62,8 @@ Result<std::pair<std::shared_ptr<DataType>, bool>> FromProtoImpl(
 template <typename Types, typename NextName>
 Result<FieldVector> FieldsFromProto(int size, const Types& types,
                                     const NextName& next_name,
-                                    const ExtensionSet& ext_set) {
+                                    const ExtensionSet& ext_set,
+                                    const ConversionOptions& conversion_options) {
   FieldVector fields(size);
   for (int i = 0; i < size; ++i) {
     std::string name = next_name();
@@ -72,13 +73,14 @@ Result<FieldVector> FieldsFromProto(int size, const Types& types,
     if (types.Get(i).has_struct_()) {
       const auto& struct_ = types.Get(i).struct_();
 
-      ARROW_ASSIGN_OR_RAISE(
-          type, FieldsFromProto(struct_.types_size(), struct_.types(), next_name, ext_set)
-                    .Map(arrow::struct_));
+      ARROW_ASSIGN_OR_RAISE(type, FieldsFromProto(struct_.types_size(), struct_.types(),
+                                                  next_name, ext_set, conversion_options)
+                                      .Map(arrow::struct_));
 
       nullable = IsNullable(struct_);
     } else {
-      ARROW_ASSIGN_OR_RAISE(std::tie(type, nullable), FromProto(types.Get(i), ext_set));
+      ARROW_ASSIGN_OR_RAISE(std::tie(type, nullable),
+                            FromProto(types.Get(i), ext_set, conversion_options));
     }
 
     fields[i] = field(std::move(name), std::move(type), nullable);
@@ -89,7 +91,8 @@ Result<FieldVector> FieldsFromProto(int size, const Types& types,
 }  // namespace
 
 Result<std::pair<std::shared_ptr<DataType>, bool>> FromProto(
-    const ::substrait::Type& type, const ExtensionSet& ext_set) {
+    const ::substrait::Type& type, const ExtensionSet& ext_set,
+    const ConversionOptions& conversion_options) {
   switch (type.kind_case()) {
     case ::substrait::Type::kBool:
       return FromProtoImpl<BooleanType>(type.bool_());
@@ -151,9 +154,10 @@ Result<std::pair<std::shared_ptr<DataType>, bool>> FromProto(
     case ::substrait::Type::kStruct: {
       const auto& struct_ = type.struct_();
 
-      ARROW_ASSIGN_OR_RAISE(auto fields, FieldsFromProto(
-                                             struct_.types_size(), struct_.types(),
-                                             /*next_name=*/[] { return ""; }, ext_set));
+      ARROW_ASSIGN_OR_RAISE(
+          auto fields, FieldsFromProto(
+                           struct_.types_size(), struct_.types(),
+                           /*next_name=*/[] { return ""; }, ext_set, conversion_options));
 
       return FromProtoImpl<StructType>(struct_, std::move(fields));
     }
@@ -167,7 +171,8 @@ Result<std::pair<std::shared_ptr<DataType>, bool>> FromProto(
             list.DebugString());
       }
 
-      ARROW_ASSIGN_OR_RAISE(auto type_nullable, FromProto(list.type(), ext_set));
+      ARROW_ASSIGN_OR_RAISE(auto type_nullable,
+                            FromProto(list.type(), ext_set, conversion_options));
       return FromProtoImpl<ListType>(
           list, field("item", std::move(type_nullable.first), type_nullable.second));
     }
@@ -182,8 +187,10 @@ Result<std::pair<std::shared_ptr<DataType>, bool>> FromProto(
                                missing, " type in ", map.DebugString());
       }
 
-      ARROW_ASSIGN_OR_RAISE(auto key_nullable, FromProto(map.key(), ext_set));
-      ARROW_ASSIGN_OR_RAISE(auto value_nullable, FromProto(map.value(), ext_set));
+      ARROW_ASSIGN_OR_RAISE(auto key_nullable,
+                            FromProto(map.key(), ext_set, conversion_options));
+      ARROW_ASSIGN_OR_RAISE(auto value_nullable,
+                            FromProto(map.value(), ext_set, conversion_options));
 
       if (key_nullable.second) {
         return Status::Invalid(
@@ -296,8 +303,8 @@ struct DataTypeToProtoImpl {
 
   Status Visit(const ListType& t) {
     // FIXME assert default field name; custom ones won't roundtrip
-    ARROW_ASSIGN_OR_RAISE(
-        auto type, ToProto(*t.value_type(), t.value_field()->nullable(), ext_set_));
+    ARROW_ASSIGN_OR_RAISE(auto type, ToProto(*t.value_type(), t.value_field()->nullable(),
+                                             ext_set_, conversion_options_));
     SetWithThen(&::substrait::Type::set_allocated_list)
         ->set_allocated_type(type.release());
     return Status::OK();
@@ -313,8 +320,8 @@ struct DataTypeToProtoImpl {
         return Status::Invalid(
             "::substrait::Type::Struct does not support field metadata");
       }
-      ARROW_ASSIGN_OR_RAISE(auto type,
-                            ToProto(*field->type(), field->nullable(), ext_set_));
+      ARROW_ASSIGN_OR_RAISE(auto type, ToProto(*field->type(), field->nullable(),
+                                               ext_set_, conversion_options_));
       types->AddAllocated(type.release());
     }
     return Status::OK();
@@ -328,11 +335,12 @@ struct DataTypeToProtoImpl {
     // FIXME assert default field names; custom ones won't roundtrip
     auto map = SetWithThen(&::substrait::Type::set_allocated_map);
 
-    ARROW_ASSIGN_OR_RAISE(auto key, ToProto(*t.key_type(), /*nullable=*/false, ext_set_));
+    ARROW_ASSIGN_OR_RAISE(auto key, ToProto(*t.key_type(), /*nullable=*/false, ext_set_,
+                                            conversion_options_));
     map->set_allocated_key(key.release());
 
-    ARROW_ASSIGN_OR_RAISE(auto value,
-                          ToProto(*t.item_type(), t.item_field()->nullable(), ext_set_));
+    ARROW_ASSIGN_OR_RAISE(auto value, ToProto(*t.item_type(), t.item_field()->nullable(),
+                                              ext_set_, conversion_options_));
     map->set_allocated_value(value.release());
 
     return Status::OK();
@@ -407,18 +415,22 @@ struct DataTypeToProtoImpl {
   ::substrait::Type* type_;
   bool nullable_;
   ExtensionSet* ext_set_;
+  const ConversionOptions& conversion_options_;
 };
 }  // namespace
 
-Result<std::unique_ptr<::substrait::Type>> ToProto(const DataType& type, bool nullable,
-                                                   ExtensionSet* ext_set) {
+Result<std::unique_ptr<::substrait::Type>> ToProto(
+    const DataType& type, bool nullable, ExtensionSet* ext_set,
+    const ConversionOptions& conversion_options) {
   auto out = internal::make_unique<::substrait::Type>();
-  RETURN_NOT_OK((DataTypeToProtoImpl{out.get(), nullable, ext_set})(type));
+  RETURN_NOT_OK(
+      (DataTypeToProtoImpl{out.get(), nullable, ext_set, conversion_options})(type));
   return std::move(out);
 }
 
 Result<std::shared_ptr<Schema>> FromProto(const ::substrait::NamedStruct& named_struct,
-                                          const ExtensionSet& ext_set) {
+                                          const ExtensionSet& ext_set,
+                                          const ConversionOptions& conversion_options) {
   if (!named_struct.has_struct_()) {
     return Status::Invalid("While converting ", named_struct.DebugString(),
                            " no anonymous struct type was provided to which names "
@@ -436,7 +448,7 @@ Result<std::shared_ptr<Schema>> FromProto(const ::substrait::NamedStruct& named_
                                                       ? named_struct.names().Get(i)
                                                       : "";
                                          },
-                                         ext_set));
+                                         ext_set, conversion_options));
 
   if (requested_names_count != named_struct.names_size()) {
     return Status::Invalid("While converting ", named_struct.DebugString(), " received ",
@@ -460,8 +472,9 @@ void ToProtoGetDepthFirstNames(const FieldVector& fields,
 }
 }  // namespace
 
-Result<std::unique_ptr<::substrait::NamedStruct>> ToProto(const Schema& schema,
-                                                          ExtensionSet* ext_set) {
+Result<std::unique_ptr<::substrait::NamedStruct>> ToProto(
+    const Schema& schema, ExtensionSet* ext_set,
+    const ConversionOptions& conversion_options) {
   if (schema.metadata()) {
     return Status::Invalid("::substrait::NamedStruct does not support schema metadata");
   }
@@ -481,7 +494,8 @@ Result<std::unique_ptr<::substrait::NamedStruct>> ToProto(const Schema& schema,
       return Status::Invalid("::substrait::NamedStruct does not support field metadata");
     }
 
-    ARROW_ASSIGN_OR_RAISE(auto type, ToProto(*field->type(), field->nullable(), ext_set));
+    ARROW_ASSIGN_OR_RAISE(auto type, ToProto(*field->type(), field->nullable(), ext_set,
+                                             conversion_options));
     types->AddAllocated(type.release());
   }
 
diff --git a/cpp/src/arrow/engine/substrait/type_internal.h b/cpp/src/arrow/engine/substrait/type_internal.h
index e7c5bf2a87..6db9aea01a 100644
--- a/cpp/src/arrow/engine/substrait/type_internal.h
+++ b/cpp/src/arrow/engine/substrait/type_internal.h
@@ -22,6 +22,7 @@
 #include <utility>
 
 #include "arrow/engine/substrait/extension_set.h"
+#include "arrow/engine/substrait/options.h"
 #include "arrow/engine/substrait/visibility.h"
 #include "arrow/type_fwd.h"
 
@@ -32,18 +33,20 @@ namespace engine {
 
 ARROW_ENGINE_EXPORT
 Result<std::pair<std::shared_ptr<DataType>, bool>> FromProto(const substrait::Type&,
-                                                             const ExtensionSet&);
+                                                             const ExtensionSet&,
+                                                             const ConversionOptions&);
 
 ARROW_ENGINE_EXPORT
 Result<std::unique_ptr<substrait::Type>> ToProto(const DataType&, bool nullable,
-                                                 ExtensionSet*);
+                                                 ExtensionSet*, const ConversionOptions&);
 
 ARROW_ENGINE_EXPORT
 Result<std::shared_ptr<Schema>> FromProto(const substrait::NamedStruct&,
-                                          const ExtensionSet&);
+                                          const ExtensionSet&, const ConversionOptions&);
 
 ARROW_ENGINE_EXPORT
-Result<std::unique_ptr<substrait::NamedStruct>> ToProto(const Schema&, ExtensionSet*);
+Result<std::unique_ptr<substrait::NamedStruct>> ToProto(const Schema&, ExtensionSet*,
+                                                        const ConversionOptions&);
 
 inline std::string TimestampTzTimezoneString() { return "UTC"; }