You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ap...@apache.org on 2019/04/09 18:33:01 UTC

[arrow] branch master updated: ARROW-4622: [C++][Python] MakeDense and MakeSparse in UnionArray should accept a vector of Field

This is an automated email from the ASF dual-hosted git repository.

apitrou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 003d8d0  ARROW-4622: [C++][Python] MakeDense and MakeSparse in UnionArray should accept a vector of Field
003d8d0 is described below

commit 003d8d0d50b68d9629d567cae12a3aed4156125b
Author: Kenta Murata <mr...@mrkn.jp>
AuthorDate: Tue Apr 9 20:32:54 2019 +0200

    ARROW-4622: [C++][Python] MakeDense and MakeSparse in UnionArray should accept a vector of Field
    
    ## TODO:
    
    - [x] Write tests for existing behaviors
    - [x] Support supplying field names
    - [x] union_(field_names, children, mode)
    - [x] Support supplying type codes
    - [x] make format
    - [x] Fix GLib binding
    - [x] Fix Ruby binding
    - [x] Fix Python binding
    - [ ] Make CI green
    
    Author: Kenta Murata <mr...@mrkn.jp>
    Author: Antoine Pitrou <an...@python.org>
    
    Closes #3723 from mrkn/make_union_array_with_field_names and squashes the following commits:
    
    1480c3c72 <Antoine Pitrou> Some nits
    90db62f97 <Kenta Murata>  Fix coding style
    c81b1c4fd <Kenta Murata>  ninja format
    8c598c9cf <Kenta Murata>  Consolidate test cases
    6c840454c <Kenta Murata>  Fix variable names
    b04e7cfdb <Kenta Murata>  Fix style
    40c1c6257 <Kenta Murata>  Add support to create union array with field names and type codes
    ec24b41d9 <Kenta Murata>  Refactoring
    66ae94210 <Kenta Murata>  Add support to supply type codes
    dc475ad1d <Kenta Murata>  make format
    18c574a51 <Kenta Murata>  Add support to supply type codes to union_
    d64882111 <Kenta Murata>  Replace MakeUnionType with union_
    09fd89ce9 <Kenta Murata>  Add support to supply field names
    ce7ee3752 <Kenta Murata>  Add tests of MakeDense and MakeSparse of UnionArray
---
 cpp/src/arrow/CMakeLists.txt         |   6 +-
 cpp/src/arrow/array-union-test.cc    | 133 ++++++++++++++++++++++++++++++++---
 cpp/src/arrow/array.cc               |  28 +++++++-
 cpp/src/arrow/array.h                | 119 +++++++++++++++++++++++++++++++
 cpp/src/arrow/type.cc                |  14 +++-
 cpp/src/arrow/type.h                 |  18 ++++-
 python/pyarrow/array.pxi             |  34 +++++++--
 python/pyarrow/includes/libarrow.pxd |   4 ++
 python/pyarrow/tests/test_array.py   |  69 ++++++++++++++++--
 9 files changed, 399 insertions(+), 26 deletions(-)

diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt
index d4fb19f..c045704 100644
--- a/cpp/src/arrow/CMakeLists.txt
+++ b/cpp/src/arrow/CMakeLists.txt
@@ -322,7 +322,8 @@ if(WIN32)
                  array-test.cc
                  array-binary-test.cc
                  array-list-test.cc
-                 array-struct-test.cc)
+                 array-struct-test.cc
+                 array-union-test.cc)
 else()
   add_arrow_test(array-test
                  SOURCES
@@ -330,7 +331,8 @@ else()
                  array-binary-test.cc
                  array-dict-test.cc
                  array-list-test.cc
-                 array-struct-test.cc)
+                 array-struct-test.cc
+                 array-union-test.cc)
 endif()
 
 add_arrow_test(buffer-test)
diff --git a/cpp/src/arrow/array-union-test.cc b/cpp/src/arrow/array-union-test.cc
index 067d195..86cbeae 100644
--- a/cpp/src/arrow/array-union-test.cc
+++ b/cpp/src/arrow/array-union-test.cc
@@ -15,20 +15,15 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#include <cstdint>
-#include <cstring>
-#include <memory>
-#include <vector>
+#include <string>
 
 #include <gtest/gtest.h>
 
 #include "arrow/array.h"
-#include "arrow/builder.h"
-#include "arrow/status.h"
 // TODO ipc shouldn't be included here
 #include "arrow/ipc/test-common.h"
-#include "arrow/testing/gtest_common.h"
 #include "arrow/testing/gtest_util.h"
+#include "arrow/testing/util.h"
 #include "arrow/type.h"
 #include "arrow/util/checked_cast.h"
 
@@ -36,7 +31,7 @@ namespace arrow {
 
 using internal::checked_cast;
 
-TEST(TestUnionArrayAdHoc, TestSliceEquals) {
+TEST(TestUnionArray, TestSliceEquals) {
   std::shared_ptr<RecordBatch> batch;
   ASSERT_OK(ipc::test::MakeUnion(&batch));
 
@@ -71,4 +66,126 @@ TEST(TestUnionArrayAdHoc, TestSliceEquals) {
   CheckUnion(batch->column(2));
 }
 
+// -------------------------------------------------------------------------
+// Tests for MakeDense and MakeSparse
+
+class TestUnionArrayFactories : public ::testing::Test {
+ public:
+  void SetUp() {
+    pool_ = default_memory_pool();
+    ArrayFromVector<Int8Type>({0, 1, 2, 0, 1, 3, 2, 0, 2, 1}, &type_ids_);
+  }
+
+  void CheckUnionArray(const UnionArray& array, UnionMode::type mode,
+                       const std::vector<std::string>& field_names,
+                       const std::vector<uint8_t>& type_codes) {
+    ASSERT_EQ(mode, array.mode());
+    CheckFieldNames(array, field_names);
+    CheckTypeCodes(array, type_codes);
+  }
+
+  void CheckFieldNames(const UnionArray& array, const std::vector<std::string>& names) {
+    const auto& type = checked_cast<const UnionType&>(*array.type());
+    ASSERT_EQ(type.num_children(), names.size());
+    for (int i = 0; i < type.num_children(); ++i) {
+      ASSERT_EQ(type.child(i)->name(), names[i]);
+    }
+  }
+
+  void CheckTypeCodes(const UnionArray& array, const std::vector<uint8_t>& codes) {
+    const auto& type = checked_cast<const UnionType&>(*array.type());
+    ASSERT_EQ(codes, type.type_codes());
+  }
+
+ protected:
+  MemoryPool* pool_;
+  std::shared_ptr<Array> type_ids_;
+};
+
+TEST_F(TestUnionArrayFactories, TestMakeDense) {
+  std::shared_ptr<Array> value_offsets;
+  ArrayFromVector<Int32Type, int32_t>({0, 0, 0, 1, 1, 0, 1, 2, 1, 2}, &value_offsets);
+
+  auto children = std::vector<std::shared_ptr<Array>>(4);
+  ArrayFromVector<StringType, std::string>({"abc", "def", "xyz"}, &children[0]);
+  ArrayFromVector<UInt8Type>({10, 20, 30}, &children[1]);
+  ArrayFromVector<DoubleType>({1.618, 2.718, 3.142}, &children[2]);
+  ArrayFromVector<Int8Type>({-12}, &children[3]);
+
+  std::vector<std::string> field_names = {"str", "int1", "real", "int2"};
+  std::vector<uint8_t> type_codes = {1, 2, 4, 8};
+
+  std::shared_ptr<Array> result;
+
+  // without field names and type codes
+  ASSERT_OK(UnionArray::MakeDense(*type_ids_, *value_offsets, children, &result));
+  CheckUnionArray(checked_cast<UnionArray&>(*result), UnionMode::DENSE,
+                  {"0", "1", "2", "3"}, {0, 1, 2, 3});
+
+  // with field names
+  ASSERT_RAISES(Invalid, UnionArray::MakeDense(*type_ids_, *value_offsets, children,
+                                               {"one"}, &result));
+  ASSERT_OK(
+      UnionArray::MakeDense(*type_ids_, *value_offsets, children, field_names, &result));
+  CheckUnionArray(checked_cast<UnionArray&>(*result), UnionMode::DENSE, field_names,
+                  {0, 1, 2, 3});
+
+  // with type codes
+  ASSERT_RAISES(Invalid, UnionArray::MakeDense(*type_ids_, *value_offsets, children,
+                                               std::vector<uint8_t>{0}, &result));
+  ASSERT_OK(
+      UnionArray::MakeDense(*type_ids_, *value_offsets, children, type_codes, &result));
+  CheckUnionArray(checked_cast<UnionArray&>(*result), UnionMode::DENSE,
+                  {"0", "1", "2", "3"}, type_codes);
+
+  // with field names and type codes
+  ASSERT_RAISES(Invalid, UnionArray::MakeDense(*type_ids_, *value_offsets, children,
+                                               {"one"}, type_codes, &result));
+  ASSERT_OK(UnionArray::MakeDense(*type_ids_, *value_offsets, children, field_names,
+                                  type_codes, &result));
+  CheckUnionArray(checked_cast<UnionArray&>(*result), UnionMode::DENSE, field_names,
+                  type_codes);
+}
+
+TEST_F(TestUnionArrayFactories, TestMakeSparse) {
+  auto children = std::vector<std::shared_ptr<Array>>(4);
+  ArrayFromVector<StringType, std::string>(
+      {"abc", "", "", "def", "", "", "", "xyz", "", ""}, &children[0]);
+  ArrayFromVector<UInt8Type>({0, 10, 0, 0, 20, 0, 0, 0, 0, 30}, &children[1]);
+  ArrayFromVector<DoubleType>({0.0, 0.0, 1.618, 0.0, 0.0, 0.0, 2.718, 0.0, 3.142, 0.0},
+                              &children[2]);
+  ArrayFromVector<Int8Type>({0, 0, 0, 0, 0, -12, 0, 0, 0, 0}, &children[3]);
+
+  std::vector<std::string> field_names = {"str", "int1", "real", "int2"};
+  std::vector<uint8_t> type_codes = {1, 2, 4, 8};
+
+  std::shared_ptr<Array> result;
+
+  // without field names and type codes
+  ASSERT_OK(UnionArray::MakeSparse(*type_ids_, children, &result));
+  CheckUnionArray(checked_cast<UnionArray&>(*result), UnionMode::SPARSE,
+                  {"0", "1", "2", "3"}, {0, 1, 2, 3});
+
+  // with field names
+  ASSERT_RAISES(Invalid, UnionArray::MakeSparse(*type_ids_, children, {"one"}, &result));
+  ASSERT_OK(UnionArray::MakeSparse(*type_ids_, children, field_names, &result));
+  CheckUnionArray(checked_cast<UnionArray&>(*result), UnionMode::SPARSE, field_names,
+                  {0, 1, 2, 3});
+
+  // with type codes
+  ASSERT_RAISES(Invalid, UnionArray::MakeSparse(*type_ids_, children,
+                                                std::vector<uint8_t>{0}, &result));
+  ASSERT_OK(UnionArray::MakeSparse(*type_ids_, children, type_codes, &result));
+  CheckUnionArray(checked_cast<UnionArray&>(*result), UnionMode::SPARSE,
+                  {"0", "1", "2", "3"}, type_codes);
+
+  // with field names and type codes
+  ASSERT_RAISES(Invalid, UnionArray::MakeSparse(*type_ids_, children, {"one"}, type_codes,
+                                                &result));
+  ASSERT_OK(
+      UnionArray::MakeSparse(*type_ids_, children, field_names, type_codes, &result));
+  CheckUnionArray(checked_cast<UnionArray&>(*result), UnionMode::SPARSE, field_names,
+                  type_codes);
+}
+
 }  // namespace arrow
diff --git a/cpp/src/arrow/array.cc b/cpp/src/arrow/array.cc
index bcf4342..5956dd2 100644
--- a/cpp/src/arrow/array.cc
+++ b/cpp/src/arrow/array.cc
@@ -504,6 +504,8 @@ UnionArray::UnionArray(const std::shared_ptr<DataType>& type, int64_t length,
 
 Status UnionArray::MakeDense(const Array& type_ids, const Array& value_offsets,
                              const std::vector<std::shared_ptr<Array>>& children,
+                             const std::vector<std::string>& field_names,
+                             const std::vector<uint8_t>& type_codes,
                              std::shared_ptr<Array>* out) {
   if (value_offsets.length() == 0) {
     return Status::Invalid("UnionArray offsets must have non-zero length");
@@ -521,10 +523,20 @@ Status UnionArray::MakeDense(const Array& type_ids, const Array& value_offsets,
     return Status::Invalid("MakeDense does not allow NAs in value_offsets");
   }
 
+  if (field_names.size() > 0 && field_names.size() != children.size()) {
+    return Status::Invalid("field_names must have the same length as children");
+  }
+
+  if (type_codes.size() > 0 && type_codes.size() != children.size()) {
+    return Status::Invalid("type_codes must have the same length as children");
+  }
+
   BufferVector buffers = {type_ids.null_bitmap(),
                           checked_cast<const Int8Array&>(type_ids).values(),
                           checked_cast<const Int32Array&>(value_offsets).values()};
-  auto union_type = union_(children, UnionMode::DENSE);
+
+  std::shared_ptr<DataType> union_type =
+      union_(children, field_names, type_codes, UnionMode::DENSE);
   auto internal_data = ArrayData::Make(union_type, type_ids.length(), std::move(buffers),
                                        type_ids.null_count(), type_ids.offset());
   for (const auto& child : children) {
@@ -536,13 +548,25 @@ Status UnionArray::MakeDense(const Array& type_ids, const Array& value_offsets,
 
 Status UnionArray::MakeSparse(const Array& type_ids,
                               const std::vector<std::shared_ptr<Array>>& children,
+                              const std::vector<std::string>& field_names,
+                              const std::vector<uint8_t>& type_codes,
                               std::shared_ptr<Array>* out) {
   if (type_ids.type_id() != Type::INT8) {
     return Status::Invalid("UnionArray type_ids must be signed int8");
   }
+
+  if (field_names.size() > 0 && field_names.size() != children.size()) {
+    return Status::Invalid("field_names must have the same length as children");
+  }
+
+  if (type_codes.size() > 0 && type_codes.size() != children.size()) {
+    return Status::Invalid("type_codes must have the same length as children");
+  }
+
   BufferVector buffers = {type_ids.null_bitmap(),
                           checked_cast<const Int8Array&>(type_ids).values(), nullptr};
-  auto union_type = union_(children, UnionMode::SPARSE);
+  std::shared_ptr<DataType> union_type =
+      union_(children, field_names, type_codes, UnionMode::SPARSE);
   auto internal_data = ArrayData::Make(union_type, type_ids.length(), std::move(buffers),
                                        type_ids.null_count(), type_ids.offset());
   for (const auto& child : children) {
diff --git a/cpp/src/arrow/array.h b/cpp/src/arrow/array.h
index a81f934..653993d 100644
--- a/cpp/src/arrow/array.h
+++ b/cpp/src/arrow/array.h
@@ -734,11 +734,75 @@ class ARROW_EXPORT UnionArray : public Array {
   /// relative offset into the respective child array for the type in a given slot.
   /// The respective offsets for each child value array must be in order / increasing.
   /// \param[in] children Vector of children Arrays containing the data for each type.
+  /// \param[in] field_names Vector of strings containing the name of each field.
+  /// \param[in] type_codes Vector of type codes.
   /// \param[out] out Will have length equal to value_offsets.length()
   static Status MakeDense(const Array& type_ids, const Array& value_offsets,
                           const std::vector<std::shared_ptr<Array>>& children,
+                          const std::vector<std::string>& field_names,
+                          const std::vector<uint8_t>& type_codes,
                           std::shared_ptr<Array>* out);
 
+  /// \brief Construct Dense UnionArray from type_ids, value_offsets and children
+  ///
+  /// This function does the bare minimum of validation of the offsets and
+  /// input types. The value_offsets are assumed to be well-formed.
+  ///
+  /// \param[in] type_ids An array of 8-bit signed integers, enumerated from
+  /// 0 corresponding to each type.
+  /// \param[in] value_offsets An array of signed int32 values indicating the
+  /// relative offset into the respective child array for the type in a given slot.
+  /// The respective offsets for each child value array must be in order / increasing.
+  /// \param[in] children Vector of children Arrays containing the data for each type.
+  /// \param[in] field_names Vector of strings containing the name of each field.
+  /// \param[out] out Will have length equal to value_offsets.length()
+  static Status MakeDense(const Array& type_ids, const Array& value_offsets,
+                          const std::vector<std::shared_ptr<Array>>& children,
+                          const std::vector<std::string>& field_names,
+                          std::shared_ptr<Array>* out) {
+    return MakeDense(type_ids, value_offsets, children, field_names, {}, out);
+  }
+
+  /// \brief Construct Dense UnionArray from type_ids, value_offsets and children
+  ///
+  /// This function does the bare minimum of validation of the offsets and
+  /// input types. The value_offsets are assumed to be well-formed.
+  ///
+  /// \param[in] type_ids An array of 8-bit signed integers, enumerated from
+  /// 0 corresponding to each type.
+  /// \param[in] value_offsets An array of signed int32 values indicating the
+  /// relative offset into the respective child array for the type in a given slot.
+  /// The respective offsets for each child value array must be in order / increasing.
+  /// \param[in] children Vector of children Arrays containing the data for each type.
+  /// \param[in] type_codes Vector of type codes.
+  /// \param[out] out Will have length equal to value_offsets.length()
+  static Status MakeDense(const Array& type_ids, const Array& value_offsets,
+                          const std::vector<std::shared_ptr<Array>>& children,
+                          const std::vector<uint8_t>& type_codes,
+                          std::shared_ptr<Array>* out) {
+    return MakeDense(type_ids, value_offsets, children, {}, type_codes, out);
+  }
+
+  /// \brief Construct Dense UnionArray from type_ids, value_offsets and children
+  ///
+  /// This function does the bare minimum of validation of the offsets and
+  /// input types. The value_offsets are assumed to be well-formed.
+  ///
+  /// Each field is named after its index ("0", "1", ...).
+  ///
+  /// \param[in] type_ids An array of 8-bit signed integers, enumerated from
+  /// 0 corresponding to each type.
+  /// \param[in] value_offsets An array of signed int32 values indicating the
+  /// relative offset into the respective child array for the type in a given slot.
+  /// The respective offsets for each child value array must be in order / increasing.
+  /// \param[in] children Vector of children Arrays containing the data for each type.
+  /// \param[out] out Will have length equal to value_offsets.length()
+  static Status MakeDense(const Array& type_ids, const Array& value_offsets,
+                          const std::vector<std::shared_ptr<Array>>& children,
+                          std::shared_ptr<Array>* out) {
+    return MakeDense(type_ids, value_offsets, children, {}, {}, out);
+  }
+
   /// \brief Construct Sparse UnionArray from type_ids and children
   ///
   /// This function does the bare minimum of validation of the offsets and
@@ -747,11 +811,66 @@ class ARROW_EXPORT UnionArray : public Array {
   /// \param[in] type_ids An array of 8-bit signed integers, enumerated from
   /// 0 corresponding to each type.
   /// \param[in] children Vector of children Arrays containing the data for each type.
+  /// \param[in] field_names Vector of strings containing the name of each field.
+  /// \param[in] type_codes Vector of type codes.
   /// \param[out] out Will have length equal to type_ids.length()
   static Status MakeSparse(const Array& type_ids,
                            const std::vector<std::shared_ptr<Array>>& children,
+                           const std::vector<std::string>& field_names,
+                           const std::vector<uint8_t>& type_codes,
                            std::shared_ptr<Array>* out);
 
+  /// \brief Construct Sparse UnionArray from type_ids and children
+  ///
+  /// This function does the bare minimum of validation of the offsets and
+  /// input types.
+  ///
+  /// \param[in] type_ids An array of 8-bit signed integers, enumerated from
+  /// 0 corresponding to each type.
+  /// \param[in] children Vector of children Arrays containing the data for each type.
+  /// \param[in] field_names Vector of strings containing the name of each field.
+  /// \param[out] out Will have length equal to type_ids.length()
+  static Status MakeSparse(const Array& type_ids,
+                           const std::vector<std::shared_ptr<Array>>& children,
+                           const std::vector<std::string>& field_names,
+                           std::shared_ptr<Array>* out) {
+    return MakeSparse(type_ids, children, field_names, {}, out);
+  }
+
+  /// \brief Construct Sparse UnionArray from type_ids and children
+  ///
+  /// This function does the bare minimum of validation of the offsets and
+  /// input types.
+  ///
+  /// \param[in] type_ids An array of 8-bit signed integers, enumerated from
+  /// 0 corresponding to each type.
+  /// \param[in] children Vector of children Arrays containing the data for each type.
+  /// \param[in] type_codes Vector of type codes.
+  /// \param[out] out Will have length equal to type_ids.length()
+  static Status MakeSparse(const Array& type_ids,
+                           const std::vector<std::shared_ptr<Array>>& children,
+                           const std::vector<uint8_t>& type_codes,
+                           std::shared_ptr<Array>* out) {
+    return MakeSparse(type_ids, children, {}, type_codes, out);
+  }
+
+  /// \brief Construct Sparse UnionArray from type_ids and children
+  ///
+  /// This function does the bare minimum of validation of the offsets and
+  /// input types.
+  ///
+  /// Each field is named after its index ("0", "1", ...).
+  ///
+  /// \param[in] type_ids An array of 8-bit signed integers, enumerated from
+  /// 0 corresponding to each type.
+  /// \param[in] children Vector of children Arrays containing the data for each type.
+  /// \param[out] out Will have length equal to type_ids.length()
+  static Status MakeSparse(const Array& type_ids,
+                           const std::vector<std::shared_ptr<Array>>& children,
+                           std::shared_ptr<Array>* out) {
+    return MakeSparse(type_ids, children, {}, {}, out);
+  }
+
   /// Note that this buffer does not account for any slice offset
   std::shared_ptr<Buffer> type_ids() const { return data_->buffers[1]; }
 
diff --git a/cpp/src/arrow/type.cc b/cpp/src/arrow/type.cc
index fd37726..0e0d9fc 100644
--- a/cpp/src/arrow/type.cc
+++ b/cpp/src/arrow/type.cc
@@ -576,13 +576,21 @@ std::shared_ptr<DataType> union_(const std::vector<std::shared_ptr<Field>>& chil
 }
 
 std::shared_ptr<DataType> union_(const std::vector<std::shared_ptr<Array>>& children,
+                                 const std::vector<std::string>& field_names,
+                                 const std::vector<uint8_t>& given_type_codes,
                                  UnionMode::type mode) {
   std::vector<std::shared_ptr<Field>> types;
-  std::vector<uint8_t> type_codes;
+  std::vector<uint8_t> type_codes(given_type_codes);
   uint8_t counter = 0;
   for (const auto& child : children) {
-    types.push_back(field(std::to_string(counter), child->type()));
-    type_codes.push_back(counter);
+    if (field_names.size() == 0) {
+      types.push_back(field(std::to_string(counter), child->type()));
+    } else {
+      types.push_back(field(field_names[counter], child->type()));
+    }
+    if (given_type_codes.size() == 0) {
+      type_codes.push_back(counter);
+    }
     counter++;
   }
   return union_(types, type_codes, mode);
diff --git a/cpp/src/arrow/type.h b/cpp/src/arrow/type.h
index 525d174..4c35378 100644
--- a/cpp/src/arrow/type.h
+++ b/cpp/src/arrow/type.h
@@ -920,7 +920,23 @@ union_(const std::vector<std::shared_ptr<Field>>& child_fields,
 /// \brief Create a UnionType instance
 std::shared_ptr<DataType> ARROW_EXPORT
 union_(const std::vector<std::shared_ptr<Array>>& children,
-       UnionMode::type mode = UnionMode::SPARSE);
+       const std::vector<std::string>& field_names,
+       const std::vector<uint8_t>& type_codes, UnionMode::type mode = UnionMode::SPARSE);
+
+/// \brief Create a UnionType instance
+inline std::shared_ptr<DataType> ARROW_EXPORT
+union_(const std::vector<std::shared_ptr<Array>>& children,
+       const std::vector<std::string>& field_names,
+       UnionMode::type mode = UnionMode::SPARSE) {
+  return union_(children, field_names, {}, mode);
+}
+
+/// \brief Create a UnionType instance
+inline std::shared_ptr<DataType> ARROW_EXPORT
+union_(const std::vector<std::shared_ptr<Array>>& children,
+       UnionMode::type mode = UnionMode::SPARSE) {
+  return union_(children, {}, {}, mode);
+}
 
 /// \brief Create a DictionaryType instance
 std::shared_ptr<DataType> ARROW_EXPORT
diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi
index 1966245..9c57634 100644
--- a/python/pyarrow/array.pxi
+++ b/python/pyarrow/array.pxi
@@ -1054,7 +1054,8 @@ cdef class UnionArray(Array):
     """
 
     @staticmethod
-    def from_dense(Array types, Array value_offsets, list children):
+    def from_dense(Array types, Array value_offsets, list children,
+                   list field_names=None, list type_codes=None):
         """
         Construct dense UnionArray from arrays of int8 types, int32 offsets and
         children arrays
@@ -1064,6 +1065,8 @@ cdef class UnionArray(Array):
         types : Array (int8 type)
         value_offsets : Array (int32 type)
         children : list
+        field_names : list
+        type_codes : list
 
         Returns
         -------
@@ -1072,15 +1075,25 @@ cdef class UnionArray(Array):
         cdef shared_ptr[CArray] out
         cdef vector[shared_ptr[CArray]] c
         cdef Array child
+        cdef vector[c_string] c_field_names
+        cdef vector[uint8_t] c_type_codes
         for child in children:
             c.push_back(child.sp_array)
+        if field_names is not None:
+            for x in field_names:
+                c_field_names.push_back(tobytes(x))
+        if type_codes is not None:
+            for x in type_codes:
+                c_type_codes.push_back(x)
         with nogil:
             check_status(CUnionArray.MakeDense(
-                deref(types.ap), deref(value_offsets.ap), c, &out))
+                deref(types.ap), deref(value_offsets.ap), c, c_field_names,
+                c_type_codes, &out))
         return pyarrow_wrap_array(out)
 
     @staticmethod
-    def from_sparse(Array types, list children):
+    def from_sparse(Array types, list children, list field_names=None,
+                    list type_codes=None):
         """
         Construct sparse UnionArray from arrays of int8 types and children
         arrays
@@ -1089,6 +1102,8 @@ cdef class UnionArray(Array):
         ----------
         types : Array (int8 type)
         children : list
+        field_names : list
+        type_codes : list
 
         Returns
         -------
@@ -1097,10 +1112,21 @@ cdef class UnionArray(Array):
         cdef shared_ptr[CArray] out
         cdef vector[shared_ptr[CArray]] c
         cdef Array child
+        cdef vector[c_string] c_field_names
+        cdef vector[uint8_t] c_type_codes
         for child in children:
             c.push_back(child.sp_array)
+        if field_names is not None:
+            for x in field_names:
+                c_field_names.push_back(tobytes(x))
+        if type_codes is not None:
+            for x in type_codes:
+                c_type_codes.push_back(x)
         with nogil:
-            check_status(CUnionArray.MakeSparse(deref(types.ap), c, &out))
+            check_status(CUnionArray.MakeSparse(deref(types.ap), c,
+                                                c_field_names,
+                                                c_type_codes,
+                                                &out))
         return pyarrow_wrap_array(out)
 
 
diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd
index e27f033..1649ee6 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -407,11 +407,15 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
         @staticmethod
         CStatus MakeSparse(const CArray& type_ids,
                            const vector[shared_ptr[CArray]]& children,
+                           const vector[c_string]& field_names,
+                           const vector[uint8_t]& type_codes,
                            shared_ptr[CArray]* out)
 
         @staticmethod
         CStatus MakeDense(const CArray& type_ids, const CArray& value_offsets,
                           const vector[shared_ptr[CArray]]& children,
+                          const vector[c_string]& field_names,
+                          const vector[uint8_t]& type_codes,
                           shared_ptr[CArray]* out)
         uint8_t* raw_type_ids()
         int32_t value_offset(int i)
diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py
index 8c0143c..ffbf7e3 100644
--- a/python/pyarrow/tests/test_array.py
+++ b/python/pyarrow/tests/test_array.py
@@ -407,9 +407,39 @@ def test_union_from_dense():
     types = pa.array([0, 1, 0, 0, 1, 1, 0], type='int8')
     value_offsets = pa.array([0, 0, 2, 1, 1, 2, 3], type='int32')
 
-    result = pa.UnionArray.from_dense(types, value_offsets, [binary, int64])
-
-    assert result.to_pylist() == [b'a', 1, b'c', b'b', 2, 3, b'd']
+    def check_result(result, expected_field_names, expected_type_codes):
+        assert result.to_pylist() == [b'a', 1, b'c', b'b', 2, 3, b'd']
+        actual_field_names = [result.type[i].name
+                              for i in range(result.type.num_children)]
+        assert actual_field_names == expected_field_names
+        assert result.type.type_codes == expected_type_codes
+
+    # without field names and type codes
+    check_result(pa.UnionArray.from_dense(types, value_offsets,
+                                          [binary, int64]),
+                 expected_field_names=['0', '1'],
+                 expected_type_codes=[0, 1])
+
+    # with field names
+    check_result(pa.UnionArray.from_dense(types, value_offsets,
+                                          [binary, int64],
+                                          ['bin', 'int']),
+                 expected_field_names=['bin', 'int'],
+                 expected_type_codes=[0, 1])
+
+    # with type codes
+    check_result(pa.UnionArray.from_dense(types, value_offsets,
+                                          [binary, int64],
+                                          type_codes=[11, 13]),
+                 expected_field_names=['0', '1'],
+                 expected_type_codes=[11, 13])
+
+    # with field names and type codes
+    check_result(pa.UnionArray.from_dense(types, value_offsets,
+                                          [binary, int64],
+                                          ['bin', 'int'], [11, 13]),
+                 expected_field_names=['bin', 'int'],
+                 expected_type_codes=[11, 13])
 
 
 def test_union_from_sparse():
@@ -418,9 +448,36 @@ def test_union_from_sparse():
     int64 = pa.array([0, 1, 0, 0, 2, 3, 0], type='int64')
     types = pa.array([0, 1, 0, 0, 1, 1, 0], type='int8')
 
-    result = pa.UnionArray.from_sparse(types, [binary, int64])
-
-    assert result.to_pylist() == [b'a', 1, b'b', b'c', 2, 3, b'd']
+    def check_result(result, expected_field_names, expected_type_codes):
+        assert result.to_pylist() == [b'a', 1, b'b', b'c', 2, 3, b'd']
+        actual_field_names = [result.type[i].name
+                              for i in range(result.type.num_children)]
+        assert actual_field_names == expected_field_names
+        assert result.type.type_codes == expected_type_codes
+
+    # without field names and type codes
+    check_result(pa.UnionArray.from_sparse(types, [binary, int64]),
+                 expected_field_names=['0', '1'],
+                 expected_type_codes=[0, 1])
+
+    # with field names
+    check_result(pa.UnionArray.from_sparse(types, [binary, int64],
+                                           ['bin', 'int']),
+                 expected_field_names=['bin', 'int'],
+                 expected_type_codes=[0, 1])
+
+    # with type codes
+    check_result(pa.UnionArray.from_sparse(types, [binary, int64],
+                                           type_codes=[11, 13]),
+                 expected_field_names=['0', '1'],
+                 expected_type_codes=[11, 13])
+
+    # with field names and type codes
+    check_result(pa.UnionArray.from_sparse(types, [binary, int64],
+                                           ['bin', 'int'],
+                                           [11, 13]),
+                 expected_field_names=['bin', 'int'],
+                 expected_type_codes=[11, 13])
 
 
 def test_union_array_slice():