You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@parquet.apache.org by we...@apache.org on 2017/04/10 15:05:31 UTC

parquet-cpp git commit: PARQUET-947: Account for Arrow library consolidation in ARROW-795, API changes in ARROW-782

Repository: parquet-cpp
Updated Branches:
  refs/heads/master d0646659c -> cf31e6d1b


PARQUET-947: Account for Arrow library consolidation in ARROW-795, API changes in ARROW-782

Author: Wes McKinney <we...@twosigma.com>

Closes #292 from wesm/PARQUET-947 and squashes the following commits:

2d68d5b [Wes McKinney] Fix typo
35feebc [Wes McKinney] Update to Arrow HEAD
7fa2b1b [Wes McKinney] Account for API changes in ARROW-782
8d6c50d [Wes McKinney] Update Arrow version
7b2016f [Wes McKinney] Remove arrow_io library after ARROW-795


Project: http://git-wip-us.apache.org/repos/asf/parquet-cpp/repo
Commit: http://git-wip-us.apache.org/repos/asf/parquet-cpp/commit/cf31e6d1
Tree: http://git-wip-us.apache.org/repos/asf/parquet-cpp/tree/cf31e6d1
Diff: http://git-wip-us.apache.org/repos/asf/parquet-cpp/diff/cf31e6d1

Branch: refs/heads/master
Commit: cf31e6d1bb27b807bd742cfb33179668c5afb2f3
Parents: d064665
Author: Wes McKinney <we...@twosigma.com>
Authored: Mon Apr 10 11:05:24 2017 -0400
Committer: Wes McKinney <we...@twosigma.com>
Committed: Mon Apr 10 11:05:24 2017 -0400

----------------------------------------------------------------------
 CMakeLists.txt                          |  6 ++----
 cmake_modules/FindArrow.cmake           | 15 ++-------------
 cmake_modules/ThirdpartyToolchain.cmake | 25 +++----------------------
 src/parquet/arrow/CMakeLists.txt        |  3 ---
 src/parquet/arrow/parquet-arrow.pc.in   |  2 +-
 src/parquet/arrow/reader.cc             | 28 ++++++++++++++--------------
 src/parquet/arrow/schema.cc             | 18 +++++++++---------
 src/parquet/arrow/writer.cc             | 12 ++++++------
 8 files changed, 37 insertions(+), 72 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/cf31e6d1/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/CMakeLists.txt b/CMakeLists.txt
index ee31424..5c3d91b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -481,12 +481,10 @@ endif()
 
 if ("${PARQUET_ARROW_LINKAGE}" STREQUAL "shared")
   set(ARROW_LINK_LIBS
-    arrow
-    arrow_io)
+    arrow)
 else()
   set(ARROW_LINK_LIBS
-    arrow_static
-    arrow_io_static)
+    arrow_static)
 endif()
 
 #############################################################

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/cf31e6d1/cmake_modules/FindArrow.cmake
----------------------------------------------------------------------
diff --git a/cmake_modules/FindArrow.cmake b/cmake_modules/FindArrow.cmake
index 0a3e7e2..c3f835b 100644
--- a/cmake_modules/FindArrow.cmake
+++ b/cmake_modules/FindArrow.cmake
@@ -46,30 +46,21 @@ find_library(ARROW_LIB_PATH NAMES arrow
   ${ARROW_SEARCH_LIB_PATH}
   NO_DEFAULT_PATH)
 
-find_library(ARROW_IO_LIB_PATH NAMES arrow_io
-  PATHS
-  ${ARROW_SEARCH_LIB_PATH}
-  NO_DEFAULT_PATH)
-
 if (ARROW_INCLUDE_DIR AND (PARQUET_MINIMAL_DEPENDENCY OR ARROW_LIB_PATH))
   set(ARROW_FOUND TRUE)
   set(ARROW_HEADER_NAME arrow/api.h)
   set(ARROW_HEADER ${ARROW_INCLUDE_DIR}/${ARROW_HEADER_NAME})
   set(ARROW_LIB_NAME libarrow)
-  set(ARROW_IO_LIB_NAME libarrow_io)
 
   get_filename_component(ARROW_LIBS ${ARROW_LIB_PATH} DIRECTORY)
   set(ARROW_STATIC_LIB ${ARROW_LIBS}/${ARROW_LIB_NAME}.a)
   set(ARROW_SHARED_LIB ${ARROW_LIBS}/${ARROW_LIB_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX})
 
-  set(ARROW_IO_STATIC_LIB ${ARROW_LIBS}/${ARROW_IO_LIB_NAME}.a)
-  set(ARROW_IO_SHARED_LIB ${ARROW_LIBS}/${ARROW_IO_LIB_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX})
   if (NOT Arrow_FIND_QUIETLY)
     if (PARQUET_MINIMAL_DEPENDENCY)
-      message(STATUS "Found the Arrow core and IO header: ${ARROW_HEADER}")
+      message(STATUS "Found the Arrow header: ${ARROW_HEADER}")
     else ()
-      message(STATUS "Found the Arrow core library: ${ARROW_LIB_PATH}")
-      message(STATUS "Found the Arrow IO library: ${ARROW_IO_LIB_PATH}")
+      message(STATUS "Found the Arrow library: ${ARROW_LIB_PATH}")
     endif ()
   endif ()
 else ()
@@ -92,6 +83,4 @@ mark_as_advanced(
   ARROW_LIBS
   ARROW_STATIC_LIB
   ARROW_SHARED_LIB
-  ARROW_IO_STATIC_LIB
-  ARROW_IO_SHARED_LIB
 )

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/cf31e6d1/cmake_modules/ThirdpartyToolchain.cmake
----------------------------------------------------------------------
diff --git a/cmake_modules/ThirdpartyToolchain.cmake b/cmake_modules/ThirdpartyToolchain.cmake
index 0cb3ef7..1294f46 100644
--- a/cmake_modules/ThirdpartyToolchain.cmake
+++ b/cmake_modules/ThirdpartyToolchain.cmake
@@ -22,7 +22,7 @@ set(THRIFT_VERSION "0.10.0")
 
 # Brotli 0.5.2 does not install headers/libraries yet, but 0.6.0.dev does
 set(BROTLI_VERSION "5db62dcc9d386579609540cdf8869e95ad334bbd")
-set(ARROW_VERSION "15b874e47e3975c5240290ec7ed105bf8d1b56bc")
+set(ARROW_VERSION "c2f28cd07413e262fa0b741c286f86d5c7277c56")
 
 # find boost headers and libs
 set(Boost_DEBUG TRUE)
@@ -359,36 +359,25 @@ endif()
 
 ## Apache Arrow
 pkg_check_modules(ARROW arrow)
-pkg_check_modules(ARROW_IO arrow-io)
-if (ARROW_FOUND AND ARROW_IO_FOUND)
+if (ARROW_FOUND)
   set(ARROW_INCLUDE_DIR ${ARROW_INCLUDE_DIRS})
 
   if (COMMAND pkg_get_variable)
     pkg_get_variable(ARROW_ABI_VERSION arrow abi_version)
-    pkg_get_variable(ARROW_IO_ABI_VERSION arrow-io abi_version)
   else()
     set(ARROW_ABI_VERSION "")
-    set(ARROW_IO_ABI_VERSION "")
   endif()
   if (ARROW_ABI_VERSION STREQUAL "")
     set(ARROW_SHARED_LIB_SUFFIX "")
   else()
     set(ARROW_SHARED_LIB_SUFFIX ".${ARROW_ABI_VERSION}")
   endif()
-  if (ARROW_IO_ABI_VERSION STREQUAL "")
-    set(ARROW_IO_SHARED_LIB_SUFFIX "")
-  else()
-    set(ARROW_IO_SHARED_LIB_SUFFIX ".${ARROW_ABI_VERSION}")
-  endif()
 
   set(ARROW_LIB_NAME ${CMAKE_SHARED_LIBRARY_PREFIX}arrow)
-  set(ARROW_IO_LIB_NAME ${CMAKE_SHARED_LIBRARY_PREFIX}arrow_io)
 
   set(ARROW_SHARED_LIB ${ARROW_LIBDIR}/${ARROW_LIB_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX}${ARROW_SHARED_LIB_SUFFIX})
   set(ARROW_STATIC_LIB ${ARROW_LIBDIR}/${ARROW_LIB_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX})
 
-  set(ARROW_IO_SHARED_LIB ${ARROW_LIBDIR}/${ARROW_IO_LIB_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX}${ARROW_IO_SHARED_LIB_SUFFIX})
-  set(ARROW_IO_STATIC_LIB ${ARROW_LIBDIR}/${ARROW_IO_LIB_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX})
 else()
   find_package(Arrow)
 endif()
@@ -397,9 +386,7 @@ if (NOT ARROW_FOUND)
   set(ARROW_HOME "${ARROW_PREFIX}")
   set(ARROW_INCLUDE_DIR "${ARROW_PREFIX}/include")
   set(ARROW_SHARED_LIB "${ARROW_PREFIX}/lib/libarrow${CMAKE_SHARED_LIBRARY_SUFFIX}")
-  set(ARROW_IO_SHARED_LIB "${ARROW_PREFIX}/lib/libarrow_io${CMAKE_SHARED_LIBRARY_SUFFIX}")
   set(ARROW_STATIC_LIB "${ARROW_PREFIX}/lib/libarrow.a")
-  set(ARROW_IO_STATIC_LIB "${ARROW_PREFIX}/lib/libarrow_io.a")
   set(ARROW_CMAKE_ARGS -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
     -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}
     -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}
@@ -412,7 +399,7 @@ if (NOT ARROW_FOUND)
     ExternalProject_Add(arrow_ep
       GIT_REPOSITORY https://github.com/apache/arrow.git
       GIT_TAG ${ARROW_VERSION}
-      BUILD_BYPRODUCTS "${ARROW_SHARED_LIB}" "${ARROW_IO_SHARED_LIB}" "${ARROW_IO_STATIC_LIB}" "${ARROW_STATIC_LIB}"
+      BUILD_BYPRODUCTS "${ARROW_SHARED_LIB}" "${ARROW_STATIC_LIB}"
       # With CMake 3.7.0 there is a SOURCE_SUBDIR argument which we can use
       # to specify that the CMakeLists.txt of Arrow is located in cpp/
       #
@@ -434,16 +421,10 @@ endif()
 include_directories(SYSTEM ${ARROW_INCLUDE_DIR})
 add_library(arrow SHARED IMPORTED)
 set_target_properties(arrow PROPERTIES IMPORTED_LOCATION ${ARROW_SHARED_LIB})
-add_library(arrow_io SHARED IMPORTED)
-set_target_properties(arrow_io PROPERTIES IMPORTED_LOCATION ${ARROW_IO_SHARED_LIB})
 add_library(arrow_static STATIC IMPORTED)
 set_target_properties(arrow_static PROPERTIES IMPORTED_LOCATION ${ARROW_STATIC_LIB})
-add_library(arrow_io_static STATIC IMPORTED)
-set_target_properties(arrow_io_static PROPERTIES IMPORTED_LOCATION ${ARROW_IO_STATIC_LIB})
 
 if (ARROW_VENDORED)
   add_dependencies(arrow arrow_ep)
-  add_dependencies(arrow_io arrow_ep)
   add_dependencies(arrow_static arrow_ep)
-  add_dependencies(arrow_io_static arrow_ep)
 endif()

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/cf31e6d1/src/parquet/arrow/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/src/parquet/arrow/CMakeLists.txt b/src/parquet/arrow/CMakeLists.txt
index 8bc6af7..c2fd901 100644
--- a/src/parquet/arrow/CMakeLists.txt
+++ b/src/parquet/arrow/CMakeLists.txt
@@ -31,7 +31,6 @@ add_library(parquet_arrow_objlib OBJECT
 # Add dependencies so ExternalProjects are built beforehand
 add_dependencies(parquet_arrow_objlib
     arrow_static
-    arrow_io_static
     parquet_static)
 
 # SET_TARGET_PROPERTIES(parquet_arrow PROPERTIES LINKER_LANGUAGE CXX)
@@ -47,7 +46,6 @@ if (PARQUET_BUILD_SHARED)
       SOVERSION "${PARQUET_SO_VERSION}")
     target_link_libraries(parquet_arrow_shared
       arrow
-      arrow_io
       parquet_shared)
     if (PARQUET_RPATH_ORIGIN)
         if (APPLE)
@@ -77,7 +75,6 @@ if (PARQUET_BUILD_STATIC)
       OUTPUT_NAME "parquet_arrow")
   target_link_libraries(parquet_arrow_static
       arrow_static
-      arrow_io_static
       parquet_static)
   install(TARGETS parquet_arrow_static
       ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/cf31e6d1/src/parquet/arrow/parquet-arrow.pc.in
----------------------------------------------------------------------
diff --git a/src/parquet/arrow/parquet-arrow.pc.in b/src/parquet/arrow/parquet-arrow.pc.in
index 511e0b6..20056bc 100644
--- a/src/parquet/arrow/parquet-arrow.pc.in
+++ b/src/parquet/arrow/parquet-arrow.pc.in
@@ -24,4 +24,4 @@ Description: Apache Parquet Apache arrow adapter provides Arrow IPC modules for
 Version: @PARQUET_VERSION@
 Libs: -L${libdir} -lparquet_arrow
 Cflags: -I${includedir}
-Requires: parquet arrow-io
+Requires: parquet arrow

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/cf31e6d1/src/parquet/arrow/reader.cc
----------------------------------------------------------------------
diff --git a/src/parquet/arrow/reader.cc b/src/parquet/arrow/reader.cc
index 823aea9..2ca9207 100644
--- a/src/parquet/arrow/reader.cc
+++ b/src/parquet/arrow/reader.cc
@@ -724,23 +724,23 @@ Status ColumnReader::Impl::WrapIntoListArray(const int16_t* def_levels,
     std::vector<bool> nullable;
     std::vector<std::shared_ptr<::arrow::Int32Builder>> offset_builders;
     std::vector<std::shared_ptr<::arrow::BooleanBuilder>> valid_bits_builders;
-    nullable.push_back(current_field->nullable);
-    while (current_field->type->num_children() > 0) {
-      if (current_field->type->num_children() > 1) {
+    nullable.push_back(current_field->nullable());
+    while (current_field->type()->num_children() > 0) {
+      if (current_field->type()->num_children() > 1) {
         return Status::NotImplemented(
             "Fields with more than one child are not supported.");
       } else {
-        if (current_field->type->type != ::arrow::Type::LIST) {
+        if (current_field->type()->id() != ::arrow::Type::LIST) {
           return Status::NotImplemented(
               "Currently only nesting with Lists is supported.");
         }
-        current_field = current_field->type->child(0);
+        current_field = current_field->type()->child(0);
       }
       offset_builders.emplace_back(
           std::make_shared<::arrow::Int32Builder>(pool_, ::arrow::int32()));
       valid_bits_builders.emplace_back(
           std::make_shared<::arrow::BooleanBuilder>(pool_, ::arrow::boolean()));
-      nullable.push_back(current_field->nullable);
+      nullable.push_back(current_field->nullable());
     }
 
     int64_t list_depth = offset_builders.size();
@@ -860,12 +860,12 @@ Status ColumnReader::Impl::TypedReadBatch(int batch_size, std::shared_ptr<Array>
           ::arrow::BitUtil::CeilByte(valid_bits_idx_) / 8, false));
     }
     *out = std::make_shared<ArrayType<ArrowType>>(
-        field_->type, valid_bits_idx_, data_buffer_, valid_bits_buffer_, null_count_);
+        field_->type(), valid_bits_idx_, data_buffer_, valid_bits_buffer_, null_count_);
     // Relase the ownership as the Buffer is now part of a new Array
     valid_bits_buffer_.reset();
   } else {
     *out = std::make_shared<ArrayType<ArrowType>>(
-        field_->type, valid_bits_idx_, data_buffer_);
+        field_->type(), valid_bits_idx_, data_buffer_);
   }
   // Relase the ownership as the Buffer is now part of a new Array
   data_buffer_.reset();
@@ -934,12 +934,12 @@ Status ColumnReader::Impl::TypedReadBatch<::arrow::BooleanType, BooleanType>(
       valid_bits_buffer_ = valid_bits_buffer;
     }
     *out = std::make_shared<BooleanArray>(
-        field_->type, valid_bits_idx_, data_buffer_, valid_bits_buffer_, null_count_);
+        field_->type(), valid_bits_idx_, data_buffer_, valid_bits_buffer_, null_count_);
     // Relase the ownership
     data_buffer_.reset();
     valid_bits_buffer_.reset();
   } else {
-    *out = std::make_shared<BooleanArray>(field_->type, valid_bits_idx_, data_buffer_);
+    *out = std::make_shared<BooleanArray>(field_->type(), valid_bits_idx_, data_buffer_);
     data_buffer_.reset();
   }
 
@@ -1028,7 +1028,7 @@ Status ColumnReader::Impl::NextBatch(int batch_size, std::shared_ptr<Array>* out
     return Status::OK();
   }
 
-  switch (field_->type->type) {
+  switch (field_->type()->id()) {
     TYPED_BATCH_CASE(BOOL, ::arrow::BooleanType, BooleanType)
     TYPED_BATCH_CASE(UINT8, ::arrow::UInt8Type, Int32Type)
     TYPED_BATCH_CASE(INT8, ::arrow::Int8Type, Int32Type)
@@ -1045,8 +1045,8 @@ Status ColumnReader::Impl::NextBatch(int batch_size, std::shared_ptr<Array>* out
     TYPED_BATCH_CASE(BINARY, ::arrow::BinaryType, ByteArrayType)
     case ::arrow::Type::TIMESTAMP: {
       ::arrow::TimestampType* timestamp_type =
-          static_cast<::arrow::TimestampType*>(field_->type.get());
-      switch (timestamp_type->unit) {
+          static_cast<::arrow::TimestampType*>(field_->type().get());
+      switch (timestamp_type->unit()) {
         case ::arrow::TimeUnit::MILLI:
           return TypedReadBatch<::arrow::TimestampType, Int64Type>(batch_size, out);
           break;
@@ -1060,7 +1060,7 @@ Status ColumnReader::Impl::NextBatch(int batch_size, std::shared_ptr<Array>* out
     }
     default:
       std::stringstream ss;
-      ss << "No support for reading columns of type " << field_->type->ToString();
+      ss << "No support for reading columns of type " << field_->type()->ToString();
       return Status::NotImplemented(ss.str());
   }
 }

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/cf31e6d1/src/parquet/arrow/schema.cc
----------------------------------------------------------------------
diff --git a/src/parquet/arrow/schema.cc b/src/parquet/arrow/schema.cc
index f0d05fc..76b7f77 100644
--- a/src/parquet/arrow/schema.cc
+++ b/src/parquet/arrow/schema.cc
@@ -327,10 +327,10 @@ Status FieldToNode(const std::shared_ptr<Field>& field,
   LogicalType::type logical_type = LogicalType::NONE;
   ParquetType::type type;
   Repetition::type repetition =
-      field->nullable ? Repetition::OPTIONAL : Repetition::REQUIRED;
+      field->nullable() ? Repetition::OPTIONAL : Repetition::REQUIRED;
   int length = -1;
 
-  switch (field->type->type) {
+  switch (field->type()->id()) {
     // TODO:
     // case ArrowType::NA:
     // break;
@@ -393,8 +393,8 @@ Status FieldToNode(const std::shared_ptr<Field>& field,
       logical_type = LogicalType::DATE;
       break;
     case ArrowType::TIMESTAMP: {
-      auto timestamp_type = static_cast<::arrow::TimestampType*>(field->type.get());
-      if (timestamp_type->unit != ::arrow::TimestampType::Unit::MILLI) {
+      auto timestamp_type = static_cast<::arrow::TimestampType*>(field->type().get());
+      if (timestamp_type->unit() != ::arrow::TimestampType::Unit::MILLI) {
         return Status::NotImplemented(
             "Other timestamp units than millisecond are not yet support with parquet.");
       }
@@ -410,18 +410,18 @@ Status FieldToNode(const std::shared_ptr<Field>& field,
       logical_type = LogicalType::TIME_MICROS;
       break;
     case ArrowType::STRUCT: {
-      auto struct_type = std::static_pointer_cast<::arrow::StructType>(field->type);
-      return StructToNode(struct_type, field->name, field->nullable, properties, out);
+      auto struct_type = std::static_pointer_cast<::arrow::StructType>(field->type());
+      return StructToNode(struct_type, field->name(), field->nullable(), properties, out);
     } break;
     case ArrowType::LIST: {
-      auto list_type = std::static_pointer_cast<::arrow::ListType>(field->type);
-      return ListToNode(list_type, field->name, field->nullable, properties, out);
+      auto list_type = std::static_pointer_cast<::arrow::ListType>(field->type());
+      return ListToNode(list_type, field->name(), field->nullable(), properties, out);
     } break;
     default:
       // TODO: LIST, DENSE_UNION, SPARE_UNION, JSON_SCALAR, DECIMAL, DECIMAL_TEXT, VARCHAR
       return Status::NotImplemented("unhandled type");
   }
-  *out = PrimitiveNode::Make(field->name, repetition, type, logical_type, length);
+  *out = PrimitiveNode::Make(field->name(), repetition, type, logical_type, length);
   return Status::OK();
 }
 

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/cf31e6d1/src/parquet/arrow/writer.cc
----------------------------------------------------------------------
diff --git a/src/parquet/arrow/writer.cc b/src/parquet/arrow/writer.cc
index a92537a..5933937 100644
--- a/src/parquet/arrow/writer.cc
+++ b/src/parquet/arrow/writer.cc
@@ -61,7 +61,7 @@ class LevelBuilder : public ::arrow::ArrayVisitor {
     array_offsets_.push_back(array.offset());                           \
     valid_bitmaps_.push_back(array.null_bitmap_data());                 \
     null_counts_.push_back(array.null_count());                         \
-    values_type_ = array.type_enum();                                   \
+    values_type_ = array.type_id();                                     \
     values_array_ = &array;                                             \
     return Status::OK();                                                \
   }
@@ -125,15 +125,15 @@ class LevelBuilder : public ::arrow::ArrayVisitor {
 
     // Walk downwards to extract nullability
     std::shared_ptr<Field> current_field = field;
-    nullable_.push_back(current_field->nullable);
-    while (current_field->type->num_children() > 0) {
-      if (current_field->type->num_children() > 1) {
+    nullable_.push_back(current_field->nullable());
+    while (current_field->type()->num_children() > 0) {
+      if (current_field->type()->num_children() > 1) {
         return Status::NotImplemented(
             "Fields with more than one child are not supported.");
       } else {
-        current_field = current_field->type->child(0);
+        current_field = current_field->type()->child(0);
       }
-      nullable_.push_back(current_field->nullable);
+      nullable_.push_back(current_field->nullable());
     }
 
     // Generate the levels.