You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@parquet.apache.org by we...@apache.org on 2017/04/10 15:05:31 UTC
parquet-cpp git commit: PARQUET-947: Account for Arrow library consolidation in ARROW-795, API changes in ARROW-782
Repository: parquet-cpp
Updated Branches:
refs/heads/master d0646659c -> cf31e6d1b
PARQUET-947: Account for Arrow library consolidation in ARROW-795, API changes in ARROW-782
Author: Wes McKinney <we...@twosigma.com>
Closes #292 from wesm/PARQUET-947 and squashes the following commits:
2d68d5b [Wes McKinney] Fix typo
35feebc [Wes McKinney] Update to Arrow HEAD
7fa2b1b [Wes McKinney] Account for API changes in ARROW-782
8d6c50d [Wes McKinney] Update Arrow version
7b2016f [Wes McKinney] Remove arrow_io library after ARROW-795
Project: http://git-wip-us.apache.org/repos/asf/parquet-cpp/repo
Commit: http://git-wip-us.apache.org/repos/asf/parquet-cpp/commit/cf31e6d1
Tree: http://git-wip-us.apache.org/repos/asf/parquet-cpp/tree/cf31e6d1
Diff: http://git-wip-us.apache.org/repos/asf/parquet-cpp/diff/cf31e6d1
Branch: refs/heads/master
Commit: cf31e6d1bb27b807bd742cfb33179668c5afb2f3
Parents: d064665
Author: Wes McKinney <we...@twosigma.com>
Authored: Mon Apr 10 11:05:24 2017 -0400
Committer: Wes McKinney <we...@twosigma.com>
Committed: Mon Apr 10 11:05:24 2017 -0400
----------------------------------------------------------------------
CMakeLists.txt | 6 ++----
cmake_modules/FindArrow.cmake | 15 ++-------------
cmake_modules/ThirdpartyToolchain.cmake | 25 +++----------------------
src/parquet/arrow/CMakeLists.txt | 3 ---
src/parquet/arrow/parquet-arrow.pc.in | 2 +-
src/parquet/arrow/reader.cc | 28 ++++++++++++++--------------
src/parquet/arrow/schema.cc | 18 +++++++++---------
src/parquet/arrow/writer.cc | 12 ++++++------
8 files changed, 37 insertions(+), 72 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/cf31e6d1/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/CMakeLists.txt b/CMakeLists.txt
index ee31424..5c3d91b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -481,12 +481,10 @@ endif()
if ("${PARQUET_ARROW_LINKAGE}" STREQUAL "shared")
set(ARROW_LINK_LIBS
- arrow
- arrow_io)
+ arrow)
else()
set(ARROW_LINK_LIBS
- arrow_static
- arrow_io_static)
+ arrow_static)
endif()
#############################################################
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/cf31e6d1/cmake_modules/FindArrow.cmake
----------------------------------------------------------------------
diff --git a/cmake_modules/FindArrow.cmake b/cmake_modules/FindArrow.cmake
index 0a3e7e2..c3f835b 100644
--- a/cmake_modules/FindArrow.cmake
+++ b/cmake_modules/FindArrow.cmake
@@ -46,30 +46,21 @@ find_library(ARROW_LIB_PATH NAMES arrow
${ARROW_SEARCH_LIB_PATH}
NO_DEFAULT_PATH)
-find_library(ARROW_IO_LIB_PATH NAMES arrow_io
- PATHS
- ${ARROW_SEARCH_LIB_PATH}
- NO_DEFAULT_PATH)
-
if (ARROW_INCLUDE_DIR AND (PARQUET_MINIMAL_DEPENDENCY OR ARROW_LIB_PATH))
set(ARROW_FOUND TRUE)
set(ARROW_HEADER_NAME arrow/api.h)
set(ARROW_HEADER ${ARROW_INCLUDE_DIR}/${ARROW_HEADER_NAME})
set(ARROW_LIB_NAME libarrow)
- set(ARROW_IO_LIB_NAME libarrow_io)
get_filename_component(ARROW_LIBS ${ARROW_LIB_PATH} DIRECTORY)
set(ARROW_STATIC_LIB ${ARROW_LIBS}/${ARROW_LIB_NAME}.a)
set(ARROW_SHARED_LIB ${ARROW_LIBS}/${ARROW_LIB_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX})
- set(ARROW_IO_STATIC_LIB ${ARROW_LIBS}/${ARROW_IO_LIB_NAME}.a)
- set(ARROW_IO_SHARED_LIB ${ARROW_LIBS}/${ARROW_IO_LIB_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX})
if (NOT Arrow_FIND_QUIETLY)
if (PARQUET_MINIMAL_DEPENDENCY)
- message(STATUS "Found the Arrow core and IO header: ${ARROW_HEADER}")
+ message(STATUS "Found the Arrow header: ${ARROW_HEADER}")
else ()
- message(STATUS "Found the Arrow core library: ${ARROW_LIB_PATH}")
- message(STATUS "Found the Arrow IO library: ${ARROW_IO_LIB_PATH}")
+ message(STATUS "Found the Arrow library: ${ARROW_LIB_PATH}")
endif ()
endif ()
else ()
@@ -92,6 +83,4 @@ mark_as_advanced(
ARROW_LIBS
ARROW_STATIC_LIB
ARROW_SHARED_LIB
- ARROW_IO_STATIC_LIB
- ARROW_IO_SHARED_LIB
)
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/cf31e6d1/cmake_modules/ThirdpartyToolchain.cmake
----------------------------------------------------------------------
diff --git a/cmake_modules/ThirdpartyToolchain.cmake b/cmake_modules/ThirdpartyToolchain.cmake
index 0cb3ef7..1294f46 100644
--- a/cmake_modules/ThirdpartyToolchain.cmake
+++ b/cmake_modules/ThirdpartyToolchain.cmake
@@ -22,7 +22,7 @@ set(THRIFT_VERSION "0.10.0")
# Brotli 0.5.2 does not install headers/libraries yet, but 0.6.0.dev does
set(BROTLI_VERSION "5db62dcc9d386579609540cdf8869e95ad334bbd")
-set(ARROW_VERSION "15b874e47e3975c5240290ec7ed105bf8d1b56bc")
+set(ARROW_VERSION "c2f28cd07413e262fa0b741c286f86d5c7277c56")
# find boost headers and libs
set(Boost_DEBUG TRUE)
@@ -359,36 +359,25 @@ endif()
## Apache Arrow
pkg_check_modules(ARROW arrow)
-pkg_check_modules(ARROW_IO arrow-io)
-if (ARROW_FOUND AND ARROW_IO_FOUND)
+if (ARROW_FOUND)
set(ARROW_INCLUDE_DIR ${ARROW_INCLUDE_DIRS})
if (COMMAND pkg_get_variable)
pkg_get_variable(ARROW_ABI_VERSION arrow abi_version)
- pkg_get_variable(ARROW_IO_ABI_VERSION arrow-io abi_version)
else()
set(ARROW_ABI_VERSION "")
- set(ARROW_IO_ABI_VERSION "")
endif()
if (ARROW_ABI_VERSION STREQUAL "")
set(ARROW_SHARED_LIB_SUFFIX "")
else()
set(ARROW_SHARED_LIB_SUFFIX ".${ARROW_ABI_VERSION}")
endif()
- if (ARROW_IO_ABI_VERSION STREQUAL "")
- set(ARROW_IO_SHARED_LIB_SUFFIX "")
- else()
- set(ARROW_IO_SHARED_LIB_SUFFIX ".${ARROW_ABI_VERSION}")
- endif()
set(ARROW_LIB_NAME ${CMAKE_SHARED_LIBRARY_PREFIX}arrow)
- set(ARROW_IO_LIB_NAME ${CMAKE_SHARED_LIBRARY_PREFIX}arrow_io)
set(ARROW_SHARED_LIB ${ARROW_LIBDIR}/${ARROW_LIB_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX}${ARROW_SHARED_LIB_SUFFIX})
set(ARROW_STATIC_LIB ${ARROW_LIBDIR}/${ARROW_LIB_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX})
- set(ARROW_IO_SHARED_LIB ${ARROW_LIBDIR}/${ARROW_IO_LIB_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX}${ARROW_IO_SHARED_LIB_SUFFIX})
- set(ARROW_IO_STATIC_LIB ${ARROW_LIBDIR}/${ARROW_IO_LIB_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX})
else()
find_package(Arrow)
endif()
@@ -397,9 +386,7 @@ if (NOT ARROW_FOUND)
set(ARROW_HOME "${ARROW_PREFIX}")
set(ARROW_INCLUDE_DIR "${ARROW_PREFIX}/include")
set(ARROW_SHARED_LIB "${ARROW_PREFIX}/lib/libarrow${CMAKE_SHARED_LIBRARY_SUFFIX}")
- set(ARROW_IO_SHARED_LIB "${ARROW_PREFIX}/lib/libarrow_io${CMAKE_SHARED_LIBRARY_SUFFIX}")
set(ARROW_STATIC_LIB "${ARROW_PREFIX}/lib/libarrow.a")
- set(ARROW_IO_STATIC_LIB "${ARROW_PREFIX}/lib/libarrow_io.a")
set(ARROW_CMAKE_ARGS -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
-DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}
-DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}
@@ -412,7 +399,7 @@ if (NOT ARROW_FOUND)
ExternalProject_Add(arrow_ep
GIT_REPOSITORY https://github.com/apache/arrow.git
GIT_TAG ${ARROW_VERSION}
- BUILD_BYPRODUCTS "${ARROW_SHARED_LIB}" "${ARROW_IO_SHARED_LIB}" "${ARROW_IO_STATIC_LIB}" "${ARROW_STATIC_LIB}"
+ BUILD_BYPRODUCTS "${ARROW_SHARED_LIB}" "${ARROW_STATIC_LIB}"
# With CMake 3.7.0 there is a SOURCE_SUBDIR argument which we can use
# to specify that the CMakeLists.txt of Arrow is located in cpp/
#
@@ -434,16 +421,10 @@ endif()
include_directories(SYSTEM ${ARROW_INCLUDE_DIR})
add_library(arrow SHARED IMPORTED)
set_target_properties(arrow PROPERTIES IMPORTED_LOCATION ${ARROW_SHARED_LIB})
-add_library(arrow_io SHARED IMPORTED)
-set_target_properties(arrow_io PROPERTIES IMPORTED_LOCATION ${ARROW_IO_SHARED_LIB})
add_library(arrow_static STATIC IMPORTED)
set_target_properties(arrow_static PROPERTIES IMPORTED_LOCATION ${ARROW_STATIC_LIB})
-add_library(arrow_io_static STATIC IMPORTED)
-set_target_properties(arrow_io_static PROPERTIES IMPORTED_LOCATION ${ARROW_IO_STATIC_LIB})
if (ARROW_VENDORED)
add_dependencies(arrow arrow_ep)
- add_dependencies(arrow_io arrow_ep)
add_dependencies(arrow_static arrow_ep)
- add_dependencies(arrow_io_static arrow_ep)
endif()
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/cf31e6d1/src/parquet/arrow/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/src/parquet/arrow/CMakeLists.txt b/src/parquet/arrow/CMakeLists.txt
index 8bc6af7..c2fd901 100644
--- a/src/parquet/arrow/CMakeLists.txt
+++ b/src/parquet/arrow/CMakeLists.txt
@@ -31,7 +31,6 @@ add_library(parquet_arrow_objlib OBJECT
# Add dependencies so ExternalProjects are built beforehand
add_dependencies(parquet_arrow_objlib
arrow_static
- arrow_io_static
parquet_static)
# SET_TARGET_PROPERTIES(parquet_arrow PROPERTIES LINKER_LANGUAGE CXX)
@@ -47,7 +46,6 @@ if (PARQUET_BUILD_SHARED)
SOVERSION "${PARQUET_SO_VERSION}")
target_link_libraries(parquet_arrow_shared
arrow
- arrow_io
parquet_shared)
if (PARQUET_RPATH_ORIGIN)
if (APPLE)
@@ -77,7 +75,6 @@ if (PARQUET_BUILD_STATIC)
OUTPUT_NAME "parquet_arrow")
target_link_libraries(parquet_arrow_static
arrow_static
- arrow_io_static
parquet_static)
install(TARGETS parquet_arrow_static
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/cf31e6d1/src/parquet/arrow/parquet-arrow.pc.in
----------------------------------------------------------------------
diff --git a/src/parquet/arrow/parquet-arrow.pc.in b/src/parquet/arrow/parquet-arrow.pc.in
index 511e0b6..20056bc 100644
--- a/src/parquet/arrow/parquet-arrow.pc.in
+++ b/src/parquet/arrow/parquet-arrow.pc.in
@@ -24,4 +24,4 @@ Description: Apache Parquet Apache arrow adapter provides Arrow IPC modules for
Version: @PARQUET_VERSION@
Libs: -L${libdir} -lparquet_arrow
Cflags: -I${includedir}
-Requires: parquet arrow-io
+Requires: parquet arrow
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/cf31e6d1/src/parquet/arrow/reader.cc
----------------------------------------------------------------------
diff --git a/src/parquet/arrow/reader.cc b/src/parquet/arrow/reader.cc
index 823aea9..2ca9207 100644
--- a/src/parquet/arrow/reader.cc
+++ b/src/parquet/arrow/reader.cc
@@ -724,23 +724,23 @@ Status ColumnReader::Impl::WrapIntoListArray(const int16_t* def_levels,
std::vector<bool> nullable;
std::vector<std::shared_ptr<::arrow::Int32Builder>> offset_builders;
std::vector<std::shared_ptr<::arrow::BooleanBuilder>> valid_bits_builders;
- nullable.push_back(current_field->nullable);
- while (current_field->type->num_children() > 0) {
- if (current_field->type->num_children() > 1) {
+ nullable.push_back(current_field->nullable());
+ while (current_field->type()->num_children() > 0) {
+ if (current_field->type()->num_children() > 1) {
return Status::NotImplemented(
"Fields with more than one child are not supported.");
} else {
- if (current_field->type->type != ::arrow::Type::LIST) {
+ if (current_field->type()->id() != ::arrow::Type::LIST) {
return Status::NotImplemented(
"Currently only nesting with Lists is supported.");
}
- current_field = current_field->type->child(0);
+ current_field = current_field->type()->child(0);
}
offset_builders.emplace_back(
std::make_shared<::arrow::Int32Builder>(pool_, ::arrow::int32()));
valid_bits_builders.emplace_back(
std::make_shared<::arrow::BooleanBuilder>(pool_, ::arrow::boolean()));
- nullable.push_back(current_field->nullable);
+ nullable.push_back(current_field->nullable());
}
int64_t list_depth = offset_builders.size();
@@ -860,12 +860,12 @@ Status ColumnReader::Impl::TypedReadBatch(int batch_size, std::shared_ptr<Array>
::arrow::BitUtil::CeilByte(valid_bits_idx_) / 8, false));
}
*out = std::make_shared<ArrayType<ArrowType>>(
- field_->type, valid_bits_idx_, data_buffer_, valid_bits_buffer_, null_count_);
+ field_->type(), valid_bits_idx_, data_buffer_, valid_bits_buffer_, null_count_);
// Relase the ownership as the Buffer is now part of a new Array
valid_bits_buffer_.reset();
} else {
*out = std::make_shared<ArrayType<ArrowType>>(
- field_->type, valid_bits_idx_, data_buffer_);
+ field_->type(), valid_bits_idx_, data_buffer_);
}
// Relase the ownership as the Buffer is now part of a new Array
data_buffer_.reset();
@@ -934,12 +934,12 @@ Status ColumnReader::Impl::TypedReadBatch<::arrow::BooleanType, BooleanType>(
valid_bits_buffer_ = valid_bits_buffer;
}
*out = std::make_shared<BooleanArray>(
- field_->type, valid_bits_idx_, data_buffer_, valid_bits_buffer_, null_count_);
+ field_->type(), valid_bits_idx_, data_buffer_, valid_bits_buffer_, null_count_);
// Relase the ownership
data_buffer_.reset();
valid_bits_buffer_.reset();
} else {
- *out = std::make_shared<BooleanArray>(field_->type, valid_bits_idx_, data_buffer_);
+ *out = std::make_shared<BooleanArray>(field_->type(), valid_bits_idx_, data_buffer_);
data_buffer_.reset();
}
@@ -1028,7 +1028,7 @@ Status ColumnReader::Impl::NextBatch(int batch_size, std::shared_ptr<Array>* out
return Status::OK();
}
- switch (field_->type->type) {
+ switch (field_->type()->id()) {
TYPED_BATCH_CASE(BOOL, ::arrow::BooleanType, BooleanType)
TYPED_BATCH_CASE(UINT8, ::arrow::UInt8Type, Int32Type)
TYPED_BATCH_CASE(INT8, ::arrow::Int8Type, Int32Type)
@@ -1045,8 +1045,8 @@ Status ColumnReader::Impl::NextBatch(int batch_size, std::shared_ptr<Array>* out
TYPED_BATCH_CASE(BINARY, ::arrow::BinaryType, ByteArrayType)
case ::arrow::Type::TIMESTAMP: {
::arrow::TimestampType* timestamp_type =
- static_cast<::arrow::TimestampType*>(field_->type.get());
- switch (timestamp_type->unit) {
+ static_cast<::arrow::TimestampType*>(field_->type().get());
+ switch (timestamp_type->unit()) {
case ::arrow::TimeUnit::MILLI:
return TypedReadBatch<::arrow::TimestampType, Int64Type>(batch_size, out);
break;
@@ -1060,7 +1060,7 @@ Status ColumnReader::Impl::NextBatch(int batch_size, std::shared_ptr<Array>* out
}
default:
std::stringstream ss;
- ss << "No support for reading columns of type " << field_->type->ToString();
+ ss << "No support for reading columns of type " << field_->type()->ToString();
return Status::NotImplemented(ss.str());
}
}
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/cf31e6d1/src/parquet/arrow/schema.cc
----------------------------------------------------------------------
diff --git a/src/parquet/arrow/schema.cc b/src/parquet/arrow/schema.cc
index f0d05fc..76b7f77 100644
--- a/src/parquet/arrow/schema.cc
+++ b/src/parquet/arrow/schema.cc
@@ -327,10 +327,10 @@ Status FieldToNode(const std::shared_ptr<Field>& field,
LogicalType::type logical_type = LogicalType::NONE;
ParquetType::type type;
Repetition::type repetition =
- field->nullable ? Repetition::OPTIONAL : Repetition::REQUIRED;
+ field->nullable() ? Repetition::OPTIONAL : Repetition::REQUIRED;
int length = -1;
- switch (field->type->type) {
+ switch (field->type()->id()) {
// TODO:
// case ArrowType::NA:
// break;
@@ -393,8 +393,8 @@ Status FieldToNode(const std::shared_ptr<Field>& field,
logical_type = LogicalType::DATE;
break;
case ArrowType::TIMESTAMP: {
- auto timestamp_type = static_cast<::arrow::TimestampType*>(field->type.get());
- if (timestamp_type->unit != ::arrow::TimestampType::Unit::MILLI) {
+ auto timestamp_type = static_cast<::arrow::TimestampType*>(field->type().get());
+ if (timestamp_type->unit() != ::arrow::TimestampType::Unit::MILLI) {
return Status::NotImplemented(
"Other timestamp units than millisecond are not yet support with parquet.");
}
@@ -410,18 +410,18 @@ Status FieldToNode(const std::shared_ptr<Field>& field,
logical_type = LogicalType::TIME_MICROS;
break;
case ArrowType::STRUCT: {
- auto struct_type = std::static_pointer_cast<::arrow::StructType>(field->type);
- return StructToNode(struct_type, field->name, field->nullable, properties, out);
+ auto struct_type = std::static_pointer_cast<::arrow::StructType>(field->type());
+ return StructToNode(struct_type, field->name(), field->nullable(), properties, out);
} break;
case ArrowType::LIST: {
- auto list_type = std::static_pointer_cast<::arrow::ListType>(field->type);
- return ListToNode(list_type, field->name, field->nullable, properties, out);
+ auto list_type = std::static_pointer_cast<::arrow::ListType>(field->type());
+ return ListToNode(list_type, field->name(), field->nullable(), properties, out);
} break;
default:
// TODO: LIST, DENSE_UNION, SPARE_UNION, JSON_SCALAR, DECIMAL, DECIMAL_TEXT, VARCHAR
return Status::NotImplemented("unhandled type");
}
- *out = PrimitiveNode::Make(field->name, repetition, type, logical_type, length);
+ *out = PrimitiveNode::Make(field->name(), repetition, type, logical_type, length);
return Status::OK();
}
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/cf31e6d1/src/parquet/arrow/writer.cc
----------------------------------------------------------------------
diff --git a/src/parquet/arrow/writer.cc b/src/parquet/arrow/writer.cc
index a92537a..5933937 100644
--- a/src/parquet/arrow/writer.cc
+++ b/src/parquet/arrow/writer.cc
@@ -61,7 +61,7 @@ class LevelBuilder : public ::arrow::ArrayVisitor {
array_offsets_.push_back(array.offset()); \
valid_bitmaps_.push_back(array.null_bitmap_data()); \
null_counts_.push_back(array.null_count()); \
- values_type_ = array.type_enum(); \
+ values_type_ = array.type_id(); \
values_array_ = &array; \
return Status::OK(); \
}
@@ -125,15 +125,15 @@ class LevelBuilder : public ::arrow::ArrayVisitor {
// Walk downwards to extract nullability
std::shared_ptr<Field> current_field = field;
- nullable_.push_back(current_field->nullable);
- while (current_field->type->num_children() > 0) {
- if (current_field->type->num_children() > 1) {
+ nullable_.push_back(current_field->nullable());
+ while (current_field->type()->num_children() > 0) {
+ if (current_field->type()->num_children() > 1) {
return Status::NotImplemented(
"Fields with more than one child are not supported.");
} else {
- current_field = current_field->type->child(0);
+ current_field = current_field->type()->child(0);
}
- nullable_.push_back(current_field->nullable);
+ nullable_.push_back(current_field->nullable());
}
// Generate the levels.