You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@parquet.apache.org by we...@apache.org on 2017/03/23 13:47:01 UTC
parquet-cpp git commit: PARQUET-919: Account for ARROW-683 changes,
but make no functional changes. Set PARQUET_ARROW=on by default
Repository: parquet-cpp
Updated Branches:
refs/heads/master d7d01d765 -> 02a9f0dbf
PARQUET-919: Account for ARROW-683 changes, but make no functional changes. Set PARQUET_ARROW=on by default
Author: Wes McKinney <we...@twosigma.com>
Closes #272 from wesm/PARQUET-919 and squashes the following commits:
8ece0e8 [Wes McKinney] Skip generated Thrift files in cpplint
4ad23ce [Wes McKinney] Update Arrow version
9006dd1 [Wes McKinney] Account for ARROW-683 changes, but make no functional changes. Set PARQUET_ARROW=on by default
Project: http://git-wip-us.apache.org/repos/asf/parquet-cpp/repo
Commit: http://git-wip-us.apache.org/repos/asf/parquet-cpp/commit/02a9f0db
Tree: http://git-wip-us.apache.org/repos/asf/parquet-cpp/tree/02a9f0db
Diff: http://git-wip-us.apache.org/repos/asf/parquet-cpp/diff/02a9f0db
Branch: refs/heads/master
Commit: 02a9f0dbff0aa59da9c000148e866081419a4348
Parents: d7d01d7
Author: Wes McKinney <we...@twosigma.com>
Authored: Thu Mar 23 09:46:53 2017 -0400
Committer: Wes McKinney <we...@twosigma.com>
Committed: Thu Mar 23 09:46:53 2017 -0400
----------------------------------------------------------------------
CMakeLists.txt | 4 ++--
cmake_modules/ThirdpartyToolchain.cmake | 2 +-
src/parquet/arrow/arrow-reader-writer-test.cc | 6 +++---
src/parquet/arrow/arrow-schema-test.cc | 4 ++--
src/parquet/arrow/reader.cc | 6 +++---
src/parquet/arrow/schema.cc | 8 ++++++--
src/parquet/arrow/test-util.h | 2 +-
src/parquet/arrow/writer.cc | 8 ++++----
8 files changed, 22 insertions(+), 18 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/02a9f0db/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 6dc7866..75f855b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -92,7 +92,7 @@ if ("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}")
ON)
option(PARQUET_ARROW
"Build the Arrow support"
- OFF)
+ ON)
option(PARQUET_ZLIB_VENDORED
"Build our own zlib (some libz.a aren't configured for static linking)"
ON)
@@ -388,7 +388,7 @@ if (UNIX)
--verbose=2
--linelength=90
--filter=-whitespace/comments,-readability/todo,-build/header_guard,-runtime/references,-readability/check,-build/c++11,-build/include_order
- `find ${CMAKE_CURRENT_SOURCE_DIR}/src ${CMAKE_CURRENT_SOURCE_DIR}/tools ${CMAKE_CURRENT_SOURCE_DIR}/examples ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks -name \\*.cc -or -name \\*.h | sed -e '/parquet\\/thrift/g'`)
+ `find ${CMAKE_CURRENT_SOURCE_DIR}/src ${CMAKE_CURRENT_SOURCE_DIR}/tools ${CMAKE_CURRENT_SOURCE_DIR}/examples ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks -name \\*.cc -or -name \\*.h | sed -e '/parquet\\/parquet_/g'`)
endif (UNIX)
############################################################
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/02a9f0db/cmake_modules/ThirdpartyToolchain.cmake
----------------------------------------------------------------------
diff --git a/cmake_modules/ThirdpartyToolchain.cmake b/cmake_modules/ThirdpartyToolchain.cmake
index ab25199..1e43308 100644
--- a/cmake_modules/ThirdpartyToolchain.cmake
+++ b/cmake_modules/ThirdpartyToolchain.cmake
@@ -22,7 +22,7 @@ set(THRIFT_VERSION "0.10.0")
# Brotli 0.5.2 does not install headers/libraries yet, but 0.6.0.dev does
set(BROTLI_VERSION "5db62dcc9d386579609540cdf8869e95ad334bbd")
-set(ARROW_VERSION "fa8d27f314b7c21c611d1c5caaa9b32ae0cb2b06")
+set(ARROW_VERSION "e8f6a492d30d32cd67fe3a537b3aec4cbae566c9")
# find boost headers and libs
# Find shared Boost libraries.
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/02a9f0db/src/parquet/arrow/arrow-reader-writer-test.cc
----------------------------------------------------------------------
diff --git a/src/parquet/arrow/arrow-reader-writer-test.cc b/src/parquet/arrow/arrow-reader-writer-test.cc
index ae3002d..4598cab 100644
--- a/src/parquet/arrow/arrow-reader-writer-test.cc
+++ b/src/parquet/arrow/arrow-reader-writer-test.cc
@@ -148,13 +148,13 @@ struct test_traits<::arrow::TimestampType> {
const int64_t test_traits<::arrow::TimestampType>::value(14695634030000);
template <>
-struct test_traits<::arrow::DateType> {
+struct test_traits<::arrow::Date64Type> {
static constexpr ParquetType::type parquet_enum = ParquetType::INT32;
static constexpr LogicalType::type logical_enum = LogicalType::DATE;
static int64_t const value;
};
-const int64_t test_traits<::arrow::DateType>::value(14688000000000);
+const int64_t test_traits<::arrow::Date64Type>::value(14688000000000);
template <>
struct test_traits<::arrow::FloatType> {
@@ -317,7 +317,7 @@ class TestParquetIO : public ::testing::Test {
typedef ::testing::Types<::arrow::BooleanType, ::arrow::UInt8Type, ::arrow::Int8Type,
::arrow::UInt16Type, ::arrow::Int16Type, ::arrow::Int32Type, ::arrow::UInt64Type,
- ::arrow::Int64Type, ::arrow::TimestampType, ::arrow::DateType, ::arrow::FloatType,
+ ::arrow::Int64Type, ::arrow::TimestampType, ::arrow::Date64Type, ::arrow::FloatType,
::arrow::DoubleType, ::arrow::StringType, ::arrow::BinaryType>
TestTypes;
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/02a9f0db/src/parquet/arrow/arrow-schema-test.cc
----------------------------------------------------------------------
diff --git a/src/parquet/arrow/arrow-schema-test.cc b/src/parquet/arrow/arrow-schema-test.cc
index 8db792f..83100d3 100644
--- a/src/parquet/arrow/arrow-schema-test.cc
+++ b/src/parquet/arrow/arrow-schema-test.cc
@@ -100,7 +100,7 @@ TEST_F(TestConvertParquetSchema, ParquetFlatPrimitives) {
parquet_fields.push_back(PrimitiveNode::Make(
"date", Repetition::REQUIRED, ParquetType::INT32, LogicalType::DATE));
- arrow_fields.push_back(std::make_shared<Field>("date", ::arrow::date(), false));
+ arrow_fields.push_back(std::make_shared<Field>("date", ::arrow::date64(), false));
parquet_fields.push_back(
PrimitiveNode::Make("timestamp96", Repetition::REQUIRED, ParquetType::INT96));
@@ -397,7 +397,7 @@ TEST_F(TestConvertArrowSchema, ParquetFlatPrimitives) {
parquet_fields.push_back(PrimitiveNode::Make(
"date", Repetition::REQUIRED, ParquetType::INT32, LogicalType::DATE));
- arrow_fields.push_back(std::make_shared<Field>("date", ::arrow::date(), false));
+ arrow_fields.push_back(std::make_shared<Field>("date", ::arrow::date64(), false));
parquet_fields.push_back(PrimitiveNode::Make("timestamp", Repetition::REQUIRED,
ParquetType::INT64, LogicalType::TIMESTAMP_MILLIS));
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/02a9f0db/src/parquet/arrow/reader.cc
----------------------------------------------------------------------
diff --git a/src/parquet/arrow/reader.cc b/src/parquet/arrow/reader.cc
index 37683c1..d1bf38e 100644
--- a/src/parquet/arrow/reader.cc
+++ b/src/parquet/arrow/reader.cc
@@ -369,7 +369,7 @@ Status ColumnReader::Impl::ReadNonNullableBatch<::arrow::TimestampType, Int96Typ
}
template <>
-Status ColumnReader::Impl::ReadNonNullableBatch<::arrow::DateType, Int32Type>(
+Status ColumnReader::Impl::ReadNonNullableBatch<::arrow::Date64Type, Int32Type>(
TypedColumnReader<Int32Type>* reader, int64_t values_to_read, int64_t* levels_read) {
RETURN_NOT_OK(values_buffer_.Resize(values_to_read * sizeof(int32_t), false));
auto values = reinterpret_cast<int32_t*>(values_buffer_.mutable_data());
@@ -480,7 +480,7 @@ Status ColumnReader::Impl::ReadNullableBatch<::arrow::TimestampType, Int96Type>(
}
template <>
-Status ColumnReader::Impl::ReadNullableBatch<::arrow::DateType, Int32Type>(
+Status ColumnReader::Impl::ReadNullableBatch<::arrow::Date64Type, Int32Type>(
TypedColumnReader<Int32Type>* reader, int16_t* def_levels, int16_t* rep_levels,
int64_t values_to_read, int64_t* levels_read, int64_t* values_read) {
RETURN_NOT_OK(values_buffer_.Resize(values_to_read * sizeof(int32_t), false));
@@ -883,7 +883,7 @@ Status ColumnReader::Impl::NextBatch(int batch_size, std::shared_ptr<Array>* out
TYPED_BATCH_CASE(INT16, ::arrow::Int16Type, Int32Type)
TYPED_BATCH_CASE(UINT32, ::arrow::UInt32Type, Int32Type)
TYPED_BATCH_CASE(INT32, ::arrow::Int32Type, Int32Type)
- TYPED_BATCH_CASE(DATE, ::arrow::DateType, Int32Type)
+ TYPED_BATCH_CASE(DATE64, ::arrow::Date64Type, Int32Type)
TYPED_BATCH_CASE(UINT64, ::arrow::UInt64Type, Int64Type)
TYPED_BATCH_CASE(INT64, ::arrow::Int64Type, Int64Type)
TYPED_BATCH_CASE(FLOAT, ::arrow::FloatType, FloatType)
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/02a9f0db/src/parquet/arrow/schema.cc
----------------------------------------------------------------------
diff --git a/src/parquet/arrow/schema.cc b/src/parquet/arrow/schema.cc
index 0c336d9..ed989cb 100644
--- a/src/parquet/arrow/schema.cc
+++ b/src/parquet/arrow/schema.cc
@@ -108,7 +108,7 @@ static Status FromInt32(const PrimitiveNode* node, TypePtr* out) {
*out = ::arrow::uint32();
break;
case LogicalType::DATE:
- *out = ::arrow::date();
+ *out = ::arrow::date64();
break;
case LogicalType::DECIMAL:
*out = MakeDecimalType(node);
@@ -378,7 +378,11 @@ Status FieldToNode(const std::shared_ptr<Field>& field,
case ArrowType::BINARY:
type = ParquetType::BYTE_ARRAY;
break;
- case ArrowType::DATE:
+ case ArrowType::DATE32:
+ type = ParquetType::INT32;
+ logical_type = LogicalType::DATE;
+ break;
+ case ArrowType::DATE64:
type = ParquetType::INT32;
logical_type = LogicalType::DATE;
break;
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/02a9f0db/src/parquet/arrow/test-util.h
----------------------------------------------------------------------
diff --git a/src/parquet/arrow/test-util.h b/src/parquet/arrow/test-util.h
index 07f1f28..1cf1376 100644
--- a/src/parquet/arrow/test-util.h
+++ b/src/parquet/arrow/test-util.h
@@ -34,7 +34,7 @@ template <typename ArrowType>
using is_arrow_int = std::is_integral<typename ArrowType::c_type>;
template <typename ArrowType>
-using is_arrow_date = std::is_same<ArrowType, ::arrow::DateType>;
+using is_arrow_date = std::is_same<ArrowType, ::arrow::Date64Type>;
template <typename ArrowType>
using is_arrow_string = std::is_same<ArrowType, ::arrow::StringType>;
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/02a9f0db/src/parquet/arrow/writer.cc
----------------------------------------------------------------------
diff --git a/src/parquet/arrow/writer.cc b/src/parquet/arrow/writer.cc
index 6e927cd..f2ee734 100644
--- a/src/parquet/arrow/writer.cc
+++ b/src/parquet/arrow/writer.cc
@@ -80,7 +80,7 @@ class LevelBuilder : public ::arrow::ArrayVisitor {
PRIMITIVE_VISIT(Double)
PRIMITIVE_VISIT(String)
PRIMITIVE_VISIT(Binary)
- PRIMITIVE_VISIT(Date)
+ PRIMITIVE_VISIT(Date64)
PRIMITIVE_VISIT(Time)
PRIMITIVE_VISIT(Timestamp)
PRIMITIVE_VISIT(Interval)
@@ -332,7 +332,7 @@ Status FileWriter::Impl::WriteNonNullableBatch(TypedColumnWriter<ParquetType>* w
}
template <>
-Status FileWriter::Impl::WriteNonNullableBatch<Int32Type, ::arrow::DateType>(
+Status FileWriter::Impl::WriteNonNullableBatch<Int32Type, ::arrow::Date64Type>(
TypedColumnWriter<Int32Type>* writer, int64_t num_values, int64_t num_levels,
const int16_t* def_levels, const int16_t* rep_levels, const int64_t* data_ptr) {
RETURN_NOT_OK(data_buffer_.Resize(num_values * sizeof(int32_t)));
@@ -384,7 +384,7 @@ Status FileWriter::Impl::WriteNullableBatch(TypedColumnWriter<ParquetType>* writ
}
template <>
-Status FileWriter::Impl::WriteNullableBatch<Int32Type, ::arrow::DateType>(
+Status FileWriter::Impl::WriteNullableBatch<Int32Type, ::arrow::Date64Type>(
TypedColumnWriter<Int32Type>* writer, int64_t num_values, int64_t num_levels,
const int16_t* def_levels, const int16_t* rep_levels, const uint8_t* valid_bits,
int64_t valid_bits_offset, const int64_t* data_ptr) {
@@ -555,7 +555,7 @@ Status FileWriter::Impl::WriteColumnChunk(const Array& data) {
WRITE_BATCH_CASE(INT16, Int16Type, Int32Type)
WRITE_BATCH_CASE(UINT16, UInt16Type, Int32Type)
WRITE_BATCH_CASE(INT32, Int32Type, Int32Type)
- WRITE_BATCH_CASE(DATE, DateType, Int32Type)
+ WRITE_BATCH_CASE(DATE64, Date64Type, Int32Type)
WRITE_BATCH_CASE(INT64, Int64Type, Int64Type)
WRITE_BATCH_CASE(TIMESTAMP, TimestampType, Int64Type)
WRITE_BATCH_CASE(UINT64, UInt64Type, Int64Type)