Posted to commits@parquet.apache.org by we...@apache.org on 2017/03/23 13:47:01 UTC

parquet-cpp git commit: PARQUET-919: Account for ARROW-683 changes, but make no functional changes. Set PARQUET_ARROW=on by default

Repository: parquet-cpp
Updated Branches:
  refs/heads/master d7d01d765 -> 02a9f0dbf


PARQUET-919: Account for ARROW-683 changes, but make no functional changes. Set PARQUET_ARROW=on by default

Author: Wes McKinney <we...@twosigma.com>

Closes #272 from wesm/PARQUET-919 and squashes the following commits:

8ece0e8 [Wes McKinney] Skip generated Thrift files in cpplint
4ad23ce [Wes McKinney] Update Arrow version
9006dd1 [Wes McKinney] Account for ARROW-683 changes, but make no functional changes. Set PARQUET_ARROW=on by default
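
For downstream code, the visible part of ARROW-683 is the date type rename: Arrow's single DateType (milliseconds since the UNIX epoch) became Date64Type, a day-granularity Date32Type was added, and the ::arrow::date() factory became ::arrow::date64(). A minimal sketch of an updated call site, mirroring the test and schema hunks below (the arrow/type.h include path and the MakeDateField helper are illustrative assumptions):

  #include <memory>
  #include "arrow/type.h"  // assumed header providing ::arrow::Field and the date64() factory

  // Before ARROW-683 this was std::make_shared<::arrow::Field>("date", ::arrow::date(), false);
  // the rename makes the 64-bit (millisecond) date type explicit.
  std::shared_ptr<::arrow::Field> MakeDateField() {
    return std::make_shared<::arrow::Field>("date", ::arrow::date64(), /*nullable=*/false);
  }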


Project: http://git-wip-us.apache.org/repos/asf/parquet-cpp/repo
Commit: http://git-wip-us.apache.org/repos/asf/parquet-cpp/commit/02a9f0db
Tree: http://git-wip-us.apache.org/repos/asf/parquet-cpp/tree/02a9f0db
Diff: http://git-wip-us.apache.org/repos/asf/parquet-cpp/diff/02a9f0db

Branch: refs/heads/master
Commit: 02a9f0dbff0aa59da9c000148e866081419a4348
Parents: d7d01d7
Author: Wes McKinney <we...@twosigma.com>
Authored: Thu Mar 23 09:46:53 2017 -0400
Committer: Wes McKinney <we...@twosigma.com>
Committed: Thu Mar 23 09:46:53 2017 -0400

----------------------------------------------------------------------
 CMakeLists.txt                                | 4 ++--
 cmake_modules/ThirdpartyToolchain.cmake       | 2 +-
 src/parquet/arrow/arrow-reader-writer-test.cc | 6 +++---
 src/parquet/arrow/arrow-schema-test.cc        | 4 ++--
 src/parquet/arrow/reader.cc                   | 6 +++---
 src/parquet/arrow/schema.cc                   | 8 ++++++--
 src/parquet/arrow/test-util.h                 | 2 +-
 src/parquet/arrow/writer.cc                   | 8 ++++----
 8 files changed, 22 insertions(+), 18 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/02a9f0db/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 6dc7866..75f855b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -92,7 +92,7 @@ if ("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}")
     ON)
   option(PARQUET_ARROW
     "Build the Arrow support"
-    OFF)
+    ON)
   option(PARQUET_ZLIB_VENDORED
     "Build our own zlib (some libz.a aren't configured for static linking)"
     ON)
@@ -388,7 +388,7 @@ if (UNIX)
   --verbose=2
   --linelength=90
   --filter=-whitespace/comments,-readability/todo,-build/header_guard,-runtime/references,-readability/check,-build/c++11,-build/include_order
-    `find ${CMAKE_CURRENT_SOURCE_DIR}/src ${CMAKE_CURRENT_SOURCE_DIR}/tools ${CMAKE_CURRENT_SOURCE_DIR}/examples  ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks -name \\*.cc -or -name \\*.h | sed -e '/parquet\\/thrift/g'`)
+    `find ${CMAKE_CURRENT_SOURCE_DIR}/src ${CMAKE_CURRENT_SOURCE_DIR}/tools ${CMAKE_CURRENT_SOURCE_DIR}/examples  ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks -name \\*.cc -or -name \\*.h | sed -e '/parquet\\/parquet_/g'`)
 endif (UNIX)
 
 ############################################################

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/02a9f0db/cmake_modules/ThirdpartyToolchain.cmake
----------------------------------------------------------------------
diff --git a/cmake_modules/ThirdpartyToolchain.cmake b/cmake_modules/ThirdpartyToolchain.cmake
index ab25199..1e43308 100644
--- a/cmake_modules/ThirdpartyToolchain.cmake
+++ b/cmake_modules/ThirdpartyToolchain.cmake
@@ -22,7 +22,7 @@ set(THRIFT_VERSION "0.10.0")
 
 # Brotli 0.5.2 does not install headers/libraries yet, but 0.6.0.dev does
 set(BROTLI_VERSION "5db62dcc9d386579609540cdf8869e95ad334bbd")
-set(ARROW_VERSION "fa8d27f314b7c21c611d1c5caaa9b32ae0cb2b06")
+set(ARROW_VERSION "e8f6a492d30d32cd67fe3a537b3aec4cbae566c9")
 
 # find boost headers and libs
 # Find shared Boost libraries.

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/02a9f0db/src/parquet/arrow/arrow-reader-writer-test.cc
----------------------------------------------------------------------
diff --git a/src/parquet/arrow/arrow-reader-writer-test.cc b/src/parquet/arrow/arrow-reader-writer-test.cc
index ae3002d..4598cab 100644
--- a/src/parquet/arrow/arrow-reader-writer-test.cc
+++ b/src/parquet/arrow/arrow-reader-writer-test.cc
@@ -148,13 +148,13 @@ struct test_traits<::arrow::TimestampType> {
 const int64_t test_traits<::arrow::TimestampType>::value(14695634030000);
 
 template <>
-struct test_traits<::arrow::DateType> {
+struct test_traits<::arrow::Date64Type> {
   static constexpr ParquetType::type parquet_enum = ParquetType::INT32;
   static constexpr LogicalType::type logical_enum = LogicalType::DATE;
   static int64_t const value;
 };
 
-const int64_t test_traits<::arrow::DateType>::value(14688000000000);
+const int64_t test_traits<::arrow::Date64Type>::value(14688000000000);
 
 template <>
 struct test_traits<::arrow::FloatType> {
@@ -317,7 +317,7 @@ class TestParquetIO : public ::testing::Test {
 
 typedef ::testing::Types<::arrow::BooleanType, ::arrow::UInt8Type, ::arrow::Int8Type,
     ::arrow::UInt16Type, ::arrow::Int16Type, ::arrow::Int32Type, ::arrow::UInt64Type,
-    ::arrow::Int64Type, ::arrow::TimestampType, ::arrow::DateType, ::arrow::FloatType,
+    ::arrow::Int64Type, ::arrow::TimestampType, ::arrow::Date64Type, ::arrow::FloatType,
     ::arrow::DoubleType, ::arrow::StringType, ::arrow::BinaryType>
     TestTypes;
 

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/02a9f0db/src/parquet/arrow/arrow-schema-test.cc
----------------------------------------------------------------------
diff --git a/src/parquet/arrow/arrow-schema-test.cc b/src/parquet/arrow/arrow-schema-test.cc
index 8db792f..83100d3 100644
--- a/src/parquet/arrow/arrow-schema-test.cc
+++ b/src/parquet/arrow/arrow-schema-test.cc
@@ -100,7 +100,7 @@ TEST_F(TestConvertParquetSchema, ParquetFlatPrimitives) {
 
   parquet_fields.push_back(PrimitiveNode::Make(
       "date", Repetition::REQUIRED, ParquetType::INT32, LogicalType::DATE));
-  arrow_fields.push_back(std::make_shared<Field>("date", ::arrow::date(), false));
+  arrow_fields.push_back(std::make_shared<Field>("date", ::arrow::date64(), false));
 
   parquet_fields.push_back(
       PrimitiveNode::Make("timestamp96", Repetition::REQUIRED, ParquetType::INT96));
@@ -397,7 +397,7 @@ TEST_F(TestConvertArrowSchema, ParquetFlatPrimitives) {
 
   parquet_fields.push_back(PrimitiveNode::Make(
       "date", Repetition::REQUIRED, ParquetType::INT32, LogicalType::DATE));
-  arrow_fields.push_back(std::make_shared<Field>("date", ::arrow::date(), false));
+  arrow_fields.push_back(std::make_shared<Field>("date", ::arrow::date64(), false));
 
   parquet_fields.push_back(PrimitiveNode::Make("timestamp", Repetition::REQUIRED,
       ParquetType::INT64, LogicalType::TIMESTAMP_MILLIS));

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/02a9f0db/src/parquet/arrow/reader.cc
----------------------------------------------------------------------
diff --git a/src/parquet/arrow/reader.cc b/src/parquet/arrow/reader.cc
index 37683c1..d1bf38e 100644
--- a/src/parquet/arrow/reader.cc
+++ b/src/parquet/arrow/reader.cc
@@ -369,7 +369,7 @@ Status ColumnReader::Impl::ReadNonNullableBatch<::arrow::TimestampType, Int96Typ
 }
 
 template <>
-Status ColumnReader::Impl::ReadNonNullableBatch<::arrow::DateType, Int32Type>(
+Status ColumnReader::Impl::ReadNonNullableBatch<::arrow::Date64Type, Int32Type>(
     TypedColumnReader<Int32Type>* reader, int64_t values_to_read, int64_t* levels_read) {
   RETURN_NOT_OK(values_buffer_.Resize(values_to_read * sizeof(int32_t), false));
   auto values = reinterpret_cast<int32_t*>(values_buffer_.mutable_data());
@@ -480,7 +480,7 @@ Status ColumnReader::Impl::ReadNullableBatch<::arrow::TimestampType, Int96Type>(
 }
 
 template <>
-Status ColumnReader::Impl::ReadNullableBatch<::arrow::DateType, Int32Type>(
+Status ColumnReader::Impl::ReadNullableBatch<::arrow::Date64Type, Int32Type>(
     TypedColumnReader<Int32Type>* reader, int16_t* def_levels, int16_t* rep_levels,
     int64_t values_to_read, int64_t* levels_read, int64_t* values_read) {
   RETURN_NOT_OK(values_buffer_.Resize(values_to_read * sizeof(int32_t), false));
@@ -883,7 +883,7 @@ Status ColumnReader::Impl::NextBatch(int batch_size, std::shared_ptr<Array>* out
     TYPED_BATCH_CASE(INT16, ::arrow::Int16Type, Int32Type)
     TYPED_BATCH_CASE(UINT32, ::arrow::UInt32Type, Int32Type)
     TYPED_BATCH_CASE(INT32, ::arrow::Int32Type, Int32Type)
-    TYPED_BATCH_CASE(DATE, ::arrow::DateType, Int32Type)
+    TYPED_BATCH_CASE(DATE64, ::arrow::Date64Type, Int32Type)
     TYPED_BATCH_CASE(UINT64, ::arrow::UInt64Type, Int64Type)
     TYPED_BATCH_CASE(INT64, ::arrow::Int64Type, Int64Type)
     TYPED_BATCH_CASE(FLOAT, ::arrow::FloatType, FloatType)

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/02a9f0db/src/parquet/arrow/schema.cc
----------------------------------------------------------------------
diff --git a/src/parquet/arrow/schema.cc b/src/parquet/arrow/schema.cc
index 0c336d9..ed989cb 100644
--- a/src/parquet/arrow/schema.cc
+++ b/src/parquet/arrow/schema.cc
@@ -108,7 +108,7 @@ static Status FromInt32(const PrimitiveNode* node, TypePtr* out) {
       *out = ::arrow::uint32();
       break;
     case LogicalType::DATE:
-      *out = ::arrow::date();
+      *out = ::arrow::date64();
       break;
     case LogicalType::DECIMAL:
       *out = MakeDecimalType(node);
@@ -378,7 +378,11 @@ Status FieldToNode(const std::shared_ptr<Field>& field,
     case ArrowType::BINARY:
       type = ParquetType::BYTE_ARRAY;
       break;
-    case ArrowType::DATE:
+    case ArrowType::DATE32:
+      type = ParquetType::INT32;
+      logical_type = LogicalType::DATE;
+      break;
+    case ArrowType::DATE64:
       type = ParquetType::INT32;
       logical_type = LogicalType::DATE;
       break;
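
Both Arrow date flavors now map to the same Parquet representation: an INT32 column annotated with the DATE logical type. A minimal sketch of the node FieldToNode produces for a non-nullable date field, mirroring the schema tests above (the parquet/schema.h include path, the NodePtr alias, and the MakeParquetDateNode helper are assumptions for illustration):

  #include "parquet/schema.h"  // assumed header for PrimitiveNode, Repetition, Type, LogicalType

  using parquet::LogicalType;
  using parquet::Repetition;
  using parquet::Type;
  using parquet::schema::PrimitiveNode;

  // Node produced for a required Arrow date32()/date64() field named "date";
  // a nullable field would use Repetition::OPTIONAL instead.
  parquet::schema::NodePtr MakeParquetDateNode() {
    return PrimitiveNode::Make(
        "date", Repetition::REQUIRED, Type::INT32, LogicalType::DATE);
  }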

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/02a9f0db/src/parquet/arrow/test-util.h
----------------------------------------------------------------------
diff --git a/src/parquet/arrow/test-util.h b/src/parquet/arrow/test-util.h
index 07f1f28..1cf1376 100644
--- a/src/parquet/arrow/test-util.h
+++ b/src/parquet/arrow/test-util.h
@@ -34,7 +34,7 @@ template <typename ArrowType>
 using is_arrow_int = std::is_integral<typename ArrowType::c_type>;
 
 template <typename ArrowType>
-using is_arrow_date = std::is_same<ArrowType, ::arrow::DateType>;
+using is_arrow_date = std::is_same<ArrowType, ::arrow::Date64Type>;
 
 template <typename ArrowType>
 using is_arrow_string = std::is_same<ArrowType, ::arrow::StringType>;

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/02a9f0db/src/parquet/arrow/writer.cc
----------------------------------------------------------------------
diff --git a/src/parquet/arrow/writer.cc b/src/parquet/arrow/writer.cc
index 6e927cd..f2ee734 100644
--- a/src/parquet/arrow/writer.cc
+++ b/src/parquet/arrow/writer.cc
@@ -80,7 +80,7 @@ class LevelBuilder : public ::arrow::ArrayVisitor {
   PRIMITIVE_VISIT(Double)
   PRIMITIVE_VISIT(String)
   PRIMITIVE_VISIT(Binary)
-  PRIMITIVE_VISIT(Date)
+  PRIMITIVE_VISIT(Date64)
   PRIMITIVE_VISIT(Time)
   PRIMITIVE_VISIT(Timestamp)
   PRIMITIVE_VISIT(Interval)
@@ -332,7 +332,7 @@ Status FileWriter::Impl::WriteNonNullableBatch(TypedColumnWriter<ParquetType>* w
 }
 
 template <>
-Status FileWriter::Impl::WriteNonNullableBatch<Int32Type, ::arrow::DateType>(
+Status FileWriter::Impl::WriteNonNullableBatch<Int32Type, ::arrow::Date64Type>(
     TypedColumnWriter<Int32Type>* writer, int64_t num_values, int64_t num_levels,
     const int16_t* def_levels, const int16_t* rep_levels, const int64_t* data_ptr) {
   RETURN_NOT_OK(data_buffer_.Resize(num_values * sizeof(int32_t)));
@@ -384,7 +384,7 @@ Status FileWriter::Impl::WriteNullableBatch(TypedColumnWriter<ParquetType>* writ
 }
 
 template <>
-Status FileWriter::Impl::WriteNullableBatch<Int32Type, ::arrow::DateType>(
+Status FileWriter::Impl::WriteNullableBatch<Int32Type, ::arrow::Date64Type>(
     TypedColumnWriter<Int32Type>* writer, int64_t num_values, int64_t num_levels,
     const int16_t* def_levels, const int16_t* rep_levels, const uint8_t* valid_bits,
     int64_t valid_bits_offset, const int64_t* data_ptr) {
@@ -555,7 +555,7 @@ Status FileWriter::Impl::WriteColumnChunk(const Array& data) {
       WRITE_BATCH_CASE(INT16, Int16Type, Int32Type)
       WRITE_BATCH_CASE(UINT16, UInt16Type, Int32Type)
       WRITE_BATCH_CASE(INT32, Int32Type, Int32Type)
-      WRITE_BATCH_CASE(DATE, DateType, Int32Type)
+      WRITE_BATCH_CASE(DATE64, Date64Type, Int32Type)
       WRITE_BATCH_CASE(INT64, Int64Type, Int64Type)
       WRITE_BATCH_CASE(TIMESTAMP, TimestampType, Int64Type)
       WRITE_BATCH_CASE(UINT64, UInt64Type, Int64Type)