You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@parquet.apache.org by we...@apache.org on 2016/10/18 12:09:41 UTC

parquet-cpp git commit: PARQUET-752: Account for upstream Arrow API changes

Repository: parquet-cpp
Updated Branches:
  refs/heads/master aba7c374c -> 82515fead


PARQUET-752: Account for upstream Arrow API changes

As soon as ARROW-261 and ARROW-317 are merged, I'll update the thirdparty git SHA so we can get a green build.

Author: Wes McKinney <we...@twosigma.com>

Closes #180 from wesm/PARQUET-752 and squashes the following commits:

0085d92 [Wes McKinney] Fix benchmark code for API changes. Remove conda builds
e2ee9b3 [Wes McKinney] Update thirdparty build directions now that Arrow doesn't have the old scripts
92f6c35 [Wes McKinney] Bump thirdparty to arrow HEAD
3d49b50 [Wes McKinney] Fix for ARROW-317
d68d9d6 [Wes McKinney] Account for upstream API changes in ARROW-261


Project: http://git-wip-us.apache.org/repos/asf/parquet-cpp/repo
Commit: http://git-wip-us.apache.org/repos/asf/parquet-cpp/commit/82515fea
Tree: http://git-wip-us.apache.org/repos/asf/parquet-cpp/tree/82515fea
Diff: http://git-wip-us.apache.org/repos/asf/parquet-cpp/diff/82515fea

Branch: refs/heads/master
Commit: 82515feadd593482e509d2e6931cda29aba66cb0
Parents: aba7c37
Author: Wes McKinney <we...@twosigma.com>
Authored: Tue Oct 18 08:09:33 2016 -0400
Committer: Wes McKinney <we...@twosigma.com>
Committed: Tue Oct 18 08:09:33 2016 -0400

----------------------------------------------------------------------
 .travis.yml                                     | 16 -----
 ci/travis_conda_build.sh                        | 46 -------------
 .../arrow/arrow-reader-writer-benchmark.cc      |  3 +-
 src/parquet/arrow/arrow-reader-writer-test.cc   | 36 ++++++----
 src/parquet/arrow/reader.cc                     |  6 +-
 src/parquet/arrow/test-util.h                   | 72 ++++++++++----------
 src/parquet/arrow/writer.cc                     | 13 ++--
 src/parquet/column/writer.cc                    |  4 +-
 thirdparty/build_thirdparty.sh                  | 20 +++---
 thirdparty/versions.sh                          |  2 +-
 10 files changed, 84 insertions(+), 134 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/82515fea/.travis.yml
----------------------------------------------------------------------
diff --git a/.travis.yml b/.travis.yml
index 6dc994e..5ca6de4 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -38,22 +38,6 @@ matrix:
     before_install:
     - mkdir $TRAVIS_BUILD_DIR/parquet-build
     - pushd $TRAVIS_BUILD_DIR/parquet-build
-  - compiler: gcc
-    env: PARQUET_TEST_GROUP=packaging
-    os: linux
-    before_script:
-    - export CC="gcc-4.9"
-    - export CXX="g++-4.9"
-    script:
-    - $TRAVIS_BUILD_DIR/ci/travis_conda_build.sh
-  - os: osx
-    env: PARQUET_TEST_GROUP=packaging
-    compiler: clang
-    addons:
-    before_script:
-    before_install:
-    script:
-    - $TRAVIS_BUILD_DIR/ci/travis_conda_build.sh
 
 language: cpp
 

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/82515fea/ci/travis_conda_build.sh
----------------------------------------------------------------------
diff --git a/ci/travis_conda_build.sh b/ci/travis_conda_build.sh
deleted file mode 100755
index 4d9c03d..0000000
--- a/ci/travis_conda_build.sh
+++ /dev/null
@@ -1,46 +0,0 @@
-#!/usr/bin/env bash
-
-#  Licensed under the Apache License, Version 2.0 (the "License");
-#  you may not use this file except in compliance with the License.
-#  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-#  Unless required by applicable law or agreed to in writing, software
-#  distributed under the License is distributed on an "AS IS" BASIS,
-#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  See the License for the specific language governing permissions and
-#  limitations under the License. See accompanying LICENSE file.
-
-set -e
-
-if [ $TRAVIS_OS_NAME == "linux" ]; then
-  MINICONDA_URL="https://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh"
-else
-  MINICONDA_URL="https://repo.continuum.io/miniconda/Miniconda-latest-MacOSX-x86_64.sh"
-fi
-
-wget -O miniconda.sh $MINICONDA_URL
-MINICONDA=$HOME/miniconda
-bash miniconda.sh -b -p $MINICONDA
-export PATH="$MINICONDA/bin:$PATH"
-
-conda update -y -q conda
-conda install -y -q conda-build
-conda info -a
-
-conda config --set show_channel_urls yes
-conda config --add channels conda-forge
-conda config --add channels apache
-
-conda install --yes jinja2 anaconda-client
-
-cd $TRAVIS_BUILD_DIR
-
-conda build conda.recipe
-
-CONDA_PACKAGE=`conda build --output conda.recipe | grep bz2`
-
-if [ $TRAVIS_BRANCH == "master" ] && [ $TRAVIS_PULL_REQUEST == "false" ]; then
-  anaconda --token $ANACONDA_TOKEN upload $CONDA_PACKAGE --user apache --channel dev;
-fi

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/82515fea/src/parquet/arrow/arrow-reader-writer-benchmark.cc
----------------------------------------------------------------------
diff --git a/src/parquet/arrow/arrow-reader-writer-benchmark.cc b/src/parquet/arrow/arrow-reader-writer-benchmark.cc
index 9ce5f96..bbaefaa 100644
--- a/src/parquet/arrow/arrow-reader-writer-benchmark.cc
+++ b/src/parquet/arrow/arrow-reader-writer-benchmark.cc
@@ -94,7 +94,8 @@ std::shared_ptr<::arrow::Table> TableFromVector(
   } else {
     builder.Append(vec.data(), vec.size(), nullptr);
   }
-  std::shared_ptr<::arrow::Array> array = builder.Finish();
+  std::shared_ptr<::arrow::Array> array;
+  builder.Finish(&array);
   auto field = std::make_shared<::arrow::Field>("column", type, nullable);
   auto schema = std::make_shared<::arrow::Schema>(
       std::vector<std::shared_ptr<::arrow::Field>>({field}));

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/82515fea/src/parquet/arrow/arrow-reader-writer-test.cc
----------------------------------------------------------------------
diff --git a/src/parquet/arrow/arrow-reader-writer-test.cc b/src/parquet/arrow/arrow-reader-writer-test.cc
index b1f1c52..1f28e5c 100644
--- a/src/parquet/arrow/arrow-reader-writer-test.cc
+++ b/src/parquet/arrow/arrow-reader-writer-test.cc
@@ -264,7 +264,8 @@ typedef ::testing::Types<::arrow::BooleanType, ::arrow::UInt8Type, ::arrow::Int8
 TYPED_TEST_CASE(TestParquetIO, TestTypes);
 
 TYPED_TEST(TestParquetIO, SingleColumnRequiredWrite) {
-  auto values = NonNullArray<TypeParam>(SMALL_SIZE);
+  std::shared_ptr<Array> values;
+  ASSERT_OK(NonNullArray<TypeParam>(SMALL_SIZE, &values));
 
   std::shared_ptr<GroupNode> schema = this->MakeSchema(Repetition::REQUIRED);
   this->WriteFlatColumn(schema, values);
@@ -273,7 +274,8 @@ TYPED_TEST(TestParquetIO, SingleColumnRequiredWrite) {
 }
 
 TYPED_TEST(TestParquetIO, SingleColumnTableRequiredWrite) {
-  auto values = NonNullArray<TypeParam>(SMALL_SIZE);
+  std::shared_ptr<Array> values;
+  ASSERT_OK(NonNullArray<TypeParam>(SMALL_SIZE, &values));
   std::shared_ptr<Table> table = MakeSimpleTable(values, false);
   this->sink_ = std::make_shared<InMemoryOutputStream>();
   ASSERT_OK_NO_THROW(WriteFlatTable(table.get(), ::arrow::default_memory_pool(),
@@ -291,7 +293,8 @@ TYPED_TEST(TestParquetIO, SingleColumnTableRequiredWrite) {
 
 TYPED_TEST(TestParquetIO, SingleColumnOptionalReadWrite) {
   // This also tests max_definition_level = 1
-  auto values = NullableArray<TypeParam>(SMALL_SIZE, 10);
+  std::shared_ptr<Array> values;
+  ASSERT_OK(NullableArray<TypeParam>(SMALL_SIZE, 10, &values));
 
   std::shared_ptr<GroupNode> schema = this->MakeSchema(Repetition::OPTIONAL);
   this->WriteFlatColumn(schema, values);
@@ -301,7 +304,8 @@ TYPED_TEST(TestParquetIO, SingleColumnOptionalReadWrite) {
 
 TYPED_TEST(TestParquetIO, SingleColumnTableOptionalReadWrite) {
   // This also tests max_definition_level = 1
-  std::shared_ptr<Array> values = NullableArray<TypeParam>(SMALL_SIZE, 10);
+  std::shared_ptr<Array> values;
+  ASSERT_OK(NullableArray<TypeParam>(SMALL_SIZE, 10, &values));
   std::shared_ptr<Table> table = MakeSimpleTable(values, true);
   this->sink_ = std::make_shared<InMemoryOutputStream>();
   ASSERT_OK_NO_THROW(WriteFlatTable(table.get(), ::arrow::default_memory_pool(),
@@ -311,7 +315,8 @@ TYPED_TEST(TestParquetIO, SingleColumnTableOptionalReadWrite) {
 }
 
 TYPED_TEST(TestParquetIO, SingleColumnRequiredChunkedWrite) {
-  auto values = NonNullArray<TypeParam>(SMALL_SIZE);
+  std::shared_ptr<Array> values;
+  ASSERT_OK(NonNullArray<TypeParam>(SMALL_SIZE, &values));
   int64_t chunk_size = values->length() / 4;
 
   std::shared_ptr<GroupNode> schema = this->MakeSchema(Repetition::REQUIRED);
@@ -327,7 +332,8 @@ TYPED_TEST(TestParquetIO, SingleColumnRequiredChunkedWrite) {
 }
 
 TYPED_TEST(TestParquetIO, SingleColumnTableRequiredChunkedWrite) {
-  auto values = NonNullArray<TypeParam>(LARGE_SIZE);
+  std::shared_ptr<Array> values;
+  ASSERT_OK(NonNullArray<TypeParam>(LARGE_SIZE, &values));
   std::shared_ptr<Table> table = MakeSimpleTable(values, false);
   this->sink_ = std::make_shared<InMemoryOutputStream>();
   ASSERT_OK_NO_THROW(WriteFlatTable(
@@ -338,7 +344,8 @@ TYPED_TEST(TestParquetIO, SingleColumnTableRequiredChunkedWrite) {
 
 TYPED_TEST(TestParquetIO, SingleColumnOptionalChunkedWrite) {
   int64_t chunk_size = SMALL_SIZE / 4;
-  auto values = NullableArray<TypeParam>(SMALL_SIZE, 10);
+  std::shared_ptr<Array> values;
+  ASSERT_OK(NullableArray<TypeParam>(SMALL_SIZE, 10, &values));
 
   std::shared_ptr<GroupNode> schema = this->MakeSchema(Repetition::OPTIONAL);
   FileWriter writer(::arrow::default_memory_pool(), this->MakeWriter(schema));
@@ -354,7 +361,8 @@ TYPED_TEST(TestParquetIO, SingleColumnOptionalChunkedWrite) {
 
 TYPED_TEST(TestParquetIO, SingleColumnTableOptionalChunkedWrite) {
   // This also tests max_definition_level = 1
-  auto values = NullableArray<TypeParam>(LARGE_SIZE, 100);
+  std::shared_ptr<Array> values;
+  ASSERT_OK(NullableArray<TypeParam>(LARGE_SIZE, 100, &values));
   std::shared_ptr<Table> table = MakeSimpleTable(values, true);
   this->sink_ = std::make_shared<InMemoryOutputStream>();
   ASSERT_OK_NO_THROW(WriteFlatTable(table.get(), ::arrow::default_memory_pool(),
@@ -367,8 +375,8 @@ using TestUInt32ParquetIO = TestParquetIO<::arrow::UInt32Type>;
 
 TEST_F(TestUInt32ParquetIO, Parquet_2_0_Compability) {
   // This also tests max_definition_level = 1
-  std::shared_ptr<PrimitiveArray> values =
-      NullableArray<::arrow::UInt32Type>(LARGE_SIZE, 100);
+  std::shared_ptr<Array> values;
+  ASSERT_OK(NullableArray<::arrow::UInt32Type>(LARGE_SIZE, 100, &values));
   std::shared_ptr<Table> table = MakeSimpleTable(values, true);
 
   // Parquet 2.0 roundtrip should yield an uint32_t column again
@@ -384,8 +392,12 @@ TEST_F(TestUInt32ParquetIO, Parquet_2_0_Compability) {
 
 TEST_F(TestUInt32ParquetIO, Parquet_1_0_Compability) {
   // This also tests max_definition_level = 1
-  std::shared_ptr<PrimitiveArray> values =
-      NullableArray<::arrow::UInt32Type>(LARGE_SIZE, 100);
+  std::shared_ptr<Array> arr;
+  ASSERT_OK(NullableArray<::arrow::UInt32Type>(LARGE_SIZE, 100, &arr));
+
+  std::shared_ptr<::arrow::UInt32Array> values =
+      std::dynamic_pointer_cast<::arrow::UInt32Array>(arr);
+
   std::shared_ptr<Table> table = MakeSimpleTable(values, true);
 
   // Parquet 1.0 returns an int64_t column as there is no way to tell a Parquet 1.0

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/82515fea/src/parquet/arrow/reader.cc
----------------------------------------------------------------------
diff --git a/src/parquet/arrow/reader.cc b/src/parquet/arrow/reader.cc
index 0e9f255..c581997 100644
--- a/src/parquet/arrow/reader.cc
+++ b/src/parquet/arrow/reader.cc
@@ -304,8 +304,7 @@ Status FlatColumnReader::Impl::TypedReadBatch(
     }
     if (!column_reader_->HasNext()) { NextRowGroup(); }
   }
-  *out = builder.Finish();
-  return Status::OK();
+  return builder.Finish(out);
 }
 
 template <>
@@ -347,8 +346,7 @@ Status FlatColumnReader::Impl::TypedReadBatch<::arrow::StringType, ByteArrayType
     }
     if (!column_reader_->HasNext()) { NextRowGroup(); }
   }
-  *out = builder.Finish();
-  return Status::OK();
+  return builder.Finish(out);
 }
 
 #define TYPED_BATCH_CASE(ENUM, ArrowType, ParquetType)              \

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/82515fea/src/parquet/arrow/test-util.h
----------------------------------------------------------------------
diff --git a/src/parquet/arrow/test-util.h b/src/parquet/arrow/test-util.h
index deac9f7..92798ff 100644
--- a/src/parquet/arrow/test-util.h
+++ b/src/parquet/arrow/test-util.h
@@ -23,9 +23,11 @@
 #include "arrow/types/string.h"
 
 namespace parquet {
-
 namespace arrow {
 
+using ::arrow::Array;
+using ::arrow::Status;
+
 template <typename ArrowType>
 using is_arrow_float = std::is_floating_point<typename ArrowType::c_type>;
 
@@ -36,56 +38,52 @@ template <typename ArrowType>
 using is_arrow_string = std::is_same<ArrowType, ::arrow::StringType>;
 
 template <class ArrowType>
-typename std::enable_if<is_arrow_float<ArrowType>::value,
-    std::shared_ptr<::arrow::PrimitiveArray>>::type
-NonNullArray(size_t size) {
+typename std::enable_if<is_arrow_float<ArrowType>::value, Status>::type NonNullArray(
+    size_t size, std::shared_ptr<Array>* out) {
   std::vector<typename ArrowType::c_type> values;
   ::arrow::test::random_real<typename ArrowType::c_type>(size, 0, 0, 1, &values);
   ::arrow::NumericBuilder<ArrowType> builder(
       ::arrow::default_memory_pool(), std::make_shared<ArrowType>());
   builder.Append(values.data(), values.size());
-  return std::static_pointer_cast<::arrow::PrimitiveArray>(builder.Finish());
+  return builder.Finish(out);
 }
 
 template <class ArrowType>
-typename std::enable_if<is_arrow_int<ArrowType>::value,
-    std::shared_ptr<::arrow::PrimitiveArray>>::type
-NonNullArray(size_t size) {
+typename std::enable_if<is_arrow_int<ArrowType>::value, Status>::type NonNullArray(
+    size_t size, std::shared_ptr<Array>* out) {
   std::vector<typename ArrowType::c_type> values;
   ::arrow::test::randint<typename ArrowType::c_type>(size, 0, 64, &values);
   ::arrow::NumericBuilder<ArrowType> builder(
       ::arrow::default_memory_pool(), std::make_shared<ArrowType>());
   builder.Append(values.data(), values.size());
-  return std::static_pointer_cast<::arrow::PrimitiveArray>(builder.Finish());
+  return builder.Finish(out);
 }
 
 template <class ArrowType>
-typename std::enable_if<is_arrow_string<ArrowType>::value,
-    std::shared_ptr<::arrow::StringArray>>::type
-NonNullArray(size_t size) {
+typename std::enable_if<is_arrow_string<ArrowType>::value, Status>::type NonNullArray(
+    size_t size, std::shared_ptr<Array>* out) {
   ::arrow::StringBuilder builder(
       ::arrow::default_memory_pool(), std::make_shared<::arrow::StringType>());
   for (size_t i = 0; i < size; i++) {
     builder.Append("test-string");
   }
-  return std::static_pointer_cast<::arrow::StringArray>(builder.Finish());
+  return builder.Finish(out);
 }
 
 template <>
-std::shared_ptr<::arrow::PrimitiveArray> NonNullArray<::arrow::BooleanType>(size_t size) {
+Status NonNullArray<::arrow::BooleanType>(size_t size, std::shared_ptr<Array>* out) {
   std::vector<uint8_t> values;
   ::arrow::test::randint<uint8_t>(size, 0, 1, &values);
   ::arrow::BooleanBuilder builder(
       ::arrow::default_memory_pool(), std::make_shared<::arrow::BooleanType>());
   builder.Append(values.data(), values.size());
-  return std::static_pointer_cast<::arrow::PrimitiveArray>(builder.Finish());
+  return builder.Finish(out);
 }
 
 // This helper function only supports (size/2) nulls.
 template <typename ArrowType>
-typename std::enable_if<is_arrow_float<ArrowType>::value,
-    std::shared_ptr<::arrow::PrimitiveArray>>::type
-NullableArray(size_t size, size_t num_nulls) {
+typename std::enable_if<is_arrow_float<ArrowType>::value, Status>::type NullableArray(
+    size_t size, size_t num_nulls, std::shared_ptr<Array>* out) {
   std::vector<typename ArrowType::c_type> values;
   ::arrow::test::random_real<typename ArrowType::c_type>(size, 0, 0, 1, &values);
   std::vector<uint8_t> valid_bytes(size, 1);
@@ -97,14 +95,13 @@ NullableArray(size_t size, size_t num_nulls) {
   ::arrow::NumericBuilder<ArrowType> builder(
       ::arrow::default_memory_pool(), std::make_shared<ArrowType>());
   builder.Append(values.data(), values.size(), valid_bytes.data());
-  return std::static_pointer_cast<::arrow::PrimitiveArray>(builder.Finish());
+  return builder.Finish(out);
 }
 
 // This helper function only supports (size/2) nulls.
 template <typename ArrowType>
-typename std::enable_if<is_arrow_int<ArrowType>::value,
-    std::shared_ptr<::arrow::PrimitiveArray>>::type
-NullableArray(size_t size, size_t num_nulls) {
+typename std::enable_if<is_arrow_int<ArrowType>::value, Status>::type NullableArray(
+    size_t size, size_t num_nulls, std::shared_ptr<Array>* out) {
   std::vector<typename ArrowType::c_type> values;
   ::arrow::test::randint<typename ArrowType::c_type>(size, 0, 64, &values);
   std::vector<uint8_t> valid_bytes(size, 1);
@@ -116,14 +113,13 @@ NullableArray(size_t size, size_t num_nulls) {
   ::arrow::NumericBuilder<ArrowType> builder(
       ::arrow::default_memory_pool(), std::make_shared<ArrowType>());
   builder.Append(values.data(), values.size(), valid_bytes.data());
-  return std::static_pointer_cast<::arrow::PrimitiveArray>(builder.Finish());
+  return builder.Finish(out);
 }
 
 // This helper function only supports (size/2) nulls yet.
 template <typename ArrowType>
-typename std::enable_if<is_arrow_string<ArrowType>::value,
-    std::shared_ptr<::arrow::StringArray>>::type
-NullableArray(size_t size, size_t num_nulls) {
+typename std::enable_if<is_arrow_string<ArrowType>::value, Status>::type NullableArray(
+    size_t size, size_t num_nulls, std::shared_ptr<::arrow::Array>* out) {
   std::vector<uint8_t> valid_bytes(size, 1);
 
   for (size_t i = 0; i < num_nulls; i++) {
@@ -135,13 +131,13 @@ NullableArray(size_t size, size_t num_nulls) {
   for (size_t i = 0; i < size; i++) {
     builder.Append("test-string");
   }
-  return std::static_pointer_cast<::arrow::StringArray>(builder.Finish());
+  return builder.Finish(out);
 }
 
 // This helper function only supports (size/2) nulls yet.
 template <>
-std::shared_ptr<::arrow::PrimitiveArray> NullableArray<::arrow::BooleanType>(
-    size_t size, size_t num_nulls) {
+Status NullableArray<::arrow::BooleanType>(
+    size_t size, size_t num_nulls, std::shared_ptr<Array>* out) {
   std::vector<uint8_t> values;
   ::arrow::test::randint<uint8_t>(size, 0, 1, &values);
   std::vector<uint8_t> valid_bytes(size, 1);
@@ -153,17 +149,17 @@ std::shared_ptr<::arrow::PrimitiveArray> NullableArray<::arrow::BooleanType>(
   ::arrow::BooleanBuilder builder(
       ::arrow::default_memory_pool(), std::make_shared<::arrow::BooleanType>());
   builder.Append(values.data(), values.size(), valid_bytes.data());
-  return std::static_pointer_cast<::arrow::PrimitiveArray>(builder.Finish());
+  return builder.Finish(out);
 }
 
-std::shared_ptr<::arrow::Column> MakeColumn(const std::string& name,
-    const std::shared_ptr<::arrow::Array>& array, bool nullable) {
+std::shared_ptr<::arrow::Column> MakeColumn(
+    const std::string& name, const std::shared_ptr<Array>& array, bool nullable) {
   auto field = std::make_shared<::arrow::Field>(name, array->type(), nullable);
   return std::make_shared<::arrow::Column>(field, array);
 }
 
 std::shared_ptr<::arrow::Table> MakeSimpleTable(
-    const std::shared_ptr<::arrow::Array>& values, bool nullable) {
+    const std::shared_ptr<Array>& values, bool nullable) {
   std::shared_ptr<::arrow::Column> column = MakeColumn("col", values, nullable);
   std::vector<std::shared_ptr<::arrow::Column>> columns({column});
   std::vector<std::shared_ptr<::arrow::Field>> fields({column->field()});
@@ -172,7 +168,7 @@ std::shared_ptr<::arrow::Table> MakeSimpleTable(
 }
 
 template <typename T>
-void ExpectArray(T* expected, ::arrow::Array* result) {
+void ExpectArray(T* expected, Array* result) {
   auto p_array = static_cast<::arrow::PrimitiveArray*>(result);
   for (int i = 0; i < result->length(); i++) {
     EXPECT_EQ(expected[i], reinterpret_cast<const T*>(p_array->data()->data())[i]);
@@ -180,7 +176,7 @@ void ExpectArray(T* expected, ::arrow::Array* result) {
 }
 
 template <typename ArrowType>
-void ExpectArray(typename ArrowType::c_type* expected, ::arrow::Array* result) {
+void ExpectArray(typename ArrowType::c_type* expected, Array* result) {
   ::arrow::PrimitiveArray* p_array = static_cast<::arrow::PrimitiveArray*>(result);
   for (int64_t i = 0; i < result->length(); i++) {
     EXPECT_EQ(expected[i],
@@ -189,11 +185,13 @@ void ExpectArray(typename ArrowType::c_type* expected, ::arrow::Array* result) {
 }
 
 template <>
-void ExpectArray<::arrow::BooleanType>(uint8_t* expected, ::arrow::Array* result) {
+void ExpectArray<::arrow::BooleanType>(uint8_t* expected, Array* result) {
   ::arrow::BooleanBuilder builder(
       ::arrow::default_memory_pool(), std::make_shared<::arrow::BooleanType>());
   builder.Append(expected, result->length());
-  std::shared_ptr<::arrow::Array> expected_array = builder.Finish();
+
+  std::shared_ptr<Array> expected_array;
+  EXPECT_OK(builder.Finish(&expected_array));
   EXPECT_TRUE(result->Equals(expected_array));
 }
 

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/82515fea/src/parquet/arrow/writer.cc
----------------------------------------------------------------------
diff --git a/src/parquet/arrow/writer.cc b/src/parquet/arrow/writer.cc
index ff3707b..e75d4b7 100644
--- a/src/parquet/arrow/writer.cc
+++ b/src/parquet/arrow/writer.cc
@@ -45,6 +45,8 @@ using parquet::schema::GroupNode;
 namespace parquet {
 namespace arrow {
 
+namespace BitUtil = ::arrow::BitUtil;
+
 class FileWriter::Impl {
  public:
   Impl(MemoryPool* pool, std::unique_ptr<ParquetFileWriter> writer);
@@ -176,7 +178,7 @@ Status FileWriter::Impl::TypedWriteBatch<BooleanType, ::arrow::BooleanType>(
   if (writer->descr()->max_definition_level() == 0) {
     // no nulls, just dump the data
     for (int64_t i = 0; i < length; i++) {
-      buffer_ptr[i] = ::arrow::util::get_bit(data_ptr, offset + i);
+      buffer_ptr[i] = BitUtil::GetBit(data_ptr, offset + i);
     }
     PARQUET_CATCH_NOT_OK(writer->WriteBatch(length, nullptr, nullptr, buffer_ptr));
   } else if (writer->descr()->max_definition_level() == 1) {
@@ -186,7 +188,7 @@ Status FileWriter::Impl::TypedWriteBatch<BooleanType, ::arrow::BooleanType>(
     if (data->null_count() == 0) {
       std::fill(def_levels_ptr, def_levels_ptr + length, 1);
       for (int64_t i = 0; i < length; i++) {
-        buffer_ptr[i] = ::arrow::util::get_bit(data_ptr, offset + i);
+        buffer_ptr[i] = BitUtil::GetBit(data_ptr, offset + i);
       }
       // TODO(PARQUET-644): write boolean values as a packed bitmap
       PARQUET_CATCH_NOT_OK(
@@ -198,7 +200,7 @@ Status FileWriter::Impl::TypedWriteBatch<BooleanType, ::arrow::BooleanType>(
           def_levels_ptr[i] = 0;
         } else {
           def_levels_ptr[i] = 1;
-          buffer_ptr[buffer_idx++] = ::arrow::util::get_bit(data_ptr, offset + i);
+          buffer_ptr[buffer_idx++] = BitUtil::GetBit(data_ptr, offset + i);
         }
       }
       PARQUET_CATCH_NOT_OK(
@@ -260,9 +262,8 @@ Status FileWriter::Impl::WriteFlatColumnChunk(
   DCHECK((offset + length) <= data->length());
   RETURN_NOT_OK(data_buffer_.Resize(length * sizeof(ByteArray)));
   auto buffer_ptr = reinterpret_cast<ByteArray*>(data_buffer_.mutable_data());
-  auto values = std::dynamic_pointer_cast<PrimitiveArray>(data->values());
-  auto data_ptr = reinterpret_cast<const uint8_t*>(values->data()->data());
-  DCHECK(values != nullptr);
+  auto data_ptr = reinterpret_cast<const uint8_t*>(data->data()->data());
+  DCHECK(data_ptr != nullptr);
   auto writer = reinterpret_cast<TypedColumnWriter<ByteArrayType>*>(column_writer);
   if (writer->descr()->max_definition_level() > 0) {
     RETURN_NOT_OK(def_levels_buffer_.Resize(length * sizeof(int16_t)));

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/82515fea/src/parquet/column/writer.cc
----------------------------------------------------------------------
diff --git a/src/parquet/column/writer.cc b/src/parquet/column/writer.cc
index b917945..d1c3fe2 100644
--- a/src/parquet/column/writer.cc
+++ b/src/parquet/column/writer.cc
@@ -353,8 +353,8 @@ inline int64_t TypedColumnWriter<DType>::WriteMiniBatch(int64_t num_values,
 }
 
 template <typename DType>
-void TypedColumnWriter<DType>::WriteBatch(int64_t num_values,
-    const int16_t* def_levels, const int16_t* rep_levels, const T* values) {
+void TypedColumnWriter<DType>::WriteBatch(int64_t num_values, const int16_t* def_levels,
+    const int16_t* rep_levels, const T* values) {
   // We check for DataPage limits only after we have inserted the values. If a user
   // writes a large number of values, the DataPage size can be much above the limit.
   // The purpose of this chunking is to bound this. Even if a user writes large number

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/82515fea/thirdparty/build_thirdparty.sh
----------------------------------------------------------------------
diff --git a/thirdparty/build_thirdparty.sh b/thirdparty/build_thirdparty.sh
index 6ebcd96..4a91516 100755
--- a/thirdparty/build_thirdparty.sh
+++ b/thirdparty/build_thirdparty.sh
@@ -75,15 +75,6 @@ fi
 
 STANDARD_DARWIN_FLAGS="-std=c++11 -stdlib=libc++"
 
-# build arrow
-if [ -n "$F_ALL" -o -n "$F_ARROW" ]; then
-    cd $TP_DIR/$ARROW_BASEDIR/cpp
-    source ./setup_build_env.sh
-    cmake . -DARROW_PARQUET=OFF -DARROW_HDFS=ON -DCMAKE_INSTALL_PREFIX=$PREFIX
-    make -j$PARALLEL install
-    # :
-fi
-
 # build googletest
 GOOGLETEST_ERROR="failed for googletest!"
 if [ -n "$F_ALL" -o -n "$F_GTEST" ]; then
@@ -142,5 +133,16 @@ if [ -n "$F_ALL" -o -n "$F_THRIFT" ]; then
   fi
 fi
 
+# build arrow
+if [ -n "$F_ALL" -o -n "$F_ARROW" ]; then
+    cd $TP_DIR/$ARROW_BASEDIR/cpp
+    cmake -DARROW_BUILD_TESTS=off \
+          -DARROW_HDFS=ON \
+          -DCMAKE_INSTALL_PREFIX=$PREFIX \
+          .
+    make -j$PARALLEL install
+    # :
+fi
+
 echo "---------------------"
 echo "Thirdparty dependencies built and installed into $PREFIX successfully"

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/82515fea/thirdparty/versions.sh
----------------------------------------------------------------------
diff --git a/thirdparty/versions.sh b/thirdparty/versions.sh
index f058b79..87fe6b6 100755
--- a/thirdparty/versions.sh
+++ b/thirdparty/versions.sh
@@ -15,7 +15,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
-ARROW_VERSION="7fb4d24a35269db99fa112c0512d4a32c372dd74"
+ARROW_VERSION="676c32ccea6274c75b2750453c1ddbc5f645c037"
 ARROW_URL="https://github.com/apache/arrow/archive/${ARROW_VERSION}.tar.gz"
 ARROW_BASEDIR="arrow-${ARROW_VERSION}"