You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@parquet.apache.org by we...@apache.org on 2016/10/18 12:09:41 UTC
parquet-cpp git commit: PARQUET-752: Account for upstream Arrow API changes
Repository: parquet-cpp
Updated Branches:
refs/heads/master aba7c374c -> 82515fead
PARQUET-752: Account for upstream Arrow API changes
As soon as ARROW-261 and ARROW-317 are merged, I'll update the thirdparty git SHA so we can get a green build.
Author: Wes McKinney <we...@twosigma.com>
Closes #180 from wesm/PARQUET-752 and squashes the following commits:
0085d92 [Wes McKinney] Fix benchmark code for API changes. Remove conda builds
e2ee9b3 [Wes McKinney] Update thirdparty build directions now that Arrow doesn't have the old scripts
92f6c35 [Wes McKinney] Bump thirdparty to arrow HEAD
3d49b50 [Wes McKinney] Fix for ARROW-317
d68d9d6 [Wes McKinney] Account for upstream API changes in ARROW-261
Project: http://git-wip-us.apache.org/repos/asf/parquet-cpp/repo
Commit: http://git-wip-us.apache.org/repos/asf/parquet-cpp/commit/82515fea
Tree: http://git-wip-us.apache.org/repos/asf/parquet-cpp/tree/82515fea
Diff: http://git-wip-us.apache.org/repos/asf/parquet-cpp/diff/82515fea
Branch: refs/heads/master
Commit: 82515feadd593482e509d2e6931cda29aba66cb0
Parents: aba7c37
Author: Wes McKinney <we...@twosigma.com>
Authored: Tue Oct 18 08:09:33 2016 -0400
Committer: Wes McKinney <we...@twosigma.com>
Committed: Tue Oct 18 08:09:33 2016 -0400
----------------------------------------------------------------------
.travis.yml | 16 -----
ci/travis_conda_build.sh | 46 -------------
.../arrow/arrow-reader-writer-benchmark.cc | 3 +-
src/parquet/arrow/arrow-reader-writer-test.cc | 36 ++++++----
src/parquet/arrow/reader.cc | 6 +-
src/parquet/arrow/test-util.h | 72 ++++++++++----------
src/parquet/arrow/writer.cc | 13 ++--
src/parquet/column/writer.cc | 4 +-
thirdparty/build_thirdparty.sh | 20 +++---
thirdparty/versions.sh | 2 +-
10 files changed, 84 insertions(+), 134 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/82515fea/.travis.yml
----------------------------------------------------------------------
diff --git a/.travis.yml b/.travis.yml
index 6dc994e..5ca6de4 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -38,22 +38,6 @@ matrix:
before_install:
- mkdir $TRAVIS_BUILD_DIR/parquet-build
- pushd $TRAVIS_BUILD_DIR/parquet-build
- - compiler: gcc
- env: PARQUET_TEST_GROUP=packaging
- os: linux
- before_script:
- - export CC="gcc-4.9"
- - export CXX="g++-4.9"
- script:
- - $TRAVIS_BUILD_DIR/ci/travis_conda_build.sh
- - os: osx
- env: PARQUET_TEST_GROUP=packaging
- compiler: clang
- addons:
- before_script:
- before_install:
- script:
- - $TRAVIS_BUILD_DIR/ci/travis_conda_build.sh
language: cpp
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/82515fea/ci/travis_conda_build.sh
----------------------------------------------------------------------
diff --git a/ci/travis_conda_build.sh b/ci/travis_conda_build.sh
deleted file mode 100755
index 4d9c03d..0000000
--- a/ci/travis_conda_build.sh
+++ /dev/null
@@ -1,46 +0,0 @@
-#!/usr/bin/env bash
-
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License. See accompanying LICENSE file.
-
-set -e
-
-if [ $TRAVIS_OS_NAME == "linux" ]; then
- MINICONDA_URL="https://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh"
-else
- MINICONDA_URL="https://repo.continuum.io/miniconda/Miniconda-latest-MacOSX-x86_64.sh"
-fi
-
-wget -O miniconda.sh $MINICONDA_URL
-MINICONDA=$HOME/miniconda
-bash miniconda.sh -b -p $MINICONDA
-export PATH="$MINICONDA/bin:$PATH"
-
-conda update -y -q conda
-conda install -y -q conda-build
-conda info -a
-
-conda config --set show_channel_urls yes
-conda config --add channels conda-forge
-conda config --add channels apache
-
-conda install --yes jinja2 anaconda-client
-
-cd $TRAVIS_BUILD_DIR
-
-conda build conda.recipe
-
-CONDA_PACKAGE=`conda build --output conda.recipe | grep bz2`
-
-if [ $TRAVIS_BRANCH == "master" ] && [ $TRAVIS_PULL_REQUEST == "false" ]; then
- anaconda --token $ANACONDA_TOKEN upload $CONDA_PACKAGE --user apache --channel dev;
-fi
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/82515fea/src/parquet/arrow/arrow-reader-writer-benchmark.cc
----------------------------------------------------------------------
diff --git a/src/parquet/arrow/arrow-reader-writer-benchmark.cc b/src/parquet/arrow/arrow-reader-writer-benchmark.cc
index 9ce5f96..bbaefaa 100644
--- a/src/parquet/arrow/arrow-reader-writer-benchmark.cc
+++ b/src/parquet/arrow/arrow-reader-writer-benchmark.cc
@@ -94,7 +94,8 @@ std::shared_ptr<::arrow::Table> TableFromVector(
} else {
builder.Append(vec.data(), vec.size(), nullptr);
}
- std::shared_ptr<::arrow::Array> array = builder.Finish();
+ std::shared_ptr<::arrow::Array> array;
+ builder.Finish(&array);
auto field = std::make_shared<::arrow::Field>("column", type, nullable);
auto schema = std::make_shared<::arrow::Schema>(
std::vector<std::shared_ptr<::arrow::Field>>({field}));
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/82515fea/src/parquet/arrow/arrow-reader-writer-test.cc
----------------------------------------------------------------------
diff --git a/src/parquet/arrow/arrow-reader-writer-test.cc b/src/parquet/arrow/arrow-reader-writer-test.cc
index b1f1c52..1f28e5c 100644
--- a/src/parquet/arrow/arrow-reader-writer-test.cc
+++ b/src/parquet/arrow/arrow-reader-writer-test.cc
@@ -264,7 +264,8 @@ typedef ::testing::Types<::arrow::BooleanType, ::arrow::UInt8Type, ::arrow::Int8
TYPED_TEST_CASE(TestParquetIO, TestTypes);
TYPED_TEST(TestParquetIO, SingleColumnRequiredWrite) {
- auto values = NonNullArray<TypeParam>(SMALL_SIZE);
+ std::shared_ptr<Array> values;
+ ASSERT_OK(NonNullArray<TypeParam>(SMALL_SIZE, &values));
std::shared_ptr<GroupNode> schema = this->MakeSchema(Repetition::REQUIRED);
this->WriteFlatColumn(schema, values);
@@ -273,7 +274,8 @@ TYPED_TEST(TestParquetIO, SingleColumnRequiredWrite) {
}
TYPED_TEST(TestParquetIO, SingleColumnTableRequiredWrite) {
- auto values = NonNullArray<TypeParam>(SMALL_SIZE);
+ std::shared_ptr<Array> values;
+ ASSERT_OK(NonNullArray<TypeParam>(SMALL_SIZE, &values));
std::shared_ptr<Table> table = MakeSimpleTable(values, false);
this->sink_ = std::make_shared<InMemoryOutputStream>();
ASSERT_OK_NO_THROW(WriteFlatTable(table.get(), ::arrow::default_memory_pool(),
@@ -291,7 +293,8 @@ TYPED_TEST(TestParquetIO, SingleColumnTableRequiredWrite) {
TYPED_TEST(TestParquetIO, SingleColumnOptionalReadWrite) {
// This also tests max_definition_level = 1
- auto values = NullableArray<TypeParam>(SMALL_SIZE, 10);
+ std::shared_ptr<Array> values;
+ ASSERT_OK(NullableArray<TypeParam>(SMALL_SIZE, 10, &values));
std::shared_ptr<GroupNode> schema = this->MakeSchema(Repetition::OPTIONAL);
this->WriteFlatColumn(schema, values);
@@ -301,7 +304,8 @@ TYPED_TEST(TestParquetIO, SingleColumnOptionalReadWrite) {
TYPED_TEST(TestParquetIO, SingleColumnTableOptionalReadWrite) {
// This also tests max_definition_level = 1
- std::shared_ptr<Array> values = NullableArray<TypeParam>(SMALL_SIZE, 10);
+ std::shared_ptr<Array> values;
+ ASSERT_OK(NullableArray<TypeParam>(SMALL_SIZE, 10, &values));
std::shared_ptr<Table> table = MakeSimpleTable(values, true);
this->sink_ = std::make_shared<InMemoryOutputStream>();
ASSERT_OK_NO_THROW(WriteFlatTable(table.get(), ::arrow::default_memory_pool(),
@@ -311,7 +315,8 @@ TYPED_TEST(TestParquetIO, SingleColumnTableOptionalReadWrite) {
}
TYPED_TEST(TestParquetIO, SingleColumnRequiredChunkedWrite) {
- auto values = NonNullArray<TypeParam>(SMALL_SIZE);
+ std::shared_ptr<Array> values;
+ ASSERT_OK(NonNullArray<TypeParam>(SMALL_SIZE, &values));
int64_t chunk_size = values->length() / 4;
std::shared_ptr<GroupNode> schema = this->MakeSchema(Repetition::REQUIRED);
@@ -327,7 +332,8 @@ TYPED_TEST(TestParquetIO, SingleColumnRequiredChunkedWrite) {
}
TYPED_TEST(TestParquetIO, SingleColumnTableRequiredChunkedWrite) {
- auto values = NonNullArray<TypeParam>(LARGE_SIZE);
+ std::shared_ptr<Array> values;
+ ASSERT_OK(NonNullArray<TypeParam>(LARGE_SIZE, &values));
std::shared_ptr<Table> table = MakeSimpleTable(values, false);
this->sink_ = std::make_shared<InMemoryOutputStream>();
ASSERT_OK_NO_THROW(WriteFlatTable(
@@ -338,7 +344,8 @@ TYPED_TEST(TestParquetIO, SingleColumnTableRequiredChunkedWrite) {
TYPED_TEST(TestParquetIO, SingleColumnOptionalChunkedWrite) {
int64_t chunk_size = SMALL_SIZE / 4;
- auto values = NullableArray<TypeParam>(SMALL_SIZE, 10);
+ std::shared_ptr<Array> values;
+ ASSERT_OK(NullableArray<TypeParam>(SMALL_SIZE, 10, &values));
std::shared_ptr<GroupNode> schema = this->MakeSchema(Repetition::OPTIONAL);
FileWriter writer(::arrow::default_memory_pool(), this->MakeWriter(schema));
@@ -354,7 +361,8 @@ TYPED_TEST(TestParquetIO, SingleColumnOptionalChunkedWrite) {
TYPED_TEST(TestParquetIO, SingleColumnTableOptionalChunkedWrite) {
// This also tests max_definition_level = 1
- auto values = NullableArray<TypeParam>(LARGE_SIZE, 100);
+ std::shared_ptr<Array> values;
+ ASSERT_OK(NullableArray<TypeParam>(LARGE_SIZE, 100, &values));
std::shared_ptr<Table> table = MakeSimpleTable(values, true);
this->sink_ = std::make_shared<InMemoryOutputStream>();
ASSERT_OK_NO_THROW(WriteFlatTable(table.get(), ::arrow::default_memory_pool(),
@@ -367,8 +375,8 @@ using TestUInt32ParquetIO = TestParquetIO<::arrow::UInt32Type>;
TEST_F(TestUInt32ParquetIO, Parquet_2_0_Compability) {
// This also tests max_definition_level = 1
- std::shared_ptr<PrimitiveArray> values =
- NullableArray<::arrow::UInt32Type>(LARGE_SIZE, 100);
+ std::shared_ptr<Array> values;
+ ASSERT_OK(NullableArray<::arrow::UInt32Type>(LARGE_SIZE, 100, &values));
std::shared_ptr<Table> table = MakeSimpleTable(values, true);
// Parquet 2.0 roundtrip should yield an uint32_t column again
@@ -384,8 +392,12 @@ TEST_F(TestUInt32ParquetIO, Parquet_2_0_Compability) {
TEST_F(TestUInt32ParquetIO, Parquet_1_0_Compability) {
// This also tests max_definition_level = 1
- std::shared_ptr<PrimitiveArray> values =
- NullableArray<::arrow::UInt32Type>(LARGE_SIZE, 100);
+ std::shared_ptr<Array> arr;
+ ASSERT_OK(NullableArray<::arrow::UInt32Type>(LARGE_SIZE, 100, &arr));
+
+ std::shared_ptr<::arrow::UInt32Array> values =
+ std::dynamic_pointer_cast<::arrow::UInt32Array>(arr);
+
std::shared_ptr<Table> table = MakeSimpleTable(values, true);
// Parquet 1.0 returns an int64_t column as there is no way to tell a Parquet 1.0
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/82515fea/src/parquet/arrow/reader.cc
----------------------------------------------------------------------
diff --git a/src/parquet/arrow/reader.cc b/src/parquet/arrow/reader.cc
index 0e9f255..c581997 100644
--- a/src/parquet/arrow/reader.cc
+++ b/src/parquet/arrow/reader.cc
@@ -304,8 +304,7 @@ Status FlatColumnReader::Impl::TypedReadBatch(
}
if (!column_reader_->HasNext()) { NextRowGroup(); }
}
- *out = builder.Finish();
- return Status::OK();
+ return builder.Finish(out);
}
template <>
@@ -347,8 +346,7 @@ Status FlatColumnReader::Impl::TypedReadBatch<::arrow::StringType, ByteArrayType
}
if (!column_reader_->HasNext()) { NextRowGroup(); }
}
- *out = builder.Finish();
- return Status::OK();
+ return builder.Finish(out);
}
#define TYPED_BATCH_CASE(ENUM, ArrowType, ParquetType) \
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/82515fea/src/parquet/arrow/test-util.h
----------------------------------------------------------------------
diff --git a/src/parquet/arrow/test-util.h b/src/parquet/arrow/test-util.h
index deac9f7..92798ff 100644
--- a/src/parquet/arrow/test-util.h
+++ b/src/parquet/arrow/test-util.h
@@ -23,9 +23,11 @@
#include "arrow/types/string.h"
namespace parquet {
-
namespace arrow {
+using ::arrow::Array;
+using ::arrow::Status;
+
template <typename ArrowType>
using is_arrow_float = std::is_floating_point<typename ArrowType::c_type>;
@@ -36,56 +38,52 @@ template <typename ArrowType>
using is_arrow_string = std::is_same<ArrowType, ::arrow::StringType>;
template <class ArrowType>
-typename std::enable_if<is_arrow_float<ArrowType>::value,
- std::shared_ptr<::arrow::PrimitiveArray>>::type
-NonNullArray(size_t size) {
+typename std::enable_if<is_arrow_float<ArrowType>::value, Status>::type NonNullArray(
+ size_t size, std::shared_ptr<Array>* out) {
std::vector<typename ArrowType::c_type> values;
::arrow::test::random_real<typename ArrowType::c_type>(size, 0, 0, 1, &values);
::arrow::NumericBuilder<ArrowType> builder(
::arrow::default_memory_pool(), std::make_shared<ArrowType>());
builder.Append(values.data(), values.size());
- return std::static_pointer_cast<::arrow::PrimitiveArray>(builder.Finish());
+ return builder.Finish(out);
}
template <class ArrowType>
-typename std::enable_if<is_arrow_int<ArrowType>::value,
- std::shared_ptr<::arrow::PrimitiveArray>>::type
-NonNullArray(size_t size) {
+typename std::enable_if<is_arrow_int<ArrowType>::value, Status>::type NonNullArray(
+ size_t size, std::shared_ptr<Array>* out) {
std::vector<typename ArrowType::c_type> values;
::arrow::test::randint<typename ArrowType::c_type>(size, 0, 64, &values);
::arrow::NumericBuilder<ArrowType> builder(
::arrow::default_memory_pool(), std::make_shared<ArrowType>());
builder.Append(values.data(), values.size());
- return std::static_pointer_cast<::arrow::PrimitiveArray>(builder.Finish());
+ return builder.Finish(out);
}
template <class ArrowType>
-typename std::enable_if<is_arrow_string<ArrowType>::value,
- std::shared_ptr<::arrow::StringArray>>::type
-NonNullArray(size_t size) {
+typename std::enable_if<is_arrow_string<ArrowType>::value, Status>::type NonNullArray(
+ size_t size, std::shared_ptr<Array>* out) {
::arrow::StringBuilder builder(
::arrow::default_memory_pool(), std::make_shared<::arrow::StringType>());
for (size_t i = 0; i < size; i++) {
builder.Append("test-string");
}
- return std::static_pointer_cast<::arrow::StringArray>(builder.Finish());
+ return builder.Finish(out);
}
template <>
-std::shared_ptr<::arrow::PrimitiveArray> NonNullArray<::arrow::BooleanType>(size_t size) {
+Status NonNullArray<::arrow::BooleanType>(size_t size, std::shared_ptr<Array>* out) {
std::vector<uint8_t> values;
::arrow::test::randint<uint8_t>(size, 0, 1, &values);
::arrow::BooleanBuilder builder(
::arrow::default_memory_pool(), std::make_shared<::arrow::BooleanType>());
builder.Append(values.data(), values.size());
- return std::static_pointer_cast<::arrow::PrimitiveArray>(builder.Finish());
+ return builder.Finish(out);
}
// This helper function only supports (size/2) nulls.
template <typename ArrowType>
-typename std::enable_if<is_arrow_float<ArrowType>::value,
- std::shared_ptr<::arrow::PrimitiveArray>>::type
-NullableArray(size_t size, size_t num_nulls) {
+typename std::enable_if<is_arrow_float<ArrowType>::value, Status>::type NullableArray(
+ size_t size, size_t num_nulls, std::shared_ptr<Array>* out) {
std::vector<typename ArrowType::c_type> values;
::arrow::test::random_real<typename ArrowType::c_type>(size, 0, 0, 1, &values);
std::vector<uint8_t> valid_bytes(size, 1);
@@ -97,14 +95,13 @@ NullableArray(size_t size, size_t num_nulls) {
::arrow::NumericBuilder<ArrowType> builder(
::arrow::default_memory_pool(), std::make_shared<ArrowType>());
builder.Append(values.data(), values.size(), valid_bytes.data());
- return std::static_pointer_cast<::arrow::PrimitiveArray>(builder.Finish());
+ return builder.Finish(out);
}
// This helper function only supports (size/2) nulls.
template <typename ArrowType>
-typename std::enable_if<is_arrow_int<ArrowType>::value,
- std::shared_ptr<::arrow::PrimitiveArray>>::type
-NullableArray(size_t size, size_t num_nulls) {
+typename std::enable_if<is_arrow_int<ArrowType>::value, Status>::type NullableArray(
+ size_t size, size_t num_nulls, std::shared_ptr<Array>* out) {
std::vector<typename ArrowType::c_type> values;
::arrow::test::randint<typename ArrowType::c_type>(size, 0, 64, &values);
std::vector<uint8_t> valid_bytes(size, 1);
@@ -116,14 +113,13 @@ NullableArray(size_t size, size_t num_nulls) {
::arrow::NumericBuilder<ArrowType> builder(
::arrow::default_memory_pool(), std::make_shared<ArrowType>());
builder.Append(values.data(), values.size(), valid_bytes.data());
- return std::static_pointer_cast<::arrow::PrimitiveArray>(builder.Finish());
+ return builder.Finish(out);
}
// This helper function only supports (size/2) nulls yet.
template <typename ArrowType>
-typename std::enable_if<is_arrow_string<ArrowType>::value,
- std::shared_ptr<::arrow::StringArray>>::type
-NullableArray(size_t size, size_t num_nulls) {
+typename std::enable_if<is_arrow_string<ArrowType>::value, Status>::type NullableArray(
+ size_t size, size_t num_nulls, std::shared_ptr<::arrow::Array>* out) {
std::vector<uint8_t> valid_bytes(size, 1);
for (size_t i = 0; i < num_nulls; i++) {
@@ -135,13 +131,13 @@ NullableArray(size_t size, size_t num_nulls) {
for (size_t i = 0; i < size; i++) {
builder.Append("test-string");
}
- return std::static_pointer_cast<::arrow::StringArray>(builder.Finish());
+ return builder.Finish(out);
}
// This helper function only supports (size/2) nulls yet.
template <>
-std::shared_ptr<::arrow::PrimitiveArray> NullableArray<::arrow::BooleanType>(
- size_t size, size_t num_nulls) {
+Status NullableArray<::arrow::BooleanType>(
+ size_t size, size_t num_nulls, std::shared_ptr<Array>* out) {
std::vector<uint8_t> values;
::arrow::test::randint<uint8_t>(size, 0, 1, &values);
std::vector<uint8_t> valid_bytes(size, 1);
@@ -153,17 +149,17 @@ std::shared_ptr<::arrow::PrimitiveArray> NullableArray<::arrow::BooleanType>(
::arrow::BooleanBuilder builder(
::arrow::default_memory_pool(), std::make_shared<::arrow::BooleanType>());
builder.Append(values.data(), values.size(), valid_bytes.data());
- return std::static_pointer_cast<::arrow::PrimitiveArray>(builder.Finish());
+ return builder.Finish(out);
}
-std::shared_ptr<::arrow::Column> MakeColumn(const std::string& name,
- const std::shared_ptr<::arrow::Array>& array, bool nullable) {
+std::shared_ptr<::arrow::Column> MakeColumn(
+ const std::string& name, const std::shared_ptr<Array>& array, bool nullable) {
auto field = std::make_shared<::arrow::Field>(name, array->type(), nullable);
return std::make_shared<::arrow::Column>(field, array);
}
std::shared_ptr<::arrow::Table> MakeSimpleTable(
- const std::shared_ptr<::arrow::Array>& values, bool nullable) {
+ const std::shared_ptr<Array>& values, bool nullable) {
std::shared_ptr<::arrow::Column> column = MakeColumn("col", values, nullable);
std::vector<std::shared_ptr<::arrow::Column>> columns({column});
std::vector<std::shared_ptr<::arrow::Field>> fields({column->field()});
@@ -172,7 +168,7 @@ std::shared_ptr<::arrow::Table> MakeSimpleTable(
}
template <typename T>
-void ExpectArray(T* expected, ::arrow::Array* result) {
+void ExpectArray(T* expected, Array* result) {
auto p_array = static_cast<::arrow::PrimitiveArray*>(result);
for (int i = 0; i < result->length(); i++) {
EXPECT_EQ(expected[i], reinterpret_cast<const T*>(p_array->data()->data())[i]);
@@ -180,7 +176,7 @@ void ExpectArray(T* expected, ::arrow::Array* result) {
}
template <typename ArrowType>
-void ExpectArray(typename ArrowType::c_type* expected, ::arrow::Array* result) {
+void ExpectArray(typename ArrowType::c_type* expected, Array* result) {
::arrow::PrimitiveArray* p_array = static_cast<::arrow::PrimitiveArray*>(result);
for (int64_t i = 0; i < result->length(); i++) {
EXPECT_EQ(expected[i],
@@ -189,11 +185,13 @@ void ExpectArray(typename ArrowType::c_type* expected, ::arrow::Array* result) {
}
template <>
-void ExpectArray<::arrow::BooleanType>(uint8_t* expected, ::arrow::Array* result) {
+void ExpectArray<::arrow::BooleanType>(uint8_t* expected, Array* result) {
::arrow::BooleanBuilder builder(
::arrow::default_memory_pool(), std::make_shared<::arrow::BooleanType>());
builder.Append(expected, result->length());
- std::shared_ptr<::arrow::Array> expected_array = builder.Finish();
+
+ std::shared_ptr<Array> expected_array;
+ EXPECT_OK(builder.Finish(&expected_array));
EXPECT_TRUE(result->Equals(expected_array));
}
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/82515fea/src/parquet/arrow/writer.cc
----------------------------------------------------------------------
diff --git a/src/parquet/arrow/writer.cc b/src/parquet/arrow/writer.cc
index ff3707b..e75d4b7 100644
--- a/src/parquet/arrow/writer.cc
+++ b/src/parquet/arrow/writer.cc
@@ -45,6 +45,8 @@ using parquet::schema::GroupNode;
namespace parquet {
namespace arrow {
+namespace BitUtil = ::arrow::BitUtil;
+
class FileWriter::Impl {
public:
Impl(MemoryPool* pool, std::unique_ptr<ParquetFileWriter> writer);
@@ -176,7 +178,7 @@ Status FileWriter::Impl::TypedWriteBatch<BooleanType, ::arrow::BooleanType>(
if (writer->descr()->max_definition_level() == 0) {
// no nulls, just dump the data
for (int64_t i = 0; i < length; i++) {
- buffer_ptr[i] = ::arrow::util::get_bit(data_ptr, offset + i);
+ buffer_ptr[i] = BitUtil::GetBit(data_ptr, offset + i);
}
PARQUET_CATCH_NOT_OK(writer->WriteBatch(length, nullptr, nullptr, buffer_ptr));
} else if (writer->descr()->max_definition_level() == 1) {
@@ -186,7 +188,7 @@ Status FileWriter::Impl::TypedWriteBatch<BooleanType, ::arrow::BooleanType>(
if (data->null_count() == 0) {
std::fill(def_levels_ptr, def_levels_ptr + length, 1);
for (int64_t i = 0; i < length; i++) {
- buffer_ptr[i] = ::arrow::util::get_bit(data_ptr, offset + i);
+ buffer_ptr[i] = BitUtil::GetBit(data_ptr, offset + i);
}
// TODO(PARQUET-644): write boolean values as a packed bitmap
PARQUET_CATCH_NOT_OK(
@@ -198,7 +200,7 @@ Status FileWriter::Impl::TypedWriteBatch<BooleanType, ::arrow::BooleanType>(
def_levels_ptr[i] = 0;
} else {
def_levels_ptr[i] = 1;
- buffer_ptr[buffer_idx++] = ::arrow::util::get_bit(data_ptr, offset + i);
+ buffer_ptr[buffer_idx++] = BitUtil::GetBit(data_ptr, offset + i);
}
}
PARQUET_CATCH_NOT_OK(
@@ -260,9 +262,8 @@ Status FileWriter::Impl::WriteFlatColumnChunk(
DCHECK((offset + length) <= data->length());
RETURN_NOT_OK(data_buffer_.Resize(length * sizeof(ByteArray)));
auto buffer_ptr = reinterpret_cast<ByteArray*>(data_buffer_.mutable_data());
- auto values = std::dynamic_pointer_cast<PrimitiveArray>(data->values());
- auto data_ptr = reinterpret_cast<const uint8_t*>(values->data()->data());
- DCHECK(values != nullptr);
+ auto data_ptr = reinterpret_cast<const uint8_t*>(data->data()->data());
+ DCHECK(data_ptr != nullptr);
auto writer = reinterpret_cast<TypedColumnWriter<ByteArrayType>*>(column_writer);
if (writer->descr()->max_definition_level() > 0) {
RETURN_NOT_OK(def_levels_buffer_.Resize(length * sizeof(int16_t)));
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/82515fea/src/parquet/column/writer.cc
----------------------------------------------------------------------
diff --git a/src/parquet/column/writer.cc b/src/parquet/column/writer.cc
index b917945..d1c3fe2 100644
--- a/src/parquet/column/writer.cc
+++ b/src/parquet/column/writer.cc
@@ -353,8 +353,8 @@ inline int64_t TypedColumnWriter<DType>::WriteMiniBatch(int64_t num_values,
}
template <typename DType>
-void TypedColumnWriter<DType>::WriteBatch(int64_t num_values,
- const int16_t* def_levels, const int16_t* rep_levels, const T* values) {
+void TypedColumnWriter<DType>::WriteBatch(int64_t num_values, const int16_t* def_levels,
+ const int16_t* rep_levels, const T* values) {
// We check for DataPage limits only after we have inserted the values. If a user
// writes a large number of values, the DataPage size can be much above the limit.
// The purpose of this chunking is to bound this. Even if a user writes large number
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/82515fea/thirdparty/build_thirdparty.sh
----------------------------------------------------------------------
diff --git a/thirdparty/build_thirdparty.sh b/thirdparty/build_thirdparty.sh
index 6ebcd96..4a91516 100755
--- a/thirdparty/build_thirdparty.sh
+++ b/thirdparty/build_thirdparty.sh
@@ -75,15 +75,6 @@ fi
STANDARD_DARWIN_FLAGS="-std=c++11 -stdlib=libc++"
-# build arrow
-if [ -n "$F_ALL" -o -n "$F_ARROW" ]; then
- cd $TP_DIR/$ARROW_BASEDIR/cpp
- source ./setup_build_env.sh
- cmake . -DARROW_PARQUET=OFF -DARROW_HDFS=ON -DCMAKE_INSTALL_PREFIX=$PREFIX
- make -j$PARALLEL install
- # :
-fi
-
# build googletest
GOOGLETEST_ERROR="failed for googletest!"
if [ -n "$F_ALL" -o -n "$F_GTEST" ]; then
@@ -142,5 +133,16 @@ if [ -n "$F_ALL" -o -n "$F_THRIFT" ]; then
fi
fi
+# build arrow
+if [ -n "$F_ALL" -o -n "$F_ARROW" ]; then
+ cd $TP_DIR/$ARROW_BASEDIR/cpp
+ cmake -DARROW_BUILD_TESTS=off \
+ -DARROW_HDFS=ON \
+ -DCMAKE_INSTALL_PREFIX=$PREFIX \
+ .
+ make -j$PARALLEL install
+ # :
+fi
+
echo "---------------------"
echo "Thirdparty dependencies built and installed into $PREFIX successfully"
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/82515fea/thirdparty/versions.sh
----------------------------------------------------------------------
diff --git a/thirdparty/versions.sh b/thirdparty/versions.sh
index f058b79..87fe6b6 100755
--- a/thirdparty/versions.sh
+++ b/thirdparty/versions.sh
@@ -15,7 +15,7 @@
# specific language governing permissions and limitations
# under the License.
-ARROW_VERSION="7fb4d24a35269db99fa112c0512d4a32c372dd74"
+ARROW_VERSION="676c32ccea6274c75b2750453c1ddbc5f645c037"
ARROW_URL="https://github.com/apache/arrow/archive/${ARROW_VERSION}.tar.gz"
ARROW_BASEDIR="arrow-${ARROW_VERSION}"