You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@parquet.apache.org by we...@apache.org on 2016/09/02 15:34:41 UTC
parquet-cpp git commit: PARQUET-700: Disable dictionary encoding for boolean columns
Repository: parquet-cpp
Updated Branches:
refs/heads/master 261072ca9 -> 5e524d146
PARQUET-700: Disable dictionary encoding for boolean columns
Author: Uwe L. Korn <uw...@xhochy.com>
Closes #148 from xhochy/parquet-700 and squashes the following commits:
d33a670 [Uwe L. Korn] Format fixes
e8530ba [Uwe L. Korn] Also test writing booleans with Dictionary encoding
328b430 [Uwe L. Korn] Format fixes
ab33f9b [Uwe L. Korn] PARQUET-700: Disable dictionary encoding for boolean columns
Project: http://git-wip-us.apache.org/repos/asf/parquet-cpp/repo
Commit: http://git-wip-us.apache.org/repos/asf/parquet-cpp/commit/5e524d14
Tree: http://git-wip-us.apache.org/repos/asf/parquet-cpp/tree/5e524d14
Diff: http://git-wip-us.apache.org/repos/asf/parquet-cpp/diff/5e524d14
Branch: refs/heads/master
Commit: 5e524d146c556b1f2ef6da6f8d9a6dbb6b8cea73
Parents: 261072c
Author: Uwe L. Korn <uw...@xhochy.com>
Authored: Fri Sep 2 11:34:31 2016 -0400
Committer: Wes McKinney <we...@apache.org>
Committed: Fri Sep 2 11:34:31 2016 -0400
----------------------------------------------------------------------
src/parquet/column/column-writer-test.cc | 29 +++++++++++++--------------
src/parquet/column/writer.cc | 3 ++-
2 files changed, 16 insertions(+), 16 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/5e524d14/src/parquet/column/column-writer-test.cc
----------------------------------------------------------------------
diff --git a/src/parquet/column/column-writer-test.cc b/src/parquet/column/column-writer-test.cc
index ab232ea..3806bd0 100644
--- a/src/parquet/column/column-writer-test.cc
+++ b/src/parquet/column/column-writer-test.cc
@@ -84,14 +84,22 @@ class TestPrimitiveWriter : public ::testing::Test {
reader_.reset(new TypedColumnReader<TestType>(schema_.get(), std::move(page_reader)));
}
- std::unique_ptr<TypedColumnWriter<TestType>> BuildWriter(
+ std::shared_ptr<TypedColumnWriter<TestType>> BuildWriter(
int64_t output_size = SMALL_SIZE, Encoding::type encoding = Encoding::PLAIN) {
sink_.reset(new InMemoryOutputStream());
std::unique_ptr<SerializedPageWriter> pager(
new SerializedPageWriter(sink_.get(), Compression::UNCOMPRESSED, &metadata_));
- return std::unique_ptr<TypedColumnWriter<TestType>>(
- new TypedColumnWriter<TestType>(schema_.get(), std::move(pager), output_size,
- encoding, writer_properties_.get()));
+ WriterProperties::Builder wp_builder;
+ if (encoding == Encoding::PLAIN_DICTIONARY || encoding == Encoding::RLE_DICTIONARY) {
+ wp_builder.enable_dictionary();
+ } else {
+ wp_builder.disable_dictionary();
+ wp_builder.encoding(encoding);
+ }
+ writer_properties_ = wp_builder.build();
+ std::shared_ptr<ColumnWriter> writer = ColumnWriter::Make(
+ schema_.get(), std::move(pager), output_size, writer_properties_.get());
+ return std::static_pointer_cast<TypedColumnWriter<TestType>>(writer);
}
void SyncValuesOut();
@@ -106,7 +114,7 @@ class TestPrimitiveWriter : public ::testing::Test {
this->GenerateData(SMALL_SIZE);
// Test case 1: required and non-repeated, so no definition or repetition levels
- std::unique_ptr<TypedColumnWriter<TestType>> writer =
+ std::shared_ptr<TypedColumnWriter<TestType>> writer =
this->BuildWriter(SMALL_SIZE, encoding);
writer->WriteBatch(this->values_.size(), nullptr, nullptr, this->values_ptr_);
// The behaviour should be independent from the number of Close() calls
@@ -191,20 +199,11 @@ typedef ::testing::Types<Int32Type, Int64Type, Int96Type, FloatType, DoubleType,
TYPED_TEST_CASE(TestPrimitiveWriter, TestTypes);
-// Dictionary encoding for booleans is not supported.
-typedef ::testing::Types<Int32Type, Int64Type, Int96Type, FloatType, DoubleType,
- ByteArrayType, FLBAType> TestDictionaryTypes;
-
-template <typename T>
-class TestPrimitiveDictionaryWriter : public TestPrimitiveWriter<T> {};
-
-TYPED_TEST_CASE(TestPrimitiveDictionaryWriter, TestDictionaryTypes);
-
TYPED_TEST(TestPrimitiveWriter, RequiredPlain) {
this->TestRequiredWithEncoding(Encoding::PLAIN);
}
-TYPED_TEST(TestPrimitiveDictionaryWriter, RequiredDictionary) {
+TYPED_TEST(TestPrimitiveWriter, RequiredDictionary) {
this->TestRequiredWithEncoding(Encoding::PLAIN_DICTIONARY);
}
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/5e524d14/src/parquet/column/writer.cc
----------------------------------------------------------------------
diff --git a/src/parquet/column/writer.cc b/src/parquet/column/writer.cc
index 1c376ad..da4b17c 100644
--- a/src/parquet/column/writer.cc
+++ b/src/parquet/column/writer.cc
@@ -200,7 +200,8 @@ std::shared_ptr<ColumnWriter> ColumnWriter::Make(const ColumnDescriptor* descr,
std::unique_ptr<PageWriter> pager, int64_t expected_rows,
const WriterProperties* properties) {
Encoding::type encoding = properties->encoding(descr->path());
- if (properties->dictionary_enabled(descr->path())) {
+ if (properties->dictionary_enabled(descr->path()) &&
+ descr->physical_type() != Type::BOOLEAN) {
encoding = properties->dictionary_page_encoding();
}
switch (descr->physical_type()) {