You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@parquet.apache.org by we...@apache.org on 2016/06/25 01:48:06 UTC
parquet-cpp git commit: PARQUET-639: Do not export DCHECK in public
headers
Repository: parquet-cpp
Updated Branches:
refs/heads/master 5e831d65d -> 3ca5a7034
PARQUET-639: Do not export DCHECK in public headers
I added a test so that DCHECK does not leak in the public headers. I prefer this to renaming the macro
Author: Wes McKinney <we...@apache.org>
Closes #127 from wesm/no-export-dcheck and squashes the following commits:
52a2d22 [Wes McKinney] Remove exposure of DCHECK macros from publicly-visible headers
Project: http://git-wip-us.apache.org/repos/asf/parquet-cpp/repo
Commit: http://git-wip-us.apache.org/repos/asf/parquet-cpp/commit/3ca5a703
Tree: http://git-wip-us.apache.org/repos/asf/parquet-cpp/tree/3ca5a703
Diff: http://git-wip-us.apache.org/repos/asf/parquet-cpp/diff/3ca5a703
Branch: refs/heads/master
Commit: 3ca5a70348f59db66ffa18eb08c80208a4614e2c
Parents: 5e831d6
Author: Wes McKinney <we...@apache.org>
Authored: Fri Jun 24 18:47:59 2016 -0700
Committer: Wes McKinney <we...@apache.org>
Committed: Fri Jun 24 18:47:59 2016 -0700
----------------------------------------------------------------------
CMakeLists.txt | 1 +
src/parquet/column/levels.cc | 146 ++++++++++++++++++++++++++++++++++
src/parquet/column/levels.h | 121 ++++------------------------
src/parquet/public-api-test.cc | 7 ++
src/parquet/util/input.cc | 1 +
src/parquet/util/mem-allocator.h | 3 +-
src/parquet/util/output.cc | 1 +
7 files changed, 171 insertions(+), 109 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/3ca5a703/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 8751af6..2417449 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -444,6 +444,7 @@ endif()
set(LIBPARQUET_SRCS
src/parquet/types.cc
+ src/parquet/column/levels.cc
src/parquet/column/reader.cc
src/parquet/column/writer.cc
src/parquet/column/scanner.cc
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/3ca5a703/src/parquet/column/levels.cc
----------------------------------------------------------------------
diff --git a/src/parquet/column/levels.cc b/src/parquet/column/levels.cc
new file mode 100644
index 0000000..6f87ad8
--- /dev/null
+++ b/src/parquet/column/levels.cc
@@ -0,0 +1,146 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "parquet/column/levels.h"
+
+#include <cstdint>
+
+#include "parquet/util/rle-encoding.h"
+
+namespace parquet {
+
+LevelEncoder::LevelEncoder() {}
+LevelEncoder::~LevelEncoder() {}
+
+void LevelEncoder::Init(Encoding::type encoding, int16_t max_level,
+ int num_buffered_values, uint8_t* data, int data_size) {
+ bit_width_ = BitUtil::Log2(max_level + 1);
+ encoding_ = encoding;
+ switch (encoding) {
+ case Encoding::RLE: {
+ rle_encoder_.reset(new RleEncoder(data, data_size, bit_width_));
+ break;
+ }
+ case Encoding::BIT_PACKED: {
+ int num_bytes = BitUtil::Ceil(num_buffered_values * bit_width_, 8);
+ bit_packed_encoder_.reset(new BitWriter(data, num_bytes));
+ break;
+ }
+ default:
+ throw ParquetException("Unknown encoding type for levels.");
+ }
+}
+
+int LevelEncoder::MaxBufferSize(
+ Encoding::type encoding, int16_t max_level, int num_buffered_values) {
+ int bit_width = BitUtil::Log2(max_level + 1);
+ int num_bytes = 0;
+ switch (encoding) {
+ case Encoding::RLE: {
+ // TODO: Due to the way we currently check if the buffer is full enough,
+ // we need to have MinBufferSize as head room.
+ num_bytes = RleEncoder::MaxBufferSize(bit_width, num_buffered_values) +
+ RleEncoder::MinBufferSize(bit_width);
+ break;
+ }
+ case Encoding::BIT_PACKED: {
+ num_bytes = BitUtil::Ceil(num_buffered_values * bit_width, 8);
+ break;
+ }
+ default:
+ throw ParquetException("Unknown encoding type for levels.");
+ }
+ return num_bytes;
+}
+
+int LevelEncoder::Encode(int batch_size, const int16_t* levels) {
+ int num_encoded = 0;
+ if (!rle_encoder_ && !bit_packed_encoder_) {
+ throw ParquetException("Level encoders are not initialized.");
+ }
+
+ if (encoding_ == Encoding::RLE) {
+ for (int i = 0; i < batch_size; ++i) {
+ if (!rle_encoder_->Put(*(levels + i))) { break; }
+ ++num_encoded;
+ }
+ rle_encoder_->Flush();
+ rle_length_ = rle_encoder_->len();
+ } else {
+ for (int i = 0; i < batch_size; ++i) {
+ if (!bit_packed_encoder_->PutValue(*(levels + i), bit_width_)) { break; }
+ ++num_encoded;
+ }
+ bit_packed_encoder_->Flush();
+ }
+ return num_encoded;
+}
+
+LevelDecoder::LevelDecoder()
+ : num_values_remaining_(0) {}
+
+LevelDecoder::~LevelDecoder() {}
+
+int LevelDecoder::SetData(Encoding::type encoding, int16_t max_level,
+ int num_buffered_values, const uint8_t* data) {
+ uint32_t num_bytes = 0;
+ encoding_ = encoding;
+ num_values_remaining_ = num_buffered_values;
+ bit_width_ = BitUtil::Log2(max_level + 1);
+ switch (encoding) {
+ case Encoding::RLE: {
+ num_bytes = *reinterpret_cast<const uint32_t*>(data);
+ const uint8_t* decoder_data = data + sizeof(uint32_t);
+ if (!rle_decoder_) {
+ rle_decoder_.reset(new RleDecoder(decoder_data, num_bytes, bit_width_));
+ } else {
+ rle_decoder_->Reset(decoder_data, num_bytes, bit_width_);
+ }
+ return sizeof(uint32_t) + num_bytes;
+ }
+ case Encoding::BIT_PACKED: {
+ num_bytes = BitUtil::Ceil(num_buffered_values * bit_width_, 8);
+ if (!bit_packed_decoder_) {
+ bit_packed_decoder_.reset(new BitReader(data, num_bytes));
+ } else {
+ bit_packed_decoder_->Reset(data, num_bytes);
+ }
+ return num_bytes;
+ }
+ default:
+ throw ParquetException("Unknown encoding type for levels.");
+ }
+ return -1;
+}
+
+int LevelDecoder::Decode(int batch_size, int16_t* levels) {
+ int num_decoded = 0;
+
+ int num_values = std::min(num_values_remaining_, batch_size);
+ if (encoding_ == Encoding::RLE) {
+ num_decoded = rle_decoder_->GetBatch(levels, num_values);
+ } else {
+ for (int i = 0; i < num_values; ++i) {
+ if (!bit_packed_decoder_->GetValue(bit_width_, levels + i)) { break; }
+ ++num_decoded;
+ }
+ }
+ num_values_remaining_ -= num_decoded;
+ return num_decoded;
+}
+
+} // namespace parquet
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/3ca5a703/src/parquet/column/levels.h
----------------------------------------------------------------------
diff --git a/src/parquet/column/levels.h b/src/parquet/column/levels.h
index ce751d0..c57ca2f 100644
--- a/src/parquet/column/levels.h
+++ b/src/parquet/column/levels.h
@@ -23,79 +23,28 @@
#include "parquet/exception.h"
#include "parquet/types.h"
-#include "parquet/util/rle-encoding.h"
namespace parquet {
+class BitReader;
+class BitWriter;
+class RleDecoder;
+class RleEncoder;
+
class LevelEncoder {
public:
- LevelEncoder() {}
+ LevelEncoder();
+ ~LevelEncoder();
static int MaxBufferSize(
- Encoding::type encoding, int16_t max_level, int num_buffered_values) {
- int bit_width = BitUtil::Log2(max_level + 1);
- int num_bytes = 0;
- switch (encoding) {
- case Encoding::RLE: {
- // TODO: Due to the way we currently check if the buffer is full enough,
- // we need to have MinBufferSize as head room.
- num_bytes = RleEncoder::MaxBufferSize(bit_width, num_buffered_values) +
- RleEncoder::MinBufferSize(bit_width);
- break;
- }
- case Encoding::BIT_PACKED: {
- num_bytes = BitUtil::Ceil(num_buffered_values * bit_width, 8);
- break;
- }
- default:
- throw ParquetException("Unknown encoding type for levels.");
- }
- return num_bytes;
- }
+ Encoding::type encoding, int16_t max_level, int num_buffered_values);
// Initialize the LevelEncoder.
void Init(Encoding::type encoding, int16_t max_level, int num_buffered_values,
- uint8_t* data, int data_size) {
- bit_width_ = BitUtil::Log2(max_level + 1);
- encoding_ = encoding;
- switch (encoding) {
- case Encoding::RLE: {
- rle_encoder_.reset(new RleEncoder(data, data_size, bit_width_));
- break;
- }
- case Encoding::BIT_PACKED: {
- int num_bytes = BitUtil::Ceil(num_buffered_values * bit_width_, 8);
- bit_packed_encoder_.reset(new BitWriter(data, num_bytes));
- break;
- }
- default:
- throw ParquetException("Unknown encoding type for levels.");
- }
- }
+ uint8_t* data, int data_size);
// Encodes a batch of levels from an array and returns the number of levels encoded
- int Encode(int batch_size, const int16_t* levels) {
- int num_encoded = 0;
- if (!rle_encoder_ && !bit_packed_encoder_) {
- throw ParquetException("Level encoders are not initialized.");
- }
-
- if (encoding_ == Encoding::RLE) {
- for (int i = 0; i < batch_size; ++i) {
- if (!rle_encoder_->Put(*(levels + i))) { break; }
- ++num_encoded;
- }
- rle_encoder_->Flush();
- rle_length_ = rle_encoder_->len();
- } else {
- for (int i = 0; i < batch_size; ++i) {
- if (!bit_packed_encoder_->PutValue(*(levels + i), bit_width_)) { break; }
- ++num_encoded;
- }
- bit_packed_encoder_->Flush();
- }
- return num_encoded;
- }
+ int Encode(int batch_size, const int16_t* levels);
int32_t len() {
if (encoding_ != Encoding::RLE) {
@@ -114,58 +63,16 @@ class LevelEncoder {
class LevelDecoder {
public:
- LevelDecoder() : num_values_remaining_(0) {}
+ LevelDecoder();
+ ~LevelDecoder();
// Initialize the LevelDecoder state with new data
// and return the number of bytes consumed
int SetData(Encoding::type encoding, int16_t max_level, int num_buffered_values,
- const uint8_t* data) {
- uint32_t num_bytes = 0;
- encoding_ = encoding;
- num_values_remaining_ = num_buffered_values;
- bit_width_ = BitUtil::Log2(max_level + 1);
- switch (encoding) {
- case Encoding::RLE: {
- num_bytes = *reinterpret_cast<const uint32_t*>(data);
- const uint8_t* decoder_data = data + sizeof(uint32_t);
- if (!rle_decoder_) {
- rle_decoder_.reset(new RleDecoder(decoder_data, num_bytes, bit_width_));
- } else {
- rle_decoder_->Reset(decoder_data, num_bytes, bit_width_);
- }
- return sizeof(uint32_t) + num_bytes;
- }
- case Encoding::BIT_PACKED: {
- num_bytes = BitUtil::Ceil(num_buffered_values * bit_width_, 8);
- if (!bit_packed_decoder_) {
- bit_packed_decoder_.reset(new BitReader(data, num_bytes));
- } else {
- bit_packed_decoder_->Reset(data, num_bytes);
- }
- return num_bytes;
- }
- default:
- throw ParquetException("Unknown encoding type for levels.");
- }
- return -1;
- }
+ const uint8_t* data);
// Decodes a batch of levels into an array and returns the number of levels decoded
- int Decode(int batch_size, int16_t* levels) {
- int num_decoded = 0;
-
- int num_values = std::min(num_values_remaining_, batch_size);
- if (encoding_ == Encoding::RLE) {
- num_decoded = rle_decoder_->GetBatch(levels, num_values);
- } else {
- for (int i = 0; i < num_values; ++i) {
- if (!bit_packed_decoder_->GetValue(bit_width_, levels + i)) { break; }
- ++num_decoded;
- }
- }
- num_values_remaining_ -= num_decoded;
- return num_decoded;
- }
+ int Decode(int batch_size, int16_t* levels);
private:
int bit_width_;
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/3ca5a703/src/parquet/public-api-test.cc
----------------------------------------------------------------------
diff --git a/src/parquet/public-api-test.cc b/src/parquet/public-api-test.cc
index 1dc7621..e307f52 100644
--- a/src/parquet/public-api-test.cc
+++ b/src/parquet/public-api-test.cc
@@ -20,6 +20,7 @@
#include "parquet/api/io.h"
#include "parquet/api/reader.h"
#include "parquet/api/schema.h"
+#include "parquet/api/writer.h"
namespace parquet {
@@ -29,4 +30,10 @@ TEST(TestPublicAPI, DoesNotIncludeThrift) {
#endif
}
+TEST(TestPublicAPI, DoesNotExportDCHECK) {
+#ifdef DCHECK
+ FAIL() << "parquet/util/logging.h should not be transitively included";
+#endif
+}
+
} // namespace parquet
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/3ca5a703/src/parquet/util/input.cc
----------------------------------------------------------------------
diff --git a/src/parquet/util/input.cc b/src/parquet/util/input.cc
index 73ca8a5..e1659f7 100644
--- a/src/parquet/util/input.cc
+++ b/src/parquet/util/input.cc
@@ -24,6 +24,7 @@
#include "parquet/exception.h"
#include "parquet/util/buffer.h"
+#include "parquet/util/logging.h"
namespace parquet {
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/3ca5a703/src/parquet/util/mem-allocator.h
----------------------------------------------------------------------
diff --git a/src/parquet/util/mem-allocator.h b/src/parquet/util/mem-allocator.h
index eb68f02..3f387c3 100644
--- a/src/parquet/util/mem-allocator.h
+++ b/src/parquet/util/mem-allocator.h
@@ -18,8 +18,7 @@
#ifndef PARQUET_UTIL_MEMORY_POOL_H
#define PARQUET_UTIL_MEMORY_POOL_H
-#include "parquet/util/logging.h"
-#include "parquet/util/bit-util.h"
+#include <cstdint>
namespace parquet {
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/3ca5a703/src/parquet/util/output.cc
----------------------------------------------------------------------
diff --git a/src/parquet/util/output.cc b/src/parquet/util/output.cc
index 4f024a5..6fc4ed8 100644
--- a/src/parquet/util/output.cc
+++ b/src/parquet/util/output.cc
@@ -22,6 +22,7 @@
#include "parquet/exception.h"
#include "parquet/util/buffer.h"
+#include "parquet/util/logging.h"
namespace parquet {