You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@parquet.apache.org by ju...@apache.org on 2016/03/01 01:51:49 UTC
parquet-cpp git commit: PARQUET-463: Add local DCHECK macros,
fix some dcheck bugs exposed
Repository: parquet-cpp
Updated Branches:
refs/heads/master 41c1e6887 -> eab9a7342
PARQUET-463: Add local DCHECK macros, fix some dcheck bugs exposed
I was also able to remove the `-Wno-unused-value` compiler flag. Removing `-Wno-unused-variable` will have to take place in another patch (more work required).
Author: Wes McKinney <we...@apache.org>
Closes #67 from wesm/PARQUET-463 and squashes the following commits:
da3afb2 [Wes McKinney] Fix signed-unsigned comparisons inside dchecks
a1ca479 [Wes McKinney] Remove -Wno-unused-value
0b49cc6 [Wes McKinney] Adapt simple dcheck macros from Kudu, fix dcheck failures
Project: http://git-wip-us.apache.org/repos/asf/parquet-cpp/repo
Commit: http://git-wip-us.apache.org/repos/asf/parquet-cpp/commit/eab9a734
Tree: http://git-wip-us.apache.org/repos/asf/parquet-cpp/tree/eab9a734
Diff: http://git-wip-us.apache.org/repos/asf/parquet-cpp/diff/eab9a734
Branch: refs/heads/master
Commit: eab9a7342095b730726336a90b91cc181c4c2c50
Parents: 41c1e68
Author: Wes McKinney <we...@apache.org>
Authored: Mon Feb 29 16:51:45 2016 -0800
Committer: Julien Le Dem <ju...@dremio.com>
Committed: Mon Feb 29 16:51:45 2016 -0800
----------------------------------------------------------------------
CMakeLists.txt | 3 +-
src/parquet/encodings/dictionary-encoding.h | 9 ++-
src/parquet/encodings/encoding-test.cc | 4 +
src/parquet/util/bit-stream-utils.inline.h | 4 +-
src/parquet/util/bit-util-test.cc | 8 ++
src/parquet/util/cpu-info.h | 4 +
src/parquet/util/logging.h | 98 +++++++++++++++++++++---
src/parquet/util/mem-pool.h | 2 +-
src/parquet/util/rle-encoding.h | 2 +-
9 files changed, 116 insertions(+), 18 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/eab9a734/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 0076449..de3dd14 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -244,8 +244,7 @@ message(STATUS "Build Type: ${CMAKE_BUILD_TYPE}")
# Build with C++11 and SSE3 by default
# TODO(wesm): These compiler warning suppressions should be removed one by one
-SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -msse3 -Wall -Wno-unused-value -Wno-unused-variable")
-
+SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -msse3 -Wall -Wno-unused-variable")
if (APPLE)
# Use libc++ to avoid linker errors on some platforms
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/eab9a734/src/parquet/encodings/dictionary-encoding.h
----------------------------------------------------------------------
diff --git a/src/parquet/encodings/dictionary-encoding.h b/src/parquet/encodings/dictionary-encoding.h
index 19ef1ea..eba9b49 100644
--- a/src/parquet/encodings/dictionary-encoding.h
+++ b/src/parquet/encodings/dictionary-encoding.h
@@ -27,6 +27,7 @@
#include "parquet/encodings/decoder.h"
#include "parquet/encodings/encoder.h"
#include "parquet/encodings/plain-encoding.h"
+#include "parquet/util/cpu-info.h"
#include "parquet/util/dict-encoding.h"
#include "parquet/util/hash-util.h"
#include "parquet/util/mem-pool.h"
@@ -203,7 +204,11 @@ class DictEncoderBase {
mod_bitmask_(hash_table_size_ - 1),
hash_slots_(hash_table_size_, HASH_SLOT_EMPTY),
pool_(pool),
- dict_encoded_size_(0) {}
+ dict_encoded_size_(0) {
+ if (!CpuInfo::initialized()) {
+ CpuInfo::Init();
+ }
+ }
/// Size of the table. Must be a power of 2.
int hash_table_size_;
@@ -426,6 +431,8 @@ inline int DictEncoderBase::WriteIndices(uint8_t* buffer, int buffer_len) {
if (!encoder.Put(index)) return -1;
}
encoder.Flush();
+
+ ClearIndices();
return 1 + encoder.len();
}
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/eab9a734/src/parquet/encodings/encoding-test.cc
----------------------------------------------------------------------
diff --git a/src/parquet/encodings/encoding-test.cc b/src/parquet/encodings/encoding-test.cc
index f060f96..f45736a 100644
--- a/src/parquet/encodings/encoding-test.cc
+++ b/src/parquet/encodings/encoding-test.cc
@@ -155,6 +155,10 @@ class TestEncodingBase : public ::testing::Test {
}
}
+ void TearDown() {
+ pool_.FreeAll();
+ }
+
void InitData(int nvalues, int repeats) {
num_values_ = nvalues * repeats;
input_bytes_.resize(num_values_ * sizeof(T));
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/eab9a734/src/parquet/util/bit-stream-utils.inline.h
----------------------------------------------------------------------
diff --git a/src/parquet/util/bit-stream-utils.inline.h b/src/parquet/util/bit-stream-utils.inline.h
index e0dcab8..fc90244 100644
--- a/src/parquet/util/bit-stream-utils.inline.h
+++ b/src/parquet/util/bit-stream-utils.inline.h
@@ -90,7 +90,7 @@ inline bool BitReader::GetValue(int num_bits, T* v) {
DCHECK(buffer_ != NULL);
// TODO: revisit this limit if necessary
DCHECK_LE(num_bits, 32);
- DCHECK_LE(num_bits, sizeof(T) * 8);
+ DCHECK_LE(num_bits, static_cast<int>(sizeof(T) * 8));
if (UNLIKELY(byte_offset_ * 8 + bit_offset_ + num_bits > max_bytes_ * 8)) return false;
@@ -118,7 +118,7 @@ inline bool BitReader::GetValue(int num_bits, T* v) {
template<typename T>
inline bool BitReader::GetAligned(int num_bytes, T* v) {
- DCHECK_LE(num_bytes, sizeof(T));
+ DCHECK_LE(num_bytes, static_cast<int>(sizeof(T)));
int bytes_read = BitUtil::Ceil(bit_offset_, 8);
if (UNLIKELY(byte_offset_ + bytes_read + num_bytes > max_bytes_)) return false;
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/eab9a734/src/parquet/util/bit-util-test.cc
----------------------------------------------------------------------
diff --git a/src/parquet/util/bit-util-test.cc b/src/parquet/util/bit-util-test.cc
index 5ea4c11..90c1167 100644
--- a/src/parquet/util/bit-util-test.cc
+++ b/src/parquet/util/bit-util-test.cc
@@ -32,6 +32,12 @@
namespace parquet_cpp {
+static void ensure_cpu_info_initialized() {
+ if (!CpuInfo::initialized()) {
+ CpuInfo::Init();
+ }
+}
+
TEST(BitUtil, Ceil) {
EXPECT_EQ(BitUtil::Ceil(0, 1), 0);
EXPECT_EQ(BitUtil::Ceil(1, 1), 1);
@@ -71,6 +77,8 @@ TEST(BitUtil, RoundDown) {
}
TEST(BitUtil, Popcount) {
+ ensure_cpu_info_initialized();
+
EXPECT_EQ(BitUtil::Popcount(BOOST_BINARY(0 1 0 1 0 1 0 1)), 4);
EXPECT_EQ(BitUtil::PopcountNoHw(BOOST_BINARY(0 1 0 1 0 1 0 1)), 4);
EXPECT_EQ(BitUtil::Popcount(BOOST_BINARY(1 1 1 1 0 1 0 1)), 6);
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/eab9a734/src/parquet/util/cpu-info.h
----------------------------------------------------------------------
diff --git a/src/parquet/util/cpu-info.h b/src/parquet/util/cpu-info.h
index 9026cde..e293418 100644
--- a/src/parquet/util/cpu-info.h
+++ b/src/parquet/util/cpu-info.h
@@ -93,6 +93,10 @@ class CpuInfo {
return model_name_;
}
+ static bool initialized() {
+ return initialized_;
+ }
+
private:
static bool initialized_;
static int64_t hardware_flags_;
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/eab9a734/src/parquet/util/logging.h
----------------------------------------------------------------------
diff --git a/src/parquet/util/logging.h b/src/parquet/util/logging.h
index 3094373..4c93f86 100644
--- a/src/parquet/util/logging.h
+++ b/src/parquet/util/logging.h
@@ -20,14 +20,90 @@
#include <iostream>
-#define DCHECK(condition) while (false) std::cout
-#define DCHECK_EQ(a, b) while (false) std::cout
-#define DCHECK_NE(a, b) while (false) std::cout
-#define DCHECK_GT(a, b) while (false) std::cout
-#define DCHECK_LT(a, b) while (false) std::cout
-#define DCHECK_GE(a, b) while (false) std::cout
-#define DCHECK_LE(a, b) while (false) std::cout
-// Similar to how glog defines DCHECK for release.
-#define LOG(level) while (false) std::cout
-
-#endif
+namespace parquet_cpp {
+
+// Stubbed versions of macros defined in glog/logging.h, intended for
+// environments where glog headers aren't available.
+//
+// Add more as needed.
+
+// Log levels. LOG ignores them, so their values are abitrary.
+
+#define PARQUET_INFO 0
+#define PARQUET_WARNING 1
+#define PARQUET_ERROR 2
+#define PARQUET_FATAL 3
+
+#define PARQUET_LOG_INTERNAL(level) parquet_cpp::internal::CerrLog(level)
+#define PARQUET_LOG(level) PARQUET_LOG_INTERNAL(PARQUET_##level)
+
+#define PARQUET_CHECK(condition) \
+ (condition) ? 0 : PARQUET_LOG(FATAL) << "Check failed: " #condition " "
+
+#ifdef NDEBUG
+#define PARQUET_DFATAL PARQUET_WARNING
+
+#define DCHECK(condition) while (false) parquet_cpp::internal::NullLog()
+#define DCHECK_EQ(val1, val2) while (false) parquet_cpp::internal::NullLog()
+#define DCHECK_NE(val1, val2) while (false) parquet_cpp::internal::NullLog()
+#define DCHECK_LE(val1, val2) while (false) parquet_cpp::internal::NullLog()
+#define DCHECK_LT(val1, val2) while (false) parquet_cpp::internal::NullLog()
+#define DCHECK_GE(val1, val2) while (false) parquet_cpp::internal::NullLog()
+#define DCHECK_GT(val1, val2) while (false) parquet_cpp::internal::NullLog()
+
+#else
+#define PARQUET_DFATAL PARQUET_FATAL
+
+#define DCHECK(condition) PARQUET_CHECK(condition)
+#define DCHECK_EQ(val1, val2) PARQUET_CHECK((val1) == (val2))
+#define DCHECK_NE(val1, val2) PARQUET_CHECK((val1) != (val2))
+#define DCHECK_LE(val1, val2) PARQUET_CHECK((val1) <= (val2))
+#define DCHECK_LT(val1, val2) PARQUET_CHECK((val1) < (val2))
+#define DCHECK_GE(val1, val2) PARQUET_CHECK((val1) >= (val2))
+#define DCHECK_GT(val1, val2) PARQUET_CHECK((val1) > (val2))
+
+#endif // NDEBUG
+
+namespace internal {
+
+class NullLog {
+ public:
+ template<class T>
+ NullLog& operator<<(const T& t) {
+ return *this;
+ }
+};
+
+class CerrLog {
+ public:
+ CerrLog(int severity) // NOLINT(runtime/explicit)
+ : severity_(severity),
+ has_logged_(false) {
+ }
+
+ ~CerrLog() {
+ if (has_logged_) {
+ std::cerr << std::endl;
+ }
+ if (severity_ == PARQUET_FATAL) {
+ exit(1);
+ }
+ }
+
+ template<class T>
+ CerrLog& operator<<(const T& t) {
+ has_logged_ = true;
+ std::cerr << t;
+ return *this;
+ }
+
+ private:
+ const int severity_;
+ bool has_logged_;
+};
+
+} // namespace internal
+
+} // namespace parquet_cpp
+
+#endif // PARQUET_UTIL_LOGGING_H
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/eab9a734/src/parquet/util/mem-pool.h
----------------------------------------------------------------------
diff --git a/src/parquet/util/mem-pool.h b/src/parquet/util/mem-pool.h
index 88a8715..3f21aa7 100644
--- a/src/parquet/util/mem-pool.h
+++ b/src/parquet/util/mem-pool.h
@@ -197,7 +197,7 @@ class MemPool {
DCHECK_LE(info.allocated_bytes + num_bytes, info.size);
info.allocated_bytes += num_bytes;
total_allocated_bytes_ += num_bytes;
- DCHECK_LE(current_chunk_idx_, chunks_.size() - 1);
+ DCHECK_LE(current_chunk_idx_, static_cast<int>(chunks_.size()) - 1);
peak_allocated_bytes_ = std::max(total_allocated_bytes_, peak_allocated_bytes_);
return result;
}
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/eab9a734/src/parquet/util/rle-encoding.h
----------------------------------------------------------------------
diff --git a/src/parquet/util/rle-encoding.h b/src/parquet/util/rle-encoding.h
index b8dcc8e..ca9fa4f 100644
--- a/src/parquet/util/rle-encoding.h
+++ b/src/parquet/util/rle-encoding.h
@@ -292,7 +292,7 @@ bool RleDecoder::NextCounts() {
/// This function buffers input values 8 at a time. After seeing all 8 values,
/// it decides whether they should be encoded as a literal or repeated run.
inline bool RleEncoder::Put(uint64_t value) {
- DCHECK(bit_width_ == 64 || value < (1LL << bit_width_));
+ DCHECK(bit_width_ == 64 || value < (1ULL << bit_width_));
if (UNLIKELY(buffer_full_)) return false;
if (LIKELY(current_value_ == value)) {