You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@parquet.apache.org by ju...@apache.org on 2016/03/01 01:51:49 UTC

parquet-cpp git commit: PARQUET-463: Add local DCHECK macros, fix some dcheck bugs exposed

Repository: parquet-cpp
Updated Branches:
  refs/heads/master 41c1e6887 -> eab9a7342


PARQUET-463: Add local DCHECK macros, fix some dcheck bugs exposed

I was also able to remove the `-Wno-unused-value` compiler flag. Removing `-Wno-unused-variable` will have to take place in another patch (more work required).

Author: Wes McKinney <we...@apache.org>

Closes #67 from wesm/PARQUET-463 and squashes the following commits:

da3afb2 [Wes McKinney] Fix signed-unsigned comparisons inside dchecks
a1ca479 [Wes McKinney] Remove -Wno-unused-value
0b49cc6 [Wes McKinney] Adapt simple dcheck macros from Kudu, fix dcheck failures


Project: http://git-wip-us.apache.org/repos/asf/parquet-cpp/repo
Commit: http://git-wip-us.apache.org/repos/asf/parquet-cpp/commit/eab9a734
Tree: http://git-wip-us.apache.org/repos/asf/parquet-cpp/tree/eab9a734
Diff: http://git-wip-us.apache.org/repos/asf/parquet-cpp/diff/eab9a734

Branch: refs/heads/master
Commit: eab9a7342095b730726336a90b91cc181c4c2c50
Parents: 41c1e68
Author: Wes McKinney <we...@apache.org>
Authored: Mon Feb 29 16:51:45 2016 -0800
Committer: Julien Le Dem <ju...@dremio.com>
Committed: Mon Feb 29 16:51:45 2016 -0800

----------------------------------------------------------------------
 CMakeLists.txt                              |  3 +-
 src/parquet/encodings/dictionary-encoding.h |  9 ++-
 src/parquet/encodings/encoding-test.cc      |  4 +
 src/parquet/util/bit-stream-utils.inline.h  |  4 +-
 src/parquet/util/bit-util-test.cc           |  8 ++
 src/parquet/util/cpu-info.h                 |  4 +
 src/parquet/util/logging.h                  | 98 +++++++++++++++++++++---
 src/parquet/util/mem-pool.h                 |  2 +-
 src/parquet/util/rle-encoding.h             |  2 +-
 9 files changed, 116 insertions(+), 18 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/eab9a734/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 0076449..de3dd14 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -244,8 +244,7 @@ message(STATUS "Build Type: ${CMAKE_BUILD_TYPE}")
 
 # Build with C++11 and SSE3 by default
 # TODO(wesm): These compiler warning suppressions should be removed one by one
-SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -msse3 -Wall -Wno-unused-value -Wno-unused-variable")
-
+SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -msse3 -Wall -Wno-unused-variable")
 
 if (APPLE)
   # Use libc++ to avoid linker errors on some platforms

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/eab9a734/src/parquet/encodings/dictionary-encoding.h
----------------------------------------------------------------------
diff --git a/src/parquet/encodings/dictionary-encoding.h b/src/parquet/encodings/dictionary-encoding.h
index 19ef1ea..eba9b49 100644
--- a/src/parquet/encodings/dictionary-encoding.h
+++ b/src/parquet/encodings/dictionary-encoding.h
@@ -27,6 +27,7 @@
 #include "parquet/encodings/decoder.h"
 #include "parquet/encodings/encoder.h"
 #include "parquet/encodings/plain-encoding.h"
+#include "parquet/util/cpu-info.h"
 #include "parquet/util/dict-encoding.h"
 #include "parquet/util/hash-util.h"
 #include "parquet/util/mem-pool.h"
@@ -203,7 +204,11 @@ class DictEncoderBase {
       mod_bitmask_(hash_table_size_ - 1),
       hash_slots_(hash_table_size_, HASH_SLOT_EMPTY),
       pool_(pool),
-      dict_encoded_size_(0) {}
+      dict_encoded_size_(0) {
+    if (!CpuInfo::initialized()) {
+      CpuInfo::Init();
+    }
+  }
 
   /// Size of the table. Must be a power of 2.
   int hash_table_size_;
@@ -426,6 +431,8 @@ inline int DictEncoderBase::WriteIndices(uint8_t* buffer, int buffer_len) {
     if (!encoder.Put(index)) return -1;
   }
   encoder.Flush();
+
+  ClearIndices();
   return 1 + encoder.len();
 }
 

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/eab9a734/src/parquet/encodings/encoding-test.cc
----------------------------------------------------------------------
diff --git a/src/parquet/encodings/encoding-test.cc b/src/parquet/encodings/encoding-test.cc
index f060f96..f45736a 100644
--- a/src/parquet/encodings/encoding-test.cc
+++ b/src/parquet/encodings/encoding-test.cc
@@ -155,6 +155,10 @@ class TestEncodingBase : public ::testing::Test {
     }
   }
 
+  void TearDown() {
+    pool_.FreeAll();
+  }
+
   void InitData(int nvalues, int repeats) {
     num_values_ = nvalues * repeats;
     input_bytes_.resize(num_values_ * sizeof(T));

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/eab9a734/src/parquet/util/bit-stream-utils.inline.h
----------------------------------------------------------------------
diff --git a/src/parquet/util/bit-stream-utils.inline.h b/src/parquet/util/bit-stream-utils.inline.h
index e0dcab8..fc90244 100644
--- a/src/parquet/util/bit-stream-utils.inline.h
+++ b/src/parquet/util/bit-stream-utils.inline.h
@@ -90,7 +90,7 @@ inline bool BitReader::GetValue(int num_bits, T* v) {
   DCHECK(buffer_ != NULL);
   // TODO: revisit this limit if necessary
   DCHECK_LE(num_bits, 32);
-  DCHECK_LE(num_bits, sizeof(T) * 8);
+  DCHECK_LE(num_bits, static_cast<int>(sizeof(T) * 8));
 
   if (UNLIKELY(byte_offset_ * 8 + bit_offset_ + num_bits > max_bytes_ * 8)) return false;
 
@@ -118,7 +118,7 @@ inline bool BitReader::GetValue(int num_bits, T* v) {
 
 template<typename T>
 inline bool BitReader::GetAligned(int num_bytes, T* v) {
-  DCHECK_LE(num_bytes, sizeof(T));
+  DCHECK_LE(num_bytes, static_cast<int>(sizeof(T)));
   int bytes_read = BitUtil::Ceil(bit_offset_, 8);
   if (UNLIKELY(byte_offset_ + bytes_read + num_bytes > max_bytes_)) return false;
 

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/eab9a734/src/parquet/util/bit-util-test.cc
----------------------------------------------------------------------
diff --git a/src/parquet/util/bit-util-test.cc b/src/parquet/util/bit-util-test.cc
index 5ea4c11..90c1167 100644
--- a/src/parquet/util/bit-util-test.cc
+++ b/src/parquet/util/bit-util-test.cc
@@ -32,6 +32,12 @@
 
 namespace parquet_cpp {
 
+static void ensure_cpu_info_initialized() {
+  if (!CpuInfo::initialized()) {
+    CpuInfo::Init();
+  }
+}
+
 TEST(BitUtil, Ceil) {
   EXPECT_EQ(BitUtil::Ceil(0, 1), 0);
   EXPECT_EQ(BitUtil::Ceil(1, 1), 1);
@@ -71,6 +77,8 @@ TEST(BitUtil, RoundDown) {
 }
 
 TEST(BitUtil, Popcount) {
+  ensure_cpu_info_initialized();
+
   EXPECT_EQ(BitUtil::Popcount(BOOST_BINARY(0 1 0 1 0 1 0 1)), 4);
   EXPECT_EQ(BitUtil::PopcountNoHw(BOOST_BINARY(0 1 0 1 0 1 0 1)), 4);
   EXPECT_EQ(BitUtil::Popcount(BOOST_BINARY(1 1 1 1 0 1 0 1)), 6);

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/eab9a734/src/parquet/util/cpu-info.h
----------------------------------------------------------------------
diff --git a/src/parquet/util/cpu-info.h b/src/parquet/util/cpu-info.h
index 9026cde..e293418 100644
--- a/src/parquet/util/cpu-info.h
+++ b/src/parquet/util/cpu-info.h
@@ -93,6 +93,10 @@ class CpuInfo {
     return model_name_;
   }
 
+  static bool initialized() {
+    return initialized_;
+  }
+
  private:
   static bool initialized_;
   static int64_t hardware_flags_;

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/eab9a734/src/parquet/util/logging.h
----------------------------------------------------------------------
diff --git a/src/parquet/util/logging.h b/src/parquet/util/logging.h
index 3094373..4c93f86 100644
--- a/src/parquet/util/logging.h
+++ b/src/parquet/util/logging.h
@@ -20,14 +20,90 @@
 
 #include <iostream>
 
-#define DCHECK(condition) while (false) std::cout
-#define DCHECK_EQ(a, b) while (false) std::cout
-#define DCHECK_NE(a, b) while (false) std::cout
-#define DCHECK_GT(a, b) while (false) std::cout
-#define DCHECK_LT(a, b) while (false) std::cout
-#define DCHECK_GE(a, b) while (false) std::cout
-#define DCHECK_LE(a, b) while (false) std::cout
-// Similar to how glog defines DCHECK for release.
-#define LOG(level) while (false) std::cout
-
-#endif
+namespace parquet_cpp {
+
+// Stubbed versions of macros defined in glog/logging.h, intended for
+// environments where glog headers aren't available.
+//
+// Add more as needed.
+
+// Log levels. LOG ignores them, so their values are abitrary.
+
+#define PARQUET_INFO 0
+#define PARQUET_WARNING 1
+#define PARQUET_ERROR 2
+#define PARQUET_FATAL 3
+
+#define PARQUET_LOG_INTERNAL(level) parquet_cpp::internal::CerrLog(level)
+#define PARQUET_LOG(level) PARQUET_LOG_INTERNAL(PARQUET_##level)
+
+#define PARQUET_CHECK(condition) \
+  (condition) ? 0 : PARQUET_LOG(FATAL) << "Check failed: " #condition " "
+
+#ifdef NDEBUG
+#define PARQUET_DFATAL PARQUET_WARNING
+
+#define DCHECK(condition) while (false) parquet_cpp::internal::NullLog()
+#define DCHECK_EQ(val1, val2) while (false) parquet_cpp::internal::NullLog()
+#define DCHECK_NE(val1, val2) while (false) parquet_cpp::internal::NullLog()
+#define DCHECK_LE(val1, val2) while (false) parquet_cpp::internal::NullLog()
+#define DCHECK_LT(val1, val2) while (false) parquet_cpp::internal::NullLog()
+#define DCHECK_GE(val1, val2) while (false) parquet_cpp::internal::NullLog()
+#define DCHECK_GT(val1, val2) while (false) parquet_cpp::internal::NullLog()
+
+#else
+#define PARQUET_DFATAL PARQUET_FATAL
+
+#define DCHECK(condition) PARQUET_CHECK(condition)
+#define DCHECK_EQ(val1, val2) PARQUET_CHECK((val1) == (val2))
+#define DCHECK_NE(val1, val2) PARQUET_CHECK((val1) != (val2))
+#define DCHECK_LE(val1, val2) PARQUET_CHECK((val1) <= (val2))
+#define DCHECK_LT(val1, val2) PARQUET_CHECK((val1) < (val2))
+#define DCHECK_GE(val1, val2) PARQUET_CHECK((val1) >= (val2))
+#define DCHECK_GT(val1, val2) PARQUET_CHECK((val1) > (val2))
+
+#endif // NDEBUG
+
+namespace internal {
+
+class NullLog {
+ public:
+  template<class T>
+  NullLog& operator<<(const T& t) {
+    return *this;
+  }
+};
+
+class CerrLog {
+ public:
+  CerrLog(int severity) // NOLINT(runtime/explicit)
+    : severity_(severity),
+      has_logged_(false) {
+  }
+
+  ~CerrLog() {
+    if (has_logged_) {
+      std::cerr << std::endl;
+    }
+    if (severity_ == PARQUET_FATAL) {
+      exit(1);
+    }
+  }
+
+  template<class T>
+  CerrLog& operator<<(const T& t) {
+    has_logged_ = true;
+    std::cerr << t;
+    return *this;
+  }
+
+ private:
+  const int severity_;
+  bool has_logged_;
+};
+
+} // namespace internal
+
+} // namespace parquet_cpp
+
+#endif // PARQUET_UTIL_LOGGING_H

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/eab9a734/src/parquet/util/mem-pool.h
----------------------------------------------------------------------
diff --git a/src/parquet/util/mem-pool.h b/src/parquet/util/mem-pool.h
index 88a8715..3f21aa7 100644
--- a/src/parquet/util/mem-pool.h
+++ b/src/parquet/util/mem-pool.h
@@ -197,7 +197,7 @@ class MemPool {
     DCHECK_LE(info.allocated_bytes + num_bytes, info.size);
     info.allocated_bytes += num_bytes;
     total_allocated_bytes_ += num_bytes;
-    DCHECK_LE(current_chunk_idx_, chunks_.size() - 1);
+    DCHECK_LE(current_chunk_idx_, static_cast<int>(chunks_.size()) - 1);
     peak_allocated_bytes_ = std::max(total_allocated_bytes_, peak_allocated_bytes_);
     return result;
   }

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/eab9a734/src/parquet/util/rle-encoding.h
----------------------------------------------------------------------
diff --git a/src/parquet/util/rle-encoding.h b/src/parquet/util/rle-encoding.h
index b8dcc8e..ca9fa4f 100644
--- a/src/parquet/util/rle-encoding.h
+++ b/src/parquet/util/rle-encoding.h
@@ -292,7 +292,7 @@ bool RleDecoder::NextCounts() {
 /// This function buffers input values 8 at a time.  After seeing all 8 values,
 /// it decides whether they should be encoded as a literal or repeated run.
 inline bool RleEncoder::Put(uint64_t value) {
-  DCHECK(bit_width_ == 64 || value < (1LL << bit_width_));
+  DCHECK(bit_width_ == 64 || value < (1ULL << bit_width_));
   if (UNLIKELY(buffer_full_)) return false;
 
   if (LIKELY(current_value_ == value)) {