You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2018/12/13 15:52:24 UTC

[arrow] branch master updated: ARROW-4019: [C++] Fix Coverity issues

This is an automated email from the ASF dual-hosted git repository.

wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new e34057c  ARROW-4019: [C++] Fix Coverity issues
e34057c is described below

commit e34057c4b4be8c7abf3537dd4998b5b38919ba73
Author: Antoine Pitrou <an...@python.org>
AuthorDate: Thu Dec 13 09:52:17 2018 -0600

    ARROW-4019: [C++] Fix Coverity issues
    
    This fixes a number of issues found by Coverity.
    
    Other issues are benign, or need to be tackled separately.
    
    Author: Antoine Pitrou <an...@python.org>
    
    Closes #3168 from pitrou/ARROW-4019-fix-coverity-issues and squashes the following commits:
    
    6311aa99a <Antoine Pitrou> ARROW-4019:  Fix Coverity issues
---
 cpp/src/arrow/array.h                      |  8 ++---
 cpp/src/arrow/compute/kernel.h             |  9 +++++
 cpp/src/arrow/io/buffered-test.cc          |  2 +-
 cpp/src/arrow/io/file-test.cc              |  2 +-
 cpp/src/arrow/io/test-common.h             |  2 +-
 cpp/src/arrow/ipc/json-integration-test.cc |  2 +-
 cpp/src/arrow/ipc/json.cc                  |  2 +-
 cpp/src/arrow/ipc/writer.cc                |  5 ++-
 cpp/src/arrow/table.cc                     |  2 +-
 cpp/src/arrow/util/decimal.cc              | 57 +++++++++++++++++-------------
 cpp/src/arrow/util/logging.h               |  3 ++
 cpp/src/arrow/util/rle-encoding-test.cc    |  2 +-
 cpp/src/arrow/util/rle-encoding.h          |  1 +
 13 files changed, 61 insertions(+), 36 deletions(-)

diff --git a/cpp/src/arrow/array.h b/cpp/src/arrow/array.h
index b34b539..37fa5ae 100644
--- a/cpp/src/arrow/array.h
+++ b/cpp/src/arrow/array.h
@@ -87,7 +87,7 @@ class Status;
 /// input array and replace them with newly-allocated data, changing the output
 /// data type as well.
 struct ARROW_EXPORT ArrayData {
-  ArrayData() : length(0) {}
+  ArrayData() : length(0), null_count(0), offset(0) {}
 
   ArrayData(const std::shared_ptr<DataType>& type, int64_t length,
             int64_t null_count = kUnknownNullCount, int64_t offset = 0)
@@ -311,7 +311,7 @@ class ARROW_EXPORT Array {
   std::string ToString() const;
 
  protected:
-  Array() {}
+  Array() : null_bitmap_data_(NULLPTR) {}
 
   std::shared_ptr<ArrayData> data_;
   const uint8_t* null_bitmap_data_;
@@ -382,7 +382,7 @@ class ARROW_EXPORT PrimitiveArray : public FlatArray {
   std::shared_ptr<Buffer> values() const { return data_->buffers[1]; }
 
  protected:
-  PrimitiveArray() {}
+  PrimitiveArray() : raw_values_(NULLPTR) {}
 
   inline void SetData(const std::shared_ptr<ArrayData>& data) {
     auto values = data->buffers[1];
@@ -565,7 +565,7 @@ class ARROW_EXPORT BinaryArray : public FlatArray {
 
  protected:
   // For subclasses
-  BinaryArray() {}
+  BinaryArray() : raw_value_offsets_(NULLPTR), raw_data_(NULLPTR) {}
 
   /// Protected method for constructors
   void SetData(const std::shared_ptr<ArrayData>& data);
diff --git a/cpp/src/arrow/compute/kernel.h b/cpp/src/arrow/compute/kernel.h
index 8048fff..bef2b9a 100644
--- a/cpp/src/arrow/compute/kernel.h
+++ b/cpp/src/arrow/compute/kernel.h
@@ -19,6 +19,7 @@
 #define ARROW_COMPUTE_KERNEL_H
 
 #include <memory>
+#include <utility>
 #include <vector>
 
 #include "arrow/array.h"
@@ -78,6 +79,14 @@ struct ARROW_EXPORT Datum {
 
   Datum(const Datum& other) noexcept { this->value = other.value; }
 
+  // Define move constructor and move assignment, for better performance
+  Datum(Datum&& other) noexcept : value(std::move(other.value)) {}
+
+  Datum& operator=(Datum&& other) noexcept {
+    value = std::move(other.value);
+    return *this;
+  }
+
   Datum::type kind() const {
     switch (this->value.which()) {
       case 0:
diff --git a/cpp/src/arrow/io/buffered-test.cc b/cpp/src/arrow/io/buffered-test.cc
index 7fc4c52..074833d 100644
--- a/cpp/src/arrow/io/buffered-test.cc
+++ b/cpp/src/arrow/io/buffered-test.cc
@@ -67,7 +67,7 @@ class FileTestFixture : public ::testing::Test {
 
   void EnsureFileDeleted() {
     if (FileExists(path_)) {
-      std::remove(path_.c_str());
+      ARROW_UNUSED(std::remove(path_.c_str()));
     }
   }
 
diff --git a/cpp/src/arrow/io/file-test.cc b/cpp/src/arrow/io/file-test.cc
index 6081005..4d710d3 100644
--- a/cpp/src/arrow/io/file-test.cc
+++ b/cpp/src/arrow/io/file-test.cc
@@ -56,7 +56,7 @@ class FileTestFixture : public ::testing::Test {
 
   void EnsureFileDeleted() {
     if (FileExists(path_)) {
-      std::remove(path_.c_str());
+      ARROW_UNUSED(std::remove(path_.c_str()));
     }
   }
 
diff --git a/cpp/src/arrow/io/test-common.h b/cpp/src/arrow/io/test-common.h
index fa91452..a091b01 100644
--- a/cpp/src/arrow/io/test-common.h
+++ b/cpp/src/arrow/io/test-common.h
@@ -118,7 +118,7 @@ class MemoryMapFixture {
  public:
   void TearDown() {
     for (auto path : tmp_files_) {
-      std::remove(path.c_str());
+      ARROW_UNUSED(std::remove(path.c_str()));
     }
   }
 
diff --git a/cpp/src/arrow/ipc/json-integration-test.cc b/cpp/src/arrow/ipc/json-integration-test.cc
index 3e71415..914cdb6 100644
--- a/cpp/src/arrow/ipc/json-integration-test.cc
+++ b/cpp/src/arrow/ipc/json-integration-test.cc
@@ -262,7 +262,7 @@ class TestJSONIntegration : public ::testing::Test {
 
   void TearDown() {
     for (const std::string path : tmp_paths_) {
-      std::remove(path.c_str());
+      ARROW_UNUSED(std::remove(path.c_str()));
     }
   }
 
diff --git a/cpp/src/arrow/ipc/json.cc b/cpp/src/arrow/ipc/json.cc
index 394563c..61c242c 100644
--- a/cpp/src/arrow/ipc/json.cc
+++ b/cpp/src/arrow/ipc/json.cc
@@ -99,7 +99,7 @@ Status JsonWriter::WriteRecordBatch(const RecordBatch& batch) {
 class JsonReader::JsonReaderImpl {
  public:
   JsonReaderImpl(MemoryPool* pool, const std::shared_ptr<Buffer>& data)
-      : pool_(pool), data_(data) {}
+      : pool_(pool), data_(data), record_batches_(nullptr) {}
 
   Status ParseAndReadSchema() {
     doc_.Parse(reinterpret_cast<const rj::Document::Ch*>(data_->data()),
diff --git a/cpp/src/arrow/ipc/writer.cc b/cpp/src/arrow/ipc/writer.cc
index 3d3355d..6ce72e0 100644
--- a/cpp/src/arrow/ipc/writer.cc
+++ b/cpp/src/arrow/ipc/writer.cc
@@ -772,7 +772,10 @@ class SchemaWriter : public StreamBookKeeper {
  public:
   SchemaWriter(const Schema& schema, DictionaryMemo* dictionary_memo, MemoryPool* pool,
                io::OutputStream* sink)
-      : StreamBookKeeper(sink), schema_(schema), dictionary_memo_(dictionary_memo) {}
+      : StreamBookKeeper(sink),
+        pool_(pool),
+        schema_(schema),
+        dictionary_memo_(dictionary_memo) {}
 
   Status WriteSchema() {
 #ifndef NDEBUG
diff --git a/cpp/src/arrow/table.cc b/cpp/src/arrow/table.cc
index 04af4d9..1f3d927 100644
--- a/cpp/src/arrow/table.cc
+++ b/cpp/src/arrow/table.cc
@@ -392,7 +392,7 @@ class SimpleTable : public Table {
   std::vector<std::shared_ptr<Column>> columns_;
 };
 
-Table::Table() {}
+Table::Table() : num_rows_(0) {}
 
 std::shared_ptr<Table> Table::Make(const std::shared_ptr<Schema>& schema,
                                    const std::vector<std::shared_ptr<Column>>& columns,
diff --git a/cpp/src/arrow/util/decimal.cc b/cpp/src/arrow/util/decimal.cc
index fda7746..c47ac82 100644
--- a/cpp/src/arrow/util/decimal.cc
+++ b/cpp/src/arrow/util/decimal.cc
@@ -889,7 +889,7 @@ Status Decimal128::Rescale(int32_t original_scale, int32_t new_scale,
 }
 
 // Helper function used by Decimal128::FromBigEndian
-static inline uint64_t FromBigEndian(const uint8_t* bytes, int32_t length) {
+static inline uint64_t UInt64FromBigEndian(const uint8_t* bytes, int32_t length) {
   // We don't bounds check the length here because this is called by
   // FromBigEndian that has a Decimal128 as its out parameters and
   // that function is already checking the length of the bytes and only
@@ -906,8 +906,7 @@ Status Decimal128::FromBigEndian(const uint8_t* bytes, int32_t length, Decimal12
   static constexpr int32_t kMinDecimalBytes = 1;
   static constexpr int32_t kMaxDecimalBytes = 16;
 
-  int64_t high;
-  uint64_t low;
+  int64_t high, low;
 
   if (length < kMinDecimalBytes || length > kMaxDecimalBytes) {
     std::ostringstream stream;
@@ -917,35 +916,45 @@ Status Decimal128::FromBigEndian(const uint8_t* bytes, int32_t length, Decimal12
     return Status::Invalid(stream.str());
   }
 
-  /// Bytes are coming in big-endian, so the first byte is the MSB and therefore holds the
-  /// sign bit.
+  // Bytes are coming in big-endian, so the first byte is the MSB and therefore holds the
+  // sign bit.
   const bool is_negative = static_cast<int8_t>(bytes[0]) < 0;
 
-  /// Sign extend the low bits if necessary
-  low = UINT64_MAX * (is_negative && length < 8);
-  high = -1 * (is_negative && length < kMaxDecimalBytes);
-
-  /// Stop byte of the high bytes
+  // 1. Extract the high bytes
+  // Stop byte of the high bytes
   const int32_t high_bits_offset = std::max(0, length - 8);
+  const auto high_bits = UInt64FromBigEndian(bytes, high_bits_offset);
 
-  /// Shift left enough bits to make room for the incoming int64_t
-  high <<= high_bits_offset * CHAR_BIT;
-
-  /// Preserve the upper bits by inplace OR-ing the int64_t
-  uint64_t value = arrow::FromBigEndian(bytes, high_bits_offset);
-  high |= value;
+  if (high_bits_offset == 8) {
+    // Avoid undefined shift by 64 below
+    high = high_bits;
+  } else {
+    high = -1 * (is_negative && length < kMaxDecimalBytes);
+    // Shift left enough bits to make room for the incoming int64_t
+    high <<= high_bits_offset * CHAR_BIT;
+    // Preserve the upper bits by inplace OR-ing the int64_t
+    high |= high_bits;
+  }
 
-  /// Stop byte of the low bytes
+  // 2. Extract the low bytes
+  // Stop byte of the low bytes
   const int32_t low_bits_offset = std::min(length, 8);
+  const auto low_bits =
+      UInt64FromBigEndian(bytes + high_bits_offset, length - high_bits_offset);
 
-  /// Shift left enough bits to make room for the incoming uint64_t
-  low <<= low_bits_offset * CHAR_BIT;
-
-  /// Preserve the upper bits by inplace OR-ing the uint64_t
-  value = arrow::FromBigEndian(bytes + high_bits_offset, length - high_bits_offset);
-  low |= value;
+  if (low_bits_offset == 8) {
+    // Avoid undefined shift by 64 below
+    low = low_bits;
+  } else {
+    // Sign extend the low bits if necessary
+    low = -1 * (is_negative && length < 8);
+    // Shift left enough bits to make room for the incoming int64_t
+    low <<= low_bits_offset * CHAR_BIT;
+    // Preserve the upper bits by inplace OR-ing the int64_t
+    low |= low_bits;
+  }
 
-  *out = Decimal128(high, low);
+  *out = Decimal128(high, static_cast<uint64_t>(low));
   return Status::OK();
 }
 
diff --git a/cpp/src/arrow/util/logging.h b/cpp/src/arrow/util/logging.h
index 4cce700..42ab18e 100644
--- a/cpp/src/arrow/util/logging.h
+++ b/cpp/src/arrow/util/logging.h
@@ -22,6 +22,7 @@
 #include <memory>
 #include <string>
 
+#include "arrow/util/macros.h"
 #include "arrow/util/visibility.h"
 
 namespace arrow {
@@ -155,6 +156,8 @@ class ARROW_EXPORT ArrowLog : public ArrowLogBase {
   static void InstallFailureSignalHandler();
 
  private:
+  ARROW_DISALLOW_COPY_AND_ASSIGN(ArrowLog);
+
   // Hide the implementation of log provider by void *.
   // Otherwise, lib user may define the same macro to use the correct header file.
   void* logging_provider_;
diff --git a/cpp/src/arrow/util/rle-encoding-test.cc b/cpp/src/arrow/util/rle-encoding-test.cc
index 8838261..aac1b15 100644
--- a/cpp/src/arrow/util/rle-encoding-test.cc
+++ b/cpp/src/arrow/util/rle-encoding-test.cc
@@ -193,7 +193,7 @@ void ValidateRle(const vector<int>& values, int bit_width, uint8_t* expected_enc
     EXPECT_EQ(encoded_len, expected_len);
   }
   if (expected_encoding != NULL) {
-    EXPECT_EQ(memcmp(buffer, expected_encoding, expected_len), 0);
+    EXPECT_EQ(memcmp(buffer, expected_encoding, encoded_len), 0);
   }
 
   // Verify read
diff --git a/cpp/src/arrow/util/rle-encoding.h b/cpp/src/arrow/util/rle-encoding.h
index a97543d..acefc8e 100644
--- a/cpp/src/arrow/util/rle-encoding.h
+++ b/cpp/src/arrow/util/rle-encoding.h
@@ -436,6 +436,7 @@ bool RleDecoder::NextCounts() {
     literal_count_ = (indicator_value >> 1) * 8;
   } else {
     repeat_count_ = indicator_value >> 1;
+    // XXX (ARROW-4018) this is not big-endian compatible
     bool result =
         bit_reader_.GetAligned<T>(static_cast<int>(BitUtil::CeilDiv(bit_width_, 8)),
                                   reinterpret_cast<T*>(&current_value_));