You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2019/06/03 16:35:55 UTC

[impala] 03/06: IMPALA-5031: NULL is undefined in memcpy and memcmp

This is an automated email from the ASF dual-hosted git repository.

tarmstrong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 24b7a3bf56400ac4007de1bb16761bafac98b5a6
Author: Jim Apple <jb...@apache.org>
AuthorDate: Sat May 25 11:45:21 2019 -0700

    IMPALA-5031: NULL is undefined in memcpy and memcmp
    
    This patch fixes UBSAN "null pointer passed as argument" errors in the
    end-to-end tests. These are undefined behavior according to "7.1.4 Use
    of library functions" in the C99 standard (which is included in C++14
    in section [intro.refs]):
    
        If an argument to a function has an invalid value (such as a value
        outside the domain of the function, or a pointer outside the
        address space of the program, or a null pointer, or a pointer to
        non-modifiable storage when the corresponding parameter is not
        const-qualified) or a type (after promotion) not expected by a
        function with variable number of arguments, the behavior is
        undefined.
    
    The interesting parts of the backtraces are:
    
        runtime/sorter.cc:575:18: runtime error: null pointer passed as
           argument 2, which is declared to never be null
        /usr/include/string.h:43:45: note: nonnull attribute specified
           here
        #0 Sorter::Run::CopyVarLenData(vector<StringValue*> const&,
           unsigned char*) runtime/sorter.cc:575:5
        #1 Status Sorter::Run::AddBatchInternal<true, true>(RowBatch*,
           int, int*) runtime/sorter.cc:232:11
        #2 Sorter::Run::AddInputBatch(RowBatch*, int, int*)
           runtime/sorter.cc:660:12
        #3 Sorter::AddBatchNoSpill(RowBatch*, int, int*)
           runtime/sorter.cc:882:58
        #4 Sorter::AddBatch(RowBatch*) runtime/sorter.cc:862:45
        #5 SortNode::SortInput(RuntimeState*) exec/sort-node.cc:177:54
        #6 SortNode::Open(RuntimeState*) exec/sort-node.cc:90:43
    
        runtime/tuple.cc:105:25: runtime error: null pointer passed as
           argument 2, which is declared to never be null
        /usr/include/string.h:43:45: note: nonnull attribute specified
           here
        #0 Tuple::DeepCopyVarlenData(TupleDescriptor const&, MemPool*)
           runtime/tuple.cc:105:5
        #1 Tuple::DeepCopy(Tuple*, TupleDescriptor const&, MemPool*)
           runtime/tuple.cc:94:35
        #2 Tuple::DeepCopy(TupleDescriptor const&, MemPool*)
           runtime/tuple.cc:85:3
        #3 KrpcDataStreamSender::Channel::AddRow(TupleRow*)
           runtime/krpc-data-stream-sender.cc:509:43
        #4 KrpcDataStreamSender::AddRowToChannel(int, TupleRow*)
           runtime/krpc-data-stream-sender.cc:846
        #5 (<unknown module>)
    
        runtime/tuple.cc:146:19: runtime error: null pointer passed as
           argument 2, which is declared to never be null
        /usr/include/string.h:43:45: note: nonnull attribute specified
           here
        #0 Tuple::DeepCopyVarlenData(TupleDescriptor const&, char**, int*,
           bool) runtime/tuple.cc:146:5
        #1 Tuple::DeepCopy(TupleDescriptor const&, char**, int*, bool)
           runtime/tuple.cc:135:35
        #2 RowBatch::SerializeInternal(long, FixedSizeHashTable<Tuple*,
           int>*, vector<int>*, string*) runtime/row-batch.cc:392:14
        #3 RowBatch::Serialize(bool, vector<int>*, string*, long*, bool*)
           runtime/row-batch.cc:290:45
        #4 RowBatch::Serialize(OutboundRowBatch*)
           runtime/row-batch.cc:259:43
        #5 KrpcDataStreamSender::SerializeBatch(RowBatch*,
           OutboundRowBatch*, int) runtime/krpc-data-stream-sender.cc:955:50
        #6 KrpcDataStreamSender::Send(RuntimeState*, RowBatch*)
           runtime/krpc-data-stream-sender.cc:870:45
    
        runtime/tuple.h:106:12: runtime error: null pointer passed as
           argument 1, which is declared to never be null
        /usr/include/string.h:62:79: note: nonnull attribute specified
           here
        #0 Tuple::ClearNullBits(int, int) runtime/tuple.h:106:5
        #1 HdfsScanner::InitTuple(TupleDescriptor const*, Tuple*, Tuple*)
           exec/hdfs-scanner.h:512:14
        #2 HdfsOrcScanner::AssembleCollection(OrcComplexColumnReader
           const&, int, CollectionValueBuilder*)
           exec/hdfs-orc-scanner.cc:742:7
        #3 OrcCollectionReader::ReadValue(int, Tuple*, MemPool*)
           exec/orc-column-readers.cc:375:20
        #4 OrcStructReader::ReadValue(int, Tuple*, MemPool*)
           exec/orc-column-readers.cc:322:52
        #5 OrcListReader::ReadChildrenValue(int, int, Tuple*, MemPool*)
           const exec/orc-column-readers.cc:473:52
        #6 HdfsOrcScanner::AssembleCollection(OrcComplexColumnReader
           const&, int, CollectionValueBuilder*)
           exec/hdfs-orc-scanner.cc:743:60
        #7 OrcCollectionReader::ReadValue(int, Tuple*, MemPool*)
           exec/orc-column-readers.cc:375:20
        #8 OrcStructReader::TransferTuple(Tuple*, MemPool*)
           exec/orc-column-readers.cc:346:52
        #9 HdfsOrcScanner::TransferTuples(OrcComplexColumnReader*,
           RowBatch*) exec/hdfs-orc-scanner.cc:669:58
        #10 HdfsOrcScanner::AssembleRows(RowBatch*)
            exec/hdfs-orc-scanner.cc:629:45
        #11 HdfsOrcScanner::GetNextInternal(RowBatch*)
            exec/hdfs-orc-scanner.cc:507:19
        #12 HdfsOrcScanner::ProcessSplit() exec/hdfs-orc-scanner.cc:426:21
        #13 HdfsScanNode::ProcessSplit(vector<FilterContext> const&,
            MemPool*, io::ScanRange*, long*) exec/hdfs-scan-node.cc:514:21
        #14 HdfsScanNode::ScannerThread(bool, long)
            exec/hdfs-scan-node.cc:415:7
        #15 HdfsScanNode::ThreadTokenAvailableCb(ThreadResourcePool*)
            ::$_0::operator()() const exec/hdfs-scan-node.cc:337:13
    
        runtime/collection-value-builder.h:75:25: runtime error: null
           pointer passed as argument 2, which is declared to never be null
        /usr/include/string.h:43:28: note: nonnull attribute specified
           here
        #0 CollectionValueBuilder::GetFreeMemory(Tuple**, int*)
           runtime/collection-value-builder.h:75:9
        #1 HdfsScanner::GetCollectionMemory(CollectionValueBuilder*,
           MemPool**, Tuple**, TupleRow**, long*)
           exec/hdfs-scanner.cc:194:3
        #2 HdfsOrcScanner::AssembleCollection(OrcComplexColumnReader
           const&, int, CollectionValueBuilder*)
           exec/hdfs-orc-scanner.cc:733:9
        #3 HdfsOrcScanner::AssembleCollection(OrcComplexColumnReader
           const&, int, CollectionValueBuilder*)
           exec/hdfs-orc-scanner.cc:710:7
        #4 HdfsOrcScanner::AssembleCollection(OrcComplexColumnReader
           const&, int, CollectionValueBuilder*)
           exec/hdfs-orc-scanner.cc:710:7
        #5 OrcCollectionReader::ReadValue(int, Tuple*, MemPool*)
           exec/orc-column-readers.cc:375:20
        #6 OrcStructReader::TransferTuple(Tuple*, MemPool*)
           exec/orc-column-readers.cc:346:5
        #7 HdfsOrcScanner::TransferTuples(OrcComplexColumnReader*,
           RowBatch*) exec/hdfs-orc-scanner.cc:669:5
        #8 HdfsOrcScanner::AssembleRows(RowBatch*)
           exec/hdfs-orc-scanner.cc:629:5
        #9 HdfsOrcScanner::GetNextInternal(RowBatch*)
           exec/hdfs-orc-scanner.cc:507:19
        #10 HdfsScanner::GetNext(RowBatch*) exec/hdfs-scanner.h:133:12
        #11 HdfsScanNodeMt::GetNext(RuntimeState*, RowBatch*, bool*)
            exec/hdfs-scan-node-mt.cc:106:29
        #12 SubplanNode::GetNext(RuntimeState*, RowBatch*, bool*)
            exec/subplan-node.cc:129:7
        #13 AggregationNode::Open(RuntimeState*)
            exec/aggregation-node.cc:67:5
    
    Change-Id: I9362ce6b9ba470ed90e5bd2dc313b66ebd8c6af5
    Reviewed-on: http://gerrit.cloudera.org:8080/13436
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 be/src/runtime/collection-value-builder.h | 3 ++-
 be/src/runtime/sorter.cc                  | 3 ++-
 be/src/runtime/tuple.cc                   | 5 +++--
 be/src/runtime/tuple.h                    | 4 +++-
 4 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/be/src/runtime/collection-value-builder.h b/be/src/runtime/collection-value-builder.h
index d75e94b..c98816b 100644
--- a/be/src/runtime/collection-value-builder.h
+++ b/be/src/runtime/collection-value-builder.h
@@ -22,6 +22,7 @@
 #include "runtime/mem-tracker.h"
 #include "runtime/tuple.h"
 #include "util/debug-util.h"
+#include "util/ubsan.h"
 
 namespace impala {
 
@@ -72,7 +73,7 @@ class CollectionValueBuilder {
               ErrorMsg(TErrorCode::COLLECTION_ALLOC_FAILED, new_buffer_size,
               path, buffer_size_, coll_value_->num_tuples).msg(), new_buffer_size);
         }
-        memcpy(new_buf, coll_value_->ptr, bytes_written);
+        Ubsan::MemCpy(new_buf, coll_value_->ptr, bytes_written);
         coll_value_->ptr = new_buf;
         buffer_size_ = new_buffer_size;
       }
diff --git a/be/src/runtime/sorter.cc b/be/src/runtime/sorter.cc
index a7a5a64..e3d828c 100644
--- a/be/src/runtime/sorter.cc
+++ b/be/src/runtime/sorter.cc
@@ -27,6 +27,7 @@
 #include "runtime/query-state.h"
 #include "runtime/runtime-state.h"
 #include "runtime/sorted-run-merger.h"
+#include "util/ubsan.h"
 
 #include "common/names.h"
 
@@ -572,7 +573,7 @@ Status Sorter::Run::AddPage(vector<Page>* page_sequence) {
 void Sorter::Run::CopyVarLenData(const vector<StringValue*>& string_values,
     uint8_t* dest) {
   for (StringValue* string_val: string_values) {
-    memcpy(dest, string_val->ptr, string_val->len);
+    Ubsan::MemCpy(dest, string_val->ptr, string_val->len);
     string_val->ptr = reinterpret_cast<char*>(dest);
     dest += string_val->len;
   }
diff --git a/be/src/runtime/tuple.cc b/be/src/runtime/tuple.cc
index 916ae6f..e5688d3 100644
--- a/be/src/runtime/tuple.cc
+++ b/be/src/runtime/tuple.cc
@@ -35,6 +35,7 @@
 #include "runtime/tuple-row.h"
 #include "util/debug-util.h"
 #include "util/runtime-profile-counters.h"
+#include "util/ubsan.h"
 
 #include "common/names.h"
 
@@ -102,7 +103,7 @@ void Tuple::DeepCopyVarlenData(const TupleDescriptor& desc, MemPool* pool) {
     if (IsNull((*slot)->null_indicator_offset())) continue;
     StringValue* string_v = GetStringSlot((*slot)->tuple_offset());
     char* string_copy = reinterpret_cast<char*>(pool->Allocate(string_v->len));
-    memcpy(string_copy, string_v->ptr, string_v->len);
+    Ubsan::MemCpy(string_copy, string_v->ptr, string_v->len);
     string_v->ptr = string_copy;
   }
 
@@ -143,7 +144,7 @@ void Tuple::DeepCopyVarlenData(const TupleDescriptor& desc, char** data, int* of
     if (IsNull((*slot)->null_indicator_offset())) continue;
 
     StringValue* string_v = GetStringSlot((*slot)->tuple_offset());
-    memcpy(*data, string_v->ptr, string_v->len);
+    Ubsan::MemCpy(*data, string_v->ptr, string_v->len);
     string_v->ptr = convert_ptrs ? reinterpret_cast<char*>(*offset) : *data;
     *data += string_v->len;
     *offset += string_v->len;
diff --git a/be/src/runtime/tuple.h b/be/src/runtime/tuple.h
index 6a40af8..5f2c4af 100644
--- a/be/src/runtime/tuple.h
+++ b/be/src/runtime/tuple.h
@@ -24,6 +24,7 @@
 #include "gutil/macros.h"
 #include "runtime/descriptors.h"
 #include "runtime/mem-pool.h"
+#include "util/ubsan.h"
 
 namespace llvm {
 class Function;
@@ -103,7 +104,8 @@ class Tuple {
   }
 
   void ClearNullBits(int null_bytes_offset, int num_null_bytes) {
-    memset(reinterpret_cast<uint8_t*>(this) + null_bytes_offset, 0, num_null_bytes);
+    Ubsan::MemSet(
+        reinterpret_cast<uint8_t*>(this) + null_bytes_offset, 0, num_null_bytes);
   }
 
   /// The total size of all data represented in this tuple (tuple data and referenced