You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ar...@apache.org on 2019/07/25 17:45:20 UTC
[impala] 02/04: IMPALA-5031: Fix undefined behavior: ptr overflow
This is an automated email from the ASF dual-hosted git repository.
arodoni pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
commit 1c18a46c16484c596404f8d2661585ce0e53e187
Author: Jim Apple <jb...@apache.org>
AuthorDate: Sun Jul 21 17:23:42 2019 -0700
IMPALA-5031: Fix undefined behavior: ptr overflow
In [expr.add], the C++ standard says:
When an expression that has integral type is added to or
subtracted from a pointer, the result has the type of the pointer
operand. ... If both the pointer operand and the result point to
elements of the same array object, or one past the last element of
the array object, the evaluation shall not produce an overflow;
otherwise, the behavior is undefined.
This is triggered in the end-to-end tests. The interesting part of
the backtrace is:
exec/parquet/hdfs-parquet-scanner.cc:1405:45: runtime error: pointer
index expression with base 0x00001300e0e9 overflowed to
0xffffffff1300e0ea
#0 HdfsParquetScanner::ProcessFooter()
exec/parquet/hdfs-parquet-scanner.cc:1405:45
#1 HdfsParquetScanner::Open(ScannerContext*)
exec/parquet/hdfs-parquet-scanner.cc:186:26
#2 HdfsScanNodeBase::CreateAndOpenScannerHelper(
HdfsPartitionDescriptor*, ScannerContext*,
scoped_ptr<HdfsScanner>*) exec/hdfs-scan-node-base.cc:721:59
#3 HdfsScanNodeMt::CreateAndOpenScanner(HdfsPartitionDescriptor*,
ScannerContext*, scoped_ptr<HdfsScanner>*)
exec/hdfs-scan-node-mt.cc:127:19
#4 HdfsScanNodeMt::GetNext(RuntimeState*, RowBatch*, bool*)
exec/hdfs-scan-node-mt.cc:97:21
Change-Id: I81c7db75b564045106edf3d46e2c4a62be77359f
Reviewed-on: http://gerrit.cloudera.org:8080/13889
Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
be/src/exec/parquet/hdfs-parquet-scanner.cc | 11 ++++++-----
1 file changed, 6 insertions(+), 5 deletions(-)
diff --git a/be/src/exec/parquet/hdfs-parquet-scanner.cc b/be/src/exec/parquet/hdfs-parquet-scanner.cc
index fe6e724..35c2e70 100644
--- a/be/src/exec/parquet/hdfs-parquet-scanner.cc
+++ b/be/src/exec/parquet/hdfs-parquet-scanner.cc
@@ -1402,11 +1402,16 @@ Status HdfsParquetScanner::ProcessFooter() {
// the magic number
uint8_t* metadata_size_ptr = magic_number_ptr - sizeof(int32_t);
uint32_t metadata_size = *reinterpret_cast<uint32_t*>(metadata_size_ptr);
- uint8_t* metadata_ptr = metadata_size_ptr - metadata_size;
// The start of the metadata is:
// file_len - 4-byte footer length field - 4-byte version number field - metadata size
int64_t metadata_start = file_len - sizeof(int32_t) - sizeof(PARQUET_VERSION_NUMBER) -
metadata_size;
+ if (UNLIKELY(metadata_start < 0)) {
+ return Status(Substitute("File '$0' is invalid. Invalid metadata size in file "
+ "footer: $1 bytes. File size: $2 bytes.",
+ filename(), metadata_size, file_len));
+ }
+ uint8_t* metadata_ptr = metadata_size_ptr - metadata_size;
// If the metadata was too big, we need to read it into a contiguous buffer before
// deserializing it.
@@ -1420,10 +1425,6 @@ Status HdfsParquetScanner::ProcessFooter() {
int64_t partition_id = context_->partition_descriptor()->id();
const HdfsFileDesc* file_desc = scan_node_->GetFileDesc(partition_id, filename());
DCHECK_EQ(file_desc, stream_->file_desc());
- if (metadata_start < 0) {
- return Status(Substitute("File '$0' is invalid. Invalid metadata size in file "
- "footer: $1 bytes. File size: $2 bytes.", filename(), metadata_size, file_len));
- }
if (!metadata_buffer.TryAllocate(metadata_size)) {
string details = Substitute("Could not allocate buffer of $0 bytes for Parquet "