You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by bh...@apache.org on 2019/09/09 19:53:06 UTC

[impala] 01/02: IMPALA-5031: widen Thrift enum to placate UBSAN

This is an automated email from the ASF dual-hosted git repository.

bharathv pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit fe54ebdc90e2d8d889474fe51154d684a1430203
Author: Jim Apple <jb...@apache.org>
AuthorDate: Sat Jul 27 12:00:35 2019 -0700

    IMPALA-5031: widen Thrift enum to placate UBSAN
    
    This fixes an instance of undefined behavior in the end-to-end tests
    in which an enum value is outside of the allowable values for that
    enum according to the C++14 standard.
    
    Representative backtrace:
    
    exec/parquet/parquet-metadata-utils.cc:293:26: runtime error: load of
      value 49, which is not a valid value for type 'Type::type'
        #0 ParquetMetadataUtils::ValidateRowGroupColumn(
           parquet::FileMetaData const&, char const*, int, int,
           parquet::SchemaElement const&, RuntimeState*)
           exec/parquet/parquet-metadata-utils.cc:293:26
        #1 BaseScalarColumnReader::Reset(HdfsFileDesc const&,
           parquet::ColumnChunk const&, int)
           exec/parquet/parquet-column-readers.cc:1077:43
        #2 HdfsParquetScanner::InitScalarColumns()
           exec/parquet/hdfs-parquet-scanner.cc:1679:60
        #3 HdfsParquetScanner::NextRowGroup()
           exec/parquet/hdfs-parquet-scanner.cc:648:45
        #4 HdfsParquetScanner::GetNextInternal(RowBatch*)
           exec/parquet/hdfs-parquet-scanner.cc:437:45
        #5 HdfsParquetScanner::ProcessSplit()
           exec/parquet/hdfs-parquet-scanner.cc:353:21
        #6 HdfsScanNode::ProcessSplit(vector<FilterContext> const&,
           MemPool*, io::ScanRange*, long*) exec/hdfs-scan-node.cc:514:21
        #7 HdfsScanNode::ScannerThread(bool, long)
           exec/hdfs-scan-node.cc:415:7
        #8 HdfsScanNode::ThreadTokenAvailableCb(ThreadResourcePool*)::
           $_0::operator()() const exec/hdfs-scan-node.cc:337:13
    
    Change-Id: I48090e8e0c6c6f18bb1ad3c32c1f5fbffc908844
    Reviewed-on: http://gerrit.cloudera.org:8080/13940
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 common/thrift/parquet.thrift | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/common/thrift/parquet.thrift b/common/thrift/parquet.thrift
index 1197c2e..723b250 100644
--- a/common/thrift/parquet.thrift
+++ b/common/thrift/parquet.thrift
@@ -38,6 +38,28 @@ enum Type {
   DOUBLE = 5;
   BYTE_ARRAY = 6;
   FIXED_LEN_BYTE_ARRAY = 7;
+
+  /**
+   * UBSAN_FORCE_WIDTH forces the values of the C++ enum Type to include (1u << 31) - 1.
+   * That prevents the undefined behavior in the [expr] and [dcl.enum] sections of the
+   * C++14 standard:
+   *
+   *     If during the evaluation of an expression, the result is not mathematically
+   *     defined or not in the range of representable values for its type, the behavior is
+   *     undefined.
+   *
+   * and
+   *
+   *     [F]or an enumeration where emin is the smallest enumerator and emax is the
+   *     largest, the values of the enumeration are the values in the range bmin to bmax,
+   *     defined as follows: Let K be 1 for a two's complement representation and 0 for a
+   *     one's complement or sign-magnitude representation. bmax is the smallest value
+   *     greater than or equal to max(|emin| - K, |emax|) and equal to 2^M-1, where M is
+   *     a non-negative integer. bmin is zero if emin is non-negative and -(bmax+K)
+   *     otherwise.
+   */
+
+  UBSAN_FORCE_WIDTH = 0x7ffffff;
 }
 
 /**