You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by bh...@apache.org on 2019/09/09 19:53:05 UTC

[impala] branch master updated (521b152 -> 4327cc3)

This is an automated email from the ASF dual-hosted git repository.

bharathv pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git.


    from 521b152  IMPALA-8921: Use short name for Ranger grant/revoke requests
     new fe54ebd  IMPALA-5031: widen Thrift enum to placate UBSAN
     new 4327cc3  IMPALA-8931: Fix fe trigger for lineage events

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 common/thrift/parquet.thrift                       | 22 ++++++++++++++++++++++
 .../org/apache/impala/service/BackendConfig.java   |  6 +++++-
 tests/custom_cluster/test_query_event_hooks.py     | 11 ++++++++---
 3 files changed, 35 insertions(+), 4 deletions(-)


[impala] 01/02: IMPALA-5031: widen Thrift enum to placate UBSAN

Posted by bh...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

bharathv pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit fe54ebdc90e2d8d889474fe51154d684a1430203
Author: Jim Apple <jb...@apache.org>
AuthorDate: Sat Jul 27 12:00:35 2019 -0700

    IMPALA-5031: widen Thrift enum to placate UBSAN
    
    This fixes an instance of undefined behavior in the end-to-end tests
    in which an enum value is outside of the allowable values for that
    enum according to the C++14 standard.
    
    Representative backtrace:
    
    exec/parquet/parquet-metadata-utils.cc:293:26: runtime error: load of
      value 49, which is not a valid value for type 'Type::type'
        #0 ParquetMetadataUtils::ValidateRowGroupColumn(
           parquet::FileMetaData const&, char const*, int, int,
           parquet::SchemaElement const&, RuntimeState*)
           exec/parquet/parquet-metadata-utils.cc:293:26
        #1 BaseScalarColumnReader::Reset(HdfsFileDesc const&,
           parquet::ColumnChunk const&, int)
           exec/parquet/parquet-column-readers.cc:1077:43
        #2 HdfsParquetScanner::InitScalarColumns()
           exec/parquet/hdfs-parquet-scanner.cc:1679:60
        #3 HdfsParquetScanner::NextRowGroup()
           exec/parquet/hdfs-parquet-scanner.cc:648:45
        #4 HdfsParquetScanner::GetNextInternal(RowBatch*)
           exec/parquet/hdfs-parquet-scanner.cc:437:45
        #5 HdfsParquetScanner::ProcessSplit()
           exec/parquet/hdfs-parquet-scanner.cc:353:21
        #6 HdfsScanNode::ProcessSplit(vector<FilterContext> const&,
           MemPool*, io::ScanRange*, long*) exec/hdfs-scan-node.cc:514:21
        #7 HdfsScanNode::ScannerThread(bool, long)
           exec/hdfs-scan-node.cc:415:7
        #8 HdfsScanNode::ThreadTokenAvailableCb(ThreadResourcePool*)::
           $_0::operator()() const exec/hdfs-scan-node.cc:337:13
    
    Change-Id: I48090e8e0c6c6f18bb1ad3c32c1f5fbffc908844
    Reviewed-on: http://gerrit.cloudera.org:8080/13940
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 common/thrift/parquet.thrift | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/common/thrift/parquet.thrift b/common/thrift/parquet.thrift
index 1197c2e..723b250 100644
--- a/common/thrift/parquet.thrift
+++ b/common/thrift/parquet.thrift
@@ -38,6 +38,28 @@ enum Type {
   DOUBLE = 5;
   BYTE_ARRAY = 6;
   FIXED_LEN_BYTE_ARRAY = 7;
+
+  /**
+   * UBSAN_FORCE_WIDTH forces the values of the C++ enum Type to include (1u << 27) - 1.
+   * That prevents the undefined behavior in the [expr] and [dcl.enum] sections of the
+   * C++14 standard:
+   *
+   *     If during the evaluation of an expression, the result is not mathematically
+   *     defined or not in the range of representable values for its type, the behavior is
+   *     undefined.
+   *
+   * and
+   *
+   *     [F]or an enumeration where emin is the smallest enumerator and emax is the
+   *     largest, the values of the enumeration are the values in the range bmin to bmax,
+   *     defined as follows: Let K be 1 for a two's complement representation and 0 for a
+   *     one's complement or sign-magnitude representation. bmax is the smallest value
+   *     greater than or equal to max(|emin| - K, |emax|) and equal to 2^M-1, where M is
+   *     a non-negative integer. bmin is zero if emin is non-negative and -(bmax+K)
+   *     otherwise.
+   */
+
+  UBSAN_FORCE_WIDTH = 0x7ffffff;
 }
 
 /**


[impala] 02/02: IMPALA-8931: Fix fe trigger for lineage events

Posted by bh...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

bharathv pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 4327cc351317878de51003c2dcf4a71c9183eb44
Author: Bharath Vissapragada <bh...@cloudera.com>
AuthorDate: Sat Sep 7 15:46:15 2019 -0700

    IMPALA-8931: Fix fe trigger for lineage events
    
    Currently, fe generates lineage objects only when --lineage_event_log_dir
    is configured. This is a legacy startup param. Lineages should also be
    generated when event hooks are configured since they can potentially
    consume them.
    
    Added a test that confirms that the hook is invoked when a lineage is
    created.
    
    Change-Id: I2d8deb05883cc3ecab27fe4afd031a1e7ccb0829
    Reviewed-on: http://gerrit.cloudera.org:8080/14194
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 fe/src/main/java/org/apache/impala/service/BackendConfig.java |  6 +++++-
 tests/custom_cluster/test_query_event_hooks.py                | 11 ++++++++---
 2 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/fe/src/main/java/org/apache/impala/service/BackendConfig.java b/fe/src/main/java/org/apache/impala/service/BackendConfig.java
index 534dc70..b528435 100644
--- a/fe/src/main/java/org/apache/impala/service/BackendConfig.java
+++ b/fe/src/main/java/org/apache/impala/service/BackendConfig.java
@@ -51,7 +51,11 @@ public class BackendConfig {
   public TBackendGflags getBackendCfg() { return backendCfg_; }
   public long getReadSize() { return backendCfg_.read_size; }
   public boolean getComputeLineage() {
-    return !Strings.isNullOrEmpty(backendCfg_.lineage_event_log_dir);
+    // Lineage is computed in the fe if --lineage_event_log_dir is configured or
+    // a query event hook is configured with --query_event_hook_classes. The hook
+    // may or may not consume the lineage but we still include it.
+    return !Strings.isNullOrEmpty(backendCfg_.lineage_event_log_dir) ||
+        !Strings.isNullOrEmpty(getQueryExecHookClasses());
   }
   public long getIncStatsMaxSize() { return backendCfg_.inc_stats_size_limit_bytes; }
   public boolean isStatsExtrapolationEnabled() {
diff --git a/tests/custom_cluster/test_query_event_hooks.py b/tests/custom_cluster/test_query_event_hooks.py
index d0d22ac..a884a22 100644
--- a/tests/custom_cluster/test_query_event_hooks.py
+++ b/tests/custom_cluster/test_query_event_hooks.py
@@ -35,7 +35,7 @@ class TestHooks(CustomClusterTestSuite):
   @CustomClusterTestSuite.with_args(
       impala_log_dir=tempfile.mkdtemp(prefix="test_hooks_", dir=os.getenv("LOG_DIR")),
       impalad_args="--query_event_hook_classes={0} "
-                   "--minidump_path={1} "
+                   "--minidump_path={1} -logbuflevel=-1"
                    .format(DUMMY_HOOK, MINIDUMP_PATH),
       catalogd_args="--minidump_path={0}".format(MINIDUMP_PATH))
   def test_query_event_hooks_execute(self, unique_database):
@@ -45,8 +45,13 @@ class TestHooks(CustomClusterTestSuite):
     """
     # Dummy hook should log something (See org.apache.impala.testutil.DummyQueryEventHook)
     self.assert_impalad_log_contains("INFO",
-                                     "{0}.onImpalaStartup".format(self.DUMMY_HOOK),
-                                     expected_count=-1)
+        "{0}.onImpalaStartup".format(self.DUMMY_HOOK), expected_count=-1)
+    # Run a test query that triggers a lineage event.
+    self.execute_query_expect_success(
+        self.client, "select count(*) from functional.alltypes")
+    # onQueryComplete() is invoked by the lineage logger.
+    self.assert_impalad_log_contains("INFO",
+        "{0}.onQueryComplete".format(self.DUMMY_HOOK), expected_count=-1)
 
 
 class TestHooksStartupFail(CustomClusterTestSuite):