Posted to commits@impala.apache.org by bo...@apache.org on 2023/03/01 14:56:40 UTC

[impala] branch master updated (fd3bccf5f -> 23c265d12)

This is an automated email from the ASF dual-hosted git repository.

boroknagyz pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git


    from fd3bccf5f IMPALA-11803: Fix hitting DCHECK when running union on empty table with MT_DOP>1
     new 630650ba0 IMPALA-11944: Add SLES 15 support
     new 0c72c98f9 IMPALA-9627: Update utility scripts for Python 3
     new 9cc19eb5f IMPALA-11955: Detect system Pythons
     new 23c265d12 IMPALA-11626: Handle COMMIT_COMPACTION_EVENT from HMS

The 4 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 CMakeLists.txt                                     | 12 +--
 bin/bootstrap_toolchain.py                         |  8 +-
 bin/impala-config.sh                               |  8 +-
 .../org/apache/impala/compat/MetastoreShim.java    |  8 ++
 .../org/apache/impala/compat/MetastoreShim.java    | 15 ++++
 .../impala/catalog/events/MetastoreEvents.java     | 79 +++++++++++++++++++
 .../apache/impala/service/CatalogOpExecutor.java   | 38 ++++++++--
 infra/python/bootstrap_virtualenv.py               |  9 ++-
 infra/python/deps/pip_download.py                  |  5 +-
 shell/make_shell_tarball.sh                        | 32 ++++----
 tests/custom_cluster/test_events_custom_configs.py | 88 ++++++++++++++++++++++
 tests/shell/util.py                                | 19 ++---
 12 files changed, 279 insertions(+), 42 deletions(-)


[impala] 03/04: IMPALA-11955: Detect system Pythons

Posted by bo...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

boroknagyz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 9cc19eb5f67248dff8edf8ea0193ec4f029f8b2a
Author: Michael Smith <mi...@cloudera.com>
AuthorDate: Tue Feb 28 10:23:53 2023 -0800

    IMPALA-11955: Detect system Pythons
    
    Detect system Pythons (2 and 3) during build configuration. Build the
    impala-shell tarball using only the Python versions that are available,
    and run tests only against those versions.
    
    Drops support for DISABLE_PYTHON3_TEST, since Python availability is
    now detected automatically. If python3 is present on the system, it is
    expected to be usable.
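
    A minimal sketch of the detection idea (illustrative Python only; the
    build's actual check is `command -v` in bin/impala-config.sh, shown in
    the diff below):

        # Hypothetical sketch: enable build/test targets only for the
        # interpreters actually present. shutil.which (Python 3) mirrors
        # the shell's `command -v`.
        import shutil

        targets = []
        if shutil.which("python2"):
            targets.append("shell_python2_install")
        if shutil.which("python3"):
            targets.append("shell_python3_install")
        print(targets)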
    
    Testing:
    - built in SLES 15 SP4 container with Python 3
    
    Change-Id: Iba36d0feba163e1251c66a6a49121d4dac625afc
    Reviewed-on: http://gerrit.cloudera.org:8080/19560
    Reviewed-by: Michael Smith <mi...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 CMakeLists.txt              | 12 +++++++-----
 bin/impala-config.sh        |  6 ++++++
 shell/make_shell_tarball.sh | 32 +++++++++++++++++++-------------
 tests/shell/util.py         | 19 ++++++++++---------
 4 files changed, 42 insertions(+), 27 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 611c5a533..cb35b393a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -514,12 +514,14 @@ add_custom_target(impala_python ALL
   COMMAND "${CMAKE_SOURCE_DIR}/bin/init-impala-python.sh"
 )
 
-if(NOT $ENV{DISABLE_PYTHON3_TEST} EQUAL "")
-  message(STATUS "DISABLE_PYTHON3_TEST is set, disabling Python3 virtualenv and tests")
-  add_custom_target(impala_shell_pypi ALL DEPENDS shell_python2_install)
-else()
-  add_custom_target(impala_shell_pypi ALL DEPENDS shell_python2_install shell_python3_install)
+set(IMPALA_PYTHON_INSTALLS "")
+if (NOT $ENV{IMPALA_SYSTEM_PYTHON2} EQUAL "")
+  list(APPEND IMPALA_PYTHON_INSTALLS shell_python2_install)
+endif()
+if (NOT $ENV{IMPALA_SYSTEM_PYTHON3} EQUAL "")
+  list(APPEND IMPALA_PYTHON_INSTALLS shell_python3_install)
 endif()
+add_custom_target(impala_shell_pypi ALL DEPENDS ${IMPALA_PYTHON_INSTALLS})
 
 add_custom_target(notests_independent_targets DEPENDS
   java cscope tarballs impala_python impala_shell_pypi
diff --git a/bin/impala-config.sh b/bin/impala-config.sh
index 8e08a2fce..ae9a00a42 100755
--- a/bin/impala-config.sh
+++ b/bin/impala-config.sh
@@ -196,6 +196,10 @@ unset IMPALA_THRIFT_JAVA_URL
 export IMPALA_THRIFT_PY_VERSION=0.16.0-p3
 unset IMPALA_THRIFT_PY_URL
 
+# Find system python versions for testing
+export IMPALA_SYSTEM_PYTHON2=$(command -v python2)
+export IMPALA_SYSTEM_PYTHON3=$(command -v python3)
+
 if [[ $OSTYPE == "darwin"* ]]; then
   IMPALA_CYRUS_SASL_VERSION=2.1.26
   unset IMPALA_CYRUS_SASL_URL
@@ -982,6 +986,8 @@ echo "IMPALA_ICEBERG_VERSION  = $IMPALA_ICEBERG_VERSION"
 echo "IMPALA_COS_VERSION      = $IMPALA_COS_VERSION"
 echo "IMPALA_OSS_VERSION      = $IMPALA_OSS_VERSION"
 echo "IMPALA_OBS_VERSION      = $IMPALA_OBS_VERSION"
+echo "IMPALA_SYSTEM_PYTHON2   = $IMPALA_SYSTEM_PYTHON2"
+echo "IMPALA_SYSTEM_PYTHON3   = $IMPALA_SYSTEM_PYTHON3"
 
 # Kerberos things.  If the cluster exists and is kerberized, source
 # the required environment.  This is required for any hadoop tool to
diff --git a/shell/make_shell_tarball.sh b/shell/make_shell_tarball.sh
index 6a2273b44..775a63c48 100755
--- a/shell/make_shell_tarball.sh
+++ b/shell/make_shell_tarball.sh
@@ -118,25 +118,31 @@ for MODULE in ${SHELL_HOME}/ext-py/*; do
     continue;
   fi
   pushd ${MODULE} > /dev/null 2>&1
-  echo "Cleaning up old build artifacts."
-  rm -rf dist 2>&1 > /dev/null
-  rm -rf build 2>&1 > /dev/null
-  echo "Creating an egg for ${MODULE}"
-  if [[ "$MODULE" == *"/bitarray"* ]]; then
-    # Need to use setuptools to build egg for bitarray module
-    python -c "import setuptools; exec(open('setup.py').read())" -q bdist_egg
-  else
-    python setup.py -q bdist_egg clean
+  if [ ! -z "${IMPALA_SYSTEM_PYTHON2:-}" ]; then
+    echo "Cleaning up old build artifacts."
+    rm -rf dist 2>&1 > /dev/null
+    rm -rf build 2>&1 > /dev/null
+    echo "Creating a Python 2 egg for ${MODULE}"
+    if [[ "$MODULE" == *"/bitarray"* ]]; then
+      # Need to use setuptools to build egg for bitarray module
+      ${IMPALA_SYSTEM_PYTHON2} -c "import setuptools; exec(open('setup.py').read())" \
+          -q bdist_egg
+    else
+      ${IMPALA_SYSTEM_PYTHON2} setup.py -q bdist_egg clean
+    fi
+    cp dist/*.egg ${TARBALL_ROOT}/ext-py2
   fi
-  cp dist/*.egg ${TARBALL_ROOT}/ext-py2
-  if [ -z "${DISABLE_PYTHON3_TEST:-}" ]; then
+  if [ ! -z "${IMPALA_SYSTEM_PYTHON3:-}" ]; then
+    echo "Cleaning up old build artifacts."
     rm -rf dist 2>&1 > /dev/null
     rm -rf build 2>&1 > /dev/null
+    echo "Creating a Python 3 egg for ${MODULE}"
     if [[ "$MODULE" == *"/bitarray"* ]]; then
       # Need to use setuptools to build egg for bitarray module
-      python3 -c "import setuptools; exec(open('setup.py').read())" -q bdist_egg
+      ${IMPALA_SYSTEM_PYTHON3} -c "import setuptools; exec(open('setup.py').read())" \
+          -q bdist_egg
     else
-      python3 setup.py -q bdist_egg clean
+      ${IMPALA_SYSTEM_PYTHON3} setup.py -q bdist_egg clean
     fi
     cp dist/*.egg ${TARBALL_ROOT}/ext-py3
   fi
diff --git a/tests/shell/util.py b/tests/shell/util.py
index f369c646d..2097eabd3 100755
--- a/tests/shell/util.py
+++ b/tests/shell/util.py
@@ -337,16 +337,17 @@ def get_dev_impala_shell_executable():
 
 def create_impala_shell_executable_dimension(dev_only=False):
   _, include_pypi = get_dev_impala_shell_executable()
+  dimensions = []
+  if os.getenv("IMPALA_SYSTEM_PYTHON2"):
+    dimensions.append('dev')
+  if os.getenv("IMPALA_SYSTEM_PYTHON3"):
+    dimensions.append('dev3')
   if include_pypi and not dev_only:
-    if 'DISABLE_PYTHON3_TEST' in os.environ:
-      return ImpalaTestDimension('impala_shell', 'dev', 'python2')
-    else:
-      return ImpalaTestDimension('impala_shell', 'dev', 'dev3', 'python2', 'python3')
-  else:
-    if 'DISABLE_PYTHON3_TEST' in os.environ:
-      return ImpalaTestDimension('impala_shell', 'dev')
-    else:
-      return ImpalaTestDimension('impala_shell', 'dev', 'dev3')
+    if os.getenv("IMPALA_SYSTEM_PYTHON2"):
+      dimensions.append('python2')
+    if os.getenv("IMPALA_SYSTEM_PYTHON3"):
+      dimensions.append('python3')
+  return ImpalaTestDimension('impala_shell', *dimensions)
 
 
 def get_impala_shell_executable(vector):


[impala] 04/04: IMPALA-11626: Handle COMMIT_COMPACTION_EVENT from HMS

Posted by bo...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

boroknagyz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 23c265d12804c91c08a08a0be92c155424ea3d99
Author: Sai Hemanth Gantasala <sa...@cloudera.com>
AuthorDate: Wed Oct 19 09:37:17 2022 -0700

    IMPALA-11626: Handle COMMIT_COMPACTION_EVENT from HMS
    
    Since HIVE-24329, HMS emits an event when a compaction is committed,
    but Impala has been ignoring it. Handling it allows file metadata to
    be refreshed automatically after a compaction is committed.
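
    A rough outline of the new handling (hypothetical Python pseudocode;
    the helper names stand in for the Java methods added in the diff
    below):

        def process_commit_compaction(event):
            # The partition name comes from the HMS CommitCompactionMessage;
            # it is None when an unpartitioned table was compacted.
            part = event.get("partition")
            if part is None:
                print("reload file metadata of table %s" % event["table"])
            else:
                print("force-reload partition %s of %s" % (part, event["table"]))

        process_commit_compaction({"table": "db.tbl", "partition": "year=2022"})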
    
    Testing: Added an end-to-end test that verifies the processing of a
    commit compaction event triggered in HMS. Also added an edge-case
    test where the event processor handles a partition that has been
    dropped in the meantime.
    
    Change-Id: I464faedb4e3bbcd417bab2e3cb0d57e339d42605
    Reviewed-on: http://gerrit.cloudera.org:8080/19155
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 .../org/apache/impala/compat/MetastoreShim.java    |  8 ++
 .../org/apache/impala/compat/MetastoreShim.java    | 15 ++++
 .../impala/catalog/events/MetastoreEvents.java     | 79 +++++++++++++++++++
 .../apache/impala/service/CatalogOpExecutor.java   | 38 ++++++++--
 tests/custom_cluster/test_events_custom_configs.py | 88 ++++++++++++++++++++++
 5 files changed, 221 insertions(+), 7 deletions(-)

diff --git a/fe/src/compat-apache-hive-3/java/org/apache/impala/compat/MetastoreShim.java b/fe/src/compat-apache-hive-3/java/org/apache/impala/compat/MetastoreShim.java
index e5235d54f..9fab5e384 100644
--- a/fe/src/compat-apache-hive-3/java/org/apache/impala/compat/MetastoreShim.java
+++ b/fe/src/compat-apache-hive-3/java/org/apache/impala/compat/MetastoreShim.java
@@ -443,6 +443,14 @@ public class MetastoreShim extends Hive3MetastoreShimBase {
     throw new UnsupportedOperationException("Reload event is not supported.");
   }
 
+  /**
+   *   CDP Hive-3 only function.
+   */
+  public static String getPartitionNameFromCommitCompactionEvent(
+      NotificationEvent event) {
+    throw new UnsupportedOperationException("CommitCompaction event is not supported.");
+  }
+
   /**
    * Use thrift API directly instead of HiveMetastoreClient#getNextNotification because
    * the HMS client can throw an IllegalStateException when there is a gap between the
diff --git a/fe/src/compat-hive-3/java/org/apache/impala/compat/MetastoreShim.java b/fe/src/compat-hive-3/java/org/apache/impala/compat/MetastoreShim.java
index bbda843d0..b31897287 100644
--- a/fe/src/compat-hive-3/java/org/apache/impala/compat/MetastoreShim.java
+++ b/fe/src/compat-hive-3/java/org/apache/impala/compat/MetastoreShim.java
@@ -75,6 +75,7 @@ import org.apache.hadoop.hive.metastore.api.WriteNotificationLogRequest;
 import org.apache.hadoop.hive.metastore.conf.MetastoreConf;
 import org.apache.hadoop.hive.metastore.messaging.AlterTableMessage;
 import org.apache.hadoop.hive.metastore.messaging.CommitTxnMessage;
+import org.apache.hadoop.hive.metastore.messaging.CommitCompactionMessage;
 import org.apache.hadoop.hive.metastore.messaging.EventMessage;
 import org.apache.hadoop.hive.metastore.messaging.MessageBuilder;
 import org.apache.hadoop.hive.metastore.messaging.MessageDeserializer;
@@ -565,6 +566,20 @@ public class MetastoreShim extends Hive3MetastoreShimBase {
     return updatedFields;
   }
 
+  /**
+   *  Extracts the partition name from the notification event and
+   *  returns it as a string.
+   *
+   * @param event Metastore notification event,
+   * @return the partition name, required for the commit compaction event.
+   */
+  public static String getPartitionNameFromCommitCompactionEvent(
+      NotificationEvent event) {
+    CommitCompactionMessage commitCompactionMessage = MetastoreEventsProcessor.
+        getMessageDeserializer().getCommitCompactionMessage(event.getMessage());
+    return commitCompactionMessage.getPartName();
+  }
+
   /**
    * Wrapper around IMetaStoreClient.getThriftClient().get_next_notification() to deal
    * with added arguments.
diff --git a/fe/src/main/java/org/apache/impala/catalog/events/MetastoreEvents.java b/fe/src/main/java/org/apache/impala/catalog/events/MetastoreEvents.java
index a8eafd569..78895d2b4 100644
--- a/fe/src/main/java/org/apache/impala/catalog/events/MetastoreEvents.java
+++ b/fe/src/main/java/org/apache/impala/catalog/events/MetastoreEvents.java
@@ -125,6 +125,7 @@ public class MetastoreEvents {
     ALLOC_WRITE_ID_EVENT("ALLOC_WRITE_ID_EVENT"),
     COMMIT_TXN("COMMIT_TXN"),
     ABORT_TXN("ABORT_TXN"),
+    COMMIT_COMPACTION("COMMIT_COMPACTION_EVENT"),
     OTHER("OTHER");
 
     private final String eventType_;
@@ -218,6 +219,8 @@ public class MetastoreEvents {
           return new ReloadEvent(catalogOpExecutor_, metrics, event);
         case INSERT:
           return new InsertEvent(catalogOpExecutor_, metrics, event);
+        case COMMIT_COMPACTION:
+          return new CommitCompactionEvent(catalogOpExecutor_, metrics, event);
         default:
           // ignore all the unknown events by creating a IgnoredEvent
           return new IgnoredEvent(catalogOpExecutor_, metrics, event);
@@ -920,6 +923,24 @@ public class MetastoreEvents {
       }
     }
 
+    protected void reloadPartitionsFromNames(List<String> partitionNames, String reason,
+        FileMetadataLoadOpts fileMetadataLoadOpts) throws CatalogException {
+      try {
+        int numPartsRefreshed = catalogOpExecutor_.reloadPartitionsFromNamesIfExists(
+            getEventId(), dbName_, tblName_, partitionNames, reason,
+            fileMetadataLoadOpts);
+        if (numPartsRefreshed > 0) {
+          metrics_.getCounter(MetastoreEventsProcessor.NUMBER_OF_PARTITION_REFRESHES)
+                  .inc(numPartsRefreshed);
+        }
+      } catch (TableNotLoadedException e) {
+        debugLog("Ignoring the event since table {} is not loaded",
+            getFullyQualifiedTblName());
+      } catch (DatabaseNotFoundException | TableNotFoundException e) {
+        debugLog("Ignoring the event since table {} is not found",
+            getFullyQualifiedTblName());
+      }
+    }
 
     /**
      * To decide whether to skip processing this event, fetch table from cache
@@ -2622,6 +2643,64 @@ public class MetastoreEvents {
     }
   }
 
+  /**
+   * Metastore event handler for COMMIT_COMPACTION events. Handles
+   * COMMIT_COMPACTION event for transactional tables.
+   */
+  public static class CommitCompactionEvent extends MetastoreTableEvent {
+    private String partitionName_;
+
+    CommitCompactionEvent(CatalogOpExecutor catalogOpExecutor, Metrics metrics,
+        NotificationEvent event) throws MetastoreNotificationException {
+      super(catalogOpExecutor, metrics, event);
+      Preconditions.checkState(
+          getEventType().equals(MetastoreEventType.COMMIT_COMPACTION));
+      Preconditions.checkNotNull(event.getMessage());
+      try {
+        partitionName_ =
+            MetastoreShim.getPartitionNameFromCommitCompactionEvent(event);
+        org.apache.impala.catalog.Table tbl = catalog_.getTable(dbName_, tblName_);
+        if (tbl != null && tbl.getCreateEventId() < getEventId()) {
+          msTbl_ = tbl.getMetaStoreTable();
+        }
+      } catch (Exception ex) {
+        throw new MetastoreNotificationException(debugString("Unable to "
+            + "parse commit compaction message"), ex);
+      }
+    }
+
+    @Override
+    protected void process() throws MetastoreNotificationException {
+      try {
+        if (partitionName_ == null) {
+          reloadTableFromCatalog("Commit Compaction event", true);
+        } else {
+          reloadPartitionsFromNames(Arrays.asList(partitionName_),
+                  "Commit compaction event", FileMetadataLoadOpts.FORCE_LOAD);
+        }
+      } catch (CatalogException e) {
+        throw new MetastoreNotificationNeedsInvalidateException(debugString("Failed to "
+            + "commit compaction for the table {}. Event processing cannot "
+            + "continue. Issue an invalidate metadata command to reset " +
+            "event processor.", tblName_), e);
+      }
+    }
+
+    @Override
+    protected SelfEventContext getSelfEventContext() {
+      throw new UnsupportedOperationException("Self-event evaluation is not needed for "
+          + "this event type");
+    }
+
+    @Override
+    protected boolean isEventProcessingDisabled() {
+      if (msTbl_ == null) {
+        return false;
+      }
+      return super.isEventProcessingDisabled();
+    }
+  }
+
   /**
    * An event type which is ignored. Useful for unsupported metastore event types
    */
diff --git a/fe/src/main/java/org/apache/impala/service/CatalogOpExecutor.java b/fe/src/main/java/org/apache/impala/service/CatalogOpExecutor.java
index a99f8a00e..dcf11da26 100644
--- a/fe/src/main/java/org/apache/impala/service/CatalogOpExecutor.java
+++ b/fe/src/main/java/org/apache/impala/service/CatalogOpExecutor.java
@@ -4497,6 +4497,35 @@ public class CatalogOpExecutor {
   public int reloadPartitionsIfExist(long eventId, String dbName, String tblName,
       List<Partition> partsFromEvent, String reason,
       FileMetadataLoadOpts fileMetadataLoadOpts) throws CatalogException {
+    List<String> partNames = new ArrayList<>();
+    Table table = catalog_.getTable(dbName, tblName);
+    if (table instanceof HdfsTable) {
+      HdfsTable hdfsTable = (HdfsTable) table;
+      for (Partition part : partsFromEvent) {
+        partNames.add(FileUtils.makePartName(hdfsTable.getClusteringColNames(),
+            part.getValues(), null));
+      }
+    }
+    return reloadPartitionsFromNamesIfExists(eventId, dbName, tblName, partNames,
+        reason, fileMetadataLoadOpts);
+  }
+
+  /**
+   * Reloads the given partitions from partition names if they exist and have not been
+   * removed since the event was generated.
+   *
+   * @param eventId EventId being processed.
+   * @param dbName Database name for the partition
+   * @param tblName Table name for the partition
+   * @param partNames List of partition names from the events to be reloaded.
+   * @param reason Reason for reloading the partitions for logging purposes.
+   * @param fileMetadataLoadOpts describes how to reload file metadata for partsFromEvent
+   * @return the number of partitions which were reloaded. If the table does not exist,
+   * returns 0. Some partitions could be skipped if they don't exist anymore.
+   */
+  public int reloadPartitionsFromNamesIfExists (long eventId, String dbName,
+      String tblName, List<String> partNames, String reason,
+      FileMetadataLoadOpts fileMetadataLoadOpts) throws CatalogException {
     Table table = catalog_.getTable(dbName, tblName);
     if (table == null) {
       DeleteEventLog deleteEventLog = catalog_.getMetastoreEventProcessor()
@@ -4536,13 +4565,8 @@ public class CatalogOpExecutor {
       }
       HdfsTable hdfsTable = (HdfsTable) table;
       // some partitions from the event or the table itself
-      // may not exist in HMS anymore. Hence, we collect the names here and re-fetch
+      // may not exist in HMS anymore. Hence, we re-fetch
       // the partitions from HMS.
-      List<String> partNames = new ArrayList<>();
-      for (Partition part : partsFromEvent) {
-        partNames.add(FileUtils.makePartName(hdfsTable.getClusteringColNames(),
-            part.getValues(), null));
-      }
       int numOfPartsReloaded;
       try (MetaStoreClient metaStoreClient = catalog_.getMetaStoreClient()) {
         numOfPartsReloaded = hdfsTable.reloadPartitionsFromNames(
@@ -4551,7 +4575,7 @@ public class CatalogOpExecutor {
       hdfsTable.setCatalogVersion(newCatalogVersion);
       return numOfPartsReloaded;
     } catch (TableLoadingException e) {
-      LOG.info("Could not reload {} partitions of table {}", partsFromEvent.size(),
+      LOG.info("Could not reload {} partitions of table {}", partNames.size(),
           table.getFullName(), e);
     } catch (InternalException e) {
       errorOccured = true;
diff --git a/tests/custom_cluster/test_events_custom_configs.py b/tests/custom_cluster/test_events_custom_configs.py
index 36bc7ebdf..63385a325 100644
--- a/tests/custom_cluster/test_events_custom_configs.py
+++ b/tests/custom_cluster/test_events_custom_configs.py
@@ -266,6 +266,94 @@ class TestEventProcessingCustomConfigs(CustomClusterTestSuite):
         .format(unique_database, test_reload_table))
     check_self_events("refresh {}.{}".format(unique_database, test_reload_table))
 
+  @CustomClusterTestSuite.with_args(catalogd_args="--hms_event_polling_interval_s=1")
+  def test_commit_compaction_events(self, unique_database):
+    """Verifies IMPALA-11626: commit compaction events triggered in HMS should
+    be consumed by CatalogD's event processor.
+    """
+
+    # Test scenario 1: partitioned table
+    test_cc_part_table = "test_cc_partitioned_table"
+    self.run_stmt_in_hive(
+      "create transactional table {}.{} (i int) partitioned by (year int)"
+      .format(unique_database, test_cc_part_table))
+    for i in range(2):
+        self.run_stmt_in_hive(
+          "insert into {}.{} partition (year=2022) values (1),(2),(3)"
+          .format(unique_database, test_cc_part_table))
+    EventProcessorUtils.wait_for_event_processing(self)
+    parts_refreshed_before_compaction = EventProcessorUtils.get_int_metric(
+      "partitions-refreshed")
+    self.client.execute(
+      "select * from {}.{} limit 2"
+      .format(unique_database, test_cc_part_table))
+    self.run_stmt_in_hive(
+      "alter table {}.{} partition(year=2022) compact 'minor' and wait"
+      .format(unique_database, test_cc_part_table))
+    EventProcessorUtils.wait_for_event_processing(self)
+    parts_refreshed_after_compaction = EventProcessorUtils.get_int_metric(
+      "partitions-refreshed")
+    assert parts_refreshed_after_compaction > parts_refreshed_before_compaction
+
+    # Test scenario 2:
+    test_cc_unpart_tab = "test_cc_unpart_table"
+    self.run_stmt_in_hive(
+      "create transactional table {}.{} (i int)"
+      .format(unique_database, test_cc_unpart_tab))
+    for i in range(2):
+        self.run_stmt_in_hive(
+          "insert into {}.{} values (1),(2),(3)"
+          .format(unique_database, test_cc_unpart_tab))
+    EventProcessorUtils.wait_for_event_processing(self)
+    tables_refreshed_before_compaction = EventProcessorUtils.get_int_metric(
+      "tables-refreshed")
+    self.client.execute(
+      "select * from {}.{} limit 2"
+      .format(unique_database, test_cc_unpart_tab))
+    self.run_stmt_in_hive("alter table {}.{} compact 'minor' and wait"
+      .format(unique_database, test_cc_unpart_tab))
+    EventProcessorUtils.wait_for_event_processing(self)
+    tables_refreshed_after_compaction = EventProcessorUtils.get_int_metric(
+      "tables-refreshed")
+    assert tables_refreshed_after_compaction > tables_refreshed_before_compaction
+
+    # Test scenario 3: partitioned table has partition deleted
+    test_cc_part_table = "test_cc_partitioned_table_error"
+    self.run_stmt_in_hive(
+      "create transactional table {}.{} (i int) partitioned by (year int)"
+      .format(unique_database, test_cc_part_table))
+    for i in range(2):
+        self.run_stmt_in_hive(
+          "insert into {}.{} partition (year=2022) values (1),(2),(3)"
+          .format(unique_database, test_cc_part_table))
+    EventProcessorUtils.wait_for_event_processing(self)
+    self.client.execute(
+      "select * from {}.{} limit 2"
+      .format(unique_database, test_cc_part_table))
+    self.run_stmt_in_hive(
+      "alter table {}.{} partition(year=2022) compact 'minor' and wait"
+      .format(unique_database, test_cc_part_table))
+    self.run_stmt_in_hive("alter table {}.{} Drop if exists partition(year=2022)"
+      .format(unique_database, test_cc_part_table))
+    EventProcessorUtils.wait_for_event_processing(self)
+    assert EventProcessorUtils.get_event_processor_status() == "ACTIVE"
+
+    # Test scenario 4: process commit compaction for an unloaded table
+    test_cc_part_table = "test_cc_table_unloaded"
+    self.run_stmt_in_hive(
+      "create transactional table {}.{} (i int) partitioned by (year int)"
+      .format(unique_database, test_cc_part_table))
+    for i in range(2):
+        self.run_stmt_in_hive(
+          "insert into {}.{} partition (year=2022) values (1),(2),(3)"
+          .format(unique_database, test_cc_part_table))
+    EventProcessorUtils.wait_for_event_processing(self)
+    self.run_stmt_in_hive(
+      "alter table {}.{} partition(year=2022) compact 'minor' and wait"
+      .format(unique_database, test_cc_part_table))
+    EventProcessorUtils.wait_for_event_processing(self)
+    assert EventProcessorUtils.get_event_processor_status() == "ACTIVE"
+
   @CustomClusterTestSuite.with_args(catalogd_args="--hms_event_polling_interval_s=1")
   def test_event_batching(self, unique_database):
     """Runs queries which generate multiple ALTER_PARTITION events which must be


[impala] 01/04: IMPALA-11944: Add SLES 15 support

Posted by bo...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

boroknagyz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 630650ba080fe377b2c9e03645341ae86d175114
Author: Michael Smith <mi...@cloudera.com>
AuthorDate: Fri Feb 24 20:13:50 2023 -0800

    IMPALA-11944: Add SLES 15 support
    
    Updates IMPALA_TOOLCHAIN_BUILD_ID to a native-toolchain build that
    includes SLES 15 support, and adds OsMapping entries for SLES 15.
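
    Each OsMapping entry pairs a detected OS identifier with the toolchain
    package to download; a small illustrative lookup (the tuple layout is
    an assumption, standing in for the OsMapping class in the diff below):

        OS_MAPPING = [
            ("suselinux15", "ec2-package-sles-15", "sles15"),
            ("suse15", "ec2-package-sles-15", "sles15"),
        ]

        def toolchain_label(os_id):
            # Return the toolchain label for a detected OS identifier.
            for mapped_id, _, label in OS_MAPPING:
                if mapped_id == os_id:
                    return label
            raise ValueError("unsupported OS: %s" % os_id)

        print(toolchain_label("suse15"))  # sles15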
    
    Testing:
    - built with impala-toolchain-sles15 container image from
      native-toolchain, which includes Python 2 and Java 8 SDK from OpenSUSE
      Leap.
    
    Change-Id: I4015b695862abc6eb901a857cc1c444aff1bbe24
    Reviewed-on: http://gerrit.cloudera.org:8080/19556
    Reviewed-by: Michael Smith <mi...@cloudera.com>
    Tested-by: Michael Smith <mi...@cloudera.com>
---
 bin/bootstrap_toolchain.py | 2 ++
 bin/impala-config.sh       | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/bin/bootstrap_toolchain.py b/bin/bootstrap_toolchain.py
index 457625d19..e74e0b5f3 100755
--- a/bin/bootstrap_toolchain.py
+++ b/bin/bootstrap_toolchain.py
@@ -90,6 +90,8 @@ OS_MAPPING = [
   OsMapping("suselinux11", "ec2-package-sles-11", None),
   OsMapping("suselinux12", "ec2-package-sles-12", "sles12"),
   OsMapping("suse12", "ec2-package-sles-12", "sles12"),
+  OsMapping("suselinux15", "ec2-package-sles-15", "sles15"),
+  OsMapping("suse15", "ec2-package-sles-15", "sles15"),
   OsMapping("ubuntu12.04", "ec2-package-ubuntu-12-04", None),
   OsMapping("ubuntu14.04", "ec2-package-ubuntu-14-04", None),
   OsMapping("ubuntu15.04", "ec2-package-ubuntu-14-04", None),
diff --git a/bin/impala-config.sh b/bin/impala-config.sh
index 5cc4234b1..8e08a2fce 100755
--- a/bin/impala-config.sh
+++ b/bin/impala-config.sh
@@ -81,7 +81,7 @@ export USE_APACHE_HIVE=${USE_APACHE_HIVE-false}
 # moving to a different build of the toolchain, e.g. when a version is bumped or a
 # compile option is changed. The build id can be found in the output of the toolchain
 # build jobs, it is constructed from the build number and toolchain git hash prefix.
-export IMPALA_TOOLCHAIN_BUILD_ID=237-c284a9372e
+export IMPALA_TOOLCHAIN_BUILD_ID=252-b144ba77b5
 # Versions of toolchain dependencies.
 # -----------------------------------
 export IMPALA_AVRO_VERSION=1.7.4-p5


[impala] 02/04: IMPALA-9627: Update utility scripts for Python 3

Posted by bo...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

boroknagyz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 0c72c98f91d291c02e281ebbd04ba94401e93c07
Author: Michael Smith <mi...@cloudera.com>
AuthorDate: Tue Feb 28 09:43:22 2023 -0800

    IMPALA-9627: Update utility scripts for Python 3
    
    Updates utility scripts that don't use impala-python to work with
    Python 3, so we can build on systems that don't include Python 2 (such
    as SLES 15 SP4).
    
    Primarily adds 'universal_newlines=True' to subprocess calls so that
    they return text rather than binary data on Python 3; the change is
    also compatible with Python 2.
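
    A minimal example of the idiom (runs unchanged on Python 2.7 and 3):

        import subprocess

        # Without universal_newlines=True, Python 3 returns bytes here.
        out = subprocess.check_output(["echo", "hello"], universal_newlines=True)
        assert isinstance(out, str)
        print(out.strip())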
    
    Testing:
    - built in SLES 15 SP4 container with Python 3
    
    Change-Id: I7f4ce71fa1183aaeeca55d0666aeb113640c5cf2
    Reviewed-on: http://gerrit.cloudera.org:8080/19559
    Reviewed-by: Michael Smith <mi...@cloudera.com>
    Tested-by: Michael Smith <mi...@cloudera.com>
---
 bin/bootstrap_toolchain.py           | 6 ++++--
 infra/python/bootstrap_virtualenv.py | 9 ++++++---
 infra/python/deps/pip_download.py    | 5 +++--
 3 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/bin/bootstrap_toolchain.py b/bin/bootstrap_toolchain.py
index e74e0b5f3..98ec9d7d6 100755
--- a/bin/bootstrap_toolchain.py
+++ b/bin/bootstrap_toolchain.py
@@ -107,7 +107,8 @@ def check_output(cmd_args):
      a non-zero return code. Similar to subprocess.check_output() which is only provided
      in python 2.7.
   """
-  process = subprocess.Popen(cmd_args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+  process = subprocess.Popen(cmd_args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
+      universal_newlines=True)
   stdout, _ = process.communicate()
   if process.wait() != 0:
     raise Exception("Command with args '%s' failed with exit code %s:\n%s"
@@ -422,7 +423,8 @@ def check_output(cmd_args):
      a non-zero return code. Similar to subprocess.check_output() which is only provided
      in python 2.7.
   """
-  process = subprocess.Popen(cmd_args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+  process = subprocess.Popen(cmd_args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
+      universal_newlines=True)
   stdout, _ = process.communicate()
   if process.wait() != 0:
     raise Exception("Command with args '%s' failed with exit code %s:\n%s"
diff --git a/infra/python/bootstrap_virtualenv.py b/infra/python/bootstrap_virtualenv.py
index d292e17cd..c3bc59932 100644
--- a/infra/python/bootstrap_virtualenv.py
+++ b/infra/python/bootstrap_virtualenv.py
@@ -42,7 +42,10 @@ import subprocess
 import sys
 import tarfile
 import tempfile
-import urllib
+try:
+  from urllib.request import pathname2url
+except ImportError:
+  from urllib import pathname2url
 from bootstrap_toolchain import ToolchainPackage
 
 LOG = logging.getLogger(os.path.splitext(os.path.basename(__file__))[0])
@@ -124,7 +127,7 @@ def exec_cmd(args, **kwargs):
      'args' and 'kwargs' use the same format as subprocess.Popen().
   '''
   process = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
-      **kwargs)
+      universal_newlines=True, **kwargs)
   output = process.communicate()[0]
   if process.returncode != 0:
     raise Exception("Command returned non-zero status\nCommand: %s\nOutput: %s"
@@ -189,7 +192,7 @@ def exec_pip_install(args, cc="no-cc-available", env=None):
     third_party_pkg_install_cmd.append("--no-index")
 
   third_party_pkg_install_cmd.extend(["--find-links",
-      "file://%s" % urllib.pathname2url(os.path.abspath(DEPS_DIR))])
+      "file://%s" % pathname2url(os.path.abspath(DEPS_DIR))])
   third_party_pkg_install_cmd.extend(args)
   exec_cmd(third_party_pkg_install_cmd, env=env)
 
diff --git a/infra/python/deps/pip_download.py b/infra/python/deps/pip_download.py
index d56e0280e..9c41135d8 100755
--- a/infra/python/deps/pip_download.py
+++ b/infra/python/deps/pip_download.py
@@ -51,7 +51,7 @@ def check_digest(filename, algorithm, expected_digest):
     print('Hash algorithm {0} is not supported by hashlib'.format(algorithm))
     return False
   h = hashlib.new(algorithm)
-  h.update(open(filename).read())
+  h.update(open(filename, mode='rb').read())
   actual_digest = h.hexdigest()
   return actual_digest == expected_digest
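
The mode='rb' change above is needed because hashlib's update() accepts only
bytes on Python 3; a quick illustration (reading this script itself):

    import hashlib

    h = hashlib.new("sha256")
    with open(__file__, "rb") as f:  # text mode would pass str to update(),
        h.update(f.read())           # which raises TypeError on Python 3
    print(h.hexdigest())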
 
@@ -89,7 +89,8 @@ def get_package_info(pkg_name, pkg_version):
   # We parse the page with regex instead of an html parser because that requires
   # downloading an extra package before running this script. Since the HTML is guaranteed
   # to be formatted according to PEP 503, this is acceptable.
-  pkg_info = subprocess.check_output(["wget", "-q", "-O", "-", url])
+  pkg_info = subprocess.check_output(
+      ["wget", "-q", "-O", "-", url], universal_newlines=True)
   regex = r'<a .*?href=\".*?packages/(.*?)#(.*?)=(.*?)\".*?>(.*?)<\/a>'
   for match in re.finditer(regex, pkg_info):
     path = match.group(1)