You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2019/06/05 23:42:30 UTC

[impala] branch master updated (ae82951 -> 45c5652)

This is an automated email from the ASF dual-hosted git repository.

tarmstrong pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git.


    from ae82951  IMPALA-7957: Fix slot equivalences may be enforced multiple times
     new 0e1d2e1  IMPALA-8507: Support DROP TABLE statement with Kudu/HMS integration
     new 45c5652  IMPALA-8425: part 1: reduce size of binaries in container

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 docker/daemon_entrypoint.sh                        |   4 +-
 docker/impala_base/Dockerfile                      |  26 ++---
 docker/setup_build_context.py                      |  25 +++--
 .../apache/impala/service/CatalogOpExecutor.java   |  73 +++++++------
 tests/custom_cluster/test_kudu.py                  | 117 +++++++++++++++++++--
 5 files changed, 182 insertions(+), 63 deletions(-)


[impala] 01/02: IMPALA-8507: Support DROP TABLE statement with Kudu/HMS integration

Posted by ta...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

tarmstrong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 0e1d2e148416458b50fba2f790833122beb88aca
Author: Hao Hao <ha...@cloudera.com>
AuthorDate: Tue May 21 23:26:57 2019 -0700

    IMPALA-8507: Support DROP TABLE statement with Kudu/HMS integration
    
    This commit supports the actual handling of DROP TABLE DDL for managed
    Kudu tables with Kudu's integration with the Hive Metastore. When
    Kudu/HMS integration is enabled, after the table is dropped in the
    Kudu, relies on Kudu to drop the table in the HMS.
    
    Change-Id: I6d3b93957cc66009ad7a67fc513be2068f156abc
    Reviewed-on: http://gerrit.cloudera.org:8080/13400
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
    Reviewed-by: Grant Henke <gr...@apache.org>
    Reviewed-by: Thomas Marshall <tm...@cloudera.com>
---
 .../apache/impala/service/CatalogOpExecutor.java   |  73 +++++++------
 tests/custom_cluster/test_kudu.py                  | 117 +++++++++++++++++++--
 2 files changed, 149 insertions(+), 41 deletions(-)

diff --git a/fe/src/main/java/org/apache/impala/service/CatalogOpExecutor.java b/fe/src/main/java/org/apache/impala/service/CatalogOpExecutor.java
index 1022e27..3671f0d 100644
--- a/fe/src/main/java/org/apache/impala/service/CatalogOpExecutor.java
+++ b/fe/src/main/java/org/apache/impala/service/CatalogOpExecutor.java
@@ -1435,6 +1435,8 @@ public class CatalogOpExecutor {
       // Remove all the Kudu tables of 'db' from the Kudu storage engine.
       if (db != null && params.cascade) dropTablesFromKudu(db);
 
+      // The Kudu tables in the HMS should have been dropped at this point
+      // with the Hive Metastore integration enabled.
       try (MetaStoreClient msClient = catalog_.getMetaStoreClient()) {
         msClient.getHiveClient().dropDatabase(
             params.getDb(), true, params.if_exists, params.cascade);
@@ -1506,6 +1508,23 @@ public class CatalogOpExecutor {
     }
   }
 
+  private boolean isKuduHmsIntegrationEnabled(
+      org.apache.hadoop.hive.metastore.api.Table msTbl)
+          throws ImpalaRuntimeException {
+    // Check if Kudu's integration with the Hive Metastore is enabled, and validate
+    // the configuration.
+    String masterHosts = msTbl.getParameters().get(KuduTable.KEY_MASTER_HOSTS);
+    String hmsUris;
+    try {
+      hmsUris = MetaStoreUtil.getHiveMetastoreUrisKeyValue(
+          catalog_.getMetaStoreClient().getHiveClient());
+    } catch (Exception e) {
+      throw new RuntimeException(String.format("Failed to get the Hive Metastore " +
+          "configuration for table '%s' ", msTbl.getTableName()), e);
+    }
+    return KuduTable.isHMSIntegrationEnabledAndValidate(masterHosts, hmsUris);
+  }
+
   /**
    * Drops a table or view from the metastore and removes it from the catalog.
    * Also drops all associated caching requests on the table and/or table's partitions,
@@ -1566,8 +1585,9 @@ public class CatalogOpExecutor {
           LOG.error(String.format(HMS_RPC_ERROR_FORMAT_STR, "getTable") + e.getMessage());
         }
       }
-      if (msTbl != null && KuduTable.isKuduTable(msTbl)
-          && !Table.isExternalTable(msTbl)) {
+      boolean isManagedKuduTable = msTbl != null &&
+              KuduTable.isKuduTable(msTbl) && !Table.isExternalTable(msTbl);
+      if (isManagedKuduTable) {
         KuduCatalogOpExecutor.dropTable(msTbl, /* if exists */ true);
       }
 
@@ -1587,16 +1607,23 @@ public class CatalogOpExecutor {
         }
         throw new CatalogException(errorMsg);
       }
-      try (MetaStoreClient msClient = catalog_.getMetaStoreClient()) {
-        msClient.getHiveClient().dropTable(
-            tableName.getDb(), tableName.getTbl(), true, params.if_exists, params.purge);
-      } catch (NoSuchObjectException e) {
-        throw new ImpalaRuntimeException(String.format("Table %s no longer exists in " +
-            "the Hive MetaStore. Run 'invalidate metadata %s' to update the Impala " +
-            "catalog.", tableName, tableName));
-      } catch (TException e) {
-        throw new ImpalaRuntimeException(
-            String.format(HMS_RPC_ERROR_FORMAT_STR, "dropTable"), e);
+
+      // When Kudu's HMS integration is enabled, Kudu will drop the managed table
+      // entries automatically. In all other cases, we need to drop the HMS table
+      // entry ourselves.
+      if (!isManagedKuduTable || !isKuduHmsIntegrationEnabled(msTbl)) {
+        try (MetaStoreClient msClient = catalog_.getMetaStoreClient()) {
+          msClient.getHiveClient().dropTable(
+              tableName.getDb(), tableName.getTbl(), true,
+              params.if_exists, params.purge);
+        } catch (NoSuchObjectException e) {
+          throw new ImpalaRuntimeException(String.format("Table %s no longer exists " +
+              "in the Hive MetaStore. Run 'invalidate metadata %s' to update the " +
+              "Impala catalog.", tableName, tableName));
+        } catch (TException e) {
+          throw new ImpalaRuntimeException(
+              String.format(HMS_RPC_ERROR_FORMAT_STR, "dropTable"), e);
+        }
       }
       addSummary(resp, (params.is_table ? "Table " : "View ") + "has been dropped.");
 
@@ -1911,24 +1938,10 @@ public class CatalogOpExecutor {
     } else {
       KuduCatalogOpExecutor.createManagedTable(newTable, params);
     }
-    boolean createsHMSTable;
-    if (Table.isExternalTable(newTable)) {
-      createsHMSTable = true;
-    } else {
-      String masterHosts = newTable.getParameters().get(KuduTable.KEY_MASTER_HOSTS);
-      String hmsUris;
-      // Check if Kudu's integration with the Hive Metastore is enabled for
-      // managed tables, and validate the configuration.
-      try {
-        hmsUris = MetaStoreUtil.getHiveMetastoreUrisKeyValue(
-            catalog_.getMetaStoreClient().getHiveClient());
-      } catch (Exception e) {
-        throw new RuntimeException(String.format("Failed to get the Hive Metastore " +
-            "configuration for table '%s' ", newTable.getTableName()), e);
-      }
-      createsHMSTable =
-         !KuduTable.isHMSIntegrationEnabledAndValidate(masterHosts, hmsUris);
-    }
+    // When Kudu's integration with the Hive Metastore is enabled, Kudu will create
+    // the HMS table for managed tables.
+    boolean createsHMSTable = Table.isExternalTable(newTable) ?
+        true : !isKuduHmsIntegrationEnabled(newTable);
     try {
       // Add the table to the HMS and the catalog cache. Acquire metastoreDdlLock_ to
       // ensure the atomicity of these operations.
diff --git a/tests/custom_cluster/test_kudu.py b/tests/custom_cluster/test_kudu.py
index 012fce3..9bf6d3f 100644
--- a/tests/custom_cluster/test_kudu.py
+++ b/tests/custom_cluster/test_kudu.py
@@ -115,6 +115,8 @@ class TestKuduClientTimeout(CustomClusterTestSuite, KuduTestSuite):
 
 
 class TestKuduHMSIntegration(CustomClusterTestSuite, KuduTestSuite):
+  # TODO(IMPALA-8614): parameterize the common tests in query_test/test_kudu.py
+  # to run with HMS integration enabled.
   """Tests the different DDL operations when using a kudu table with Kudu's integration
      with the Hive Metastore."""
 
@@ -122,6 +124,19 @@ class TestKuduHMSIntegration(CustomClusterTestSuite, KuduTestSuite):
   def get_workload(cls):
     return 'functional-query'
 
+  @classmethod
+  def setup_class(cls):
+    # Restart Kudu cluster with HMS integration enabled
+    KUDU_ARGS = "-hive_metastore_uris=thrift://%s" % os.environ['INTERNAL_LISTEN_HOST']
+    cls._restart_kudu_service(KUDU_ARGS)
+    super(TestKuduHMSIntegration, cls).setup_class()
+
+  @classmethod
+  def teardown_class(cls):
+    # Restart Kudu cluster with HMS integration disabled
+    cls._restart_kudu_service("-hive_metastore_uris=")
+    super(TestKuduHMSIntegration, cls).teardown_class()
+
   @pytest.mark.execute_serially
   @SkipIfKudu.no_hybrid_clock
   def test_create_managed_kudu_tables(self, vector, unique_database):
@@ -177,15 +192,95 @@ class TestKuduHMSIntegration(CustomClusterTestSuite, KuduTestSuite):
     assert ["", "storage_handler", "org.apache.kudu.hive.KuduStorageHandler"] \
         in table_desc
 
-  @classmethod
-  def setup_class(cls):
-    # Restart Kudu cluster with HMS integration enabled
-    KUDU_ARGS = "-hive_metastore_uris=thrift://%s" % os.environ['INTERNAL_LISTEN_HOST']
-    cls._restart_kudu_service(KUDU_ARGS)
-    super(TestKuduHMSIntegration, cls).setup_class()
+  @pytest.mark.execute_serially
+  def test_drop_non_empty_db(self, unique_cursor, kudu_client):
+    """Check that an attempt to drop a database will fail if Kudu tables are present
+       and that the tables remain.
+    """
+    db_name = unique_cursor.conn.db_name
+    with self.temp_kudu_table(kudu_client, [INT32], db_name=db_name) as kudu_table:
+      assert kudu_client.table_exists(kudu_table.name)
+      unique_cursor.execute("INVALIDATE METADATA")
+      unique_cursor.execute("USE DEFAULT")
+      try:
+        unique_cursor.execute("DROP DATABASE %s" % db_name)
+        assert False
+      except Exception as e:
+        assert "One or more tables exist" in str(e)
 
-  @classmethod
-  def teardown_class(cls):
-    # Restart Kudu cluster with HMS integration disabled
-    cls._restart_kudu_service("-hive_metastore_uris=")
-    super(TestKuduHMSIntegration, cls).teardown_class()
+      # Dropping an empty database should succeed, once all tables
+      # from the database have been dropped.
+      assert kudu_client.table_exists(kudu_table.name)
+      unique_cursor.execute("DROP Table %s" % kudu_table.name)
+      unique_cursor.execute("DROP DATABASE %s" % db_name)
+      assert not kudu_client.table_exists(kudu_table.name)
+
+  @pytest.mark.execute_serially
+  def test_drop_db_cascade(self, unique_cursor, kudu_client):
+    """Check that an attempt to drop a database cascade will succeed even if Kudu
+       tables are present. Make sure the corresponding managed tables are removed
+       from Kudu.
+    """
+    db_name = unique_cursor.conn.db_name
+    with self.temp_kudu_table(kudu_client, [INT32], db_name=db_name) as kudu_table:
+      assert kudu_client.table_exists(kudu_table.name)
+      unique_cursor.execute("INVALIDATE METADATA")
+
+      # Create a table in HDFS
+      hdfs_table_name = self.random_table_name()
+      unique_cursor.execute("""
+          CREATE TABLE %s (a INT) PARTITIONED BY (x INT)""" % (hdfs_table_name))
+
+      unique_cursor.execute("USE DEFAULT")
+      unique_cursor.execute("DROP DATABASE %s CASCADE" % db_name)
+      unique_cursor.execute("SHOW DATABASES")
+      assert (db_name, '') not in unique_cursor.fetchall()
+      assert not kudu_client.table_exists(kudu_table.name)
+
+  @pytest.mark.execute_serially
+  def test_drop_managed_kudu_table(self, cursor, kudu_client, unique_database):
+    """Check that dropping a managed Kudu table should fail if the underlying
+       Kudu table has been dropped externally.
+    """
+    impala_tbl_name = "foo"
+    cursor.execute("""CREATE TABLE %s.%s (a INT PRIMARY KEY) PARTITION BY HASH (a)
+        PARTITIONS 3 STORED AS KUDU""" % (unique_database, impala_tbl_name))
+    kudu_tbl_name = KuduTestSuite.to_kudu_table_name(unique_database, impala_tbl_name)
+    assert kudu_client.table_exists(kudu_tbl_name)
+    kudu_client.delete_table(kudu_tbl_name)
+    assert not kudu_client.table_exists(kudu_tbl_name)
+    try:
+      cursor.execute("DROP TABLE %s" % kudu_tbl_name)
+      assert False
+    except Exception as e:
+      LOG.info(str(e))
+      assert "Table %s no longer exists in the Hive MetaStore." % kudu_tbl_name in str(e)
+
+  @pytest.mark.execute_serially
+  def test_drop_external_kudu_table(self, cursor, kudu_client, unique_database):
+    """Check that Impala can recover from the case where the underlying Kudu table of
+       an external table is dropped using the Kudu client.
+    """
+    with self.temp_kudu_table(kudu_client, [INT32], db_name=unique_database) \
+        as kudu_table:
+      # Create an external Kudu table
+      impala_table_name = self.get_kudu_table_base_name(kudu_table.name)
+      external_table_name = "%s_external" % impala_table_name
+      props = "TBLPROPERTIES('kudu.table_name'='%s')" % kudu_table.name
+      cursor.execute("CREATE EXTERNAL TABLE %s STORED AS KUDU %s" % (
+        external_table_name, props))
+      cursor.execute("DESCRIBE %s" % (external_table_name))
+      assert cursor.fetchall() == \
+             [("a", "int", "", "true", "false", "", "AUTO_ENCODING",
+               "DEFAULT_COMPRESSION", "0")]
+      # Drop the underlying Kudu table
+      kudu_client.delete_table(kudu_table.name)
+      assert not kudu_client.table_exists(kudu_table.name)
+      err_msg = 'the table does not exist: table_name: "%s"' % (kudu_table.name)
+      try:
+        cursor.execute("REFRESH %s" % (external_table_name))
+      except Exception as e:
+        assert err_msg in str(e)
+      cursor.execute("DROP TABLE %s" % (external_table_name))
+      cursor.execute("SHOW TABLES")
+      assert (external_table_name,) not in cursor.fetchall()


[impala] 02/02: IMPALA-8425: part 1: reduce size of binaries in container

Posted by ta...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

tarmstrong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 45c5652b6694864afef2af3fdf93d3bd8dfa7190
Author: Tim Armstrong <ta...@cloudera.com>
AuthorDate: Fri May 31 09:18:54 2019 -0700

    IMPALA-8425: part 1: reduce size of binaries in container
    
    * Symlink impalad/catalog/statestored inside container.
      This doesn't seem to really save any space - there's
      some kind of deduplication going on.
    * Don't include libfesupport.so, which shouldn't be needed.
    * strip debug symbols from the binary.
    * Only include the libkuduclient.so libraries for Kudu
    
    This shaves ~1.1GB from the image size- 250MB as a result
    of the impalad binary changes and the remainder from the
    Kudu changs.
    
    Change-Id: I95ff479bedd3b93e6569e72f03f42acd9dba8b14
    Reviewed-on: http://gerrit.cloudera.org:8080/13487
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 docker/daemon_entrypoint.sh   |  4 ++--
 docker/impala_base/Dockerfile | 26 ++++++++++++++------------
 docker/setup_build_context.py | 25 +++++++++++++++++--------
 3 files changed, 33 insertions(+), 22 deletions(-)

diff --git a/docker/daemon_entrypoint.sh b/docker/daemon_entrypoint.sh
index 1bc7752..16fa7f0 100755
--- a/docker/daemon_entrypoint.sh
+++ b/docker/daemon_entrypoint.sh
@@ -27,9 +27,9 @@ export IMPALA_HOME=/opt/impala
 
 # Add directories containing dynamic libraries required by the daemons that
 # are not on the system library paths.
-export LD_LIBRARY_PATH=/usr/lib/jvm/java-8-openjdk-amd64/jre/lib/amd64/
+export LD_LIBRARY_PATH=/opt/impala/lib
+LD_LIBRARY_PATH+=:/usr/lib/jvm/java-8-openjdk-amd64/jre/lib/amd64/
 LD_LIBRARY_PATH+=:/usr/lib/jvm/java-8-openjdk-amd64/jre/lib/amd64/server/
-LD_LIBRARY_PATH+=:/opt/kudu/release/lib
 
 # Add directory with optional plugins that can be mounted for the container.
 LD_LIBRARY_PATH+=:/opt/impala/lib/plugins
diff --git a/docker/impala_base/Dockerfile b/docker/impala_base/Dockerfile
index 3f9bee0..225adda 100644
--- a/docker/impala_base/Dockerfile
+++ b/docker/impala_base/Dockerfile
@@ -24,24 +24,26 @@ RUN apt-get update && \
   apt-get install -y openjdk-8-jre-headless \
   libsasl2-2 libsasl2-modules libsasl2-modules-gssapi-mit \
   tzdata liblzo2-2 && \
-  apt-get clean
-
-# Use a non-privileged impala user to run the daemons in the container.
-# That user should own everything in the /opt/impala subdirectory.
-RUN groupadd -r impala && useradd --no-log-init -r -g impala impala && \
-    mkdir -p /opt/impala && chown impala /opt/impala && \
-    chmod ugo+w /etc/passwd
-USER impala
+  apt-get clean && \
+  rm -rf /var/lib/apt/lists/*
 
 # Copy build artifacts required for the daemon processes.
 # Need to have multiple copy commands to preserve directory structure.
-COPY bin /opt/impala/bin
 COPY lib /opt/impala/lib
 COPY www /opt/impala/www
-COPY kudu /opt/kudu
+COPY bin /opt/impala/bin
+# Symlink here instead of in setup_build_context to avoid duplicate binaries.
+RUN cd /opt/impala/bin && ln -s impalad statestored && ln -s impalad catalogd && \
 # Create conf directory for later config injection.
-RUN mkdir /opt/impala/conf
+    mkdir /opt/impala/conf && \
 # Create logs directory to collect container logs.
-RUN mkdir /opt/impala/logs
+    mkdir /opt/impala/logs
+
+# Use a non-privileged impala user to run the daemons in the container.
+# That user should own everything in the /opt/impala subdirectory.
+RUN groupadd -r impala && useradd --no-log-init -r -g impala impala && \
+    mkdir -p /opt/impala && chown impala -R /opt/impala && \
+    chmod ugo+w /etc/passwd
+USER impala
 
 WORKDIR /opt/impala/
diff --git a/docker/setup_build_context.py b/docker/setup_build_context.py
index 817263e..b953522 100755
--- a/docker/setup_build_context.py
+++ b/docker/setup_build_context.py
@@ -23,6 +23,7 @@
 import glob
 import os
 import shutil
+from subprocess import check_call
 
 IMPALA_HOME = os.environ["IMPALA_HOME"]
 OUTPUT_DIR = os.path.join(IMPALA_HOME, "docker/build_context")
@@ -30,7 +31,13 @@ DOCKERFILE = os.path.join(IMPALA_HOME, "docker/impala_base/Dockerfile")
 
 IMPALA_TOOLCHAIN = os.environ["IMPALA_TOOLCHAIN"]
 IMPALA_GCC_VERSION = os.environ["IMPALA_GCC_VERSION"]
+IMPALA_BINUTILS_VERSION = os.environ["IMPALA_BINUTILS_VERSION"]
 GCC_HOME = os.path.join(IMPALA_TOOLCHAIN, "gcc-{0}".format(IMPALA_GCC_VERSION))
+BINUTILS_HOME = os.path.join(
+    IMPALA_TOOLCHAIN, "binutils-{0}".format(IMPALA_BINUTILS_VERSION))
+STRIP = os.path.join(BINUTILS_HOME, "bin/strip")
+KUDU_HOME = os.environ["IMPALA_KUDU_HOME"]
+KUDU_LIB_DIR = os.path.join(KUDU_HOME, "release/lib")
 
 # Ensure the output directory exists and is empty.
 if os.path.exists(OUTPUT_DIR):
@@ -47,18 +54,20 @@ os.mkdir(LIB_DIR)
 
 
 def symlink_file_into_dir(src_file, dst_dir):
-    """Helper to symlink 'src_file' into 'dst_dir'."""
-    os.symlink(src_file, os.path.join(dst_dir, os.path.basename(src_file)))
+  """Helper to symlink 'src_file' into 'dst_dir'."""
+  os.symlink(src_file, os.path.join(dst_dir, os.path.basename(src_file)))
 
 
 # Impala binaries and native dependencies.
-for bin in ["impalad", "statestored", "catalogd", "libfesupport.so"]:
-    symlink_file_into_dir(os.path.join(IMPALA_HOME, "be/build/latest/service", bin),
-         BIN_DIR)
+check_call([STRIP, "--strip-debug",
+            os.path.join(IMPALA_HOME, "be/build/latest/service/impalad"),
+            "-o", os.path.join(BIN_DIR, "impalad")])
 for lib in ["libstdc++", "libgcc"]:
-    for so in glob.glob(os.path.join(GCC_HOME, "lib64/{0}*.so*".format(lib))):
-        symlink_file_into_dir(so, LIB_DIR)
-os.symlink(os.environ["IMPALA_KUDU_HOME"], os.path.join(OUTPUT_DIR, "kudu"))
+  for so in glob.glob(os.path.join(GCC_HOME, "lib64/{0}*.so*".format(lib))):
+    symlink_file_into_dir(so, LIB_DIR)
+
+for so in glob.glob(os.path.join(KUDU_LIB_DIR, "libkudu_client.so*")):
+  symlink_file_into_dir(so, LIB_DIR)
 
 # Impala dependencies.
 dep_classpath = file(os.path.join(IMPALA_HOME, "fe/target/build-classpath.txt")).read()