You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by jo...@apache.org on 2020/06/13 00:05:48 UTC

[impala] 03/03: IMPALA-9843: Add support for metastore db schema upgrade

This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit f8c28f8adfd781727c311b15546a532ce65881e0
Author: Vihang Karajgaonkar <vi...@apache.org>
AuthorDate: Tue Jun 9 12:44:21 2020 -0700

    IMPALA-9843: Add support for metastore db schema upgrade
    
    This change adds support for upgrading the HMS database schema using the
    Hive schema tool. It adds a new option to the buildall.sh script
    which can be provided to upgrade the HMS db schema. Alternatively,
    users can directly upgrade the schema using the
    create-test-configuration.sh script. The logs for the schema upgrade
    are available in logs/cluster/schematool.log.
    
    The following invocations upgrade the HMS database schema:
    
    1. buildall.sh -upgrade_metastore_db
    2. bin/create-test-configuration.sh -upgrade_metastore_db
    
    This upgrade option is idempotent. It is a no-op if the metastore
    schema is already at its latest version. In case of any errors, the
    only fallback currently is to format the metastore schema and load
    the test data again.
    
    Testing:
    Upgraded the HMS schema on my local dev environment and made
    sure that the HMS service starts without any errors.
    
    Change-Id: I85af8d57e110ff284832056a1661f94b85ed3b09
    Reviewed-on: http://gerrit.cloudera.org:8080/16054
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 bin/create-test-configuration.sh | 13 +++++++++++++
 buildall.sh                      | 20 +++++++++++++++++---
 2 files changed, 30 insertions(+), 3 deletions(-)

diff --git a/bin/create-test-configuration.sh b/bin/create-test-configuration.sh
index 8ab2e48..83d500b 100755
--- a/bin/create-test-configuration.sh
+++ b/bin/create-test-configuration.sh
@@ -66,6 +66,7 @@ function generate_config {
 
 CREATE_METASTORE=0
 CREATE_RANGER_POLICY_DB=0
+UPGRADE_METASTORE_DB=0
 
 # parse command line options
 for ARG in $*
@@ -77,9 +78,13 @@ do
     -create_ranger_policy_db)
       CREATE_RANGER_POLICY_DB=1
       ;;
+    -upgrade_metastore_db)
+      UPGRADE_METASTORE_DB=1
+      ;;
     -help|*)
       echo "[-create_metastore] : If true, creates a new metastore."
       echo "[-create_ranger_policy_db] : If true, creates a new Ranger policy db."
+      echo "[-upgrade_metastore_db] : If true, upgrades the schema of HMS db."
       exit 1
       ;;
   esac
@@ -163,12 +168,20 @@ if [ $CREATE_METASTORE -eq 1 ]; then
   # version and invokes the appropriate scripts
   CLASSPATH={$CLASSPATH}:${CONFIG_DIR} ${HIVE_HOME}/bin/schematool -initSchema -dbType \
 postgres 1>${IMPALA_CLUSTER_LOGS_DIR}/schematool.log 2>&1
+  # TODO: We probably don't need to do this anymore
   # Increase the size limit of PARAM_VALUE from SERDE_PARAMS table to be able to create
   # HBase tables with large number of columns.
   echo "alter table \"SERDE_PARAMS\" alter column \"PARAM_VALUE\" type character varying" \
       | psql -q -U hiveuser -d ${METASTORE_DB}
 fi
 
+if [ $UPGRADE_METASTORE_DB -eq 1 ]; then
+  echo "Upgrading the schema of metastore db ${METASTORE_DB}. Check \
+${IMPALA_CLUSTER_LOGS_DIR}/schematool.log for details."
+  CLASSPATH={$CLASSPATH}:${CONFIG_DIR} ${HIVE_HOME}/bin/schematool -upgradeSchema \
+-dbType postgres 1>${IMPALA_CLUSTER_LOGS_DIR}/schematool.log 2>&1
+fi
+
 if [ $CREATE_RANGER_POLICY_DB -eq 1 ]; then
   echo "Creating Ranger Policy Server DB"
   dropdb -U hiveuser "${RANGER_POLICY_DB}" 2> /dev/null || true
diff --git a/buildall.sh b/buildall.sh
index 158de01..dbe4030 100755
--- a/buildall.sh
+++ b/buildall.sh
@@ -58,6 +58,7 @@ TESTDATA_ACTION=0
 TESTS_ACTION=1
 FORMAT_CLUSTER=0
 FORMAT_METASTORE=0
+UPGRADE_METASTORE_SCHEMA=0
 FORMAT_RANGER_POLICY_DB=0
 NEED_MINICLUSTER=0
 START_IMPALA_CLUSTER=0
@@ -114,6 +115,9 @@ do
     -format_metastore)
       FORMAT_METASTORE=1
       ;;
+    -upgrade_metastore_db)
+      UPGRADE_METASTORE_SCHEMA=1
+      ;;
     -format_ranger_policy_db)
       FORMAT_RANGER_POLICY_DB=1
       ;;
@@ -201,6 +205,8 @@ do
            "[Default: False]"
       echo "[-format_cluster] : Format the minicluster [Default: False]"
       echo "[-format_metastore] : Format the metastore db [Default: False]"
+      echo "[-upgrade_metastore_db] : Upgrades the schema of metastore db"\
+           "[Default: False]"
       echo "[-format_ranger_policy_db] : Format the Ranger policy db [Default: False]"
       echo "[-release_and_debug] : Build both release and debug binaries. Overrides "\
            "other build types [Default: false]"
@@ -269,7 +275,10 @@ Examples of common tasks:
   ./buildall.sh -testdata
 
   # Build, format mini-cluster and metastore, load all test data, run tests
-  ./buildall.sh -testdata -format"
+  ./buildall.sh -testdata -format
+
+  # Build and upgrade metastore schema to latest.
+  ./buildall.sh -upgrade_metastore_db"
       exit 1
       ;;
     esac
@@ -349,7 +358,7 @@ fi
 
 if [[ $TESTS_ACTION -eq 1 || $TESTDATA_ACTION -eq 1 || $FORMAT_CLUSTER -eq 1 ||
       $FORMAT_METASTORE -eq 1 || $FORMAT_RANGER_POLICY_DB -eq 1 || -n "$SNAPSHOT_FILE" ||
-      -n "$METASTORE_SNAPSHOT_FILE" ]]; then
+      -n "$METASTORE_SNAPSHOT_FILE" || $UPGRADE_METASTORE_SCHEMA -eq 1 ]]; then
   NEED_MINICLUSTER=1
 fi
 
@@ -486,7 +495,8 @@ reconfigure_test_cluster() {
   "${IMPALA_HOME}/bin/start-impala-cluster.py" --kill --force
 
   if [[ "$FORMAT_METASTORE" -eq 1 || "$FORMAT_CLUSTER" -eq 1 ||
-        "$FORMAT_RANGER_POLICY_DB" -eq 1 || -n "$METASTORE_SNAPSHOT_FILE" ]]
+        "$FORMAT_RANGER_POLICY_DB" -eq 1 || -n "$METASTORE_SNAPSHOT_FILE" ||
+        "$UPGRADE_METASTORE_SCHEMA" -eq 1 ]]
   then
     # Kill any processes that may be accessing postgres metastore. To be safe, this is
     # done before we make any changes to the config files.
@@ -502,6 +512,10 @@ reconfigure_test_cluster() {
     CREATE_TEST_CONFIG_ARGS+=" -create_metastore"
   fi
 
+  if [[ "$UPGRADE_METASTORE_SCHEMA" -eq 1 ]]; then
+    CREATE_TEST_CONFIG_ARGS+=" -upgrade_metastore_db"
+  fi
+
   # Generate the Hadoop configs needed by Impala
   "${IMPALA_HOME}/bin/create-test-configuration.sh" ${CREATE_TEST_CONFIG_ARGS}