Posted to commits@impala.apache.org by ta...@apache.org on 2018/05/15 07:20:01 UTC

[1/8] impala git commit: IMPALA-4464: Remove /bin/remote_data_load.py

Repository: impala
Updated Branches:
  refs/heads/master 19bcc3099 -> 97ecc154b


IMPALA-4464: Remove /bin/remote_data_load.py

This file was started before the ASF project was set up, and
committed as-is. However, it relies on some internal resources
not generally available to the external Apache community at large,
and so serves no purpose in that context.

Change-Id: I002efae6ad538d371680ce23099277708ed67e0e
Reviewed-on: http://gerrit.cloudera.org:8080/10388
Reviewed-by: Philip Zeyliger <ph...@cloudera.com>
Tested-by: David Knupp <dk...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/fc47a545
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/fc47a545
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/fc47a545

Branch: refs/heads/master
Commit: fc47a545d69fe321ad835575a399cfacacd5e982
Parents: 19bcc30
Author: David Knupp <dk...@cloudera.com>
Authored: Mon May 14 01:03:31 2018 -0700
Committer: David Knupp <dk...@cloudera.com>
Committed: Mon May 14 20:34:36 2018 +0000

----------------------------------------------------------------------
 bin/remote_data_load.py | 560 -------------------------------------------
 1 file changed, 560 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/fc47a545/bin/remote_data_load.py
----------------------------------------------------------------------
diff --git a/bin/remote_data_load.py b/bin/remote_data_load.py
deleted file mode 100755
index 85a9f95..0000000
--- a/bin/remote_data_load.py
+++ /dev/null
@@ -1,560 +0,0 @@
-#!/usr/bin/env python
-# Copyright 2015 Cloudera Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-#
-# This is a setup script that will download a test warehouse snapshot and
-# deploy it on a remote, CM-managed cluster. Once the data is loaded, it is
-# possible to run a subset of the Impala core / exhaustive tests on the
-# remote cluster.
-#
-#   * This script should be executed from a machine that has the Impala
-#     development environment set up.
-#
-#   * The cluster needs to be configured appropriately:
-#     - The following services need to be installed:
-#       HDFS, YARN, HIVE, IMPALA, MAPREDUCE, KUDU, HBASE, ZOOKEEPER
-#     - GPL Extras parcel needs to be installed
-#     - Metastore DB SERDE properties PARAM_VALUE needs to be altered to
-#       allow for wide tables (See HIVE-1364.)
-#     - The hive-warehouse path needs to be /test-warehouse
-#
-# Usage: remote_data_load.py [options] cm_host
-#
-#  Options:
-#    -h, --help           show this help message and exit
-#    --cm-user=CM_USER    Cloudera Manager admin user
-#    --cm-pass=CM_PASS    Cloudera Manager admin user password
-#    --no-load            Do not try to load the snapshot
-#    --test               Run end-to-end tests against cluster
-#    --gateway=GATEWAY    Gateway host to upload the data from. If not set, uses
-#                         the CM host as gateway.
-#    --ssh-user=SSH_USER  System user on the remote machine with passwordless SSH
-#                         configured.
-#
-import fnmatch
-import glob
-import logging
-import os
-import sh
-import shutil
-import sys
-import time
-
-from cm_api.api_client import ApiResource
-from functools import wraps
-from optparse import OptionParser
-from sh import ssh
-from tempfile import mkdtemp
-from urllib import quote as urlquote
-
-
-REQUIRED_SERVICES = ['HBASE',
-                     'HDFS',
-                     'HIVE',
-                     'IMPALA',
-                     'KUDU',
-                     'MAPREDUCE',
-                     'YARN',
-                     'ZOOKEEPER']
-
-# TODO: It's not currently possible to get the version from the cluster.
-# It would be nice to generate this dynamically.
-# (v14 happens to be the version that ships with CDH 5.9.x)
-CM_API_VERSION = 'v14'
-
-# Impala's data loading and test framework assumes this Hive Warehouse Directory.
-# Making this configurable would be an invasive change, and therefore, we prefer to
-# re-configure the Hive service via the CM API before loading data and running tests.
-HIVE_WAREHOUSE_DIR = "/test-warehouse"
-
-logger = logging.getLogger("remote_data_load")
-logger.setLevel(logging.DEBUG)
-
-# Goes to the file
-fh = logging.FileHandler("remote_data_load.log")
-fh.setLevel(logging.DEBUG)
-
-# Goes to the console (StreamHandler defaults to stderr)
-ch = logging.StreamHandler()
-ch.setLevel(logging.INFO)
-
-# create formatter and add it to the handlers
-formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
-fh.setFormatter(formatter)
-ch.setFormatter(formatter)
-
-# add the handlers to the logger
-logger.addHandler(fh)
-logger.addHandler(ch)
-
-
-def timing(func):
-    """
-    A decorator that measures and logs how long a function call takes.
-
-    We can modify this later to do something more intelligent than just logging.
-    """
-    @wraps(func)
-    def wrap(*args, **kwargs):
-        t1 = time.time()
-        result = func(*args, **kwargs)
-        t2 = time.time()
-
-        output = 'TIME: {name}() took: {t:.4f} seconds'
-        logger.info(output.format(name=func.__name__, t=(t2-t1)))
-        return result
-    return wrap
-
-
-def tee(line):
-    """Output wrapper function used by sh to send the stdout or stderr to the
-    module's logger."""
-    logger.debug(line.strip())
-
-
-class RemoteDataLoad(object):
-    """This is an implementation of the process to load a test-warehouse snapshot on
-    a remote CM managed cluster. This script assumes that the warehouse snapshot was
-    already downloaded and was either passed in as a parameter, can be found by
-    inspecting the SNAPSHOT_DIR environment variable, or is derived from the WORKSPACE
-    environment variable on a Jenkins build slave.
-
-    The reason for the additional setup code is that in the local development
-    environment it is assumed that $USER is HDFS superuser, which is not the case for
-    remote deployments.
-    """
-
-    def __init__(self, cm_host, options):
-        logger.info("Starting remote data load...")
-        self.options = options
-        self.cm_host = cm_host
-
-        # Gateway host can be used if the CM host is not configured as a Hadoop gateway
-        self.gateway = options.gateway if options.gateway else cm_host
-        self.impala_home = os.environ["IMPALA_HOME"]
-        self.api = ApiResource(self.cm_host, username=options.cm_user,
-                               password=options.cm_pass)
-
-        # The API returns a list of clusters managed by the CM host. We're assuming
-        # that this CM host was set up for the purpose of Impala testing on one
-        # cluster, so the list should only have one value.
-        self.cluster = self.api.get_all_clusters()[0]
-        self.services = self.get_services()
-
-        self.config = self.get_service_client_configurations()
-        logger.info("Retrieved service configuration")
-        logger.info(str(self.config))
-        self.prepare()
-        logger.info("IMPALA_HOME: {0}".format(self.impala_home))
-
-    def get_hostname_for_ref(self, host_ref):
-        """Translate the HostRef instance into the hostname."""
-        return self.api.get_host(host_ref.hostId).hostname
-
-    @staticmethod
-    def get_or_default(config):
-        return config.value if config.value else config.default
-
-    def get_services(self):
-        """Confirm that all services are running, and return service dict."""
-        services = dict((s.type, s) for s in self.cluster.get_all_services())
-
-        if set(REQUIRED_SERVICES) != set(services.keys()):
-            missing_services = set(REQUIRED_SERVICES) - set(services.keys())
-            logger.error("Services not installed: {0}".format(list(missing_services)))
-            raise RuntimeError("Cluster not ready.")
-
-        if not all(services[s].serviceState == 'STARTED' for s in services):
-            stopped = [s for s in services if services[s].serviceState != "STARTED"]
-            logger.error("Not all services started: {0}".format(stopped))
-            raise RuntimeError("Cluster not ready.")
-
-        return services
-
-    @timing
-    def download_client_config(self, cluster, service):
-        """Download the client configuration zip for a particular cluster and service.
-
-        Since cm_api does not provide a way to download the archive we build the URL
-        manually and download the file. Once the file is downloaded, the archive is
-        extracted and its contents are copied to the Hadoop configuration directories
-        defined by Impala.
-        """
-        logger.info("Downloading client configuration for {0}".format(service.name))
-        url = "http://{0}:7180/api/{1}/clusters/{2}/services/{3}/clientConfig".format(
-            self.cm_host, CM_API_VERSION, urlquote(cluster.name), urlquote(service.name))
-        path = mkdtemp()
-        sh.curl(url, o=os.path.join(path, "clientConfig.zip"), _out=tee, _err=tee)
-        current = os.getcwd()
-        os.chdir(path)
-        sh.unzip("clientConfig.zip")
-        for root, _, file_names in os.walk("."):
-            for filename in fnmatch.filter(file_names, "*.xml"):
-                src = os.path.join(root, filename)
-                dst = os.path.join(self.impala_home, "fe", "src", "test", "resources")
-                logger.debug("Copying {0} to {1}".format(src, dst))
-                shutil.copy(src, dst)
-        os.chdir(current)
-
-    # TODO: this may be available in tests/comparison/cluster.py
-    def set_hive_warehouse_dir(self, cluster, service):
-        logger.info("Setting the Hive Warehouse Dir")
-        for svc in self.api.get_all_clusters()[0].get_all_services():
-            logger.info(svc)
-            if svc.type == "HIVE":
-                hive_config = {"hive_warehouse_directory": HIVE_WAREHOUSE_DIR}
-                svc.update_config(hive_config)
-
-    # TODO: This functionality should be more generally available to other infrastructure
-    # code, rather than being quarantined in this script. See IMPALA-4367.
-    @timing
-    def get_service_client_configurations(self):
-        """Download the client configurations necessary to upload data to the remote
-        cluster. Unfortunately, the CM API does not allow bulk downloads, so we must
-        iterate over the services and download the config for all of them.
-
-        In addition, returns an options dictionary with settings required for data loading
-        like the HS2 server, Impala hosts, NameNode, etc.
-
-        Returns:
-            A client-configuration dictionary, e.g.:
-
-            {
-                'hive_warehouse_directory': '/test-warehouse',
-                'hs2': 'impala-test-cluster-1.gce.cloudera.com:10000',
-                'impalad': ['impala-test-cluster-4.gce.cloudera.com:21000',
-                            'impala-test-cluster-2.gce.cloudera.com:21000',
-                            'impala-test-cluster-3.gce.cloudera.com:21000'],
-                'metastore': 'impala-test-cluster-1.gce.cloudera.com:9083',
-                'namenode': 'impala-test-cluster-1.gce.cloudera.com',
-                'namenode_http': 'impala-test-cluster-1.gce.cloudera.com:20101',
-                'kudu_master': 'impala-test-cluster-1.gce.cloudera.com'
-            }
-        """
-        # Iterate over the services and find the information we need.
-        result = {}
-        for service_type, service in self.services.iteritems():
-            if service_type == "IMPALA":
-                roles = service.get_roles_by_type("IMPALAD")
-                impalads = []
-                for r in roles:
-                    rc_config = r.get_config("full")
-                    hostname = self.get_hostname_for_ref(r.hostRef)
-                    beeswax_port = self.get_or_default(rc_config["beeswax_port"])
-                    impalads.append("{0}:{1}".format(hostname, beeswax_port))
-                result["impalad"] = impalads
-            elif service_type == "HBASE":
-                self.download_client_config(self.cluster, service)
-            elif service_type == "HDFS":
-                self.download_client_config(self.cluster, service)
-                role = service.get_roles_by_type("NAMENODE")
-                config = role[0].get_config("full")
-                namenode = self.get_hostname_for_ref(role[0].hostRef)
-                result["namenode"] = namenode
-                result["namenode_http"] = "{0}:{1}".format(
-                    namenode,
-                    self.get_or_default(config["dfs_http_port"])
-                )
-            elif service_type == "HIVE":
-                self.set_hive_warehouse_dir(self.cluster, service)
-                self.download_client_config(self.cluster, service)
-                hs2 = service.get_roles_by_type("HIVESERVER2")[0]
-                rc_config = hs2.get_config("full")
-                result["hive_warehouse_directory"] = self.get_or_default(
-                    service.get_config("full")[0]["hive_warehouse_directory"])
-                hostname = self.get_hostname_for_ref(hs2.hostRef)
-                result["hs2"] = "{0}:{1}".format(hostname, self.get_or_default(
-                    rc_config["hs2_thrift_address_port"]))
-
-                # Get Metastore information
-                ms = service.get_roles_by_type("HIVEMETASTORE")[0]
-                rc_config = ms.get_config("full")
-                result["metastore"] = "{0}:{1}".format(
-                    self.get_hostname_for_ref(ms.hostRef),
-                    self.get_or_default(rc_config["hive_metastore_port"])
-                )
-            elif service_type == "KUDU":
-                # Service KUDU does not require a client configuration
-                result["kudu_master"] = self.cm_host
-
-        return result
-
-    # TODO: This functionality should be more generally available to other infrastructure
-    # code, rather than being quarantined in this script. See IMPALA-4367.
-    @staticmethod
-    def find_snapshot_file(snapshot_dir):
-        """Given snapshot_directory, walks the directory tree until it finds a file
-        matching the test-warehouse archive pattern."""
-        for root, _, file_names in os.walk(snapshot_dir):
-            for filename in fnmatch.filter(file_names, "test-warehouse-*-SNAPSHOT.tar.gz"):
-                logger.info("Found Snapshot file {0}".format(filename))
-                return os.path.join(root, filename)
-
-    @timing
-    def prepare(self):
-        """Populate the environment of the process with the necessary values.
-
-        In addition, it creates helper objects to run shell and SSH processes.
-        """
-        # Populate environment with required variables
-        os.environ["HS2_HOST_PORT"] = self.config["hs2"]
-        os.environ["HDFS_NN"] = self.config["namenode"]
-        os.environ["IMPALAD"] = self.config["impalad"][0]
-        os.environ["REMOTE_LOAD"] = "1"
-        os.environ["HADOOP_USER_NAME"] = "hdfs"
-        os.environ["TEST_WAREHOUSE_DIR"] = self.config["hive_warehouse_directory"]
-        os.environ["KUDU_MASTER"] = self.config["kudu_master"]
-
-        if self.options.snapshot_file is None:
-            if "SNAPSHOT_DIR" in os.environ:
-                snapshot_dir = os.environ["SNAPSHOT_DIR"]
-            else:
-                snapshot_dir = "{0}/testdata/test-warehouse-SNAPSHOT".format(
-                    os.getenv("WORKSPACE"))
-            if not os.path.isdir(snapshot_dir):
-                err_msg = 'Snapshot directory "{0}" is not a valid directory'
-                logger.error(err_msg.format(snapshot_dir))
-                raise OSError("Could not find test-warehouse snapshot file.")
-
-            logger.info("Snapshot directory: {0}".format(snapshot_dir))
-            self.snapshot_file = self.find_snapshot_file(snapshot_dir)
-        else:
-            self.snapshot_file = self.options.snapshot_file
-
-        # Prepare shortcuts for connecting to remote services
-        self.gtw_ssh = ssh.bake("{0}@{1}".format(self.options.ssh_user, self.gateway),
-                                "-oStrictHostKeyChecking=no",
-                                "-oUserKnownHostsFile=/dev/null",
-                                t=True, _out=tee, _err=tee)
-
-        self.beeline = sh.beeline.bake(silent=False, outputformat="csv2", n="impala",
-                                       u="jdbc:hive2://{0}/default".format(
-                                           self.config["hs2"]))
-
-        self.load_test_warehouse = sh.Command(
-            "{0}/testdata/bin/load-test-warehouse-snapshot.sh".format(
-                self.impala_home)).bake(
-            _out=tee, _err=tee)
-
-        self.create_load_data = sh.Command(
-            "{0}/testdata/bin/create-load-data.sh".format(self.impala_home))
-
-        self.main_impalad = self.config["impalad"][0]
-        self.impala_shell = sh.Command("impala-shell.sh").bake(i=self.main_impalad,
-                                                               _out=tee, _err=tee)
-
-        self.python = sh.Command("impala-python").bake(u=True)
-        self.compute_stats = sh.Command(
-            "{0}/testdata/bin/compute-table-stats.sh".format(self.impala_home)).bake(
-            _out=tee, _err=tee)
-
-    @timing
-    def load(self):
-        """This method performs the actual data load. First it removes any known artifacts
-        from the remote location. Next it drops any existing databases from the
-        Hive Metastore. It then invokes the load-test-warehouse-snapshot.sh and
-        create-load-data.sh scripts with the appropriate parameters. The most important
-        parameters are implicitly passed to the scripts as environment variables pointing
-        to the remote HDFS, Hive and Impala.
-        """
-        exploration_strategy = self.options.exploration_strategy
-
-        logger.info("Removing other databases")
-        dblist = self.beeline(e="show databases;", _err=tee).stdout
-        database_list = dblist.split()[1:]  # The first element is the header string
-        for db in database_list:
-            if db.strip() != "default":
-                logger.debug("Dropping database %s", db)
-                self.impala_shell(q="drop database if exists {0} cascade;".format(db))
-
-        logger.info("Invalidating metadata in Impala")
-        self.impala_shell(q="invalidate metadata;")
-
-        logger.info("Removing previous remote {0}".format(
-            self.config["hive_warehouse_directory"]))
-        r = sh.hdfs.dfs("-rm", "-r", "-f", "{0}".format(
-            self.config["hive_warehouse_directory"]))
-
-        logger.info("Expunging HDFS trash")
-        r = sh.hdfs.dfs("-expunge")
-
-        logger.info("Uploading test warehouse snapshot")
-        self.load_test_warehouse(self.snapshot_file)
-
-        # TODO: We need to confirm that if we change any permissions, that we don't
-        # affect any running tests. See IMPALA-4375.
-        logger.info("Changing warehouse ownership")
-        r = sh.hdfs.dfs("-chown", "-R", "impala:hdfs", "{0}".format(
-            self.config["hive_warehouse_directory"]))
-        sh.hdfs.dfs("-chmod", "-R", "g+rwx", "{0}".format(
-            self.config["hive_warehouse_directory"]))
-        sh.hdfs.dfs("-chmod", "1777", "{0}".format(
-            self.config["hive_warehouse_directory"]))
-
-        logger.info("Calling create_load_data.sh")
-        # The $USER variable is used in the create-load-data.sh script for beeline
-        # impersonation.
-        new_env = os.environ.copy()
-        new_env["LOGNAME"] = "impala"
-        new_env["USER"] = "impala"
-        new_env["USERNAME"] = "impala"
-
-        # We always pass -skip_snapshot_load to create-load-data.sh, regardless of
-        # whether we are actually skipping the snapshot load.
-        # This is because we have already loaded the snapshot earlier in this
-        # script, so we don't want create-load-data.sh to invoke
-        # load-test-warehouse-snapshot.sh again.
-        #
-        # It would actually be nice to be able to skip the snapshot load, but
-        # because of the existing messiness of create-load-data.sh, we can't.
-        # This invocation...
-        #
-        #    $ create-load-data.sh -skip_snapshot_load -exploration_strategy core
-        #
-        # ...results in this error:
-        #
-        #    Creating /test-warehouse HDFS directory \
-        #    (logging to create-test-warehouse-dir.log)... FAILED
-        #    'hadoop fs -mkdir /test-warehouse' failed. Tail of log:
-        #    Log for command 'hadoop fs -mkdir /test-warehouse'
-        #    mkdir: `/test-warehouse': File exists
-        #
-        # Similarly, even though we might pass in "core" as the exploration strategy,
-        # because we aren't loading a metadata snapshot (i.e., -skip_metadata_load is
-        # false), an exhaustive dataload will always be done. This again is the result
-        # of logic in create-load-data.sh, which itself ignores the value passed in
-        # for -exploration_strategy.
-        #
-        # See IMPALA-4399: "create-load-data.sh has bitrotted to some extent, and needs
-        #                   to be cleaned up"
-        create_load_data_args = ["-skip_snapshot_load", "-cm_host", self.cm_host,
-                                 "-snapshot_file", self.snapshot_file,
-                                 "-exploration_strategy", exploration_strategy]
-
-        self.create_load_data(*create_load_data_args, _env=new_env, _out=tee, _err=tee)
-
-        sh.hdfs.dfs("-chown", "-R", "impala:hdfs", "{0}".format(
-            self.config["hive_warehouse_directory"]))
-
-        logger.info("Re-load HBase data")
-        # Manually load the HBase data last.
-        self.python("{0}/bin/load-data.py".format(self.impala_home),
-                    "--hive_warehouse_dir={0}".format(
-                        self.config["hive_warehouse_directory"]),
-                    "--table_formats=hbase/none",
-                    "--hive_hs2_hostport={0}".format(self.config["hs2"]),
-                    "--hdfs_namenode={0}".format(self.config["namenode"]),
-                    "--exploration_strategy={0}".format(exploration_strategy),
-                    workloads="functional-query",
-                    force=True,
-                    impalad=self.main_impalad,
-                    _env=new_env,
-                    _out=tee,
-                    _err=tee)
-
-        self.compute_stats()
-        logger.info("Load data finished")
-
-    # TODO: Should this be refactored out of this script? It has nothing to do with
-    # data loading per se. If tests rely on the environment on the client being set
-    # a certain way -- as in the prepare() method -- we may need to find another way
-    # to deal with that. See IMPALA-4376.
-    @timing
-    def test(self):
-        """Execute Impala's end-to-end tests against a remote cluster. All configuration
-        parameters are picked from the cluster configuration that was fetched via the
-        CM API."""
-
-        # TODO: Running tests via runtest.py is currently not working against a remote
-        # cluster (although running directly via py.test seems to work.) This method
-        # may be refactored out of this file under IMPALA-4376, so for the time being,
-        # raise a NotImplementedError.
-        raise NotImplementedError
-
-        # Overwrite the username to match the service user on the remote system and deal
-        # with the assumption that in the local development environment the current user
-        # is HDFS superuser as well.
-        new_env = os.environ.copy()
-        new_env["LOGNAME"] = "impala"
-        new_env["USER"] = "impala"
-        new_env["USERNAME"] = "impala"
-
-        strategy = self.options.exploration_strategy
-        logger.info("Running tests with exploration strategy {0}".format(strategy))
-        run_tests = sh.Command("{0}/tests/run-tests.py".format(self.impala_home))
-        run_tests("--skip_local_tests",
-                  "--exploration_strategy={0}".format(strategy),
-                  "--workload_exploration_strategy=functional-query:{0}".format(strategy),
-                  "--namenode_http_address={0}".format(self.config["namenode_http"]),
-                  "--hive_server2={0}".format(self.config["hs2"]),
-                  "--metastore_server={0}".format(self.config["metastore"]),
-                  "query_test",
-                  maxfail=10,
-                  impalad=",".join(self.config["impalad"]),
-                  _env=new_env,
-                  _out=tee,
-                  _err=tee)
-
-
-def parse_args():
-    parser = OptionParser()
-    parser.add_option("--snapshot-file", default=None,
-                      help="Path to the test-warehouse archive")
-    parser.add_option("--cm-user", default="admin", help="Cloudera Manager admin user")
-    parser.add_option("--cm-pass", default="admin",
-                      help="Cloudera Manager admin user password")
-    parser.add_option("--gateway", default=None,
-                      help=("Gateway host to upload the data from. If not set, uses the "
-                            "CM host as gateway."))
-    parser.add_option("--ssh-user", default="jenkins",
-                      help=("System user on the remote machine with passwordless "
-                            "SSH configured."))
-    parser.add_option("--no-load", action="store_false", default=True, dest="load",
-                      help="Do not try to load the snapshot")
-    parser.add_option("--exploration-strategy", default="core")
-    parser.add_option("--test", action="store_true", default=False,
-                      help="Run end-to-end tests against cluster")
-    parser.set_usage("remote_data_load.py [options] cm_host")
-
-    options, args = parser.parse_args()
-
-    try:
-        return options, args[0]  # args[0] is the cm_host
-    except IndexError:
-        logger.error("You must supply the cm_host.")
-        parser.print_usage()
-        raise
-
-def main(cm_host, options):
-    """
-    Load data to a remote cluster (and/or run tests) as specified.
-
-    Args:
-        cm_host: FQDN or IP of the CM host machine
-        options: an optparse 'options' instance containing RemoteDataLoad
-                 values (though any object with the correct .attributes, e.g.
-                 a collections.namedtuple instance, would also work)
-    """
-    rd = RemoteDataLoad(cm_host, options)
-
-    if options.load:
-        rd.load()
-    if options.test:
-        rd.test()
-
-
-if __name__ == "__main__":
-    options, cm_host = parse_args()
-    main(cm_host=cm_host, options=options)


[4/8] impala git commit: IMPALA-7024: Convert Coordinator::wait_lock_ to SpinLock

Posted by ta...@apache.org.
IMPALA-7024: Convert Coordinator::wait_lock_ to SpinLock

For consistency with the other locks in this class, use
SpinLock rather than boost::mutex. We expect SpinLock to
work okay for locks that block since it is adaptive.
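
A rough standalone sketch of why this is a drop-in swap (assuming impala::SpinLock
from util/spinlock.h exposes the usual lock()/unlock() interface, as the diff
below implies):

    #include <mutex>            // std::lock_guard
    #include "util/spinlock.h"  // SpinLock (assumed header, per the diff)

    class Coordinator {
      SpinLock wait_lock_;            // was boost::mutex
      bool has_called_wait_ = false;  // protected by wait_lock_

     public:
      void Wait() {
        // lock_guard works unchanged because SpinLock meets the BasicLockable
        // requirements (lock()/unlock()); being adaptive, it spins briefly and
        // then backs off instead of burning CPU while blocked.
        std::lock_guard<SpinLock> l(wait_lock_);
        if (has_called_wait_) return;
        has_called_wait_ = true;
      }
    };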

This came up in the code review for IMPALA-5384, but I
wanted to make this change separately, just in case of
unforeseen side-effects.

Change-Id: I48b2e7f819b1180f82811abf5701c8d07e6505e3
Reviewed-on: http://gerrit.cloudera.org:8080/10392
Reviewed-by: Tim Armstrong <ta...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/4ce9dbb4
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/4ce9dbb4
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/4ce9dbb4

Branch: refs/heads/master
Commit: 4ce9dbb475ef52812691d0cddae6df75ab496369
Parents: 33f90cc
Author: Dan Hecht <dh...@cloudera.com>
Authored: Mon May 14 11:45:25 2018 -0700
Committer: Impala Public Jenkins <im...@cloudera.com>
Committed: Mon May 14 22:17:58 2018 +0000

----------------------------------------------------------------------
 be/src/runtime/coordinator.cc | 2 +-
 be/src/runtime/coordinator.h  | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/4ce9dbb4/be/src/runtime/coordinator.cc
----------------------------------------------------------------------
diff --git a/be/src/runtime/coordinator.cc b/be/src/runtime/coordinator.cc
index 5a3722c..91f2e29 100644
--- a/be/src/runtime/coordinator.cc
+++ b/be/src/runtime/coordinator.cc
@@ -586,7 +586,7 @@ void Coordinator::WaitForBackends() {
 }
 
 Status Coordinator::Wait() {
-  lock_guard<mutex> l(wait_lock_);
+  lock_guard<SpinLock> l(wait_lock_);
   SCOPED_TIMER(query_profile_->total_time_counter());
   if (has_called_wait_) return Status::OK();
   has_called_wait_ = true;

http://git-wip-us.apache.org/repos/asf/impala/blob/4ce9dbb4/be/src/runtime/coordinator.h
----------------------------------------------------------------------
diff --git a/be/src/runtime/coordinator.h b/be/src/runtime/coordinator.h
index a0b9b4c..36c9f26 100644
--- a/be/src/runtime/coordinator.h
+++ b/be/src/runtime/coordinator.h
@@ -21,7 +21,6 @@
 #include <string>
 #include <vector>
 #include <boost/scoped_ptr.hpp>
-#include <boost/thread/mutex.hpp>
 #include <boost/unordered_map.hpp>
 #include <rapidjson/document.h>
 
@@ -33,6 +32,7 @@
 #include "util/condition-variable.h"
 #include "util/progress-updater.h"
 #include "util/runtime-profile-counters.h"
+#include "util/spinlock.h"
 
 namespace impala {
 
@@ -205,7 +205,7 @@ class Coordinator { // NOLINT: The member variables could be re-ordered to save
   PlanRootSink* coord_sink_ = nullptr;
 
   /// ensures single-threaded execution of Wait(). See lock ordering class comment.
-  boost::mutex wait_lock_;
+  SpinLock wait_lock_;
 
   bool has_called_wait_ = false;  // if true, Wait() was called; protected by wait_lock_
 


[7/8] impala git commit: IMPALA-7018: fix spill-to-disk encryption err handling

Posted by ta...@apache.org.
IMPALA-7018: fix spill-to-disk encryption err handling

The EVP_CIPHER_CTX_ctrl() function was being misused:
1. It was called before initialising the context
2. Errors were not being handled (including the error from #1)

Testing:
Added some checks to assert that the OpenSSL error queue is empty.
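
As a minimal sketch of the corrected pattern (reusing OpenSSLErr() and the GCM
constants from the patch below; EVP_CIPHER_CTX_ctrl() is only valid on an
initialised context, and returns 1 on success):

    EVP_CIPHER_CTX ctx;
    EVP_CIPHER_CTX_init(&ctx);
    EVP_CIPHER_CTX_set_padding(&ctx, 0);

    // 1. Initialise the cipher context first.
    if (EVP_EncryptInit_ex(&ctx, cipher, NULL, key, iv) != 1) {
      return OpenSSLErr("EVP_EncryptInit_ex", "encrypting");
    }
    // 2. Only then set the GCM IV length, and check the return value so a
    //    failure doesn't linger on the OpenSSL error queue.
    if (EVP_CIPHER_CTX_ctrl(&ctx, EVP_CTRL_GCM_SET_IVLEN, AES_BLOCK_SIZE, NULL) != 1) {
      return OpenSSLErr("EVP_CIPHER_CTX_ctrl", "encrypting");
    }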

Change-Id: I054a5e76df51b293f4728d30fd3b3cd7640624fb
Reviewed-on: http://gerrit.cloudera.org:8080/10385
Reviewed-by: Tim Armstrong <ta...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/675e4c16
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/675e4c16
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/675e4c16

Branch: refs/heads/master
Commit: 675e4c161fb39b5825af5ecff10a1a41ebdb3d16
Parents: 3033f3b
Author: Tim Armstrong <ta...@cloudera.com>
Authored: Fri May 11 16:43:21 2018 -0700
Committer: Impala Public Jenkins <im...@cloudera.com>
Committed: Tue May 15 03:37:27 2018 +0000

----------------------------------------------------------------------
 be/src/util/openssl-util-test.cc |  5 ++++-
 be/src/util/openssl-util.cc      | 32 +++++++++++++++++++-------------
 2 files changed, 23 insertions(+), 14 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/675e4c16/be/src/util/openssl-util-test.cc
----------------------------------------------------------------------
diff --git a/be/src/util/openssl-util-test.cc b/be/src/util/openssl-util-test.cc
index 76f65a5..77cb255 100644
--- a/be/src/util/openssl-util-test.cc
+++ b/be/src/util/openssl-util-test.cc
@@ -18,6 +18,7 @@
 #include <random>
 
 #include <gtest/gtest.h>
+#include <openssl/err.h>
 #include <openssl/rand.h>
 
 #include "common/init.h"
@@ -32,7 +33,9 @@ namespace impala {
 class OpenSSLUtilTest : public ::testing::Test {
  protected:
   virtual void SetUp() { SeedOpenSSLRNG(); }
-  virtual void TearDown() {}
+  virtual void TearDown() {
+    EXPECT_EQ(ERR_peek_error(), 0) << "Did not clear OpenSSL error queue";
+  }
 
   /// Fill buffer 'data' with 'len' bytes of random binary data from 'rng_'.
   /// 'len' must be a multiple of 8 bytes'.

http://git-wip-us.apache.org/repos/asf/impala/blob/675e4c16/be/src/util/openssl-util.cc
----------------------------------------------------------------------
diff --git a/be/src/util/openssl-util.cc b/be/src/util/openssl-util.cc
index 2b368da..065dbc8 100644
--- a/be/src/util/openssl-util.cc
+++ b/be/src/util/openssl-util.cc
@@ -100,10 +100,10 @@ static int OpenSSLErrCallback(const char* buf, size_t len, void* ctx) {
 }
 
 // Called upon OpenSSL errors; returns a non-OK status with an error message.
-static Status OpenSSLErr(const string& function) {
+static Status OpenSSLErr(const string& function, const string& context) {
   stringstream errstream;
   ERR_print_errors_cb(OpenSSLErrCallback, &errstream);
-  return Status(Substitute("OpenSSL error in $0: $1", function, errstream.str()));
+  return Status(Substitute("OpenSSL error in $0 $1: $2", function, context, errstream.str()));
 }
 
 void SeedOpenSSLRNG() {
@@ -113,6 +113,7 @@ void SeedOpenSSLRNG() {
 void IntegrityHash::Compute(const uint8_t* data, int64_t len) {
   // Explicitly ignore the return value from SHA256(); it can't fail.
   (void)SHA256(data, len, hash_);
+  DCHECK_EQ(ERR_peek_error(), 0) << "Did not clear OpenSSL error queue";
 }
 
 bool IntegrityHash::Verify(const uint8_t* data, int64_t len) const {
@@ -144,15 +145,12 @@ Status EncryptionKey::EncryptInternal(
     bool encrypt, const uint8_t* data, int64_t len, uint8_t* out) {
   DCHECK(initialized_);
   DCHECK_GE(len, 0);
+  const char* err_context = encrypt ? "encrypting" : "decrypting";
   // Create and initialize the context for encryption
   EVP_CIPHER_CTX ctx;
   EVP_CIPHER_CTX_init(&ctx);
   EVP_CIPHER_CTX_set_padding(&ctx, 0);
 
-  if (IsGcmMode()) {
-    EVP_CIPHER_CTX_ctrl(&ctx, EVP_CTRL_GCM_SET_IVLEN, AES_BLOCK_SIZE, NULL);
-  }
-
   // Start encryption/decryption.  We use a 256-bit AES key, and the cipher block mode
   // is either CTR or CFB(stream cipher), both of which support arbitrary length
   // ciphertexts - it doesn't have to be a multiple of 16 bytes. Additionally, CTR
@@ -161,9 +159,13 @@ Status EncryptionKey::EncryptInternal(
   const EVP_CIPHER* evpCipher = GetCipher();
   int success = encrypt ? EVP_EncryptInit_ex(&ctx, evpCipher, NULL, key_, iv_) :
                           EVP_DecryptInit_ex(&ctx, evpCipher, NULL, key_, iv_);
-
   if (success != 1) {
-    return OpenSSLErr(encrypt ? "EVP_EncryptInit_ex" : "EVP_DecryptInit_ex");
+    return OpenSSLErr(encrypt ? "EVP_EncryptInit_ex" : "EVP_DecryptInit_ex", err_context);
+  }
+  if (IsGcmMode()) {
+    if (EVP_CIPHER_CTX_ctrl(&ctx, EVP_CTRL_GCM_SET_IVLEN, AES_BLOCK_SIZE, NULL) != 1) {
+      return OpenSSLErr("EVP_CIPHER_CTX_ctrl", err_context);
+    }
   }
 
   // The OpenSSL encryption APIs use ints for buffer lengths for some reason. To support
@@ -176,7 +178,7 @@ Status EncryptionKey::EncryptInternal(
         EVP_EncryptUpdate(&ctx, out + offset, &out_len, data + offset, in_len) :
         EVP_DecryptUpdate(&ctx, out + offset, &out_len, data + offset, in_len);
     if (success != 1) {
-      return OpenSSLErr(encrypt ? "EVP_EncryptUpdate" : "EVP_DecryptUpdate");
+      return OpenSSLErr(encrypt ? "EVP_EncryptUpdate" : "EVP_DecryptUpdate", err_context);
     }
     // This is safe because we're using CTR/CFB mode without padding.
     DCHECK_EQ(in_len, out_len);
@@ -185,7 +187,9 @@ Status EncryptionKey::EncryptInternal(
 
   if (IsGcmMode() && !encrypt) {
     // Set expected tag value
-    EVP_CIPHER_CTX_ctrl(&ctx, EVP_CTRL_GCM_SET_TAG, AES_BLOCK_SIZE, gcm_tag_);
+    if (EVP_CIPHER_CTX_ctrl(&ctx, EVP_CTRL_GCM_SET_TAG, AES_BLOCK_SIZE, gcm_tag_) != 1) {
+      return OpenSSLErr("EVP_CIPHER_CTX_ctrl", err_context);
+    }
   }
 
   // Finalize encryption or decryption.
@@ -193,15 +197,17 @@ Status EncryptionKey::EncryptInternal(
   success = encrypt ? EVP_EncryptFinal_ex(&ctx, out + offset, &final_out_len) :
                       EVP_DecryptFinal_ex(&ctx, out + offset, &final_out_len);
   if (success != 1) {
-    return OpenSSLErr(encrypt ? "EVP_EncryptFinal" : "EVP_DecryptFinal");
+    return OpenSSLErr(encrypt ? "EVP_EncryptFinal" : "EVP_DecryptFinal", err_context);
   }
 
   if (IsGcmMode() && encrypt) {
-    EVP_CIPHER_CTX_ctrl(&ctx, EVP_CTRL_GCM_GET_TAG, AES_BLOCK_SIZE, gcm_tag_);
+    if (EVP_CIPHER_CTX_ctrl(&ctx, EVP_CTRL_GCM_GET_TAG, AES_BLOCK_SIZE, gcm_tag_) != 1) {
+      return OpenSSLErr("EVP_CIPHER_CTX_ctrl", err_context);
+    }
   }
-
   // Again safe due to GCM/CTR/CFB with no padding
   DCHECK_EQ(final_out_len, 0);
+  DCHECK_EQ(ERR_peek_error(), 0) << "Did not clear OpenSSL error queue";
   return Status::OK();
 }
 


[5/8] impala git commit: Remove IMPALA_THRIFT_JAVA_VERSION and untested Darwin Thrift versions.

Posted by ta...@apache.org.
Remove IMPALA_THRIFT_JAVA_VERSION and untested Darwin Thrift versions.

The singular use of IMPALA_THRIFT_JAVA_VERSION was in
impala-parent/pom.xml. We can reduce complexity by just inlining
the version there, like we do with several other Java dependencies.

Meanwhile, with the upgrade to Thrift 0.9.3, it doesn't make sense
to retain the Darwin path for Thrift 0.9.2. The reality is likely
that nobody is building Impala on Darwin.

Cherry-picks: not for 2.x

Change-Id: I98f8c0b344afd001864653659c045b5d3c3e94ac
Reviewed-on: http://gerrit.cloudera.org:8080/10361
Reviewed-by: Tianyi Wang <tw...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/c6fc0be2
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/c6fc0be2
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/c6fc0be2

Branch: refs/heads/master
Commit: c6fc0be282ec9e8d3b3b043f79570d4cef4fb65d
Parents: 4ce9dbb
Author: Philip Zeyliger <ph...@cloudera.com>
Authored: Wed May 9 14:57:17 2018 -0700
Committer: Impala Public Jenkins <im...@cloudera.com>
Committed: Mon May 14 23:09:48 2018 +0000

----------------------------------------------------------------------
 bin/impala-config.sh  | 6 ------
 impala-parent/pom.xml | 2 +-
 2 files changed, 1 insertion(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/c6fc0be2/bin/impala-config.sh
----------------------------------------------------------------------
diff --git a/bin/impala-config.sh b/bin/impala-config.sh
index 8c57afc..97bec30 100755
--- a/bin/impala-config.sh
+++ b/bin/impala-config.sh
@@ -148,8 +148,6 @@ export IMPALA_TPC_H_VERSION=2.17.0
 unset IMPALA_TPC_H_URL
 export IMPALA_THRIFT_VERSION=0.9.3-p4
 unset IMPALA_THRIFT_URL
-export IMPALA_THRIFT_JAVA_VERSION=0.9.3
-unset IMPALA_THRIFT_JAVA_URL
 export IMPALA_ZLIB_VERSION=1.2.8
 unset IMPALA_ZLIB_URL
 
@@ -160,10 +158,6 @@ if [[ $OSTYPE == "darwin"* ]]; then
   unset IMPALA_GPERFTOOLS_URL
   IMPALA_OPENSSL_VERSION=1.0.1p
   unset IMPALA_OPENSSL_URL
-  IMPALA_THRIFT_VERSION=0.9.2
-  unset IMPALA_THRIFT_URL
-  IMPALA_THRIFT_JAVA_VERSION=0.9.2
-  unset IMPALA_THRIFT_JAVA_URL
 fi
 
 # Kudu version in the toolchain; provides libkudu_client.so and minicluster binaries.

http://git-wip-us.apache.org/repos/asf/impala/blob/c6fc0be2/impala-parent/pom.xml
----------------------------------------------------------------------
diff --git a/impala-parent/pom.xml b/impala-parent/pom.xml
index b806f2b..f1990d6 100644
--- a/impala-parent/pom.xml
+++ b/impala-parent/pom.xml
@@ -42,7 +42,7 @@ under the License.
     <hbase.version>${env.IMPALA_HBASE_VERSION}</hbase.version>
     <parquet.version>${env.IMPALA_PARQUET_VERSION}</parquet.version>
     <kite.version>${env.IMPALA_KITE_VERSION}</kite.version>
-    <thrift.version>${env.IMPALA_THRIFT_JAVA_VERSION}</thrift.version>
+    <thrift.version>0.9.3</thrift.version>
     <impala.extdatasrc.api.version>1.0-SNAPSHOT</impala.extdatasrc.api.version>
     <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
     <kudu.version>${env.KUDU_JAVA_VERSION}</kudu.version>


[8/8] impala git commit: IMPALA-6916: Implement COMMENT ON DATABASE

Posted by ta...@apache.org.
IMPALA-6916: Implement COMMENT ON DATABASE

This patch implements updating the comment on a database.

Syntax:
COMMENT ON DATABASE db IS 'comment'
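
Example usage (illustrative; per the grammar change below, IS NULL removes
the comment):

    COMMENT ON DATABASE functional IS 'functional test data';
    COMMENT ON DATABASE functional IS '';
    COMMENT ON DATABASE functional IS NULL;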

Testing:
- Added new front-end tests
- Ran all front-end tests
- Added new end-to-end tests
- Ran end-to-end DDL tests

Change-Id: Ifcf909c18f97073346f6f603538bf921e69fbb00
Reviewed-on: http://gerrit.cloudera.org:8080/10171
Reviewed-by: Alex Behm <al...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/97ecc154
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/97ecc154
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/97ecc154

Branch: refs/heads/master
Commit: 97ecc154bdb43912eab2d4748693f24bd5da5ba6
Parents: 675e4c1
Author: Fredy Wijaya <fw...@cloudera.com>
Authored: Mon Apr 23 20:48:49 2018 -0700
Committer: Impala Public Jenkins <im...@cloudera.com>
Committed: Tue May 15 04:02:30 2018 +0000

----------------------------------------------------------------------
 common/thrift/CatalogService.thrift             |  3 ++
 common/thrift/JniCatalog.thrift                 |  8 +++
 fe/src/main/cup/sql-parser.cup                  | 17 ++++++-
 .../apache/impala/analysis/AnalysisContext.java |  9 +++-
 .../apache/impala/analysis/CommentOnDbStmt.java | 47 ++++++++++++++++++
 .../apache/impala/analysis/CommentOnStmt.java   | 37 ++++++++++++++
 .../impala/service/CatalogOpExecutor.java       | 51 ++++++++++++++++++++
 .../org/apache/impala/service/Frontend.java     | 10 ++++
 .../apache/impala/analysis/AnalyzeDDLTest.java  |  9 ++++
 .../impala/analysis/AuthorizationTest.java      | 11 +++++
 .../org/apache/impala/analysis/ParserTest.java  | 15 ++++--
 tests/metadata/test_ddl.py                      | 16 ++++++
 tests/metadata/test_ddl_base.py                 |  5 ++
 13 files changed, 233 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/97ecc154/common/thrift/CatalogService.thrift
----------------------------------------------------------------------
diff --git a/common/thrift/CatalogService.thrift b/common/thrift/CatalogService.thrift
index 8a93998..6c02722 100644
--- a/common/thrift/CatalogService.thrift
+++ b/common/thrift/CatalogService.thrift
@@ -129,6 +129,9 @@ struct TDdlExecRequest {
 
   // True if SYNC_DDL is set in query options
   22: required bool sync_ddl
+
+  // Parameters for COMMENT ON
+  23: optional JniCatalog.TCommentOnParams comment_on_params
 }
 
 // Response from executing a TDdlExecRequest

http://git-wip-us.apache.org/repos/asf/impala/blob/97ecc154/common/thrift/JniCatalog.thrift
----------------------------------------------------------------------
diff --git a/common/thrift/JniCatalog.thrift b/common/thrift/JniCatalog.thrift
index 371475b..d3bd7a0 100644
--- a/common/thrift/JniCatalog.thrift
+++ b/common/thrift/JniCatalog.thrift
@@ -52,6 +52,7 @@ enum TDdlType {
   GRANT_PRIVILEGE,
   REVOKE_PRIVILEGE,
   TRUNCATE_TABLE,
+  COMMENT_ON
 }
 
 // Types of ALTER TABLE commands supported.
@@ -630,3 +631,10 @@ struct TGetCatalogUsageResponse{
   2: required list<TTableUsageMetrics> frequently_accessed_tables
 }
 
+struct TCommentOnParams {
+  // The new comment. When this field is not set, the comment will be removed.
+  1: optional string comment
+
+  // Name of database to alter.
+  2: optional string db
+}

http://git-wip-us.apache.org/repos/asf/impala/blob/97ecc154/fe/src/main/cup/sql-parser.cup
----------------------------------------------------------------------
diff --git a/fe/src/main/cup/sql-parser.cup b/fe/src/main/cup/sql-parser.cup
index f2d7cef..261f8ec 100644
--- a/fe/src/main/cup/sql-parser.cup
+++ b/fe/src/main/cup/sql-parser.cup
@@ -409,6 +409,7 @@ nonterminal Qualifier union_op;
 
 nonterminal PartitionDef partition_def;
 nonterminal List<PartitionDef> partition_def_list;
+nonterminal CommentOnStmt comment_on_stmt;
 nonterminal AlterTableStmt alter_tbl_stmt;
 nonterminal StatementBase alter_view_stmt;
 nonterminal ComputeStatsStmt compute_stats_stmt;
@@ -443,7 +444,7 @@ nonterminal ArrayList<StructField> struct_field_def_list;
 // Options for DDL commands - CREATE/DROP/ALTER
 nonterminal HdfsCachingOp cache_op_val, opt_cache_op_val;
 nonterminal BigDecimal opt_cache_op_replication;
-nonterminal String comment_val, opt_comment_val;
+nonterminal String comment_val, opt_comment_val, nullable_comment_val;
 nonterminal Boolean external_val;
 nonterminal Boolean purge_val;
 nonterminal String opt_init_string_val;
@@ -650,6 +651,8 @@ stmt ::=
   {: RESULT = grant_privilege; :}
   | revoke_privilege_stmt:revoke_privilege
   {: RESULT = revoke_privilege; :}
+  | comment_on_stmt:comment_on
+  {: RESULT = comment_on; :}
   | stmt:s SEMICOLON
   {: RESULT = s; :}
   ;
@@ -1009,6 +1012,11 @@ partition_def_list ::=
   :}
   ;
 
+comment_on_stmt ::=
+  KW_COMMENT KW_ON KW_DATABASE ident_or_default:db_name KW_IS nullable_comment_val:comment
+  {: RESULT = new CommentOnDbStmt(db_name, comment); :}
+  ;
+
 alter_tbl_stmt ::=
   KW_ALTER KW_TABLE table_name:table replace_existing_cols_val:replace KW_COLUMNS
   LPAREN column_def_list:col_defs RPAREN
@@ -1474,6 +1482,13 @@ opt_comment_val ::=
   {: RESULT = null; :}
   ;
 
+nullable_comment_val ::=
+  STRING_LITERAL:comment
+  {: RESULT = comment; :}
+  | KW_NULL
+  {: RESULT = null; :}
+  ;
+
 location_val ::=
   KW_LOCATION STRING_LITERAL:location
   {: RESULT = new HdfsUri(location); :}

http://git-wip-us.apache.org/repos/asf/impala/blob/97ecc154/fe/src/main/java/org/apache/impala/analysis/AnalysisContext.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/analysis/AnalysisContext.java b/fe/src/main/java/org/apache/impala/analysis/AnalysisContext.java
index c886079..791e528 100644
--- a/fe/src/main/java/org/apache/impala/analysis/AnalysisContext.java
+++ b/fe/src/main/java/org/apache/impala/analysis/AnalysisContext.java
@@ -149,6 +149,7 @@ public class AnalysisContext {
     public UpdateStmt getUpdateStmt() { return (UpdateStmt) stmt_; }
     public boolean isDeleteStmt() { return stmt_ instanceof DeleteStmt; }
     public DeleteStmt getDeleteStmt() { return (DeleteStmt) stmt_; }
+    public boolean isCommentOnStmt() { return stmt_ instanceof CommentOnStmt; }
 
     public boolean isCatalogOp() {
       return isUseStmt() || isViewMetadataStmt() || isDdlStmt();
@@ -161,7 +162,8 @@ public class AnalysisContext {
           isAlterViewStmt() || isComputeStatsStmt() || isCreateUdfStmt() ||
           isCreateUdaStmt() || isDropFunctionStmt() || isCreateTableAsSelectStmt() ||
           isCreateDataSrcStmt() || isDropDataSrcStmt() || isDropStatsStmt() ||
-          isCreateDropRoleStmt() || isGrantRevokeStmt() || isTruncateStmt();
+          isCreateDropRoleStmt() || isGrantRevokeStmt() || isTruncateStmt() ||
+          isCommentOnStmt();
     }
 
     private boolean isViewMetadataStmt() {
@@ -349,6 +351,11 @@ public class AnalysisContext {
       return (ShowCreateFunctionStmt) stmt_;
     }
 
+    public CommentOnStmt getCommentOnStmt() {
+      Preconditions.checkState(isCommentOnStmt());
+      return (CommentOnStmt) stmt_;
+    }
+
     public StatementBase getStmt() { return stmt_; }
     public Analyzer getAnalyzer() { return analyzer_; }
     public Set<TAccessEvent> getAccessEvents() { return analyzer_.getAccessEvents(); }

http://git-wip-us.apache.org/repos/asf/impala/blob/97ecc154/fe/src/main/java/org/apache/impala/analysis/CommentOnDbStmt.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/analysis/CommentOnDbStmt.java b/fe/src/main/java/org/apache/impala/analysis/CommentOnDbStmt.java
new file mode 100644
index 0000000..8f622fd
--- /dev/null
+++ b/fe/src/main/java/org/apache/impala/analysis/CommentOnDbStmt.java
@@ -0,0 +1,47 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.impala.analysis;
+
+import com.google.common.base.Preconditions;
+import org.apache.impala.authorization.Privilege;
+import org.apache.impala.common.AnalysisException;
+import org.apache.impala.thrift.TCommentOnParams;
+
+/**
+ * Represents a COMMENT ON DATABASE db IS 'comment' statement.
+ */
+public class CommentOnDbStmt extends CommentOnStmt {
+  private final String dbName_;
+
+  public CommentOnDbStmt(String dbName, String comment) {
+    super(comment);
+    Preconditions.checkNotNull(dbName);
+    dbName_ = dbName;
+  }
+
+  @Override
+  public void analyze(Analyzer analyzer) throws AnalysisException {
+    analyzer.getDb(dbName_, Privilege.ALTER);
+  }
+
+  public TCommentOnParams toThrift() {
+    TCommentOnParams params = super.toThrift();
+    params.setDb(dbName_);
+    return params;
+  }
+}

http://git-wip-us.apache.org/repos/asf/impala/blob/97ecc154/fe/src/main/java/org/apache/impala/analysis/CommentOnStmt.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/analysis/CommentOnStmt.java b/fe/src/main/java/org/apache/impala/analysis/CommentOnStmt.java
new file mode 100644
index 0000000..dffc08d
--- /dev/null
+++ b/fe/src/main/java/org/apache/impala/analysis/CommentOnStmt.java
@@ -0,0 +1,37 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.impala.analysis;
+
+import org.apache.impala.thrift.TCommentOnParams;
+
+/**
+ * A base class for COMMENT ON statement.
+ */
+public class CommentOnStmt extends StatementBase {
+  protected final String comment_;
+
+  public CommentOnStmt(String comment) {
+    comment_ = comment;
+  }
+
+  public TCommentOnParams toThrift() {
+    TCommentOnParams params = new TCommentOnParams();
+    params.setComment(comment_);
+    return params;
+  }
+}

http://git-wip-us.apache.org/repos/asf/impala/blob/97ecc154/fe/src/main/java/org/apache/impala/service/CatalogOpExecutor.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/service/CatalogOpExecutor.java b/fe/src/main/java/org/apache/impala/service/CatalogOpExecutor.java
index fdee124..ea67389 100644
--- a/fe/src/main/java/org/apache/impala/service/CatalogOpExecutor.java
+++ b/fe/src/main/java/org/apache/impala/service/CatalogOpExecutor.java
@@ -39,6 +39,7 @@ import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
 import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;
 import org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc;
 import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
+import org.apache.hadoop.hive.metastore.api.Database;
 import org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData;
 import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
@@ -109,6 +110,7 @@ import org.apache.impala.thrift.TColumn;
 import org.apache.impala.thrift.TColumnStats;
 import org.apache.impala.thrift.TColumnType;
 import org.apache.impala.thrift.TColumnValue;
+import org.apache.impala.thrift.TCommentOnParams;
 import org.apache.impala.thrift.TCreateDataSourceParams;
 import org.apache.impala.thrift.TCreateDbParams;
 import org.apache.impala.thrift.TCreateDropRoleParams;
@@ -325,6 +327,9 @@ public class CatalogOpExecutor {
         grantRevokeRolePrivilege(requestingUser,
             ddlRequest.getGrant_revoke_priv_params(), response);
         break;
+      case COMMENT_ON:
+        alterCommentOn(ddlRequest.getComment_on_params(), response);
+        break;
       default: throw new IllegalStateException("Unexpected DDL exec request type: " +
           ddlRequest.ddl_type);
     }
@@ -3503,4 +3508,50 @@ public class CatalogOpExecutor {
     Preconditions.checkState(tbl.isLoaded());
     return tbl;
   }
+
+  private void alterCommentOn(TCommentOnParams params, TDdlExecResponse response)
+      throws ImpalaRuntimeException, CatalogException, InternalException {
+    if (params.getDb() != null) {
+      alterCommentOnDb(params.getDb(), params.getComment(), response);
+    }
+  }
+
+  private void alterCommentOnDb(String dbName, String comment, TDdlExecResponse response)
+      throws ImpalaRuntimeException, CatalogException, InternalException {
+    Db db = catalog_.getDb(dbName);
+    if (db == null) {
+      throw new CatalogException("Database: " + dbName + " does not exist.");
+    }
+    synchronized (metastoreDdlLock_) {
+      Database msDb = db.getMetaStoreDb();
+      String originalComment = msDb.getDescription();
+      msDb.setDescription(comment);
+      try {
+        applyAlterDatabase(db);
+      } catch (ImpalaRuntimeException e) {
+        msDb.setDescription(originalComment);
+        throw e;
+      }
+    }
+    addDbToCatalogUpdate(db, response.result);
+    addSummary(response, "Updated database.");
+  }
+
+  private void addDbToCatalogUpdate(Db db, TCatalogUpdateResult result) {
+    Preconditions.checkNotNull(db);
+    // Increment the catalog version and assign it to the DB while holding the
+    // catalog version lock, so the update is atomic. Most DB operations are
+    // short-lived. It is unnecessary to have a fine-grained DB lock.
+    catalog_.getLock().writeLock().lock();
+    try {
+      long newCatalogVersion = catalog_.incrementAndGetCatalogVersion();
+      db.setCatalogVersion(newCatalogVersion);
+      TCatalogObject updatedCatalogObject = db.toTCatalogObject();
+      updatedCatalogObject.setCatalog_version(newCatalogVersion);
+      result.addToUpdated_catalog_objects(updatedCatalogObject);
+      result.setVersion(updatedCatalogObject.getCatalog_version());
+    } finally {
+      catalog_.getLock().writeLock().unlock();
+    }
+  }
 }

http://git-wip-us.apache.org/repos/asf/impala/blob/97ecc154/fe/src/main/java/org/apache/impala/service/Frontend.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/service/Frontend.java b/fe/src/main/java/org/apache/impala/service/Frontend.java
index ae3975c..9b86312 100644
--- a/fe/src/main/java/org/apache/impala/service/Frontend.java
+++ b/fe/src/main/java/org/apache/impala/service/Frontend.java
@@ -37,6 +37,7 @@ import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.impala.analysis.AnalysisContext;
 import org.apache.impala.analysis.AnalysisContext.AnalysisResult;
+import org.apache.impala.analysis.CommentOnStmt;
 import org.apache.impala.analysis.CreateDataSrcStmt;
 import org.apache.impala.analysis.CreateDropRoleStmt;
 import org.apache.impala.analysis.CreateUdaStmt;
@@ -99,6 +100,7 @@ import org.apache.impala.thrift.TCatalogOpType;
 import org.apache.impala.thrift.TCatalogServiceRequestHeader;
 import org.apache.impala.thrift.TColumn;
 import org.apache.impala.thrift.TColumnValue;
+import org.apache.impala.thrift.TCommentOnParams;
 import org.apache.impala.thrift.TCreateDropRoleParams;
 import org.apache.impala.thrift.TDdlExecRequest;
 import org.apache.impala.thrift.TDdlType;
@@ -498,6 +500,14 @@ public class Frontend {
       req.setGrant_revoke_priv_params(params);
       ddl.op_type = TCatalogOpType.DDL;
       ddl.setDdl_params(req);
+    } else if (analysis.isCommentOnStmt()) {
+      CommentOnStmt commentOnStmt = analysis.getCommentOnStmt();
+      TCommentOnParams params = commentOnStmt.toThrift();
+      TDdlExecRequest req = new TDdlExecRequest();
+      req.setDdl_type(TDdlType.COMMENT_ON);
+      req.setComment_on_params(params);
+      ddl.op_type = TCatalogOpType.DDL;
+      ddl.setDdl_params(req);
     } else {
       throw new IllegalStateException("Unexpected CatalogOp statement type.");
     }

http://git-wip-us.apache.org/repos/asf/impala/blob/97ecc154/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java
----------------------------------------------------------------------
diff --git a/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java b/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java
index c2bae5c..e5cd766 100644
--- a/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java
+++ b/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java
@@ -3902,4 +3902,13 @@ public class AnalyzeDDLTest extends FrontendTestBase {
       }
     }
   }
+
+  @Test
+  public void TestCommentOn() {
+    AnalyzesOk("comment on database functional is 'comment'");
+    AnalyzesOk("comment on database functional is ''");
+    AnalyzesOk("comment on database functional is null");
+    AnalysisError("comment on database doesntexist is 'comment'",
+        "Database does not exist: doesntexist");
+  }
 }

http://git-wip-us.apache.org/repos/asf/impala/blob/97ecc154/fe/src/test/java/org/apache/impala/analysis/AuthorizationTest.java
----------------------------------------------------------------------
diff --git a/fe/src/test/java/org/apache/impala/analysis/AuthorizationTest.java b/fe/src/test/java/org/apache/impala/analysis/AuthorizationTest.java
index b04e138..89051c1 100644
--- a/fe/src/test/java/org/apache/impala/analysis/AuthorizationTest.java
+++ b/fe/src/test/java/org/apache/impala/analysis/AuthorizationTest.java
@@ -3030,6 +3030,17 @@ public class AuthorizationTest extends FrontendTestBase {
     }
   }
 
+  @Test
+  public void TestCommentOn() throws ImpalaException {
+    // User has ALTER privilege on functional_text_lzo database.
+    AuthzOk("comment on database functional_text_lzo is 'comment'");
+    // User does not have ALTER privilege on functional database.
+    AuthzError("comment on database functional is 'comment'",
+        "User '%s' does not have privileges to execute 'ALTER' on: functional");
+    AuthzError("comment on database doesntexist is 'comment'",
+        "User '%s' does not have privileges to execute 'ALTER' on: doesntexist");
+  }
+
   private void TestWithIncorrectConfig(AuthorizationConfig authzConfig, User user)
       throws ImpalaException {
     Frontend fe = new Frontend(authzConfig, ctx_.catalog);

http://git-wip-us.apache.org/repos/asf/impala/blob/97ecc154/fe/src/test/java/org/apache/impala/analysis/ParserTest.java
----------------------------------------------------------------------
diff --git a/fe/src/test/java/org/apache/impala/analysis/ParserTest.java b/fe/src/test/java/org/apache/impala/analysis/ParserTest.java
index ab0eb78..b5082db 100644
--- a/fe/src/test/java/org/apache/impala/analysis/ParserTest.java
+++ b/fe/src/test/java/org/apache/impala/analysis/ParserTest.java
@@ -3233,9 +3233,9 @@ public class ParserTest extends FrontendTestBase {
         "c, b, c from t\n" +
         "^\n" +
         "Encountered: IDENTIFIER\n" +
-        "Expected: ALTER, COMPUTE, CREATE, DELETE, DESCRIBE, DROP, EXPLAIN, GRANT, " +
-        "INSERT, INVALIDATE, LOAD, REFRESH, REVOKE, SELECT, SET, SHOW, TRUNCATE, " +
-        "UPDATE, UPSERT, USE, VALUES, WITH\n");
+        "Expected: ALTER, COMMENT, COMPUTE, CREATE, DELETE, DESCRIBE, DROP, EXPLAIN, " +
+        "GRANT, INSERT, INVALIDATE, LOAD, REFRESH, REVOKE, SELECT, SET, SHOW, " +
+        "TRUNCATE, UPDATE, UPSERT, USE, VALUES, WITH\n");
 
     // missing select list
     ParserError("select from t",
@@ -3736,4 +3736,13 @@ public class ParserTest extends FrontendTestBase {
     ParsesOk("DESCRIBE functional.alltypes;");
     ParsesOk("SET num_nodes=1;");
   }
+
+  @Test
+  public void TestCommentOn() {
+    ParsesOk("COMMENT ON DATABASE db IS 'comment'");
+    ParsesOk("COMMENT ON DATABASE db IS ''");
+    ParsesOk("COMMENT ON DATABASE db IS NULL");
+    ParserError("COMMENT ON DATABASE IS 'comment'");
+    ParserError("COMMENT ON DATABASE db IS");
+  }
 }

http://git-wip-us.apache.org/repos/asf/impala/blob/97ecc154/tests/metadata/test_ddl.py
----------------------------------------------------------------------
diff --git a/tests/metadata/test_ddl.py b/tests/metadata/test_ddl.py
index 0830060..c2f34a9 100644
--- a/tests/metadata/test_ddl.py
+++ b/tests/metadata/test_ddl.py
@@ -194,6 +194,22 @@ class TestDdlStatements(TestDdlBase):
     self.run_test_case('QueryTest/create-database', vector, use_db=unique_database,
         multiple_impalad=self._use_multiple_impalad(vector))
 
+  def test_comment_on_database(self, vector, unique_database):
+    comment = self._get_db_comment(unique_database)
+    assert '' == comment
+
+    self.client.execute("comment on database {0} is 'comment'".format(unique_database))
+    comment = self._get_db_comment(unique_database)
+    assert 'comment' == comment
+
+    self.client.execute("comment on database {0} is ''".format(unique_database))
+    comment = self._get_db_comment(unique_database)
+    assert '' == comment
+
+    self.client.execute("comment on database {0} is null".format(unique_database))
+    comment = self._get_db_comment(unique_database)
+    assert '' == comment
+
   # There is a query in QueryTest/create-table that references nested types, which is not
   # supported if old joins and aggs are enabled. Since we do not get any meaningful
   # additional coverage by running a DDL test under the old aggs and joins, it can be

http://git-wip-us.apache.org/repos/asf/impala/blob/97ecc154/tests/metadata/test_ddl_base.py
----------------------------------------------------------------------
diff --git a/tests/metadata/test_ddl_base.py b/tests/metadata/test_ddl_base.py
index 3044ef0..0b0968d 100644
--- a/tests/metadata/test_ddl_base.py
+++ b/tests/metadata/test_ddl_base.py
@@ -87,3 +87,8 @@ class TestDdlBase(ImpalaTestSuite):
           break
         properties[row[1].rstrip()] = row[2].rstrip()
     return properties
+
+  def _get_db_comment(self, db_name):
+    """Extracts the DB comment from the output of DESCRIBE DATABASE"""
+    result = self.client.execute("describe database {0}".format(db_name))
+    return result.data[0].split('\t')[2]
\ No newline at end of file
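
Taken together, the changes above wire COMMENT ON DATABASE through the parser,
analyzer, frontend, catalog, and tests. A minimal sketch of the resulting
behavior, against a hypothetical database db1 (name invented for illustration):

  COMMENT ON DATABASE db1 IS 'staging area for nightly loads';
  DESCRIBE DATABASE db1;  -- the comment appears in the third column of the first row
  COMMENT ON DATABASE db1 IS '';    -- clears the comment
  COMMENT ON DATABASE db1 IS NULL;  -- also clears it; DESCRIBE then shows an empty string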


[6/8] impala git commit: IMPALA-3813: [DOCS] How to create a Kudu table with a replication factor

Posted by ta...@apache.org.
IMPALA-3813: [DOCS] How to create a Kudu table with a replication factor

Described how to create a Kudu table with a replication factor that is
not the default value of 3.
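
For illustration, the documented property in a hypothetical table definition
(all table and column names invented):

  CREATE TABLE metrics (
    id BIGINT,
    val DOUBLE,
    PRIMARY KEY (id)
  )
  PARTITION BY HASH (id) PARTITIONS 4
  STORED AS KUDU
  TBLPROPERTIES ('kudu.num_tablet_replicas' = '1');  -- must be an odd number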

Change-Id: I9dc68dcd395fcd0bd31563ea46229a12553482dc
Reviewed-on: http://gerrit.cloudera.org:8080/10401
Reviewed-by: Thomas Tauber-Marshall <tm...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/3033f3b3
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/3033f3b3
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/3033f3b3

Branch: refs/heads/master
Commit: 3033f3b3190050f479117f9a27dae5a32d4b8260
Parents: c6fc0be
Author: Alex Rodoni <ar...@cloudera.com>
Authored: Mon May 14 16:54:55 2018 -0700
Committer: Impala Public Jenkins <im...@cloudera.com>
Committed: Tue May 15 00:21:50 2018 +0000

----------------------------------------------------------------------
 docs/topics/impala_create_table.xml |  5 +++++
 docs/topics/impala_kudu.xml         | 36 ++++++++++++++++++++++++--------
 2 files changed, 32 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/3033f3b3/docs/topics/impala_create_table.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_create_table.xml b/docs/topics/impala_create_table.xml
index a60cd40..3434d9e 100644
--- a/docs/topics/impala_create_table.xml
+++ b/docs/topics/impala_create_table.xml
@@ -565,6 +565,11 @@ CREATE TABLE ctas_t1
       <xref href="impala_kudu.xml#kudu_primary_key_attribute"/>.
     </p>
 
+    <p>
+      For more on creating a Kudu table with a specific replication factor, see
+        <xref href="impala_kudu.xml#kudu_replication_factor"/>.
+    </p>
+
     <p rev="IMPALA-3719">
       For more on the <codeph>NULL</codeph> and <codeph>NOT NULL</codeph> attributes, see
       <xref href="impala_kudu.xml#kudu_not_null_attribute"/>.

http://git-wip-us.apache.org/repos/asf/impala/blob/3033f3b3/docs/topics/impala_kudu.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_kudu.xml b/docs/topics/impala_kudu.xml
index 946a066..145654e 100644
--- a/docs/topics/impala_kudu.xml
+++ b/docs/topics/impala_kudu.xml
@@ -144,25 +144,43 @@ under the License.
 
           <li>
             <p>
-              Data is physically divided based on units of storage called <term>tablets</term>. Tablets are
-              stored by <term>tablet servers</term>. Each tablet server can store multiple tablets,
-              and each tablet is replicated across multiple tablet servers, managed automatically by Kudu.
-              Where practical, colocate the tablet servers on the same hosts as the DataNodes, although that is not required.
+              Data is physically divided based on units of storage called
+                <term>tablets</term>. Tablets are stored by <term>tablet
+                servers</term>. Each tablet server can store multiple tablets,
+              and each tablet is replicated across multiple tablet servers,
+              managed automatically by Kudu. Where practical, co-locate the
+              tablet servers on the same hosts as the DataNodes, although that
+              is not required.
             </p>
           </li>
         </ul>
 
-        <p>
-          One consideration for the cluster topology is that the number of replicas for a Kudu table
-          must be odd.
-        </p>
-
       </conbody>
 
     </concept>
 
   </concept>
 
+  <concept id="kudu_replication_factor">
+    <title>Kudu Replication Factor</title>
+    <conbody>
+      <p>
+        By default, Kudu tables created through Impala use a tablet
+        replication factor of 3. To change the replication factor for a Kudu
+        table, specify the replication factor using <codeph>TBLPROPERTIES
+          ('kudu.num_tablet_replicas' = '<i>n</i>')</codeph> in the <keyword
+          keyref="create_table"/> statement.
+      </p>
+
+      <p>
+        The number of replicas for a Kudu table must be odd.
+      </p>
+
+      <p>
+        Altering the <codeph>kudu.num_tablet_replicas</codeph> property after
+        table creation currently has no effect.
+      </p>
+    </conbody>
+  </concept>
+
   <concept id="kudu_ddl">
 
     <title>Impala DDL Enhancements for Kudu Tables (CREATE TABLE and ALTER TABLE)</title>


[3/8] impala git commit: Add a missing PrintId()

Posted by ta...@apache.org.
Add a missing PrintId()

For consistency, wrap a query_id() that is written to a log stream in
PrintId(). This call was missed in the following commit:
IMPALA-5384, part 2: Simplify Coordinator locking and clarify state

This change was already included in the cherry-pick to 2.x, so:
Cherry-picks: not for 2.x

Change-Id: I701115fb7bc88cd034ddb7f0b99cf067ef2a7078
Reviewed-on: http://gerrit.cloudera.org:8080/10391
Reviewed-by: Tim Armstrong <ta...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/33f90cc7
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/33f90cc7
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/33f90cc7

Branch: refs/heads/master
Commit: 33f90cc74b3003c7657d003f522dc2cf20b3ba0c
Parents: f2963a2
Author: Dan Hecht <dh...@cloudera.com>
Authored: Mon May 14 11:21:51 2018 -0700
Committer: Impala Public Jenkins <im...@cloudera.com>
Committed: Mon May 14 21:54:39 2018 +0000

----------------------------------------------------------------------
 be/src/runtime/coordinator.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/33f90cc7/be/src/runtime/coordinator.cc
----------------------------------------------------------------------
diff --git a/be/src/runtime/coordinator.cc b/be/src/runtime/coordinator.cc
index fa3a37e..5a3722c 100644
--- a/be/src/runtime/coordinator.cc
+++ b/be/src/runtime/coordinator.cc
@@ -656,7 +656,7 @@ void Coordinator::CancelBackends() {
 }
 
 Status Coordinator::UpdateBackendExecStatus(const TReportExecStatusParams& params) {
-  VLOG_FILE << "UpdateBackendExecStatus() query_id=" << query_id()
+  VLOG_FILE << "UpdateBackendExecStatus() query_id=" << PrintId(query_id())
             << " backend_idx=" << params.coord_state_idx;
   if (params.coord_state_idx >= backend_states_.size()) {
     return Status(TErrorCode::INTERNAL_ERROR,


[2/8] impala git commit: IMPALA-6233: [DOCS] Documented the COMMENT clause for CREATE VIEW

Posted by ta...@apache.org.
IMPALA-6233: [DOCS] Documented the COMMENT clause for CREATE VIEW
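
A short hypothetical example of the documented syntax (view, column, and table
names invented):

  CREATE VIEW v1 (c1 COMMENT 'first column', c2)
    COMMENT 'view-level comment'
    AS SELECT t1.c1, t1.c2 FROM t1;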

Change-Id: I176d525925c8dc5c5b83612da43b349049764d2b
Reviewed-on: http://gerrit.cloudera.org:8080/10312
Reviewed-by: Alex Behm <al...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/f2963a20
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/f2963a20
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/f2963a20

Branch: refs/heads/master
Commit: f2963a208a929c985024cb0fb2c291572f5cd1e7
Parents: fc47a54
Author: Alex Rodoni <ar...@cloudera.com>
Authored: Fri May 4 14:46:07 2018 -0700
Committer: Impala Public Jenkins <im...@cloudera.com>
Committed: Mon May 14 21:52:57 2018 +0000

----------------------------------------------------------------------
 docs/topics/impala_create_view.xml | 118 +++++++++++++++++++-------------
 1 file changed, 69 insertions(+), 49 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/f2963a20/docs/topics/impala_create_view.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_create_view.xml b/docs/topics/impala_create_view.xml
index c638b69..8a5cca2 100644
--- a/docs/topics/impala_create_view.xml
+++ b/docs/topics/impala_create_view.xml
@@ -21,7 +21,13 @@ under the License.
 <concept rev="1.1" id="create_view">
 
   <title>CREATE VIEW Statement</title>
-  <titlealts audience="PDF"><navtitle>CREATE VIEW</navtitle></titlealts>
+
+  <titlealts audience="PDF">
+
+    <navtitle>CREATE VIEW</navtitle>
+
+  </titlealts>
+
   <prolog>
     <metadata>
       <data name="Category" value="Impala"/>
@@ -38,21 +44,23 @@ under the License.
   <conbody>
 
     <p>
-      <indexterm audience="hidden">CREATE VIEW statement</indexterm>
-      The <codeph>CREATE VIEW</codeph> statement lets you create a shorthand abbreviation for a more complicated
-      query. The base query can involve joins, expressions, reordered columns, column aliases, and other SQL
-      features that can make a query hard to understand or maintain.
+      The <codeph>CREATE VIEW</codeph> statement lets you create a shorthand abbreviation for a
+      more complicated query. The base query can involve joins, expressions, reordered columns,
+      column aliases, and other SQL features that can make a query hard to understand or
+      maintain.
     </p>
 
     <p>
-      Because a view is purely a logical construct (an alias for a query) with no physical data behind it,
-      <codeph>ALTER VIEW</codeph> only involves changes to metadata in the metastore database, not any data files
-      in HDFS.
+      Because a view is purely a logical construct (an alias for a query) with no physical data
+      behind it, <codeph>ALTER VIEW</codeph> only involves changes to metadata in the metastore
+      database, not any data files in HDFS.
     </p>
 
     <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
 
-<codeblock>CREATE VIEW [IF NOT EXISTS] <varname>view_name</varname> [(<varname>column_list</varname>)]
+<codeblock>CREATE VIEW [IF NOT EXISTS] <varname>view_name</varname>
+    [(<varname>column_name</varname> [COMMENT '<varname>column_comment</varname>'][, ...])]
+    [COMMENT '<varname>view_comment</varname>']
   AS <varname>select_statement</varname></codeblock>
 
     <p conref="../shared/impala_common.xml#common/ddl_blurb"/>
@@ -60,59 +68,70 @@ under the License.
     <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
 
     <p>
-      The <codeph>CREATE VIEW</codeph> statement can be useful in scenarios such as the following:
+      The <codeph>CREATE VIEW</codeph> statement can be useful in scenarios such as the
+      following:
     </p>
 
     <ul>
       <li>
-        To turn even the most lengthy and complicated SQL query into a one-liner. You can issue simple queries
-        against the view from applications, scripts, or interactive queries in <cmdname>impala-shell</cmdname>.
-        For example:
+        To turn even the most lengthy and complicated SQL query into a one-liner. You can issue
+        simple queries against the view from applications, scripts, or interactive queries in
+        <cmdname>impala-shell</cmdname>. For example:
 <codeblock>select * from <varname>view_name</varname>;
 select * from <varname>view_name</varname> order by c1 desc limit 10;</codeblock>
-        The more complicated and hard-to-read the original query, the more benefit there is to simplifying the
-        query using a view.
+        The more complicated and hard-to-read the original query, the more benefit there is to
+        simplifying the query using a view.
       </li>
 
       <li>
-        To hide the underlying table and column names, to minimize maintenance problems if those names change. In
-        that case, you re-create the view using the new names, and all queries that use the view rather than the
-        underlying tables keep running with no changes.
+        To hide the underlying table and column names, to minimize maintenance problems if those
+        names change. In that case, you re-create the view using the new names, and all queries
+        that use the view rather than the underlying tables keep running with no changes.
       </li>
 
       <li>
-        To experiment with optimization techniques and make the optimized queries available to all applications.
-        For example, if you find a combination of <codeph>WHERE</codeph> conditions, join order, join hints, and so
-        on that works the best for a class of queries, you can establish a view that incorporates the
-        best-performing techniques. Applications can then make relatively simple queries against the view, without
-        repeating the complicated and optimized logic over and over. If you later find a better way to optimize the
-        original query, when you re-create the view, all the applications immediately take advantage of the
-        optimized base query.
+        To experiment with optimization techniques and make the optimized queries available to
+        all applications. For example, if you find a combination of <codeph>WHERE</codeph>
+        conditions, join order, join hints, and so on that works the best for a class of
+        queries, you can establish a view that incorporates the best-performing techniques.
+        Applications can then make relatively simple queries against the view, without repeating
+        the complicated and optimized logic over and over. If you later find a better way to
+        optimize the original query, when you re-create the view, all the applications
+        immediately take advantage of the optimized base query.
       </li>
 
       <li>
-        To simplify a whole class of related queries, especially complicated queries involving joins between
-        multiple tables, complicated expressions in the column list, and other SQL syntax that makes the query
-        difficult to understand and debug. For example, you might create a view that joins several tables, filters
-        using several <codeph>WHERE</codeph> conditions, and selects several columns from the result set.
-        Applications might issue queries against this view that only vary in their <codeph>LIMIT</codeph>,
-        <codeph>ORDER BY</codeph>, and similar simple clauses.
+        To simplify a whole class of related queries, especially complicated queries involving
+        joins between multiple tables, complicated expressions in the column list, and other SQL
+        syntax that makes the query difficult to understand and debug. For example, you might
+        create a view that joins several tables, filters using several <codeph>WHERE</codeph>
+        conditions, and selects several columns from the result set. Applications might issue
+        queries against this view that only vary in their <codeph>LIMIT</codeph>, <codeph>ORDER
+        BY</codeph>, and similar simple clauses.
       </li>
     </ul>
 
     <p>
-      For queries that require repeating complicated clauses over and over again, for example in the select list,
-      <codeph>ORDER BY</codeph>, and <codeph>GROUP BY</codeph> clauses, you can use the <codeph>WITH</codeph>
-      clause as an alternative to creating a view.
+      For queries that require repeating complicated clauses over and over again, for example in
+      the select list, <codeph>ORDER BY</codeph>, and <codeph>GROUP BY</codeph> clauses, you can
+      use the <codeph>WITH</codeph> clause as an alternative to creating a view.
+    </p>
+
+    <p>
+      You can optionally specify table-level and column-level comments, as in the
+      <codeph>CREATE TABLE</codeph> statement.
     </p>
 
     <p conref="../shared/impala_common.xml#common/complex_types_blurb"/>
+
     <p conref="../shared/impala_common.xml#common/complex_types_views"/>
+
     <p conref="../shared/impala_common.xml#common/complex_types_views_caveat"/>
 
     <p conref="../shared/impala_common.xml#common/sync_ddl_blurb"/>
 
     <p conref="../shared/impala_common.xml#common/security_blurb"/>
+
     <p conref="../shared/impala_common.xml#common/redaction_yes"/>
 
     <p conref="../shared/impala_common.xml#common/cancel_blurb_no"/>
@@ -121,37 +140,38 @@ select * from <varname>view_name</varname> order by c1 desc limit 10;</codeblock
 
     <p conref="../shared/impala_common.xml#common/example_blurb"/>
 
-<!-- TK: Elaborate on these, show queries and real output. -->
-
 <codeblock>-- Create a view that is exactly the same as the underlying table.
-create view v1 as select * from t1;
+CREATE VIEW v1 AS SELECT * FROM t1;
 
 -- Create a view that includes only certain columns from the underlying table.
-create view v2 as select c1, c3, c7 from t1;
+CREATE VIEW v2 AS SELECT c1, c3, c7 FROM t1;
 
 -- Create a view that filters the values from the underlying table.
-create view v3 as select distinct c1, c3, c7 from t1 where c1 is not null and c5 &gt; 0;
+CREATE VIEW v3 AS SELECT DISTINCT c1, c3, c7 FROM t1 WHERE c1 IS NOT NULL AND c5 &gt; 0;
 
 -- Create a view that reorders and renames columns from the underlying table.
-create view v4 as select c4 as last_name, c6 as address, c2 as birth_date from t1;
+CREATE VIEW v4 AS SELECT c4 AS last_name, c6 AS address, c2 AS birth_date FROM t1;
 
 -- Create a view that runs functions to convert or transform certain columns.
-create view v5 as select c1, cast(c3 as string) c3, concat(c4,c5) c5, trim(c6) c6, "Constant" c8 from t1;
+CREATE VIEW v5 AS SELECT c1, CAST(c3 AS STRING) c3, CONCAT(c4,c5) c5, TRIM(c6) c6, "Constant" c8 FROM t1;
 
 -- Create a view that hides the complexity of a view query.
-create view v6 as select t1.c1, t2.c2 from t1 join t2 on t1.id = t2.id;
-</codeblock>
+CREATE VIEW v6 AS SELECT t1.c1, t2.c2 FROM t1 JOIN t2 ON t1.id = t2.id;
 
-<!-- These examples show CREATE VIEW and corresponding DROP VIEW statements, with different combinations
-     of qualified and unqualified names. -->
-
-    <p conref="../shared/impala_common.xml#common/create_drop_view_examples"/>
+-- Create a view with a column comment and a table comment.
+CREATE VIEW v7 (c1 COMMENT 'Comment for c1', c2) COMMENT 'Comment for v7' AS SELECT t1.c1, t1.c2 FROM t1;
+</codeblock>
 
     <p conref="../shared/impala_common.xml#common/related_info"/>
 
     <p>
-      <xref href="impala_views.xml#views"/>, <xref href="impala_alter_view.xml#alter_view"/>,
-      <xref href="impala_drop_view.xml#drop_view"/>
+      <xref href="impala_views.xml#views"/>,
+      <xref
+        href="impala_alter_view.xml#alter_view"/>,
+      <xref
+        href="impala_drop_view.xml#drop_view"/>
     </p>
+
   </conbody>
+
 </concept>